darshan-core.c 52.4 KB
Newer Older
1
/*
Shane Snyder's avatar
Shane Snyder committed
2 3 4
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
5 6
 */

7
#define _XOPEN_SOURCE 500
8
#define _GNU_SOURCE
9

10 11 12 13 14 15
#include "darshan-runtime-config.h"

#include <stdio.h>
#ifdef HAVE_MNTENT_H
#include <mntent.h>
#endif
16 17 18 19 20
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <limits.h>
#include <pthread.h>
21 22 23
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/vfs.h>
24
#include <zlib.h>
25
#include <mpi.h>
26
#include <assert.h>
27

28
#include "uthash.h"
29 30
#include "darshan-core.h"

31
extern char* __progname;
32
extern char* __progname_full;
33

34
/* internal variable delcarations */
35
static struct darshan_core_runtime *darshan_core = NULL;
36
static pthread_mutex_t darshan_core_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
37
static int my_rank = -1;
38
static int nprocs = -1;
39
static int darshan_mem_alignment = 1;
40

41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
/* paths prefixed with the following directories are not traced by darshan */
char* darshan_path_exclusions[] = {
"/etc/",
"/dev/",
"/usr/",
"/bin/",
"/boot/",
"/lib/",
"/opt/",
"/sbin/",
"/sys/",
"/proc/",
NULL
};

Shane Snyder's avatar
Shane Snyder committed
56 57 58
#define DARSHAN_CORE_LOCK() pthread_mutex_lock(&darshan_core_mutex)
#define DARSHAN_CORE_UNLOCK() pthread_mutex_unlock(&darshan_core_mutex)

59 60 61 62 63 64
/* FS mount information */
#define DARSHAN_MAX_MNTS 64
#define DARSHAN_MAX_MNT_PATH 256
#define DARSHAN_MAX_MNT_TYPE 32
struct mnt_data
{
65
    int block_size;
66 67 68 69 70 71
    char path[DARSHAN_MAX_MNT_PATH];
    char type[DARSHAN_MAX_MNT_TYPE];
};
static struct mnt_data mnt_data_array[DARSHAN_MAX_MNTS];
static int mnt_data_count = 0;

72 73
/* prototypes for internal helper functions */
static void darshan_core_initialize(
74
    int argc, char **argv);
75 76 77
static void darshan_core_shutdown(
    void);
static void darshan_core_cleanup(
78
    struct darshan_core_runtime* core);
79 80 81
static void darshan_get_logfile_name(
    char* logfile_name, int jobid, struct tm* start_tm);
static void darshan_log_record_hints_and_ver(
82 83 84 85 86 87
    struct darshan_core_runtime* core);
static void darshan_get_exe_and_mounts_root(
    struct darshan_core_runtime *core, char* trailing_data,
    int space_left);
static char* darshan_get_exe_and_mounts(
    struct darshan_core_runtime *core);
88 89
static void darshan_block_size_from_path(
    const char *path, int *block_size);
90
static void darshan_get_shared_records(
91
    struct darshan_core_runtime *core, darshan_record_id *shared_recs);
92
static int darshan_log_open_all(
93
    char *logfile_name, MPI_File *log_fh);
94 95 96
static int darshan_deflate_buffer(
    void **pointers, int *lengths, int count, int nocomp_flag,
    char *comp_buf, int *comp_length);
97
static int darshan_log_write_record_hash(
98
    MPI_File log_fh, struct darshan_core_runtime *core,
99 100 101 102
    uint64_t *inout_off);
static int darshan_log_append_all(
    MPI_File log_fh, struct darshan_core_runtime *core, void *buf,
    int count, uint64_t *inout_off, uint64_t *agg_uncomp_sz);
103

104
/* intercept MPI initialize and finalize to manage darshan core runtime */
105 106 107 108 109 110 111 112 113 114
int MPI_Init(int *argc, char ***argv)
{
    int ret;

    ret = DARSHAN_MPI_CALL(PMPI_Init)(argc, argv);
    if(ret != MPI_SUCCESS)
    {
        return(ret);
    }

115 116 117 118 119 120 121 122 123
    if(argc && argv)
    {
        darshan_core_initialize(*argc, *argv);
    }
    else
    {
        /* we don't see argc and argv here in fortran */
        darshan_core_initialize(0, NULL);
    }
124 125 126 127

    return(ret);
}

128
int MPI_Init_thread(int *argc, char ***argv, int required, int *provided)
129 130 131 132
{
    int ret;

    ret = DARSHAN_MPI_CALL(PMPI_Init_thread)(argc, argv, required, provided);
133
    if(ret != MPI_SUCCESS)
134 135 136 137
    {
        return(ret);
    }

138 139 140 141 142 143 144 145 146
    if(argc && argv)
    {
        darshan_core_initialize(*argc, *argv);
    }
    else
    {
        /* we don't see argc and argv here in fortran */
        darshan_core_initialize(0, NULL);
    }
147 148 149 150 151 152 153 154

    return(ret);
}

int MPI_Finalize(void)
{
    int ret;

155
    darshan_core_shutdown();
156 157 158 159 160

    ret = DARSHAN_MPI_CALL(PMPI_Finalize)();
    return(ret);
}

161 162
/* *********************************** */

163
static void darshan_core_initialize(int argc, char **argv)
164 165 166 167
{
    int i;
    int internal_timing_flag = 0;
    double init_start, init_time, init_max;
168
    char *envstr;
169 170 171
    char* truncate_string = "<TRUNCATED>";
    int truncate_offset;
    int chars_left = 0;
172 173
    int ret;
    int tmpval;
174 175

    DARSHAN_MPI_CALL(PMPI_Comm_size)(MPI_COMM_WORLD, &nprocs);
176
    DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &my_rank);
177 178 179 180

    if(getenv("DARSHAN_INTERNAL_TIMING"))
        internal_timing_flag = 1;

181
    if(internal_timing_flag)
182 183 184
        init_start = DARSHAN_MPI_CALL(PMPI_Wtime)();

    /* setup darshan runtime if darshan is enabled and hasn't been initialized already */
185
    if(!getenv("DARSHAN_DISABLE") && !darshan_core)
186
    {
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
        #if (__CP_MEM_ALIGNMENT < 1)
            #error Darshan must be configured with a positive value for --with-mem-align
        #endif
        envstr = getenv("DARSHAN_MEMALIGN");
        if(envstr)
        {
            ret = sscanf(envstr, "%d", &tmpval);
            /* silently ignore if the env variable is set poorly */
            if(ret == 1 && tmpval > 0)
            {
                darshan_mem_alignment = tmpval;
            }
        }
        else
        {
            darshan_mem_alignment = __CP_MEM_ALIGNMENT;
        }

        /* avoid floating point errors on faulty input */
        if (darshan_mem_alignment < 1)
        {
            darshan_mem_alignment = 1;
        }
210 211 212 213

        /* allocate structure to track darshan_core_runtime information */
        darshan_core = malloc(sizeof(*darshan_core));
        if(darshan_core)
214
        {
215
            memset(darshan_core, 0, sizeof(*darshan_core));
216

217 218 219 220
            darshan_core->log_job.uid = getuid();
            darshan_core->log_job.start_time = time(NULL);
            darshan_core->log_job.nprocs = nprocs;
            darshan_core->wtime_offset = DARSHAN_MPI_CALL(PMPI_Wtime)();
221 222

            /* record exe and arguments */
223
            for(i=0; i<argc; i++)
224
            {
225
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
226 227
                strncat(darshan_core->exe, argv[i], chars_left);
                if(i < (argc-1))
228
                {
229
                    chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
230
                    strncat(darshan_core->exe, " ", chars_left);
231 232 233 234 235 236 237 238
                }
            }

            /* if we don't see any arguments, then use glibc symbol to get
             * program name at least (this happens in fortran)
             */
            if(argc == 0)
            {
239
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
240
                strncat(darshan_core->exe, __progname_full, chars_left);
241
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
242
                strncat(darshan_core->exe, " <unknown args>", chars_left);
243 244 245 246 247
            }

            if(chars_left == 0)
            {
                /* we ran out of room; mark that string was truncated */
248
                truncate_offset = DARSHAN_EXE_LEN - strlen(truncate_string);
249
                sprintf(&darshan_core->exe[truncate_offset], "%s",
250 251
                    truncate_string);
            }
252 253 254

            /* collect information about command line and mounted file systems */
            darshan_core->trailing_data = darshan_get_exe_and_mounts(darshan_core);
255 256 257 258 259 260 261 262
        }
    }

    if(internal_timing_flag)
    {
        init_time = DARSHAN_MPI_CALL(PMPI_Wtime)() - init_start;
        DARSHAN_MPI_CALL(PMPI_Reduce)(&init_time, &init_max, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
263
        if(my_rank == 0)
264 265 266 267 268 269 270 271 272
        {
            printf("#darshan:<op>\t<nprocs>\t<time>\n");
            printf("darshan:init\t%d\t%f\n", nprocs, init_max);
        }
    }

    return;
}

273
static void darshan_core_shutdown()
274
{
275
    int i;
276
    char *logfile_name;
277
    struct darshan_core_runtime *final_core;
278
    int internal_timing_flag = 0;
279 280
    char *envjobid;
    char *jobid_str;
281
    int jobid;
282
    struct tm *start_tm;
283
    time_t start_time_tmp;
284 285
    int ret = 0;
    int all_ret = 0;
286 287
    int64_t first_start_time;
    int64_t last_end_time;
288 289
    int local_mod_use[DARSHAN_MAX_MODS] = {0};
    int global_mod_use_count[DARSHAN_MAX_MODS] = {0};
290
    darshan_record_id shared_recs[DARSHAN_CORE_MAX_RECORDS] = {0};
291
    double start_log_time;
292 293 294 295 296 297 298
    double open1, open2;
    double job1, job2;
    double rec1, rec2;
    double mod1[DARSHAN_MAX_MODS] = {0};
    double mod2[DARSHAN_MAX_MODS] = {0};
    double header1, header2;
    double tm_end;
299 300
    uint64_t gz_fp = 0;
    uint64_t tmp_off = 0;
301 302
    MPI_File log_fh;
    MPI_Status status;
303 304 305 306

    if(getenv("DARSHAN_INTERNAL_TIMING"))
        internal_timing_flag = 1;

307 308
    start_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();

Shane Snyder's avatar
Shane Snyder committed
309
    /* disable darhan-core while we shutdown */
310
    DARSHAN_CORE_LOCK();
311
    if(!darshan_core)
312
    {
313
        DARSHAN_CORE_UNLOCK();
314 315
        return;
    }
316 317
    final_core = darshan_core;
    darshan_core = NULL;
Shane Snyder's avatar
Shane Snyder committed
318

319
    /* we also need to set which modules were registered on this process and
Shane Snyder's avatar
Shane Snyder committed
320 321 322 323 324 325 326 327 328 329
     * disable tracing within those modules while we shutdown
     */
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
    {
        if(final_core->mod_array[i])
        {
            local_mod_use[i] = 1;
            final_core->mod_array[i]->mod_funcs.disable_instrumentation();
        }
    }
330
    DARSHAN_CORE_UNLOCK();
331 332 333 334

    logfile_name = malloc(PATH_MAX);
    if(!logfile_name)
    {
335
        darshan_core_cleanup(final_core);
336 337 338
        return;
    }

339
    /* set darshan job id/metadata and constuct log file name on rank 0 */
340
    if(my_rank == 0)
341 342 343
    {
        /* Use CP_JOBID_OVERRIDE for the env var or CP_JOBID */
        envjobid = getenv(CP_JOBID_OVERRIDE);
344
        if(!envjobid)
345 346 347 348
        {
            envjobid = CP_JOBID;
        }

349
        /* find a job id */
350 351 352 353 354 355 356 357 358 359 360 361
        jobid_str = getenv(envjobid);
        if(jobid_str)
        {
            /* in cobalt we can find it in env var */
            ret = sscanf(jobid_str, "%d", &jobid);
        }
        if(!jobid_str || ret != 1)
        {
            /* use pid as fall back */
            jobid = getpid();
        }

362
        final_core->log_job.jobid = (int64_t)jobid;
363

364
        /* if we are using any hints to write the log file, then record those
365
         * hints with the darshan job information
366
         */
367
        darshan_log_record_hints_and_ver(final_core);
368

369
        /* use human readable start time format in log filename */
370
        start_time_tmp = final_core->log_job.start_time;
371
        start_tm = localtime(&start_time_tmp);
372

373 374
        /* construct log file name */
        darshan_get_logfile_name(logfile_name, jobid, start_tm);
375 376 377 378 379 380 381 382 383
    }

    /* broadcast log file name */
    DARSHAN_MPI_CALL(PMPI_Bcast)(logfile_name, PATH_MAX, MPI_CHAR, 0,
        MPI_COMM_WORLD);

    if(strlen(logfile_name) == 0)
    {
        /* failed to generate log file name */
384
        free(logfile_name);
385
        darshan_core_cleanup(final_core);
386 387 388
        return;
    }

389
    final_core->log_job.end_time = time(NULL);
390

391 392 393
    /* reduce to report first start time and last end time across all ranks
     * at rank 0
     */
394 395
    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.start_time, &first_start_time, 1, MPI_LONG_LONG, MPI_MIN, 0, MPI_COMM_WORLD);
    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.end_time, &last_end_time, 1, MPI_LONG_LONG, MPI_MAX, 0, MPI_COMM_WORLD);
396 397
    if(my_rank == 0)
    {
398 399
        final_core->log_job.start_time = first_start_time;
        final_core->log_job.end_time = last_end_time;
400
    }
401

402 403 404
    /* reduce the number of times a module was opened globally and bcast to everyone */   
    DARSHAN_MPI_CALL(PMPI_Allreduce)(local_mod_use, global_mod_use_count, DARSHAN_MAX_MODS, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

405
    /* get a list of records which are shared across all processes */
406
    darshan_get_shared_records(final_core, shared_recs);
407

408 409
    if(internal_timing_flag)
        open1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
410
    /* collectively open the darshan log file */
411
    ret = darshan_log_open_all(logfile_name, &log_fh);
412 413
    if(internal_timing_flag)
        open2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
414 415 416 417 418 419 420 421

    /* error out if unable to open log file */
    DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
        MPI_LOR, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        if(my_rank == 0)
        {
422 423
            fprintf(stderr, "darshan library warning: unable to open log file %s\n",
                logfile_name);
424 425 426
            unlink(logfile_name);
        }
        free(logfile_name);
427
        darshan_core_cleanup(final_core);
428 429 430
        return;
    }

431 432
    if(internal_timing_flag)
        job1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
433
    /* rank 0 is responsible for writing the compressed darshan job information */
Shane Snyder's avatar
Shane Snyder committed
434
    if(my_rank == 0)
435
    {
436 437 438
        void *pointers[2] = {&final_core->log_job, final_core->trailing_data};
        int lengths[2] = {sizeof(struct darshan_job), DARSHAN_EXE_LEN+1};
        int comp_buf_sz = 0;
439

440
        /* compress the job info and the trailing mount/exe data */
441
        all_ret = darshan_deflate_buffer(pointers, lengths, 2, 0,
442 443
            final_core->comp_buf, &comp_buf_sz);
        if(all_ret)
444
        {
445
            fprintf(stderr, "darshan library warning: unable to compress job data\n");
446
            unlink(logfile_name);
447
        }
448 449 450
        else
        {
            /* write the job information, preallocing space for the log header */
451 452 453
            gz_fp += sizeof(struct darshan_header) + 23; /* gzip headers/trailers ... */
            all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, gz_fp,
                final_core->comp_buf, comp_buf_sz, MPI_BYTE, &status);
454 455 456 457 458
            if(all_ret != MPI_SUCCESS)
            {
                fprintf(stderr, "darshan library warning: unable to write job data to log file %s\n",
                        logfile_name);
                unlink(logfile_name);
Shane Snyder's avatar
Shane Snyder committed
459
                
460
            }
Shane Snyder's avatar
Shane Snyder committed
461

462 463
            /* set the beginning offset of record hash, which follows job info just written */
            gz_fp += comp_buf_sz;
464
        }
465 466
    }

467 468 469 470 471
    /* error out if unable to write job information */
    DARSHAN_MPI_CALL(PMPI_Bcast)(&all_ret, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        free(logfile_name);
472
        darshan_core_cleanup(final_core);
473 474
        return;
    }
475 476
    if(internal_timing_flag)
        job2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
477

478 479
    if(internal_timing_flag)
        rec1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
480
    /* write the record name->id hash to the log file */
481 482
    ret = darshan_log_write_record_hash(log_fh, final_core, &gz_fp);
    tmp_off = final_core->log_header.rec_map.off + final_core->log_header.rec_map.len;
483

484
    /* error out if unable to write record hash */
485 486 487 488 489 490
    DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
        MPI_LOR, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        if(my_rank == 0)
        {
491
            fprintf(stderr, "darshan library warning: unable to write record hash to log file %s\n",
492
                logfile_name);
493
            unlink(logfile_name);
494 495
        }
        free(logfile_name);
496
        darshan_core_cleanup(final_core);
497 498
        return;
    }
Shane Snyder's avatar
Shane Snyder committed
499 500
    if(internal_timing_flag)
        rec2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
501 502

    /* loop over globally used darshan modules and:
503
     *      - perform shared file reductions, if possible
504
     *      - get final output buffer
505
     *      - compress (zlib) provided output buffer
Shane Snyder's avatar
Shane Snyder committed
506
     *      - append compressed buffer to log file
507 508
     *      - add module index info (file offset/length) to log header
     *      - shutdown the module
509
     */
510
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
511
    {
512
        struct darshan_core_module* this_mod = final_core->mod_array[i];
513 514
        darshan_record_id mod_shared_recs[DARSHAN_CORE_MAX_RECORDS];
        struct darshan_core_record_ref *ref = NULL;
515
        void* mod_buf = NULL;
516
        int mod_buf_sz = 0;
517
        int j;
518

519
        if(global_mod_use_count[i] == 0)
520 521
        {
            if(my_rank == 0)
522 523 524 525
            {
                final_core->log_header.mod_map[i].off = 0;
                final_core->log_header.mod_map[i].len = 0;
            }
526
            continue;
527
        }
528 529 530 531
 
        if(internal_timing_flag)
            mod1[i] = DARSHAN_MPI_CALL(PMPI_Wtime)();   
        /* if all processes used this module, prepare to do a shared file reduction */
Shane Snyder's avatar
Shane Snyder committed
532
        if(global_mod_use_count[i] == nprocs)
533
        {
534 535 536 537 538 539 540 541 542 543 544 545 546
            int shared_rec_count = 0;
            int rec_sz = 0;
            void *red_send_buf = NULL, *red_recv_buf = NULL;
            MPI_Datatype red_type;
            MPI_Op red_op;

            /* set the shared file list for this module */
            memset(mod_shared_recs, 0, DARSHAN_CORE_MAX_RECORDS * sizeof(darshan_record_id));
            for(j = 0; j < DARSHAN_CORE_MAX_RECORDS && shared_recs[j] != 0; j++)
            {
                HASH_FIND(hlink, final_core->rec_hash, &shared_recs[j],
                    sizeof(darshan_record_id), ref);
                assert(ref);
Shane Snyder's avatar
Shane Snyder committed
547
                if(DARSHAN_CORE_MOD_ISSET(ref->global_mod_flags, i))
548 549 550 551 552 553
                {
                    mod_shared_recs[shared_rec_count++] = shared_recs[j];
                }
            }

            /* if there are globally shared files, do a shared file reduction */
554 555
            if(shared_rec_count && this_mod->mod_funcs.prepare_for_reduction &&
               this_mod->mod_funcs.record_reduction_op)
556 557 558 559 560 561 562 563 564 565 566 567 568
            {
                this_mod->mod_funcs.prepare_for_reduction(mod_shared_recs, &shared_rec_count,
                    &red_send_buf, &red_recv_buf, &rec_sz);

                if(shared_rec_count)
                {
                    /* construct a datatype for a file record.  This is serving no purpose
                     * except to make sure we can do a reduction on proper boundaries
                     */
                    DARSHAN_MPI_CALL(PMPI_Type_contiguous)(rec_sz, MPI_BYTE, &red_type);
                    DARSHAN_MPI_CALL(PMPI_Type_commit)(&red_type);

                    /* register a reduction operator for this module */
569
                    DARSHAN_MPI_CALL(PMPI_Op_create)(this_mod->mod_funcs.record_reduction_op,
570 571 572 573 574 575 576 577 578 579
                        1, &red_op);

                    /* reduce shared file records for this module */
                    DARSHAN_MPI_CALL(PMPI_Reduce)(red_send_buf, red_recv_buf,
                        shared_rec_count, red_type, red_op, 0, MPI_COMM_WORLD);

                    DARSHAN_MPI_CALL(PMPI_Type_free)(&red_type);
                    DARSHAN_MPI_CALL(PMPI_Op_free)(&red_op);
                }
            }
580
        }
581

582
        /* if module is registered locally, get the corresponding output buffer */
583
        if(this_mod)
584 585
        {
            /* get output buffer from module */
586
            this_mod->mod_funcs.get_output_data(&mod_buf, &mod_buf_sz);
587 588
        }

589
        final_core->log_header.mod_map[i].off = tmp_off;
590

591 592 593 594
        /* append this module's data to the darshan log */
        ret = darshan_log_append_all(log_fh, final_core, mod_buf, mod_buf_sz,
            &gz_fp, &(final_core->log_header.mod_map[i].len));
        tmp_off += final_core->log_header.mod_map[i].len;
595

596
        /* error out if the log append failed */
597 598 599
        DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
            MPI_LOR, MPI_COMM_WORLD);
        if(all_ret != 0)
600
        {
601 602 603 604 605 606 607 608
            if(my_rank == 0)
            {
                fprintf(stderr,
                    "darshan library warning: unable to write %s module data to log file %s\n",
                    darshan_module_names[i], logfile_name);
                unlink(logfile_name);
            }
            free(logfile_name);
609
            darshan_core_cleanup(final_core);
610
            return;
611 612 613
        }

        /* shutdown module if registered locally */
614
        if(this_mod)
615 616 617
        {
            this_mod->mod_funcs.shutdown();
        }
618 619
        if(internal_timing_flag)
            mod2[i] = DARSHAN_MPI_CALL(PMPI_Wtime)();
620 621
    }

622 623
    if(internal_timing_flag)
        header1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
624
    /* rank 0 is responsible for writing the log header */
625 626
    if(my_rank == 0)
    {
627 628 629
        void *header_buf = &(final_core->log_header);
        int header_buf_sz = sizeof(struct darshan_header);
        int comp_buf_sz = 0;
630

631 632 633 634 635 636 637 638 639 640 641 642 643 644
        /* initialize the remaining header fields */
        strcpy(final_core->log_header.version_string, DARSHAN_LOG_VERSION);
        final_core->log_header.magic_nr = DARSHAN_MAGIC_NR;

        /* deflate the header */
        /* NOTE: the header is not actually compressed because space for it must
         *       be preallocated before writing. i.e., the "compressed" header
         *       must be constant sized, sizeof(struct darshan_header) + 23.
         *       it is still necessary to deflate the header or the resulting log
         *       file will not be a valid gzip file.
         */
        all_ret = darshan_deflate_buffer((void **)&header_buf, &header_buf_sz, 1, 1,
            final_core->comp_buf, &comp_buf_sz);
        if(all_ret)
645
        {
646
            fprintf(stderr, "darshan library warning: unable to compress header\n");
647
            unlink(logfile_name);
648
        }
649 650 651 652 653 654 655 656 657 658 659
        else
        {
            all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, 0, final_core->comp_buf,
                comp_buf_sz, MPI_BYTE, &status);
            if(all_ret != MPI_SUCCESS)
            {
                fprintf(stderr, "darshan library warning: unable to write header to log file %s\n",
                        logfile_name);
                unlink(logfile_name);
            }
        }
660 661
    }

662 663 664 665 666
    /* error out if unable to write log header */
    DARSHAN_MPI_CALL(PMPI_Bcast)(&all_ret, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        free(logfile_name);
667
        darshan_core_cleanup(final_core);
668 669
        return;
    }
670 671
    if(internal_timing_flag)
        header2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
672

673 674 675 676 677 678
    DARSHAN_MPI_CALL(PMPI_File_close)(&log_fh);

    /* if we got this far, there are no errors, so rename from *.darshan_partial
     * to *-<logwritetime>.darshan.gz, which indicates that this log file is
     * complete and ready for analysis
     */
679 680
    if(my_rank == 0)
    {
Shane Snyder's avatar
Shane Snyder committed
681
        if(getenv("DARSHAN_LOGFILE"))
682
        {
683
#ifdef __CP_GROUP_READABLE_LOGS
Shane Snyder's avatar
Shane Snyder committed
684
            chmod(logfile_name, (S_IRUSR|S_IRGRP));
685
#else
Shane Snyder's avatar
Shane Snyder committed
686
            chmod(logfile_name, (S_IRUSR));
687
#endif
Shane Snyder's avatar
Shane Snyder committed
688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711
        }
        else
        {
            char* tmp_index;
            double end_log_time;
            char* new_logfile_name;

            new_logfile_name = malloc(PATH_MAX);
            if(new_logfile_name)
            {
                new_logfile_name[0] = '\0';
                end_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();
                strcat(new_logfile_name, logfile_name);
                tmp_index = strstr(new_logfile_name, ".darshan_partial");
                sprintf(tmp_index, "_%d.darshan.gz", (int)(end_log_time-start_log_time+1));
                rename(logfile_name, new_logfile_name);
                /* set permissions on log file */
#ifdef __CP_GROUP_READABLE_LOGS
                chmod(new_logfile_name, (S_IRUSR|S_IRGRP));
#else
                chmod(new_logfile_name, (S_IRUSR));
#endif
                free(new_logfile_name);
            }
712
        }
713
    }
714

715
    free(logfile_name);
716
    darshan_core_cleanup(final_core);
717

718
    if(internal_timing_flag)
719
    {
720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757
        double open_tm, open_slowest;
        double header_tm, header_slowest;
        double job_tm, job_slowest;
        double rec_tm, rec_slowest;
        double mod_tm[DARSHAN_MAX_MODS], mod_slowest[DARSHAN_MAX_MODS];
        double all_tm, all_slowest;

        tm_end = DARSHAN_MPI_CALL(PMPI_Wtime)();

        open_tm = open2 - open1;
        header_tm = header2 - header1;
        job_tm = job2 - job1;
        rec_tm = rec2 - rec1;
        all_tm = tm_end - start_log_time;
        for(i = 0;i < DARSHAN_MAX_MODS; i++)
        {
            mod_tm[i] = mod2[i] - mod1[i];
        }

        DARSHAN_MPI_CALL(PMPI_Reduce)(&open_tm, &open_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&header_tm, &header_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&job_tm, &job_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&rec_tm, &rec_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&all_tm, &all_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(mod_tm, mod_slowest, DARSHAN_MAX_MODS,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

        if(my_rank == 0)
        {
            printf("#darshan:<op>\t<nprocs>\t<time>\n");
            printf("darshan:log_open\t%d\t%f\n", nprocs, open_slowest);
            printf("darshan:job_write\t%d\t%f\n", nprocs, job_slowest);
            printf("darshan:hash_write\t%d\t%f\n", nprocs, rec_slowest);
Shane Snyder's avatar
Shane Snyder committed
758
            printf("darshan:header_write\t%d\t%f\n", nprocs, header_slowest);
759 760 761 762
            for(i = 0; i < DARSHAN_MAX_MODS; i++)
            {
                if(global_mod_use_count[i])
                    printf("darshan:%s_shutdown\t%d\t%f\n", darshan_module_names[i],
Shane Snyder's avatar
Shane Snyder committed
763
                        nprocs, mod_slowest[i]);
764 765 766
            }
            printf("darshan:core_shutdown\t%d\t%f\n", nprocs, all_slowest);
        }
767 768 769 770
    }
    
    return;
}
771

772
/* free darshan core data structures to shutdown */
773
static void darshan_core_cleanup(struct darshan_core_runtime* core)
774
{
775
    struct darshan_core_record_ref *tmp, *ref;
776
    int i;
777

778 779 780 781 782 783
    HASH_ITER(hlink, core->rec_hash, ref, tmp)
    {
        HASH_DELETE(hlink, core->rec_hash, ref);
        free(ref->rec.name);
        free(ref);
    }
784

785
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
786
    {
787
        if(core->mod_array[i])
788
        {        
789 790
            free(core->mod_array[i]);
            core->mod_array[i] = NULL;
791
        }
792
    }
793

794
    free(core->trailing_data);
795
    free(core);
796 797 798 799

    return;
}

800
/* construct the darshan log file name */
801
static void darshan_get_logfile_name(char* logfile_name, int jobid, struct tm* start_tm)
802
{
Shane Snyder's avatar
Shane Snyder committed
803
    char* user_logfile_name;
804 805 806 807 808 809 810 811 812 813 814 815 816
    char* logpath;
    char* logname_string;
    char* logpath_override = NULL;
#ifdef __CP_LOG_ENV
    char env_check[256];
    char* env_tok;
#endif
    uint64_t hlevel;
    char hname[HOST_NAME_MAX];
    uint64_t logmod;
    char cuser[L_cuserid] = {0};
    int ret;

Shane Snyder's avatar
Shane Snyder committed
817 818 819 820 821 822 823 824 825 826 827 828 829 830 831
    /* first, check if user specifies a complete logpath to use */
    user_logfile_name = getenv("DARSHAN_LOGFILE");
    if(user_logfile_name)
    {
        if(strlen(user_logfile_name) >= (PATH_MAX-1))
        {
            fprintf(stderr, "darshan library warning: user log file name too long.\n");
            logfile_name[0] = '\0';
        }
        else
        {
            strcpy(logfile_name, user_logfile_name);
        }
    }
    else
832
    {
Shane Snyder's avatar
Shane Snyder committed
833 834 835 836 837 838
        /* otherwise, generate the log path automatically */

        /* Use CP_LOG_PATH_OVERRIDE for the value or __CP_LOG_PATH */
        logpath = getenv(CP_LOG_PATH_OVERRIDE);
        if(!logpath)
        {
839
#ifdef __CP_LOG_PATH
Shane Snyder's avatar
Shane Snyder committed
840
            logpath = __CP_LOG_PATH;
841
#endif
Shane Snyder's avatar
Shane Snyder committed
842
        }
843

Shane Snyder's avatar
Shane Snyder committed
844 845 846 847 848 849 850 851 852 853
        /* get the username for this job.  In order we will try each of the
         * following until one of them succeeds:
         *
         * - cuserid()
         * - getenv("LOGNAME")
         * - snprintf(..., geteuid());
         *
         * Note that we do not use getpwuid() because it generally will not
         * work in statically compiled binaries.
         */
854 855

#ifndef DARSHAN_DISABLE_CUSERID
Shane Snyder's avatar
Shane Snyder committed
856
        cuserid(cuser);
857 858
#endif

Shane Snyder's avatar
Shane Snyder committed
859 860
        /* if cuserid() didn't work, then check the environment */
        if(strcmp(cuser, "") == 0)
861
        {
Shane Snyder's avatar
Shane Snyder committed
862 863 864 865 866
            logname_string = getenv("LOGNAME");
            if(logname_string)
            {
                strncpy(cuser, logname_string, (L_cuserid-1));
            }
867 868
        }

Shane Snyder's avatar
Shane Snyder committed
869 870 871 872 873 874
        /* if cuserid() and environment both fail, then fall back to uid */
        if(strcmp(cuser, "") == 0)
        {
            uid_t uid = geteuid();
            snprintf(cuser, sizeof(cuser), "%u", uid);
        }
875

Shane Snyder's avatar
Shane Snyder committed
876 877 878 879
        /* generate a random number to help differentiate the log */
        hlevel=DARSHAN_MPI_CALL(PMPI_Wtime)() * 1000000;
        (void)gethostname(hname, sizeof(hname));
        logmod = darshan_hash((void*)hname,strlen(hname),hlevel);
880

Shane Snyder's avatar
Shane Snyder committed
881 882 883 884
        /* see if darshan was configured using the --with-logpath-by-env
         * argument, which allows the user to specify an absolute path to
         * place logs via an env variable.
         */
885
#ifdef __CP_LOG_ENV
Shane Snyder's avatar
Shane Snyder committed
886 887
        /* just silently skip if the environment variable list is too big */
        if(strlen(__CP_LOG_ENV) < 256)
888
        {
Shane Snyder's avatar
Shane Snyder committed
889 890 891 892 893
            /* copy env variable list to a temporary buffer */
            strcpy(env_check, __CP_LOG_ENV);
            /* tokenize the comma-separated list */
            env_tok = strtok(env_check, ",");
            if(env_tok)
894
            {
Shane Snyder's avatar
Shane Snyder committed
895
                do
896
                {
Shane Snyder's avatar
Shane Snyder committed
897 898 899 900 901 902 903 904 905
                    /* check each env variable in order */
                    logpath_override = getenv(env_tok);
                    if(logpath_override)
                    {
                        /* stop as soon as we find a match */
                        break;
                    }
                }while((env_tok = strtok(NULL, ",")));
            }
906 907 908
        }
#endif

Shane Snyder's avatar
Shane Snyder committed
909
        if(logpath_override)
910
        {
Shane Snyder's avatar
Shane Snyder committed
911 912 913 914 915 916 917 918 919 920 921 922 923 924 925
            ret = snprintf(logfile_name, PATH_MAX,
                "%s/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
                logpath_override,
                cuser, __progname, jobid,
                (start_tm->tm_mon+1),
                start_tm->tm_mday,
                (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
                logmod);
            if(ret == (PATH_MAX-1))
            {
                /* file name was too big; squish it down */
                snprintf(logfile_name, PATH_MAX,
                    "%s/id%d.darshan_partial",
                    logpath_override, jobid);
            }
926
        }
Shane Snyder's avatar
Shane Snyder committed
927
        else if(logpath)
928
        {
Shane Snyder's avatar
Shane Snyder committed
929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948
            ret = snprintf(logfile_name, PATH_MAX,
                "%s/%d/%d/%d/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
                logpath, (start_tm->tm_year+1900),
                (start_tm->tm_mon+1), start_tm->tm_mday,
                cuser, __progname, jobid,
                (start_tm->tm_mon+1),
                start_tm->tm_mday,
                (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
                logmod);
            if(ret == (PATH_MAX-1))
            {
                /* file name was too big; squish it down */
                snprintf(logfile_name, PATH_MAX,
                    "%s/id%d.darshan_partial",
                    logpath, jobid);
            }
        }
        else
        {
            logfile_name[0] = '\0';
949 950 951 952
        }
    }

    return;
953 954
}

955
/* record any hints used to write the darshan log in the log header */
956
static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* core)
Shane Snyder's avatar