darshan-core.c 50.7 KB
Newer Older
1
/*
Shane Snyder's avatar
Shane Snyder committed
2 3 4
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
5 6
 */

7
#define _XOPEN_SOURCE 500
8
#define _GNU_SOURCE
9

10 11 12 13 14 15
#include "darshan-runtime-config.h"

#include <stdio.h>
#ifdef HAVE_MNTENT_H
#include <mntent.h>
#endif
16 17 18 19 20
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <limits.h>
#include <pthread.h>
21 22 23
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/vfs.h>
24
#include <zlib.h>
25
#include <mpi.h>
26
#include <assert.h>
27

28
#include "uthash.h"
29 30
#include "darshan-core.h"

31
extern char* __progname;
32
extern char* __progname_full;
33

34
/* internal variable delcarations */
35
static struct darshan_core_runtime *darshan_core = NULL;
36
static pthread_mutex_t darshan_core_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
37
static int my_rank = -1;
38
static int nprocs = -1;
39

Shane Snyder's avatar
Shane Snyder committed
40 41 42
#define DARSHAN_CORE_LOCK() pthread_mutex_lock(&darshan_core_mutex)
#define DARSHAN_CORE_UNLOCK() pthread_mutex_unlock(&darshan_core_mutex)

43 44 45 46 47 48 49 50 51 52 53 54 55
/* FS mount information */
#define DARSHAN_MAX_MNTS 64
#define DARSHAN_MAX_MNT_PATH 256
#define DARSHAN_MAX_MNT_TYPE 32
struct mnt_data
{
    int64_t block_size;
    char path[DARSHAN_MAX_MNT_PATH];
    char type[DARSHAN_MAX_MNT_TYPE];
};
static struct mnt_data mnt_data_array[DARSHAN_MAX_MNTS];
static int mnt_data_count = 0;

56 57
/* prototypes for internal helper functions */
static void darshan_core_initialize(
58
    int argc, char **argv);
59 60 61
static void darshan_core_shutdown(
    void);
static void darshan_core_cleanup(
62
    struct darshan_core_runtime* core);
63 64 65
static void darshan_get_logfile_name(
    char* logfile_name, int jobid, struct tm* start_tm);
static void darshan_log_record_hints_and_ver(
66 67 68 69 70 71
    struct darshan_core_runtime* core);
static void darshan_get_exe_and_mounts_root(
    struct darshan_core_runtime *core, char* trailing_data,
    int space_left);
static char* darshan_get_exe_and_mounts(
    struct darshan_core_runtime *core);
72
static void darshan_get_shared_records(
73
    struct darshan_core_runtime *core, darshan_record_id *shared_recs);
74
static int darshan_log_open_all(
75
    char *logfile_name, MPI_File *log_fh);
76 77 78
static int darshan_deflate_buffer(
    void **pointers, int *lengths, int count, int nocomp_flag,
    char *comp_buf, int *comp_length);
79
static int darshan_log_write_record_hash(
80
    MPI_File log_fh, struct darshan_core_runtime *core,
81 82 83 84
    uint64_t *inout_off);
static int darshan_log_append_all(
    MPI_File log_fh, struct darshan_core_runtime *core, void *buf,
    int count, uint64_t *inout_off, uint64_t *agg_uncomp_sz);
85

86
/* intercept MPI initialize and finalize to manage darshan core runtime */
87 88 89 90 91 92 93 94 95 96
int MPI_Init(int *argc, char ***argv)
{
    int ret;

    ret = DARSHAN_MPI_CALL(PMPI_Init)(argc, argv);
    if(ret != MPI_SUCCESS)
    {
        return(ret);
    }

97 98 99 100 101 102 103 104 105
    if(argc && argv)
    {
        darshan_core_initialize(*argc, *argv);
    }
    else
    {
        /* we don't see argc and argv here in fortran */
        darshan_core_initialize(0, NULL);
    }
106 107 108 109

    return(ret);
}

110
int MPI_Init_thread(int *argc, char ***argv, int required, int *provided)
111 112 113 114
{
    int ret;

    ret = DARSHAN_MPI_CALL(PMPI_Init_thread)(argc, argv, required, provided);
115
    if(ret != MPI_SUCCESS)
116 117 118 119
    {
        return(ret);
    }

120 121 122 123 124 125 126 127 128
    if(argc && argv)
    {
        darshan_core_initialize(*argc, *argv);
    }
    else
    {
        /* we don't see argc and argv here in fortran */
        darshan_core_initialize(0, NULL);
    }
129 130 131 132 133 134 135 136

    return(ret);
}

int MPI_Finalize(void)
{
    int ret;

137
    darshan_core_shutdown();
138 139 140 141 142

    ret = DARSHAN_MPI_CALL(PMPI_Finalize)();
    return(ret);
}

143 144
/* *********************************** */

145
static void darshan_core_initialize(int argc, char **argv)
146 147 148 149 150 151 152 153 154
{
    int i;
    int internal_timing_flag = 0;
    double init_start, init_time, init_max;
    char* truncate_string = "<TRUNCATED>";
    int truncate_offset;
    int chars_left = 0;

    DARSHAN_MPI_CALL(PMPI_Comm_size)(MPI_COMM_WORLD, &nprocs);
155
    DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &my_rank);
156 157 158 159

    if(getenv("DARSHAN_INTERNAL_TIMING"))
        internal_timing_flag = 1;

160
    if(internal_timing_flag)
161 162 163
        init_start = DARSHAN_MPI_CALL(PMPI_Wtime)();

    /* setup darshan runtime if darshan is enabled and hasn't been initialized already */
164
    if(!getenv("DARSHAN_DISABLE") && !darshan_core)
165
    {
166 167 168 169 170
        /* TODO: darshan mem alignment code? */

        /* allocate structure to track darshan_core_runtime information */
        darshan_core = malloc(sizeof(*darshan_core));
        if(darshan_core)
171
        {
172
            memset(darshan_core, 0, sizeof(*darshan_core));
173

174 175 176 177
            darshan_core->log_job.uid = getuid();
            darshan_core->log_job.start_time = time(NULL);
            darshan_core->log_job.nprocs = nprocs;
            darshan_core->wtime_offset = DARSHAN_MPI_CALL(PMPI_Wtime)();
178 179

            /* record exe and arguments */
180
            for(i=0; i<argc; i++)
181
            {
182
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
183 184
                strncat(darshan_core->exe, argv[i], chars_left);
                if(i < (argc-1))
185
                {
186
                    chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
187
                    strncat(darshan_core->exe, " ", chars_left);
188 189 190 191 192 193 194 195
                }
            }

            /* if we don't see any arguments, then use glibc symbol to get
             * program name at least (this happens in fortran)
             */
            if(argc == 0)
            {
196
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
197
                strncat(darshan_core->exe, __progname_full, chars_left);
198
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
199
                strncat(darshan_core->exe, " <unknown args>", chars_left);
200 201 202 203 204
            }

            if(chars_left == 0)
            {
                /* we ran out of room; mark that string was truncated */
205
                truncate_offset = DARSHAN_EXE_LEN - strlen(truncate_string);
206
                sprintf(&darshan_core->exe[truncate_offset], "%s",
207 208
                    truncate_string);
            }
209 210 211

            /* collect information about command line and mounted file systems */
            darshan_core->trailing_data = darshan_get_exe_and_mounts(darshan_core);
212 213 214 215 216 217 218 219
        }
    }

    if(internal_timing_flag)
    {
        init_time = DARSHAN_MPI_CALL(PMPI_Wtime)() - init_start;
        DARSHAN_MPI_CALL(PMPI_Reduce)(&init_time, &init_max, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
220
        if(my_rank == 0)
221 222 223 224 225 226 227 228 229
        {
            printf("#darshan:<op>\t<nprocs>\t<time>\n");
            printf("darshan:init\t%d\t%f\n", nprocs, init_max);
        }
    }

    return;
}

230
static void darshan_core_shutdown()
231
{
232
    int i;
233
    char *logfile_name;
234
    struct darshan_core_runtime *final_core;
235
    int internal_timing_flag = 0;
236 237
    char *envjobid;
    char *jobid_str;
238
    int jobid;
239
    struct tm *start_tm;
240
    time_t start_time_tmp;
241 242
    int ret = 0;
    int all_ret = 0;
243 244
    int64_t first_start_time;
    int64_t last_end_time;
245 246
    int local_mod_use[DARSHAN_MAX_MODS] = {0};
    int global_mod_use_count[DARSHAN_MAX_MODS] = {0};
247
    darshan_record_id shared_recs[DARSHAN_CORE_MAX_RECORDS] = {0};
248
    double start_log_time;
249 250 251 252 253 254 255
    double open1, open2;
    double job1, job2;
    double rec1, rec2;
    double mod1[DARSHAN_MAX_MODS] = {0};
    double mod2[DARSHAN_MAX_MODS] = {0};
    double header1, header2;
    double tm_end;
256 257
    uint64_t gz_fp = 0;
    uint64_t tmp_off = 0;
258 259
    MPI_File log_fh;
    MPI_Status status;
260 261 262 263

    if(getenv("DARSHAN_INTERNAL_TIMING"))
        internal_timing_flag = 1;

264 265
    start_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();

Shane Snyder's avatar
Shane Snyder committed
266
    /* disable darhan-core while we shutdown */
267
    DARSHAN_CORE_LOCK();
268
    if(!darshan_core)
269
    {
270
        DARSHAN_CORE_UNLOCK();
271 272
        return;
    }
273 274
    final_core = darshan_core;
    darshan_core = NULL;
Shane Snyder's avatar
Shane Snyder committed
275

276
    /* we also need to set which modules were registered on this process and
Shane Snyder's avatar
Shane Snyder committed
277 278 279 280 281 282 283 284 285 286
     * disable tracing within those modules while we shutdown
     */
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
    {
        if(final_core->mod_array[i])
        {
            local_mod_use[i] = 1;
            final_core->mod_array[i]->mod_funcs.disable_instrumentation();
        }
    }
287
    DARSHAN_CORE_UNLOCK();
288 289 290 291

    logfile_name = malloc(PATH_MAX);
    if(!logfile_name)
    {
292
        darshan_core_cleanup(final_core);
293 294 295
        return;
    }

296
    /* set darshan job id/metadata and constuct log file name on rank 0 */
297
    if(my_rank == 0)
298 299 300
    {
        /* Use CP_JOBID_OVERRIDE for the env var or CP_JOBID */
        envjobid = getenv(CP_JOBID_OVERRIDE);
301
        if(!envjobid)
302 303 304 305
        {
            envjobid = CP_JOBID;
        }

306
        /* find a job id */
307 308 309 310 311 312 313 314 315 316 317 318
        jobid_str = getenv(envjobid);
        if(jobid_str)
        {
            /* in cobalt we can find it in env var */
            ret = sscanf(jobid_str, "%d", &jobid);
        }
        if(!jobid_str || ret != 1)
        {
            /* use pid as fall back */
            jobid = getpid();
        }

319
        final_core->log_job.jobid = (int64_t)jobid;
320

321
        /* if we are using any hints to write the log file, then record those
322
         * hints with the darshan job information
323
         */
324
        darshan_log_record_hints_and_ver(final_core);
325

326
        /* use human readable start time format in log filename */
327
        start_time_tmp = final_core->log_job.start_time;
328
        start_tm = localtime(&start_time_tmp);
329

330 331
        /* construct log file name */
        darshan_get_logfile_name(logfile_name, jobid, start_tm);
332 333 334 335 336 337 338 339 340
    }

    /* broadcast log file name */
    DARSHAN_MPI_CALL(PMPI_Bcast)(logfile_name, PATH_MAX, MPI_CHAR, 0,
        MPI_COMM_WORLD);

    if(strlen(logfile_name) == 0)
    {
        /* failed to generate log file name */
341
        free(logfile_name);
342
        darshan_core_cleanup(final_core);
343 344 345
        return;
    }

346
    final_core->log_job.end_time = time(NULL);
347

348 349 350
    /* reduce to report first start time and last end time across all ranks
     * at rank 0
     */
351 352
    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.start_time, &first_start_time, 1, MPI_LONG_LONG, MPI_MIN, 0, MPI_COMM_WORLD);
    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.end_time, &last_end_time, 1, MPI_LONG_LONG, MPI_MAX, 0, MPI_COMM_WORLD);
353 354
    if(my_rank == 0)
    {
355 356
        final_core->log_job.start_time = first_start_time;
        final_core->log_job.end_time = last_end_time;
357
    }
358

359 360 361
    /* reduce the number of times a module was opened globally and bcast to everyone */   
    DARSHAN_MPI_CALL(PMPI_Allreduce)(local_mod_use, global_mod_use_count, DARSHAN_MAX_MODS, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

362
    /* get a list of records which are shared across all processes */
363
    darshan_get_shared_records(final_core, shared_recs);
364

365 366
    if(internal_timing_flag)
        open1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
367
    /* collectively open the darshan log file */
368
    ret = darshan_log_open_all(logfile_name, &log_fh);
369 370
    if(internal_timing_flag)
        open2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
371 372 373 374 375 376 377 378

    /* error out if unable to open log file */
    DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
        MPI_LOR, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        if(my_rank == 0)
        {
379 380
            fprintf(stderr, "darshan library warning: unable to open log file %s\n",
                logfile_name);
381 382 383
            unlink(logfile_name);
        }
        free(logfile_name);
384
        darshan_core_cleanup(final_core);
385 386 387
        return;
    }

388 389
    if(internal_timing_flag)
        job1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
390
    /* rank 0 is responsible for writing the compressed darshan job information */
Shane Snyder's avatar
Shane Snyder committed
391
    if(my_rank == 0)
392
    {
393 394 395
        void *pointers[2] = {&final_core->log_job, final_core->trailing_data};
        int lengths[2] = {sizeof(struct darshan_job), DARSHAN_EXE_LEN+1};
        int comp_buf_sz = 0;
396

397
        /* compress the job info and the trailing mount/exe data */
398
        all_ret = darshan_deflate_buffer(pointers, lengths, 2, 0,
399 400
            final_core->comp_buf, &comp_buf_sz);
        if(all_ret)
401
        {
402
            fprintf(stderr, "darshan library warning: unable to compress job data\n");
403
            unlink(logfile_name);
404
        }
405 406 407
        else
        {
            /* write the job information, preallocing space for the log header */
408 409 410
            gz_fp += sizeof(struct darshan_header) + 23; /* gzip headers/trailers ... */
            all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, gz_fp,
                final_core->comp_buf, comp_buf_sz, MPI_BYTE, &status);
411 412 413 414 415
            if(all_ret != MPI_SUCCESS)
            {
                fprintf(stderr, "darshan library warning: unable to write job data to log file %s\n",
                        logfile_name);
                unlink(logfile_name);
Shane Snyder's avatar
Shane Snyder committed
416
                
417
            }
Shane Snyder's avatar
Shane Snyder committed
418

419 420
            /* set the beginning offset of record hash, which follows job info just written */
            gz_fp += comp_buf_sz;
421
        }
422 423
    }

424 425 426 427 428
    /* error out if unable to write job information */
    DARSHAN_MPI_CALL(PMPI_Bcast)(&all_ret, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        free(logfile_name);
429
        darshan_core_cleanup(final_core);
430 431
        return;
    }
432 433
    if(internal_timing_flag)
        job2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
434

435 436
    if(internal_timing_flag)
        rec1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
437
    /* write the record name->id hash to the log file */
438 439
    ret = darshan_log_write_record_hash(log_fh, final_core, &gz_fp);
    tmp_off = final_core->log_header.rec_map.off + final_core->log_header.rec_map.len;
440

441
    /* error out if unable to write record hash */
442 443 444 445 446 447
    DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
        MPI_LOR, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        if(my_rank == 0)
        {
448
            fprintf(stderr, "darshan library warning: unable to write record hash to log file %s\n",
449
                logfile_name);
450
            unlink(logfile_name);
451 452
        }
        free(logfile_name);
453
        darshan_core_cleanup(final_core);
454 455
        return;
    }
Shane Snyder's avatar
Shane Snyder committed
456 457
    if(internal_timing_flag)
        rec2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
458 459

    /* loop over globally used darshan modules and:
460
     *      - perform shared file reductions, if possible
461
     *      - get final output buffer
462
     *      - compress (zlib) provided output buffer
Shane Snyder's avatar
Shane Snyder committed
463
     *      - append compressed buffer to log file
464 465
     *      - add module index info (file offset/length) to log header
     *      - shutdown the module
466
     */
467
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
468
    {
469
        struct darshan_core_module* this_mod = final_core->mod_array[i];
470 471
        darshan_record_id mod_shared_recs[DARSHAN_CORE_MAX_RECORDS];
        struct darshan_core_record_ref *ref = NULL;
472
        void* mod_buf = NULL;
473
        int mod_buf_sz = 0;
474
        int j;
475

476
        if(global_mod_use_count[i] == 0)
477 478
        {
            if(my_rank == 0)
479 480 481 482
            {
                final_core->log_header.mod_map[i].off = 0;
                final_core->log_header.mod_map[i].len = 0;
            }
483
            continue;
484
        }
485 486 487 488
 
        if(internal_timing_flag)
            mod1[i] = DARSHAN_MPI_CALL(PMPI_Wtime)();   
        /* if all processes used this module, prepare to do a shared file reduction */
Shane Snyder's avatar
Shane Snyder committed
489
        if(global_mod_use_count[i] == nprocs)
490
        {
491 492 493 494 495 496 497 498 499 500 501 502 503
            int shared_rec_count = 0;
            int rec_sz = 0;
            void *red_send_buf = NULL, *red_recv_buf = NULL;
            MPI_Datatype red_type;
            MPI_Op red_op;

            /* set the shared file list for this module */
            memset(mod_shared_recs, 0, DARSHAN_CORE_MAX_RECORDS * sizeof(darshan_record_id));
            for(j = 0; j < DARSHAN_CORE_MAX_RECORDS && shared_recs[j] != 0; j++)
            {
                HASH_FIND(hlink, final_core->rec_hash, &shared_recs[j],
                    sizeof(darshan_record_id), ref);
                assert(ref);
Shane Snyder's avatar
Shane Snyder committed
504
                if(DARSHAN_CORE_MOD_ISSET(ref->global_mod_flags, i))
505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524
                {
                    mod_shared_recs[shared_rec_count++] = shared_recs[j];
                }
            }

            /* if there are globally shared files, do a shared file reduction */
            if(shared_rec_count)
            {
                this_mod->mod_funcs.prepare_for_reduction(mod_shared_recs, &shared_rec_count,
                    &red_send_buf, &red_recv_buf, &rec_sz);

                if(shared_rec_count)
                {
                    /* construct a datatype for a file record.  This is serving no purpose
                     * except to make sure we can do a reduction on proper boundaries
                     */
                    DARSHAN_MPI_CALL(PMPI_Type_contiguous)(rec_sz, MPI_BYTE, &red_type);
                    DARSHAN_MPI_CALL(PMPI_Type_commit)(&red_type);

                    /* register a reduction operator for this module */
Shane Snyder's avatar
Shane Snyder committed
525
                    DARSHAN_MPI_CALL(PMPI_Op_create)(this_mod->mod_funcs.reduce_records,
526 527 528 529 530 531 532 533 534 535
                        1, &red_op);

                    /* reduce shared file records for this module */
                    DARSHAN_MPI_CALL(PMPI_Reduce)(red_send_buf, red_recv_buf,
                        shared_rec_count, red_type, red_op, 0, MPI_COMM_WORLD);

                    DARSHAN_MPI_CALL(PMPI_Type_free)(&red_type);
                    DARSHAN_MPI_CALL(PMPI_Op_free)(&red_op);
                }
            }
536
        }
537

538
        /* if module is registered locally, get the corresponding output buffer */
539
        if(this_mod)
540 541
        {
            /* get output buffer from module */
542
            this_mod->mod_funcs.get_output_data(&mod_buf, &mod_buf_sz);
543 544
        }

545
        final_core->log_header.mod_map[i].off = tmp_off;
546

547 548 549 550
        /* append this module's data to the darshan log */
        ret = darshan_log_append_all(log_fh, final_core, mod_buf, mod_buf_sz,
            &gz_fp, &(final_core->log_header.mod_map[i].len));
        tmp_off += final_core->log_header.mod_map[i].len;
551

552
        /* error out if the log append failed */
553 554 555
        DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
            MPI_LOR, MPI_COMM_WORLD);
        if(all_ret != 0)
556
        {
557 558 559 560 561 562 563 564
            if(my_rank == 0)
            {
                fprintf(stderr,
                    "darshan library warning: unable to write %s module data to log file %s\n",
                    darshan_module_names[i], logfile_name);
                unlink(logfile_name);
            }
            free(logfile_name);
565
            darshan_core_cleanup(final_core);
566
            return;
567 568 569
        }

        /* shutdown module if registered locally */
570
        if(this_mod)
571 572 573
        {
            this_mod->mod_funcs.shutdown();
        }
574 575
        if(internal_timing_flag)
            mod2[i] = DARSHAN_MPI_CALL(PMPI_Wtime)();
576 577
    }

578 579
    if(internal_timing_flag)
        header1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
580
    /* rank 0 is responsible for writing the log header */
581 582
    if(my_rank == 0)
    {
583 584 585
        void *header_buf = &(final_core->log_header);
        int header_buf_sz = sizeof(struct darshan_header);
        int comp_buf_sz = 0;
586

587 588 589 590 591 592 593 594 595 596 597 598 599 600
        /* initialize the remaining header fields */
        strcpy(final_core->log_header.version_string, DARSHAN_LOG_VERSION);
        final_core->log_header.magic_nr = DARSHAN_MAGIC_NR;

        /* deflate the header */
        /* NOTE: the header is not actually compressed because space for it must
         *       be preallocated before writing. i.e., the "compressed" header
         *       must be constant sized, sizeof(struct darshan_header) + 23.
         *       it is still necessary to deflate the header or the resulting log
         *       file will not be a valid gzip file.
         */
        all_ret = darshan_deflate_buffer((void **)&header_buf, &header_buf_sz, 1, 1,
            final_core->comp_buf, &comp_buf_sz);
        if(all_ret)
601
        {
602
            fprintf(stderr, "darshan library warning: unable to compress header\n");
603
            unlink(logfile_name);
604
        }
605 606 607 608 609 610 611 612 613 614 615
        else
        {
            all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, 0, final_core->comp_buf,
                comp_buf_sz, MPI_BYTE, &status);
            if(all_ret != MPI_SUCCESS)
            {
                fprintf(stderr, "darshan library warning: unable to write header to log file %s\n",
                        logfile_name);
                unlink(logfile_name);
            }
        }
616 617
    }

618 619 620 621 622
    /* error out if unable to write log header */
    DARSHAN_MPI_CALL(PMPI_Bcast)(&all_ret, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        free(logfile_name);
623
        darshan_core_cleanup(final_core);
624 625
        return;
    }
626 627
    if(internal_timing_flag)
        header2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
628

629 630 631 632 633 634
    DARSHAN_MPI_CALL(PMPI_File_close)(&log_fh);

    /* if we got this far, there are no errors, so rename from *.darshan_partial
     * to *-<logwritetime>.darshan.gz, which indicates that this log file is
     * complete and ready for analysis
     */
635 636
    if(my_rank == 0)
    {
Shane Snyder's avatar
Shane Snyder committed
637
        if(getenv("DARSHAN_LOGFILE"))
638
        {
639
#ifdef __CP_GROUP_READABLE_LOGS
Shane Snyder's avatar
Shane Snyder committed
640
            chmod(logfile_name, (S_IRUSR|S_IRGRP));
641
#else
Shane Snyder's avatar
Shane Snyder committed
642
            chmod(logfile_name, (S_IRUSR));
643
#endif
Shane Snyder's avatar
Shane Snyder committed
644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667
        }
        else
        {
            char* tmp_index;
            double end_log_time;
            char* new_logfile_name;

            new_logfile_name = malloc(PATH_MAX);
            if(new_logfile_name)
            {
                new_logfile_name[0] = '\0';
                end_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();
                strcat(new_logfile_name, logfile_name);
                tmp_index = strstr(new_logfile_name, ".darshan_partial");
                sprintf(tmp_index, "_%d.darshan.gz", (int)(end_log_time-start_log_time+1));
                rename(logfile_name, new_logfile_name);
                /* set permissions on log file */
#ifdef __CP_GROUP_READABLE_LOGS
                chmod(new_logfile_name, (S_IRUSR|S_IRGRP));
#else
                chmod(new_logfile_name, (S_IRUSR));
#endif
                free(new_logfile_name);
            }
668
        }
669
    }
670

671
    free(logfile_name);
672
    darshan_core_cleanup(final_core);
673

674
    if(internal_timing_flag)
675
    {
676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713
        double open_tm, open_slowest;
        double header_tm, header_slowest;
        double job_tm, job_slowest;
        double rec_tm, rec_slowest;
        double mod_tm[DARSHAN_MAX_MODS], mod_slowest[DARSHAN_MAX_MODS];
        double all_tm, all_slowest;

        tm_end = DARSHAN_MPI_CALL(PMPI_Wtime)();

        open_tm = open2 - open1;
        header_tm = header2 - header1;
        job_tm = job2 - job1;
        rec_tm = rec2 - rec1;
        all_tm = tm_end - start_log_time;
        for(i = 0;i < DARSHAN_MAX_MODS; i++)
        {
            mod_tm[i] = mod2[i] - mod1[i];
        }

        DARSHAN_MPI_CALL(PMPI_Reduce)(&open_tm, &open_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&header_tm, &header_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&job_tm, &job_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&rec_tm, &rec_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&all_tm, &all_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(mod_tm, mod_slowest, DARSHAN_MAX_MODS,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

        if(my_rank == 0)
        {
            printf("#darshan:<op>\t<nprocs>\t<time>\n");
            printf("darshan:log_open\t%d\t%f\n", nprocs, open_slowest);
            printf("darshan:job_write\t%d\t%f\n", nprocs, job_slowest);
            printf("darshan:hash_write\t%d\t%f\n", nprocs, rec_slowest);
Shane Snyder's avatar
Shane Snyder committed
714
            printf("darshan:header_write\t%d\t%f\n", nprocs, header_slowest);
715 716 717 718
            for(i = 0; i < DARSHAN_MAX_MODS; i++)
            {
                if(global_mod_use_count[i])
                    printf("darshan:%s_shutdown\t%d\t%f\n", darshan_module_names[i],
Shane Snyder's avatar
Shane Snyder committed
719
                        nprocs, mod_slowest[i]);
720 721 722
            }
            printf("darshan:core_shutdown\t%d\t%f\n", nprocs, all_slowest);
        }
723 724 725 726
    }
    
    return;
}
727

728
/* free darshan core data structures to shutdown */
729
static void darshan_core_cleanup(struct darshan_core_runtime* core)
730
{
731
    struct darshan_core_record_ref *tmp, *ref;
732
    int i;
733

734 735 736 737 738 739
    HASH_ITER(hlink, core->rec_hash, ref, tmp)
    {
        HASH_DELETE(hlink, core->rec_hash, ref);
        free(ref->rec.name);
        free(ref);
    }
740

741
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
742
    {
743
        if(core->mod_array[i])
744
        {        
745 746
            free(core->mod_array[i]);
            core->mod_array[i] = NULL;
747
        }
748
    }
749

750
    free(core->trailing_data);
751
    free(core);
752 753 754 755

    return;
}

756
/* construct the darshan log file name */
757
static void darshan_get_logfile_name(char* logfile_name, int jobid, struct tm* start_tm)
758
{
Shane Snyder's avatar
Shane Snyder committed
759
    char* user_logfile_name;
760 761 762 763 764 765 766 767 768 769 770 771 772
    char* logpath;
    char* logname_string;
    char* logpath_override = NULL;
#ifdef __CP_LOG_ENV
    char env_check[256];
    char* env_tok;
#endif
    uint64_t hlevel;
    char hname[HOST_NAME_MAX];
    uint64_t logmod;
    char cuser[L_cuserid] = {0};
    int ret;

Shane Snyder's avatar
Shane Snyder committed
773 774 775 776 777 778 779 780 781 782 783 784 785 786 787
    /* first, check if user specifies a complete logpath to use */
    user_logfile_name = getenv("DARSHAN_LOGFILE");
    if(user_logfile_name)
    {
        if(strlen(user_logfile_name) >= (PATH_MAX-1))
        {
            fprintf(stderr, "darshan library warning: user log file name too long.\n");
            logfile_name[0] = '\0';
        }
        else
        {
            strcpy(logfile_name, user_logfile_name);
        }
    }
    else
788
    {
Shane Snyder's avatar
Shane Snyder committed
789 790 791 792 793 794
        /* otherwise, generate the log path automatically */

        /* Use CP_LOG_PATH_OVERRIDE for the value or __CP_LOG_PATH */
        logpath = getenv(CP_LOG_PATH_OVERRIDE);
        if(!logpath)
        {
795
#ifdef __CP_LOG_PATH
Shane Snyder's avatar
Shane Snyder committed
796
            logpath = __CP_LOG_PATH;
797
#endif
Shane Snyder's avatar
Shane Snyder committed
798
        }
799

Shane Snyder's avatar
Shane Snyder committed
800 801 802 803 804 805 806 807 808 809
        /* get the username for this job.  In order we will try each of the
         * following until one of them succeeds:
         *
         * - cuserid()
         * - getenv("LOGNAME")
         * - snprintf(..., geteuid());
         *
         * Note that we do not use getpwuid() because it generally will not
         * work in statically compiled binaries.
         */
810 811

#ifndef DARSHAN_DISABLE_CUSERID
Shane Snyder's avatar
Shane Snyder committed
812
        cuserid(cuser);
813 814
#endif

Shane Snyder's avatar
Shane Snyder committed
815 816
        /* if cuserid() didn't work, then check the environment */
        if(strcmp(cuser, "") == 0)
817
        {
Shane Snyder's avatar
Shane Snyder committed
818 819 820 821 822
            logname_string = getenv("LOGNAME");
            if(logname_string)
            {
                strncpy(cuser, logname_string, (L_cuserid-1));
            }
823 824
        }

Shane Snyder's avatar
Shane Snyder committed
825 826 827 828 829 830
        /* if cuserid() and environment both fail, then fall back to uid */
        if(strcmp(cuser, "") == 0)
        {
            uid_t uid = geteuid();
            snprintf(cuser, sizeof(cuser), "%u", uid);
        }
831

Shane Snyder's avatar
Shane Snyder committed
832 833 834 835
        /* generate a random number to help differentiate the log */
        hlevel=DARSHAN_MPI_CALL(PMPI_Wtime)() * 1000000;
        (void)gethostname(hname, sizeof(hname));
        logmod = darshan_hash((void*)hname,strlen(hname),hlevel);
836

Shane Snyder's avatar
Shane Snyder committed
837 838 839 840
        /* see if darshan was configured using the --with-logpath-by-env
         * argument, which allows the user to specify an absolute path to
         * place logs via an env variable.
         */
841
#ifdef __CP_LOG_ENV
Shane Snyder's avatar
Shane Snyder committed
842 843
        /* just silently skip if the environment variable list is too big */
        if(strlen(__CP_LOG_ENV) < 256)
844
        {
Shane Snyder's avatar
Shane Snyder committed
845 846 847 848 849
            /* copy env variable list to a temporary buffer */
            strcpy(env_check, __CP_LOG_ENV);
            /* tokenize the comma-separated list */
            env_tok = strtok(env_check, ",");
            if(env_tok)
850
            {
Shane Snyder's avatar
Shane Snyder committed
851
                do
852
                {
Shane Snyder's avatar
Shane Snyder committed
853 854 855 856 857 858 859 860 861
                    /* check each env variable in order */
                    logpath_override = getenv(env_tok);
                    if(logpath_override)
                    {
                        /* stop as soon as we find a match */
                        break;
                    }
                }while((env_tok = strtok(NULL, ",")));
            }
862 863 864
        }
#endif

Shane Snyder's avatar
Shane Snyder committed
865
        if(logpath_override)
866
        {
Shane Snyder's avatar
Shane Snyder committed
867 868 869 870 871 872 873 874 875 876 877 878 879 880 881
            ret = snprintf(logfile_name, PATH_MAX,
                "%s/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
                logpath_override,
                cuser, __progname, jobid,
                (start_tm->tm_mon+1),
                start_tm->tm_mday,
                (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
                logmod);
            if(ret == (PATH_MAX-1))
            {
                /* file name was too big; squish it down */
                snprintf(logfile_name, PATH_MAX,
                    "%s/id%d.darshan_partial",
                    logpath_override, jobid);
            }
882
        }
Shane Snyder's avatar
Shane Snyder committed
883
        else if(logpath)
884
        {
Shane Snyder's avatar
Shane Snyder committed
885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904
            ret = snprintf(logfile_name, PATH_MAX,
                "%s/%d/%d/%d/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
                logpath, (start_tm->tm_year+1900),
                (start_tm->tm_mon+1), start_tm->tm_mday,
                cuser, __progname, jobid,
                (start_tm->tm_mon+1),
                start_tm->tm_mday,
                (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
                logmod);
            if(ret == (PATH_MAX-1))
            {
                /* file name was too big; squish it down */
                snprintf(logfile_name, PATH_MAX,
                    "%s/id%d.darshan_partial",
                    logpath, jobid);
            }
        }
        else
        {
            logfile_name[0] = '\0';
905 906 907 908
        }
    }

    return;
909 910
}

911
/* record any hints used to write the darshan log in the log header */
912
static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* core)
913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935
{
    char* hints;
    char* header_hints;
    int meta_remain = 0;
    char* m;

    /* check environment variable to see if the default MPI file hints have
     * been overridden
     */
    hints = getenv(CP_LOG_HINTS_OVERRIDE);
    if(!hints)
    {
        hints = __CP_LOG_HINTS;
    }

    if(!hints || strlen(hints) < 1)
        return;

    header_hints = strdup(hints);
    if(!header_hints)
        return;

    meta_remain = DARSHAN_JOB_METADATA_LEN -
936
        strlen(core->log_job.metadata) - 1;
937 938
    if(meta_remain >= (strlen(PACKAGE_VERSION) + 9))
    {
939
        sprintf(core->log_job.metadata, "lib_ver=%s\n", PACKAGE_VERSION);
940 941 942 943
        meta_remain -= (strlen(PACKAGE_VERSION) + 9);
    }
    if(meta_remain >= (3 + strlen(header_hints)))
    {
944
        m = core->log_job.metadata + strlen(core->log_job.metadata);
945 946 947 948 949 950 951 952 953 954 955 956
        /* We have room to store the hints in the metadata portion of
         * the job header.  We just prepend an h= to the hints list.  The
         * metadata parser will ignore = characters that appear in the value
         * portion of the metadata key/value pair.
         */
        sprintf(m, "h=%s\n", header_hints);
    }
    free(header_hints);

    return;
}

957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975