darshan-core.c 50.8 KB
Newer Older
1
/*
Shane Snyder's avatar
Shane Snyder committed
2 3 4
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
5 6
 */

7
#define _XOPEN_SOURCE 500
8
#define _GNU_SOURCE
9

10 11 12 13 14 15
#include "darshan-runtime-config.h"

#include <stdio.h>
#ifdef HAVE_MNTENT_H
#include <mntent.h>
#endif
16 17 18 19 20
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <limits.h>
#include <pthread.h>
21 22 23
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/vfs.h>
24
#include <zlib.h>
25
#include <mpi.h>
26
#include <assert.h>
27

28
#include "uthash.h"
29 30
#include "darshan-core.h"

31
extern char* __progname;
32
extern char* __progname_full;
33

34
/* internal variable delcarations */
35
static struct darshan_core_runtime *darshan_core = NULL;
36
static pthread_mutex_t darshan_core_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
37
static int my_rank = -1;
38
static int nprocs = -1;
39

Shane Snyder's avatar
Shane Snyder committed
40 41 42
#define DARSHAN_CORE_LOCK() pthread_mutex_lock(&darshan_core_mutex)
#define DARSHAN_CORE_UNLOCK() pthread_mutex_unlock(&darshan_core_mutex)

43 44 45 46 47 48 49 50 51 52 53 54 55
/* FS mount information */
#define DARSHAN_MAX_MNTS 64
#define DARSHAN_MAX_MNT_PATH 256
#define DARSHAN_MAX_MNT_TYPE 32
struct mnt_data
{
    int64_t block_size;
    char path[DARSHAN_MAX_MNT_PATH];
    char type[DARSHAN_MAX_MNT_TYPE];
};
static struct mnt_data mnt_data_array[DARSHAN_MAX_MNTS];
static int mnt_data_count = 0;

56 57
/* prototypes for internal helper functions */
static void darshan_core_initialize(
58
    int argc, char **argv);
59 60 61
static void darshan_core_shutdown(
    void);
static void darshan_core_cleanup(
62
    struct darshan_core_runtime* core);
63 64 65
static void darshan_get_logfile_name(
    char* logfile_name, int jobid, struct tm* start_tm);
static void darshan_log_record_hints_and_ver(
66 67 68 69 70 71
    struct darshan_core_runtime* core);
static void darshan_get_exe_and_mounts_root(
    struct darshan_core_runtime *core, char* trailing_data,
    int space_left);
static char* darshan_get_exe_and_mounts(
    struct darshan_core_runtime *core);
72
static void darshan_get_shared_records(
73
    struct darshan_core_runtime *core, darshan_record_id *shared_recs);
74
static int darshan_log_open_all(
75
    char *logfile_name, MPI_File *log_fh);
76 77 78
static int darshan_deflate_buffer(
    void **pointers, int *lengths, int count, int nocomp_flag,
    char *comp_buf, int *comp_length);
79
static int darshan_log_write_record_hash(
80
    MPI_File log_fh, struct darshan_core_runtime *core,
81 82 83 84
    uint64_t *inout_off);
static int darshan_log_append_all(
    MPI_File log_fh, struct darshan_core_runtime *core, void *buf,
    int count, uint64_t *inout_off, uint64_t *agg_uncomp_sz);
85

86
/* intercept MPI initialize and finalize to manage darshan core runtime */
87 88 89 90 91 92 93 94 95 96
int MPI_Init(int *argc, char ***argv)
{
    int ret;

    ret = DARSHAN_MPI_CALL(PMPI_Init)(argc, argv);
    if(ret != MPI_SUCCESS)
    {
        return(ret);
    }

97 98 99 100 101 102 103 104 105
    if(argc && argv)
    {
        darshan_core_initialize(*argc, *argv);
    }
    else
    {
        /* we don't see argc and argv here in fortran */
        darshan_core_initialize(0, NULL);
    }
106 107 108 109

    return(ret);
}

110
int MPI_Init_thread(int *argc, char ***argv, int required, int *provided)
111 112 113 114
{
    int ret;

    ret = DARSHAN_MPI_CALL(PMPI_Init_thread)(argc, argv, required, provided);
115
    if(ret != MPI_SUCCESS)
116 117 118 119
    {
        return(ret);
    }

120 121 122 123 124 125 126 127 128
    if(argc && argv)
    {
        darshan_core_initialize(*argc, *argv);
    }
    else
    {
        /* we don't see argc and argv here in fortran */
        darshan_core_initialize(0, NULL);
    }
129 130 131 132 133 134 135 136

    return(ret);
}

int MPI_Finalize(void)
{
    int ret;

137
    darshan_core_shutdown();
138 139 140 141 142

    ret = DARSHAN_MPI_CALL(PMPI_Finalize)();
    return(ret);
}

143 144
/* *********************************** */

145
static void darshan_core_initialize(int argc, char **argv)
146 147 148 149 150 151 152 153 154
{
    int i;
    int internal_timing_flag = 0;
    double init_start, init_time, init_max;
    char* truncate_string = "<TRUNCATED>";
    int truncate_offset;
    int chars_left = 0;

    DARSHAN_MPI_CALL(PMPI_Comm_size)(MPI_COMM_WORLD, &nprocs);
155
    DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &my_rank);
156 157 158 159

    if(getenv("DARSHAN_INTERNAL_TIMING"))
        internal_timing_flag = 1;

160
    if(internal_timing_flag)
161 162 163
        init_start = DARSHAN_MPI_CALL(PMPI_Wtime)();

    /* setup darshan runtime if darshan is enabled and hasn't been initialized already */
164
    if(!getenv("DARSHAN_DISABLE") && !darshan_core)
165
    {
166 167 168 169 170
        /* TODO: darshan mem alignment code? */

        /* allocate structure to track darshan_core_runtime information */
        darshan_core = malloc(sizeof(*darshan_core));
        if(darshan_core)
171
        {
172
            memset(darshan_core, 0, sizeof(*darshan_core));
173

174 175 176 177
            darshan_core->log_job.uid = getuid();
            darshan_core->log_job.start_time = time(NULL);
            darshan_core->log_job.nprocs = nprocs;
            darshan_core->wtime_offset = DARSHAN_MPI_CALL(PMPI_Wtime)();
178 179

            /* record exe and arguments */
180
            for(i=0; i<argc; i++)
181
            {
182
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
183 184
                strncat(darshan_core->exe, argv[i], chars_left);
                if(i < (argc-1))
185
                {
186
                    chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
187
                    strncat(darshan_core->exe, " ", chars_left);
188 189 190 191 192 193 194 195
                }
            }

            /* if we don't see any arguments, then use glibc symbol to get
             * program name at least (this happens in fortran)
             */
            if(argc == 0)
            {
196
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
197
                strncat(darshan_core->exe, __progname_full, chars_left);
198
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
199
                strncat(darshan_core->exe, " <unknown args>", chars_left);
200 201 202 203 204
            }

            if(chars_left == 0)
            {
                /* we ran out of room; mark that string was truncated */
205
                truncate_offset = DARSHAN_EXE_LEN - strlen(truncate_string);
206
                sprintf(&darshan_core->exe[truncate_offset], "%s",
207 208
                    truncate_string);
            }
209 210 211

            /* collect information about command line and mounted file systems */
            darshan_core->trailing_data = darshan_get_exe_and_mounts(darshan_core);
212 213 214 215 216 217 218 219
        }
    }

    if(internal_timing_flag)
    {
        init_time = DARSHAN_MPI_CALL(PMPI_Wtime)() - init_start;
        DARSHAN_MPI_CALL(PMPI_Reduce)(&init_time, &init_max, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
220
        if(my_rank == 0)
221 222 223 224 225 226 227 228 229
        {
            printf("#darshan:<op>\t<nprocs>\t<time>\n");
            printf("darshan:init\t%d\t%f\n", nprocs, init_max);
        }
    }

    return;
}

230
static void darshan_core_shutdown()
231
{
232
    int i;
233
    char *logfile_name;
234
    struct darshan_core_runtime *final_core;
235
    int internal_timing_flag = 0;
236 237
    char *envjobid;
    char *jobid_str;
238
    int jobid;
239
    struct tm *start_tm;
240
    time_t start_time_tmp;
241 242
    int ret = 0;
    int all_ret = 0;
243 244
    int64_t first_start_time;
    int64_t last_end_time;
245 246
    int local_mod_use[DARSHAN_MAX_MODS] = {0};
    int global_mod_use_count[DARSHAN_MAX_MODS] = {0};
247
    darshan_record_id shared_recs[DARSHAN_CORE_MAX_RECORDS] = {0};
248
    double start_log_time;
249 250 251 252 253 254 255
    double open1, open2;
    double job1, job2;
    double rec1, rec2;
    double mod1[DARSHAN_MAX_MODS] = {0};
    double mod2[DARSHAN_MAX_MODS] = {0};
    double header1, header2;
    double tm_end;
256 257
    uint64_t gz_fp = 0;
    uint64_t tmp_off = 0;
258 259
    MPI_File log_fh;
    MPI_Status status;
260 261 262 263

    if(getenv("DARSHAN_INTERNAL_TIMING"))
        internal_timing_flag = 1;

264 265
    start_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();

Shane Snyder's avatar
Shane Snyder committed
266
    /* disable darhan-core while we shutdown */
267
    DARSHAN_CORE_LOCK();
268
    if(!darshan_core)
269
    {
270
        DARSHAN_CORE_UNLOCK();
271 272
        return;
    }
273 274
    final_core = darshan_core;
    darshan_core = NULL;
Shane Snyder's avatar
Shane Snyder committed
275

276
    /* we also need to set which modules were registered on this process and
Shane Snyder's avatar
Shane Snyder committed
277 278 279 280 281 282 283 284 285 286
     * disable tracing within those modules while we shutdown
     */
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
    {
        if(final_core->mod_array[i])
        {
            local_mod_use[i] = 1;
            final_core->mod_array[i]->mod_funcs.disable_instrumentation();
        }
    }
287
    DARSHAN_CORE_UNLOCK();
288 289 290 291

    logfile_name = malloc(PATH_MAX);
    if(!logfile_name)
    {
292
        darshan_core_cleanup(final_core);
293 294 295
        return;
    }

296
    /* set darshan job id/metadata and constuct log file name on rank 0 */
297
    if(my_rank == 0)
298 299 300
    {
        /* Use CP_JOBID_OVERRIDE for the env var or CP_JOBID */
        envjobid = getenv(CP_JOBID_OVERRIDE);
301
        if(!envjobid)
302 303 304 305
        {
            envjobid = CP_JOBID;
        }

306
        /* find a job id */
307 308 309 310 311 312 313 314 315 316 317 318
        jobid_str = getenv(envjobid);
        if(jobid_str)
        {
            /* in cobalt we can find it in env var */
            ret = sscanf(jobid_str, "%d", &jobid);
        }
        if(!jobid_str || ret != 1)
        {
            /* use pid as fall back */
            jobid = getpid();
        }

319
        final_core->log_job.jobid = (int64_t)jobid;
320

321
        /* if we are using any hints to write the log file, then record those
322
         * hints with the darshan job information
323
         */
324
        darshan_log_record_hints_and_ver(final_core);
325

326
        /* use human readable start time format in log filename */
327
        start_time_tmp = final_core->log_job.start_time;
328
        start_tm = localtime(&start_time_tmp);
329

330 331
        /* construct log file name */
        darshan_get_logfile_name(logfile_name, jobid, start_tm);
332 333 334 335 336 337 338 339 340
    }

    /* broadcast log file name */
    DARSHAN_MPI_CALL(PMPI_Bcast)(logfile_name, PATH_MAX, MPI_CHAR, 0,
        MPI_COMM_WORLD);

    if(strlen(logfile_name) == 0)
    {
        /* failed to generate log file name */
341
        free(logfile_name);
342
        darshan_core_cleanup(final_core);
343 344 345
        return;
    }

346
    final_core->log_job.end_time = time(NULL);
347

348 349 350
    /* reduce to report first start time and last end time across all ranks
     * at rank 0
     */
351 352
    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.start_time, &first_start_time, 1, MPI_LONG_LONG, MPI_MIN, 0, MPI_COMM_WORLD);
    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.end_time, &last_end_time, 1, MPI_LONG_LONG, MPI_MAX, 0, MPI_COMM_WORLD);
353 354
    if(my_rank == 0)
    {
355 356
        final_core->log_job.start_time = first_start_time;
        final_core->log_job.end_time = last_end_time;
357
    }
358

359 360 361
    /* reduce the number of times a module was opened globally and bcast to everyone */   
    DARSHAN_MPI_CALL(PMPI_Allreduce)(local_mod_use, global_mod_use_count, DARSHAN_MAX_MODS, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

362
    /* get a list of records which are shared across all processes */
363
    darshan_get_shared_records(final_core, shared_recs);
364

365 366
    if(internal_timing_flag)
        open1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
367
    /* collectively open the darshan log file */
368
    ret = darshan_log_open_all(logfile_name, &log_fh);
369 370
    if(internal_timing_flag)
        open2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
371 372 373 374 375 376 377 378

    /* error out if unable to open log file */
    DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
        MPI_LOR, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        if(my_rank == 0)
        {
379 380
            fprintf(stderr, "darshan library warning: unable to open log file %s\n",
                logfile_name);
381 382 383
            unlink(logfile_name);
        }
        free(logfile_name);
384
        darshan_core_cleanup(final_core);
385 386 387
        return;
    }

388 389
    if(internal_timing_flag)
        job1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
390
    /* rank 0 is responsible for writing the compressed darshan job information */
Shane Snyder's avatar
Shane Snyder committed
391
    if(my_rank == 0)
392
    {
393 394 395
        void *pointers[2] = {&final_core->log_job, final_core->trailing_data};
        int lengths[2] = {sizeof(struct darshan_job), DARSHAN_EXE_LEN+1};
        int comp_buf_sz = 0;
396

397
        /* compress the job info and the trailing mount/exe data */
398
        all_ret = darshan_deflate_buffer(pointers, lengths, 2, 0,
399 400
            final_core->comp_buf, &comp_buf_sz);
        if(all_ret)
401
        {
402
            fprintf(stderr, "darshan library warning: unable to compress job data\n");
403
            unlink(logfile_name);
404
        }
405 406 407
        else
        {
            /* write the job information, preallocing space for the log header */
408 409 410
            gz_fp += sizeof(struct darshan_header) + 23; /* gzip headers/trailers ... */
            all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, gz_fp,
                final_core->comp_buf, comp_buf_sz, MPI_BYTE, &status);
411 412 413 414 415
            if(all_ret != MPI_SUCCESS)
            {
                fprintf(stderr, "darshan library warning: unable to write job data to log file %s\n",
                        logfile_name);
                unlink(logfile_name);
Shane Snyder's avatar
Shane Snyder committed
416
                
417
            }
Shane Snyder's avatar
Shane Snyder committed
418

419 420
            /* set the beginning offset of record hash, which follows job info just written */
            gz_fp += comp_buf_sz;
421
        }
422 423
    }

424 425 426 427 428
    /* error out if unable to write job information */
    DARSHAN_MPI_CALL(PMPI_Bcast)(&all_ret, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        free(logfile_name);
429
        darshan_core_cleanup(final_core);
430 431
        return;
    }
432 433
    if(internal_timing_flag)
        job2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
434

435 436
    if(internal_timing_flag)
        rec1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
437
    /* write the record name->id hash to the log file */
438 439
    ret = darshan_log_write_record_hash(log_fh, final_core, &gz_fp);
    tmp_off = final_core->log_header.rec_map.off + final_core->log_header.rec_map.len;
440

441
    /* error out if unable to write record hash */
442 443 444 445 446 447
    DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
        MPI_LOR, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        if(my_rank == 0)
        {
448
            fprintf(stderr, "darshan library warning: unable to write record hash to log file %s\n",
449
                logfile_name);
450
            unlink(logfile_name);
451 452
        }
        free(logfile_name);
453
        darshan_core_cleanup(final_core);
454 455
        return;
    }
Shane Snyder's avatar
Shane Snyder committed
456 457
    if(internal_timing_flag)
        rec2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
458 459

    /* loop over globally used darshan modules and:
460
     *      - perform shared file reductions, if possible
461
     *      - get final output buffer
462
     *      - compress (zlib) provided output buffer
Shane Snyder's avatar
Shane Snyder committed
463
     *      - append compressed buffer to log file
464 465
     *      - add module index info (file offset/length) to log header
     *      - shutdown the module
466
     */
467
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
468
    {
469
        struct darshan_core_module* this_mod = final_core->mod_array[i];
470 471
        darshan_record_id mod_shared_recs[DARSHAN_CORE_MAX_RECORDS];
        struct darshan_core_record_ref *ref = NULL;
472
        void* mod_buf = NULL;
473
        int mod_buf_sz = 0;
474
        int j;
475

476
        if(global_mod_use_count[i] == 0)
477 478
        {
            if(my_rank == 0)
479 480 481 482
            {
                final_core->log_header.mod_map[i].off = 0;
                final_core->log_header.mod_map[i].len = 0;
            }
483
            continue;
484
        }
485 486 487 488
 
        if(internal_timing_flag)
            mod1[i] = DARSHAN_MPI_CALL(PMPI_Wtime)();   
        /* if all processes used this module, prepare to do a shared file reduction */
Shane Snyder's avatar
Shane Snyder committed
489
        if(global_mod_use_count[i] == nprocs)
490
        {
491 492 493 494 495 496 497 498 499 500 501 502 503
            int shared_rec_count = 0;
            int rec_sz = 0;
            void *red_send_buf = NULL, *red_recv_buf = NULL;
            MPI_Datatype red_type;
            MPI_Op red_op;

            /* set the shared file list for this module */
            memset(mod_shared_recs, 0, DARSHAN_CORE_MAX_RECORDS * sizeof(darshan_record_id));
            for(j = 0; j < DARSHAN_CORE_MAX_RECORDS && shared_recs[j] != 0; j++)
            {
                HASH_FIND(hlink, final_core->rec_hash, &shared_recs[j],
                    sizeof(darshan_record_id), ref);
                assert(ref);
Shane Snyder's avatar
Shane Snyder committed
504
                if(DARSHAN_CORE_MOD_ISSET(ref->global_mod_flags, i))
505 506 507 508 509 510
                {
                    mod_shared_recs[shared_rec_count++] = shared_recs[j];
                }
            }

            /* if there are globally shared files, do a shared file reduction */
511 512
            if(shared_rec_count && this_mod->mod_funcs.prepare_for_reduction &&
               this_mod->mod_funcs.record_reduction_op)
513 514 515 516 517 518 519 520 521 522 523 524 525
            {
                this_mod->mod_funcs.prepare_for_reduction(mod_shared_recs, &shared_rec_count,
                    &red_send_buf, &red_recv_buf, &rec_sz);

                if(shared_rec_count)
                {
                    /* construct a datatype for a file record.  This is serving no purpose
                     * except to make sure we can do a reduction on proper boundaries
                     */
                    DARSHAN_MPI_CALL(PMPI_Type_contiguous)(rec_sz, MPI_BYTE, &red_type);
                    DARSHAN_MPI_CALL(PMPI_Type_commit)(&red_type);

                    /* register a reduction operator for this module */
526
                    DARSHAN_MPI_CALL(PMPI_Op_create)(this_mod->mod_funcs.record_reduction_op,
527 528 529 530 531 532 533 534 535 536
                        1, &red_op);

                    /* reduce shared file records for this module */
                    DARSHAN_MPI_CALL(PMPI_Reduce)(red_send_buf, red_recv_buf,
                        shared_rec_count, red_type, red_op, 0, MPI_COMM_WORLD);

                    DARSHAN_MPI_CALL(PMPI_Type_free)(&red_type);
                    DARSHAN_MPI_CALL(PMPI_Op_free)(&red_op);
                }
            }
537
        }
538

539
        /* if module is registered locally, get the corresponding output buffer */
540
        if(this_mod)
541 542
        {
            /* get output buffer from module */
543
            this_mod->mod_funcs.get_output_data(&mod_buf, &mod_buf_sz);
544 545
        }

546
        final_core->log_header.mod_map[i].off = tmp_off;
547

548 549 550 551
        /* append this module's data to the darshan log */
        ret = darshan_log_append_all(log_fh, final_core, mod_buf, mod_buf_sz,
            &gz_fp, &(final_core->log_header.mod_map[i].len));
        tmp_off += final_core->log_header.mod_map[i].len;
552

553
        /* error out if the log append failed */
554 555 556
        DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
            MPI_LOR, MPI_COMM_WORLD);
        if(all_ret != 0)
557
        {
558 559 560 561 562 563 564 565
            if(my_rank == 0)
            {
                fprintf(stderr,
                    "darshan library warning: unable to write %s module data to log file %s\n",
                    darshan_module_names[i], logfile_name);
                unlink(logfile_name);
            }
            free(logfile_name);
566
            darshan_core_cleanup(final_core);
567
            return;
568 569 570
        }

        /* shutdown module if registered locally */
571
        if(this_mod)
572 573 574
        {
            this_mod->mod_funcs.shutdown();
        }
575 576
        if(internal_timing_flag)
            mod2[i] = DARSHAN_MPI_CALL(PMPI_Wtime)();
577 578
    }

579 580
    if(internal_timing_flag)
        header1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
581
    /* rank 0 is responsible for writing the log header */
582 583
    if(my_rank == 0)
    {
584 585 586
        void *header_buf = &(final_core->log_header);
        int header_buf_sz = sizeof(struct darshan_header);
        int comp_buf_sz = 0;
587

588 589 590 591 592 593 594 595 596 597 598 599 600 601
        /* initialize the remaining header fields */
        strcpy(final_core->log_header.version_string, DARSHAN_LOG_VERSION);
        final_core->log_header.magic_nr = DARSHAN_MAGIC_NR;

        /* deflate the header */
        /* NOTE: the header is not actually compressed because space for it must
         *       be preallocated before writing. i.e., the "compressed" header
         *       must be constant sized, sizeof(struct darshan_header) + 23.
         *       it is still necessary to deflate the header or the resulting log
         *       file will not be a valid gzip file.
         */
        all_ret = darshan_deflate_buffer((void **)&header_buf, &header_buf_sz, 1, 1,
            final_core->comp_buf, &comp_buf_sz);
        if(all_ret)
602
        {
603
            fprintf(stderr, "darshan library warning: unable to compress header\n");
604
            unlink(logfile_name);
605
        }
606 607 608 609 610 611 612 613 614 615 616
        else
        {
            all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, 0, final_core->comp_buf,
                comp_buf_sz, MPI_BYTE, &status);
            if(all_ret != MPI_SUCCESS)
            {
                fprintf(stderr, "darshan library warning: unable to write header to log file %s\n",
                        logfile_name);
                unlink(logfile_name);
            }
        }
617 618
    }

619 620 621 622 623
    /* error out if unable to write log header */
    DARSHAN_MPI_CALL(PMPI_Bcast)(&all_ret, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        free(logfile_name);
624
        darshan_core_cleanup(final_core);
625 626
        return;
    }
627 628
    if(internal_timing_flag)
        header2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
629

630 631 632 633 634 635
    DARSHAN_MPI_CALL(PMPI_File_close)(&log_fh);

    /* if we got this far, there are no errors, so rename from *.darshan_partial
     * to *-<logwritetime>.darshan.gz, which indicates that this log file is
     * complete and ready for analysis
     */
636 637
    if(my_rank == 0)
    {
Shane Snyder's avatar
Shane Snyder committed
638
        if(getenv("DARSHAN_LOGFILE"))
639
        {
640
#ifdef __CP_GROUP_READABLE_LOGS
Shane Snyder's avatar
Shane Snyder committed
641
            chmod(logfile_name, (S_IRUSR|S_IRGRP));
642
#else
Shane Snyder's avatar
Shane Snyder committed
643
            chmod(logfile_name, (S_IRUSR));
644
#endif
Shane Snyder's avatar
Shane Snyder committed
645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668
        }
        else
        {
            char* tmp_index;
            double end_log_time;
            char* new_logfile_name;

            new_logfile_name = malloc(PATH_MAX);
            if(new_logfile_name)
            {
                new_logfile_name[0] = '\0';
                end_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();
                strcat(new_logfile_name, logfile_name);
                tmp_index = strstr(new_logfile_name, ".darshan_partial");
                sprintf(tmp_index, "_%d.darshan.gz", (int)(end_log_time-start_log_time+1));
                rename(logfile_name, new_logfile_name);
                /* set permissions on log file */
#ifdef __CP_GROUP_READABLE_LOGS
                chmod(new_logfile_name, (S_IRUSR|S_IRGRP));
#else
                chmod(new_logfile_name, (S_IRUSR));
#endif
                free(new_logfile_name);
            }
669
        }
670
    }
671

672
    free(logfile_name);
673
    darshan_core_cleanup(final_core);
674

675
    if(internal_timing_flag)
676
    {
677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714
        double open_tm, open_slowest;
        double header_tm, header_slowest;
        double job_tm, job_slowest;
        double rec_tm, rec_slowest;
        double mod_tm[DARSHAN_MAX_MODS], mod_slowest[DARSHAN_MAX_MODS];
        double all_tm, all_slowest;

        tm_end = DARSHAN_MPI_CALL(PMPI_Wtime)();

        open_tm = open2 - open1;
        header_tm = header2 - header1;
        job_tm = job2 - job1;
        rec_tm = rec2 - rec1;
        all_tm = tm_end - start_log_time;
        for(i = 0;i < DARSHAN_MAX_MODS; i++)
        {
            mod_tm[i] = mod2[i] - mod1[i];
        }

        DARSHAN_MPI_CALL(PMPI_Reduce)(&open_tm, &open_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&header_tm, &header_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&job_tm, &job_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&rec_tm, &rec_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&all_tm, &all_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(mod_tm, mod_slowest, DARSHAN_MAX_MODS,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

        if(my_rank == 0)
        {
            printf("#darshan:<op>\t<nprocs>\t<time>\n");
            printf("darshan:log_open\t%d\t%f\n", nprocs, open_slowest);
            printf("darshan:job_write\t%d\t%f\n", nprocs, job_slowest);
            printf("darshan:hash_write\t%d\t%f\n", nprocs, rec_slowest);
Shane Snyder's avatar
Shane Snyder committed
715
            printf("darshan:header_write\t%d\t%f\n", nprocs, header_slowest);
716 717 718 719
            for(i = 0; i < DARSHAN_MAX_MODS; i++)
            {
                if(global_mod_use_count[i])
                    printf("darshan:%s_shutdown\t%d\t%f\n", darshan_module_names[i],
Shane Snyder's avatar
Shane Snyder committed
720
                        nprocs, mod_slowest[i]);
721 722 723
            }
            printf("darshan:core_shutdown\t%d\t%f\n", nprocs, all_slowest);
        }
724 725 726 727
    }
    
    return;
}
728

729
/* free darshan core data structures to shutdown */
730
static void darshan_core_cleanup(struct darshan_core_runtime* core)
731
{
732
    struct darshan_core_record_ref *tmp, *ref;
733
    int i;
734

735 736 737 738 739 740
    HASH_ITER(hlink, core->rec_hash, ref, tmp)
    {
        HASH_DELETE(hlink, core->rec_hash, ref);
        free(ref->rec.name);
        free(ref);
    }
741

742
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
743
    {
744
        if(core->mod_array[i])
745
        {        
746 747
            free(core->mod_array[i]);
            core->mod_array[i] = NULL;
748
        }
749
    }
750

751
    free(core->trailing_data);
752
    free(core);
753 754 755 756

    return;
}

757
/* construct the darshan log file name */
758
static void darshan_get_logfile_name(char* logfile_name, int jobid, struct tm* start_tm)
759
{
Shane Snyder's avatar
Shane Snyder committed
760
    char* user_logfile_name;
761 762 763 764 765 766 767 768 769 770 771 772 773
    char* logpath;
    char* logname_string;
    char* logpath_override = NULL;
#ifdef __CP_LOG_ENV
    char env_check[256];
    char* env_tok;
#endif
    uint64_t hlevel;
    char hname[HOST_NAME_MAX];
    uint64_t logmod;
    char cuser[L_cuserid] = {0};
    int ret;

Shane Snyder's avatar
Shane Snyder committed
774 775 776 777 778 779 780 781 782 783 784 785 786 787 788
    /* first, check if user specifies a complete logpath to use */
    user_logfile_name = getenv("DARSHAN_LOGFILE");
    if(user_logfile_name)
    {
        if(strlen(user_logfile_name) >= (PATH_MAX-1))
        {
            fprintf(stderr, "darshan library warning: user log file name too long.\n");
            logfile_name[0] = '\0';
        }
        else
        {
            strcpy(logfile_name, user_logfile_name);
        }
    }
    else
789
    {
Shane Snyder's avatar
Shane Snyder committed
790 791 792 793 794 795
        /* otherwise, generate the log path automatically */

        /* Use CP_LOG_PATH_OVERRIDE for the value or __CP_LOG_PATH */
        logpath = getenv(CP_LOG_PATH_OVERRIDE);
        if(!logpath)
        {
796
#ifdef __CP_LOG_PATH
Shane Snyder's avatar
Shane Snyder committed
797
            logpath = __CP_LOG_PATH;
798
#endif
Shane Snyder's avatar
Shane Snyder committed
799
        }
800

Shane Snyder's avatar
Shane Snyder committed
801 802 803 804 805 806 807 808 809 810
        /* get the username for this job.  In order we will try each of the
         * following until one of them succeeds:
         *
         * - cuserid()
         * - getenv("LOGNAME")
         * - snprintf(..., geteuid());
         *
         * Note that we do not use getpwuid() because it generally will not
         * work in statically compiled binaries.
         */
811 812

#ifndef DARSHAN_DISABLE_CUSERID
Shane Snyder's avatar
Shane Snyder committed
813
        cuserid(cuser);
814 815
#endif

Shane Snyder's avatar
Shane Snyder committed
816 817
        /* if cuserid() didn't work, then check the environment */
        if(strcmp(cuser, "") == 0)
818
        {
Shane Snyder's avatar
Shane Snyder committed
819 820 821 822 823
            logname_string = getenv("LOGNAME");
            if(logname_string)
            {
                strncpy(cuser, logname_string, (L_cuserid-1));
            }
824 825
        }

Shane Snyder's avatar
Shane Snyder committed
826 827 828 829 830 831
        /* if cuserid() and environment both fail, then fall back to uid */
        if(strcmp(cuser, "") == 0)
        {
            uid_t uid = geteuid();
            snprintf(cuser, sizeof(cuser), "%u", uid);
        }
832

Shane Snyder's avatar
Shane Snyder committed
833 834 835 836
        /* generate a random number to help differentiate the log */
        hlevel=DARSHAN_MPI_CALL(PMPI_Wtime)() * 1000000;
        (void)gethostname(hname, sizeof(hname));
        logmod = darshan_hash((void*)hname,strlen(hname),hlevel);
837

Shane Snyder's avatar
Shane Snyder committed
838 839 840 841
        /* see if darshan was configured using the --with-logpath-by-env
         * argument, which allows the user to specify an absolute path to
         * place logs via an env variable.
         */
842
#ifdef __CP_LOG_ENV
Shane Snyder's avatar
Shane Snyder committed
843 844
        /* just silently skip if the environment variable list is too big */
        if(strlen(__CP_LOG_ENV) < 256)
845
        {
Shane Snyder's avatar
Shane Snyder committed
846 847 848 849 850
            /* copy env variable list to a temporary buffer */
            strcpy(env_check, __CP_LOG_ENV);
            /* tokenize the comma-separated list */
            env_tok = strtok(env_check, ",");
            if(env_tok)
851
            {
Shane Snyder's avatar
Shane Snyder committed
852
                do
853
                {
Shane Snyder's avatar
Shane Snyder committed
854 855 856 857 858 859 860 861 862
                    /* check each env variable in order */
                    logpath_override = getenv(env_tok);
                    if(logpath_override)
                    {
                        /* stop as soon as we find a match */
                        break;
                    }
                }while((env_tok = strtok(NULL, ",")));
            }
863 864 865
        }
#endif

Shane Snyder's avatar
Shane Snyder committed
866
        if(logpath_override)
867
        {
Shane Snyder's avatar
Shane Snyder committed
868 869 870 871 872 873 874 875 876 877 878 879 880 881 882
            ret = snprintf(logfile_name, PATH_MAX,
                "%s/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
                logpath_override,
                cuser, __progname, jobid,
                (start_tm->tm_mon+1),
                start_tm->tm_mday,
                (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
                logmod);
            if(ret == (PATH_MAX-1))
            {
                /* file name was too big; squish it down */
                snprintf(logfile_name, PATH_MAX,
                    "%s/id%d.darshan_partial",
                    logpath_override, jobid);
            }
883
        }
Shane Snyder's avatar
Shane Snyder committed
884
        else if(logpath)
885
        {
Shane Snyder's avatar
Shane Snyder committed
886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905
            ret = snprintf(logfile_name, PATH_MAX,
                "%s/%d/%d/%d/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
                logpath, (start_tm->tm_year+1900),
                (start_tm->tm_mon+1), start_tm->tm_mday,
                cuser, __progname, jobid,
                (start_tm->tm_mon+1),
                start_tm->tm_mday,
                (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
                logmod);
            if(ret == (PATH_MAX-1))
            {
                /* file name was too big; squish it down */
                snprintf(logfile_name, PATH_MAX,
                    "%s/id%d.darshan_partial",
                    logpath, jobid);
            }
        }
        else
        {
            logfile_name[0] = '\0';
906 907 908 909
        }
    }

    return;
910 911
}

912
/* record any hints used to write the darshan log in the log header */
913
static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* core)
914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936
{
    char* hints;
    char* header_hints;
    int meta_remain = 0;
    char* m;

    /* check environment variable to see if the default MPI file hints have
     * been overridden
     */
    hints = getenv(CP_LOG_HINTS_OVERRIDE);
    if(!hints)
    {
        hints = __CP_LOG_HINTS;
    }

    if(!hints || strlen(hints) < 1)
        return;

    header_hints = strdup(hints);
    if(!header_hints)
        return;

    meta_remain = DARSHAN_JOB_METADATA_LEN -
937
        strlen(core->log_job.metadata) - 1;
938 939
    if(meta_remain >= (strlen(PACKAGE_VERSION) + 9))
    {
940
        sprintf(core->log_job.metadata, "lib_ver=%s\n", PACKAGE_VERSION);
941 942 943 944
        meta_remain -= (strlen(PACKAGE_VERSION) + 9);
    }
    if(meta_remain >= (3 + strlen(header_hints)))
    {
945
        m = core->log_job.metadata + strlen(core->log_job.metadata);
946 947 948 949 950 951 952 953 954 955 956 957
        /* We have room to store the hints in the metadata portion of
         * the job header.  We just prepend an h= to the hints list.  The
         * metadata parser will ignore = characters that appear in the value
         * portion of the metadata key/value pair.
         */
        sprintf(m, "h=%s\n", header_hints);
    }
    free(header_hints);

    return;
}

958 959 960 961 962 963 964 965 966 967