darshan-core.c 50.8 KB
Newer Older
1 2 3 4 5
/*
 *  (C) 2009 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

6
#define _XOPEN_SOURCE 500
7
#define _GNU_SOURCE
8

9 10 11 12 13 14
#include "darshan-runtime-config.h"

#include <stdio.h>
#ifdef HAVE_MNTENT_H
#include <mntent.h>
#endif
15 16 17 18 19
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <limits.h>
#include <pthread.h>
20 21 22
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/vfs.h>
23
#include <zlib.h>
24
#include <mpi.h>
25
#include <assert.h>
26

27
#include "uthash.h"
28 29
#include "darshan-core.h"

30
extern char* __progname;
31
extern char* __progname_full;
32

33
/* internal variable delcarations */
34
static struct darshan_core_runtime *darshan_core = NULL;
35
static pthread_mutex_t darshan_core_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
36
static int my_rank = -1;
37
static int nprocs = -1;
38

Shane Snyder's avatar
Shane Snyder committed
39 40 41
#define DARSHAN_CORE_LOCK() pthread_mutex_lock(&darshan_core_mutex)
#define DARSHAN_CORE_UNLOCK() pthread_mutex_unlock(&darshan_core_mutex)

42 43 44 45 46 47 48 49 50 51 52 53 54
/* FS mount information */
#define DARSHAN_MAX_MNTS 64
#define DARSHAN_MAX_MNT_PATH 256
#define DARSHAN_MAX_MNT_TYPE 32
struct mnt_data
{
    int64_t block_size;
    char path[DARSHAN_MAX_MNT_PATH];
    char type[DARSHAN_MAX_MNT_TYPE];
};
static struct mnt_data mnt_data_array[DARSHAN_MAX_MNTS];
static int mnt_data_count = 0;

55 56
/* prototypes for internal helper functions */
static void darshan_core_initialize(
57
    int argc, char **argv);
58 59 60
static void darshan_core_shutdown(
    void);
static void darshan_core_cleanup(
61
    struct darshan_core_runtime* core);
62 63 64
static void darshan_get_logfile_name(
    char* logfile_name, int jobid, struct tm* start_tm);
static void darshan_log_record_hints_and_ver(
65 66 67 68 69 70
    struct darshan_core_runtime* core);
static void darshan_get_exe_and_mounts_root(
    struct darshan_core_runtime *core, char* trailing_data,
    int space_left);
static char* darshan_get_exe_and_mounts(
    struct darshan_core_runtime *core);
71
static void darshan_get_shared_records(
72
    struct darshan_core_runtime *core, darshan_record_id *shared_recs);
73
static int darshan_log_open_all(
74
    char *logfile_name, MPI_File *log_fh);
75 76 77
static int darshan_deflate_buffer(
    void **pointers, int *lengths, int count, int nocomp_flag,
    char *comp_buf, int *comp_length);
78
static int darshan_log_write_record_hash(
79
    MPI_File log_fh, struct darshan_core_runtime *core,
80 81 82 83
    uint64_t *inout_off);
static int darshan_log_append_all(
    MPI_File log_fh, struct darshan_core_runtime *core, void *buf,
    int count, uint64_t *inout_off, uint64_t *agg_uncomp_sz);
84

85
/* intercept MPI initialize and finalize to manage darshan core runtime */
86 87 88 89 90 91 92 93 94 95
int MPI_Init(int *argc, char ***argv)
{
    int ret;

    ret = DARSHAN_MPI_CALL(PMPI_Init)(argc, argv);
    if(ret != MPI_SUCCESS)
    {
        return(ret);
    }

96 97 98 99 100 101 102 103 104
    if(argc && argv)
    {
        darshan_core_initialize(*argc, *argv);
    }
    else
    {
        /* we don't see argc and argv here in fortran */
        darshan_core_initialize(0, NULL);
    }
105 106 107 108

    return(ret);
}

109
int MPI_Init_thread(int *argc, char ***argv, int required, int *provided)
110 111 112 113
{
    int ret;

    ret = DARSHAN_MPI_CALL(PMPI_Init_thread)(argc, argv, required, provided);
114
    if(ret != MPI_SUCCESS)
115 116 117 118
    {
        return(ret);
    }

119 120 121 122 123 124 125 126 127
    if(argc && argv)
    {
        darshan_core_initialize(*argc, *argv);
    }
    else
    {
        /* we don't see argc and argv here in fortran */
        darshan_core_initialize(0, NULL);
    }
128 129 130 131 132 133 134 135

    return(ret);
}

int MPI_Finalize(void)
{
    int ret;

136
    darshan_core_shutdown();
137 138 139 140 141

    ret = DARSHAN_MPI_CALL(PMPI_Finalize)();
    return(ret);
}

142 143
/* *********************************** */

144
static void darshan_core_initialize(int argc, char **argv)
145 146 147 148 149 150 151 152 153
{
    int i;
    int internal_timing_flag = 0;
    double init_start, init_time, init_max;
    char* truncate_string = "<TRUNCATED>";
    int truncate_offset;
    int chars_left = 0;

    DARSHAN_MPI_CALL(PMPI_Comm_size)(MPI_COMM_WORLD, &nprocs);
154
    DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &my_rank);
155 156 157 158

    if(getenv("DARSHAN_INTERNAL_TIMING"))
        internal_timing_flag = 1;

159
    if(internal_timing_flag)
160 161 162
        init_start = DARSHAN_MPI_CALL(PMPI_Wtime)();

    /* setup darshan runtime if darshan is enabled and hasn't been initialized already */
163
    if(!getenv("DARSHAN_DISABLE") && !darshan_core)
164
    {
165 166 167 168 169
        /* TODO: darshan mem alignment code? */

        /* allocate structure to track darshan_core_runtime information */
        darshan_core = malloc(sizeof(*darshan_core));
        if(darshan_core)
170
        {
171
            memset(darshan_core, 0, sizeof(*darshan_core));
172

173 174 175 176
            darshan_core->log_job.uid = getuid();
            darshan_core->log_job.start_time = time(NULL);
            darshan_core->log_job.nprocs = nprocs;
            darshan_core->wtime_offset = DARSHAN_MPI_CALL(PMPI_Wtime)();
177 178

            /* record exe and arguments */
179
            for(i=0; i<argc; i++)
180
            {
181
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
182 183
                strncat(darshan_core->exe, argv[i], chars_left);
                if(i < (argc-1))
184
                {
185
                    chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
186
                    strncat(darshan_core->exe, " ", chars_left);
187 188 189 190 191 192 193 194
                }
            }

            /* if we don't see any arguments, then use glibc symbol to get
             * program name at least (this happens in fortran)
             */
            if(argc == 0)
            {
195
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
196
                strncat(darshan_core->exe, __progname_full, chars_left);
197
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
198
                strncat(darshan_core->exe, " <unknown args>", chars_left);
199 200 201 202 203
            }

            if(chars_left == 0)
            {
                /* we ran out of room; mark that string was truncated */
204
                truncate_offset = DARSHAN_EXE_LEN - strlen(truncate_string);
205
                sprintf(&darshan_core->exe[truncate_offset], "%s",
206 207
                    truncate_string);
            }
208 209 210

            /* collect information about command line and mounted file systems */
            darshan_core->trailing_data = darshan_get_exe_and_mounts(darshan_core);
211 212 213 214 215 216 217 218
        }
    }

    if(internal_timing_flag)
    {
        init_time = DARSHAN_MPI_CALL(PMPI_Wtime)() - init_start;
        DARSHAN_MPI_CALL(PMPI_Reduce)(&init_time, &init_max, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
219
        if(my_rank == 0)
220 221 222 223 224 225 226 227 228
        {
            printf("#darshan:<op>\t<nprocs>\t<time>\n");
            printf("darshan:init\t%d\t%f\n", nprocs, init_max);
        }
    }

    return;
}

229
static void darshan_core_shutdown()
230
{
231
    int i;
232
    char *logfile_name;
233
    struct darshan_core_runtime *final_core;
234
    int internal_timing_flag = 0;
235 236
    char *envjobid;
    char *jobid_str;
237
    int jobid;
238
    struct tm *start_tm;
239
    time_t start_time_tmp;
240 241
    int ret = 0;
    int all_ret = 0;
242 243
    int64_t first_start_time;
    int64_t last_end_time;
244 245
    int local_mod_use[DARSHAN_MAX_MODS] = {0};
    int global_mod_use_count[DARSHAN_MAX_MODS] = {0};
246
    darshan_record_id shared_recs[DARSHAN_CORE_MAX_RECORDS] = {0};
247
    double start_log_time;
248 249 250 251 252 253 254
    double open1, open2;
    double job1, job2;
    double rec1, rec2;
    double mod1[DARSHAN_MAX_MODS] = {0};
    double mod2[DARSHAN_MAX_MODS] = {0};
    double header1, header2;
    double tm_end;
255
    long offset;
256 257
    uint64_t gz_fp = 0;
    uint64_t tmp_off = 0;
258 259
    MPI_File log_fh;
    MPI_Status status;
260 261 262 263

    if(getenv("DARSHAN_INTERNAL_TIMING"))
        internal_timing_flag = 1;

264 265
    start_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();

Shane Snyder's avatar
Shane Snyder committed
266
    /* disable darhan-core while we shutdown */
267
    DARSHAN_CORE_LOCK();
268
    if(!darshan_core)
269
    {
270
        DARSHAN_CORE_UNLOCK();
271 272
        return;
    }
273 274
    final_core = darshan_core;
    darshan_core = NULL;
Shane Snyder's avatar
Shane Snyder committed
275

276
    /* we also need to set which modules were registered on this process and
Shane Snyder's avatar
Shane Snyder committed
277 278 279 280 281 282 283 284 285 286
     * disable tracing within those modules while we shutdown
     */
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
    {
        if(final_core->mod_array[i])
        {
            local_mod_use[i] = 1;
            final_core->mod_array[i]->mod_funcs.disable_instrumentation();
        }
    }
287
    DARSHAN_CORE_UNLOCK();
288 289 290 291

    logfile_name = malloc(PATH_MAX);
    if(!logfile_name)
    {
292
        darshan_core_cleanup(final_core);
293 294 295
        return;
    }

296
    /* set darshan job id/metadata and constuct log file name on rank 0 */
297
    if(my_rank == 0)
298 299 300
    {
        /* Use CP_JOBID_OVERRIDE for the env var or CP_JOBID */
        envjobid = getenv(CP_JOBID_OVERRIDE);
301
        if(!envjobid)
302 303 304 305
        {
            envjobid = CP_JOBID;
        }

306
        /* find a job id */
307 308 309 310 311 312 313 314 315 316 317 318
        jobid_str = getenv(envjobid);
        if(jobid_str)
        {
            /* in cobalt we can find it in env var */
            ret = sscanf(jobid_str, "%d", &jobid);
        }
        if(!jobid_str || ret != 1)
        {
            /* use pid as fall back */
            jobid = getpid();
        }

319
        final_core->log_job.jobid = (int64_t)jobid;
320

321
        /* if we are using any hints to write the log file, then record those
322
         * hints with the darshan job information
323
         */
324
        darshan_log_record_hints_and_ver(final_core);
325

326
        /* use human readable start time format in log filename */
327
        start_time_tmp = final_core->log_job.start_time;
328
        start_tm = localtime(&start_time_tmp);
329

330 331
        /* construct log file name */
        darshan_get_logfile_name(logfile_name, jobid, start_tm);
332 333 334 335 336 337 338 339 340
    }

    /* broadcast log file name */
    DARSHAN_MPI_CALL(PMPI_Bcast)(logfile_name, PATH_MAX, MPI_CHAR, 0,
        MPI_COMM_WORLD);

    if(strlen(logfile_name) == 0)
    {
        /* failed to generate log file name */
341
        free(logfile_name);
342
        darshan_core_cleanup(final_core);
343 344 345
        return;
    }

346
    final_core->log_job.end_time = time(NULL);
347

348 349 350
    /* reduce to report first start time and last end time across all ranks
     * at rank 0
     */
351 352
    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.start_time, &first_start_time, 1, MPI_LONG_LONG, MPI_MIN, 0, MPI_COMM_WORLD);
    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.end_time, &last_end_time, 1, MPI_LONG_LONG, MPI_MAX, 0, MPI_COMM_WORLD);
353 354
    if(my_rank == 0)
    {
355 356
        final_core->log_job.start_time = first_start_time;
        final_core->log_job.end_time = last_end_time;
357
    }
358

359 360 361
    /* reduce the number of times a module was opened globally and bcast to everyone */   
    DARSHAN_MPI_CALL(PMPI_Allreduce)(local_mod_use, global_mod_use_count, DARSHAN_MAX_MODS, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

362
    /* get a list of records which are shared across all processes */
363
    darshan_get_shared_records(final_core, shared_recs);
364

365 366
    if(internal_timing_flag)
        open1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
367
    /* collectively open the darshan log file */
368
    ret = darshan_log_open_all(logfile_name, &log_fh);
369 370
    if(internal_timing_flag)
        open2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
371 372 373 374 375 376 377 378

    /* error out if unable to open log file */
    DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
        MPI_LOR, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        if(my_rank == 0)
        {
379 380
            fprintf(stderr, "darshan library warning: unable to open log file %s\n",
                logfile_name);
381 382 383
            unlink(logfile_name);
        }
        free(logfile_name);
384
        darshan_core_cleanup(final_core);
385 386 387
        return;
    }

388 389
    if(internal_timing_flag)
        job1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
390
    /* rank 0 is responsible for writing the compressed darshan job information */
Shane Snyder's avatar
Shane Snyder committed
391
    if(my_rank == 0)
392
    {
393 394 395
        void *pointers[2] = {&final_core->log_job, final_core->trailing_data};
        int lengths[2] = {sizeof(struct darshan_job), DARSHAN_EXE_LEN+1};
        int comp_buf_sz = 0;
396

397
        /* compress the job info and the trailing mount/exe data */
398
        all_ret = darshan_deflate_buffer(pointers, lengths, 2, 0,
399 400
            final_core->comp_buf, &comp_buf_sz);
        if(all_ret)
401
        {
402
            fprintf(stderr, "darshan library warning: unable to compress job data\n");
403
            unlink(logfile_name);
404
        }
405 406 407
        else
        {
            /* write the job information, preallocing space for the log header */
408 409 410
            gz_fp += sizeof(struct darshan_header) + 23; /* gzip headers/trailers ... */
            all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, gz_fp,
                final_core->comp_buf, comp_buf_sz, MPI_BYTE, &status);
411 412 413 414 415
            if(all_ret != MPI_SUCCESS)
            {
                fprintf(stderr, "darshan library warning: unable to write job data to log file %s\n",
                        logfile_name);
                unlink(logfile_name);
Shane Snyder's avatar
Shane Snyder committed
416
                
417
            }
Shane Snyder's avatar
Shane Snyder committed
418

419 420
            /* set the beginning offset of record hash, which follows job info just written */
            gz_fp += comp_buf_sz;
421
        }
422 423
    }

424 425 426 427 428
    /* error out if unable to write job information */
    DARSHAN_MPI_CALL(PMPI_Bcast)(&all_ret, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        free(logfile_name);
429
        darshan_core_cleanup(final_core);
430 431
        return;
    }
432 433
    if(internal_timing_flag)
        job2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
434

435 436
    if(internal_timing_flag)
        rec1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
437
    /* write the record name->id hash to the log file */
438 439
    ret = darshan_log_write_record_hash(log_fh, final_core, &gz_fp);
    tmp_off = final_core->log_header.rec_map.off + final_core->log_header.rec_map.len;
440

441
    /* error out if unable to write record hash */
442 443 444 445 446 447
    DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
        MPI_LOR, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        if(my_rank == 0)
        {
448
            fprintf(stderr, "darshan library warning: unable to write record hash to log file %s\n",
449
                logfile_name);
450
            unlink(logfile_name);
451 452
        }
        free(logfile_name);
453
        darshan_core_cleanup(final_core);
454 455
        return;
    }
Shane Snyder's avatar
Shane Snyder committed
456 457
    if(internal_timing_flag)
        rec2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
458 459

    /* loop over globally used darshan modules and:
460
     *      - perform shared file reductions, if possible
461
     *      - get final output buffer
462
     *      - compress (zlib) provided output buffer
Shane Snyder's avatar
Shane Snyder committed
463
     *      - append compressed buffer to log file
464 465
     *      - add module index info (file offset/length) to log header
     *      - shutdown the module
466
     */
467
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
468
    {
469
        struct darshan_core_module* this_mod = final_core->mod_array[i];
470 471 472
        darshan_record_id mod_shared_recs[DARSHAN_CORE_MAX_RECORDS];
        struct darshan_core_record_ref *ref = NULL;
        int shared_rec_cnt = 0;
473
        void* mod_buf = NULL;
474 475
        int mod_buf_sz = 0;
        int comp_buf_sz = 0;
476
        int j;
477

478
        if(global_mod_use_count[i] == 0)
479 480
        {
            if(my_rank == 0)
481 482 483 484
            {
                final_core->log_header.mod_map[i].off = 0;
                final_core->log_header.mod_map[i].len = 0;
            }
485
            continue;
486
        }
487 488 489 490
 
        if(internal_timing_flag)
            mod1[i] = DARSHAN_MPI_CALL(PMPI_Wtime)();   
        /* if all processes used this module, prepare to do a shared file reduction */
Shane Snyder's avatar
Shane Snyder committed
491
        if(global_mod_use_count[i] == nprocs)
492
        {
493 494 495 496 497 498 499 500 501 502 503 504 505
            int shared_rec_count = 0;
            int rec_sz = 0;
            void *red_send_buf = NULL, *red_recv_buf = NULL;
            MPI_Datatype red_type;
            MPI_Op red_op;

            /* set the shared file list for this module */
            memset(mod_shared_recs, 0, DARSHAN_CORE_MAX_RECORDS * sizeof(darshan_record_id));
            for(j = 0; j < DARSHAN_CORE_MAX_RECORDS && shared_recs[j] != 0; j++)
            {
                HASH_FIND(hlink, final_core->rec_hash, &shared_recs[j],
                    sizeof(darshan_record_id), ref);
                assert(ref);
Shane Snyder's avatar
Shane Snyder committed
506
                if(DARSHAN_CORE_MOD_ISSET(ref->global_mod_flags, i))
507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
                {
                    mod_shared_recs[shared_rec_count++] = shared_recs[j];
                }
            }

            /* if there are globally shared files, do a shared file reduction */
            if(shared_rec_count)
            {
                this_mod->mod_funcs.prepare_for_reduction(mod_shared_recs, &shared_rec_count,
                    &red_send_buf, &red_recv_buf, &rec_sz);

                if(shared_rec_count)
                {
                    /* construct a datatype for a file record.  This is serving no purpose
                     * except to make sure we can do a reduction on proper boundaries
                     */
                    DARSHAN_MPI_CALL(PMPI_Type_contiguous)(rec_sz, MPI_BYTE, &red_type);
                    DARSHAN_MPI_CALL(PMPI_Type_commit)(&red_type);

                    /* register a reduction operator for this module */
Shane Snyder's avatar
Shane Snyder committed
527
                    DARSHAN_MPI_CALL(PMPI_Op_create)(this_mod->mod_funcs.reduce_records,
528 529 530 531 532 533 534 535 536 537
                        1, &red_op);

                    /* reduce shared file records for this module */
                    DARSHAN_MPI_CALL(PMPI_Reduce)(red_send_buf, red_recv_buf,
                        shared_rec_count, red_type, red_op, 0, MPI_COMM_WORLD);

                    DARSHAN_MPI_CALL(PMPI_Type_free)(&red_type);
                    DARSHAN_MPI_CALL(PMPI_Op_free)(&red_op);
                }
            }
538
        }
539

540
        /* if module is registered locally, get the corresponding output buffer */
541
        if(this_mod)
542 543
        {
            /* get output buffer from module */
544
            this_mod->mod_funcs.get_output_data(&mod_buf, &mod_buf_sz);
545 546
        }

547
        final_core->log_header.mod_map[i].off = tmp_off;
548

549 550 551 552
        /* append this module's data to the darshan log */
        ret = darshan_log_append_all(log_fh, final_core, mod_buf, mod_buf_sz,
            &gz_fp, &(final_core->log_header.mod_map[i].len));
        tmp_off += final_core->log_header.mod_map[i].len;
553

554
        /* error out if the log append failed */
555 556 557
        DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
            MPI_LOR, MPI_COMM_WORLD);
        if(all_ret != 0)
558
        {
559 560 561 562 563 564 565 566
            if(my_rank == 0)
            {
                fprintf(stderr,
                    "darshan library warning: unable to write %s module data to log file %s\n",
                    darshan_module_names[i], logfile_name);
                unlink(logfile_name);
            }
            free(logfile_name);
567
            darshan_core_cleanup(final_core);
568
            return;
569 570 571
        }

        /* shutdown module if registered locally */
572
        if(this_mod)
573 574 575
        {
            this_mod->mod_funcs.shutdown();
        }
576 577
        if(internal_timing_flag)
            mod2[i] = DARSHAN_MPI_CALL(PMPI_Wtime)();
578 579
    }

580 581
    if(internal_timing_flag)
        header1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
582
    /* rank 0 is responsible for writing the log header */
583 584
    if(my_rank == 0)
    {
585 586 587
        void *header_buf = &(final_core->log_header);
        int header_buf_sz = sizeof(struct darshan_header);
        int comp_buf_sz = 0;
588

589 590 591 592 593 594 595 596 597 598 599 600 601 602
        /* initialize the remaining header fields */
        strcpy(final_core->log_header.version_string, DARSHAN_LOG_VERSION);
        final_core->log_header.magic_nr = DARSHAN_MAGIC_NR;

        /* deflate the header */
        /* NOTE: the header is not actually compressed because space for it must
         *       be preallocated before writing. i.e., the "compressed" header
         *       must be constant sized, sizeof(struct darshan_header) + 23.
         *       it is still necessary to deflate the header or the resulting log
         *       file will not be a valid gzip file.
         */
        all_ret = darshan_deflate_buffer((void **)&header_buf, &header_buf_sz, 1, 1,
            final_core->comp_buf, &comp_buf_sz);
        if(all_ret)
603
        {
604
            fprintf(stderr, "darshan library warning: unable to compress header\n");
605
            unlink(logfile_name);
606
        }
607 608 609 610 611 612 613 614 615 616 617
        else
        {
            all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, 0, final_core->comp_buf,
                comp_buf_sz, MPI_BYTE, &status);
            if(all_ret != MPI_SUCCESS)
            {
                fprintf(stderr, "darshan library warning: unable to write header to log file %s\n",
                        logfile_name);
                unlink(logfile_name);
            }
        }
618 619
    }

620 621 622 623 624
    /* error out if unable to write log header */
    DARSHAN_MPI_CALL(PMPI_Bcast)(&all_ret, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        free(logfile_name);
625
        darshan_core_cleanup(final_core);
626 627
        return;
    }
628 629
    if(internal_timing_flag)
        header2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
630

631 632 633 634 635 636
    DARSHAN_MPI_CALL(PMPI_File_close)(&log_fh);

    /* if we got this far, there are no errors, so rename from *.darshan_partial
     * to *-<logwritetime>.darshan.gz, which indicates that this log file is
     * complete and ready for analysis
     */
637 638
    if(my_rank == 0)
    {
Shane Snyder's avatar
Shane Snyder committed
639
        if(getenv("DARSHAN_LOGFILE"))
640
        {
641
#ifdef __CP_GROUP_READABLE_LOGS
Shane Snyder's avatar
Shane Snyder committed
642
            chmod(logfile_name, (S_IRUSR|S_IRGRP));
643
#else
Shane Snyder's avatar
Shane Snyder committed
644
            chmod(logfile_name, (S_IRUSR));
645
#endif
Shane Snyder's avatar
Shane Snyder committed
646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669
        }
        else
        {
            char* tmp_index;
            double end_log_time;
            char* new_logfile_name;

            new_logfile_name = malloc(PATH_MAX);
            if(new_logfile_name)
            {
                new_logfile_name[0] = '\0';
                end_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();
                strcat(new_logfile_name, logfile_name);
                tmp_index = strstr(new_logfile_name, ".darshan_partial");
                sprintf(tmp_index, "_%d.darshan.gz", (int)(end_log_time-start_log_time+1));
                rename(logfile_name, new_logfile_name);
                /* set permissions on log file */
#ifdef __CP_GROUP_READABLE_LOGS
                chmod(new_logfile_name, (S_IRUSR|S_IRGRP));
#else
                chmod(new_logfile_name, (S_IRUSR));
#endif
                free(new_logfile_name);
            }
670
        }
671
    }
672

673
    free(logfile_name);
674
    darshan_core_cleanup(final_core);
675

676
    if(internal_timing_flag)
677
    {
678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715
        double open_tm, open_slowest;
        double header_tm, header_slowest;
        double job_tm, job_slowest;
        double rec_tm, rec_slowest;
        double mod_tm[DARSHAN_MAX_MODS], mod_slowest[DARSHAN_MAX_MODS];
        double all_tm, all_slowest;

        tm_end = DARSHAN_MPI_CALL(PMPI_Wtime)();

        open_tm = open2 - open1;
        header_tm = header2 - header1;
        job_tm = job2 - job1;
        rec_tm = rec2 - rec1;
        all_tm = tm_end - start_log_time;
        for(i = 0;i < DARSHAN_MAX_MODS; i++)
        {
            mod_tm[i] = mod2[i] - mod1[i];
        }

        DARSHAN_MPI_CALL(PMPI_Reduce)(&open_tm, &open_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&header_tm, &header_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&job_tm, &job_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&rec_tm, &rec_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&all_tm, &all_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(mod_tm, mod_slowest, DARSHAN_MAX_MODS,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

        if(my_rank == 0)
        {
            printf("#darshan:<op>\t<nprocs>\t<time>\n");
            printf("darshan:log_open\t%d\t%f\n", nprocs, open_slowest);
            printf("darshan:job_write\t%d\t%f\n", nprocs, job_slowest);
            printf("darshan:hash_write\t%d\t%f\n", nprocs, rec_slowest);
Shane Snyder's avatar
Shane Snyder committed
716
            printf("darshan:header_write\t%d\t%f\n", nprocs, header_slowest);
717 718 719 720
            for(i = 0; i < DARSHAN_MAX_MODS; i++)
            {
                if(global_mod_use_count[i])
                    printf("darshan:%s_shutdown\t%d\t%f\n", darshan_module_names[i],
Shane Snyder's avatar
Shane Snyder committed
721
                        nprocs, mod_slowest[i]);
722 723 724
            }
            printf("darshan:core_shutdown\t%d\t%f\n", nprocs, all_slowest);
        }
725 726 727 728
    }
    
    return;
}
729

730
/* free darshan core data structures to shutdown */
731
static void darshan_core_cleanup(struct darshan_core_runtime* core)
732
{
733
    struct darshan_core_record_ref *tmp, *ref;
734
    int i;
735

736 737 738 739 740 741
    HASH_ITER(hlink, core->rec_hash, ref, tmp)
    {
        HASH_DELETE(hlink, core->rec_hash, ref);
        free(ref->rec.name);
        free(ref);
    }
742

743
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
744
    {
745
        if(core->mod_array[i])
746
        {        
747 748
            free(core->mod_array[i]);
            core->mod_array[i] = NULL;
749
        }
750
    }
751

752
    free(core->trailing_data);
753
    free(core);
754 755 756 757

    return;
}

758
/* construct the darshan log file name */
759
static void darshan_get_logfile_name(char* logfile_name, int jobid, struct tm* start_tm)
760
{
Shane Snyder's avatar
Shane Snyder committed
761
    char* user_logfile_name;
762 763 764 765 766 767 768 769 770 771 772 773 774
    char* logpath;
    char* logname_string;
    char* logpath_override = NULL;
#ifdef __CP_LOG_ENV
    char env_check[256];
    char* env_tok;
#endif
    uint64_t hlevel;
    char hname[HOST_NAME_MAX];
    uint64_t logmod;
    char cuser[L_cuserid] = {0};
    int ret;

Shane Snyder's avatar
Shane Snyder committed
775 776 777 778 779 780 781 782 783 784 785 786 787 788 789
    /* first, check if user specifies a complete logpath to use */
    user_logfile_name = getenv("DARSHAN_LOGFILE");
    if(user_logfile_name)
    {
        if(strlen(user_logfile_name) >= (PATH_MAX-1))
        {
            fprintf(stderr, "darshan library warning: user log file name too long.\n");
            logfile_name[0] = '\0';
        }
        else
        {
            strcpy(logfile_name, user_logfile_name);
        }
    }
    else
790
    {
Shane Snyder's avatar
Shane Snyder committed
791 792 793 794 795 796
        /* otherwise, generate the log path automatically */

        /* Use CP_LOG_PATH_OVERRIDE for the value or __CP_LOG_PATH */
        logpath = getenv(CP_LOG_PATH_OVERRIDE);
        if(!logpath)
        {
797
#ifdef __CP_LOG_PATH
Shane Snyder's avatar
Shane Snyder committed
798
            logpath = __CP_LOG_PATH;
799
#endif
Shane Snyder's avatar
Shane Snyder committed
800
        }
801

Shane Snyder's avatar
Shane Snyder committed
802 803 804 805 806 807 808 809 810 811
        /* get the username for this job.  In order we will try each of the
         * following until one of them succeeds:
         *
         * - cuserid()
         * - getenv("LOGNAME")
         * - snprintf(..., geteuid());
         *
         * Note that we do not use getpwuid() because it generally will not
         * work in statically compiled binaries.
         */
812 813

#ifndef DARSHAN_DISABLE_CUSERID
Shane Snyder's avatar
Shane Snyder committed
814
        cuserid(cuser);
815 816
#endif

Shane Snyder's avatar
Shane Snyder committed
817 818
        /* if cuserid() didn't work, then check the environment */
        if(strcmp(cuser, "") == 0)
819
        {
Shane Snyder's avatar
Shane Snyder committed
820 821 822 823 824
            logname_string = getenv("LOGNAME");
            if(logname_string)
            {
                strncpy(cuser, logname_string, (L_cuserid-1));
            }
825 826
        }

Shane Snyder's avatar
Shane Snyder committed
827 828 829 830 831 832
        /* if cuserid() and environment both fail, then fall back to uid */
        if(strcmp(cuser, "") == 0)
        {
            uid_t uid = geteuid();
            snprintf(cuser, sizeof(cuser), "%u", uid);
        }
833

Shane Snyder's avatar
Shane Snyder committed
834 835 836 837
        /* generate a random number to help differentiate the log */
        hlevel=DARSHAN_MPI_CALL(PMPI_Wtime)() * 1000000;
        (void)gethostname(hname, sizeof(hname));
        logmod = darshan_hash((void*)hname,strlen(hname),hlevel);
838

Shane Snyder's avatar
Shane Snyder committed
839 840 841 842
        /* see if darshan was configured using the --with-logpath-by-env
         * argument, which allows the user to specify an absolute path to
         * place logs via an env variable.
         */
843
#ifdef __CP_LOG_ENV
Shane Snyder's avatar
Shane Snyder committed
844 845
        /* just silently skip if the environment variable list is too big */
        if(strlen(__CP_LOG_ENV) < 256)
846
        {
Shane Snyder's avatar
Shane Snyder committed
847 848 849 850 851
            /* copy env variable list to a temporary buffer */
            strcpy(env_check, __CP_LOG_ENV);
            /* tokenize the comma-separated list */
            env_tok = strtok(env_check, ",");
            if(env_tok)
852
            {
Shane Snyder's avatar
Shane Snyder committed
853
                do
854
                {
Shane Snyder's avatar
Shane Snyder committed
855 856 857 858 859 860 861 862 863
                    /* check each env variable in order */
                    logpath_override = getenv(env_tok);
                    if(logpath_override)
                    {
                        /* stop as soon as we find a match */
                        break;
                    }
                }while((env_tok = strtok(NULL, ",")));
            }
864 865 866
        }
#endif

Shane Snyder's avatar
Shane Snyder committed
867
        if(logpath_override)
868
        {
Shane Snyder's avatar
Shane Snyder committed
869 870 871 872 873 874 875 876 877 878 879 880 881 882 883
            ret = snprintf(logfile_name, PATH_MAX,
                "%s/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
                logpath_override,
                cuser, __progname, jobid,
                (start_tm->tm_mon+1),
                start_tm->tm_mday,
                (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
                logmod);
            if(ret == (PATH_MAX-1))
            {
                /* file name was too big; squish it down */
                snprintf(logfile_name, PATH_MAX,
                    "%s/id%d.darshan_partial",
                    logpath_override, jobid);
            }
884
        }
Shane Snyder's avatar
Shane Snyder committed
885
        else if(logpath)
886
        {
Shane Snyder's avatar
Shane Snyder committed
887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906
            ret = snprintf(logfile_name, PATH_MAX,
                "%s/%d/%d/%d/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
                logpath, (start_tm->tm_year+1900),
                (start_tm->tm_mon+1), start_tm->tm_mday,
                cuser, __progname, jobid,
                (start_tm->tm_mon+1),
                start_tm->tm_mday,
                (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
                logmod);
            if(ret == (PATH_MAX-1))
            {
                /* file name was too big; squish it down */
                snprintf(logfile_name, PATH_MAX,
                    "%s/id%d.darshan_partial",
                    logpath, jobid);
            }
        }
        else
        {
            logfile_name[0] = '\0';
907 908 909 910
        }
    }

    return;
911 912
}

913
/* record any hints used to write the darshan log in the log header */
914
static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* core)
915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937
{
    char* hints;
    char* header_hints;
    int meta_remain = 0;
    char* m;

    /* check environment variable to see if the default MPI file hints have
     * been overridden
     */
    hints = getenv(CP_LOG_HINTS_OVERRIDE);
    if(!hints)
    {
        hints = __CP_LOG_HINTS;
    }

    if(!hints || strlen(hints) < 1)
        return;

    header_hints = strdup(hints);
    if(!header_hints)
        return;

    meta_remain = DARSHAN_JOB_METADATA_LEN -
938
        strlen(core->log_job.metadata) - 1;
939 940
    if(meta_remain >= (strlen(PACKAGE_VERSION) + 9))
    {
941
        sprintf(core->log_job.metadata, "lib_ver=%s\n", PACKAGE_VERSION);
942 943 944 945
        meta_remain -= (strlen(PACKAGE_VERSION) + 9);
    }
    if(meta_remain >= (3 + strlen(header_hints)))
    {
946
        m = core->log_job.metadata + strlen(core->log_job.metadata);
947 948 949 950 951 952 953 954 955 956 957 958
        /* We have room to store the hints in the metadata portion of
         * the job header.  We just prepend an h= to the hints list.  The
         * metadata parser will ignore = characters that appear in the value
         * portion of the metadata key/value pair.
         */
        sprintf(m, "h=%s\n", header_hints);
    }
    free(header_hints);

    return;
}

959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978