darshan-core.c 51.3 KB
Newer Older
1
/*
Shane Snyder's avatar
Shane Snyder committed
2
3
4
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
5
6
 */

7
#define _XOPEN_SOURCE 500
8
#define _GNU_SOURCE
9

10
11
12
13
14
15
#include "darshan-runtime-config.h"

#include <stdio.h>
#ifdef HAVE_MNTENT_H
#include <mntent.h>
#endif
16
17
18
19
20
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <limits.h>
#include <pthread.h>
21
22
23
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/vfs.h>
24
#include <zlib.h>
25
#include <mpi.h>
26
#include <assert.h>
27

28
#include "uthash.h"
Shane Snyder's avatar
Shane Snyder committed
29
#include "darshan.h"
30
#include "darshan-core.h"
Shane Snyder's avatar
Shane Snyder committed
31
#include "darshan-dynamic.h"
32

33
extern char* __progname;
34
extern char* __progname_full;
35

36
/* internal variable delcarations */
37
static struct darshan_core_runtime *darshan_core = NULL;
38
static pthread_mutex_t darshan_core_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
39
static int my_rank = -1;
40
static int nprocs = -1;
41
static int darshan_mem_alignment = 1;
42

43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
/* paths prefixed with the following directories are not traced by darshan
 *
 * The table is a NULL-terminated list of directory prefixes, each written
 * with a trailing '/'.
 * NOTE(review): the code that consults this table is not part of this
 * section of the file -- confirm the exact matching rule against its users.
 */
char* darshan_path_exclusions[] = {
"/etc/",
"/dev/",
"/usr/",
"/bin/",
"/boot/",
"/lib/",
"/opt/",
"/sbin/",
"/sys/",
"/proc/",
NULL
};

Shane Snyder's avatar
Shane Snyder committed
58
59
60
/* acquire/release darshan_core_mutex, which is held while the global
 * darshan_core pointer is inspected or swapped out during shutdown
 */
#define DARSHAN_CORE_LOCK() pthread_mutex_lock(&darshan_core_mutex)
#define DARSHAN_CORE_UNLOCK() pthread_mutex_unlock(&darshan_core_mutex)

61
62
63
64
65
66
/* FS mount information */
#define DARSHAN_MAX_MNTS 64       /* number of entries in mnt_data_array */
#define DARSHAN_MAX_MNT_PATH 256  /* size of the mount path buffer, in bytes */
#define DARSHAN_MAX_MNT_TYPE 32   /* size of the file system type buffer, in bytes */

/* one recorded mount point */
struct mnt_data
{
    int block_size;                   /* block size reported for this mount */
    char path[DARSHAN_MAX_MNT_PATH];  /* mount point path */
    char type[DARSHAN_MAX_MNT_TYPE];  /* file system type name */
};
static struct mnt_data mnt_data_array[DARSHAN_MAX_MNTS];
static int mnt_data_count = 0;

74
75
76
77
/* prototypes for internal helper functions */
static void darshan_get_logfile_name(
    char* logfile_name, int jobid, struct tm* start_tm);
static void darshan_log_record_hints_and_ver(
78
79
80
81
82
83
    struct darshan_core_runtime* core);
static void darshan_get_exe_and_mounts_root(
    struct darshan_core_runtime *core, char* trailing_data,
    int space_left);
static char* darshan_get_exe_and_mounts(
    struct darshan_core_runtime *core);
84
85
static void darshan_block_size_from_path(
    const char *path, int *block_size);
86
static void darshan_get_shared_records(
87
    struct darshan_core_runtime *core, darshan_record_id *shared_recs);
88
static int darshan_log_open_all(
89
    char *logfile_name, MPI_File *log_fh);
90
91
92
static int darshan_deflate_buffer(
    void **pointers, int *lengths, int count, int nocomp_flag,
    char *comp_buf, int *comp_length);
93
static int darshan_log_write_record_hash(
94
    MPI_File log_fh, struct darshan_core_runtime *core,
95
96
97
98
    uint64_t *inout_off);
static int darshan_log_append_all(
    MPI_File log_fh, struct darshan_core_runtime *core, void *buf,
    int count, uint64_t *inout_off, uint64_t *agg_uncomp_sz);
Shane Snyder's avatar
Shane Snyder committed
99
100
static void darshan_core_cleanup(
    struct darshan_core_runtime* core);
101

102
103
/* *********************************** */

Shane Snyder's avatar
Shane Snyder committed
104
void darshan_core_initialize(int argc, char **argv)
105
106
107
108
{
    int i;
    int internal_timing_flag = 0;
    double init_start, init_time, init_max;
109
    char *envstr;
110
111
112
    char* truncate_string = "<TRUNCATED>";
    int truncate_offset;
    int chars_left = 0;
113
114
    int ret;
    int tmpval;
115
116

    DARSHAN_MPI_CALL(PMPI_Comm_size)(MPI_COMM_WORLD, &nprocs);
117
    DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &my_rank);
118
119
120
121

    if(getenv("DARSHAN_INTERNAL_TIMING"))
        internal_timing_flag = 1;

122
    if(internal_timing_flag)
123
124
125
        init_start = DARSHAN_MPI_CALL(PMPI_Wtime)();

    /* setup darshan runtime if darshan is enabled and hasn't been initialized already */
126
    if(!getenv("DARSHAN_DISABLE") && !darshan_core)
127
    {
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
        #if (__CP_MEM_ALIGNMENT < 1)
            #error Darshan must be configured with a positive value for --with-mem-align
        #endif
        envstr = getenv("DARSHAN_MEMALIGN");
        if(envstr)
        {
            ret = sscanf(envstr, "%d", &tmpval);
            /* silently ignore if the env variable is set poorly */
            if(ret == 1 && tmpval > 0)
            {
                darshan_mem_alignment = tmpval;
            }
        }
        else
        {
            darshan_mem_alignment = __CP_MEM_ALIGNMENT;
        }

        /* avoid floating point errors on faulty input */
        if (darshan_mem_alignment < 1)
        {
            darshan_mem_alignment = 1;
        }
151
152
153
154

        /* allocate structure to track darshan_core_runtime information */
        darshan_core = malloc(sizeof(*darshan_core));
        if(darshan_core)
155
        {
156
            memset(darshan_core, 0, sizeof(*darshan_core));
157

158
159
160
161
            darshan_core->log_job.uid = getuid();
            darshan_core->log_job.start_time = time(NULL);
            darshan_core->log_job.nprocs = nprocs;
            darshan_core->wtime_offset = DARSHAN_MPI_CALL(PMPI_Wtime)();
162
163

            /* record exe and arguments */
164
            for(i=0; i<argc; i++)
165
            {
166
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
167
168
                strncat(darshan_core->exe, argv[i], chars_left);
                if(i < (argc-1))
169
                {
170
                    chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
171
                    strncat(darshan_core->exe, " ", chars_left);
172
173
174
175
176
177
178
179
                }
            }

            /* if we don't see any arguments, then use glibc symbol to get
             * program name at least (this happens in fortran)
             */
            if(argc == 0)
            {
180
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
181
                strncat(darshan_core->exe, __progname_full, chars_left);
182
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
183
                strncat(darshan_core->exe, " <unknown args>", chars_left);
184
185
186
187
188
            }

            if(chars_left == 0)
            {
                /* we ran out of room; mark that string was truncated */
189
                truncate_offset = DARSHAN_EXE_LEN - strlen(truncate_string);
190
                sprintf(&darshan_core->exe[truncate_offset], "%s",
191
192
                    truncate_string);
            }
193
194
195

            /* collect information about command line and mounted file systems */
            darshan_core->trailing_data = darshan_get_exe_and_mounts(darshan_core);
196
197
198
199
200
201
202
203
        }
    }

    if(internal_timing_flag)
    {
        init_time = DARSHAN_MPI_CALL(PMPI_Wtime)() - init_start;
        DARSHAN_MPI_CALL(PMPI_Reduce)(&init_time, &init_max, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
204
        if(my_rank == 0)
205
206
207
208
209
210
211
212
213
        {
            printf("#darshan:<op>\t<nprocs>\t<time>\n");
            printf("darshan:init\t%d\t%f\n", nprocs, init_max);
        }
    }

    return;
}

Shane Snyder's avatar
Shane Snyder committed
214
void darshan_core_shutdown()
215
{
216
    int i;
217
    char *logfile_name;
218
    struct darshan_core_runtime *final_core;
219
    int internal_timing_flag = 0;
220
221
    char *envjobid;
    char *jobid_str;
222
    int jobid;
223
    struct tm *start_tm;
224
    time_t start_time_tmp;
225
226
    int ret = 0;
    int all_ret = 0;
227
228
    int64_t first_start_time;
    int64_t last_end_time;
229
230
    int local_mod_use[DARSHAN_MAX_MODS] = {0};
    int global_mod_use_count[DARSHAN_MAX_MODS] = {0};
231
    darshan_record_id shared_recs[DARSHAN_CORE_MAX_RECORDS] = {0};
232
    double start_log_time;
233
234
235
236
237
238
239
    double open1, open2;
    double job1, job2;
    double rec1, rec2;
    double mod1[DARSHAN_MAX_MODS] = {0};
    double mod2[DARSHAN_MAX_MODS] = {0};
    double header1, header2;
    double tm_end;
240
241
    uint64_t gz_fp = 0;
    uint64_t tmp_off = 0;
242
243
    MPI_File log_fh;
    MPI_Status status;
244
245
246
247

    if(getenv("DARSHAN_INTERNAL_TIMING"))
        internal_timing_flag = 1;

248
249
    start_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();

Shane Snyder's avatar
Shane Snyder committed
250
    /* disable darhan-core while we shutdown */
251
    DARSHAN_CORE_LOCK();
252
    if(!darshan_core)
253
    {
254
        DARSHAN_CORE_UNLOCK();
255
256
        return;
    }
257
258
    final_core = darshan_core;
    darshan_core = NULL;
Shane Snyder's avatar
Shane Snyder committed
259

260
    /* we also need to set which modules were registered on this process and
Shane Snyder's avatar
Shane Snyder committed
261
262
263
264
265
266
267
268
269
270
     * disable tracing within those modules while we shutdown
     */
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
    {
        if(final_core->mod_array[i])
        {
            local_mod_use[i] = 1;
            final_core->mod_array[i]->mod_funcs.disable_instrumentation();
        }
    }
271
    DARSHAN_CORE_UNLOCK();
272
273
274
275

    logfile_name = malloc(PATH_MAX);
    if(!logfile_name)
    {
276
        darshan_core_cleanup(final_core);
277
278
279
        return;
    }

280
    /* set darshan job id/metadata and constuct log file name on rank 0 */
281
    if(my_rank == 0)
282
283
284
    {
        /* Use CP_JOBID_OVERRIDE for the env var or CP_JOBID */
        envjobid = getenv(CP_JOBID_OVERRIDE);
285
        if(!envjobid)
286
287
288
289
        {
            envjobid = CP_JOBID;
        }

290
        /* find a job id */
291
292
293
294
295
296
297
298
299
300
301
302
        jobid_str = getenv(envjobid);
        if(jobid_str)
        {
            /* in cobalt we can find it in env var */
            ret = sscanf(jobid_str, "%d", &jobid);
        }
        if(!jobid_str || ret != 1)
        {
            /* use pid as fall back */
            jobid = getpid();
        }

303
        final_core->log_job.jobid = (int64_t)jobid;
304

305
        /* if we are using any hints to write the log file, then record those
306
         * hints with the darshan job information
307
         */
308
        darshan_log_record_hints_and_ver(final_core);
309

310
        /* use human readable start time format in log filename */
311
        start_time_tmp = final_core->log_job.start_time;
312
        start_tm = localtime(&start_time_tmp);
313

314
315
        /* construct log file name */
        darshan_get_logfile_name(logfile_name, jobid, start_tm);
316
317
318
319
320
321
322
323
324
    }

    /* broadcast log file name */
    DARSHAN_MPI_CALL(PMPI_Bcast)(logfile_name, PATH_MAX, MPI_CHAR, 0,
        MPI_COMM_WORLD);

    if(strlen(logfile_name) == 0)
    {
        /* failed to generate log file name */
325
        free(logfile_name);
326
        darshan_core_cleanup(final_core);
327
328
329
        return;
    }

330
    final_core->log_job.end_time = time(NULL);
331

332
333
334
    /* reduce to report first start time and last end time across all ranks
     * at rank 0
     */
335
336
    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.start_time, &first_start_time, 1, MPI_LONG_LONG, MPI_MIN, 0, MPI_COMM_WORLD);
    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.end_time, &last_end_time, 1, MPI_LONG_LONG, MPI_MAX, 0, MPI_COMM_WORLD);
337
338
    if(my_rank == 0)
    {
339
340
        final_core->log_job.start_time = first_start_time;
        final_core->log_job.end_time = last_end_time;
341
    }
342

343
344
345
    /* reduce the number of times a module was opened globally and bcast to everyone */   
    DARSHAN_MPI_CALL(PMPI_Allreduce)(local_mod_use, global_mod_use_count, DARSHAN_MAX_MODS, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

346
    /* get a list of records which are shared across all processes */
347
    darshan_get_shared_records(final_core, shared_recs);
348

349
350
    if(internal_timing_flag)
        open1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
351
    /* collectively open the darshan log file */
352
    ret = darshan_log_open_all(logfile_name, &log_fh);
353
354
    if(internal_timing_flag)
        open2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
355
356
357
358
359
360
361
362

    /* error out if unable to open log file */
    DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
        MPI_LOR, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        if(my_rank == 0)
        {
363
364
            fprintf(stderr, "darshan library warning: unable to open log file %s\n",
                logfile_name);
365
366
367
            unlink(logfile_name);
        }
        free(logfile_name);
368
        darshan_core_cleanup(final_core);
369
370
371
        return;
    }

372
373
    if(internal_timing_flag)
        job1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
374
    /* rank 0 is responsible for writing the compressed darshan job information */
Shane Snyder's avatar
Shane Snyder committed
375
    if(my_rank == 0)
376
    {
377
378
379
        void *pointers[2] = {&final_core->log_job, final_core->trailing_data};
        int lengths[2] = {sizeof(struct darshan_job), DARSHAN_EXE_LEN+1};
        int comp_buf_sz = 0;
380

381
        /* compress the job info and the trailing mount/exe data */
382
        all_ret = darshan_deflate_buffer(pointers, lengths, 2, 0,
383
384
            final_core->comp_buf, &comp_buf_sz);
        if(all_ret)
385
        {
386
            fprintf(stderr, "darshan library warning: unable to compress job data\n");
387
            unlink(logfile_name);
388
        }
389
390
391
        else
        {
            /* write the job information, preallocing space for the log header */
392
393
394
            gz_fp += sizeof(struct darshan_header) + 23; /* gzip headers/trailers ... */
            all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, gz_fp,
                final_core->comp_buf, comp_buf_sz, MPI_BYTE, &status);
395
396
397
398
399
            if(all_ret != MPI_SUCCESS)
            {
                fprintf(stderr, "darshan library warning: unable to write job data to log file %s\n",
                        logfile_name);
                unlink(logfile_name);
Shane Snyder's avatar
Shane Snyder committed
400
                
401
            }
Shane Snyder's avatar
Shane Snyder committed
402

403
404
            /* set the beginning offset of record hash, which follows job info just written */
            gz_fp += comp_buf_sz;
405
        }
406
407
    }

408
409
410
411
412
    /* error out if unable to write job information */
    DARSHAN_MPI_CALL(PMPI_Bcast)(&all_ret, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        free(logfile_name);
413
        darshan_core_cleanup(final_core);
414
415
        return;
    }
416
417
    if(internal_timing_flag)
        job2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
418

419
420
    if(internal_timing_flag)
        rec1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
421
    /* write the record name->id hash to the log file */
422
423
    ret = darshan_log_write_record_hash(log_fh, final_core, &gz_fp);
    tmp_off = final_core->log_header.rec_map.off + final_core->log_header.rec_map.len;
424

425
    /* error out if unable to write record hash */
426
427
428
429
430
431
    DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
        MPI_LOR, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        if(my_rank == 0)
        {
432
            fprintf(stderr, "darshan library warning: unable to write record hash to log file %s\n",
433
                logfile_name);
434
            unlink(logfile_name);
435
436
        }
        free(logfile_name);
437
        darshan_core_cleanup(final_core);
438
439
        return;
    }
Shane Snyder's avatar
Shane Snyder committed
440
441
    if(internal_timing_flag)
        rec2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
442
443

    /* loop over globally used darshan modules and:
444
     *      - perform shared file reductions, if possible
445
     *      - get final output buffer
446
     *      - compress (zlib) provided output buffer
Shane Snyder's avatar
Shane Snyder committed
447
     *      - append compressed buffer to log file
448
449
     *      - add module index info (file offset/length) to log header
     *      - shutdown the module
450
     */
451
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
452
    {
453
        struct darshan_core_module* this_mod = final_core->mod_array[i];
454
455
        darshan_record_id mod_shared_recs[DARSHAN_CORE_MAX_RECORDS];
        struct darshan_core_record_ref *ref = NULL;
456
        void* mod_buf = NULL;
457
        int mod_buf_sz = 0;
458
        int j;
459

460
        if(global_mod_use_count[i] == 0)
461
462
        {
            if(my_rank == 0)
463
464
465
466
            {
                final_core->log_header.mod_map[i].off = 0;
                final_core->log_header.mod_map[i].len = 0;
            }
467
            continue;
468
        }
469
470
471
472
 
        if(internal_timing_flag)
            mod1[i] = DARSHAN_MPI_CALL(PMPI_Wtime)();   
        /* if all processes used this module, prepare to do a shared file reduction */
Shane Snyder's avatar
Shane Snyder committed
473
        if(global_mod_use_count[i] == nprocs)
474
        {
475
476
477
478
479
480
481
482
483
484
485
486
487
            int shared_rec_count = 0;
            int rec_sz = 0;
            void *red_send_buf = NULL, *red_recv_buf = NULL;
            MPI_Datatype red_type;
            MPI_Op red_op;

            /* set the shared file list for this module */
            memset(mod_shared_recs, 0, DARSHAN_CORE_MAX_RECORDS * sizeof(darshan_record_id));
            for(j = 0; j < DARSHAN_CORE_MAX_RECORDS && shared_recs[j] != 0; j++)
            {
                HASH_FIND(hlink, final_core->rec_hash, &shared_recs[j],
                    sizeof(darshan_record_id), ref);
                assert(ref);
Shane Snyder's avatar
Shane Snyder committed
488
                if(DARSHAN_CORE_MOD_ISSET(ref->global_mod_flags, i))
489
490
491
492
493
494
                {
                    mod_shared_recs[shared_rec_count++] = shared_recs[j];
                }
            }

            /* if there are globally shared files, do a shared file reduction */
495
496
            if(shared_rec_count && this_mod->mod_funcs.prepare_for_reduction &&
               this_mod->mod_funcs.record_reduction_op)
497
498
499
500
501
502
503
504
505
506
507
508
509
            {
                this_mod->mod_funcs.prepare_for_reduction(mod_shared_recs, &shared_rec_count,
                    &red_send_buf, &red_recv_buf, &rec_sz);

                if(shared_rec_count)
                {
                    /* construct a datatype for a file record.  This is serving no purpose
                     * except to make sure we can do a reduction on proper boundaries
                     */
                    DARSHAN_MPI_CALL(PMPI_Type_contiguous)(rec_sz, MPI_BYTE, &red_type);
                    DARSHAN_MPI_CALL(PMPI_Type_commit)(&red_type);

                    /* register a reduction operator for this module */
510
                    DARSHAN_MPI_CALL(PMPI_Op_create)(this_mod->mod_funcs.record_reduction_op,
511
512
513
514
515
516
517
518
519
520
                        1, &red_op);

                    /* reduce shared file records for this module */
                    DARSHAN_MPI_CALL(PMPI_Reduce)(red_send_buf, red_recv_buf,
                        shared_rec_count, red_type, red_op, 0, MPI_COMM_WORLD);

                    DARSHAN_MPI_CALL(PMPI_Type_free)(&red_type);
                    DARSHAN_MPI_CALL(PMPI_Op_free)(&red_op);
                }
            }
521
        }
522

523
        /* if module is registered locally, get the corresponding output buffer */
524
        if(this_mod)
525
526
        {
            /* get output buffer from module */
527
            this_mod->mod_funcs.get_output_data(&mod_buf, &mod_buf_sz);
528
529
        }

530
        final_core->log_header.mod_map[i].off = tmp_off;
531

532
533
534
535
        /* append this module's data to the darshan log */
        ret = darshan_log_append_all(log_fh, final_core, mod_buf, mod_buf_sz,
            &gz_fp, &(final_core->log_header.mod_map[i].len));
        tmp_off += final_core->log_header.mod_map[i].len;
536

537
        /* error out if the log append failed */
538
539
540
        DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
            MPI_LOR, MPI_COMM_WORLD);
        if(all_ret != 0)
541
        {
542
543
544
545
546
547
548
549
            if(my_rank == 0)
            {
                fprintf(stderr,
                    "darshan library warning: unable to write %s module data to log file %s\n",
                    darshan_module_names[i], logfile_name);
                unlink(logfile_name);
            }
            free(logfile_name);
550
            darshan_core_cleanup(final_core);
551
            return;
552
553
554
        }

        /* shutdown module if registered locally */
555
        if(this_mod)
556
557
558
        {
            this_mod->mod_funcs.shutdown();
        }
559
560
        if(internal_timing_flag)
            mod2[i] = DARSHAN_MPI_CALL(PMPI_Wtime)();
561
562
    }

563
564
    if(internal_timing_flag)
        header1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
565
    /* rank 0 is responsible for writing the log header */
566
567
    if(my_rank == 0)
    {
568
569
570
        void *header_buf = &(final_core->log_header);
        int header_buf_sz = sizeof(struct darshan_header);
        int comp_buf_sz = 0;
571

572
573
574
575
576
577
578
579
580
581
582
583
584
585
        /* initialize the remaining header fields */
        strcpy(final_core->log_header.version_string, DARSHAN_LOG_VERSION);
        final_core->log_header.magic_nr = DARSHAN_MAGIC_NR;

        /* deflate the header */
        /* NOTE: the header is not actually compressed because space for it must
         *       be preallocated before writing. i.e., the "compressed" header
         *       must be constant sized, sizeof(struct darshan_header) + 23.
         *       it is still necessary to deflate the header or the resulting log
         *       file will not be a valid gzip file.
         */
        all_ret = darshan_deflate_buffer((void **)&header_buf, &header_buf_sz, 1, 1,
            final_core->comp_buf, &comp_buf_sz);
        if(all_ret)
586
        {
587
            fprintf(stderr, "darshan library warning: unable to compress header\n");
588
            unlink(logfile_name);
589
        }
590
591
592
593
594
595
596
597
598
599
600
        else
        {
            all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, 0, final_core->comp_buf,
                comp_buf_sz, MPI_BYTE, &status);
            if(all_ret != MPI_SUCCESS)
            {
                fprintf(stderr, "darshan library warning: unable to write header to log file %s\n",
                        logfile_name);
                unlink(logfile_name);
            }
        }
601
602
    }

603
604
605
606
607
    /* error out if unable to write log header */
    DARSHAN_MPI_CALL(PMPI_Bcast)(&all_ret, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        free(logfile_name);
608
        darshan_core_cleanup(final_core);
609
610
        return;
    }
611
612
    if(internal_timing_flag)
        header2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
613

614
615
616
617
618
619
    DARSHAN_MPI_CALL(PMPI_File_close)(&log_fh);

    /* if we got this far, there are no errors, so rename from *.darshan_partial
     * to *-<logwritetime>.darshan.gz, which indicates that this log file is
     * complete and ready for analysis
     */
620
621
    if(my_rank == 0)
    {
Shane Snyder's avatar
Shane Snyder committed
622
        if(getenv("DARSHAN_LOGFILE"))
623
        {
624
#ifdef __CP_GROUP_READABLE_LOGS
Shane Snyder's avatar
Shane Snyder committed
625
            chmod(logfile_name, (S_IRUSR|S_IRGRP));
626
#else
Shane Snyder's avatar
Shane Snyder committed
627
            chmod(logfile_name, (S_IRUSR));
628
#endif
Shane Snyder's avatar
Shane Snyder committed
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
        }
        else
        {
            char* tmp_index;
            double end_log_time;
            char* new_logfile_name;

            new_logfile_name = malloc(PATH_MAX);
            if(new_logfile_name)
            {
                new_logfile_name[0] = '\0';
                end_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();
                strcat(new_logfile_name, logfile_name);
                tmp_index = strstr(new_logfile_name, ".darshan_partial");
                sprintf(tmp_index, "_%d.darshan.gz", (int)(end_log_time-start_log_time+1));
                rename(logfile_name, new_logfile_name);
                /* set permissions on log file */
#ifdef __CP_GROUP_READABLE_LOGS
                chmod(new_logfile_name, (S_IRUSR|S_IRGRP));
#else
                chmod(new_logfile_name, (S_IRUSR));
#endif
                free(new_logfile_name);
            }
653
        }
654
    }
655

656
    free(logfile_name);
657
    darshan_core_cleanup(final_core);
658

659
    if(internal_timing_flag)
660
    {
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
        double open_tm, open_slowest;
        double header_tm, header_slowest;
        double job_tm, job_slowest;
        double rec_tm, rec_slowest;
        double mod_tm[DARSHAN_MAX_MODS], mod_slowest[DARSHAN_MAX_MODS];
        double all_tm, all_slowest;

        tm_end = DARSHAN_MPI_CALL(PMPI_Wtime)();

        open_tm = open2 - open1;
        header_tm = header2 - header1;
        job_tm = job2 - job1;
        rec_tm = rec2 - rec1;
        all_tm = tm_end - start_log_time;
        for(i = 0;i < DARSHAN_MAX_MODS; i++)
        {
            mod_tm[i] = mod2[i] - mod1[i];
        }

        DARSHAN_MPI_CALL(PMPI_Reduce)(&open_tm, &open_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&header_tm, &header_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&job_tm, &job_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&rec_tm, &rec_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&all_tm, &all_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(mod_tm, mod_slowest, DARSHAN_MAX_MODS,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

        if(my_rank == 0)
        {
            printf("#darshan:<op>\t<nprocs>\t<time>\n");
            printf("darshan:log_open\t%d\t%f\n", nprocs, open_slowest);
            printf("darshan:job_write\t%d\t%f\n", nprocs, job_slowest);
            printf("darshan:hash_write\t%d\t%f\n", nprocs, rec_slowest);
Shane Snyder's avatar
Shane Snyder committed
699
            printf("darshan:header_write\t%d\t%f\n", nprocs, header_slowest);
700
701
702
703
            for(i = 0; i < DARSHAN_MAX_MODS; i++)
            {
                if(global_mod_use_count[i])
                    printf("darshan:%s_shutdown\t%d\t%f\n", darshan_module_names[i],
Shane Snyder's avatar
Shane Snyder committed
704
                        nprocs, mod_slowest[i]);
705
706
707
            }
            printf("darshan:core_shutdown\t%d\t%f\n", nprocs, all_slowest);
        }
708
709
710
711
    }
    
    return;
}
712

Shane Snyder's avatar
Shane Snyder committed
713
/* *********************************** */
714

715
/* construct the darshan log file name */
716
static void darshan_get_logfile_name(char* logfile_name, int jobid, struct tm* start_tm)
717
{
Shane Snyder's avatar
Shane Snyder committed
718
    char* user_logfile_name;
719
720
721
722
723
724
725
726
727
728
729
730
731
    char* logpath;
    char* logname_string;
    char* logpath_override = NULL;
#ifdef __CP_LOG_ENV
    char env_check[256];
    char* env_tok;
#endif
    uint64_t hlevel;
    char hname[HOST_NAME_MAX];
    uint64_t logmod;
    char cuser[L_cuserid] = {0};
    int ret;

Shane Snyder's avatar
Shane Snyder committed
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
    /* first, check if user specifies a complete logpath to use */
    user_logfile_name = getenv("DARSHAN_LOGFILE");
    if(user_logfile_name)
    {
        if(strlen(user_logfile_name) >= (PATH_MAX-1))
        {
            fprintf(stderr, "darshan library warning: user log file name too long.\n");
            logfile_name[0] = '\0';
        }
        else
        {
            strcpy(logfile_name, user_logfile_name);
        }
    }
    else
747
    {
Shane Snyder's avatar
Shane Snyder committed
748
749
750
751
752
753
        /* otherwise, generate the log path automatically */

        /* Use CP_LOG_PATH_OVERRIDE for the value or __CP_LOG_PATH */
        logpath = getenv(CP_LOG_PATH_OVERRIDE);
        if(!logpath)
        {
754
#ifdef __CP_LOG_PATH
Shane Snyder's avatar
Shane Snyder committed
755
            logpath = __CP_LOG_PATH;
756
#endif
Shane Snyder's avatar
Shane Snyder committed
757
        }
758

Shane Snyder's avatar
Shane Snyder committed
759
760
761
762
763
764
765
766
767
768
        /* get the username for this job.  In order we will try each of the
         * following until one of them succeeds:
         *
         * - cuserid()
         * - getenv("LOGNAME")
         * - snprintf(..., geteuid());
         *
         * Note that we do not use getpwuid() because it generally will not
         * work in statically compiled binaries.
         */
769
770

#ifndef DARSHAN_DISABLE_CUSERID
Shane Snyder's avatar
Shane Snyder committed
771
        cuserid(cuser);
772
773
#endif

Shane Snyder's avatar
Shane Snyder committed
774
775
        /* if cuserid() didn't work, then check the environment */
        if(strcmp(cuser, "") == 0)
776
        {
Shane Snyder's avatar
Shane Snyder committed
777
778
779
780
781
            logname_string = getenv("LOGNAME");
            if(logname_string)
            {
                strncpy(cuser, logname_string, (L_cuserid-1));
            }
782
783
        }

Shane Snyder's avatar
Shane Snyder committed
784
785
786
787
788
789
        /* if cuserid() and environment both fail, then fall back to uid */
        if(strcmp(cuser, "") == 0)
        {
            uid_t uid = geteuid();
            snprintf(cuser, sizeof(cuser), "%u", uid);
        }
790

Shane Snyder's avatar
Shane Snyder committed
791
792
793
794
        /* generate a random number to help differentiate the log */
        hlevel=DARSHAN_MPI_CALL(PMPI_Wtime)() * 1000000;
        (void)gethostname(hname, sizeof(hname));
        logmod = darshan_hash((void*)hname,strlen(hname),hlevel);
795

Shane Snyder's avatar
Shane Snyder committed
796
797
798
799
        /* see if darshan was configured using the --with-logpath-by-env
         * argument, which allows the user to specify an absolute path to
         * place logs via an env variable.
         */
800
#ifdef __CP_LOG_ENV
Shane Snyder's avatar
Shane Snyder committed
801
802
        /* just silently skip if the environment variable list is too big */
        if(strlen(__CP_LOG_ENV) < 256)
803
        {
Shane Snyder's avatar
Shane Snyder committed
804
805
806
807
808
            /* copy env variable list to a temporary buffer */
            strcpy(env_check, __CP_LOG_ENV);
            /* tokenize the comma-separated list */
            env_tok = strtok(env_check, ",");
            if(env_tok)
809
            {
Shane Snyder's avatar
Shane Snyder committed
810
                do
811
                {
Shane Snyder's avatar
Shane Snyder committed
812
813
814
815
816
817
818
819
820
                    /* check each env variable in order */
                    logpath_override = getenv(env_tok);
                    if(logpath_override)
                    {
                        /* stop as soon as we find a match */
                        break;
                    }
                }while((env_tok = strtok(NULL, ",")));
            }
821
822
823
        }
#endif

Shane Snyder's avatar
Shane Snyder committed
824
        if(logpath_override)
825
        {
Shane Snyder's avatar
Shane Snyder committed
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
            ret = snprintf(logfile_name, PATH_MAX,
                "%s/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
                logpath_override,
                cuser, __progname, jobid,
                (start_tm->tm_mon+1),
                start_tm->tm_mday,
                (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
                logmod);
            if(ret == (PATH_MAX-1))
            {
                /* file name was too big; squish it down */
                snprintf(logfile_name, PATH_MAX,
                    "%s/id%d.darshan_partial",
                    logpath_override, jobid);
            }
841
        }
Shane Snyder's avatar
Shane Snyder committed
842
        else if(logpath)
843
        {
Shane Snyder's avatar
Shane Snyder committed
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
            ret = snprintf(logfile_name, PATH_MAX,
                "%s/%d/%d/%d/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
                logpath, (start_tm->tm_year+1900),
                (start_tm->tm_mon+1), start_tm->tm_mday,
                cuser, __progname, jobid,
                (start_tm->tm_mon+1),
                start_tm->tm_mday,
                (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
                logmod);
            if(ret == (PATH_MAX-1))
            {
                /* file name was too big; squish it down */
                snprintf(logfile_name, PATH_MAX,
                    "%s/id%d.darshan_partial",
                    logpath, jobid);
            }
        }
        else
        {
            logfile_name[0] = '\0';
864
865
866
867
        }
    }

    return;
868
869
}

870
/* record any hints used to write the darshan log in the log header */
871
static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* core)
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
{
    char* hints;
    char* header_hints;
    int meta_remain = 0;
    char* m;

    /* check environment variable to see if the default MPI file hints have
     * been overridden
     */
    hints = getenv(CP_LOG_HINTS_OVERRIDE);
    if(!hints)
    {
        hints = __CP_LOG_HINTS;
    }

    if(!hints || strlen(hints) < 1)
        return;

    header_hints = strdup(hints);
    if(!header_hints)
        return;

    meta_remain = DARSHAN_JOB_METADATA_LEN -
895
        strlen(core->log_job.metadata) - 1;
896
897
    if(meta_remain >= (strlen(PACKAGE_VERSION) + 9))
    {
898
        sprintf(core->log_job.metadata, "lib_ver=%s\n", PACKAGE_VERSION);
899
900
901
902
        meta_remain -= (strlen(PACKAGE_VERSION) + 9);
    }
    if(meta_remain >= (3 + strlen(header_hints)))
    {
903
        m = core->log_job.metadata + strlen(core->log_job.metadata);
904
905
906
907
908
909
910
911
912
913
914
915
        /* We have room to store the hints in the metadata portion of
         * the job header.  We just prepend an h= to the hints list.  The
         * metadata parser will ignore = characters that appear in the value
         * portion of the metadata key/value pair.
         */
        sprintf(m, "h=%s\n", header_hints);
    }
    free(header_hints);

    return;
}

916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
/* qsort() comparator for struct mnt_data entries.
 *
 * Orders entries by the length of their mount path, longest first: returns
 * -1 when a's path is longer than b's, 1 when it is shorter, and 0 when
 * both paths have the same length.
 */
static int mnt_data_cmp(const void* a, const void* b)
{
    const struct mnt_data *lhs = a;
    const struct mnt_data *rhs = b;
    const size_t lhs_len = strlen(lhs->path);
    const size_t rhs_len = strlen(rhs->path);

    /* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
    return((lhs_len < rhs_len) - (lhs_len > rhs_len));
}

/* adds an entry to table of mounted file systems */
static void add_entry(char* trailing_data, int* space_left, struct mntent *entry)
{
    int ret;
    char tmp_mnt[256];
    struct statfs statfsbuf;

    strncpy(mnt_data_array[mnt_data_count].path, entry->mnt_dir,
        DARSHAN_MAX_MNT_PATH-1);
    strncpy(mnt_data_array[mnt_data_count].type, entry->mnt_type,
        DARSHAN_MAX_MNT_TYPE-1);
    /* NOTE: we now try to detect the preferred block size for each file 
     * system using fstatfs().  On Lustre we assume a size of 1 MiB 
     * because fstatfs() reports 4 KiB. 
     */
#ifndef LL_SUPER_MAGIC
#define LL_SUPER_MAGIC 0x0BD00BD0
#endif
    ret = statfs(entry->mnt_dir, &statfsbuf);
    if(ret == 0 && statfsbuf.f_type != LL_SUPER_MAGIC)
        mnt_data_array[mnt_data_count].block_size = statfsbuf.f_bsize;
    else if(ret == 0 && statfsbuf.f_type == LL_SUPER_MAGIC)
        mnt_data_array[mnt_data_count].block_size = 1024*1024;
    else
        mnt_data_array[mnt_data_count].block_size = 4096;

    /* store mount information for use in header of darshan log */
956
    ret = snprintf(tmp_mnt, 256, "\n%s\t%s",
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
        entry->mnt_type, entry->mnt_dir);
    if(ret < 256 && strlen(tmp_mnt) <= (*space_left))
    {
        strcat(trailing_data, tmp_mnt);
        (*space_left) -= strlen(tmp_mnt);
    }

    mnt_data_count++;
    return;
}

/* darshan_get_exe_and_mounts_root()
 *
 * collects command line and list of mounted file systems into a string that
 * will be stored with the job header
 */
static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core,
    char* trailing_data, int space_left)
{
    FILE* tab;
    struct mntent *entry;
    char* exclude;
    int tmp_index = 0;
    int skip = 0;

    /* skip these fs types */
    static char* fs_exclusions[] = {
        "tmpfs",
        "proc",
        "sysfs",
        "devpts",
        "binfmt_misc",
        "fusectl",
        "debugfs",
        "securityfs",
        "nfsd",
        "none",
        "rpc_pipefs",
        "hugetlbfs",
        "cgroup",
        NULL
    };

    /* length of exe has already been safety checked in darshan-posix.c */
    strcat(trailing_data, core->exe);
1002
    space_left = DARSHAN_EXE_LEN - strlen(trailing_data);
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065

    /* we make two passes through mounted file systems; in the first pass we
     * grab any non-nfs mount points, then on the second pass we grab nfs
     * mount points
     */

    tab = setmntent("/etc/mtab", "r");
    if(!tab)
        return;
    /* loop through list of mounted file systems */
    while(mnt_data_count<DARSHAN_MAX_MNTS && (entry = getmntent(tab)) != NULL)
    {
        /* filter out excluded fs types */
        tmp_index = 0;
        skip = 0;
        while((exclude = fs_exclusions[tmp_index]))
        {
            if(!(strcmp(exclude, entry->mnt_type)))
            {
                skip =1;
                break;
            }
            tmp_index++;
        }

        if(skip || (strcmp(entry->mnt_type, "nfs") == 0))
            continue;

        add_entry(trailing_data, &space_left, entry);
    }
    endmntent(tab);

    tab = setmntent("/etc/mtab", "r");
    if(!tab)
        return;
    /* loop through list of mounted file systems */
    while(mnt_data_count<DARSHAN_MAX_MNTS && (entry = getmntent(tab)) != NULL)
    {
        if(strcmp(entry->mnt_type, "nfs") != 0)
            continue;

        add_entry(trailing_data, &space_left, entry);
    }
    endmntent(tab);

    /* Sort mount points in order of longest path to shortest path.  This is
     * necessary so that if we try to match file paths to mount points later
     * we don't match on "/" every time.
     */
    qsort(mnt_data_array, mnt_data_count, sizeof(mnt_data_array[0]), mnt_data_cmp);
    return;
}

/* darshan_get_exe_and_mounts()
 *
 * collects command line and list of mounted file systems into a string that
 * will be stored with the job header
 */
static char* darshan_get_exe_and_mounts(struct darshan_core_runtime *core)
{
    char* trailing_data;
    int space_left;

1066
    space_left = DARSHAN_EXE_LEN + 1;
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
    trailing_data = malloc(space_left);
    if(!trailing_data)
    {
        return(NULL);
    }
    memset(trailing_data, 0, space_left);

    if(my_rank == 0)
    {
        darshan_get_exe_and_mounts_root(core, trailing_data, space_left);
    }

    /* broadcast trailing data to all nodes */
    DARSHAN_MPI_CALL(PMPI_Bcast)(trailing_data, space_left, MPI_CHAR, 0,
        MPI_COMM_WORLD);
    /* broadcast mount count to all nodes */
    DARSHAN_MPI_CALL(PMPI_Bcast)(&mnt_data_count, 1, MPI_INT, 0,
        MPI_COMM_WORLD);
    /* broadcast mount data to all nodes */
    DARSHAN_MPI_CALL(PMPI_Bcast)(mnt_data_array,
        mnt_data_count*sizeof(mnt_data_array[0]), MPI_BYTE, 0, MPI_COMM_WORLD);

    return(trailing_data);
}

1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
static void darshan_block_size_from_path(const char *path, int *block_size)
{
    int i;
    *block_size = -1;

    for(i=0; i<mnt_data_count; i++)
    {
        if(!(strncmp(mnt_data_array[i].path, path, strlen(mnt_data_array[i].path))))
        {
            *block_size = mnt_data_array[i].block_size;
            return;
        }
    }

    return;
}

1109
static void darshan_get_shared_records(struct darshan_core_runtime *core,
1110
    darshan_record_id *shared_recs)
1111
1112
1113
{
    int i;
    int ndx;
1114
    struct darshan_core_record_ref *tmp, *ref;
1115
    darshan_record_id id_array[DARSHAN_CORE_MAX_RECORDS] = {0};
1116
1117
    uint64_t mod_flags[DARSHAN_CORE_MAX_RECORDS] = {0};
    uint64_t global_mod_flags[DARSHAN_CORE_MAX_RECORDS] = {0};
1118
1119
1120
1121
1122

    /* first, determine list of records root process has opened */
    if(my_rank == 0)
    {
        ndx = 0;
1123
        HASH_ITER(hlink, core->rec_hash, ref, tmp)
1124
        {
1125
            id_array[ndx++] = ref->rec.id;           
1126
1127
1128
1129
        }
    }

    /* broadcast root's list of records to all other processes */
1130
    DARSHAN_MPI_CALL(PMPI_Bcast)(id_array,
1131
1132
1133
1134
1135
1136
        (DARSHAN_CORE_MAX_RECORDS * sizeof(darshan_record_id)),
        MPI_BYTE, 0, MPI_COMM_WORLD);

    /* everyone looks to see if they opened the same records as root */
    for(i=0; (i<DARSHAN_CORE_MAX_RECORDS && id_array[i] != 0); i++)
    {
1137
1138
        HASH_FIND(hlink, core->rec_hash, &id_array[i], sizeof(darshan_record_id), ref);
        if(ref)
1139
        {
1140
1141
1142
            /* we opened that record too, save the mod_flags */
            mod_flags[i] = ref->mod_flags;
            break;
1143
1144
1145
        }
    }

1146
1147
1148
1149
    /* now allreduce so everyone agrees which files are shared and
     * which modules accessed them collectively
     */
    DARSHAN_MPI_CALL(PMPI_Allreduce)(mod_flags, global_mod_flags,
Shane Snyder's avatar
Shane Snyder committed
1150
        DARSHAN_CORE_MAX_RECORDS, MPI_UINT64_T, MPI_BAND, MPI_COMM_WORLD);
1151
1152
1153
1154

    ndx = 0;
    for(i=0; (i<DARSHAN_CORE_MAX_RECORDS && id_array[i] != 0); i++)
    {
1155
        if(global_mod_flags[i] != 0)
1156
1157
        {
            shared_recs[ndx++] = id_array[i];
1158
1159
1160
1161
1162
1163
1164
1165

            /* set global_mod_flags so we know which modules collectively
             * accessed this module. we need this info to support shared
             * file reductions
             */
            HASH_FIND(hlink, core->rec_hash, &id_array[i], sizeof(darshan_record_id), ref);
            assert(ref);
            ref->global_mod_flags = global_mod_flags[i];
1166
1167
1168
        }
    }

1169
1170
1171
    return;
}

1172
/* darshan_log_open_all()
 *
 * Opens the darshan log file for writing on all processes in
 * MPI_COMM_WORLD.  The file is created exclusively
 * (MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_EXCL).
 *
 * MPI-IO hints are taken from the environment variable named by
 * CP_LOG_HINTS_OVERRIDE, falling back to the compiled-in __CP_LOG_HINTS.
 * The hints string is a semicolon-separated list of key=value pairs; items
 * without an '=' or with an empty key are ignored.
 *
 * logfile_name: path of the log file to create
 * log_fh:       receives the MPI file handle on success
 *
 * Returns 0 on success, -1 if the file could not be opened.
 */
static int darshan_log_open_all(char *logfile_name, MPI_File *log_fh)
{
    char *hints;
    char *tok_str;
    char *key;
    char *value;
    char *saveptr = NULL;
    int ret;
    MPI_Info info;

    MPI_Info_create(&info);

    /* check environment variable to see if the default MPI file hints have
     * been overridden
     */
    hints = getenv(CP_LOG_HINTS_OVERRIDE);
    if(!hints)
    {
        hints = __CP_LOG_HINTS;
    }

    if(hints && strlen(hints) > 0)
    {
        /* tokenize a private copy; strtok_r() modifies its input */
        tok_str = strdup(hints);
        if(tok_str)
        {
            /* split string on semicolon */
            for(key = strtok_r(tok_str, ";", &saveptr);
                key != NULL;
                key = strtok_r(NULL, ";", &saveptr))
            {
                /* look for = sign splitting key/value pairs */
                value = strchr(key, '=');
                if(!value)
                    continue;

                /* break key and value into separate null terminated strings */
                value[0] = '\0';
                value++;
                if(strlen(key) > 0)
                    MPI_Info_set(info, key, value);
            }
            free(tok_str);
        }
    }

    /* open the darshan log file for writing */
    ret = DARSHAN_MPI_CALL(PMPI_File_open)(MPI_COMM_WORLD, logfile_name,
        MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_EXCL, info, log_fh);

    /* the info object is no longer needed once the open has been attempted;
     * release it on the failure path too so it is not leaked
     */
    MPI_Info_free(&info);

    if(ret != MPI_SUCCESS)
        return(-1);

    return(0);
}

1233
1234
static int darshan_deflate_buffer(void **pointers, int *lengths, int count,
    int nocomp_flag, char *comp_buf, int *comp_length)
1235
1236
1237
1238
{
    int ret = 0;
    int i;
    int total_target = 0;
1239
    int z_comp_level;
1240
1241
    z_stream tmp_stream;

1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252