darshan-core.c 50.5 KB
Newer Older
1
/*
Shane Snyder's avatar
Shane Snyder committed
2
3
4
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
5
6
 */

7
#define _XOPEN_SOURCE 500
8
#define _GNU_SOURCE
9

10
11
12
13
14
15
#include "darshan-runtime-config.h"

#include <stdio.h>
#ifdef HAVE_MNTENT_H
#include <mntent.h>
#endif
16
17
18
19
20
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <limits.h>
#include <pthread.h>
21
22
23
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/vfs.h>
24
#include <zlib.h>
25
#include <mpi.h>
26
#include <assert.h>
27

28
#include "uthash.h"
Shane Snyder's avatar
Shane Snyder committed
29
#include "darshan.h"
30
#include "darshan-core.h"
Shane Snyder's avatar
Shane Snyder committed
31
#include "darshan-dynamic.h"
32

33
extern char* __progname;
34
extern char* __progname_full;
35

36
/* internal variable delcarations */
37
static struct darshan_core_runtime *darshan_core = NULL;
38
static pthread_mutex_t darshan_core_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
39
static int my_rank = -1;
40
static int nprocs = -1;
41
static int darshan_mem_alignment = 1;
42

43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
/* NULL-terminated list of directory prefixes; paths that start with any of
 * these are not traced by darshan
 */
char *darshan_path_exclusions[] = {
    "/etc/", "/dev/", "/usr/", "/bin/", "/boot/",
    "/lib/", "/opt/", "/sbin/", "/sys/", "/proc/",
    NULL
};

58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#ifdef DARSHAN_BGQ
extern void bgq_runtime_initialize();
#endif

/* NULL-terminated table of module init routines that darshan-core invokes
 * itself at startup (see the loop in darshan_core_initialize()); an entry
 * is only compiled in when the matching module is configured
 */
void (*mod_static_init_fns[])(void) =
{
#ifdef DARSHAN_BGQ
    bgq_runtime_initialize,
#endif
    NULL
};

Shane Snyder's avatar
Shane Snyder committed
73
74
75
/* acquire/release the file-level darshan_core_mutex guarding darshan_core */
#define DARSHAN_CORE_LOCK()   (pthread_mutex_lock(&darshan_core_mutex))
#define DARSHAN_CORE_UNLOCK() (pthread_mutex_unlock(&darshan_core_mutex))

76
77
78
79
80
81
/* FS mount information */
#define DARSHAN_MAX_MNTS 64
#define DARSHAN_MAX_MNT_PATH 256
#define DARSHAN_MAX_MNT_TYPE 32

/* one entry of the mount table kept by darshan-core
 * NOTE(review): field meanings below are inferred from their names and
 * from the darshan_block_size_from_path() prototype -- confirm against the
 * code that fills mnt_data_array.
 */
struct mnt_data
{
    int block_size;                     /* presumably the FS block size */
    char path[DARSHAN_MAX_MNT_PATH];    /* presumably the mount point path */
    char type[DARSHAN_MAX_MNT_TYPE];    /* presumably the FS type name */
};

/* fixed-capacity mount table and the number of entries currently in use */
static struct mnt_data mnt_data_array[DARSHAN_MAX_MNTS];
static int mnt_data_count = 0;

89
90
91
92
/* prototypes for internal helper functions */
static void darshan_get_logfile_name(
    char* logfile_name, int jobid, struct tm* start_tm);
static void darshan_log_record_hints_and_ver(
93
94
95
96
97
98
    struct darshan_core_runtime* core);
static void darshan_get_exe_and_mounts_root(
    struct darshan_core_runtime *core, char* trailing_data,
    int space_left);
static char* darshan_get_exe_and_mounts(
    struct darshan_core_runtime *core);
99
100
static void darshan_block_size_from_path(
    const char *path, int *block_size);
101
static void darshan_get_shared_records(
102
103
    struct darshan_core_runtime *core, darshan_record_id **shared_recs,
    int *shared_rec_cnt);
104
static int darshan_log_open_all(
105
    char *logfile_name, MPI_File *log_fh);
106
static int darshan_deflate_buffer(
Shane Snyder's avatar
Shane Snyder committed
107
108
    void **pointers, int *lengths, int count, char *comp_buf,
    int *comp_buf_length);
109
static int darshan_log_write_record_hash(
110
    MPI_File log_fh, struct darshan_core_runtime *core,
111
112
113
    uint64_t *inout_off);
static int darshan_log_append_all(
    MPI_File log_fh, struct darshan_core_runtime *core, void *buf,
Shane Snyder's avatar
Shane Snyder committed
114
    int count, uint64_t *inout_off);
Shane Snyder's avatar
Shane Snyder committed
115
116
static void darshan_core_cleanup(
    struct darshan_core_runtime* core);
117

118
119
/* *********************************** */

Shane Snyder's avatar
Shane Snyder committed
120
void darshan_core_initialize(int argc, char **argv)
121
122
123
124
{
    int i;
    int internal_timing_flag = 0;
    double init_start, init_time, init_max;
125
    char *envstr;
126
127
128
    char* truncate_string = "<TRUNCATED>";
    int truncate_offset;
    int chars_left = 0;
129
130
    int ret;
    int tmpval;
131
132

    DARSHAN_MPI_CALL(PMPI_Comm_size)(MPI_COMM_WORLD, &nprocs);
133
    DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &my_rank);
134
135
136
137

    if(getenv("DARSHAN_INTERNAL_TIMING"))
        internal_timing_flag = 1;

138
    if(internal_timing_flag)
139
140
141
        init_start = DARSHAN_MPI_CALL(PMPI_Wtime)();

    /* setup darshan runtime if darshan is enabled and hasn't been initialized already */
142
    if(!getenv("DARSHAN_DISABLE") && !darshan_core)
143
    {
144
        #if (__DARSHAN_MEM_ALIGNMENT < 1)
145
146
            #error Darshan must be configured with a positive value for --with-mem-align
        #endif
147
        envstr = getenv(DARSHAN_MEM_ALIGNMENT_OVERRIDE);
148
149
150
151
152
153
154
155
156
157
158
        if(envstr)
        {
            ret = sscanf(envstr, "%d", &tmpval);
            /* silently ignore if the env variable is set poorly */
            if(ret == 1 && tmpval > 0)
            {
                darshan_mem_alignment = tmpval;
            }
        }
        else
        {
159
            darshan_mem_alignment = __DARSHAN_MEM_ALIGNMENT;
160
161
162
163
164
165
166
        }

        /* avoid floating point errors on faulty input */
        if (darshan_mem_alignment < 1)
        {
            darshan_mem_alignment = 1;
        }
167
168
169
170

        /* allocate structure to track darshan_core_runtime information */
        darshan_core = malloc(sizeof(*darshan_core));
        if(darshan_core)
171
        {
172
            memset(darshan_core, 0, sizeof(*darshan_core));
173

174
175
176
177
            darshan_core->log_job.uid = getuid();
            darshan_core->log_job.start_time = time(NULL);
            darshan_core->log_job.nprocs = nprocs;
            darshan_core->wtime_offset = DARSHAN_MPI_CALL(PMPI_Wtime)();
178
179

            /* record exe and arguments */
180
            for(i=0; i<argc; i++)
181
            {
182
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
183
184
                strncat(darshan_core->exe, argv[i], chars_left);
                if(i < (argc-1))
185
                {
186
                    chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
187
                    strncat(darshan_core->exe, " ", chars_left);
188
189
190
191
192
193
194
195
                }
            }

            /* if we don't see any arguments, then use glibc symbol to get
             * program name at least (this happens in fortran)
             */
            if(argc == 0)
            {
196
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
197
                strncat(darshan_core->exe, __progname_full, chars_left);
198
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
199
                strncat(darshan_core->exe, " <unknown args>", chars_left);
200
201
202
203
204
            }

            if(chars_left == 0)
            {
                /* we ran out of room; mark that string was truncated */
205
                truncate_offset = DARSHAN_EXE_LEN - strlen(truncate_string);
206
                sprintf(&darshan_core->exe[truncate_offset], "%s",
207
208
                    truncate_string);
            }
209
210
211

            /* collect information about command line and mounted file systems */
            darshan_core->trailing_data = darshan_get_exe_and_mounts(darshan_core);
212
213
        }

214
215
216
217
218
219
220
        /* maybe bootstrap modules with static initializers */
        i = 0;
        while(mod_static_init_fns[i])
        {
            (*mod_static_init_fns[i])();
            i++;
        }
221
222
    }

223
224
225
226
227
    if(internal_timing_flag)
    {
        init_time = DARSHAN_MPI_CALL(PMPI_Wtime)() - init_start;
        DARSHAN_MPI_CALL(PMPI_Reduce)(&init_time, &init_max, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
228
        if(my_rank == 0)
229
        {
230
231
            fprintf(stderr, "#darshan:<op>\t<nprocs>\t<time>\n");
            fprintf(stderr, "darshan:init\t%d\t%f\n", nprocs, init_max);
232
233
234
235
236
237
        }
    }

    return;
}

Shane Snyder's avatar
Shane Snyder committed
238
void darshan_core_shutdown()
239
{
240
    int i;
241
    char *logfile_name;
242
    struct darshan_core_runtime *final_core;
243
    int internal_timing_flag = 0;
244
245
    char *envjobid;
    char *jobid_str;
246
    int jobid;
247
    struct tm *start_tm;
248
    time_t start_time_tmp;
249
250
    int ret = 0;
    int all_ret = 0;
251
252
    int64_t first_start_time;
    int64_t last_end_time;
253
254
    int local_mod_use[DARSHAN_MAX_MODS] = {0};
    int global_mod_use_count[DARSHAN_MAX_MODS] = {0};
255
256
257
    darshan_record_id *shared_recs;
    darshan_record_id *mod_shared_recs;
    int shared_rec_cnt = 0;
258
    double start_log_time;
259
260
261
262
263
264
265
    double open1, open2;
    double job1, job2;
    double rec1, rec2;
    double mod1[DARSHAN_MAX_MODS] = {0};
    double mod2[DARSHAN_MAX_MODS] = {0};
    double header1, header2;
    double tm_end;
266
    uint64_t gz_fp = 0;
267
    uint32_t tmp_partial_flag;
268
269
    MPI_File log_fh;
    MPI_Status status;
270
271
272
273

    if(getenv("DARSHAN_INTERNAL_TIMING"))
        internal_timing_flag = 1;

274
275
    start_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();

Shane Snyder's avatar
Shane Snyder committed
276
    /* disable darhan-core while we shutdown */
277
    DARSHAN_CORE_LOCK();
278
    if(!darshan_core)
279
    {
280
        DARSHAN_CORE_UNLOCK();
281
282
        return;
    }
283
284
    final_core = darshan_core;
    darshan_core = NULL;
Shane Snyder's avatar
Shane Snyder committed
285

286
    /* we also need to set which modules were registered on this process and
287
288
     * call into those modules and give them a chance to perform any necessary
     * pre-shutdown steps.
Shane Snyder's avatar
Shane Snyder committed
289
290
291
292
293
294
     */
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
    {
        if(final_core->mod_array[i])
        {
            local_mod_use[i] = 1;
295
            final_core->mod_array[i]->mod_funcs.begin_shutdown();
Shane Snyder's avatar
Shane Snyder committed
296
297
        }
    }
298
    DARSHAN_CORE_UNLOCK();
299
300
301
302

    logfile_name = malloc(PATH_MAX);
    if(!logfile_name)
    {
303
        darshan_core_cleanup(final_core);
304
305
306
        return;
    }

307
    /* set darshan job id/metadata and constuct log file name on rank 0 */
308
    if(my_rank == 0)
309
    {
310
        /* Use DARSHAN_JOBID_OVERRIDE for the env var for __DARSHAN_JOBID */
311
        envjobid = getenv(DARSHAN_JOBID_OVERRIDE);
312
        if(!envjobid)
313
        {
314
            envjobid = __DARSHAN_JOBID;
315
316
        }

317
        /* find a job id */
318
319
320
321
322
323
324
325
326
327
328
329
        jobid_str = getenv(envjobid);
        if(jobid_str)
        {
            /* in cobalt we can find it in env var */
            ret = sscanf(jobid_str, "%d", &jobid);
        }
        if(!jobid_str || ret != 1)
        {
            /* use pid as fall back */
            jobid = getpid();
        }

330
        final_core->log_job.jobid = (int64_t)jobid;
331

332
        /* if we are using any hints to write the log file, then record those
333
         * hints with the darshan job information
334
         */
335
        darshan_log_record_hints_and_ver(final_core);
336

337
        /* use human readable start time format in log filename */
338
        start_time_tmp = final_core->log_job.start_time;
339
        start_tm = localtime(&start_time_tmp);
340

341
342
        /* construct log file name */
        darshan_get_logfile_name(logfile_name, jobid, start_tm);
343
344
345
346
347
348
349
350
351
    }

    /* broadcast log file name */
    DARSHAN_MPI_CALL(PMPI_Bcast)(logfile_name, PATH_MAX, MPI_CHAR, 0,
        MPI_COMM_WORLD);

    if(strlen(logfile_name) == 0)
    {
        /* failed to generate log file name */
352
        free(logfile_name);
353
        darshan_core_cleanup(final_core);
354
355
356
        return;
    }

357
    final_core->log_job.end_time = time(NULL);
358

359
360
361
    /* reduce to report first start time and last end time across all ranks
     * at rank 0
     */
362
363
    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.start_time, &first_start_time, 1, MPI_LONG_LONG, MPI_MIN, 0, MPI_COMM_WORLD);
    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.end_time, &last_end_time, 1, MPI_LONG_LONG, MPI_MAX, 0, MPI_COMM_WORLD);
364
365
    if(my_rank == 0)
    {
366
367
        final_core->log_job.start_time = first_start_time;
        final_core->log_job.end_time = last_end_time;
368
    }
369

370
371
372
    /* reduce the number of times a module was opened globally and bcast to everyone */   
    DARSHAN_MPI_CALL(PMPI_Allreduce)(local_mod_use, global_mod_use_count, DARSHAN_MAX_MODS, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

373
    /* get a list of records which are shared across all processes */
374
    darshan_get_shared_records(final_core, &shared_recs, &shared_rec_cnt);
375

376
377
    if(internal_timing_flag)
        open1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
378
    /* collectively open the darshan log file */
379
    ret = darshan_log_open_all(logfile_name, &log_fh);
380
381
    if(internal_timing_flag)
        open2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
382
383
384
385
386
387
388
389

    /* error out if unable to open log file */
    DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
        MPI_LOR, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        if(my_rank == 0)
        {
390
391
            fprintf(stderr, "darshan library warning: unable to open log file %s\n",
                logfile_name);
392
393
394
            unlink(logfile_name);
        }
        free(logfile_name);
395
        darshan_core_cleanup(final_core);
396
397
398
        return;
    }

399
400
    if(internal_timing_flag)
        job1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
401
    /* rank 0 is responsible for writing the compressed darshan job information */
Shane Snyder's avatar
Shane Snyder committed
402
    if(my_rank == 0)
403
    {
404
        void *pointers[2] = {&final_core->log_job, final_core->trailing_data};
405
        int lengths[2] = {sizeof(struct darshan_job), strlen(final_core->trailing_data)};
406
        int comp_buf_sz = 0;
407

408
        /* compress the job info and the trailing mount/exe data */
Shane Snyder's avatar
Shane Snyder committed
409
        all_ret = darshan_deflate_buffer(pointers, lengths, 2,
410
411
            final_core->comp_buf, &comp_buf_sz);
        if(all_ret)
412
        {
413
            fprintf(stderr, "darshan library warning: unable to compress job data\n");
414
            unlink(logfile_name);
415
        }
416
417
418
        else
        {
            /* write the job information, preallocing space for the log header */
Shane Snyder's avatar
Shane Snyder committed
419
            gz_fp += sizeof(struct darshan_header);
420
421
            all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, gz_fp,
                final_core->comp_buf, comp_buf_sz, MPI_BYTE, &status);
422
423
424
425
426
            if(all_ret != MPI_SUCCESS)
            {
                fprintf(stderr, "darshan library warning: unable to write job data to log file %s\n",
                        logfile_name);
                unlink(logfile_name);
Shane Snyder's avatar
Shane Snyder committed
427
                
428
            }
429
            gz_fp += comp_buf_sz;
430
        }
431
432
    }

433
434
435
436
437
    /* error out if unable to write job information */
    DARSHAN_MPI_CALL(PMPI_Bcast)(&all_ret, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        free(logfile_name);
438
        darshan_core_cleanup(final_core);
439
440
        return;
    }
441
442
    if(internal_timing_flag)
        job2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
443

444
445
    if(internal_timing_flag)
        rec1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
446
    /* write the record name->id hash to the log file */
Shane Snyder's avatar
Shane Snyder committed
447
    final_core->log_header.rec_map.off = gz_fp;
448
    ret = darshan_log_write_record_hash(log_fh, final_core, &gz_fp);
Shane Snyder's avatar
Shane Snyder committed
449
    final_core->log_header.rec_map.len = gz_fp - final_core->log_header.rec_map.off;
450

451
    /* error out if unable to write record hash */
452
453
454
455
456
457
    DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
        MPI_LOR, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        if(my_rank == 0)
        {
458
            fprintf(stderr, "darshan library warning: unable to write record hash to log file %s\n",
459
                logfile_name);
460
            unlink(logfile_name);
461
462
        }
        free(logfile_name);
463
        darshan_core_cleanup(final_core);
464
465
        return;
    }
Shane Snyder's avatar
Shane Snyder committed
466
467
    if(internal_timing_flag)
        rec2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
468

469
470
471
    mod_shared_recs = malloc(shared_rec_cnt * sizeof(darshan_record_id));
    assert(mod_shared_recs);

472
    /* loop over globally used darshan modules and:
473
     *      - perform shared file reductions, if possible
474
     *      - get final output buffer
475
     *      - compress (zlib) provided output buffer
Shane Snyder's avatar
Shane Snyder committed
476
     *      - append compressed buffer to log file
477
478
     *      - add module index info (file offset/length) to log header
     *      - shutdown the module
479
     */
480
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
481
    {
482
        struct darshan_core_module* this_mod = final_core->mod_array[i];
483
        struct darshan_core_record_ref *ref = NULL;
484
        int mod_shared_rec_cnt = 0;
485
        void* mod_buf = NULL;
486
        int mod_buf_sz = 0;
487
        int j;
488

489
        if(global_mod_use_count[i] == 0)
490
491
        {
            if(my_rank == 0)
492
493
494
495
            {
                final_core->log_header.mod_map[i].off = 0;
                final_core->log_header.mod_map[i].len = 0;
            }
496
            continue;
497
        }
498
499
 
        if(internal_timing_flag)
500
            mod1[i] = DARSHAN_MPI_CALL(PMPI_Wtime)();
501

502
        /* set the shared file list for this module */
503
504
        memset(mod_shared_recs, 0, shared_rec_cnt * sizeof(darshan_record_id));
        for(j = 0; j < shared_rec_cnt; j++)
505
506
507
508
        {
            HASH_FIND(hlink, final_core->rec_hash, &shared_recs[j],
                sizeof(darshan_record_id), ref);
            assert(ref);
509
            if(DARSHAN_MOD_FLAG_ISSET(ref->global_mod_flags, i))
510
            {
511
                mod_shared_recs[mod_shared_rec_cnt++] = shared_recs[j];
512
            }
513
        }
514

515
516
517
518
519
        /* if module is registered locally, get the corresponding output buffer
         * 
         * NOTE: this function can be used to run collective operations across
         * modules, if there are file records shared globally.
         */
520
        if(this_mod)
521
        {
522
523
            this_mod->mod_funcs.get_output_data(MPI_COMM_WORLD, mod_shared_recs,
                mod_shared_rec_cnt, &mod_buf, &mod_buf_sz);
524
525
        }

526
        /* append this module's data to the darshan log */
Shane Snyder's avatar
Shane Snyder committed
527
528
529
530
        final_core->log_header.mod_map[i].off = gz_fp;
        ret = darshan_log_append_all(log_fh, final_core, mod_buf, mod_buf_sz, &gz_fp);
        final_core->log_header.mod_map[i].len =
            gz_fp - final_core->log_header.mod_map[i].off;
531

532
        /* error out if the log append failed */
533
534
535
        DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
            MPI_LOR, MPI_COMM_WORLD);
        if(all_ret != 0)
536
        {
537
538
539
540
541
542
543
544
            if(my_rank == 0)
            {
                fprintf(stderr,
                    "darshan library warning: unable to write %s module data to log file %s\n",
                    darshan_module_names[i], logfile_name);
                unlink(logfile_name);
            }
            free(logfile_name);
545
            darshan_core_cleanup(final_core);
546
            return;
547
548
549
        }

        /* shutdown module if registered locally */
550
        if(this_mod)
551
552
553
        {
            this_mod->mod_funcs.shutdown();
        }
554
555
        if(internal_timing_flag)
            mod2[i] = DARSHAN_MPI_CALL(PMPI_Wtime)();
556
557
    }

558
    /* run a reduction to determine if any application processes had to set the
559
560
     * partial flag for any modules. this happens when a module exhausts its memory
     * and does not track every possible record
561
562
     */
    DARSHAN_MPI_CALL(PMPI_Reduce)(&(final_core->log_header.partial_flag),
563
        &tmp_partial_flag, 1, MPI_UINT32_T, MPI_BOR, 0, MPI_COMM_WORLD);
564

565
566
    if(internal_timing_flag)
        header1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
567
    /* rank 0 is responsible for writing the log header */
568
569
    if(my_rank == 0)
    {
570
571
572
        /* initialize the remaining header fields */
        strcpy(final_core->log_header.version_string, DARSHAN_LOG_VERSION);
        final_core->log_header.magic_nr = DARSHAN_MAGIC_NR;
573
        final_core->log_header.comp_type = DARSHAN_ZLIB_COMP;
574
        final_core->log_header.partial_flag = tmp_partial_flag;
575

Shane Snyder's avatar
Shane Snyder committed
576
577
578
        all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, 0, &(final_core->log_header),
            sizeof(struct darshan_header), MPI_BYTE, &status);
        if(all_ret != MPI_SUCCESS)
579
        {
Shane Snyder's avatar
Shane Snyder committed
580
581
            fprintf(stderr, "darshan library warning: unable to write header to log file %s\n",
                    logfile_name);
582
            unlink(logfile_name);
583
        }
584
585
    }

586
587
588
589
590
    /* error out if unable to write log header */
    DARSHAN_MPI_CALL(PMPI_Bcast)(&all_ret, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        free(logfile_name);
591
        darshan_core_cleanup(final_core);
592
593
        return;
    }
594
595
    if(internal_timing_flag)
        header2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
596

597
598
599
    DARSHAN_MPI_CALL(PMPI_File_close)(&log_fh);

    /* if we got this far, there are no errors, so rename from *.darshan_partial
600
     * to *-<logwritetime>.darshan, which indicates that this log file is
601
602
     * complete and ready for analysis
     */
603
604
    if(my_rank == 0)
    {
Shane Snyder's avatar
Shane Snyder committed
605
        if(getenv("DARSHAN_LOGFILE"))
606
        {
607
#ifdef __DARSHAN_GROUP_READABLE_LOGS
Shane Snyder's avatar
Shane Snyder committed
608
            chmod(logfile_name, (S_IRUSR|S_IRGRP));
609
#else
Shane Snyder's avatar
Shane Snyder committed
610
            chmod(logfile_name, (S_IRUSR));
611
#endif
Shane Snyder's avatar
Shane Snyder committed
612
613
614
615
616
617
618
619
620
621
622
623
624
625
        }
        else
        {
            char* tmp_index;
            double end_log_time;
            char* new_logfile_name;

            new_logfile_name = malloc(PATH_MAX);
            if(new_logfile_name)
            {
                new_logfile_name[0] = '\0';
                end_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();
                strcat(new_logfile_name, logfile_name);
                tmp_index = strstr(new_logfile_name, ".darshan_partial");
626
                sprintf(tmp_index, "_%d.darshan", (int)(end_log_time-start_log_time+1));
Shane Snyder's avatar
Shane Snyder committed
627
628
                rename(logfile_name, new_logfile_name);
                /* set permissions on log file */
629
#ifdef __DARSHAN_GROUP_READABLE_LOGS
Shane Snyder's avatar
Shane Snyder committed
630
631
632
633
634
635
                chmod(new_logfile_name, (S_IRUSR|S_IRGRP));
#else
                chmod(new_logfile_name, (S_IRUSR));
#endif
                free(new_logfile_name);
            }
636
        }
637
    }
638

639
    free(logfile_name);
640
    darshan_core_cleanup(final_core);
641

642
    if(internal_timing_flag)
643
    {
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
        double open_tm, open_slowest;
        double header_tm, header_slowest;
        double job_tm, job_slowest;
        double rec_tm, rec_slowest;
        double mod_tm[DARSHAN_MAX_MODS], mod_slowest[DARSHAN_MAX_MODS];
        double all_tm, all_slowest;

        tm_end = DARSHAN_MPI_CALL(PMPI_Wtime)();

        open_tm = open2 - open1;
        header_tm = header2 - header1;
        job_tm = job2 - job1;
        rec_tm = rec2 - rec1;
        all_tm = tm_end - start_log_time;
        for(i = 0;i < DARSHAN_MAX_MODS; i++)
        {
            mod_tm[i] = mod2[i] - mod1[i];
        }

        DARSHAN_MPI_CALL(PMPI_Reduce)(&open_tm, &open_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&header_tm, &header_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&job_tm, &job_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&rec_tm, &rec_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&all_tm, &all_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(mod_tm, mod_slowest, DARSHAN_MAX_MODS,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

        if(my_rank == 0)
        {
678
679
680
681
682
            fprintf(stderr, "#darshan:<op>\t<nprocs>\t<time>\n");
            fprintf(stderr, "darshan:log_open\t%d\t%f\n", nprocs, open_slowest);
            fprintf(stderr, "darshan:job_write\t%d\t%f\n", nprocs, job_slowest);
            fprintf(stderr, "darshan:hash_write\t%d\t%f\n", nprocs, rec_slowest);
            fprintf(stderr, "darshan:header_write\t%d\t%f\n", nprocs, header_slowest);
683
684
685
            for(i = 0; i < DARSHAN_MAX_MODS; i++)
            {
                if(global_mod_use_count[i])
686
                    fprintf(stderr, "darshan:%s_shutdown\t%d\t%f\n", darshan_module_names[i],
Shane Snyder's avatar
Shane Snyder committed
687
                        nprocs, mod_slowest[i]);
688
            }
689
            fprintf(stderr, "darshan:core_shutdown\t%d\t%f\n", nprocs, all_slowest);
690
        }
691
692
693
694
    }
    
    return;
}
695

Shane Snyder's avatar
Shane Snyder committed
696
/* *********************************** */
697

698
/* construct the darshan log file name */
699
static void darshan_get_logfile_name(char* logfile_name, int jobid, struct tm* start_tm)
700
{
Shane Snyder's avatar
Shane Snyder committed
701
    char* user_logfile_name;
702
703
704
    char* logpath;
    char* logname_string;
    char* logpath_override = NULL;
705
#ifdef __DARSHAN_LOG_ENV
706
707
708
709
710
711
712
713
714
    char env_check[256];
    char* env_tok;
#endif
    uint64_t hlevel;
    char hname[HOST_NAME_MAX];
    uint64_t logmod;
    char cuser[L_cuserid] = {0};
    int ret;

Shane Snyder's avatar
Shane Snyder committed
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
    /* first, check if user specifies a complete logpath to use */
    user_logfile_name = getenv("DARSHAN_LOGFILE");
    if(user_logfile_name)
    {
        if(strlen(user_logfile_name) >= (PATH_MAX-1))
        {
            fprintf(stderr, "darshan library warning: user log file name too long.\n");
            logfile_name[0] = '\0';
        }
        else
        {
            strcpy(logfile_name, user_logfile_name);
        }
    }
    else
730
    {
Shane Snyder's avatar
Shane Snyder committed
731
732
        /* otherwise, generate the log path automatically */

733
734
        /* Use DARSHAN_LOG_PATH_OVERRIDE for the value or __DARSHAN_LOG_PATH */
        logpath = getenv(DARSHAN_LOG_PATH_OVERRIDE);
Shane Snyder's avatar
Shane Snyder committed
735
736
        if(!logpath)
        {
737
738
#ifdef __DARSHAN_LOG_PATH
            logpath = __DARSHAN_LOG_PATH;
739
#endif
Shane Snyder's avatar
Shane Snyder committed
740
        }
741

Shane Snyder's avatar
Shane Snyder committed
742
743
744
745
746
747
748
749
750
751
        /* get the username for this job.  In order we will try each of the
         * following until one of them succeeds:
         *
         * - cuserid()
         * - getenv("LOGNAME")
         * - snprintf(..., geteuid());
         *
         * Note that we do not use getpwuid() because it generally will not
         * work in statically compiled binaries.
         */
752
753

#ifndef DARSHAN_DISABLE_CUSERID
Shane Snyder's avatar
Shane Snyder committed
754
        cuserid(cuser);
755
756
#endif

Shane Snyder's avatar
Shane Snyder committed
757
758
        /* if cuserid() didn't work, then check the environment */
        if(strcmp(cuser, "") == 0)
759
        {
Shane Snyder's avatar
Shane Snyder committed
760
761
762
763
764
            logname_string = getenv("LOGNAME");
            if(logname_string)
            {
                strncpy(cuser, logname_string, (L_cuserid-1));
            }
765
766
        }

Shane Snyder's avatar
Shane Snyder committed
767
768
769
770
771
772
        /* if cuserid() and environment both fail, then fall back to uid */
        if(strcmp(cuser, "") == 0)
        {
            uid_t uid = geteuid();
            snprintf(cuser, sizeof(cuser), "%u", uid);
        }
773

Shane Snyder's avatar
Shane Snyder committed
774
775
776
777
        /* generate a random number to help differentiate the log */
        hlevel=DARSHAN_MPI_CALL(PMPI_Wtime)() * 1000000;
        (void)gethostname(hname, sizeof(hname));
        logmod = darshan_hash((void*)hname,strlen(hname),hlevel);
778

Shane Snyder's avatar
Shane Snyder committed
779
780
781
782
        /* see if darshan was configured using the --with-logpath-by-env
         * argument, which allows the user to specify an absolute path to
         * place logs via an env variable.
         */
783
#ifdef __DARSHAN_LOG_ENV
Shane Snyder's avatar
Shane Snyder committed
784
        /* just silently skip if the environment variable list is too big */
785
        if(strlen(__DARSHAN_LOG_ENV) < 256)
786
        {
Shane Snyder's avatar
Shane Snyder committed
787
            /* copy env variable list to a temporary buffer */
788
            strcpy(env_check, __DARSHAN_LOG_ENV);
Shane Snyder's avatar
Shane Snyder committed
789
790
791
            /* tokenize the comma-separated list */
            env_tok = strtok(env_check, ",");
            if(env_tok)
792
            {
Shane Snyder's avatar
Shane Snyder committed
793
                do
794
                {
Shane Snyder's avatar
Shane Snyder committed
795
796
797
798
799
800
801
802
803
                    /* check each env variable in order */
                    logpath_override = getenv(env_tok);
                    if(logpath_override)
                    {
                        /* stop as soon as we find a match */
                        break;
                    }
                }while((env_tok = strtok(NULL, ",")));
            }
804
805
806
        }
#endif

Shane Snyder's avatar
Shane Snyder committed
807
        if(logpath_override)
808
        {
Shane Snyder's avatar
Shane Snyder committed
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
            ret = snprintf(logfile_name, PATH_MAX,
                "%s/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
                logpath_override,
                cuser, __progname, jobid,
                (start_tm->tm_mon+1),
                start_tm->tm_mday,
                (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
                logmod);
            if(ret == (PATH_MAX-1))
            {
                /* file name was too big; squish it down */
                snprintf(logfile_name, PATH_MAX,
                    "%s/id%d.darshan_partial",
                    logpath_override, jobid);
            }
824
        }
Shane Snyder's avatar
Shane Snyder committed
825
        else if(logpath)
826
        {
Shane Snyder's avatar
Shane Snyder committed
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
            ret = snprintf(logfile_name, PATH_MAX,
                "%s/%d/%d/%d/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
                logpath, (start_tm->tm_year+1900),
                (start_tm->tm_mon+1), start_tm->tm_mday,
                cuser, __progname, jobid,
                (start_tm->tm_mon+1),
                start_tm->tm_mday,
                (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
                logmod);
            if(ret == (PATH_MAX-1))
            {
                /* file name was too big; squish it down */
                snprintf(logfile_name, PATH_MAX,
                    "%s/id%d.darshan_partial",
                    logpath, jobid);
            }
        }
        else
        {
            logfile_name[0] = '\0';
847
848
849
850
        }
    }

    return;
851
852
}

853
/* record any hints used to write the darshan log in the log header */
854
static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* core)
855
856
857
858
859
860
861
862
863
{
    char* hints;
    char* header_hints;
    int meta_remain = 0;
    char* m;

    /* check environment variable to see if the default MPI file hints have
     * been overridden
     */
864
    hints = getenv(DARSHAN_LOG_HINTS_OVERRIDE);
865
866
    if(!hints)
    {
867
        hints = __DARSHAN_LOG_HINTS;
868
869
870
871
872
873
874
875
876
877
    }

    if(!hints || strlen(hints) < 1)
        return;

    header_hints = strdup(hints);
    if(!header_hints)
        return;

    meta_remain = DARSHAN_JOB_METADATA_LEN -
878
        strlen(core->log_job.metadata) - 1;
879
880
    if(meta_remain >= (strlen(PACKAGE_VERSION) + 9))
    {
881
        sprintf(core->log_job.metadata, "lib_ver=%s\n", PACKAGE_VERSION);
882
883
884
885
        meta_remain -= (strlen(PACKAGE_VERSION) + 9);
    }
    if(meta_remain >= (3 + strlen(header_hints)))
    {
886
        m = core->log_job.metadata + strlen(core->log_job.metadata);
887
888
889
890
891
892
893
894
895
896
897
898
        /* We have room to store the hints in the metadata portion of
         * the job header.  We just prepend an h= to the hints list.  The
         * metadata parser will ignore = characters that appear in the value
         * portion of the metadata key/value pair.
         */
        sprintf(m, "h=%s\n", header_hints);
    }
    free(header_hints);

    return;
}

899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
/* comparison callback over two struct mnt_data entries (the signature
 * matches what qsort() expects -- presumably that is how it is used)
 *
 * Orders entries by the length of their path, longest first:
 * returns -1 if a's path is longer than b's, 1 if it is shorter, and 0 if
 * both paths have the same length.
 */
static int mnt_data_cmp(const void* a, const void* b)
{
    const struct mnt_data *lhs = a;
    const struct mnt_data *rhs = b;
    const size_t lhs_len = strlen(lhs->path);
    const size_t rhs_len = strlen(rhs->path);

    /* each comparison evaluates to 0 or 1, so the difference is -1, 0 or 1;
     * a longer left-hand path gives the negative result
     */
    return (lhs_len < rhs_len) - (lhs_len > rhs_len);
}

/* adds an entry to table of mounted file systems
 *
 * trailing_data: NUL-terminated string that a "\n<type>\t<dir>" record for
 *                this mount is appended to, if it fits
 * space_left:    in/out number of bytes still available in trailing_data;
 *                reduced by the length of the record when it is appended
 * entry:         mount table entry to record
 *
 * The mount point, file system type and preferred block size are stored in
 * mnt_data_array[mnt_data_count].  mnt_data_count is always incremented,
 * even when the record does not fit in trailing_data.  This function does
 * not check that a free slot is available in mnt_data_array; the caller is
 * responsible for that.
 */
static void add_entry(char* trailing_data, int* space_left, struct mntent *entry)
{
    int ret;
    char tmp_mnt[256];
    struct statfs statfsbuf;

    /* strncpy() does not terminate the destination when the source is too
     * long, so terminate explicitly instead of relying on the table having
     * been zeroed beforehand
     */
    strncpy(mnt_data_array[mnt_data_count].path, entry->mnt_dir,
        DARSHAN_MAX_MNT_PATH-1);
    mnt_data_array[mnt_data_count].path[DARSHAN_MAX_MNT_PATH-1] = '\0';
    strncpy(mnt_data_array[mnt_data_count].type, entry->mnt_type,
        DARSHAN_MAX_MNT_TYPE-1);
    mnt_data_array[mnt_data_count].type[DARSHAN_MAX_MNT_TYPE-1] = '\0';

    /* NOTE: we now try to detect the preferred block size for each file
     * system using statfs().  On Lustre we assume a size of 1 MiB
     * because statfs() reports 4 KiB.  If statfs() fails we fall back to
     * 4 KiB.
     */
#ifndef LL_SUPER_MAGIC
#define LL_SUPER_MAGIC 0x0BD00BD0
#endif
    ret = statfs(entry->mnt_dir, &statfsbuf);
    if(ret != 0)
        mnt_data_array[mnt_data_count].block_size = 4096;
    else if(statfsbuf.f_type == LL_SUPER_MAGIC)
        mnt_data_array[mnt_data_count].block_size = 1024*1024;
    else
        mnt_data_array[mnt_data_count].block_size = statfsbuf.f_bsize;

    /* store mount information for use in header of darshan log */
    ret = snprintf(tmp_mnt, sizeof(tmp_mnt), "\n%s\t%s",
        entry->mnt_type, entry->mnt_dir);

    /* only append a record that was formatted without error or truncation
     * and that fits in the remaining space; the comparison is done on
     * signed ints so a negative *space_left can never pass the check
     */
    if(ret >= 0 && (size_t)ret < sizeof(tmp_mnt) &&
       (*space_left) >= 0 && ret <= (*space_left))
    {
        strcat(trailing_data, tmp_mnt);
        (*space_left) -= ret;
    }

    mnt_data_count++;
    return;
}

/* darshan_get_exe_and_mounts_root()
 *
 * collects command line and list of mounted file systems into a string that
 * will be stored with the job header
 */
static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core,
    char* trailing_data, int space_left)
{
    FILE* tab;
    struct mntent *entry;
    char* exclude;
    int tmp_index = 0;
    int skip = 0;

    /* skip these fs types */
    static char* fs_exclusions[] = {
        "tmpfs",
        "proc",
        "sysfs",
        "devpts",
        "binfmt_misc",
        "fusectl",
        "debugfs",
        "securityfs",
        "nfsd",
        "none",
        "rpc_pipefs",
        "hugetlbfs",
        "cgroup",
        NULL
    };

    /* length of exe has already been safety checked in darshan-posix.c */
    strcat(trailing_data, core->exe);
985
    space_left = DARSHAN_EXE_LEN - strlen(trailing_data);
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000

    /* we make two passes through mounted file systems; in the first pass we
     * grab any non-nfs mount points, then on the second pass we grab nfs
     * mount points
     */

    tab = setmntent("/etc/mtab", "r");
    if(!tab)
        return;
    /* loop through list of mounted file systems */
    while(mnt_data_count<DARSHAN_MAX_MNTS && (entry = getmntent(tab)) != NULL)
    {
        /* filter out excluded fs types */
        tmp_index = 0;
        skip = 0;
For faster browsing, not all history is shown. View entire blame