darshan-core.c 50.7 KB
Newer Older
1
2
3
4
5
/*
 *  (C) 2009 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

6
#define _XOPEN_SOURCE 500
7
#define _GNU_SOURCE
8

9
10
11
12
13
14
#include "darshan-runtime-config.h"

#include <stdio.h>
#ifdef HAVE_MNTENT_H
#include <mntent.h>
#endif
15
16
17
18
19
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <limits.h>
#include <pthread.h>
20
21
22
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/vfs.h>
23
#include <zlib.h>
24
#include <mpi.h>
25
#include <assert.h>
26

27
#include "uthash.h"
28
29
#include "darshan-core.h"

30
extern char* __progname;
31
extern char* __progname_full;
32

33
/* internal variable delcarations */
34
static struct darshan_core_runtime *darshan_core = NULL;
35
static pthread_mutex_t darshan_core_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
36
static int my_rank = -1;
37
static int nprocs = -1;
38

Shane Snyder's avatar
Shane Snyder committed
39
40
41
#define DARSHAN_CORE_LOCK() pthread_mutex_lock(&darshan_core_mutex)
#define DARSHAN_CORE_UNLOCK() pthread_mutex_unlock(&darshan_core_mutex)

42
43
44
45
46
47
48
49
50
51
52
53
54
/* FS mount information */
#define DARSHAN_MAX_MNTS 64
#define DARSHAN_MAX_MNT_PATH 256
#define DARSHAN_MAX_MNT_TYPE 32
struct mnt_data
{
    int64_t block_size;
    char path[DARSHAN_MAX_MNT_PATH];
    char type[DARSHAN_MAX_MNT_TYPE];
};
static struct mnt_data mnt_data_array[DARSHAN_MAX_MNTS];
static int mnt_data_count = 0;

55
56
/* prototypes for internal helper functions */
static void darshan_core_initialize(
57
    int argc, char **argv);
58
59
60
static void darshan_core_shutdown(
    void);
static void darshan_core_cleanup(
61
    struct darshan_core_runtime* core);
62
63
64
static void darshan_get_logfile_name(
    char* logfile_name, int jobid, struct tm* start_tm);
static void darshan_log_record_hints_and_ver(
65
66
67
68
69
70
    struct darshan_core_runtime* core);
static void darshan_get_exe_and_mounts_root(
    struct darshan_core_runtime *core, char* trailing_data,
    int space_left);
static char* darshan_get_exe_and_mounts(
    struct darshan_core_runtime *core);
71
static void darshan_get_shared_records(
72
    struct darshan_core_runtime *core, darshan_record_id *shared_recs);
73
static int darshan_log_open_all(
74
    char *logfile_name, MPI_File *log_fh);
75
76
77
static int darshan_deflate_buffer(
    void **pointers, int *lengths, int count, int nocomp_flag,
    char *comp_buf, int *comp_length);
78
static int darshan_log_write_record_hash(
79
    MPI_File log_fh, struct darshan_core_runtime *core,
80
81
82
83
    uint64_t *inout_off);
static int darshan_log_append_all(
    MPI_File log_fh, struct darshan_core_runtime *core, void *buf,
    int count, uint64_t *inout_off, uint64_t *agg_uncomp_sz);
84

85
/* intercept MPI initialize and finalize to manage darshan core runtime */
86
87
88
89
90
91
92
93
94
95
int MPI_Init(int *argc, char ***argv)
{
    int ret;

    ret = DARSHAN_MPI_CALL(PMPI_Init)(argc, argv);
    if(ret != MPI_SUCCESS)
    {
        return(ret);
    }

96
97
98
99
100
101
102
103
104
    if(argc && argv)
    {
        darshan_core_initialize(*argc, *argv);
    }
    else
    {
        /* we don't see argc and argv here in fortran */
        darshan_core_initialize(0, NULL);
    }
105
106
107
108

    return(ret);
}

109
int MPI_Init_thread(int *argc, char ***argv, int required, int *provided)
110
111
112
113
{
    int ret;

    ret = DARSHAN_MPI_CALL(PMPI_Init_thread)(argc, argv, required, provided);
114
    if(ret != MPI_SUCCESS)
115
116
117
118
    {
        return(ret);
    }

119
120
121
122
123
124
125
126
127
    if(argc && argv)
    {
        darshan_core_initialize(*argc, *argv);
    }
    else
    {
        /* we don't see argc and argv here in fortran */
        darshan_core_initialize(0, NULL);
    }
128
129
130
131
132
133
134
135

    return(ret);
}

int MPI_Finalize(void)
{
    int ret;

136
    darshan_core_shutdown();
137
138
139
140
141

    ret = DARSHAN_MPI_CALL(PMPI_Finalize)();
    return(ret);
}

142
143
/* *********************************** */

144
static void darshan_core_initialize(int argc, char **argv)
145
146
147
148
149
150
151
152
153
{
    int i;
    int internal_timing_flag = 0;
    double init_start, init_time, init_max;
    char* truncate_string = "<TRUNCATED>";
    int truncate_offset;
    int chars_left = 0;

    DARSHAN_MPI_CALL(PMPI_Comm_size)(MPI_COMM_WORLD, &nprocs);
154
    DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &my_rank);
155
156
157
158

    if(getenv("DARSHAN_INTERNAL_TIMING"))
        internal_timing_flag = 1;

159
    if(internal_timing_flag)
160
161
162
        init_start = DARSHAN_MPI_CALL(PMPI_Wtime)();

    /* setup darshan runtime if darshan is enabled and hasn't been initialized already */
163
    if(!getenv("DARSHAN_DISABLE") && !darshan_core)
164
    {
165
166
167
168
169
        /* TODO: darshan mem alignment code? */

        /* allocate structure to track darshan_core_runtime information */
        darshan_core = malloc(sizeof(*darshan_core));
        if(darshan_core)
170
        {
171
            memset(darshan_core, 0, sizeof(*darshan_core));
172

173
174
175
176
            darshan_core->log_job.uid = getuid();
            darshan_core->log_job.start_time = time(NULL);
            darshan_core->log_job.nprocs = nprocs;
            darshan_core->wtime_offset = DARSHAN_MPI_CALL(PMPI_Wtime)();
177
178

            /* record exe and arguments */
179
            for(i=0; i<argc; i++)
180
            {
181
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
182
183
                strncat(darshan_core->exe, argv[i], chars_left);
                if(i < (argc-1))
184
                {
185
                    chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
186
                    strncat(darshan_core->exe, " ", chars_left);
187
188
189
190
191
192
193
194
                }
            }

            /* if we don't see any arguments, then use glibc symbol to get
             * program name at least (this happens in fortran)
             */
            if(argc == 0)
            {
195
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
196
                strncat(darshan_core->exe, __progname_full, chars_left);
197
                chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
198
                strncat(darshan_core->exe, " <unknown args>", chars_left);
199
200
201
202
203
            }

            if(chars_left == 0)
            {
                /* we ran out of room; mark that string was truncated */
204
                truncate_offset = DARSHAN_EXE_LEN - strlen(truncate_string);
205
                sprintf(&darshan_core->exe[truncate_offset], "%s",
206
207
                    truncate_string);
            }
208
209
210

            /* collect information about command line and mounted file systems */
            darshan_core->trailing_data = darshan_get_exe_and_mounts(darshan_core);
211
212
213
214
215
216
217
218
        }
    }

    if(internal_timing_flag)
    {
        init_time = DARSHAN_MPI_CALL(PMPI_Wtime)() - init_start;
        DARSHAN_MPI_CALL(PMPI_Reduce)(&init_time, &init_max, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
219
        if(my_rank == 0)
220
221
222
223
224
225
226
227
228
        {
            printf("#darshan:<op>\t<nprocs>\t<time>\n");
            printf("darshan:init\t%d\t%f\n", nprocs, init_max);
        }
    }

    return;
}

229
static void darshan_core_shutdown()
230
{
231
    int i;
232
    char *logfile_name;
233
    struct darshan_core_runtime *final_core;
234
    int internal_timing_flag = 0;
235
236
    char *envjobid;
    char *jobid_str;
237
    int jobid;
238
    struct tm *start_tm;
239
    time_t start_time_tmp;
240
241
    int ret = 0;
    int all_ret = 0;
242
243
    int64_t first_start_time;
    int64_t last_end_time;
244
245
    int local_mod_use[DARSHAN_MAX_MODS] = {0};
    int global_mod_use_count[DARSHAN_MAX_MODS] = {0};
246
    darshan_record_id shared_recs[DARSHAN_CORE_MAX_RECORDS] = {0};
247
    double start_log_time;
248
249
250
251
252
253
254
    double open1, open2;
    double job1, job2;
    double rec1, rec2;
    double mod1[DARSHAN_MAX_MODS] = {0};
    double mod2[DARSHAN_MAX_MODS] = {0};
    double header1, header2;
    double tm_end;
255
256
    uint64_t gz_fp = 0;
    uint64_t tmp_off = 0;
257
258
    MPI_File log_fh;
    MPI_Status status;
259
260
261
262

    if(getenv("DARSHAN_INTERNAL_TIMING"))
        internal_timing_flag = 1;

263
264
    start_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();

Shane Snyder's avatar
Shane Snyder committed
265
    /* disable darhan-core while we shutdown */
266
    DARSHAN_CORE_LOCK();
267
    if(!darshan_core)
268
    {
269
        DARSHAN_CORE_UNLOCK();
270
271
        return;
    }
272
273
    final_core = darshan_core;
    darshan_core = NULL;
Shane Snyder's avatar
Shane Snyder committed
274

275
    /* we also need to set which modules were registered on this process and
Shane Snyder's avatar
Shane Snyder committed
276
277
278
279
280
281
282
283
284
285
     * disable tracing within those modules while we shutdown
     */
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
    {
        if(final_core->mod_array[i])
        {
            local_mod_use[i] = 1;
            final_core->mod_array[i]->mod_funcs.disable_instrumentation();
        }
    }
286
    DARSHAN_CORE_UNLOCK();
287
288
289
290

    logfile_name = malloc(PATH_MAX);
    if(!logfile_name)
    {
291
        darshan_core_cleanup(final_core);
292
293
294
        return;
    }

295
    /* set darshan job id/metadata and constuct log file name on rank 0 */
296
    if(my_rank == 0)
297
298
299
    {
        /* Use CP_JOBID_OVERRIDE for the env var or CP_JOBID */
        envjobid = getenv(CP_JOBID_OVERRIDE);
300
        if(!envjobid)
301
302
303
304
        {
            envjobid = CP_JOBID;
        }

305
        /* find a job id */
306
307
308
309
310
311
312
313
314
315
316
317
        jobid_str = getenv(envjobid);
        if(jobid_str)
        {
            /* in cobalt we can find it in env var */
            ret = sscanf(jobid_str, "%d", &jobid);
        }
        if(!jobid_str || ret != 1)
        {
            /* use pid as fall back */
            jobid = getpid();
        }

318
        final_core->log_job.jobid = (int64_t)jobid;
319

320
        /* if we are using any hints to write the log file, then record those
321
         * hints with the darshan job information
322
         */
323
        darshan_log_record_hints_and_ver(final_core);
324

325
        /* use human readable start time format in log filename */
326
        start_time_tmp = final_core->log_job.start_time;
327
        start_tm = localtime(&start_time_tmp);
328

329
330
        /* construct log file name */
        darshan_get_logfile_name(logfile_name, jobid, start_tm);
331
332
333
334
335
336
337
338
339
    }

    /* broadcast log file name */
    DARSHAN_MPI_CALL(PMPI_Bcast)(logfile_name, PATH_MAX, MPI_CHAR, 0,
        MPI_COMM_WORLD);

    if(strlen(logfile_name) == 0)
    {
        /* failed to generate log file name */
340
        free(logfile_name);
341
        darshan_core_cleanup(final_core);
342
343
344
        return;
    }

345
    final_core->log_job.end_time = time(NULL);
346

347
348
349
    /* reduce to report first start time and last end time across all ranks
     * at rank 0
     */
350
351
    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.start_time, &first_start_time, 1, MPI_LONG_LONG, MPI_MIN, 0, MPI_COMM_WORLD);
    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.end_time, &last_end_time, 1, MPI_LONG_LONG, MPI_MAX, 0, MPI_COMM_WORLD);
352
353
    if(my_rank == 0)
    {
354
355
        final_core->log_job.start_time = first_start_time;
        final_core->log_job.end_time = last_end_time;
356
    }
357

358
359
360
    /* reduce the number of times a module was opened globally and bcast to everyone */   
    DARSHAN_MPI_CALL(PMPI_Allreduce)(local_mod_use, global_mod_use_count, DARSHAN_MAX_MODS, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

361
    /* get a list of records which are shared across all processes */
362
    darshan_get_shared_records(final_core, shared_recs);
363

364
365
    if(internal_timing_flag)
        open1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
366
    /* collectively open the darshan log file */
367
    ret = darshan_log_open_all(logfile_name, &log_fh);
368
369
    if(internal_timing_flag)
        open2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
370
371
372
373
374
375
376
377

    /* error out if unable to open log file */
    DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
        MPI_LOR, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        if(my_rank == 0)
        {
378
379
            fprintf(stderr, "darshan library warning: unable to open log file %s\n",
                logfile_name);
380
381
382
            unlink(logfile_name);
        }
        free(logfile_name);
383
        darshan_core_cleanup(final_core);
384
385
386
        return;
    }

387
388
    if(internal_timing_flag)
        job1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
389
    /* rank 0 is responsible for writing the compressed darshan job information */
Shane Snyder's avatar
Shane Snyder committed
390
    if(my_rank == 0)
391
    {
392
393
394
        void *pointers[2] = {&final_core->log_job, final_core->trailing_data};
        int lengths[2] = {sizeof(struct darshan_job), DARSHAN_EXE_LEN+1};
        int comp_buf_sz = 0;
395

396
        /* compress the job info and the trailing mount/exe data */
397
        all_ret = darshan_deflate_buffer(pointers, lengths, 2, 0,
398
399
            final_core->comp_buf, &comp_buf_sz);
        if(all_ret)
400
        {
401
            fprintf(stderr, "darshan library warning: unable to compress job data\n");
402
            unlink(logfile_name);
403
        }
404
405
406
        else
        {
            /* write the job information, preallocing space for the log header */
407
408
409
            gz_fp += sizeof(struct darshan_header) + 23; /* gzip headers/trailers ... */
            all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, gz_fp,
                final_core->comp_buf, comp_buf_sz, MPI_BYTE, &status);
410
411
412
413
414
            if(all_ret != MPI_SUCCESS)
            {
                fprintf(stderr, "darshan library warning: unable to write job data to log file %s\n",
                        logfile_name);
                unlink(logfile_name);
Shane Snyder's avatar
Shane Snyder committed
415
                
416
            }
Shane Snyder's avatar
Shane Snyder committed
417

418
419
            /* set the beginning offset of record hash, which follows job info just written */
            gz_fp += comp_buf_sz;
420
        }
421
422
    }

423
424
425
426
427
    /* error out if unable to write job information */
    DARSHAN_MPI_CALL(PMPI_Bcast)(&all_ret, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        free(logfile_name);
428
        darshan_core_cleanup(final_core);
429
430
        return;
    }
431
432
    if(internal_timing_flag)
        job2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
433

434
435
    if(internal_timing_flag)
        rec1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
436
    /* write the record name->id hash to the log file */
437
438
    ret = darshan_log_write_record_hash(log_fh, final_core, &gz_fp);
    tmp_off = final_core->log_header.rec_map.off + final_core->log_header.rec_map.len;
439

440
    /* error out if unable to write record hash */
441
442
443
444
445
446
    DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
        MPI_LOR, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        if(my_rank == 0)
        {
447
            fprintf(stderr, "darshan library warning: unable to write record hash to log file %s\n",
448
                logfile_name);
449
            unlink(logfile_name);
450
451
        }
        free(logfile_name);
452
        darshan_core_cleanup(final_core);
453
454
        return;
    }
Shane Snyder's avatar
Shane Snyder committed
455
456
    if(internal_timing_flag)
        rec2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
457
458

    /* loop over globally used darshan modules and:
459
     *      - perform shared file reductions, if possible
460
     *      - get final output buffer
461
     *      - compress (zlib) provided output buffer
Shane Snyder's avatar
Shane Snyder committed
462
     *      - append compressed buffer to log file
463
464
     *      - add module index info (file offset/length) to log header
     *      - shutdown the module
465
     */
466
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
467
    {
468
        struct darshan_core_module* this_mod = final_core->mod_array[i];
469
470
        darshan_record_id mod_shared_recs[DARSHAN_CORE_MAX_RECORDS];
        struct darshan_core_record_ref *ref = NULL;
471
        void* mod_buf = NULL;
472
        int mod_buf_sz = 0;
473
        int j;
474

475
        if(global_mod_use_count[i] == 0)
476
477
        {
            if(my_rank == 0)
478
479
480
481
            {
                final_core->log_header.mod_map[i].off = 0;
                final_core->log_header.mod_map[i].len = 0;
            }
482
            continue;
483
        }
484
485
486
487
 
        if(internal_timing_flag)
            mod1[i] = DARSHAN_MPI_CALL(PMPI_Wtime)();   
        /* if all processes used this module, prepare to do a shared file reduction */
Shane Snyder's avatar
Shane Snyder committed
488
        if(global_mod_use_count[i] == nprocs)
489
        {
490
491
492
493
494
495
496
497
498
499
500
501
502
            int shared_rec_count = 0;
            int rec_sz = 0;
            void *red_send_buf = NULL, *red_recv_buf = NULL;
            MPI_Datatype red_type;
            MPI_Op red_op;

            /* set the shared file list for this module */
            memset(mod_shared_recs, 0, DARSHAN_CORE_MAX_RECORDS * sizeof(darshan_record_id));
            for(j = 0; j < DARSHAN_CORE_MAX_RECORDS && shared_recs[j] != 0; j++)
            {
                HASH_FIND(hlink, final_core->rec_hash, &shared_recs[j],
                    sizeof(darshan_record_id), ref);
                assert(ref);
Shane Snyder's avatar
Shane Snyder committed
503
                if(DARSHAN_CORE_MOD_ISSET(ref->global_mod_flags, i))
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
                {
                    mod_shared_recs[shared_rec_count++] = shared_recs[j];
                }
            }

            /* if there are globally shared files, do a shared file reduction */
            if(shared_rec_count)
            {
                this_mod->mod_funcs.prepare_for_reduction(mod_shared_recs, &shared_rec_count,
                    &red_send_buf, &red_recv_buf, &rec_sz);

                if(shared_rec_count)
                {
                    /* construct a datatype for a file record.  This is serving no purpose
                     * except to make sure we can do a reduction on proper boundaries
                     */
                    DARSHAN_MPI_CALL(PMPI_Type_contiguous)(rec_sz, MPI_BYTE, &red_type);
                    DARSHAN_MPI_CALL(PMPI_Type_commit)(&red_type);

                    /* register a reduction operator for this module */
Shane Snyder's avatar
Shane Snyder committed
524
                    DARSHAN_MPI_CALL(PMPI_Op_create)(this_mod->mod_funcs.reduce_records,
525
526
527
528
529
530
531
532
533
534
                        1, &red_op);

                    /* reduce shared file records for this module */
                    DARSHAN_MPI_CALL(PMPI_Reduce)(red_send_buf, red_recv_buf,
                        shared_rec_count, red_type, red_op, 0, MPI_COMM_WORLD);

                    DARSHAN_MPI_CALL(PMPI_Type_free)(&red_type);
                    DARSHAN_MPI_CALL(PMPI_Op_free)(&red_op);
                }
            }
535
        }
536

537
        /* if module is registered locally, get the corresponding output buffer */
538
        if(this_mod)
539
540
        {
            /* get output buffer from module */
541
            this_mod->mod_funcs.get_output_data(&mod_buf, &mod_buf_sz);
542
543
        }

544
        final_core->log_header.mod_map[i].off = tmp_off;
545

546
547
548
549
        /* append this module's data to the darshan log */
        ret = darshan_log_append_all(log_fh, final_core, mod_buf, mod_buf_sz,
            &gz_fp, &(final_core->log_header.mod_map[i].len));
        tmp_off += final_core->log_header.mod_map[i].len;
550

551
        /* error out if the log append failed */
552
553
554
        DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
            MPI_LOR, MPI_COMM_WORLD);
        if(all_ret != 0)
555
        {
556
557
558
559
560
561
562
563
            if(my_rank == 0)
            {
                fprintf(stderr,
                    "darshan library warning: unable to write %s module data to log file %s\n",
                    darshan_module_names[i], logfile_name);
                unlink(logfile_name);
            }
            free(logfile_name);
564
            darshan_core_cleanup(final_core);
565
            return;
566
567
568
        }

        /* shutdown module if registered locally */
569
        if(this_mod)
570
571
572
        {
            this_mod->mod_funcs.shutdown();
        }
573
574
        if(internal_timing_flag)
            mod2[i] = DARSHAN_MPI_CALL(PMPI_Wtime)();
575
576
    }

577
578
    if(internal_timing_flag)
        header1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
579
    /* rank 0 is responsible for writing the log header */
580
581
    if(my_rank == 0)
    {
582
583
584
        void *header_buf = &(final_core->log_header);
        int header_buf_sz = sizeof(struct darshan_header);
        int comp_buf_sz = 0;
585

586
587
588
589
590
591
592
593
594
595
596
597
598
599
        /* initialize the remaining header fields */
        strcpy(final_core->log_header.version_string, DARSHAN_LOG_VERSION);
        final_core->log_header.magic_nr = DARSHAN_MAGIC_NR;

        /* deflate the header */
        /* NOTE: the header is not actually compressed because space for it must
         *       be preallocated before writing. i.e., the "compressed" header
         *       must be constant sized, sizeof(struct darshan_header) + 23.
         *       it is still necessary to deflate the header or the resulting log
         *       file will not be a valid gzip file.
         */
        all_ret = darshan_deflate_buffer((void **)&header_buf, &header_buf_sz, 1, 1,
            final_core->comp_buf, &comp_buf_sz);
        if(all_ret)
600
        {
601
            fprintf(stderr, "darshan library warning: unable to compress header\n");
602
            unlink(logfile_name);
603
        }
604
605
606
607
608
609
610
611
612
613
614
        else
        {
            all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, 0, final_core->comp_buf,
                comp_buf_sz, MPI_BYTE, &status);
            if(all_ret != MPI_SUCCESS)
            {
                fprintf(stderr, "darshan library warning: unable to write header to log file %s\n",
                        logfile_name);
                unlink(logfile_name);
            }
        }
615
616
    }

617
618
619
620
621
    /* error out if unable to write log header */
    DARSHAN_MPI_CALL(PMPI_Bcast)(&all_ret, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if(all_ret != 0)
    {
        free(logfile_name);
622
        darshan_core_cleanup(final_core);
623
624
        return;
    }
625
626
    if(internal_timing_flag)
        header2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
627

628
629
630
631
632
633
    DARSHAN_MPI_CALL(PMPI_File_close)(&log_fh);

    /* if we got this far, there are no errors, so rename from *.darshan_partial
     * to *-<logwritetime>.darshan.gz, which indicates that this log file is
     * complete and ready for analysis
     */
634
635
    if(my_rank == 0)
    {
Shane Snyder's avatar
Shane Snyder committed
636
        if(getenv("DARSHAN_LOGFILE"))
637
        {
638
#ifdef __CP_GROUP_READABLE_LOGS
Shane Snyder's avatar
Shane Snyder committed
639
            chmod(logfile_name, (S_IRUSR|S_IRGRP));
640
#else
Shane Snyder's avatar
Shane Snyder committed
641
            chmod(logfile_name, (S_IRUSR));
642
#endif
Shane Snyder's avatar
Shane Snyder committed
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
        }
        else
        {
            char* tmp_index;
            double end_log_time;
            char* new_logfile_name;

            new_logfile_name = malloc(PATH_MAX);
            if(new_logfile_name)
            {
                new_logfile_name[0] = '\0';
                end_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();
                strcat(new_logfile_name, logfile_name);
                tmp_index = strstr(new_logfile_name, ".darshan_partial");
                sprintf(tmp_index, "_%d.darshan.gz", (int)(end_log_time-start_log_time+1));
                rename(logfile_name, new_logfile_name);
                /* set permissions on log file */
#ifdef __CP_GROUP_READABLE_LOGS
                chmod(new_logfile_name, (S_IRUSR|S_IRGRP));
#else
                chmod(new_logfile_name, (S_IRUSR));
#endif
                free(new_logfile_name);
            }
667
        }
668
    }
669

670
    free(logfile_name);
671
    darshan_core_cleanup(final_core);
672

673
    if(internal_timing_flag)
674
    {
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
        double open_tm, open_slowest;
        double header_tm, header_slowest;
        double job_tm, job_slowest;
        double rec_tm, rec_slowest;
        double mod_tm[DARSHAN_MAX_MODS], mod_slowest[DARSHAN_MAX_MODS];
        double all_tm, all_slowest;

        tm_end = DARSHAN_MPI_CALL(PMPI_Wtime)();

        open_tm = open2 - open1;
        header_tm = header2 - header1;
        job_tm = job2 - job1;
        rec_tm = rec2 - rec1;
        all_tm = tm_end - start_log_time;
        for(i = 0;i < DARSHAN_MAX_MODS; i++)
        {
            mod_tm[i] = mod2[i] - mod1[i];
        }

        DARSHAN_MPI_CALL(PMPI_Reduce)(&open_tm, &open_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&header_tm, &header_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&job_tm, &job_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&rec_tm, &rec_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(&all_tm, &all_slowest, 1,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        DARSHAN_MPI_CALL(PMPI_Reduce)(mod_tm, mod_slowest, DARSHAN_MAX_MODS,
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

        if(my_rank == 0)
        {
            printf("#darshan:<op>\t<nprocs>\t<time>\n");
            printf("darshan:log_open\t%d\t%f\n", nprocs, open_slowest);
            printf("darshan:job_write\t%d\t%f\n", nprocs, job_slowest);
            printf("darshan:hash_write\t%d\t%f\n", nprocs, rec_slowest);
Shane Snyder's avatar
Shane Snyder committed
713
            printf("darshan:header_write\t%d\t%f\n", nprocs, header_slowest);
714
715
716
717
            for(i = 0; i < DARSHAN_MAX_MODS; i++)
            {
                if(global_mod_use_count[i])
                    printf("darshan:%s_shutdown\t%d\t%f\n", darshan_module_names[i],
Shane Snyder's avatar
Shane Snyder committed
718
                        nprocs, mod_slowest[i]);
719
720
721
            }
            printf("darshan:core_shutdown\t%d\t%f\n", nprocs, all_slowest);
        }
722
723
724
725
    }
    
    return;
}
726

727
/* free darshan core data structures to shutdown */
728
static void darshan_core_cleanup(struct darshan_core_runtime* core)
729
{
730
    struct darshan_core_record_ref *tmp, *ref;
731
    int i;
732

733
734
735
736
737
738
    HASH_ITER(hlink, core->rec_hash, ref, tmp)
    {
        HASH_DELETE(hlink, core->rec_hash, ref);
        free(ref->rec.name);
        free(ref);
    }
739

740
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
741
    {
742
        if(core->mod_array[i])
743
        {        
744
745
            free(core->mod_array[i]);
            core->mod_array[i] = NULL;
746
        }
747
    }
748

749
    free(core->trailing_data);
750
    free(core);
751
752
753
754

    return;
}

755
/* construct the darshan log file name */
756
static void darshan_get_logfile_name(char* logfile_name, int jobid, struct tm* start_tm)
757
{
Shane Snyder's avatar
Shane Snyder committed
758
    char* user_logfile_name;
759
760
761
762
763
764
765
766
767
768
769
770
771
    char* logpath;
    char* logname_string;
    char* logpath_override = NULL;
#ifdef __CP_LOG_ENV
    char env_check[256];
    char* env_tok;
#endif
    uint64_t hlevel;
    char hname[HOST_NAME_MAX];
    uint64_t logmod;
    char cuser[L_cuserid] = {0};
    int ret;

Shane Snyder's avatar
Shane Snyder committed
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
    /* first, check if user specifies a complete logpath to use */
    user_logfile_name = getenv("DARSHAN_LOGFILE");
    if(user_logfile_name)
    {
        if(strlen(user_logfile_name) >= (PATH_MAX-1))
        {
            fprintf(stderr, "darshan library warning: user log file name too long.\n");
            logfile_name[0] = '\0';
        }
        else
        {
            strcpy(logfile_name, user_logfile_name);
        }
    }
    else
787
    {
Shane Snyder's avatar
Shane Snyder committed
788
789
790
791
792
793
        /* otherwise, generate the log path automatically */

        /* Use CP_LOG_PATH_OVERRIDE for the value or __CP_LOG_PATH */
        logpath = getenv(CP_LOG_PATH_OVERRIDE);
        if(!logpath)
        {
794
#ifdef __CP_LOG_PATH
Shane Snyder's avatar
Shane Snyder committed
795
            logpath = __CP_LOG_PATH;
796
#endif
Shane Snyder's avatar
Shane Snyder committed
797
        }
798

Shane Snyder's avatar
Shane Snyder committed
799
800
801
802
803
804
805
806
807
808
        /* get the username for this job.  In order we will try each of the
         * following until one of them succeeds:
         *
         * - cuserid()
         * - getenv("LOGNAME")
         * - snprintf(..., geteuid());
         *
         * Note that we do not use getpwuid() because it generally will not
         * work in statically compiled binaries.
         */
809
810

#ifndef DARSHAN_DISABLE_CUSERID
Shane Snyder's avatar
Shane Snyder committed
811
        cuserid(cuser);
812
813
#endif

Shane Snyder's avatar
Shane Snyder committed
814
815
        /* if cuserid() didn't work, then check the environment */
        if(strcmp(cuser, "") == 0)
816
        {
Shane Snyder's avatar
Shane Snyder committed
817
818
819
820
821
            logname_string = getenv("LOGNAME");
            if(logname_string)
            {
                strncpy(cuser, logname_string, (L_cuserid-1));
            }
822
823
        }

Shane Snyder's avatar
Shane Snyder committed
824
825
826
827
828
829
        /* if cuserid() and environment both fail, then fall back to uid */
        if(strcmp(cuser, "") == 0)
        {
            uid_t uid = geteuid();
            snprintf(cuser, sizeof(cuser), "%u", uid);
        }
830

Shane Snyder's avatar
Shane Snyder committed
831
832
833
834
        /* generate a random number to help differentiate the log */
        hlevel=DARSHAN_MPI_CALL(PMPI_Wtime)() * 1000000;
        (void)gethostname(hname, sizeof(hname));
        logmod = darshan_hash((void*)hname,strlen(hname),hlevel);
835

Shane Snyder's avatar
Shane Snyder committed
836
837
838
839
        /* see if darshan was configured using the --with-logpath-by-env
         * argument, which allows the user to specify an absolute path to
         * place logs via an env variable.
         */
840
#ifdef __CP_LOG_ENV
Shane Snyder's avatar
Shane Snyder committed
841
842
        /* just silently skip if the environment variable list is too big */
        if(strlen(__CP_LOG_ENV) < 256)
843
        {
Shane Snyder's avatar
Shane Snyder committed
844
845
846
847
848
            /* copy env variable list to a temporary buffer */
            strcpy(env_check, __CP_LOG_ENV);
            /* tokenize the comma-separated list */
            env_tok = strtok(env_check, ",");
            if(env_tok)
849
            {
Shane Snyder's avatar
Shane Snyder committed
850
                do
851
                {
Shane Snyder's avatar
Shane Snyder committed
852
853
854
855
856
857
858
859
860
                    /* check each env variable in order */
                    logpath_override = getenv(env_tok);
                    if(logpath_override)
                    {
                        /* stop as soon as we find a match */
                        break;
                    }
                }while((env_tok = strtok(NULL, ",")));
            }
861
862
863
        }
#endif

Shane Snyder's avatar
Shane Snyder committed
864
        if(logpath_override)
865
        {
Shane Snyder's avatar
Shane Snyder committed
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
            ret = snprintf(logfile_name, PATH_MAX,
                "%s/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
                logpath_override,
                cuser, __progname, jobid,
                (start_tm->tm_mon+1),
                start_tm->tm_mday,
                (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
                logmod);
            if(ret == (PATH_MAX-1))
            {
                /* file name was too big; squish it down */
                snprintf(logfile_name, PATH_MAX,
                    "%s/id%d.darshan_partial",
                    logpath_override, jobid);
            }
881
        }
Shane Snyder's avatar
Shane Snyder committed
882
        else if(logpath)
883
        {
Shane Snyder's avatar
Shane Snyder committed
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
            ret = snprintf(logfile_name, PATH_MAX,
                "%s/%d/%d/%d/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
                logpath, (start_tm->tm_year+1900),
                (start_tm->tm_mon+1), start_tm->tm_mday,
                cuser, __progname, jobid,
                (start_tm->tm_mon+1),
                start_tm->tm_mday,
                (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
                logmod);
            if(ret == (PATH_MAX-1))
            {
                /* file name was too big; squish it down */
                snprintf(logfile_name, PATH_MAX,
                    "%s/id%d.darshan_partial",
                    logpath, jobid);
            }
        }
        else
        {
            logfile_name[0] = '\0';
904
905
906
907
        }
    }

    return;
908
909
}

910
/* record any hints used to write the darshan log in the log header */
911
static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* core)
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
{
    char* hints;
    char* header_hints;
    int meta_remain = 0;
    char* m;

    /* check environment variable to see if the default MPI file hints have
     * been overridden
     */
    hints = getenv(CP_LOG_HINTS_OVERRIDE);
    if(!hints)
    {
        hints = __CP_LOG_HINTS;
    }

    if(!hints || strlen(hints) < 1)
        return;

    header_hints = strdup(hints);
    if(!header_hints)
        return;

    meta_remain = DARSHAN_JOB_METADATA_LEN -
935
        strlen(core->log_job.metadata) - 1;
936
937
    if(meta_remain >= (strlen(PACKAGE_VERSION) + 9))
    {
938
        sprintf(core->log_job.metadata, "lib_ver=%s\n", PACKAGE_VERSION);
939
940
941
942
        meta_remain -= (strlen(PACKAGE_VERSION) + 9);
    }
    if(meta_remain >= (3 + strlen(header_hints)))
    {
943
        m = core->log_job.metadata + strlen(core->log_job.metadata);
944
945
946
947
948
949
950
951
952
953
954
955
        /* We have room to store the hints in the metadata portion of
         * the job header.  We just prepend an h= to the hints list.  The
         * metadata parser will ignore = characters that appear in the value
         * portion of the metadata key/value pair.
         */
        sprintf(m, "h=%s\n", header_hints);
    }
    free(header_hints);

    return;
}

956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
static int mnt_data_cmp(const void* a, const void* b)
{
    const struct mnt_data *d_a = (const struct mnt_data*)a;
    const struct mnt_data *d_b = (const struct mnt_data*)b;

    if(strlen(d_a->path) > strlen(d_b->path))
        return(-1);
    else if(strlen(d_a->path) < strlen(d_b->path))
        return(1);
    else
        return(0);
}

/* adds an entry to table of mounted file systems */
static void add_entry(char* trailing_data, int* space_left, struct mntent *entry)
{
    int ret;
    char tmp_mnt[256];
    struct statfs statfsbuf;

    strncpy(mnt_data_array[mnt_data_count].path, entry->mnt_dir,
        DARSHAN_MAX_MNT_PATH-1);
    strncpy(mnt_data_array[mnt_data_count].type, entry->mnt_type,
        DARSHAN_MAX_MNT_TYPE-1);
    /* NOTE: we now try to detect the preferred block size for each file 
     * system using fstatfs().  On Lustre we assume a size of 1 MiB 
     * because fstatfs() reports 4 KiB. 
     */
#ifndef LL_SUPER_MAGIC
#define LL_SUPER_MAGIC 0x0BD00BD0
#endif
    ret = statfs(entry->mnt_dir, &statfsbuf);
    if(ret == 0 && statfsbuf.f_type != LL_SUPER_MAGIC)
        mnt_data_array[mnt_data_count].block_size = statfsbuf.f_bsize;
    else if(ret == 0 && statfsbuf.f_type == LL_SUPER_MAGIC)
        mnt_data_array[mnt_data_count].block_size = 1024*1024;
    else
        mnt_data_array[mnt_data_count].block_size = 4096;

    /* store mount information for use in header of darshan log */
996
    ret = snprintf(tmp_mnt, 256, "\n%s\t%s",
997
998
999
1000
        entry->mnt_type, entry->mnt_dir);
    if(ret < 256 && strlen(tmp_mnt) <= (*space_left))
    {
        strcat(trailing_data, tmp_mnt);
For faster browsing, not all history is shown. View entire blame