darshan-core.c 70.1 KB
Newer Older
1
/*
Shane Snyder's avatar
Shane Snyder committed
2
3
4
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
5
6
 */

7
#define _XOPEN_SOURCE 500
8
#define _GNU_SOURCE
9

10
11
12
13
14
15
#include "darshan-runtime-config.h"

#include <stdio.h>
#ifdef HAVE_MNTENT_H
#include <mntent.h>
#endif
16
17
18
19
20
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <limits.h>
#include <pthread.h>
21
#include <fcntl.h>
Shane Snyder's avatar
Shane Snyder committed
22
#include <stdarg.h>
23
24
#include <dirent.h>
#include <sys/ioctl.h>
25
26
#include <sys/types.h>
#include <sys/stat.h>
27
#include <sys/mman.h>
28
#include <sys/time.h>
29
#include <sys/vfs.h>
30
#include <zlib.h>
31
#include <assert.h>
32

33
34
35
36
#ifdef HAVE_MPI
#include <mpi.h>
#endif

37
#include "uthash.h"
Shane Snyder's avatar
Shane Snyder committed
38
#include "darshan.h"
39
#include "darshan-core.h"
Shane Snyder's avatar
Shane Snyder committed
40
#include "darshan-dynamic.h"
41

42
#ifdef DARSHAN_LUSTRE
43
#include <lustre/lustre_user.h>
44
#endif
45

46
extern char* __progname;
47
extern char* __progname_full;
48

49
/* internal variable delcarations */
50
static struct darshan_core_runtime *darshan_core = NULL;
51
static pthread_mutex_t darshan_core_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
52
53
54
static int using_mpi = 0;
static int my_rank = 0;
static int nprocs = 1;
55
static int darshan_mem_alignment = 1;
56
static long darshan_mod_mem_quota = DARSHAN_MOD_MEM_MAX;
57

58
59
60
static struct darshan_core_mnt_data mnt_data_array[DARSHAN_MAX_MNTS];
static int mnt_data_count = 0;

61
/* paths prefixed with the following directories are not tracked by darshan
 * (NULL-terminated list)
 */
char* darshan_path_exclusions[] = {
    "/etc/",
    "/dev/",
    "/usr/",
    "/bin/",
    "/boot/",
    "/lib/",
    "/opt/",
    "/sbin/",
    "/sys/",
    "/proc/",
    "/var/",
    NULL
};

/* paths prefixed with the following directories are tracked by darshan even if
 * they share a root with a path listed in darshan_path_exclusions
 * (NULL-terminated list)
 */
char* darshan_path_inclusions[] = {
    "/var/opt/cray/dws/mounts/",
    NULL
};

/* allow users to override the path exclusions */
char** user_darshan_path_exclusions = NULL;

87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#ifdef DARSHAN_BGQ
/* BG/Q module initializer; defined outside this file and only referenced
 * when darshan is built with DARSHAN_BGQ
 */
extern void bgq_runtime_initialize();
#endif

/* NULL-terminated array of init functions for modules which need to be
 * statically initialized by darshan at startup time;
 * darshan_core_initialize() calls each entry in order once the core
 * runtime pointer has been set
 */
void (*mod_static_init_fns[])(void) =
{
#ifdef DARSHAN_BGQ
    &bgq_runtime_initialize,
#endif
    NULL
};

102
103
104
105
106
107
108
#ifdef DARSHAN_LUSTRE
/* XXX need to use extern to get Lustre module's instrumentation function
 * since modules have no way of providing this to darshan-core
 */
extern void darshan_instrument_lustre_file(const char *filepath, int fd);
#endif

109
/* prototypes for internal helper functions */
110
#ifdef __DARSHAN_ENABLE_MMAP_LOGS
111
112
static void *darshan_init_mmap_log(
    struct darshan_core_runtime* core, int jobid);
113
#endif
114
static void darshan_log_record_hints_and_ver(
115
    struct darshan_core_runtime* core);
116
117
static void darshan_get_exe_and_mounts(
    struct darshan_core_runtime *core, int argc, char **argv);
118
119
static void darshan_fs_info_from_path(
    const char *path, struct darshan_fs_info *fs_info);
120
static int darshan_add_name_record_ref(
121
    struct darshan_core_runtime *core, darshan_record_id rec_id,
122
    const char *name, darshan_module_id mod_id);
123
124
static void darshan_get_user_name(
    char *user);
125
static void darshan_get_shared_records(
126
127
    struct darshan_core_runtime *core, darshan_record_id **shared_recs,
    int *shared_rec_cnt);
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
static void darshan_get_logfile_name(
    char* logfile_name, int jobid, time_t start_time);
static int darshan_log_open(
    char *logfile_name, darshan_core_log_fh *log_fh);
static int darshan_log_write_job_record(
    darshan_core_log_fh log_fh, struct darshan_core_runtime *core,   
    uint64_t *inout_off);
static int darshan_log_write_name_record_hash(
    darshan_core_log_fh log_fh, struct darshan_core_runtime *core,
    uint64_t *inout_off);
static int darshan_log_write_header(
    darshan_core_log_fh log_fh, struct darshan_core_runtime *core);
static int darshan_log_append(
    darshan_core_log_fh log_fh, struct darshan_core_runtime *core,
    void *buf, int count, uint64_t *inout_off);
void darshan_log_close(
    darshan_core_log_fh log_fh);
void darshan_log_finalize(
    char *logfile_name, double start_log_time);
147
static int darshan_deflate_buffer(
Shane Snyder's avatar
Shane Snyder committed
148
149
    void **pointers, int *lengths, int count, char *comp_buf,
    int *comp_buf_length);
Shane Snyder's avatar
Shane Snyder committed
150
151
static void darshan_core_cleanup(
    struct darshan_core_runtime* core);
152
static double darshan_core_wtime_absolute(void);
153

154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
/* acquire/release the mutex protecting the global darshan_core pointer */
#define DARSHAN_CORE_LOCK() pthread_mutex_lock(&darshan_core_mutex)
#define DARSHAN_CORE_UNLOCK() pthread_mutex_unlock(&darshan_core_mutex)

/* DARSHAN_WARN(fmt, ...): print a printf-style warning to stderr, prefixed
 * with "darshan_library_warning: " and terminated with ".\n"
 */
#define DARSHAN_WARN(__err_str, ...) do { \
    darshan_core_fprintf(stderr, "darshan_library_warning: " \
        __err_str ".\n", ## __VA_ARGS__); \
} while(0)

/* DARSHAN_CHECK_ERR(ret, fmt, ...): error check used while writing the log.
 *
 * If ret is non-zero, a warning built from fmt and the trailing arguments is
 * printed, the log file is unlinked if it was already created, and control
 * jumps to the enclosing function's "exit" label.
 *
 * The expansion site must provide: an int lvalue for ret, the locals
 * "log_created" and "logfile_name", and a label named "exit".
 *
 * NOTE: the variadic arguments must be forwarded to DARSHAN_WARN; callers
 * pass format strings containing conversions such as "%s".
 */
#ifdef HAVE_MPI

/* MPI variant: when running under MPI the error status is first OR-reduced
 * across all ranks (ret is updated in place), so every rank takes the same
 * branch; only rank 0 prints the warning and unlinks the log file
 */
#define DARSHAN_CHECK_ERR(__ret, __err_str, ...) do { \
    if(using_mpi) \
        PMPI_Allreduce(MPI_IN_PLACE, &__ret, 1, MPI_INT, MPI_LOR, MPI_COMM_WORLD); \
    if(__ret != 0) { \
        if(my_rank == 0) { \
            DARSHAN_WARN(__err_str, ## __VA_ARGS__); \
            if(log_created) \
                unlink(logfile_name); \
        } \
        goto exit; \
    } \
} while(0)

#else

/* Non-MPI variant: purely local check */
#define DARSHAN_CHECK_ERR(__ret, __err_str, ...) do { \
    if(__ret != 0) { \
        DARSHAN_WARN(__err_str, ## __VA_ARGS__); \
        if(log_created) \
            unlink(logfile_name); \
        goto exit; \
    } \
} while(0)

#endif

192
193
/* *********************************** */

Shane Snyder's avatar
Shane Snyder committed
194
void darshan_core_initialize(int argc, char **argv)
195
{
196
    struct darshan_core_runtime *init_core = NULL;
197
    int internal_timing_flag = 0;
198
    double init_start, init_time;
199
    char *envstr;
200
201
    char *jobid_str;
    int jobid;
202
    int ret;
203
    int i;
204
205
    int tmpval;
    double tmpfloat;
206

207
208
209
210
211
212
213
    /* bail out _before_ attempting to [re]set using_mpi */
    if (darshan_core != NULL)
        return;

#ifdef HAVE_MPI
    PMPI_Initialized(&using_mpi);

214
215
    PMPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    PMPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
216
#endif
217
218

    if(getenv("DARSHAN_INTERNAL_TIMING"))
219
    {
220
        internal_timing_flag = 1;
221
222
        init_start = darshan_core_wtime();
    }
223
224

    /* setup darshan runtime if darshan is enabled and hasn't been initialized already */
225
    if(!getenv("DARSHAN_DISABLE") && !darshan_core)
226
    {
227
        #if (__DARSHAN_MEM_ALIGNMENT < 1)
228
229
            #error Darshan must be configured with a positive value for --with-mem-align
        #endif
230
        envstr = getenv(DARSHAN_MEM_ALIGNMENT_OVERRIDE);
231
232
233
234
235
236
237
238
239
240
241
        if(envstr)
        {
            ret = sscanf(envstr, "%d", &tmpval);
            /* silently ignore if the env variable is set poorly */
            if(ret == 1 && tmpval > 0)
            {
                darshan_mem_alignment = tmpval;
            }
        }
        else
        {
242
            darshan_mem_alignment = __DARSHAN_MEM_ALIGNMENT;
243
244
245
        }

        /* avoid floating point errors on faulty input */
246
        if(darshan_mem_alignment < 1)
247
248
249
        {
            darshan_mem_alignment = 1;
        }
250

251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
        /* Use DARSHAN_JOBID_OVERRIDE for the env var for __DARSHAN_JOBID */
        envstr = getenv(DARSHAN_JOBID_OVERRIDE);
        if(!envstr)
        {
            envstr = __DARSHAN_JOBID;
        }

        /* find a job id */
        jobid_str = getenv(envstr);
        if(jobid_str)
        {
            /* in cobalt we can find it in env var */
            ret = sscanf(jobid_str, "%d", &jobid);
        }
        if(!jobid_str || ret != 1)
        {
            /* use pid as fall back */
            jobid = getpid();
        }

271
272
273
274
        /* set the memory quota for darshan modules' records */
        envstr = getenv(DARSHAN_MOD_MEM_OVERRIDE);
        if(envstr)
        {
275
            ret = sscanf(envstr, "%lf", &tmpfloat);
276
            /* silently ignore if the env variable is set poorly */
277
            if(ret == 1 && tmpfloat > 0)
278
            {
279
                darshan_mod_mem_quota = tmpfloat * 1024 * 1024; /* convert from MiB */
280
281
282
            }
        }

283
284
285
        /* allocate structure to track darshan core runtime information */
        init_core = malloc(sizeof(*init_core));
        if(init_core)
286
        {
287
            memset(init_core, 0, sizeof(*init_core));
288
289
290
291
            /* record absolute start time at startup so that we can later
             * generate relative times with this as a reference point.
             */
            init_core->wtime_offset = darshan_core_wtime_absolute();
292

293
294
        /* TODO: do we alloc new memory as we go or just do everything up front? */

295
296
297
298
299
#ifndef __DARSHAN_ENABLE_MMAP_LOGS
            /* just allocate memory for each log file region */
            init_core->log_hdr_p = malloc(sizeof(struct darshan_header));
            init_core->log_job_p = malloc(sizeof(struct darshan_job));
            init_core->log_exemnt_p = malloc(DARSHAN_EXE_LEN+1);
300
            init_core->log_name_p = malloc(DARSHAN_NAME_RECORD_BUF_SIZE);
301
            init_core->log_mod_p = malloc(darshan_mod_mem_quota);
302
303

            if(!(init_core->log_hdr_p) || !(init_core->log_job_p) ||
304
               !(init_core->log_exemnt_p) || !(init_core->log_name_p) ||
305
306
307
308
309
310
311
312
313
               !(init_core->log_mod_p))
            {
                free(init_core);
                return;
            }
            /* if allocation succeeds, zero fill memory regions */
            memset(init_core->log_hdr_p, 0, sizeof(struct darshan_header));
            memset(init_core->log_job_p, 0, sizeof(struct darshan_job));
            memset(init_core->log_exemnt_p, 0, DARSHAN_EXE_LEN+1);
314
            memset(init_core->log_name_p, 0, DARSHAN_NAME_RECORD_BUF_SIZE);
315
            memset(init_core->log_mod_p, 0, darshan_mod_mem_quota);
316
317
318
319
#else
            /* if mmap logs are enabled, we need to initialize the mmap region
             * before setting the corresponding log file region pointers
             */
320
321
            void *mmap_p = darshan_init_mmap_log(init_core, jobid);
            if(!mmap_p)
322
            {
323
324
                free(init_core);
                return;
325
326
            }

327
            /* set the memory pointers for each log file region */
328
            init_core->log_hdr_p = (struct darshan_header *)mmap_p;
329
            init_core->log_job_p = (struct darshan_job *)
330
                ((char *)init_core->log_hdr_p + sizeof(struct darshan_header));
331
            init_core->log_exemnt_p = (char *)
332
                ((char *)init_core->log_job_p + sizeof(struct darshan_job));
333
            init_core->log_name_p = (void *)
334
335
                ((char *)init_core->log_exemnt_p + DARSHAN_EXE_LEN + 1);
            init_core->log_mod_p = (void *)
336
                ((char *)init_core->log_name_p + DARSHAN_NAME_RECORD_BUF_SIZE);
337

338
            /* set header fields needed for the mmap log mechanism */
339
            init_core->log_hdr_p->comp_type = DARSHAN_NO_COMP;
340
            init_core->log_hdr_p->name_map.off =
341
                ((char *)init_core->log_name_p - (char *)init_core->log_hdr_p);
342
343
344
345
346
#endif

            /* set known header fields for the log file */
            strcpy(init_core->log_hdr_p->version_string, DARSHAN_LOG_VERSION);
            init_core->log_hdr_p->magic_nr = DARSHAN_MAGIC_NR;
347

348
349
350
351
352
            /* set known job-level metadata fields for the log file */
            init_core->log_job_p->uid = getuid();
            init_core->log_job_p->start_time = time(NULL);
            init_core->log_job_p->nprocs = nprocs;
            init_core->log_job_p->jobid = (int64_t)jobid;
353
354
355
356
357
358

            /* if we are using any hints to write the log file, then record those
             * hints with the darshan job information
             */
            darshan_log_record_hints_and_ver(init_core);

359
            /* collect information about command line and mounted file systems */
360
            darshan_get_exe_and_mounts(init_core, argc, argv);
361

362
363
364
365
            /* if darshan was successfully initialized, set the global pointer
             * and bootstrap any modules with static initialization routines
             */
            DARSHAN_CORE_LOCK();
366
            darshan_core = init_core;
367
368
369
370
371
372
373
374
            DARSHAN_CORE_UNLOCK();

            i = 0;
            while(mod_static_init_fns[i])
            {
                (*mod_static_init_fns[i])();
                i++;
            }
375
        }
376
377
    }

378
379
    if(internal_timing_flag)
    {
380
381
382
        init_time = darshan_core_wtime() - init_start;
#ifdef HAVE_MPI
        PMPI_Reduce(MPI_IN_PLACE, &init_time, 1,
383
            MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
384
385
386
387
388
        if(my_rank > 0) return;
#endif

        darshan_core_fprintf(stderr, "#darshan:<op>\t<nprocs>\t<time>\n");
        darshan_core_fprintf(stderr, "darshan:init\t%d\t%f\n", nprocs, init_time);
389
390
391
392
393
    }

    return;
}

Shane Snyder's avatar
Shane Snyder committed
394
void darshan_core_shutdown()
395
{
396
    struct darshan_core_runtime *final_core;
397
    double start_log_time;
398
    int internal_timing_flag = 0;
399
400
401
    double open1 = 0, open2 = 0;
    double job1 = 0, job2 = 0;
    double rec1 = 0, rec2 = 0;
402
403
    double mod1[DARSHAN_MAX_MODS] = {0};
    double mod2[DARSHAN_MAX_MODS] = {0};
404
    double header1 = 0, header2 = 0;
405
406
    double tm_end;
    int active_mods[DARSHAN_MAX_MODS] = {0};
407
    uint64_t gz_fp = 0;
408
409
410
411
412
    char *logfile_name = NULL;
    darshan_core_log_fh log_fh;
    int log_created = 0;
    int i;
    int ret;
413

Shane Snyder's avatar
Shane Snyder committed
414
    /* disable darhan-core while we shutdown */
415
    DARSHAN_CORE_LOCK();
416
    if(!darshan_core)
417
    {
418
        DARSHAN_CORE_UNLOCK();
419
420
        return;
    }
421
422
    final_core = darshan_core;
    darshan_core = NULL;
423
424
    DARSHAN_CORE_UNLOCK();

425
426
427
428
429
430
431
432
433
434
435
436
    /* grab some initial timing information */
#ifdef HAVE_MPI
    /* if using mpi, sync across procs first */
    if(using_mpi)
        PMPI_Barrier(MPI_COMM_WORLD);
#endif
    start_log_time = darshan_core_wtime();
    final_core->log_job_p->end_time = time(NULL);

    if(getenv("DARSHAN_INTERNAL_TIMING"))
        internal_timing_flag = 1;

437
#ifdef __DARSHAN_ENABLE_MMAP_LOGS
438
439
440
441
442
    /* remove the temporary mmap log files */
    /* NOTE: this unlink is not immediate as it must wait for the mapping
     * to no longer be referenced, which in our case happens when the
     * executable exits. If the application terminates mid-shutdown, then
     * there will be no mmap files and no final log file.
443
     */
444
    unlink(final_core->mmap_log_name);
445
#endif
Shane Snyder's avatar
Shane Snyder committed
446

447
448
449
450
    final_core->comp_buf = malloc(darshan_mod_mem_quota);
    logfile_name = malloc(PATH_MAX);
    if(!final_core->comp_buf || !logfile_name)
        goto exit;
451

452
453
    /* set which modules were used locally */
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
Shane Snyder's avatar
Shane Snyder committed
454
    {
455
456
        if(final_core->mod_array[i])
            active_mods[i] = 1;
Shane Snyder's avatar
Shane Snyder committed
457
    }
458

459
460
461
462
#ifdef HAVE_MPI
    darshan_record_id *shared_recs = NULL;
    darshan_record_id *mod_shared_recs = NULL;
    int shared_rec_cnt = 0;
463

464
    if(using_mpi)
465
    {
466
467
468
        /* allreduce locally active mods to determine globally active mods */
        PMPI_Allreduce(MPI_IN_PLACE, active_mods, DARSHAN_MAX_MODS, MPI_INT,
            MPI_SUM, MPI_COMM_WORLD);
469

470
471
472
473
474
        /* reduce to report first start and last end time across all ranks at rank 0 */
        PMPI_Reduce(MPI_IN_PLACE, &final_core->log_job_p->start_time,
            1, MPI_INT64_T, MPI_MIN, 0, MPI_COMM_WORLD);
        PMPI_Reduce(MPI_IN_PLACE, &final_core->log_job_p->end_time,
            1, MPI_INT64_T, MPI_MAX, 0, MPI_COMM_WORLD);
475

476
477
        /* get a list of records which are shared across all processes */
        darshan_get_shared_records(final_core, &shared_recs, &shared_rec_cnt);
478

479
480
481
482
        mod_shared_recs = malloc(shared_rec_cnt * sizeof(darshan_record_id));
        assert(mod_shared_recs);
    }
#endif
483

484
485
486
    /* get the log file name */
    darshan_get_logfile_name(logfile_name, final_core->log_job_p->jobid,
        final_core->log_job_p->start_time);
487
488
489
    if(strlen(logfile_name) == 0)
    {
        /* failed to generate log file name */
490
        goto exit;
491
492
493
    }

    if(internal_timing_flag)
494
495
496
        open1 = darshan_core_wtime();
    /* open the darshan log file */
    ret = darshan_log_open(logfile_name, &log_fh);
497
    if(internal_timing_flag)
498
        open2 = darshan_core_wtime();
499
    /* error out if unable to open log file */
500
501
    DARSHAN_CHECK_ERR(ret, "unable to create log file %s", logfile_name);
    log_created = 1;
502
503

    if(internal_timing_flag)
504
505
506
        job1 = darshan_core_wtime();
    /* write the the compressed darshan job information */
    ret = darshan_log_write_job_record(log_fh, final_core, &gz_fp);
507
    if(internal_timing_flag)
508
509
510
        job2 = darshan_core_wtime();
    /* error out if unable to write job information */
    DARSHAN_CHECK_ERR(ret, "unable to write job record to file %s", logfile_name);
511
512

    if(internal_timing_flag)
513
        rec1 = darshan_core_wtime();
514
    /* write the record name->id hash to the log file */
515
    final_core->log_hdr_p->name_map.off = gz_fp;
516
    ret = darshan_log_write_name_record_hash(log_fh, final_core, &gz_fp);
517
    if(internal_timing_flag)
518
519
520
521
        rec2 = darshan_core_wtime();
    final_core->log_hdr_p->name_map.len = gz_fp - final_core->log_hdr_p->name_map.off;
    /* error out if unable to write name records */
    DARSHAN_CHECK_ERR(ret, "unable to write name records to log file %s", logfile_name);
522
523
524
525
526

    /* loop over globally used darshan modules and:
     *      - get final output buffer
     *      - compress (zlib) provided output buffer
     *      - append compressed buffer to log file
527
     *      - add module map info (file offset/length) to log header
528
529
530
531
532
533
534
535
     *      - shutdown the module
     */
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
    {
        struct darshan_core_module* this_mod = final_core->mod_array[i];
        void* mod_buf = NULL;
        int mod_buf_sz = 0;

536
        if(!active_mods[i])
537
        {
538
539
            final_core->log_hdr_p->mod_map[i].off = 0;
            final_core->log_hdr_p->mod_map[i].len = 0;
540
541
542
543
            continue;
        }

        if(internal_timing_flag)
544
545
546
547
548
549
            mod1[i] = darshan_core_wtime();

#ifdef HAVE_MPI
        struct darshan_core_name_record_ref *ref = NULL;
        int mod_shared_rec_cnt = 0;
        int j;
550

551
        if(using_mpi)
552
        {
553
554
            /* set the shared record list for this module */
            for(j = 0; j < shared_rec_cnt; j++)
555
            {
556
557
558
559
560
561
562
                HASH_FIND(hlink, final_core->name_hash, &shared_recs[j],
                    sizeof(darshan_record_id), ref);
                assert(ref);
                if(DARSHAN_MOD_FLAG_ISSET(ref->global_mod_flags, i))
                {
                    mod_shared_recs[mod_shared_rec_cnt++] = shared_recs[j];
                }
563
            }
564
565

            /* allow the module an opportunity to reduce shared files */
566
567
568
            if(this_mod->mod_funcs.mod_redux_func && (mod_shared_recs > 0) &&
               (!getenv("DARSHAN_DISABLE_SHARED_REDUCTION")))
                this_mod->mod_funcs.mod_redux_func(mod_buf, MPI_COMM_WORLD, mod_shared_recs,
569
                    mod_shared_rec_cnt); 
570
        }
571
#endif
572
573
574
575

        /* if module is registered locally, get the corresponding output buffer
         * 
         * NOTE: this function can be used to run collective operations across
576
         * modules, if there are records shared globally.
577
578
579
         */
        if(this_mod)
        {
580
581
            mod_buf = final_core->mod_array[i]->rec_buf_start;
            mod_buf_sz = final_core->mod_array[i]->rec_buf_p - mod_buf;
582
            this_mod->mod_funcs.mod_shutdown_func(&mod_buf, &mod_buf_sz);
583
584
585
        }

        /* append this module's data to the darshan log */
586
        final_core->log_hdr_p->mod_map[i].off = gz_fp;
587
        ret = darshan_log_append(log_fh, final_core, mod_buf, mod_buf_sz, &gz_fp);
588
589
        final_core->log_hdr_p->mod_map[i].len =
            gz_fp - final_core->log_hdr_p->mod_map[i].off;
590

591
592
593
594
595
596
        /* XXX: DXT manages its own module memory buffers, so we need to
         * explicitly free them
         */
        if(i == DXT_POSIX_MOD || i == DXT_MPIIO_MOD)
            free(mod_buf);

597
        if(internal_timing_flag)
598
            mod2[i] = darshan_core_wtime();
599

600
601
602
        /* error out if unable to write module data */
        DARSHAN_CHECK_ERR(ret, "unable to write %s module data to log file %s",
            darshan_module_names[i], logfile_name);
603
    }
604
605

    if(internal_timing_flag)
606
607
608
609
610
        header1 = darshan_core_wtime();
    ret = darshan_log_write_header(log_fh, final_core);
    if(internal_timing_flag)
        header2 = darshan_core_wtime();
    DARSHAN_CHECK_ERR(ret, "unable to write header to file %s", logfile_name);
611

612
613
    /* done writing data, close the log file */
    darshan_log_close(log_fh);
614

615
616
    /* finalize log file name and permissions */
    darshan_log_finalize(logfile_name, start_log_time);
617

618
    if(internal_timing_flag)
619
    {
620
621
622
623
624
625
        double open_tm;
        double header_tm;
        double job_tm;
        double rec_tm;
        double mod_tm[DARSHAN_MAX_MODS];
        double all_tm;
626

627
        tm_end = darshan_core_wtime();
628

629
630
631
632
        open_tm = open2 - open1;
        header_tm = header2 - header1;
        job_tm = job2 - job1;
        rec_tm = rec2 - rec1;
633
        all_tm = tm_end - start_log_time;
634
        for(i = 0; i < DARSHAN_MAX_MODS; i++)
635
636
637
        {
            mod_tm[i] = mod2[i] - mod1[i];
        }
638

639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
#ifdef HAVE_MPI
        if(using_mpi)
        {
            PMPI_Reduce(MPI_IN_PLACE, &open_tm, 1,
                MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
            PMPI_Reduce(MPI_IN_PLACE, &header_tm, 1,
                MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
            PMPI_Reduce(MPI_IN_PLACE, &job_tm, 1,
                MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
            PMPI_Reduce(MPI_IN_PLACE, &rec_tm, 1,
                MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
            PMPI_Reduce(MPI_IN_PLACE, &all_tm, 1,
                MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
            PMPI_Reduce(MPI_IN_PLACE, mod_tm, DARSHAN_MAX_MODS,
                MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

            /* let rank 0 report the timing info */
            if(my_rank > 0)
                goto exit;
        }
#endif
660

661
662
663
664
665
666
        darshan_core_fprintf(stderr, "#darshan:<op>\t<nprocs>\t<time>\n");
        darshan_core_fprintf(stderr, "darshan:log_open\t%d\t%f\n", nprocs, open_tm);
        darshan_core_fprintf(stderr, "darshan:job_write\t%d\t%f\n", nprocs, job_tm);
        darshan_core_fprintf(stderr, "darshan:hash_write\t%d\t%f\n", nprocs, rec_tm);
        darshan_core_fprintf(stderr, "darshan:header_write\t%d\t%f\n", nprocs, header_tm);
        for(i = 0; i < DARSHAN_MAX_MODS; i++)
667
        {
668
669
670
            if(active_mods[i])
                darshan_core_fprintf(stderr, "darshan:%s_shutdown\t%d\t%f\n",
                    darshan_module_names[i], nprocs, mod_tm[i]);
671
        }
672
        darshan_core_fprintf(stderr, "darshan:core_shutdown\t%d\t%f\n", nprocs, all_tm);
673
    }
674

675
676
677
678
679
680
681
682
exit:
#ifdef HAVE_MPI
    free(shared_recs);
    free(mod_shared_recs);
#endif
    free(logfile_name);
    darshan_core_cleanup(final_core);

683
684
    return;
}
685

Shane Snyder's avatar
Shane Snyder committed
686
/* *********************************** */
687

688
#ifdef __DARSHAN_ENABLE_MMAP_LOGS
/* Create and map the temporary per-process log file used for mmap logs.
 *
 * core  - runtime structure; core->mmap_log_name receives the file name
 * jobid - job identifier embedded in the file name
 *
 * The file is sized to hold the header, the job record, the name record
 * buffer and the module memory quota, rounded up to a whole number of pages.
 * Its name combines the log directory (DARSHAN_MMAP_LOG_PATH_OVERRIDE or the
 * built-in default), the user name, the program name, the job id, a
 * host/time derived hash and the rank.
 *
 * Returns the address of the mapping, or NULL (after printing a warning and
 * removing any partially created file) on failure.
 */
static void *darshan_init_mmap_log(struct darshan_core_runtime* core, int jobid)
{
    int ret;
    int mmap_fd;
    size_t mmap_size;
    size_t page_size;
    long sys_page_size;
    char cuser[L_cuserid] = {0};
    uint64_t hlevel;
    char hname[HOST_NAME_MAX + 1] = {0};
    uint64_t logmod = 0;
    const char *envstr;
    const char *mmap_log_path;
    void *mmap_p;

    sys_page_size = sysconf(_SC_PAGESIZE);
    assert(sys_page_size > 0);
    page_size = (size_t)sys_page_size;

    /* computed as size_t: the module quota is a long and may exceed what
     * an int can represent
     */
    mmap_size = sizeof(struct darshan_header) + DARSHAN_JOB_RECORD_SIZE +
        DARSHAN_NAME_RECORD_BUF_SIZE + darshan_mod_mem_quota;
    if(mmap_size % page_size)
        mmap_size = ((mmap_size / page_size) + 1) * page_size;

    envstr = getenv(DARSHAN_MMAP_LOG_PATH_OVERRIDE);
    if(envstr)
        mmap_log_path = envstr;
    else
        mmap_log_path = DARSHAN_DEF_MMAP_LOG_PATH;

    darshan_get_user_name(cuser);

    /* generate a random number to help differentiate the temporary log */
    /* NOTE: job id is not sufficient for constructing a unique log file name,
     * since a job could be composed of multiple application runs, so we also
     * add a random number component to the log name
     */
    if(my_rank == 0)
    {
        hlevel = darshan_core_wtime_absolute() * 1000000;
        /* gethostname() need not NUL-terminate a truncated name, and leaves
         * the buffer unspecified on error
         */
        if(gethostname(hname, sizeof(hname)) != 0)
            hname[0] = '\0';
        hname[sizeof(hname) - 1] = '\0';
        logmod = darshan_hash((void*)hname,strlen(hname),hlevel);
    }
#ifdef HAVE_MPI
    /* share rank 0's value so all ranks of a run use the same component */
    if(using_mpi)
        PMPI_Bcast(&logmod, 1, MPI_UINT64_T, 0, MPI_COMM_WORLD);
#endif

    /* construct a unique temporary log file name for this process
     * to write mmap log data to
     */
    snprintf(core->mmap_log_name, PATH_MAX,
        "/%s/%s_%s_id%d_mmap-log-%" PRIu64 "-%d.darshan",
        mmap_log_path, cuser, __progname, jobid, logmod, my_rank);

    /* create the temporary mmapped darshan log */
    mmap_fd = open(core->mmap_log_name, O_CREAT|O_RDWR|O_EXCL , 0644);
    if(mmap_fd < 0)
    {
        darshan_core_fprintf(stderr, "darshan library warning: "
            "unable to create darshan log file %s\n", core->mmap_log_name);
        return(NULL);
    }

    /* TODO: ftruncate or just zero fill? */
    /* allocate the necessary space in the log file */
    ret = ftruncate(mmap_fd, (off_t)mmap_size);
    if(ret < 0)
    {
        darshan_core_fprintf(stderr, "darshan library warning: "
            "unable to allocate darshan log file %s\n", core->mmap_log_name);
        close(mmap_fd);
        unlink(core->mmap_log_name);
        return(NULL);
    }

    /* create the memory map for darshan's data structures so they are
     * persisted to file as the application executes; the region is read
     * back as well as written, so request both protections explicitly
     */
    mmap_p = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, mmap_fd, 0);
    if(mmap_p == MAP_FAILED)
    {
        darshan_core_fprintf(stderr, "darshan library warning: "
            "unable to mmap darshan log file %s\n", core->mmap_log_name);
        close(mmap_fd);
        unlink(core->mmap_log_name);
        return(NULL);
    }

    /* close darshan log file (this does *not* unmap the log file) */
    close(mmap_fd);

    return(mmap_p);
}
#endif
779

780
/* record any hints used to write the darshan log in the job data */
781
static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* core)
782
783
{
    char* hints;
784
    char* job_hints;
785
786
787
788
789
790
    int meta_remain = 0;
    char* m;

    /* check environment variable to see if the default MPI file hints have
     * been overridden
     */
791
    hints = getenv(DARSHAN_LOG_HINTS_OVERRIDE);
792
793
    if(!hints)
    {
794
        hints = __DARSHAN_LOG_HINTS;
795
796
797
798
799
    }

    if(!hints || strlen(hints) < 1)
        return;

800
801
    job_hints = strdup(hints);
    if(!job_hints)
802
803
804
        return;

    meta_remain = DARSHAN_JOB_METADATA_LEN -
805
        strlen(core->log_job_p->metadata) - 1;
806
807
    if(meta_remain >= (strlen(PACKAGE_VERSION) + 9))
    {
808
        sprintf(core->log_job_p->metadata, "lib_ver=%s\n", PACKAGE_VERSION);
809
810
        meta_remain -= (strlen(PACKAGE_VERSION) + 9);
    }
811
    if(meta_remain >= (3 + strlen(job_hints)))
812
    {
813
        m = core->log_job_p->metadata + strlen(core->log_job_p->metadata);
814
        /* We have room to store the hints in the metadata portion of
815
         * the job structure.  We just prepend an h= to the hints list.  The
816
817
818
         * metadata parser will ignore = characters that appear in the value
         * portion of the metadata key/value pair.
         */
819
        sprintf(m, "h=%s\n", job_hints);
820
    }
821
    free(job_hints);
822
823
824
825

    return;
}

826
827
/* qsort()-style comparator for struct darshan_core_mnt_data entries.
 *
 * Orders entries by the length of their path, longest first: returns -1
 * when a's path is longer than b's, 1 when it is shorter, and 0 when both
 * paths have the same length.
 */
static int mnt_data_cmp(const void* a, const void* b)
{
    const struct darshan_core_mnt_data *d_a = (const struct darshan_core_mnt_data*)a;
    const struct darshan_core_mnt_data *d_b = (const struct darshan_core_mnt_data*)b;
    size_t len_a = strlen(d_a->path);
    size_t len_b = strlen(d_b->path);

    if(len_a > len_b)
        return(-1);
    else if(len_a < len_b)
        return(1);
    else
        return(0);
}

/* adds an entry to table of mounted file systems */
840
static void add_entry(char* buf, int* space_left, struct mntent* entry)
841
{
842
    int i;
843
844
845
846
    int ret;
    char tmp_mnt[256];
    struct statfs statfsbuf;

847
848
849
850
851
852
853
854
855
856
    /* avoid adding the same mount points multiple times -- to limit
     * storage space and potential statfs, ioctl, etc calls
     */
    for(i = 0; i < mnt_data_count; i++)
    {
        if((strncmp(mnt_data_array[i].path, entry->mnt_dir, DARSHAN_MAX_MNT_PATH) == 0) &&
           (strncmp(mnt_data_array[i].type, entry->mnt_type, DARSHAN_MAX_MNT_PATH) == 0))
            return;
    }

857
858
859
860
861
862
863
864
865
866
867
868
    strncpy(mnt_data_array[mnt_data_count].path, entry->mnt_dir,
        DARSHAN_MAX_MNT_PATH-1);
    strncpy(mnt_data_array[mnt_data_count].type, entry->mnt_type,
        DARSHAN_MAX_MNT_TYPE-1);
    /* NOTE: we now try to detect the preferred block size for each file 
     * system using fstatfs().  On Lustre we assume a size of 1 MiB 
     * because fstatfs() reports 4 KiB. 
     */
#ifndef LL_SUPER_MAGIC
#define LL_SUPER_MAGIC 0x0BD00BD0
#endif
    ret = statfs(entry->mnt_dir, &statfsbuf);
869
    mnt_data_array[mnt_data_count].fs_info.fs_type = statfsbuf.f_type;
870
    if(ret == 0 && statfsbuf.f_type != LL_SUPER_MAGIC)
871
        mnt_data_array[mnt_data_count].fs_info.block_size = statfsbuf.f_bsize;
872
    else if(ret == 0 && statfsbuf.f_type == LL_SUPER_MAGIC)
873
        mnt_data_array[mnt_data_count].fs_info.block_size = 1024*1024;
874
    else
875
        mnt_data_array[mnt_data_count].fs_info.block_size = 4096;
876

877
#ifdef DARSHAN_LUSTRE
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
    /* attempt to retrieve OST and MDS counts from Lustre */
    mnt_data_array[mnt_data_count].fs_info.ost_count = -1;
    mnt_data_array[mnt_data_count].fs_info.mdt_count = -1;
    if ( statfsbuf.f_type == LL_SUPER_MAGIC )
    {
        int n_ost, n_mdt;
        int ret_ost, ret_mdt;
        DIR *mount_dir;

        mount_dir = opendir( entry->mnt_dir );
        if ( mount_dir  ) 
        {
            /* n_ost and n_mdt are used for both input and output to ioctl */
            n_ost = 0;
            n_mdt = 1;

            ret_ost = ioctl( dirfd(mount_dir), LL_IOC_GETOBDCOUNT, &n_ost );
            ret_mdt = ioctl( dirfd(mount_dir), LL_IOC_GETOBDCOUNT, &n_mdt );

897
            if ( !(ret_ost < 0 || ret_mdt < 0) )
898
899
900
901
902
903
904
            {
                mnt_data_array[mnt_data_count].fs_info.ost_count = n_ost;
                mnt_data_array[mnt_data_count].fs_info.mdt_count = n_mdt;
            }
            closedir( mount_dir );
        }
    }
905
#endif
906

907
    /* store mount information with the job-level metadata in darshan log */
908
    ret = snprintf(tmp_mnt, 256, "\n%s\t%s",
909
910
911
        entry->mnt_type, entry->mnt_dir);
    if(ret < 256 && strlen(tmp_mnt) <= (*space_left))
    {
912
        strcat(buf, tmp_mnt);
913
914
915
916
917
918
919
        (*space_left) -= strlen(tmp_mnt);
    }

    mnt_data_count++;
    return;
}

920
/* darshan_get_exe_and_mounts()
921
922
 *
 * collects command line and list of mounted file systems into a string that
923
 * will be stored with the job-level metadata
924
 */
925
static void darshan_get_exe_and_mounts(struct darshan_core_runtime *core,
926
    int argc, char **argv)
927
928
929
930
{
    FILE* tab;
    struct mntent *entry;
    char* exclude;
931
932
933
    char* truncate_string = "<TRUNCATED>";
    int truncate_offset;
    int space_left = DARSHAN_EXE_LEN;
934
935
936
    FILE *fh;
    int i, ii;
    char cmdl[DARSHAN_EXE_LEN];
937
938
    int tmp_index = 0;
    int skip = 0;
939
940
941
    char* env_exclusions;
    char* string;
    char* token;
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960

    /* skip these fs types */
    static char* fs_exclusions[] = {
        "tmpfs",
        "proc",
        "sysfs",
        "devpts",
        "binfmt_misc",
        "fusectl",
        "debugfs",
        "securityfs",
        "nfsd",
        "none",
        "rpc_pipefs",
        "hugetlbfs",
        "cgroup",
        NULL
    };

961
962
963
964
965
966
967
968
969
    /* Check if user has set the env variable DARSHAN_EXCLUDE_DIRS */
    env_exclusions = getenv("DARSHAN_EXCLUDE_DIRS");
    if(env_exclusions)
    {
        fs_exclusions[0]=NULL;
        /* if DARSHAN_EXCLUDE_DIRS=none, do not exclude any dir */
        if(strncmp(env_exclusions,"none",strlen(env_exclusions))>=0)
        {
            if (my_rank == 0) 
970
                darshan_core_fprintf(stderr, "Darshan info: no system dirs will be excluded\n");
971
972
973
974
975
            darshan_path_exclusions[0]=NULL;
        }
        else
        {
            if (my_rank == 0) 
976
                darshan_core_fprintf(stderr, "Darshan info: the following system dirs will be excluded: %s\n",
Shane Snyder's avatar
Shane Snyder committed
977
                    env_exclusions);
978
            string = strdup(env_exclusions);
979
980
            i = 0;
            /* get the comma separated number of directories */
981
982
            token = strtok(string, ",");
            while (token != NULL)
983
            {
984
985
                i++;
                token = strtok(NULL, ",");
986
987
            }
            user_darshan_path_exclusions=(char **)malloc((i+1)*sizeof(char *));
988
989
            assert(user_darshan_path_exclusions);

990
            i = 0;
991
992
993
            strcpy(string, env_exclusions);
            token = strtok(string, ",");
            while (token != NULL)
994
995
            {
                user_darshan_path_exclusions[i]=(char *)malloc(strlen(token)+1);
996
                assert(user_darshan_path_exclusions[i]);
997
998
                strcpy(user_darshan_path_exclusions[i],token);
                i++;
999
                token = strtok(NULL, ",");
1000
            }
For faster browsing, not all history is shown. View entire blame