darshan-core.c 70.5 KB
Newer Older
1
/*
Shane Snyder's avatar
Shane Snyder committed
2 3 4
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
5 6
 */

7
#define _XOPEN_SOURCE 500
8
#define _GNU_SOURCE
9

10 11 12 13 14 15
#include "darshan-runtime-config.h"

#include <stdio.h>
#ifdef HAVE_MNTENT_H
#include <mntent.h>
#endif
16 17 18 19 20
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <limits.h>
#include <pthread.h>
21
#include <fcntl.h>
Shane Snyder's avatar
Shane Snyder committed
22
#include <stdarg.h>
23 24
#include <dirent.h>
#include <sys/ioctl.h>
25 26
#include <sys/types.h>
#include <sys/stat.h>
27
#include <sys/mman.h>
28
#include <sys/time.h>
29
#include <sys/vfs.h>
30
#include <zlib.h>
31
#include <assert.h>
32

33 34 35 36
#ifdef HAVE_MPI
#include <mpi.h>
#endif

37
#include "uthash.h"
Shane Snyder's avatar
Shane Snyder committed
38
#include "darshan.h"
39
#include "darshan-core.h"
Shane Snyder's avatar
Shane Snyder committed
40
#include "darshan-dynamic.h"
41

42
#ifdef DARSHAN_LUSTRE
43
#include <lustre/lustre_user.h>
44
#endif
45

46
extern char* __progname;
47
extern char* __progname_full;
48

49
/* internal variable declarations */
50
static struct darshan_core_runtime *darshan_core = NULL;
51
static pthread_mutex_t darshan_core_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
52 53 54
static int using_mpi = 0;
static int my_rank = 0;
static int nprocs = 1;
55
static int darshan_mem_alignment = 1;
56
static long darshan_mod_mem_quota = DARSHAN_MOD_MEM_MAX;
57

58 59 60
static struct darshan_core_mnt_data mnt_data_array[DARSHAN_MAX_MNTS];
static int mnt_data_count = 0;

61
/* paths prefixed with the following directories are not tracked by darshan */
62
char* darshan_path_exclusions[] = {
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
    "/etc/",
    "/dev/",
    "/usr/",
    "/bin/",
    "/boot/",
    "/lib/",
    "/opt/",
    "/sbin/",
    "/sys/",
    "/proc/",
    "/var/",
    NULL
};
/* paths prefixed with the following directories are tracked by darshan even if
 * they share a root with a path listed in darshan_path_exclusions
 */
char* darshan_path_inclusions[] = {
    "/var/opt/cray/dws/mounts/",
    NULL
82 83
};

84 85 86
/* allow users to override the path exclusions */
char** user_darshan_path_exclusions = NULL;

87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
#ifdef DARSHAN_BGQ
extern void bgq_runtime_initialize();
#endif

/* array of init functions for modules which need to be statically
 * initialized by darshan at startup time
 */
void (*mod_static_init_fns[])(void) =
{
#ifdef DARSHAN_BGQ
    &bgq_runtime_initialize,
#endif
    NULL
};

102 103 104 105 106 107 108
#ifdef DARSHAN_LUSTRE
/* XXX need to use extern to get Lustre module's instrumentation function
 * since modules have no way of providing this to darshan-core
 */
extern void darshan_instrument_lustre_file(const char *filepath, int fd);
#endif

109
/* prototypes for internal helper functions */
110
#ifdef __DARSHAN_ENABLE_MMAP_LOGS
111 112
static void *darshan_init_mmap_log(
    struct darshan_core_runtime* core, int jobid);
113
#endif
114
static void darshan_log_record_hints_and_ver(
115
    struct darshan_core_runtime* core);
116 117
static void darshan_get_exe_and_mounts(
    struct darshan_core_runtime *core, int argc, char **argv);
118 119
static void darshan_fs_info_from_path(
    const char *path, struct darshan_fs_info *fs_info);
120
static int darshan_add_name_record_ref(
121
    struct darshan_core_runtime *core, darshan_record_id rec_id,
122
    const char *name, darshan_module_id mod_id);
123 124
static void darshan_get_user_name(
    char *user);
125
#ifdef HAVE_MPI
126
static void darshan_get_shared_records(
127 128
    struct darshan_core_runtime *core, darshan_record_id **shared_recs,
    int *shared_rec_cnt);
129
#endif
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
static void darshan_get_logfile_name(
    char* logfile_name, int jobid, time_t start_time);
static int darshan_log_open(
    char *logfile_name, darshan_core_log_fh *log_fh);
static int darshan_log_write_job_record(
    darshan_core_log_fh log_fh, struct darshan_core_runtime *core,   
    uint64_t *inout_off);
static int darshan_log_write_name_record_hash(
    darshan_core_log_fh log_fh, struct darshan_core_runtime *core,
    uint64_t *inout_off);
static int darshan_log_write_header(
    darshan_core_log_fh log_fh, struct darshan_core_runtime *core);
static int darshan_log_append(
    darshan_core_log_fh log_fh, struct darshan_core_runtime *core,
    void *buf, int count, uint64_t *inout_off);
void darshan_log_close(
    darshan_core_log_fh log_fh);
void darshan_log_finalize(
    char *logfile_name, double start_log_time);
149
static int darshan_deflate_buffer(
Shane Snyder's avatar
Shane Snyder committed
150 151
    void **pointers, int *lengths, int count, char *comp_buf,
    int *comp_buf_length);
Shane Snyder's avatar
Shane Snyder committed
152 153
static void darshan_core_cleanup(
    struct darshan_core_runtime* core);
154
static double darshan_core_wtime_absolute(void);
155

156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193
/* acquire/release the mutex guarding the global darshan_core pointer */
#define DARSHAN_CORE_LOCK() pthread_mutex_lock(&darshan_core_mutex)
#define DARSHAN_CORE_UNLOCK() pthread_mutex_unlock(&darshan_core_mutex)

/* DARSHAN_WARN()
 *
 * Emit a printf-style warning on stderr. __err_str must be a string
 * literal (it is pasted between a "darshan_library_warning: " prefix and
 * a ".\n" suffix); any further arguments are forwarded to the format.
 */
#define DARSHAN_WARN(__err_str, ...) do { \
    darshan_core_fprintf(stderr, "darshan_library_warning: " \
        __err_str ".\n", ## __VA_ARGS__); \
} while(0)

/* DARSHAN_CHECK_ERR()
 *
 * Error-check helper for the log writing path. If __ret is non-zero a
 * warning is printed, the log file is unlinked when `log_created` is set,
 * and control jumps to the caller's `exit` label. The expansion site must
 * therefore provide `log_created`, `logfile_name` and an `exit:` label.
 *
 * The format arguments are forwarded to DARSHAN_WARN(); previously they
 * were dropped, leaving "%s"-style conversions without a matching argument.
 */
#ifdef HAVE_MPI

/* MPI variant of darshan logging helpers: when MPI is in use, __ret is
 * first OR-reduced (in place) across MPI_COMM_WORLD so that all ranks take
 * the same branch; only rank 0 warns and unlinks
 */
#define DARSHAN_CHECK_ERR(__ret, __err_str, ...) do { \
    if(using_mpi) \
        PMPI_Allreduce(MPI_IN_PLACE, &__ret, 1, MPI_INT, MPI_LOR, MPI_COMM_WORLD); \
    if(__ret != 0) { \
        if(my_rank == 0) { \
            DARSHAN_WARN(__err_str, ## __VA_ARGS__); \
            if(log_created) \
                unlink(logfile_name); \
        } \
        goto exit; \
    } \
} while(0)

#else

/* Non-MPI variant of darshan logging helpers */
#define DARSHAN_CHECK_ERR(__ret, __err_str, ...) do { \
    if(__ret != 0) { \
        DARSHAN_WARN(__err_str, ## __VA_ARGS__); \
        if(log_created) \
            unlink(logfile_name); \
        goto exit; \
    } \
} while(0)

#endif

194 195
/* *********************************** */

Shane Snyder's avatar
Shane Snyder committed
196
void darshan_core_initialize(int argc, char **argv)
197
{
198
    struct darshan_core_runtime *init_core = NULL;
199
    int internal_timing_flag = 0;
200
    double init_start, init_time;
201
    char *envstr;
202 203
    char *jobid_str;
    int jobid;
204
    int ret;
205
    int i;
206 207
    int tmpval;
    double tmpfloat;
208

209 210 211 212 213 214
    /* bail out _before_ attempting to [re]set using_mpi */
    if (darshan_core != NULL)
        return;

#ifdef HAVE_MPI
    PMPI_Initialized(&using_mpi);
215 216 217 218 219
    if(using_mpi)
    {
        PMPI_Comm_size(MPI_COMM_WORLD, &nprocs);
        PMPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    }
220
#endif
221 222

    if(getenv("DARSHAN_INTERNAL_TIMING"))
223
    {
224
        internal_timing_flag = 1;
225 226
        init_start = darshan_core_wtime();
    }
227 228

    /* setup darshan runtime if darshan is enabled and hasn't been initialized already */
229
    if(!getenv("DARSHAN_DISABLE") && !darshan_core)
230
    {
231
        #if (__DARSHAN_MEM_ALIGNMENT < 1)
232 233
            #error Darshan must be configured with a positive value for --with-mem-align
        #endif
234
        envstr = getenv(DARSHAN_MEM_ALIGNMENT_OVERRIDE);
235 236 237 238 239 240 241 242 243 244 245
        if(envstr)
        {
            ret = sscanf(envstr, "%d", &tmpval);
            /* silently ignore if the env variable is set poorly */
            if(ret == 1 && tmpval > 0)
            {
                darshan_mem_alignment = tmpval;
            }
        }
        else
        {
246
            darshan_mem_alignment = __DARSHAN_MEM_ALIGNMENT;
247 248 249
        }

        /* avoid floating point errors on faulty input */
250
        if(darshan_mem_alignment < 1)
251 252 253
        {
            darshan_mem_alignment = 1;
        }
254

255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
        /* Use DARSHAN_JOBID_OVERRIDE for the env var for __DARSHAN_JOBID */
        envstr = getenv(DARSHAN_JOBID_OVERRIDE);
        if(!envstr)
        {
            envstr = __DARSHAN_JOBID;
        }

        /* find a job id */
        jobid_str = getenv(envstr);
        if(jobid_str)
        {
            /* in cobalt we can find it in env var */
            ret = sscanf(jobid_str, "%d", &jobid);
        }
        if(!jobid_str || ret != 1)
        {
            /* use pid as fall back */
            jobid = getpid();
        }

275 276 277 278
        /* set the memory quota for darshan modules' records */
        envstr = getenv(DARSHAN_MOD_MEM_OVERRIDE);
        if(envstr)
        {
279
            ret = sscanf(envstr, "%lf", &tmpfloat);
280
            /* silently ignore if the env variable is set poorly */
281
            if(ret == 1 && tmpfloat > 0)
282
            {
283
                darshan_mod_mem_quota = tmpfloat * 1024 * 1024; /* convert from MiB */
284 285 286
            }
        }

287 288 289
        /* allocate structure to track darshan core runtime information */
        init_core = malloc(sizeof(*init_core));
        if(init_core)
290
        {
291
            memset(init_core, 0, sizeof(*init_core));
292 293 294 295
            /* record absolute start time at startup so that we can later
             * generate relative times with this as a reference point.
             */
            init_core->wtime_offset = darshan_core_wtime_absolute();
296

297 298
        /* TODO: do we alloc new memory as we go or just do everything up front? */

299 300 301 302 303
#ifndef __DARSHAN_ENABLE_MMAP_LOGS
            /* just allocate memory for each log file region */
            init_core->log_hdr_p = malloc(sizeof(struct darshan_header));
            init_core->log_job_p = malloc(sizeof(struct darshan_job));
            init_core->log_exemnt_p = malloc(DARSHAN_EXE_LEN+1);
304
            init_core->log_name_p = malloc(DARSHAN_NAME_RECORD_BUF_SIZE);
305
            init_core->log_mod_p = malloc(darshan_mod_mem_quota);
306 307

            if(!(init_core->log_hdr_p) || !(init_core->log_job_p) ||
308
               !(init_core->log_exemnt_p) || !(init_core->log_name_p) ||
309 310 311 312 313 314 315 316 317
               !(init_core->log_mod_p))
            {
                free(init_core);
                return;
            }
            /* if allocation succeeds, zero fill memory regions */
            memset(init_core->log_hdr_p, 0, sizeof(struct darshan_header));
            memset(init_core->log_job_p, 0, sizeof(struct darshan_job));
            memset(init_core->log_exemnt_p, 0, DARSHAN_EXE_LEN+1);
318
            memset(init_core->log_name_p, 0, DARSHAN_NAME_RECORD_BUF_SIZE);
319
            memset(init_core->log_mod_p, 0, darshan_mod_mem_quota);
320 321 322 323
#else
            /* if mmap logs are enabled, we need to initialize the mmap region
             * before setting the corresponding log file region pointers
             */
324 325
            void *mmap_p = darshan_init_mmap_log(init_core, jobid);
            if(!mmap_p)
326
            {
327 328
                free(init_core);
                return;
329 330
            }

331
            /* set the memory pointers for each log file region */
332
            init_core->log_hdr_p = (struct darshan_header *)mmap_p;
333
            init_core->log_job_p = (struct darshan_job *)
334
                ((char *)init_core->log_hdr_p + sizeof(struct darshan_header));
335
            init_core->log_exemnt_p = (char *)
336
                ((char *)init_core->log_job_p + sizeof(struct darshan_job));
337
            init_core->log_name_p = (void *)
338 339
                ((char *)init_core->log_exemnt_p + DARSHAN_EXE_LEN + 1);
            init_core->log_mod_p = (void *)
340
                ((char *)init_core->log_name_p + DARSHAN_NAME_RECORD_BUF_SIZE);
341

342
            /* set header fields needed for the mmap log mechanism */
343
            init_core->log_hdr_p->comp_type = DARSHAN_NO_COMP;
344
            init_core->log_hdr_p->name_map.off =
345
                ((char *)init_core->log_name_p - (char *)init_core->log_hdr_p);
346 347 348 349 350
#endif

            /* set known header fields for the log file */
            strcpy(init_core->log_hdr_p->version_string, DARSHAN_LOG_VERSION);
            init_core->log_hdr_p->magic_nr = DARSHAN_MAGIC_NR;
351

352 353 354 355 356
            /* set known job-level metadata fields for the log file */
            init_core->log_job_p->uid = getuid();
            init_core->log_job_p->start_time = time(NULL);
            init_core->log_job_p->nprocs = nprocs;
            init_core->log_job_p->jobid = (int64_t)jobid;
357 358 359 360 361 362

            /* if we are using any hints to write the log file, then record those
             * hints with the darshan job information
             */
            darshan_log_record_hints_and_ver(init_core);

363
            /* collect information about command line and mounted file systems */
364
            darshan_get_exe_and_mounts(init_core, argc, argv);
365

366 367 368 369
            /* if darshan was successfully initialized, set the global pointer
             * and bootstrap any modules with static initialization routines
             */
            DARSHAN_CORE_LOCK();
370
            darshan_core = init_core;
371 372 373 374 375 376 377 378
            DARSHAN_CORE_UNLOCK();

            i = 0;
            while(mod_static_init_fns[i])
            {
                (*mod_static_init_fns[i])();
                i++;
            }
379
        }
380 381
    }

382 383
    if(internal_timing_flag)
    {
384 385
        init_time = darshan_core_wtime() - init_start;
#ifdef HAVE_MPI
386 387 388 389 390
        if(using_mpi)
        {
            PMPI_Reduce(MPI_IN_PLACE, &init_time, 1,
                MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        }
391 392 393 394 395
        if(my_rank > 0) return;
#endif

        darshan_core_fprintf(stderr, "#darshan:<op>\t<nprocs>\t<time>\n");
        darshan_core_fprintf(stderr, "darshan:init\t%d\t%f\n", nprocs, init_time);
396 397 398 399 400
    }

    return;
}

Shane Snyder's avatar
Shane Snyder committed
401
void darshan_core_shutdown()
402
{
403
    struct darshan_core_runtime *final_core;
404
    double start_log_time;
405
    int internal_timing_flag = 0;
406 407 408
    double open1 = 0, open2 = 0;
    double job1 = 0, job2 = 0;
    double rec1 = 0, rec2 = 0;
409 410
    double mod1[DARSHAN_MAX_MODS] = {0};
    double mod2[DARSHAN_MAX_MODS] = {0};
411
    double header1 = 0, header2 = 0;
412 413
    double tm_end;
    int active_mods[DARSHAN_MAX_MODS] = {0};
414
    uint64_t gz_fp = 0;
415 416 417 418 419
    char *logfile_name = NULL;
    darshan_core_log_fh log_fh;
    int log_created = 0;
    int i;
    int ret;
420

Shane Snyder's avatar
Shane Snyder committed
421
    /* disable darhan-core while we shutdown */
422
    DARSHAN_CORE_LOCK();
423
    if(!darshan_core)
424
    {
425
        DARSHAN_CORE_UNLOCK();
426 427
        return;
    }
428 429
    final_core = darshan_core;
    darshan_core = NULL;
430 431
    DARSHAN_CORE_UNLOCK();

432 433 434 435 436 437 438 439 440 441 442 443
    /* grab some initial timing information */
#ifdef HAVE_MPI
    /* if using mpi, sync across procs first */
    if(using_mpi)
        PMPI_Barrier(MPI_COMM_WORLD);
#endif
    start_log_time = darshan_core_wtime();
    final_core->log_job_p->end_time = time(NULL);

    if(getenv("DARSHAN_INTERNAL_TIMING"))
        internal_timing_flag = 1;

444
#ifdef __DARSHAN_ENABLE_MMAP_LOGS
445 446 447 448 449
    /* remove the temporary mmap log files */
    /* NOTE: this unlink is not immediate as it must wait for the mapping
     * to no longer be referenced, which in our case happens when the
     * executable exits. If the application terminates mid-shutdown, then
     * there will be no mmap files and no final log file.
450
     */
451
    unlink(final_core->mmap_log_name);
452
#endif
Shane Snyder's avatar
Shane Snyder committed
453

454 455 456 457
    final_core->comp_buf = malloc(darshan_mod_mem_quota);
    logfile_name = malloc(PATH_MAX);
    if(!final_core->comp_buf || !logfile_name)
        goto exit;
458

459 460
    /* set which modules were used locally */
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
Shane Snyder's avatar
Shane Snyder committed
461
    {
462 463
        if(final_core->mod_array[i])
            active_mods[i] = 1;
Shane Snyder's avatar
Shane Snyder committed
464
    }
465

466 467 468 469
#ifdef HAVE_MPI
    darshan_record_id *shared_recs = NULL;
    darshan_record_id *mod_shared_recs = NULL;
    int shared_rec_cnt = 0;
470

471
    if(using_mpi)
472
    {
473 474 475
        /* allreduce locally active mods to determine globally active mods */
        PMPI_Allreduce(MPI_IN_PLACE, active_mods, DARSHAN_MAX_MODS, MPI_INT,
            MPI_SUM, MPI_COMM_WORLD);
476

477 478 479 480 481
        /* reduce to report first start and last end time across all ranks at rank 0 */
        PMPI_Reduce(MPI_IN_PLACE, &final_core->log_job_p->start_time,
            1, MPI_INT64_T, MPI_MIN, 0, MPI_COMM_WORLD);
        PMPI_Reduce(MPI_IN_PLACE, &final_core->log_job_p->end_time,
            1, MPI_INT64_T, MPI_MAX, 0, MPI_COMM_WORLD);
482

483 484
        /* get a list of records which are shared across all processes */
        darshan_get_shared_records(final_core, &shared_recs, &shared_rec_cnt);
485

486 487 488 489
        mod_shared_recs = malloc(shared_rec_cnt * sizeof(darshan_record_id));
        assert(mod_shared_recs);
    }
#endif
490

491 492 493
    /* get the log file name */
    darshan_get_logfile_name(logfile_name, final_core->log_job_p->jobid,
        final_core->log_job_p->start_time);
494 495 496
    if(strlen(logfile_name) == 0)
    {
        /* failed to generate log file name */
497
        goto exit;
498 499 500
    }

    if(internal_timing_flag)
501 502 503
        open1 = darshan_core_wtime();
    /* open the darshan log file */
    ret = darshan_log_open(logfile_name, &log_fh);
504
    if(internal_timing_flag)
505
        open2 = darshan_core_wtime();
506
    /* error out if unable to open log file */
507 508
    DARSHAN_CHECK_ERR(ret, "unable to create log file %s", logfile_name);
    log_created = 1;
509 510

    if(internal_timing_flag)
511 512 513
        job1 = darshan_core_wtime();
    /* write the the compressed darshan job information */
    ret = darshan_log_write_job_record(log_fh, final_core, &gz_fp);
514
    if(internal_timing_flag)
515 516 517
        job2 = darshan_core_wtime();
    /* error out if unable to write job information */
    DARSHAN_CHECK_ERR(ret, "unable to write job record to file %s", logfile_name);
518 519

    if(internal_timing_flag)
520
        rec1 = darshan_core_wtime();
521
    /* write the record name->id hash to the log file */
522
    final_core->log_hdr_p->name_map.off = gz_fp;
523
    ret = darshan_log_write_name_record_hash(log_fh, final_core, &gz_fp);
524
    if(internal_timing_flag)
525 526 527 528
        rec2 = darshan_core_wtime();
    final_core->log_hdr_p->name_map.len = gz_fp - final_core->log_hdr_p->name_map.off;
    /* error out if unable to write name records */
    DARSHAN_CHECK_ERR(ret, "unable to write name records to log file %s", logfile_name);
529 530 531 532 533

    /* loop over globally used darshan modules and:
     *      - get final output buffer
     *      - compress (zlib) provided output buffer
     *      - append compressed buffer to log file
534
     *      - add module map info (file offset/length) to log header
535 536 537 538 539 540 541 542
     *      - shutdown the module
     */
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
    {
        struct darshan_core_module* this_mod = final_core->mod_array[i];
        void* mod_buf = NULL;
        int mod_buf_sz = 0;

543
        if(!active_mods[i])
544
        {
545 546
            final_core->log_hdr_p->mod_map[i].off = 0;
            final_core->log_hdr_p->mod_map[i].len = 0;
547 548 549 550
            continue;
        }

        if(internal_timing_flag)
551 552 553 554 555 556
            mod1[i] = darshan_core_wtime();

#ifdef HAVE_MPI
        struct darshan_core_name_record_ref *ref = NULL;
        int mod_shared_rec_cnt = 0;
        int j;
557

558
        if(using_mpi)
559
        {
560 561
            /* set the shared record list for this module */
            for(j = 0; j < shared_rec_cnt; j++)
562
            {
563 564 565 566 567 568 569
                HASH_FIND(hlink, final_core->name_hash, &shared_recs[j],
                    sizeof(darshan_record_id), ref);
                assert(ref);
                if(DARSHAN_MOD_FLAG_ISSET(ref->global_mod_flags, i))
                {
                    mod_shared_recs[mod_shared_rec_cnt++] = shared_recs[j];
                }
570
            }
571 572

            /* allow the module an opportunity to reduce shared files */
573 574 575
            if(this_mod->mod_funcs.mod_redux_func && (mod_shared_recs > 0) &&
               (!getenv("DARSHAN_DISABLE_SHARED_REDUCTION")))
                this_mod->mod_funcs.mod_redux_func(mod_buf, MPI_COMM_WORLD, mod_shared_recs,
576
                    mod_shared_rec_cnt); 
577
        }
578
#endif
579 580 581 582

        /* if module is registered locally, get the corresponding output buffer
         * 
         * NOTE: this function can be used to run collective operations across
583
         * modules, if there are records shared globally.
584 585 586
         */
        if(this_mod)
        {
587 588
            mod_buf = final_core->mod_array[i]->rec_buf_start;
            mod_buf_sz = final_core->mod_array[i]->rec_buf_p - mod_buf;
589
            this_mod->mod_funcs.mod_shutdown_func(&mod_buf, &mod_buf_sz);
590 591 592
        }

        /* append this module's data to the darshan log */
593
        final_core->log_hdr_p->mod_map[i].off = gz_fp;
594
        ret = darshan_log_append(log_fh, final_core, mod_buf, mod_buf_sz, &gz_fp);
595 596
        final_core->log_hdr_p->mod_map[i].len =
            gz_fp - final_core->log_hdr_p->mod_map[i].off;
597

598 599 600 601 602 603
        /* XXX: DXT manages its own module memory buffers, so we need to
         * explicitly free them
         */
        if(i == DXT_POSIX_MOD || i == DXT_MPIIO_MOD)
            free(mod_buf);

604
        if(internal_timing_flag)
605
            mod2[i] = darshan_core_wtime();
606

607 608 609
        /* error out if unable to write module data */
        DARSHAN_CHECK_ERR(ret, "unable to write %s module data to log file %s",
            darshan_module_names[i], logfile_name);
610
    }
611 612

    if(internal_timing_flag)
613 614 615 616 617
        header1 = darshan_core_wtime();
    ret = darshan_log_write_header(log_fh, final_core);
    if(internal_timing_flag)
        header2 = darshan_core_wtime();
    DARSHAN_CHECK_ERR(ret, "unable to write header to file %s", logfile_name);
618

619 620
    /* done writing data, close the log file */
    darshan_log_close(log_fh);
621

622 623
    /* finalize log file name and permissions */
    darshan_log_finalize(logfile_name, start_log_time);
624

625
    if(internal_timing_flag)
626
    {
627 628 629 630 631 632
        double open_tm;
        double header_tm;
        double job_tm;
        double rec_tm;
        double mod_tm[DARSHAN_MAX_MODS];
        double all_tm;
633

634
        tm_end = darshan_core_wtime();
635

636 637 638 639
        open_tm = open2 - open1;
        header_tm = header2 - header1;
        job_tm = job2 - job1;
        rec_tm = rec2 - rec1;
640
        all_tm = tm_end - start_log_time;
641
        for(i = 0; i < DARSHAN_MAX_MODS; i++)
642 643 644
        {
            mod_tm[i] = mod2[i] - mod1[i];
        }
645

646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
#ifdef HAVE_MPI
        if(using_mpi)
        {
            PMPI_Reduce(MPI_IN_PLACE, &open_tm, 1,
                MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
            PMPI_Reduce(MPI_IN_PLACE, &header_tm, 1,
                MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
            PMPI_Reduce(MPI_IN_PLACE, &job_tm, 1,
                MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
            PMPI_Reduce(MPI_IN_PLACE, &rec_tm, 1,
                MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
            PMPI_Reduce(MPI_IN_PLACE, &all_tm, 1,
                MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
            PMPI_Reduce(MPI_IN_PLACE, mod_tm, DARSHAN_MAX_MODS,
                MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

            /* let rank 0 report the timing info */
            if(my_rank > 0)
                goto exit;
        }
#endif
667

668 669 670 671 672 673
        darshan_core_fprintf(stderr, "#darshan:<op>\t<nprocs>\t<time>\n");
        darshan_core_fprintf(stderr, "darshan:log_open\t%d\t%f\n", nprocs, open_tm);
        darshan_core_fprintf(stderr, "darshan:job_write\t%d\t%f\n", nprocs, job_tm);
        darshan_core_fprintf(stderr, "darshan:hash_write\t%d\t%f\n", nprocs, rec_tm);
        darshan_core_fprintf(stderr, "darshan:header_write\t%d\t%f\n", nprocs, header_tm);
        for(i = 0; i < DARSHAN_MAX_MODS; i++)
674
        {
675 676 677
            if(active_mods[i])
                darshan_core_fprintf(stderr, "darshan:%s_shutdown\t%d\t%f\n",
                    darshan_module_names[i], nprocs, mod_tm[i]);
678
        }
679
        darshan_core_fprintf(stderr, "darshan:core_shutdown\t%d\t%f\n", nprocs, all_tm);
680
    }
681

682 683
exit:
#ifdef HAVE_MPI
684 685 686 687 688
    if(using_mpi)
    {
        free(shared_recs);
        free(mod_shared_recs);
    }
689 690 691 692
#endif
    free(logfile_name);
    darshan_core_cleanup(final_core);

693 694
    return;
}
695

Shane Snyder's avatar
Shane Snyder committed
696
/* *********************************** */
697

698
#ifdef __DARSHAN_ENABLE_MMAP_LOGS
699 700 701 702 703 704 705
static void *darshan_init_mmap_log(struct darshan_core_runtime* core, int jobid)
{
    int ret;
    int mmap_fd;
    int mmap_size;
    int sys_page_size;
    char cuser[L_cuserid] = {0};
706 707 708
    uint64_t hlevel;
    char hname[HOST_NAME_MAX];
    uint64_t logmod;
709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728
    char *envstr;
    char *mmap_log_path;
    void *mmap_p;

    sys_page_size = sysconf(_SC_PAGESIZE);
    assert(sys_page_size > 0);

    mmap_size = sizeof(struct darshan_header) + DARSHAN_JOB_RECORD_SIZE +
        + DARSHAN_NAME_RECORD_BUF_SIZE + darshan_mod_mem_quota;
    if(mmap_size % sys_page_size)
        mmap_size = ((mmap_size / sys_page_size) + 1) * sys_page_size;

    envstr = getenv(DARSHAN_MMAP_LOG_PATH_OVERRIDE);
    if(envstr)
        mmap_log_path = envstr;
    else
        mmap_log_path = DARSHAN_DEF_MMAP_LOG_PATH;

    darshan_get_user_name(cuser);

729 730 731 732 733 734 735
    /* generate a random number to help differentiate the temporary log */
    /* NOTE: job id is not sufficient for constructing a unique log file name,
     * since a job could be composed of multiple application runs, so we also
     * add a random number component to the log name
     */
    if(my_rank == 0)
    {
736
        hlevel = darshan_core_wtime_absolute() * 1000000;
737 738 739
        (void)gethostname(hname, sizeof(hname));
        logmod = darshan_hash((void*)hname,strlen(hname),hlevel);
    }
740
    PMPI_Bcast(&logmod, 1, MPI_UINT64_T, 0, MPI_COMM_WORLD);
741

742 743 744 745
    /* construct a unique temporary log file name for this process
     * to write mmap log data to
     */
    snprintf(core->mmap_log_name, PATH_MAX,
746 747
        "/%s/%s_%s_id%d_mmap-log-%" PRIu64 "-%d.darshan",
        mmap_log_path, cuser, __progname, jobid, logmod, my_rank);
748 749 750 751 752

    /* create the temporary mmapped darshan log */
    mmap_fd = open(core->mmap_log_name, O_CREAT|O_RDWR|O_EXCL , 0644);
    if(mmap_fd < 0)
    {
753
        darshan_core_fprintf(stderr, "darshan library warning: "
754 755 756 757 758 759 760 761 762
            "unable to create darshan log file %s\n", core->mmap_log_name);
        return(NULL);
    }

    /* TODO: ftruncate or just zero fill? */
    /* allocate the necessary space in the log file */
    ret = ftruncate(mmap_fd, mmap_size);
    if(ret < 0)
    {
763
        darshan_core_fprintf(stderr, "darshan library warning: "
764 765 766 767 768 769 770 771 772 773 774 775
            "unable to allocate darshan log file %s\n", core->mmap_log_name);
        close(mmap_fd);
        unlink(core->mmap_log_name);
        return(NULL);
    }

    /* create the memory map for darshan's data structures so they are
     * persisted to file as the application executes
     */
    mmap_p = mmap(NULL, mmap_size, PROT_WRITE, MAP_SHARED, mmap_fd, 0);
    if(mmap_p == MAP_FAILED)
    {
776
        darshan_core_fprintf(stderr, "darshan library warning: "
777 778 779 780 781 782 783 784 785 786 787
            "unable to mmap darshan log file %s\n", core->mmap_log_name);
        close(mmap_fd);
        unlink(core->mmap_log_name);
        return(NULL);
    }

    /* close darshan log file (this does *not* unmap the log file) */
    close(mmap_fd);

    return(mmap_p);
}
788
#endif
789

790
/* record any hints used to write the darshan log in the job data */
791
static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* core)
792 793
{
    char* hints;
794
    char* job_hints;
795 796 797 798 799 800
    int meta_remain = 0;
    char* m;

    /* check environment variable to see if the default MPI file hints have
     * been overridden
     */
801
    hints = getenv(DARSHAN_LOG_HINTS_OVERRIDE);
802 803
    if(!hints)
    {
804
        hints = __DARSHAN_LOG_HINTS;
805 806 807 808 809
    }

    if(!hints || strlen(hints) < 1)
        return;

810 811
    job_hints = strdup(hints);
    if(!job_hints)
812 813 814
        return;

    meta_remain = DARSHAN_JOB_METADATA_LEN -
815
        strlen(core->log_job_p->metadata) - 1;
816 817
    if(meta_remain >= (strlen(PACKAGE_VERSION) + 9))
    {
818
        sprintf(core->log_job_p->metadata, "lib_ver=%s\n", PACKAGE_VERSION);
819 820
        meta_remain -= (strlen(PACKAGE_VERSION) + 9);
    }
821
    if(meta_remain >= (3 + strlen(job_hints)))
822
    {
823
        m = core->log_job_p->metadata + strlen(core->log_job_p->metadata);
824
        /* We have room to store the hints in the metadata portion of
825
         * the job structure.  We just prepend an h= to the hints list.  The
826 827 828
         * metadata parser will ignore = characters that appear in the value
         * portion of the metadata key/value pair.
         */
829
        sprintf(m, "h=%s\n", job_hints);
830
    }
831
    free(job_hints);
832 833 834 835

    return;
}

836 837
/* mnt_data_cmp()
 *
 * Three-way comparison of two darshan_core_mnt_data entries by the length
 * of their mount paths: the entry with the LONGER path compares as smaller
 * (-1), the shorter one as larger (1), equal lengths as 0. The signature
 * matches a qsort()-style comparator (presumably used to sort the mount
 * table longest-path-first -- confirm against the caller).
 */
static int mnt_data_cmp(const void* a, const void* b)
{
    const struct darshan_core_mnt_data *lhs = a;
    const struct darshan_core_mnt_data *rhs = b;
    size_t lhs_len = strlen(lhs->path);
    size_t rhs_len = strlen(rhs->path);

    /* each relational test yields 0 or 1, so the difference is -1, 0 or 1 */
    return((rhs_len > lhs_len) - (lhs_len > rhs_len));
}

/* adds an entry to table of mounted file systems */
850
static void add_entry(char* buf, int* space_left, struct mntent* entry)
851
{
852
    int i;
853 854 855 856
    int ret;
    char tmp_mnt[256];
    struct statfs statfsbuf;

857 858 859 860 861 862 863 864 865 866
    /* avoid adding the same mount points multiple times -- to limit
     * storage space and potential statfs, ioctl, etc calls
     */
    for(i = 0; i < mnt_data_count; i++)
    {
        if((strncmp(mnt_data_array[i].path, entry->mnt_dir, DARSHAN_MAX_MNT_PATH) == 0) &&
           (strncmp(mnt_data_array[i].type, entry->mnt_type, DARSHAN_MAX_MNT_PATH) == 0))
            return;
    }

867 868 869 870 871 872 873 874 875 876 877 878
    strncpy(mnt_data_array[mnt_data_count].path, entry->mnt_dir,
        DARSHAN_MAX_MNT_PATH-1);
    strncpy(mnt_data_array[mnt_data_count].type, entry->mnt_type,
        DARSHAN_MAX_MNT_TYPE-1);
    /* NOTE: we now try to detect the preferred block size for each file 
     * system using fstatfs().  On Lustre we assume a size of 1 MiB 
     * because fstatfs() reports 4 KiB. 
     */
#ifndef LL_SUPER_MAGIC
#define LL_SUPER_MAGIC 0x0BD00BD0
#endif
    ret = statfs(entry->mnt_dir, &statfsbuf);
879
    mnt_data_array[mnt_data_count].fs_info.fs_type = statfsbuf.f_type;
880
    if(ret == 0 && statfsbuf.f_type != LL_SUPER_MAGIC)
881
        mnt_data_array[mnt_data_count].fs_info.block_size = statfsbuf.f_bsize;
882
    else if(ret == 0 && statfsbuf.f_type == LL_SUPER_MAGIC)
883
        mnt_data_array[mnt_data_count].fs_info.block_size = 1024*1024;
884
    else
885
        mnt_data_array[mnt_data_count].fs_info.block_size = 4096;
886

887
#ifdef DARSHAN_LUSTRE
888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906
    /* attempt to retrieve OST and MDS counts from Lustre */
    mnt_data_array[mnt_data_count].fs_info.ost_count = -1;
    mnt_data_array[mnt_data_count].fs_info.mdt_count = -1;
    if ( statfsbuf.f_type == LL_SUPER_MAGIC )
    {
        int n_ost, n_mdt;
        int ret_ost, ret_mdt;
        DIR *mount_dir;

        mount_dir = opendir( entry->mnt_dir );
        if ( mount_dir  ) 
        {
            /* n_ost and n_mdt are used for both input and output to ioctl */
            n_ost = 0;
            n_mdt = 1;

            ret_ost = ioctl( dirfd(mount_dir), LL_IOC_GETOBDCOUNT, &n_ost );
            ret_mdt = ioctl( dirfd(mount_dir), LL_IOC_GETOBDCOUNT, &n_mdt );

907
            if ( !(ret_ost < 0 || ret_mdt < 0) )
908 909 910 911 912 913 914
            {
                mnt_data_array[mnt_data_count].fs_info.ost_count = n_ost;
                mnt_data_array[mnt_data_count].fs_info.mdt_count = n_mdt;
            }
            closedir( mount_dir );
        }
    }
915
#endif
916

917
    /* store mount information with the job-level metadata in darshan log */
918
    ret = snprintf(tmp_mnt, 256, "\n%s\t%s",
919 920 921
        entry->mnt_type, entry->mnt_dir);
    if(ret < 256 && strlen(tmp_mnt) <= (*space_left))
    {
922
        strcat(buf, tmp_mnt);
923 924 925 926 927 928 929
        (*space_left) -= strlen(tmp_mnt);
    }

    mnt_data_count++;
    return;
}

930
/* darshan_get_exe_and_mounts()
931 932
 *
 * collects command line and list of mounted file systems into a string that
933
 * will be stored with the job-level metadata
934
 */
935
static void darshan_get_exe_and_mounts(struct darshan_core_runtime *core,
936
    int argc, char **argv)
937 938 939 940
{
    FILE* tab;
    struct mntent *entry;
    char* exclude;
941 942 943
    char* truncate_string = "<TRUNCATED>";
    int truncate_offset;
    int space_left = DARSHAN_EXE_LEN;
944 945 946
    FILE *fh;
    int i, ii;
    char cmdl[DARSHAN_EXE_LEN];