darshan-core.c 72.1 KB
Newer Older
1
/*
Shane Snyder's avatar
Shane Snyder committed
2 3 4
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
5 6
 */

7
#define _XOPEN_SOURCE 500
8
#define _GNU_SOURCE
9

10 11 12 13 14 15
#include "darshan-runtime-config.h"

#include <stdio.h>
#ifdef HAVE_MNTENT_H
#include <mntent.h>
#endif
16 17 18 19 20
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <limits.h>
#include <pthread.h>
21
#include <fcntl.h>
Shane Snyder's avatar
Shane Snyder committed
22
#include <stdarg.h>
23 24
#include <dirent.h>
#include <sys/ioctl.h>
25 26
#include <sys/types.h>
#include <sys/stat.h>
27
#include <sys/mman.h>
28
#include <sys/time.h>
29
#include <sys/vfs.h>
30
#include <zlib.h>
31
#include <assert.h>
32

33 34 35 36
#ifdef HAVE_MPI
#include <mpi.h>
#endif

37
#include "uthash.h"
Shane Snyder's avatar
Shane Snyder committed
38
#include "darshan.h"
39
#include "darshan-core.h"
Shane Snyder's avatar
Shane Snyder committed
40
#include "darshan-dynamic.h"
41

42
#ifdef DARSHAN_LUSTRE
43
#include <lustre/lustre_user.h>
44
#endif
45

46
extern char* __progname;
47
extern char* __progname_full;
48

49
/* internal variable delcarations */
50
static struct darshan_core_runtime *darshan_core = NULL;
51
static pthread_mutex_t darshan_core_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
52 53 54
static int using_mpi = 0;
static int my_rank = 0;
static int nprocs = 1;
55
static int darshan_mem_alignment = 1;
56
static long darshan_mod_mem_quota = DARSHAN_MOD_MEM_MAX;
57

58 59 60
static struct darshan_core_mnt_data mnt_data_array[DARSHAN_MAX_MNTS];
static int mnt_data_count = 0;

61
/* paths prefixed with the following directories are not tracked by darshan
 * (the list is terminated by a NULL entry)
 */
char* darshan_path_exclusions[] = {
    "/etc/",
    "/dev/",
    "/usr/",
    "/bin/",
    "/boot/",
    "/lib/",
    "/opt/",
    "/sbin/",
    "/sys/",
    "/proc/",
    "/var/",
    NULL
};
/* paths prefixed with the following directories are tracked by darshan even if
 * they share a root with a path listed in darshan_path_exclusions
 * (the list is terminated by a NULL entry)
 */
char* darshan_path_inclusions[] = {
    "/var/opt/cray/dws/mounts/",
    NULL
};

/* allow users to override the path exclusions */
char** user_darshan_path_exclusions = NULL;

87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
#ifdef DARSHAN_BGQ
extern void bgq_runtime_initialize();
#endif

/* array of init functions for modules which need to be statically
 * initialized by darshan at startup time; the array is terminated by a
 * NULL entry, which is what darshan_core_initialize() iterates up to
 */
void (*mod_static_init_fns[])(void) =
{
#ifdef DARSHAN_BGQ
    &bgq_runtime_initialize,
#endif
    NULL
};

#ifdef DARSHAN_LUSTRE
/* XXX need to use extern to get Lustre module's instrumentation function
 * since modules have no way of providing this to darshan-core
 */
extern void darshan_instrument_lustre_file(const char *filepath, int fd);
#endif

109
/* prototypes for internal helper functions */
110
#ifdef __DARSHAN_ENABLE_MMAP_LOGS
111 112
static void *darshan_init_mmap_log(
    struct darshan_core_runtime* core, int jobid);
113
#endif
114
static void darshan_log_record_hints_and_ver(
115
    struct darshan_core_runtime* core);
116 117
static void darshan_get_exe_and_mounts(
    struct darshan_core_runtime *core, int argc, char **argv);
118 119
static void darshan_fs_info_from_path(
    const char *path, struct darshan_fs_info *fs_info);
120
static int darshan_add_name_record_ref(
121
    struct darshan_core_runtime *core, darshan_record_id rec_id,
122
    const char *name, darshan_module_id mod_id);
123 124
static void darshan_get_user_name(
    char *user);
125
#ifdef HAVE_MPI
126
static void darshan_get_shared_records(
127 128
    struct darshan_core_runtime *core, darshan_record_id **shared_recs,
    int *shared_rec_cnt);
129
#endif
130
static void darshan_get_logfile_name(
131
    char* logfile_name, struct darshan_core_runtime* core);
132
static int darshan_log_open(
133 134
    char *logfile_name, struct darshan_core_runtime *core,
    darshan_core_log_fh *log_fh);
135
static int darshan_log_write_job_record(
Philip Carns's avatar
Philip Carns committed
136
    darshan_core_log_fh log_fh, struct darshan_core_runtime *core,
137 138 139 140 141 142 143 144 145 146 147 148 149
    uint64_t *inout_off);
static int darshan_log_write_name_record_hash(
    darshan_core_log_fh log_fh, struct darshan_core_runtime *core,
    uint64_t *inout_off);
static int darshan_log_write_header(
    darshan_core_log_fh log_fh, struct darshan_core_runtime *core);
static int darshan_log_append(
    darshan_core_log_fh log_fh, struct darshan_core_runtime *core,
    void *buf, int count, uint64_t *inout_off);
void darshan_log_close(
    darshan_core_log_fh log_fh);
void darshan_log_finalize(
    char *logfile_name, double start_log_time);
150
static int darshan_deflate_buffer(
Shane Snyder's avatar
Shane Snyder committed
151 152
    void **pointers, int *lengths, int count, char *comp_buf,
    int *comp_buf_length);
Shane Snyder's avatar
Shane Snyder committed
153 154
static void darshan_core_cleanup(
    struct darshan_core_runtime* core);
155
static double darshan_core_wtime_absolute(void);
156

157 158 159 160 161 162 163 164 165 166 167 168 169
/* acquire/release the lock protecting the darshan_core pointer */
#define DARSHAN_CORE_LOCK() pthread_mutex_lock(&darshan_core_mutex)
#define DARSHAN_CORE_UNLOCK() pthread_mutex_unlock(&darshan_core_mutex)

/* print a printf-style warning to stderr, prefixed with
 * "darshan_library_warning: " and terminated with ".\n";
 * __err_str must be a string literal since it is concatenated in place
 */
#define DARSHAN_WARN(__err_str, ...) do { \
    darshan_core_fprintf(stderr, "darshan_library_warning: " \
        __err_str ".\n", ## __VA_ARGS__); \
} while(0)

/* DARSHAN_CHECK_ERR(ret, fmt, ...): if ret is non-zero, print a warning,
 * remove the log file if it was already created, and jump to the 'exit'
 * label. The expansion relies on the caller having 'log_created',
 * 'logfile_name' and an 'exit' label in scope (plus 'final_core' for the
 * MPI variant).
 *
 * NOTE: the format arguments are forwarded to DARSHAN_WARN; dropping them
 * would leave the conversion specifiers in the message without matching
 * arguments.
 */
#ifdef HAVE_MPI

/* MPI variant of darshan logging helpers: the error code is first OR-ed
 * across all ranks so every rank takes the same branch, and only rank 0
 * prints the warning and unlinks the log
 */
#define DARSHAN_CHECK_ERR(__ret, __err_str, ...) do { \
    if(using_mpi) \
        PMPI_Allreduce(MPI_IN_PLACE, &__ret, 1, MPI_INT, MPI_LOR, final_core->mpi_comm); \
    if(__ret != 0) { \
        if(my_rank == 0) { \
            DARSHAN_WARN(__err_str, ## __VA_ARGS__); \
            if(log_created) \
                unlink(logfile_name); \
        } \
        goto exit; \
    } \
} while(0)

#else

/* Non-MPI variant of darshan logging helpers */
#define DARSHAN_CHECK_ERR(__ret, __err_str, ...) do { \
    if(__ret != 0) { \
        DARSHAN_WARN(__err_str, ## __VA_ARGS__); \
        if(log_created) \
            unlink(logfile_name); \
        goto exit; \
    } \
} while(0)

#endif

195 196
/* *********************************** */

Shane Snyder's avatar
Shane Snyder committed
197
void darshan_core_initialize(int argc, char **argv)
198
{
199
    struct darshan_core_runtime *init_core = NULL;
200
    int internal_timing_flag = 0;
201
    double init_start, init_time;
202
    char *envstr;
203 204
    char *jobid_str;
    int jobid;
205
    int ret;
206
    int i;
207 208
    int tmpval;
    double tmpfloat;
209

210 211
    /* setup darshan runtime if darshan is enabled and hasn't been initialized already */
    if (darshan_core != NULL || getenv("DARSHAN_DISABLE"))
212 213
        return;

214
    if(getenv("DARSHAN_INTERNAL_TIMING"))
215
    {
216
        internal_timing_flag = 1;
217 218
        init_start = darshan_core_wtime();
    }
219

220 221 222 223 224
    #if (__DARSHAN_MEM_ALIGNMENT < 1)
        #error Darshan must be configured with a positive value for --with-mem-align
    #endif
    envstr = getenv(DARSHAN_MEM_ALIGNMENT_OVERRIDE);
    if(envstr)
225
    {
226 227 228
        ret = sscanf(envstr, "%d", &tmpval);
        /* silently ignore if the env variable is set poorly */
        if(ret == 1 && tmpval > 0)
229
        {
230
            darshan_mem_alignment = tmpval;
231
        }
232 233 234 235 236
    }
    else
    {
        darshan_mem_alignment = __DARSHAN_MEM_ALIGNMENT;
    }
237

238 239 240 241 242
    /* avoid floating point errors on faulty input */
    if(darshan_mem_alignment < 1)
    {
        darshan_mem_alignment = 1;
    }
243

244 245 246 247 248 249
    /* Use DARSHAN_JOBID_OVERRIDE for the env var for __DARSHAN_JOBID */
    envstr = getenv(DARSHAN_JOBID_OVERRIDE);
    if(!envstr)
    {
        envstr = __DARSHAN_JOBID;
    }
250

251 252 253 254 255 256 257 258 259 260 261 262
    /* find a job id */
    jobid_str = getenv(envstr);
    if(jobid_str)
    {
        /* in cobalt we can find it in env var */
        ret = sscanf(jobid_str, "%d", &jobid);
    }
    if(!jobid_str || ret != 1)
    {
        /* use pid as fall back */
        jobid = getpid();
    }
263

264 265 266 267 268 269 270
    /* set the memory quota for darshan modules' records */
    envstr = getenv(DARSHAN_MOD_MEM_OVERRIDE);
    if(envstr)
    {
        ret = sscanf(envstr, "%lf", &tmpfloat);
        /* silently ignore if the env variable is set poorly */
        if(ret == 1 && tmpfloat > 0)
271
        {
272
            darshan_mod_mem_quota = tmpfloat * 1024 * 1024; /* convert from MiB */
273
        }
274
    }
275

276 277 278 279 280 281 282 283 284 285 286 287 288
    /* allocate structure to track darshan core runtime information */
    init_core = malloc(sizeof(*init_core));
    if(init_core)
    {
        memset(init_core, 0, sizeof(*init_core));
        /* record absolute start time at startup so that we can later
         * generate relative times with this as a reference point.
         */
        init_core->wtime_offset = darshan_core_wtime_absolute();

#ifdef HAVE_MPI
        PMPI_Initialized(&using_mpi);
        if(using_mpi)
289
        {
290 291 292 293 294
            PMPI_Comm_dup(MPI_COMM_WORLD, &init_core->mpi_comm);
            PMPI_Comm_size(init_core->mpi_comm, &nprocs);
            PMPI_Comm_rank(init_core->mpi_comm, &my_rank);
        }
#endif
295

296
    /* TODO: do we alloc new memory as we go or just do everything up front? */
297

298
#ifndef __DARSHAN_ENABLE_MMAP_LOGS
299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
        /* just allocate memory for each log file region */
        init_core->log_hdr_p = malloc(sizeof(struct darshan_header));
        init_core->log_job_p = malloc(sizeof(struct darshan_job));
        init_core->log_exemnt_p = malloc(DARSHAN_EXE_LEN+1);
        init_core->log_name_p = malloc(DARSHAN_NAME_RECORD_BUF_SIZE);
        init_core->log_mod_p = malloc(darshan_mod_mem_quota);

        if(!(init_core->log_hdr_p) || !(init_core->log_job_p) ||
           !(init_core->log_exemnt_p) || !(init_core->log_name_p) ||
           !(init_core->log_mod_p))
        {
            free(init_core);
            return;
        }
        /* if allocation succeeds, zero fill memory regions */
        memset(init_core->log_hdr_p, 0, sizeof(struct darshan_header));
        memset(init_core->log_job_p, 0, sizeof(struct darshan_job));
        memset(init_core->log_exemnt_p, 0, DARSHAN_EXE_LEN+1);
        memset(init_core->log_name_p, 0, DARSHAN_NAME_RECORD_BUF_SIZE);
        memset(init_core->log_mod_p, 0, darshan_mod_mem_quota);
319
#else
320 321 322 323 324 325 326 327 328
        /* if mmap logs are enabled, we need to initialize the mmap region
         * before setting the corresponding log file region pointers
         */
        void *mmap_p = darshan_init_mmap_log(init_core, jobid);
        if(!mmap_p)
        {
            free(init_core);
            return;
        }
329

330 331 332 333 334 335 336 337 338 339 340 341 342 343 344
        /* set the memory pointers for each log file region */
        init_core->log_hdr_p = (struct darshan_header *)mmap_p;
        init_core->log_job_p = (struct darshan_job *)
            ((char *)init_core->log_hdr_p + sizeof(struct darshan_header));
        init_core->log_exemnt_p = (char *)
            ((char *)init_core->log_job_p + sizeof(struct darshan_job));
        init_core->log_name_p = (void *)
            ((char *)init_core->log_exemnt_p + DARSHAN_EXE_LEN + 1);
        init_core->log_mod_p = (void *)
            ((char *)init_core->log_name_p + DARSHAN_NAME_RECORD_BUF_SIZE);

        /* set header fields needed for the mmap log mechanism */
        init_core->log_hdr_p->comp_type = DARSHAN_NO_COMP;
        init_core->log_hdr_p->name_map.off =
            ((char *)init_core->log_name_p - (char *)init_core->log_hdr_p);
345 346
#endif

347 348 349
        /* set known header fields for the log file */
        strcpy(init_core->log_hdr_p->version_string, DARSHAN_LOG_VERSION);
        init_core->log_hdr_p->magic_nr = DARSHAN_MAGIC_NR;
350

351 352 353 354 355
        /* set known job-level metadata fields for the log file */
        init_core->log_job_p->uid = getuid();
        init_core->log_job_p->start_time = time(NULL);
        init_core->log_job_p->nprocs = nprocs;
        init_core->log_job_p->jobid = (int64_t)jobid;
356

357 358 359 360
        /* if we are using any hints to write the log file, then record those
         * hints with the darshan job information
         */
        darshan_log_record_hints_and_ver(init_core);
361

362 363
        /* collect information about command line and mounted file systems */
        darshan_get_exe_and_mounts(init_core, argc, argv);
364

365 366 367 368 369 370
        /* if darshan was successfully initialized, set the global pointer
         * and bootstrap any modules with static initialization routines
         */
        DARSHAN_CORE_LOCK();
        darshan_core = init_core;
        DARSHAN_CORE_UNLOCK();
371

372 373 374 375 376
        i = 0;
        while(mod_static_init_fns[i])
        {
            (*mod_static_init_fns[i])();
            i++;
377
        }
378 379
    }

380 381
    if(internal_timing_flag)
    {
382 383
        init_time = darshan_core_wtime() - init_start;
#ifdef HAVE_MPI
384 385
        if(using_mpi)
        {
386 387 388 389 390 391 392 393 394 395 396
            if(my_rank == 0)
            {
                PMPI_Reduce(MPI_IN_PLACE, &init_time, 1,
                    MPI_DOUBLE, MPI_MAX, 0, darshan_core->mpi_comm);
            }
            else
            {
                PMPI_Reduce(&init_time, &init_time, 1,
                    MPI_DOUBLE, MPI_MAX, 0, darshan_core->mpi_comm);
                return; /* return early so every rank doesn't print */
            }
397
        }
398 399 400 401
#endif

        darshan_core_fprintf(stderr, "#darshan:<op>\t<nprocs>\t<time>\n");
        darshan_core_fprintf(stderr, "darshan:init\t%d\t%f\n", nprocs, init_time);
402 403 404 405 406
    }

    return;
}

Shane Snyder's avatar
Shane Snyder committed
407
void darshan_core_shutdown()
408
{
409
    struct darshan_core_runtime *final_core;
410
    double start_log_time;
411
    int internal_timing_flag = 0;
412 413 414
    double open1 = 0, open2 = 0;
    double job1 = 0, job2 = 0;
    double rec1 = 0, rec2 = 0;
415 416
    double mod1[DARSHAN_MAX_MODS] = {0};
    double mod2[DARSHAN_MAX_MODS] = {0};
417
    double header1 = 0, header2 = 0;
418 419
    double tm_end;
    int active_mods[DARSHAN_MAX_MODS] = {0};
420
    uint64_t gz_fp = 0;
421 422 423 424 425
    char *logfile_name = NULL;
    darshan_core_log_fh log_fh;
    int log_created = 0;
    int i;
    int ret;
426

Shane Snyder's avatar
Shane Snyder committed
427
    /* disable darhan-core while we shutdown */
428
    DARSHAN_CORE_LOCK();
429
    if(!darshan_core)
430
    {
431
        DARSHAN_CORE_UNLOCK();
432 433
        return;
    }
434 435
    final_core = darshan_core;
    darshan_core = NULL;
436 437
    DARSHAN_CORE_UNLOCK();

438 439 440 441
    /* grab some initial timing information */
#ifdef HAVE_MPI
    /* if using mpi, sync across procs first */
    if(using_mpi)
442
        PMPI_Barrier(final_core->mpi_comm);
443 444 445 446 447 448 449
#endif
    start_log_time = darshan_core_wtime();
    final_core->log_job_p->end_time = time(NULL);

    if(getenv("DARSHAN_INTERNAL_TIMING"))
        internal_timing_flag = 1;

450
#ifdef __DARSHAN_ENABLE_MMAP_LOGS
451 452 453 454 455
    /* remove the temporary mmap log files */
    /* NOTE: this unlink is not immediate as it must wait for the mapping
     * to no longer be referenced, which in our case happens when the
     * executable exits. If the application terminates mid-shutdown, then
     * there will be no mmap files and no final log file.
456
     */
457
    unlink(final_core->mmap_log_name);
458
#endif
Shane Snyder's avatar
Shane Snyder committed
459

460 461 462 463
    final_core->comp_buf = malloc(darshan_mod_mem_quota);
    logfile_name = malloc(PATH_MAX);
    if(!final_core->comp_buf || !logfile_name)
        goto exit;
464

465 466
    /* set which modules were used locally */
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
Shane Snyder's avatar
Shane Snyder committed
467
    {
468 469
        if(final_core->mod_array[i])
            active_mods[i] = 1;
Shane Snyder's avatar
Shane Snyder committed
470
    }
471

472 473 474 475
#ifdef HAVE_MPI
    darshan_record_id *shared_recs = NULL;
    darshan_record_id *mod_shared_recs = NULL;
    int shared_rec_cnt = 0;
476

477
    if(using_mpi)
478
    {
479 480
        /* allreduce locally active mods to determine globally active mods */
        PMPI_Allreduce(MPI_IN_PLACE, active_mods, DARSHAN_MAX_MODS, MPI_INT,
481
            MPI_SUM, final_core->mpi_comm);
482

483
        /* reduce to report first start and last end time across all ranks at rank 0 */
484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
        if(my_rank == 0)
        {
            PMPI_Reduce(MPI_IN_PLACE, &final_core->log_job_p->start_time,
                1, MPI_INT64_T, MPI_MIN, 0, final_core->mpi_comm);
            PMPI_Reduce(MPI_IN_PLACE, &final_core->log_job_p->end_time,
                1, MPI_INT64_T, MPI_MAX, 0, final_core->mpi_comm);
        }
        else
        {
            PMPI_Reduce(&final_core->log_job_p->start_time,
                &final_core->log_job_p->start_time,
                1, MPI_INT64_T, MPI_MIN, 0, final_core->mpi_comm);
            PMPI_Reduce(&final_core->log_job_p->end_time,
                &final_core->log_job_p->end_time,
                1, MPI_INT64_T, MPI_MAX, 0, final_core->mpi_comm);
        }
500

501 502
        /* get a list of records which are shared across all processes */
        darshan_get_shared_records(final_core, &shared_recs, &shared_rec_cnt);
503

504 505 506 507
        mod_shared_recs = malloc(shared_rec_cnt * sizeof(darshan_record_id));
        assert(mod_shared_recs);
    }
#endif
508

509
    /* get the log file name */
510
    darshan_get_logfile_name(logfile_name, final_core);
511 512 513
    if(strlen(logfile_name) == 0)
    {
        /* failed to generate log file name */
514
        goto exit;
515 516 517
    }

    if(internal_timing_flag)
518 519
        open1 = darshan_core_wtime();
    /* open the darshan log file */
520
    ret = darshan_log_open(logfile_name, final_core, &log_fh);
521
    if(internal_timing_flag)
522
        open2 = darshan_core_wtime();
523
    /* error out if unable to open log file */
524 525
    DARSHAN_CHECK_ERR(ret, "unable to create log file %s", logfile_name);
    log_created = 1;
526 527

    if(internal_timing_flag)
528 529 530
        job1 = darshan_core_wtime();
    /* write the the compressed darshan job information */
    ret = darshan_log_write_job_record(log_fh, final_core, &gz_fp);
531
    if(internal_timing_flag)
532 533 534
        job2 = darshan_core_wtime();
    /* error out if unable to write job information */
    DARSHAN_CHECK_ERR(ret, "unable to write job record to file %s", logfile_name);
535 536

    if(internal_timing_flag)
537
        rec1 = darshan_core_wtime();
538
    /* write the record name->id hash to the log file */
539
    final_core->log_hdr_p->name_map.off = gz_fp;
540
    ret = darshan_log_write_name_record_hash(log_fh, final_core, &gz_fp);
541
    if(internal_timing_flag)
542 543 544 545
        rec2 = darshan_core_wtime();
    final_core->log_hdr_p->name_map.len = gz_fp - final_core->log_hdr_p->name_map.off;
    /* error out if unable to write name records */
    DARSHAN_CHECK_ERR(ret, "unable to write name records to log file %s", logfile_name);
546 547 548 549 550

    /* loop over globally used darshan modules and:
     *      - get final output buffer
     *      - compress (zlib) provided output buffer
     *      - append compressed buffer to log file
551
     *      - add module map info (file offset/length) to log header
552 553 554 555 556 557 558 559
     *      - shutdown the module
     */
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
    {
        struct darshan_core_module* this_mod = final_core->mod_array[i];
        void* mod_buf = NULL;
        int mod_buf_sz = 0;

560
        if(!active_mods[i])
561
        {
562 563
            final_core->log_hdr_p->mod_map[i].off = 0;
            final_core->log_hdr_p->mod_map[i].len = 0;
564 565 566 567
            continue;
        }

        if(internal_timing_flag)
568 569 570 571 572 573
            mod1[i] = darshan_core_wtime();

#ifdef HAVE_MPI
        struct darshan_core_name_record_ref *ref = NULL;
        int mod_shared_rec_cnt = 0;
        int j;
574

575
        if(using_mpi)
576
        {
577 578
            /* set the shared record list for this module */
            for(j = 0; j < shared_rec_cnt; j++)
579
            {
580 581 582 583 584 585 586
                HASH_FIND(hlink, final_core->name_hash, &shared_recs[j],
                    sizeof(darshan_record_id), ref);
                assert(ref);
                if(DARSHAN_MOD_FLAG_ISSET(ref->global_mod_flags, i))
                {
                    mod_shared_recs[mod_shared_rec_cnt++] = shared_recs[j];
                }
587
            }
588 589

            /* allow the module an opportunity to reduce shared files */
590 591
            if(this_mod->mod_funcs.mod_redux_func && (mod_shared_recs > 0) &&
               (!getenv("DARSHAN_DISABLE_SHARED_REDUCTION")))
592 593
                this_mod->mod_funcs.mod_redux_func(mod_buf, final_core->mpi_comm,
                    mod_shared_recs, mod_shared_rec_cnt);
594
        }
595
#endif
596 597

        /* if module is registered locally, get the corresponding output buffer
Philip Carns's avatar
Philip Carns committed
598
         *
599
         * NOTE: this function can be used to run collective operations across
600
         * modules, if there are records shared globally.
601 602 603
         */
        if(this_mod)
        {
604 605
            mod_buf = final_core->mod_array[i]->rec_buf_start;
            mod_buf_sz = final_core->mod_array[i]->rec_buf_p - mod_buf;
606
            this_mod->mod_funcs.mod_shutdown_func(&mod_buf, &mod_buf_sz);
607 608 609
        }

        /* append this module's data to the darshan log */
610
        final_core->log_hdr_p->mod_map[i].off = gz_fp;
611
        ret = darshan_log_append(log_fh, final_core, mod_buf, mod_buf_sz, &gz_fp);
612 613
        final_core->log_hdr_p->mod_map[i].len =
            gz_fp - final_core->log_hdr_p->mod_map[i].off;
614

615 616 617 618 619 620
        /* XXX: DXT manages its own module memory buffers, so we need to
         * explicitly free them
         */
        if(i == DXT_POSIX_MOD || i == DXT_MPIIO_MOD)
            free(mod_buf);

621
        if(internal_timing_flag)
622
            mod2[i] = darshan_core_wtime();
623

624 625 626
        /* error out if unable to write module data */
        DARSHAN_CHECK_ERR(ret, "unable to write %s module data to log file %s",
            darshan_module_names[i], logfile_name);
627
    }
628 629

    if(internal_timing_flag)
630 631 632 633 634
        header1 = darshan_core_wtime();
    ret = darshan_log_write_header(log_fh, final_core);
    if(internal_timing_flag)
        header2 = darshan_core_wtime();
    DARSHAN_CHECK_ERR(ret, "unable to write header to file %s", logfile_name);
635

636 637
    /* done writing data, close the log file */
    darshan_log_close(log_fh);
638

639 640
    /* finalize log file name and permissions */
    darshan_log_finalize(logfile_name, start_log_time);
641

642
    if(internal_timing_flag)
643
    {
644 645 646 647 648 649
        double open_tm;
        double header_tm;
        double job_tm;
        double rec_tm;
        double mod_tm[DARSHAN_MAX_MODS];
        double all_tm;
650

651
        tm_end = darshan_core_wtime();
652

653 654 655 656
        open_tm = open2 - open1;
        header_tm = header2 - header1;
        job_tm = job2 - job1;
        rec_tm = rec2 - rec1;
657
        all_tm = tm_end - start_log_time;
658
        for(i = 0; i < DARSHAN_MAX_MODS; i++)
659 660 661
        {
            mod_tm[i] = mod2[i] - mod1[i];
        }
662

663 664 665
#ifdef HAVE_MPI
        if(using_mpi)
        {
666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696
            if(my_rank == 0)
            {
                PMPI_Reduce(MPI_IN_PLACE, &open_tm, 1,
                    MPI_DOUBLE, MPI_MAX, 0, final_core->mpi_comm);
                PMPI_Reduce(MPI_IN_PLACE, &header_tm, 1,
                    MPI_DOUBLE, MPI_MAX, 0, final_core->mpi_comm);
                PMPI_Reduce(MPI_IN_PLACE, &job_tm, 1,
                    MPI_DOUBLE, MPI_MAX, 0, final_core->mpi_comm);
                PMPI_Reduce(MPI_IN_PLACE, &rec_tm, 1,
                    MPI_DOUBLE, MPI_MAX, 0, final_core->mpi_comm);
                PMPI_Reduce(MPI_IN_PLACE, &all_tm, 1,
                    MPI_DOUBLE, MPI_MAX, 0, final_core->mpi_comm);
                PMPI_Reduce(MPI_IN_PLACE, mod_tm, DARSHAN_MAX_MODS,
                    MPI_DOUBLE, MPI_MAX, 0, final_core->mpi_comm);
            }
            else
            {
                PMPI_Reduce(&open_tm, &open_tm, 1,
                    MPI_DOUBLE, MPI_MAX, 0, final_core->mpi_comm);
                PMPI_Reduce(&header_tm, &header_tm, 1,
                    MPI_DOUBLE, MPI_MAX, 0, final_core->mpi_comm);
                PMPI_Reduce(&job_tm, &job_tm, 1,
                    MPI_DOUBLE, MPI_MAX, 0, final_core->mpi_comm);
                PMPI_Reduce(&rec_tm, &rec_tm, 1,
                    MPI_DOUBLE, MPI_MAX, 0, final_core->mpi_comm);
                PMPI_Reduce(&all_tm, &all_tm, 1,
                    MPI_DOUBLE, MPI_MAX, 0, final_core->mpi_comm);
                PMPI_Reduce(mod_tm, mod_tm, DARSHAN_MAX_MODS,
                    MPI_DOUBLE, MPI_MAX, 0, final_core->mpi_comm);

                /* let rank 0 report the timing info */
697
                goto exit;
698
            }
699 700
        }
#endif
701

702 703 704 705 706 707
        darshan_core_fprintf(stderr, "#darshan:<op>\t<nprocs>\t<time>\n");
        darshan_core_fprintf(stderr, "darshan:log_open\t%d\t%f\n", nprocs, open_tm);
        darshan_core_fprintf(stderr, "darshan:job_write\t%d\t%f\n", nprocs, job_tm);
        darshan_core_fprintf(stderr, "darshan:hash_write\t%d\t%f\n", nprocs, rec_tm);
        darshan_core_fprintf(stderr, "darshan:header_write\t%d\t%f\n", nprocs, header_tm);
        for(i = 0; i < DARSHAN_MAX_MODS; i++)
708
        {
709 710 711
            if(active_mods[i])
                darshan_core_fprintf(stderr, "darshan:%s_shutdown\t%d\t%f\n",
                    darshan_module_names[i], nprocs, mod_tm[i]);
712
        }
713
        darshan_core_fprintf(stderr, "darshan:core_shutdown\t%d\t%f\n", nprocs, all_tm);
714
    }
715

716 717
exit:
#ifdef HAVE_MPI
718 719 720 721 722
    if(using_mpi)
    {
        free(shared_recs);
        free(mod_shared_recs);
    }
723 724 725 726
#endif
    free(logfile_name);
    darshan_core_cleanup(final_core);

727 728
    return;
}
729

Shane Snyder's avatar
Shane Snyder committed
730
/* *********************************** */
731

732
#ifdef __DARSHAN_ENABLE_MMAP_LOGS
733 734 735 736 737 738 739
static void *darshan_init_mmap_log(struct darshan_core_runtime* core, int jobid)
{
    int ret;
    int mmap_fd;
    int mmap_size;
    int sys_page_size;
    char cuser[L_cuserid] = {0};
740 741 742
    uint64_t hlevel;
    char hname[HOST_NAME_MAX];
    uint64_t logmod;
743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762
    char *envstr;
    char *mmap_log_path;
    void *mmap_p;

    sys_page_size = sysconf(_SC_PAGESIZE);
    assert(sys_page_size > 0);

    mmap_size = sizeof(struct darshan_header) + DARSHAN_JOB_RECORD_SIZE +
        + DARSHAN_NAME_RECORD_BUF_SIZE + darshan_mod_mem_quota;
    if(mmap_size % sys_page_size)
        mmap_size = ((mmap_size / sys_page_size) + 1) * sys_page_size;

    envstr = getenv(DARSHAN_MMAP_LOG_PATH_OVERRIDE);
    if(envstr)
        mmap_log_path = envstr;
    else
        mmap_log_path = DARSHAN_DEF_MMAP_LOG_PATH;

    darshan_get_user_name(cuser);

763 764 765 766 767 768 769
    /* generate a random number to help differentiate the temporary log */
    /* NOTE: job id is not sufficient for constructing a unique log file name,
     * since a job could be composed of multiple application runs, so we also
     * add a random number component to the log name
     */
    if(my_rank == 0)
    {
770
        hlevel = darshan_core_wtime_absolute() * 1000000;
771 772 773
        (void)gethostname(hname, sizeof(hname));
        logmod = darshan_hash((void*)hname,strlen(hname),hlevel);
    }
774
    PMPI_Bcast(&logmod, 1, MPI_UINT64_T, 0, core->mpi_comm);
775

776 777 778 779
    /* construct a unique temporary log file name for this process
     * to write mmap log data to
     */
    snprintf(core->mmap_log_name, PATH_MAX,
780 781
        "/%s/%s_%s_id%d_mmap-log-%" PRIu64 "-%d.darshan",
        mmap_log_path, cuser, __progname, jobid, logmod, my_rank);
782 783 784 785 786

    /* create the temporary mmapped darshan log */
    mmap_fd = open(core->mmap_log_name, O_CREAT|O_RDWR|O_EXCL , 0644);
    if(mmap_fd < 0)
    {
787
        darshan_core_fprintf(stderr, "darshan library warning: "
788 789 790 791 792 793 794 795 796
            "unable to create darshan log file %s\n", core->mmap_log_name);
        return(NULL);
    }

    /* TODO: ftruncate or just zero fill? */
    /* allocate the necessary space in the log file */
    ret = ftruncate(mmap_fd, mmap_size);
    if(ret < 0)
    {
797
        darshan_core_fprintf(stderr, "darshan library warning: "
798 799 800 801 802 803 804 805 806 807 808 809
            "unable to allocate darshan log file %s\n", core->mmap_log_name);
        close(mmap_fd);
        unlink(core->mmap_log_name);
        return(NULL);
    }

    /* create the memory map for darshan's data structures so they are
     * persisted to file as the application executes
     */
    mmap_p = mmap(NULL, mmap_size, PROT_WRITE, MAP_SHARED, mmap_fd, 0);
    if(mmap_p == MAP_FAILED)
    {
810
        darshan_core_fprintf(stderr, "darshan library warning: "
811 812 813 814 815 816 817 818 819 820 821
            "unable to mmap darshan log file %s\n", core->mmap_log_name);
        close(mmap_fd);
        unlink(core->mmap_log_name);
        return(NULL);
    }

    /* close darshan log file (this does *not* unmap the log file) */
    close(mmap_fd);

    return(mmap_p);
}
822
#endif
823

824
/* record any hints used to write the darshan log in the job data */
825
static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* core)
826 827
{
    char* hints;
828
    char* job_hints;
829 830 831 832 833 834
    int meta_remain = 0;
    char* m;

    /* check environment variable to see if the default MPI file hints have
     * been overridden
     */
835
    hints = getenv(DARSHAN_LOG_HINTS_OVERRIDE);
836 837
    if(!hints)
    {
838
        hints = __DARSHAN_LOG_HINTS;
839 840 841 842 843
    }

    if(!hints || strlen(hints) < 1)
        return;

844 845
    job_hints = strdup(hints);
    if(!job_hints)
846 847 848
        return;

    meta_remain = DARSHAN_JOB_METADATA_LEN -
849
        strlen(core->log_job_p->metadata) - 1;
850 851
    if(meta_remain >= (strlen(PACKAGE_VERSION) + 9))
    {
852
        sprintf(core->log_job_p->metadata, "lib_ver=%s\n", PACKAGE_VERSION);
853 854
        meta_remain -= (strlen(PACKAGE_VERSION) + 9);
    }
855
    if(meta_remain >= (3 + strlen(job_hints)))
856
    {
857
        m = core->log_job_p->metadata + strlen(core->log_job_p->metadata);
858
        /* We have room to store the hints in the metadata portion of
859
         * the job structure.  We just prepend an h= to the hints list.  The
860 861 862
         * metadata parser will ignore = characters that appear in the value
         * portion of the metadata key/value pair.
         */
863
        sprintf(m, "h=%s\n", job_hints);
864
    }
865
    free(job_hints);
866 867 868 869

    return;
}

870 871
/* Comparison callback over struct darshan_core_mnt_data entries that
 * orders them by decreasing mount path length: returns -1 if a's path is
 * longer than b's, 1 if it is shorter, and 0 if both have equal length.
 */
static int mnt_data_cmp(const void* a, const void* b)
{
    const struct darshan_core_mnt_data *lhs = a;
    const struct darshan_core_mnt_data *rhs = b;
    size_t lhs_len = strlen(lhs->path);
    size_t rhs_len = strlen(rhs->path);

    /* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
    return (lhs_len < rhs_len) - (lhs_len > rhs_len);
}

/* adds an entry to table of mounted file systems */
884
static void add_entry(char* buf, int* space_left, struct mntent* entry)
885
{
886
    int i;
887 888 889 890
    int ret;
    char tmp_mnt[256];
    struct statfs statfsbuf;

891 892 893 894 895 896 897 898 899 900
    /* avoid adding the same mount points multiple times -- to limit
     * storage space and potential statfs, ioctl, etc calls
     */
    for(i = 0; i < mnt_data_count; i++)
    {
        if((strncmp(mnt_data_array[i].path, entry->mnt_dir, DARSHAN_MAX_MNT_PATH) == 0) &&
           (strncmp(mnt_data_array[i].type, entry->mnt_type, DARSHAN_MAX_MNT_PATH) == 0))
            return;
    }

901 902 903 904
    strncpy(mnt_data_array[mnt_data_count].path, entry->mnt_dir,
        DARSHAN_MAX_MNT_PATH-1);
    strncpy(mnt_data_array[mnt_data_count].type, entry->mnt_type,
        DARSHAN_MAX_MNT_TYPE-1);
Philip Carns's avatar
Philip Carns committed
905 906 907
    /* NOTE: we now try to detect the preferred block size for each file
     * system using fstatfs().  On Lustre we assume a size of 1 MiB
     * because fstatfs() reports 4 KiB.
908 909 910 911 912
     */
#ifndef LL_SUPER_MAGIC
#define LL_SUPER_MAGIC 0x0BD00BD0
#endif
    ret = statfs(entry->mnt_dir, &statfsbuf);
913
    mnt_data_array[mnt_data_count].fs_info.fs_type = statfsbuf.f_type;
914
    if(ret == 0 && statfsbuf.f_type != LL_SUPER_MAGIC)
915
        mnt_data_array[mnt_data_count].fs_info.block_size = statfsbuf.f_bsize;
916
    else if(ret == 0 && statfsbuf.f_type == LL_SUPER_MAGIC)
917
        mnt_data_array[mnt_data_count].fs_info.block_size = 1024*1024;
918
    else
919
        mnt_data_array[mnt_data_count].fs_info.block_size = 4096;
920

921
#ifdef DARSHAN_LUSTRE
922 923 924 925 926 927 928 929 930 931
    /* attempt to retrieve OST and MDS counts from Lustre */
    mnt_data_array[mnt_data_count].fs_info.ost_count = -1;
    mnt_data_array[mnt_data_count].fs_info.mdt_count = -1;
    if ( statfsbuf.f_type == LL_SUPER_MAGIC )
    {
        int n_ost, n_mdt;
        int ret_ost, ret_mdt;
        DIR *mount_dir;

        mount_dir = opendir( entry->mnt_dir );
Philip Carns's avatar
Philip Carns committed
932
        if ( mount_dir  )
933 934 935 936 937 938 939 940
        {
            /* n_ost and n_mdt are used for both input and output to ioctl */
            n_ost = 0;
            n_mdt = 1;

            ret_ost = ioctl( dirfd(mount_dir), LL_IOC_GETOBDCOUNT, &n_ost );
            ret_mdt = ioctl( dirfd(mount_dir), LL_IOC_GETOBDCOUNT, &n_mdt );

941
            if ( !(ret_ost < 0 || ret_mdt < 0) )
942 943 944 945 946 947 948
            {
                mnt_data_array[mnt_data_count].fs_info.ost_count = n_ost;
                mnt_data_array[mnt_data_count].fs_info.mdt_count = n_mdt;
            }
            closedir( mount_dir );
        }
    }
949
#endif
950

951
    /* store mount information with the job-level metadata in darshan log */
952
    ret = snprintf(tmp_mnt, 256, "\n%s\t%s",
953 954 955
        entry->mnt_type, entry->mnt_dir);
    if</