/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

7
#define _GNU_SOURCE
8
#include "darshan-util-config.h"
9 10
#include <stdio.h>
#include <string.h>
11
#include <assert.h>
12
#include <stdlib.h>
13
#include <unistd.h>
14
#include <inttypes.h>
15 16 17
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
18
#include <errno.h>
19

20
#include "darshan-logutils.h"
21
#include "compat/darshan-logutils-compat.h"
22

23 24 25
/* default size, in bytes, of the input buffer used by the
 * decompression algorithm
 */
#define DARSHAN_DEF_COMP_BUF_SZ (1024*1024) /* 1 MiB */

/* special identifiers for referring to the header, job, and
 * name record map regions of the darshan log file
 *
 * All three are negative; module ids (which are checked to be >= 0 before
 * use) are passed as region ids for the module data regions.
 */
#define DARSHAN_HEADER_REGION_ID    (-3)
#define DARSHAN_JOB_REGION_ID       (-2)
#define DARSHAN_NAME_MAP_REGION_ID  (-1)
32

33 34
/* state of the compression/decompression stream attached to an open log */
struct darshan_dz_state
{
    /* opaque handle to the compression library's own stream state
     * (e.g., the z_stream structure needed for libz)
     */
    void *comp_dat;
    /* staging buffer holding compressed data on its way to/from the log file */
    unsigned char *buf;
    /* capacity of the staging buffer */
    unsigned int size;
    /* read side only: set once the end of the current log region is reached */
    int eor;
    /* id of the region most recently targeted by a read or write */
    int prev_reg_id;
};

/* internal fd data structure */
struct darshan_fd_int_state
{
    /* posix file descriptor for the log file */
    int fildes;
    /* file pointer position */
    int64_t pos;
    /* flag indicating whether log file was created (and written) */
    int creat_flag;
    /* log file path name */
    char logfile_path[PATH_MAX];
    /* pointer to exe & mount data in darshan job data structure */
    char *exe_mnt_data;
    /* whether previous file operations have failed */
    int err;
65 66 67 68
    /* log format version-specific function calls for getting
     * data from the log file
     */
    int (*get_namerecs)(void *, int, int, struct darshan_name_record_ref **);
69

70
    /* compression/decompression stream read/write state */
71 72 73
    struct darshan_dz_state dz;
};

74
/* helpers for sorting mount entries and parsing name records */
static int darshan_mnt_info_cmp(const void *a, const void *b);
static int darshan_log_get_namerecs(void *name_rec_buf, int buf_len,
    int swap_flag, struct darshan_name_record_ref **hash);

/* uncompressed log header access */
static int darshan_log_get_header(darshan_fd fd);
static int darshan_log_put_header(darshan_fd fd);

/* raw (uncompressed) file access */
static int darshan_log_seek(darshan_fd fd, off_t offset);
static int darshan_log_read(darshan_fd fd, void *buf, int len);
static int darshan_log_write(darshan_fd fd, void *buf, int len);

/* compression-agnostic stream setup, teardown, and region I/O */
static int darshan_log_dzinit(darshan_fd fd);
static void darshan_log_dzdestroy(darshan_fd fd);
static int darshan_log_dzread(darshan_fd fd, int region_id, void *buf, int len);
static int darshan_log_dzwrite(darshan_fd fd, int region_id, void *buf, int len);

/* libz backend */
static int darshan_log_libz_read(darshan_fd fd, struct darshan_log_map map,
    void *buf, int len, int reset_strm_flag);
static int darshan_log_libz_write(darshan_fd fd, struct darshan_log_map *map_p,
    void *buf, int len, int flush_strm_flag);
static int darshan_log_libz_flush(darshan_fd fd, int region_id);

/* bzip2 backend, only when the library is available */
#ifdef HAVE_LIBBZ2
static int darshan_log_bzip2_read(darshan_fd fd, struct darshan_log_map map,
    void *buf, int len, int reset_strm_flag);
static int darshan_log_bzip2_write(darshan_fd fd, struct darshan_log_map *map_p,
    void *buf, int len, int flush_strm_flag);
static int darshan_log_bzip2_flush(darshan_fd fd, int region_id);
#endif

/* staging buffer load/unload and the no-compression read path */
static int darshan_log_dzload(darshan_fd fd, struct darshan_log_map map);
static int darshan_log_dzunload(darshan_fd fd, struct darshan_log_map *map_p);
static int darshan_log_noz_read(darshan_fd fd, struct darshan_log_map map,
    void *buf, int len, int reset_strm_flag);
102

Shane Snyder's avatar
Shane Snyder committed
103
/* each module's implementation of the darshan logutil functions */
104
#define X(a, b, c, d) d,
105 106
struct darshan_mod_logutil_funcs *mod_logutils[DARSHAN_MAX_MODS] =
{
Shane Snyder's avatar
Shane Snyder committed
107
    DARSHAN_MODULE_IDS
108
};
Shane Snyder's avatar
Shane Snyder committed
109
#undef X
110 111 112

/* darshan_log_open()
 *
113
 * open an existing darshan log file for reading only
114
 *
115
 * returns file descriptor on success, NULL on failure
116
 */
117
darshan_fd darshan_log_open(const char *name)
118
{
119
    darshan_fd tmp_fd;
120
    int ret;
121

122
    /* allocate a darshan file descriptor */
123 124 125 126
    tmp_fd = malloc(sizeof(*tmp_fd));
    if(!tmp_fd)
        return(NULL);
    memset(tmp_fd, 0, sizeof(*tmp_fd));
127 128 129 130 131 132 133
    tmp_fd->state = malloc(sizeof(struct darshan_fd_int_state));
    if(!tmp_fd->state)
    {
        free(tmp_fd->state);
        return(NULL);
    }
    memset(tmp_fd->state, 0, sizeof(struct darshan_fd_int_state));
134

135
    /* open the log file in read mode */
136 137
    tmp_fd->state->fildes = open(name, O_RDONLY);
    if(tmp_fd->state->fildes < 0)
138
    {
139
        fprintf(stderr, "Error: failed to open darshan log file %s.\n", name);
140
        free(tmp_fd->state);
141 142 143
        free(tmp_fd);
        return(NULL);
    }
144
    strncpy(tmp_fd->state->logfile_path, name, PATH_MAX);
145 146

    /* read the header from the log file to init fd data structures */
147
    ret = darshan_log_get_header(tmp_fd);
148 149
    if(ret < 0)
    {
150
        fprintf(stderr, "Error: failed to read darshan log file header.\n");
151 152
        close(tmp_fd->state->fildes);
        free(tmp_fd->state);
153
        free(tmp_fd);
154 155 156 157
        return(NULL);
    }

    /* initialize compression data structures */
158
    ret = darshan_log_dzinit(tmp_fd);
159 160 161
    if(ret < 0)
    {
        fprintf(stderr, "Error: failed to initialize decompression data structures.\n");
162 163
        close(tmp_fd->state->fildes);
        free(tmp_fd->state);
164 165
        free(tmp_fd);
        return(NULL);
166 167
    }

168 169 170 171 172 173 174
    return(tmp_fd);
}

/* darshan_log_create()
 *
 * create a darshan log file for writing with the given compression method
 *
175
 * returns file descriptor on success, NULL on failure
176
 */
177 178
darshan_fd darshan_log_create(const char *name, enum darshan_comp_type comp_type,
    int partial_flag)
179 180
{
    darshan_fd tmp_fd;
181
    int ret;
182

183
    /* allocate a darshan file descriptor */
184 185 186 187
    tmp_fd = malloc(sizeof(*tmp_fd));
    if(!tmp_fd)
        return(NULL);
    memset(tmp_fd, 0, sizeof(*tmp_fd));
188 189 190 191 192 193 194
    tmp_fd->state = malloc(sizeof(struct darshan_fd_int_state));
    if(!tmp_fd->state)
    {
        free(tmp_fd);
        return(NULL);
    }
    memset(tmp_fd->state, 0, sizeof(struct darshan_fd_int_state));
195
    tmp_fd->comp_type = comp_type;
196

197 198 199
    /* create the log for writing, making sure to not overwrite existing log */
    tmp_fd->state->fildes = creat(name, 0400);
    if(tmp_fd->state->fildes < 0)
200
    {
201
        fprintf(stderr, "Error: failed to open darshan log file %s.\n", name);
202
        free(tmp_fd->state);
203
        free(tmp_fd);
204
        return(NULL);
205
    }
206
    tmp_fd->state->creat_flag = 1;
207
    tmp_fd->partial_flag = partial_flag;
208
    strncpy(tmp_fd->state->logfile_path, name, PATH_MAX);
209

210 211 212 213 214
    /* position file pointer to prealloc space for the log file header
     * NOTE: the header is written at close time, after all internal data
     * structures have been properly set
     */
    ret = darshan_log_seek(tmp_fd, sizeof(struct darshan_header));
215
    if(ret < 0)
216 217
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
218 219
        close(tmp_fd->state->fildes);
        free(tmp_fd->state);
220 221 222
        free(tmp_fd);
        unlink(name);
        return(NULL);
223 224
    }

225
    /* initialize compression data structures */
226
    ret = darshan_log_dzinit(tmp_fd);
227 228
    if(ret < 0)
    {
229
        fprintf(stderr, "Error: failed to initialize compression data structures.\n");
230 231
        close(tmp_fd->state->fildes);
        free(tmp_fd->state);
232 233 234
        free(tmp_fd);
        unlink(name);
        return(NULL);
235 236
    }

237
    return(tmp_fd);
238 239
}

240
/* darshan_log_get_job()
241 242
 *
 * read job level metadata from the darshan log file
243
 *
244
 * returns 0 on success, -1 on failure
245
 */
246
int darshan_log_get_job(darshan_fd fd, struct darshan_job *job)
247
{
248
    struct darshan_fd_int_state *state = fd->state;
249 250
    char job_buf[DARSHAN_JOB_RECORD_SIZE] = {0};
    int job_buf_sz = DARSHAN_JOB_RECORD_SIZE;
251
    int ret;
252

253
    assert(state);
254 255
    assert(fd->job_map.len > 0 && fd->job_map.off > 0);

256
    /* read the compressed job data from the log file */
257
    ret = darshan_log_dzread(fd, DARSHAN_JOB_REGION_ID, job_buf, job_buf_sz);
258
    if(ret <= (int)sizeof(*job))
259
    {
260
        fprintf(stderr, "Error: failed to read darshan log file job data.\n");
261 262
        return(-1);
    }
263 264

    memcpy(job, job_buf, sizeof(*job));
265

266
    if(fd->swap_flag)
267
    {
268 269 270 271 272 273
        /* swap bytes if necessary */
        DARSHAN_BSWAP64(&job->uid);
        DARSHAN_BSWAP64(&job->start_time);
        DARSHAN_BSWAP64(&job->end_time);
        DARSHAN_BSWAP64(&job->nprocs);
        DARSHAN_BSWAP64(&job->jobid);
274 275
    }

276
    /* save trailing exe & mount information, so it can be retrieved later */
277 278 279
    if(!(state->exe_mnt_data))
        state->exe_mnt_data = malloc(DARSHAN_EXE_LEN+1);
    if(!(state->exe_mnt_data))
280
        return(-1);
281
    memcpy(state->exe_mnt_data, &job_buf[sizeof(*job)], DARSHAN_EXE_LEN+1);
282

283 284 285
    return(0);
}

286
/* darshan_log_put_job()
287
 *
288
 * write job level metadata to darshan log file
289 290 291
 *
 * returns 0 on success, -1 on failure
 */
292
int darshan_log_put_job(darshan_fd fd, struct darshan_job *job)
293
{
294
    struct darshan_fd_int_state *state = fd->state;
295 296 297 298
    struct darshan_job job_copy;
    int len;
    int ret;

299 300
    assert(state);

301 302 303 304 305 306 307 308 309 310 311 312 313 314
    memset(&job_copy, 0, sizeof(*job));
    memcpy(&job_copy, job, sizeof(*job));

    /* check for newline in existing metadata, add if needed */
    len = strlen(job_copy.metadata);
    if(len > 0 && len < DARSHAN_JOB_METADATA_LEN)
    {
        if(job_copy.metadata[len-1] != '\n')
        {
            job_copy.metadata[len] = '\n';
            job_copy.metadata[len+1] = '\0';
        }
    }

315 316
    /* write the compressed job data to log file */
    ret = darshan_log_dzwrite(fd, DARSHAN_JOB_REGION_ID, &job_copy, sizeof(*job));
317
    if(ret != sizeof(*job))
318
    {
319
        state->err = -1;
320 321 322 323 324 325 326
        fprintf(stderr, "Error: failed to write darshan log file job data.\n");
        return(-1);
    }

    return(0);
}

327
/* darshan_log_get_exe()
328 329 330 331 332
 *
 * reads the application exe name from darshan log file
 * 
 * returns 0 on success, -1 on failure 
 */
333
int darshan_log_get_exe(darshan_fd fd, char *buf)
334
{
335
    struct darshan_fd_int_state *state = fd->state;
336
    char *newline;
337
    int ret;
338

339 340
    assert(state);

341
    /* if the exe/mount data has not been saved yet, read in the job info */
342
    if(!(state->exe_mnt_data))
343
    {
344
        struct darshan_job job;
345
        ret = darshan_log_get_job(fd, &job);
346

347
        if(ret < 0 || !(state->exe_mnt_data))
348
            return(-1);
349
    }
350

351
    /* exe string is located before the first line break */
352
    newline = strchr(state->exe_mnt_data, '\n');
353 354

    /* copy over the exe string */
355
    if(newline)
356
        memcpy(buf, state->exe_mnt_data, (newline - state->exe_mnt_data));
357 358 359 360

    return (0);
}

361
/* darshan_log_put_exe()
362 363
 *
 * wrties the application exe name to darshan log file
364
 * NOTE: this needs to be called immediately following put_job as it
365
 * expects the file pointer to be positioned immediately following
366
 * the darshan job information
367 368 369
 *
 * returns 0 on success, -1 on failure 
 */
370
int darshan_log_put_exe(darshan_fd fd, char *buf)
371
{
372 373
    struct darshan_fd_int_state *state = fd->state;
    int len = strlen(buf);
374
    int ret;
375

376
    assert(fd->state);
377

378 379
    ret = darshan_log_dzwrite(fd, DARSHAN_JOB_REGION_ID, buf, len);
    if(ret != len)
380
    {
381
        state->err = -1;
382 383 384 385 386 387 388
        fprintf(stderr, "Error: failed to write exe string to darshan log file.\n");
        return(-1);
    }

    return(0);
}

389
/* darshan_log_get_mounts()
390
 * 
391 392 393
 * retrieves mount table information from the log. Note that mnt_data_array
 * is an array that will be allocated by the function and must be
 * freed by the caller. count will indicate the size of the array
394 395
 *
 * returns 0 on success, -1 on failure
396
 */
397
int darshan_log_get_mounts(darshan_fd fd, struct darshan_mnt_info **mnt_data_array,
398
    int* count)
399
{
400
    struct darshan_fd_int_state *state = fd->state;
401 402
    char *pos;
    int array_index = 0;
403
    int ret;
404

405 406
    assert(state);

407
    /* if the exe/mount data has not been saved yet, read in the job info */
408
    if(!(state->exe_mnt_data))
409
    {
410
        struct darshan_job job;
411
        ret = darshan_log_get_job(fd, &job);
412

413
        if(ret < 0 || !(state->exe_mnt_data))
414
            return(-1);
415
    }
416

417
    /* count entries */
418
    *count = 0;
419
    pos = state->exe_mnt_data;
420 421 422 423 424 425 426 427 428 429 430 431 432
    while((pos = strchr(pos, '\n')) != NULL)
    {
        pos++;
        (*count)++;
    }

    if(*count == 0)
    {
        /* no mount entries present */
        return(0);
    }

    /* allocate output arrays */
433 434
    *mnt_data_array = malloc((*count)*sizeof(**mnt_data_array));
    assert(*mnt_data_array);
435

436
    /* work through the table and parse each line (except for
437 438
     * first, which holds command line information)
     */
439 440
    pos = state->exe_mnt_data;
    while((pos = strchr(pos, '\n')) != NULL)
441
    {
442 443
        ret = sscanf(++pos, "%s\t%s", (*mnt_data_array)[array_index].mnt_type,
            (*mnt_data_array)[array_index].mnt_path);
444
        if(ret != 2)
445
        {
446
            fprintf(stderr, "Error: poorly formatted mount table in darshan log file.\n");
447 448 449 450 451
            return(-1);
        }
        array_index++;
    }

452 453
    qsort(*mnt_data_array, *count, sizeof(**mnt_data_array), darshan_mnt_info_cmp);

454 455 456
    return(0);
}

457
/* darshan_log_put_mounts()
458 459 460 461 462 463 464 465
 *
 * writes mount information to the darshan log file
 * NOTE: this function call should follow immediately after the call
 * to darshan_log_putexe(), as it assumes the darshan log file pointer
 * is pointing to the offset immediately following the exe string
 *
 * returns 0 on success, -1 on failure
 */
466
int darshan_log_put_mounts(darshan_fd fd, struct darshan_mnt_info *mnt_data_array,
467
    int count)
468
{
469
    struct darshan_fd_int_state *state = fd->state;
470 471
    int i;
    char line[1024];
472 473 474
    char mnt_dat[DARSHAN_EXE_LEN] = {0};
    int mnt_dat_sz = 0;
    char *tmp;
475 476
    int ret;

477 478
    assert(state);

479
    /* write each mount entry to file */
480
    tmp = mnt_dat;
481 482
    for(i=count-1; i>=0; i--)
    {
483
        sprintf(line, "\n%s\t%s", mnt_data_array[i].mnt_type, mnt_data_array[i].mnt_path);
484 485 486 487 488 489

        memcpy(tmp, line, strlen(line));
        tmp += strlen(line);
        mnt_dat_sz += strlen(line);
    }

490 491
    ret = darshan_log_dzwrite(fd, DARSHAN_JOB_REGION_ID, mnt_dat, mnt_dat_sz);
    if (ret != mnt_dat_sz)
492
    {
493
        state->err = -1;
494
        fprintf(stderr, "Error: failed to write darshan log mount data.\n");
495 496 497 498 499 500
        return(-1);
    }

    return(0);
}

501
/* darshan_log_get_namehash()
502
 *
503 504
 * read the set of name records from the darshan log file and add to the
 * given hash table
505 506 507
 *
 * returns 0 on success, -1 on failure
 */
508
int darshan_log_get_namehash(darshan_fd fd, struct darshan_name_record_ref **hash)
509
{
510
    struct darshan_fd_int_state *state = fd->state;
511 512
    char *name_rec_buf;
    int name_rec_buf_sz;
513 514
    int read;
    int read_req_sz;
515 516
    int buf_len = 0;
    int buf_processed;
517

518 519
    assert(state);

520 521
    /* just return if there is no name record mapping data */
    if(fd->name_map.len == 0)
522 523 524 525 526
    {
        *hash = NULL;
        return(0);
    }

527 528 529 530
    /* default to buffer twice as big as default compression buf */
    name_rec_buf_sz = DARSHAN_DEF_COMP_BUF_SZ * 2;
    name_rec_buf = malloc(name_rec_buf_sz);
    if(!name_rec_buf)
531
        return(-1);
532
    memset(name_rec_buf, 0, name_rec_buf_sz);
533

534
    do
535
    {
536
        /* read chunks of the darshan record id -> name mapping from log file,
537
         * constructing a hash table in the process
538
         */
539
        read_req_sz = name_rec_buf_sz - buf_len;
540
        read = darshan_log_dzread(fd, DARSHAN_NAME_MAP_REGION_ID,
541
            name_rec_buf + buf_len, read_req_sz);
542
        if(read < 0)
543
        {
544 545
            fprintf(stderr, "Error: failed to read name hash from darshan log file.\n");
            free(name_rec_buf);
546
            return(-1);
547
        }
548
        buf_len += read;
549

550 551
        /* extract any name records in the buffer */
        buf_processed = state->get_namerecs(name_rec_buf, buf_len, fd->swap_flag, hash);
552

553
        /* copy any leftover data to beginning of buffer to parse next */
554 555
        memcpy(name_rec_buf, name_rec_buf + buf_processed, buf_len - buf_processed);
        buf_len -= buf_processed;
556 557 558 559 560

        /* we keep reading until we get a short read informing us we have
         * read all of the record hash
         */
    } while(read == read_req_sz);
561
    assert(buf_len == 0);
562

563
    free(name_rec_buf);
564 565 566
    return(0);
}

567
/* darshan_log_put_namehash()
568
 *
569
 * writes the hash table of name records to the darshan log file
570 571 572
 * NOTE: this function call should follow immediately after the call
 * to darshan_log_putmounts(), as it assumes the darshan log file pointer
 * is pointing to the offset immediately following the mount information
573 574 575
 *
 * returns 0 on success, -1 on failure
 */
576
int darshan_log_put_namehash(darshan_fd fd, struct darshan_name_record_ref *hash)
577
{
578
    struct darshan_fd_int_state *state = fd->state;
579 580 581
    struct darshan_name_record_ref *ref, *tmp;
    struct darshan_name_record_ref *name_rec;
    int name_rec_len;
582
    int wrote;
583

584 585
    assert(state);

586
    /* allocate memory for largest possible hash record */
587 588
    name_rec = malloc(sizeof(struct darshan_name_record) + PATH_MAX);
    if(!name_rec)
589
        return(-1);
590
    memset(name_rec, 0, sizeof(struct darshan_name_record) + PATH_MAX);
591

592
    /* individually serialize each hash record and write to log file */
593 594
    HASH_ITER(hlink, hash, ref, tmp)
    {
595 596
        name_rec_len = sizeof(struct darshan_name_record) + strlen(ref->name_record->name);
        memcpy(name_rec, ref->name_record, name_rec_len);
597

598
        /* write this hash entry to log file */
599 600 601
        wrote = darshan_log_dzwrite(fd, DARSHAN_NAME_MAP_REGION_ID,
            name_rec, name_rec_len);
        if(wrote != name_rec_len)
602
        {
603
            state->err = -1;
604 605
            fprintf(stderr, "Error: failed to write name hash to darshan log file.\n");
            free(name_rec);
606 607
            return(-1);
        }
608 609
    }

610
    free(name_rec);
611 612 613
    return(0);
}

614
/* darshan_log_get_mod()
615
 *
616 617
 * get a chunk of module data from the darshan log file
 *
618
 * returns number of bytes read on success, -1 on failure
619
 */
620
int darshan_log_get_mod(darshan_fd fd, darshan_module_id mod_id,
621
    void *mod_buf, int mod_buf_sz)
622
{
623
    struct darshan_fd_int_state *state = fd->state;
624
    int ret;
625

626 627
    assert(state);

628
    if(mod_id < 0 || mod_id >= DARSHAN_MAX_MODS)
629
    {
630 631
        fprintf(stderr, "Error: invalid Darshan module id.\n");
        return(-1);
632 633
    }

634 635 636 637
    if(fd->mod_map[mod_id].len == 0)
        return(0); /* no data corresponding to this mod_id */

    /* read this module's data from the log file */
638
    ret = darshan_log_dzread(fd, mod_id, mod_buf, mod_buf_sz);
639
    if(ret < 0)
640
    {
641
        fprintf(stderr,
642
            "Error: failed to read module %s data from darshan log file.\n",
643
            darshan_module_names[mod_id]);
644 645 646
        return(-1);
    }

647
    return(ret);
648 649
}

650
/* darshan_log_put_mod()
651 652
 *
 * write a chunk of module data to the darshan log file
Shane Snyder's avatar
Shane Snyder committed
653 654 655 656 657 658
 * NOTE: this function call should be called directly after the
 * put_hash() function, as it expects the file pointer to be
 * positioned directly past the record hash location. Also,
 * for a set of modules with data to write to file, this function
 * should be called in order of increasing module identifiers,
 * as the darshan log file format expects this ordering.
659
 *
660
 * returns number of bytes written on success, -1 on failure
661
 */
662
int darshan_log_put_mod(darshan_fd fd, darshan_module_id mod_id,
663
    void *mod_buf, int mod_buf_sz, int ver)
664
{
665
    struct darshan_fd_int_state *state = fd->state;
666 667
    int ret;

668 669
    assert(state);

Shane Snyder's avatar
Shane Snyder committed
670
    if(mod_id < 0 || mod_id >= DARSHAN_MAX_MODS)
671
    {
672
        state->err = -1;
Shane Snyder's avatar
Shane Snyder committed
673 674
        fprintf(stderr, "Error: invalid Darshan module id.\n");
        return(-1);
675 676 677
    }

    /* write the module chunk to the log file */
678 679
    ret = darshan_log_dzwrite(fd, mod_id, mod_buf, mod_buf_sz);
    if(ret != mod_buf_sz)
680
    {
681
        state->err = -1;
682 683 684 685 686 687
        fprintf(stderr,
            "Error: failed to write module %s data to darshan log file.\n",
            darshan_module_names[mod_id]);
        return(-1);
    }

688 689 690
    /* set the version number for this module's data */
    fd->mod_ver[mod_id] = ver;

691 692 693
    return(0);
}

694 695
/* darshan_log_close()
 *
 * close an open darshan file descriptor, freeing any resources
 *
 * fd - darshan log descriptor; it must not be used after this call
 *
 * For a log that was created for writing, the last region is flushed with
 * the flush routine matching the descriptor's compression type and the log
 * header is written. If the flush or header write fails, or if an earlier
 * write already failed, the log file is unlinked.
 */
void darshan_log_close(darshan_fd fd)
{
    struct darshan_fd_int_state *state = fd->state;
    int ret;

    assert(state);

    /* if the file was created for writing */
    if(state->creat_flag)
    {
        /* flush the last region of the log to file; each compression type
         * is handled on its own so that a failed flush is never retried
         * with another library's flush routine
         */
        switch(fd->comp_type)
        {
            case DARSHAN_ZLIB_COMP:
                ret = darshan_log_libz_flush(fd, state->dz.prev_reg_id);
                break;
#ifdef HAVE_LIBBZ2
            case DARSHAN_BZIP2_COMP:
                ret = darshan_log_bzip2_flush(fd, state->dz.prev_reg_id);
                break;
#endif
            default:
                /* no flush routine for this compression type */
                ret = -1;
                break;
        }

        if(ret != 0)
        {
            /* if flush fails, remove the output log file (done below) */
            state->err = -1;
            fprintf(stderr, "Error: final flush to log file failed.\n");
        }

        /* if no errors flushing, write the log header before closing */
        if(state->err != -1)
        {
            ret = darshan_log_put_header(fd);
            if(ret < 0)
                state->err = -1;
        }
    }

    close(state->fildes);

    /* remove output log file if error writing to it */
    if((state->creat_flag) && (state->err == -1))
    {
        fprintf(stderr, "Unlinking darshan log file %s ...\n",
            state->logfile_path);
        unlink(state->logfile_path);
    }

    darshan_log_dzdestroy(fd);
    free(state->exe_mnt_data);
    free(state);
    free(fd);

    return;
}

757
/* **************************************************** */
758

759 760 761 762 763 764 765 766 767 768 769 770 771
static int darshan_mnt_info_cmp(const void *a, const void *b)
{
    struct darshan_mnt_info *m_a = (struct darshan_mnt_info *)a;
    struct darshan_mnt_info *m_b = (struct darshan_mnt_info *)b;

    if(strlen(m_a->mnt_path) > strlen(m_b->mnt_path))
        return(-1);
    else if(strlen(m_a->mnt_path) < strlen(m_b->mnt_path))
        return(1);
    else
        return(0);
}

772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834
/* parse the complete name records found in a buffer of log data and add
 * them to the output hash table
 *
 * name_rec_buf - buffer of serialized name records (modified in place when
 *                byte swapping is requested)
 * buf_len      - number of valid bytes in name_rec_buf
 * swap_flag    - nonzero if record ids must be byte swapped
 * hash         - in/out hash table; records whose id is already present
 *                are skipped
 *
 * returns the number of bytes consumed from the buffer (a trailing,
 * incomplete record is left unconsumed), or -1 if memory allocation fails
 */
static int darshan_log_get_namerecs(void *name_rec_buf, int buf_len,
    int swap_flag, struct darshan_name_record_ref **hash)
{
    struct darshan_name_record_ref *ref;
    struct darshan_name_record *name_rec;
    char *tmp_p;
    int buf_processed = 0;
    int rec_len;

    /* work through the name record buffer -- deserialize the record data
     * and add to the output hash table
     * NOTE: these mapping pairs are variable in length, so we have to be able
     * to handle incomplete mappings temporarily here
     */
    name_rec = (struct darshan_name_record *)name_rec_buf;
    /* NOTE(review): the strict '>' leaves a final record with an empty name
     * (id plus a lone terminator) unparsed -- confirm such records cannot
     * occur
     */
    while(buf_len > sizeof(darshan_record_id) + 1)
    {
        if(strnlen(name_rec->name, buf_len - sizeof(darshan_record_id)) ==
            (buf_len - sizeof(darshan_record_id)))
        {
            /* if this record name's terminating null character is not
             * present, we need to read more of the buffer before continuing
             */
            break;
        }
        /* serialized record: id, name characters, terminating null */
        rec_len = sizeof(darshan_record_id) + strlen(name_rec->name) + 1;

        if(swap_flag)
        {
            /* we need to sort out endianness issues before deserializing */
            DARSHAN_BSWAP64(&(name_rec->id));
        }

        /* only ids that are not in the hash yet get a new entry */
        HASH_FIND(hlink, *hash, &(name_rec->id), sizeof(darshan_record_id), ref);
        if(!ref)
        {
            ref = malloc(sizeof(*ref));
            if(!ref)
                return(-1);

            ref->name_record = malloc(rec_len);
            if(!ref->name_record)
            {
                free(ref);
                return(-1);
            }

            /* copy the name record over from the hash buffer */
            memcpy(ref->name_record, name_rec, rec_len);

            /* add this record to the hash */
            HASH_ADD(hlink, *hash, name_record->id, sizeof(darshan_record_id), ref);
        }

        /* advance to the next serialized record in the buffer */
        tmp_p = (char *)name_rec + rec_len;
        name_rec = (struct darshan_name_record *)tmp_p;
        buf_len -= rec_len;
        buf_processed += rec_len;
    }

    return(buf_processed);
}

835 836 837 838 839
/* read the header of the darshan log and set internal fd data structures
 * NOTE: this is the only portion of the darshan log that is uncompressed
 *
 * returns 0 on success, -1 on failure
 */
840
static int darshan_log_get_header(darshan_fd fd)
841 842 843 844 845 846 847 848 849 850 851 852
{
    struct darshan_header header;
    int i;
    int ret;

    ret = darshan_log_seek(fd, 0);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }

853 854 855 856 857 858 859 860 861
    /* read the version number so we know how to process this log */
    ret = darshan_log_read(fd, &fd->version, 8);
    if(ret < 8)
    {
        fprintf(stderr, "Error: invalid log file (failed to read version).\n");
        return(-1);
    }

    /* other log file versions can be detected and handled here */
862 863 864 865 866 867 868 869 870
    if(strcmp(fd->version, "3.00") == 0)
    {
        fd->state->get_namerecs = darshan_log_get_namerecs_3_00;
    }
    else if(strcmp(fd->version, "3.01") == 0)
    {
        fd->state->get_namerecs = darshan_log_get_namerecs;
    }
    else
871 872
    {
        fprintf(stderr, "Error: incompatible darshan file.\n");
873 874
        fprintf(stderr, "Error: expected version %s, but got %s\n",
            DARSHAN_LOG_VERSION, fd->version);
875 876 877 878 879 880 881 882 883 884 885
        return(-1);
    }

    /* seek back so we can read the entire header */
    ret = darshan_log_seek(fd, 0);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }

886 887
    /* read uncompressed header from log file */
    ret = darshan_log_read(fd, &header, sizeof(header));
888
    if(ret != (int)sizeof(header))
889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907
    {
        fprintf(stderr, "Error: failed to read darshan log file header.\n");
        return(-1);
    }

    if(header.magic_nr == DARSHAN_MAGIC_NR)
    {
        /* no byte swapping needed, this file is in host format already */
        fd->swap_flag = 0;
    }
    else
    {
        /* try byte swapping */
        DARSHAN_BSWAP64(&(header.magic_nr));
        if(header.magic_nr == DARSHAN_MAGIC_NR)
        {
            fd->swap_flag = 1;

            /* swap the log map variables in the header */
908 909
            DARSHAN_BSWAP64(&(header.name_map.off));
            DARSHAN_BSWAP64(&(header.name_map.len));
910 911 912 913
            for(i = 0; i < DARSHAN_MAX_MODS; i++)
            {
                DARSHAN_BSWAP64(&(header.mod_map[i].off));
                DARSHAN_BSWAP64(&(header.mod_map[i].len));
914
                DARSHAN_BSWAP32(&(header.mod_ver[i]));
915 916 917 918 919 920 921 922 923 924
            }
        }
        else
        {
            /* otherwise this file is just broken */
            fprintf(stderr, "Error: bad magic number in darshan log file.\n");
            return(-1);
        }
    }

925
    /* set some fd fields based on what's stored in the header */
926
    fd->comp_type = header.comp_type;
927
    fd->partial_flag = header.partial_flag;
928
    memcpy(fd->mod_ver, header.mod_ver, DARSHAN_MAX_MODS * sizeof(uint32_t));
929 930

    /* save the mapping of data within log file to this file descriptor */
931
    memcpy(&fd->name_map, &(header.name_map), sizeof(struct darshan_log_map));
932 933
    memcpy(&fd->mod_map, &(header.mod_map), DARSHAN_MAX_MODS * sizeof(struct darshan_log_map));

934 935
    /* there may be nothing following the job data, so safety check map */
    fd->job_map.off = sizeof(struct darshan_header);
936
    if(fd->name_map.off == 0)
937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959
    {
        for(i = 0; i < DARSHAN_MAX_MODS; i++)
        {
            if(fd->mod_map[i].off != 0)
            {
                fd->job_map.len = fd->mod_map[i].off - fd->job_map.off;
                break;
            }
        }

        if(fd->job_map.len == 0)
        {
            struct stat sbuf;
            if(fstat(fd->state->fildes, &sbuf) != 0)
            {
                fprintf(stderr, "Error: unable to stat darshan log file.\n");
                return(-1);
            }
            fd->job_map.len = sbuf.st_size - fd->job_map.off;
        }
    }
    else
    {
960
        fd->job_map.len = fd->name_map.off - fd->job_map.off;
961 962
    }

963 964 965 966 967 968 969
    return(0);
}

/* write a darshan header to log file
 *
 * returns 0 on success, -1 on failure
 */
970
static int darshan_log_put_header(darshan_fd fd)
971 972 973 974 975 976 977 978 979 980 981 982 983 984
{
    struct darshan_header header;
    int ret;

    ret = darshan_log_seek(fd, 0);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }

    memset(&header, 0, sizeof(header));
    strcpy(header.version_string, DARSHAN_LOG_VERSION);
    header.magic_nr = DARSHAN_MAGIC_NR;
985
    header.comp_type = fd->comp_type;
986
    header.partial_flag = fd->partial_flag;
987
    memcpy(&header.name_map, &fd->name_map, sizeof(struct darshan_log_map));
988 989
    memcpy(header.mod_map, fd->mod_map, DARSHAN_MAX_MODS * sizeof(struct darshan_log_map));
    memcpy(header.mod_ver, fd->mod_ver, DARSHAN_MAX_MODS * sizeof(uint32_t));
990 991 992

    /* write header to file */
    ret = darshan_log_write(fd, &header, sizeof(header));
993
    if(ret != (int)sizeof(header))
994 995 996 997 998 999 1000 1001
    {
        fprintf(stderr, "Error: failed to write Darshan log file header.\n");
        return(-1);
    }

    return(0);
}

1002 1003 1004 1005
/* reposition the log file to an absolute offset
 *
 * returns 0 if the file is positioned at offset, -1 on failure.
 */
static int darshan_log_seek(darshan_fd fd, off_t offset)
{
    struct darshan_fd_int_state *st = fd->state;

    /* the tracked position already matches, so skip the system call */
    if(st->pos == offset)
        return(0);

    /* anything but landing exactly on the requested offset is a failure;
     * the tracked position is left untouched in that case
     */
    if(lseek(st->fildes, offset, SEEK_SET) != offset)
        return(-1);

    st->pos = offset;
    return(0);
}

1022
/* return amount read on success, 0 on EOF, -1 on failure.
1023
 */
1024
static int darshan_log_read(darshan_fd fd, void* buf, int len)
1025
{
1026
    struct darshan_fd_int_state *state = fd->state;
1027
    int ret;
1028
    unsigned int read_so_far = 0;
1029

1030 1031 1032 1033 1034 1035 1036 1037 1038
    do
    {
        ret = read(state->fildes, buf + read_so_far, len - read_so_far);
        if(ret <= 0)
            break;
        read_so_far += ret;
    } while(read_so_far < len);
    if(ret < 0)
        return(-1);
1039

1040 1041
    state->pos += read_so_far;
    return(read_so_far);
1042 1043
}

1044
/* return amount written on success, -1 on failure.
1045
 */
1046
static int darshan_log_write(darshan_fd fd, void* buf, int len)
1047
{
1048
    struct darshan_fd_int_state *state = fd->state;
1049
    int ret;
1050
    unsigned int wrote_so_far = 0;
1051

1052 1053 1054 1055 1056 1057 1058 1059 1060
    do
    {
        ret = write(state->fildes, buf + wrote_so_far, len - wrote_so_far);
        if(ret <= 0)
            break;
        wrote_so_far += ret;
    } while(wrote_so_far < len);
    if(ret < 0)
        return(-1);
1061

1062 1063
    state->pos += wrote_so_far;
    return(wrote_so_far);
1064 1065
}

1066
/* set up the compression/decompression state for a darshan log file
 *
 * allocates the staging buffer and the stream state matching
 * fd->comp_type; logs without creat_flag set are initialized for
 * decompression, created logs for compression
 *
 * returns 0 on success, -1 on failure. on failure the staging buffer
 * is released and state->dz.buf is reset to NULL.
 */
static int darshan_log_dzinit(darshan_fd fd)
{
    struct darshan_fd_int_state *state = fd->state;
    int ret;

    /* initialize buffers for staging compressed data
     * to/from log file
     */
    state->dz.buf = malloc(DARSHAN_DEF_COMP_BUF_SZ);
    if(state->dz.buf == NULL)
        return(-1);
    state->dz.size = 0;
    state->dz.prev_reg_id = DARSHAN_HEADER_REGION_ID;

    switch(fd->comp_type)
    {
        case DARSHAN_ZLIB_COMP:
        {
            z_stream *tmp_zstrm = malloc(sizeof(*tmp_zstrm));
            if(!tmp_zstrm)
                goto err_free_buf;

            tmp_zstrm->zalloc = Z_NULL;
            tmp_zstrm->zfree = Z_NULL;
            tmp_zstrm->opaque = Z_NULL;
            tmp_zstrm->avail_in = 0;
            tmp_zstrm->next_in = Z_NULL;

            /* TODO: worth using {inflate/deflate}Init2 ?? */
            if(!(state->creat_flag))
            {
                /* read only file, init inflate algorithm */
                ret = inflateInit(tmp_zstrm);
            }
            else
            {
                /* write only file, init deflate algorithm */
                ret = deflateInit(tmp_zstrm, Z_DEFAULT_COMPRESSION);
                /* compressed output is staged in the dz buffer */
                tmp_zstrm->avail_out = DARSHAN_DEF_COMP_BUF_SZ;
                tmp_zstrm->next_out = state->dz.buf;
            }
            if(ret != Z_OK)
            {
                free(tmp_zstrm);
                goto err_free_buf;
            }
            state->dz.comp_dat = tmp_zstrm;
            break;
        }
#ifdef HAVE_LIBBZ2
        case DARSHAN_BZIP2_COMP:
        {
            bz_stream *tmp_bzstrm = malloc(sizeof(*tmp_bzstrm));
            if(!tmp_bzstrm)
                goto err_free_buf;

            tmp_bzstrm->bzalloc = NULL;
            tmp_bzstrm->bzfree = NULL;
            tmp_bzstrm->opaque = NULL;
            tmp_bzstrm->avail_in = 0;
            tmp_bzstrm->next_in = NULL;

            if(!(state->creat_flag))
            {
                /* read only file, init decompress algorithm */
                ret = BZ2_bzDecompressInit(tmp_bzstrm, 1, 0);
            }
            else
            {
                /* write only file, init compress algorithm */
                ret = BZ2_bzCompressInit(tmp_bzstrm, 9, 1, 30);
                /* compressed output is staged in the dz buffer */
                tmp_bzstrm->avail_out = DARSHAN_DEF_COMP_BUF_SZ;
                tmp_bzstrm->next_out = (char *)state->dz.buf;
            }
            if(ret != BZ_OK)
            {
                free(tmp_bzstrm);
                goto err_free_buf;
            }
            state->dz.comp_dat = tmp_bzstrm;
            break;
        }
#endif
        case DARSHAN_NO_COMP:
        {
            /* we just track an offset into the staging buffers for no_comp */
            int *buf_off = malloc(sizeof(*buf_off));
            if(!buf_off)
                goto err_free_buf;
            *buf_off = 0;
            state->dz.comp_dat = buf_off;
            break;
        }
        default:
            fprintf(stderr, "Error: invalid compression type.\n");
            goto err_free_buf;
    }

    return(0);

err_free_buf:
    /* single exit for every failure after the staging buffer exists, so
     * no path can leak it; clear the pointer so it cannot be reused
     */
    free(state->dz.buf);
    state->dz.buf = NULL;
    return(-1);
}

1171
/* tear down the compression/decompression state of a log file:
 * finalize the stream matching fd->comp_type, then release the
 * stream state and the staging buffer
 */
static void darshan_log_dzdestroy(darshan_fd fd)
{
    struct darshan_fd_int_state *st = fd->state;
    void *strm = st->dz.comp_dat;
    /* logs that were not created by us were opened for reading */
    int reading = !(st->creat_flag);

    if(fd->comp_type == DARSHAN_ZLIB_COMP)
    {
        if(reading)
            inflateEnd((z_stream *)strm);
        else
            deflateEnd((z_stream *)strm);
    }
#ifdef HAVE_LIBBZ2
    else if(fd->comp_type == DARSHAN_BZIP2_COMP)
    {
        if(reading)
            BZ2_bzDecompressEnd((bz_stream *)strm);
        else
            BZ2_bzCompressEnd((bz_stream *)strm);
    }
#endif
    else if(fd->comp_type != DARSHAN_NO_COMP)
    {
        /* unknown type: complain, but still release the buffers below */
        fprintf(stderr, "Error: invalid compression type.\n");
    }
    /* DARSHAN_NO_COMP needs no stream finalization */

    free(strm);
    free(st->dz.buf);
}

static int darshan_log_dzread(darshan_fd fd, int region_id, void *buf, int len)
{
1205
    struct darshan_fd_int_state *state = fd->state;
1206 1207
    struct darshan_log_map map;
    int reset_strm_flag = 0;
1208 1209
    int ret;

1210 1211 1212 1213
    /* if new log region, we reload buffers and clear eor flag */
    if(region_id != state->dz.prev_reg_id)
    {
        state->dz.eor = 0;
1214
        state->dz.size = 0;
1215 1216 1217 1218 1219
        reset_strm_flag = 1; /* reset libz/bzip2 streams */
    }

    if(region_id == DARSHAN_JOB_REGION_ID)
        map = fd->job_map;
1220 1221
    else if(region_id == DARSHAN_NAME_MAP_REGION_ID)
        map = fd->name_map;