/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

7
#define _GNU_SOURCE
8
#include "darshan-util-config.h"
9 10
#include <stdio.h>
#include <string.h>
11
#include <assert.h>
12
#include <stdlib.h>
13
#include <unistd.h>
14
#include <inttypes.h>
15 16 17
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
18
#include <errno.h>
19

20 21
#include "darshan-logutils.h"

22 23 24
/* default input buffer size for decompression algorithm;
 * darshan_log_gethash() also sizes its record-map staging buffer
 * at twice this value
 */
#define DARSHAN_DEF_COMP_BUF_SZ (1024*1024) /* 1 MiB */

25
/* special identifiers for referring to header, job, and
 * record map regions of the darshan log file
 *
 * These are passed as the region_id argument of the region-based
 * read/write helpers; module data is addressed through the same argument
 * using the module id (see darshan_log_getmod()/darshan_log_putmod()).
 */
#define DARSHAN_HEADER_REGION_ID    (-3)
#define DARSHAN_JOB_REGION_ID       (-2)
#define DARSHAN_REC_MAP_REGION_ID   (-1)

32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
/* compression/decompression bookkeeping embedded in each log descriptor's
 * internal state (see struct darshan_fd_int_state)
 */
struct darshan_dz_state
{
    /* (libz/bzip2) stream data structure for managing
     * compression and decompression state; stored as an opaque pointer
     * so the same field serves either library */
    void *strm;
    /* buffer for staging compressed data to/from log file */
    unsigned char *buf;
    /* size of staging buffer */
    int size;
    /* for reading logs, flag indicating end of log file region */
    int eor;
    /* the region we last tried reading/writing; darshan_log_close()
     * passes this to the final flush call */
    int prev_reg_id;
};

/* internal fd data structure
 *
 * Allocated and zeroed by darshan_log_open()/darshan_log_create() and
 * released by darshan_log_close().
 */
struct darshan_fd_int_state
{
    /* posix file descriptor for the log file */
    int fildes;
    /* file pointer position */
    int64_t pos;
    /* flag indicating whether log file was created (and written);
     * set to 1 by darshan_log_create() */
    int creat_flag;
    /* compression type used on log file (libz or bzip2) */
    enum darshan_comp_type comp_type;
    /* log file path name; darshan_log_close() uses it to unlink a
     * created log after a write error */
    char logfile_path[PATH_MAX];
    /* pointer to exe & mount data in darshan job data structure;
     * heap buffer allocated by darshan_log_getjob() and freed by
     * darshan_log_close() */
    char *exe_mnt_data;
    /* whether previous file operations have failed; the put* functions
     * set this to -1 on a write failure */
    int err;

    /* compression/decompression state */
    struct darshan_dz_state dz;
};

69
/* qsort() comparator used for mount table entries */
static int darshan_mnt_info_cmp(const void *a, const void *b);

/* log header helpers */
static int darshan_log_getheader(darshan_fd fd);
static int darshan_log_putheader(darshan_fd fd);

/* seek/read/write helpers operating on the log file descriptor */
static int darshan_log_seek(darshan_fd fd, off_t offset);
static int darshan_log_read(darshan_fd fd, void *buf, int len);
static int darshan_log_write(darshan_fd fd, void *buf, int len);

/* setup and teardown of the compression state */
static int darshan_log_dzinit(struct darshan_fd_int_state *state);
static void darshan_log_dzdestroy(struct darshan_fd_int_state *state);

/* region-based read/write; region_id selects the log region */
static int darshan_log_dzread(darshan_fd fd, int region_id,
    void *buf, int len);
static int darshan_log_dzwrite(darshan_fd fd, int region_id,
    void *buf, int len);

/* libz variants */
static int darshan_log_libz_read(darshan_fd fd, int region_id,
    void *buf, int len);
static int darshan_log_libz_write(darshan_fd fd, int region_id,
    void *buf, int len);
static int darshan_log_libz_flush(darshan_fd fd, int region_id);

#ifdef HAVE_LIBBZ2
/* bzip2 variants, only built when libbz2 is available */
static int darshan_log_bzip2_read(darshan_fd fd, int region_id,
    void *buf, int len);
static int darshan_log_bzip2_write(darshan_fd fd, int region_id,
    void *buf, int len);
static int darshan_log_bzip2_flush(darshan_fd fd, int region_id);
#endif

/* staging-buffer load/unload for a log map region */
static int darshan_log_dzload(darshan_fd fd, struct darshan_log_map map);
static int darshan_log_dzunload(darshan_fd fd, struct darshan_log_map *map_p);

Shane Snyder's avatar
Shane Snyder committed
90
/* each module's implementation of the darshan logutil functions */
91
#define X(a, b, c, d) d,
92 93
struct darshan_mod_logutil_funcs *mod_logutils[DARSHAN_MAX_MODS] =
{
Shane Snyder's avatar
Shane Snyder committed
94
    DARSHAN_MODULE_IDS
95
};
Shane Snyder's avatar
Shane Snyder committed
96
#undef X
97 98 99

/* darshan_log_open()
 *
100
 * open an existing darshan log file for reading only
101
 *
102
 * returns file descriptor on success, NULL on failure
103
 */
104
darshan_fd darshan_log_open(const char *name)
105
{
106
    darshan_fd tmp_fd;
107
    int ret;
108

109
    /* allocate a darshan file descriptor */
110 111 112 113
    tmp_fd = malloc(sizeof(*tmp_fd));
    if(!tmp_fd)
        return(NULL);
    memset(tmp_fd, 0, sizeof(*tmp_fd));
114 115 116 117 118 119 120
    tmp_fd->state = malloc(sizeof(struct darshan_fd_int_state));
    if(!tmp_fd->state)
    {
        free(tmp_fd->state);
        return(NULL);
    }
    memset(tmp_fd->state, 0, sizeof(struct darshan_fd_int_state));
121

122
    /* open the log file in read mode */
123 124
    tmp_fd->state->fildes = open(name, O_RDONLY);
    if(tmp_fd->state->fildes < 0)
125
    {
126
        fprintf(stderr, "Error: failed to open darshan log file %s.\n", name);
127
        free(tmp_fd->state);
128 129 130
        free(tmp_fd);
        return(NULL);
    }
131
    strncpy(tmp_fd->state->logfile_path, name, PATH_MAX);
132 133 134 135 136

    /* read the header from the log file to init fd data structures */
    ret = darshan_log_getheader(tmp_fd);
    if(ret < 0)
    {
137
        fprintf(stderr, "Error: failed to read darshan log file header.\n");
138 139
        close(tmp_fd->state->fildes);
        free(tmp_fd->state);
140
        free(tmp_fd);
141 142 143 144
        return(NULL);
    }

    /* initialize compression data structures */
145
    ret = darshan_log_dzinit(tmp_fd->state);
146 147 148
    if(ret < 0)
    {
        fprintf(stderr, "Error: failed to initialize decompression data structures.\n");
149 150
        close(tmp_fd->state->fildes);
        free(tmp_fd->state);
151 152
        free(tmp_fd);
        return(NULL);
153 154
    }

155 156 157 158 159 160 161
    return(tmp_fd);
}

/* darshan_log_create()
 *
 * create a darshan log file for writing with the given compression method
 *
162
 * returns file descriptor on success, NULL on failure
163
 */
164 165
darshan_fd darshan_log_create(const char *name, enum darshan_comp_type comp_type,
    int partial_flag)
166 167
{
    darshan_fd tmp_fd;
168
    int ret;
169

170
    /* allocate a darshan file descriptor */
171 172 173 174
    tmp_fd = malloc(sizeof(*tmp_fd));
    if(!tmp_fd)
        return(NULL);
    memset(tmp_fd, 0, sizeof(*tmp_fd));
175 176 177 178 179 180 181
    tmp_fd->state = malloc(sizeof(struct darshan_fd_int_state));
    if(!tmp_fd->state)
    {
        free(tmp_fd);
        return(NULL);
    }
    memset(tmp_fd->state, 0, sizeof(struct darshan_fd_int_state));
182

183 184 185
    /* create the log for writing, making sure to not overwrite existing log */
    tmp_fd->state->fildes = creat(name, 0400);
    if(tmp_fd->state->fildes < 0)
186
    {
187
        fprintf(stderr, "Error: failed to open darshan log file %s.\n", name);
188
        free(tmp_fd->state);
189
        free(tmp_fd);
190
        return(NULL);
191
    }
192 193
    tmp_fd->state->creat_flag = 1;
    tmp_fd->state->comp_type = comp_type;
194
    tmp_fd->partial_flag = partial_flag;
195
    strncpy(tmp_fd->state->logfile_path, name, PATH_MAX);
196

197 198 199 200 201
    /* position file pointer to prealloc space for the log file header
     * NOTE: the header is written at close time, after all internal data
     * structures have been properly set
     */
    ret = darshan_log_seek(tmp_fd, sizeof(struct darshan_header));
202
    if(ret < 0)
203 204
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
205 206
        close(tmp_fd->state->fildes);
        free(tmp_fd->state);
207 208 209
        free(tmp_fd);
        unlink(name);
        return(NULL);
210 211
    }

212
    /* initialize compression data structures */
213
    ret = darshan_log_dzinit(tmp_fd->state);
214 215
    if(ret < 0)
    {
216
        fprintf(stderr, "Error: failed to initialize compression data structures.\n");
217 218
        close(tmp_fd->state->fildes);
        free(tmp_fd->state);
219 220 221
        free(tmp_fd);
        unlink(name);
        return(NULL);
222 223
    }

224
    return(tmp_fd);
225 226
}

227
/* darshan_log_getjob()
228 229
 *
 * read job level metadata from the darshan log file
230
 *
231
 * returns 0 on success, -1 on failure
232
 */
233
int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
234
{
235
    struct darshan_fd_int_state *state = fd->state;
236 237
    char job_buf[DARSHAN_JOB_RECORD_SIZE] = {0};
    int job_buf_sz = DARSHAN_JOB_RECORD_SIZE;
238
    int ret;
239

240
    assert(state);
241 242
    assert(fd->job_map.len > 0 && fd->job_map.off > 0);

243
    /* read the compressed job data from the log file */
244 245
    ret = darshan_log_dzread(fd, DARSHAN_JOB_REGION_ID, job_buf, job_buf_sz);
    if(ret <= sizeof(*job))
246
    {
247
        fprintf(stderr, "Error: failed to read darshan log file job data.\n");
248 249
        return(-1);
    }
250 251

    memcpy(job, job_buf, sizeof(*job));
252

253
    if(fd->swap_flag)
254
    {
255 256 257 258 259 260
        /* swap bytes if necessary */
        DARSHAN_BSWAP64(&job->uid);
        DARSHAN_BSWAP64(&job->start_time);
        DARSHAN_BSWAP64(&job->end_time);
        DARSHAN_BSWAP64(&job->nprocs);
        DARSHAN_BSWAP64(&job->jobid);
261 262
    }

263
    /* save trailing exe & mount information, so it can be retrieved later */
264 265 266
    if(!(state->exe_mnt_data))
        state->exe_mnt_data = malloc(DARSHAN_EXE_LEN+1);
    if(!(state->exe_mnt_data))
267
        return(-1);
268
    memcpy(state->exe_mnt_data, &job_buf[sizeof(*job)], DARSHAN_EXE_LEN+1);
269

270 271 272
    return(0);
}

273 274
/* darshan_log_putjob()
 *
 * write job level metadata to darshan log file
 *
 * Works on a private copy of the job record so the caller's structure is
 * not modified. Non-empty metadata that does not already end in a
 * newline gets one appended when there is room for it.
 *
 * fd  - log descriptor created for writing
 * job - job record to write
 *
 * returns 0 on success, -1 on failure (state->err is also set to -1)
 */
int darshan_log_putjob(darshan_fd fd, struct darshan_job *job)
{
    struct darshan_fd_int_state *state = fd->state;
    struct darshan_job job_copy;
    int len;
    int ret;

    assert(state);

    memset(&job_copy, 0, sizeof(job_copy));
    memcpy(&job_copy, job, sizeof(job_copy));

    /* check for newline in existing metadata, add if needed;
     * appending writes metadata[len] and metadata[len+1], so both
     * indices must lie inside the array
     */
    len = strlen(job_copy.metadata);
    if(len > 0 && len < DARSHAN_JOB_METADATA_LEN &&
        (size_t)len + 1 < sizeof(job_copy.metadata))
    {
        if(job_copy.metadata[len-1] != '\n')
        {
            job_copy.metadata[len] = '\n';
            job_copy.metadata[len+1] = '\0';
        }
    }

    /* write the compressed job data to log file */
    ret = darshan_log_dzwrite(fd, DARSHAN_JOB_REGION_ID, &job_copy, sizeof(job_copy));
    if(ret < 0 || (size_t)ret != sizeof(job_copy))
    {
        state->err = -1;
        fprintf(stderr, "Error: failed to write darshan log file job data.\n");
        return(-1);
    }

    return(0);
}

/* darshan_log_getexe()
 *
 * reads the application exe name from darshan log file
 * 
 * returns 0 on success, -1 on failure 
 */
320 321
int darshan_log_getexe(darshan_fd fd, char *buf)
{
322
    struct darshan_fd_int_state *state = fd->state;
323
    char *newline;
324
    int ret;
325

326 327
    assert(state);

328
    /* if the exe/mount data has not been saved yet, read in the job info */
329
    if(!(state->exe_mnt_data))
330
    {
331 332
        struct darshan_job job;
        ret = darshan_log_getjob(fd, &job);
333

334
        if(ret < 0 || !(state->exe_mnt_data))
335
            return(-1);
336
    }
337

338
    /* exe string is located before the first line break */
339
    newline = strchr(state->exe_mnt_data, '\n');
340 341

    /* copy over the exe string */
342
    if(newline)
343
        memcpy(buf, state->exe_mnt_data, (newline - state->exe_mnt_data));
344 345 346 347

    return (0);
}

348 349 350
/* darshan_log_putexe()
 *
 * writes the application exe name to darshan log file
 * NOTE: this needs to be called immediately following put_job as it
 * expects the file pointer to be positioned immediately following
 * the darshan job information
 *
 * The string is written to the job region without its terminating NUL.
 *
 * returns 0 on success, -1 on failure (state->err is also set to -1)
 */
int darshan_log_putexe(darshan_fd fd, char *buf)
{
    struct darshan_fd_int_state *state = fd->state;
    int exe_len = strlen(buf);

    assert(fd->state);

    if(darshan_log_dzwrite(fd, DARSHAN_JOB_REGION_ID, buf, exe_len) == exe_len)
        return(0);

    /* short or failed write */
    state->err = -1;
    fprintf(stderr, "Error: failed to write exe string to darshan log file.\n");
    return(-1);
}

376 377
/* darshan_log_getmounts()
 * 
378 379 380
 * retrieves mount table information from the log. Note that mnt_data_array
 * is an array that will be allocated by the function and must be
 * freed by the caller. count will indicate the size of the array
381 382
 *
 * returns 0 on success, -1 on failure
383
 */
384 385
int darshan_log_getmounts(darshan_fd fd, struct darshan_mnt_info **mnt_data_array,
    int* count)
386
{
387
    struct darshan_fd_int_state *state = fd->state;
388 389
    char *pos;
    int array_index = 0;
390
    int ret;
391

392 393
    assert(state);

394
    /* if the exe/mount data has not been saved yet, read in the job info */
395
    if(!(state->exe_mnt_data))
396
    {
397 398
        struct darshan_job job;
        ret = darshan_log_getjob(fd, &job);
399

400
        if(ret < 0 || !(state->exe_mnt_data))
401
            return(-1);
402
    }
403

404
    /* count entries */
405
    *count = 0;
406
    pos = state->exe_mnt_data;
407 408 409 410 411 412 413 414 415 416 417 418 419
    while((pos = strchr(pos, '\n')) != NULL)
    {
        pos++;
        (*count)++;
    }

    if(*count == 0)
    {
        /* no mount entries present */
        return(0);
    }

    /* allocate output arrays */
420 421
    *mnt_data_array = malloc((*count)*sizeof(**mnt_data_array));
    assert(*mnt_data_array);
422

423
    /* work through the table and parse each line (except for
424 425
     * first, which holds command line information)
     */
426 427
    pos = state->exe_mnt_data;
    while((pos = strchr(pos, '\n')) != NULL)
428
    {
429 430
        ret = sscanf(++pos, "%s\t%s", (*mnt_data_array)[array_index].mnt_type,
            (*mnt_data_array)[array_index].mnt_path);
431
        if(ret != 2)
432
        {
433
            fprintf(stderr, "Error: poorly formatted mount table in darshan log file.\n");
434 435 436 437 438
            return(-1);
        }
        array_index++;
    }

439 440
    qsort(*mnt_data_array, *count, sizeof(**mnt_data_array), darshan_mnt_info_cmp);

441 442 443
    return(0);
}

444 445 446 447 448 449 450 451 452
/* darshan_log_putmounts()
 *
 * writes mount information to the darshan log file
 * NOTE: this function call should follow immediately after the call
 * to darshan_log_putexe(), as it assumes the darshan log file pointer
 * is pointing to the offset immediately following the exe string
 *
 * returns 0 on success, -1 on failure
 */
453 454
int darshan_log_putmounts(darshan_fd fd, struct darshan_mnt_info *mnt_data_array,
    int count)
455
{
456
    struct darshan_fd_int_state *state = fd->state;
457 458
    int i;
    char line[1024];
459 460 461
    char mnt_dat[DARSHAN_EXE_LEN] = {0};
    int mnt_dat_sz = 0;
    char *tmp;
462 463
    int ret;

464 465
    assert(state);

466
    /* write each mount entry to file */
467
    tmp = mnt_dat;
468 469
    for(i=count-1; i>=0; i--)
    {
470
        sprintf(line, "\n%s\t%s", mnt_data_array[i].mnt_type, mnt_data_array[i].mnt_path);
471 472 473 474 475 476

        memcpy(tmp, line, strlen(line));
        tmp += strlen(line);
        mnt_dat_sz += strlen(line);
    }

477 478
    ret = darshan_log_dzwrite(fd, DARSHAN_JOB_REGION_ID, mnt_dat, mnt_dat_sz);
    if (ret != mnt_dat_sz)
479
    {
480
        state->err = -1;
481
        fprintf(stderr, "Error: failed to write darshan log mount data.\n");
482 483 484 485 486 487
        return(-1);
    }

    return(0);
}

488
/* darshan_log_gethash()
489
 *
490
 * read the hash of records from the darshan log file
491 492 493
 *
 * returns 0 on success, -1 on failure
 */
494
int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash)
495
{
496
    struct darshan_fd_int_state *state = fd->state;
497
    char *hash_buf;
498
    int hash_buf_sz;
499
    char *buf_ptr;
500
    darshan_record_id *rec_id_ptr;
501
    uint32_t *path_len_ptr, tmp_path_len;
502 503
    char *path_ptr;
    struct darshan_record_ref *ref;
504 505 506
    int read;
    int read_req_sz;
    int buf_remaining = 0;
507

508 509
    assert(state);

510 511 512 513 514 515 516
    /* just return if there is no record mapping data */
    if(fd->rec_map.len == 0)
    {
        *hash = NULL;
        return(0);
    }

517 518 519
    /* default to hash buffer twice as big as default compression buf */
    hash_buf = malloc(DARSHAN_DEF_COMP_BUF_SZ * 2);
    if(!hash_buf)
520
        return(-1);
521 522
    memset(hash_buf, 0, DARSHAN_DEF_COMP_BUF_SZ * 2);
    hash_buf_sz = DARSHAN_DEF_COMP_BUF_SZ * 2;
523

524
    do
525
    {
526 527
        /* read chunks of the darshan record id -> file name mapping from log file,
         * constructing a hash table in the process
528
         */
529 530 531 532
        read_req_sz = hash_buf_sz - buf_remaining;
        read = darshan_log_dzread(fd, DARSHAN_REC_MAP_REGION_ID,
            hash_buf + buf_remaining, read_req_sz);
        if(read < 0)
533
        {
534 535 536
            fprintf(stderr, "Error: failed to read record hash from darshan log file.\n");
            free(hash_buf);
            return(-1);
537 538
        }

539 540 541 542 543 544 545 546
        /* work through the hash buffer -- deserialize the mapping data and
         * add to the output hash table
         * NOTE: these mapping pairs are variable in length, so we have to be able
         * to handle incomplete mappings temporarily here
         */
        buf_ptr = hash_buf;
        buf_remaining += read;
        while(buf_remaining > (sizeof(darshan_record_id) + sizeof(uint32_t)))
547
        {
548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568
            /* see if we have enough buf space to read in the next full record */
            tmp_path_len = *(uint32_t *)(buf_ptr + sizeof(darshan_record_id));
            if(fd->swap_flag)
                DARSHAN_BSWAP32(&tmp_path_len);

            /* we need to read more before we continue deserializing */
            if(buf_remaining <
                (sizeof(darshan_record_id) + sizeof(uint32_t) + tmp_path_len))
                break;

            /* get pointers for each field of this darshan record */
            /* NOTE: darshan record hash serialization method: 
             *          ... darshan_record_id | (uint32_t) path_len | path ...
             */
            rec_id_ptr = (darshan_record_id *)buf_ptr;
            buf_ptr += sizeof(darshan_record_id);
            path_len_ptr = (uint32_t *)buf_ptr;
            buf_ptr += sizeof(uint32_t);
            path_ptr = (char *)buf_ptr;

            if(fd->swap_flag)
569
            {
570 571 572
                /* we need to sort out endianness issues before deserializing */
                DARSHAN_BSWAP64(rec_id_ptr);
                DARSHAN_BSWAP32(path_len_ptr);
573
            }
574 575 576

            HASH_FIND(hlink, *hash, rec_id_ptr, sizeof(darshan_record_id), ref);
            if(!ref)
577
            {
578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598
                ref = malloc(sizeof(*ref));
                if(!ref)
                {
                    free(hash_buf);
                    return(-1);
                }
                ref->rec.name = malloc(*path_len_ptr + 1);
                if(!ref->rec.name)
                {
                    free(ref);
                    free(hash_buf);
                    return(-1);
                }

                /* set the fields for this record */
                ref->rec.id = *rec_id_ptr;
                memcpy(ref->rec.name, path_ptr, *path_len_ptr);
                ref->rec.name[*path_len_ptr] = '\0';

                /* add this record to the hash */
                HASH_ADD(hlink, *hash, rec.id, sizeof(darshan_record_id), ref);
599 600
            }

601 602 603
            buf_ptr += *path_len_ptr;
            buf_remaining -=
                (sizeof(darshan_record_id) + sizeof(uint32_t) + *path_len_ptr);
604
        }
605

606 607 608 609 610 611 612 613
        /* copy any leftover data to beginning of buffer to parse next */
        memcpy(hash_buf, buf_ptr, buf_remaining);

        /* we keep reading until we get a short read informing us we have
         * read all of the record hash
         */
    } while(read == read_req_sz);
    assert(buf_remaining == 0);
614

615
    free(hash_buf);
616 617 618
    return(0);
}

619 620 621
/* darshan_log_puthash()
 *
 * writes the hash table of records to the darshan log file
 * NOTE: this function call should follow immediately after the call
 * to darshan_log_putmounts(), as it assumes the darshan log file pointer
 * is pointing to the offset immediately following the mount information
 *
 * Every record is serialized on its own as
 *      darshan_record_id | (uint32_t) path_len | path
 * (the path is written without a terminating NUL) and handed to the
 * record map region one entry at a time.
 * NOTE(review): the staging buffer has room for PATH_MAX path bytes and
 * the name length is not checked against that -- confirm names are bounded.
 *
 * returns 0 on success, -1 on failure (a failed write also sets
 * state->err to -1)
 */
int darshan_log_puthash(darshan_fd fd, struct darshan_record_ref *hash)
{
    struct darshan_fd_int_state *state = fd->state;
    struct darshan_record_ref *cur, *next;
    char *entry_buf;
    int entry_buf_sz;

    assert(state);

    /* one buffer, sized for the largest possible entry, is reused */
    entry_buf_sz = sizeof(darshan_record_id) + sizeof(uint32_t) + PATH_MAX;
    entry_buf = malloc(entry_buf_sz);
    if(entry_buf == NULL)
        return(-1);
    memset(entry_buf, 0, entry_buf_sz);

    HASH_ITER(hlink, hash, cur, next)
    {
        char *cursor = entry_buf;
        int name_len = strlen(cur->rec.name);
        uint32_t name_len_field = name_len;
        int entry_len;
        int wrote;

        /* record id, then path length, then the path bytes */
        memcpy(cursor, &cur->rec.id, sizeof(darshan_record_id));
        cursor += sizeof(darshan_record_id);
        memcpy(cursor, &name_len_field, sizeof(uint32_t));
        cursor += sizeof(uint32_t);
        memcpy(cursor, cur->rec.name, name_len);
        cursor += name_len;

        /* push the serialized entry out to the log */
        entry_len = cursor - entry_buf;
        wrote = darshan_log_dzwrite(fd, DARSHAN_REC_MAP_REGION_ID,
            entry_buf, entry_len);
        if(wrote != entry_len)
        {
            state->err = -1;
            fprintf(stderr, "Error: failed to write record hash to darshan log file.\n");
            free(entry_buf);
            return(-1);
        }
    }

    free(entry_buf);
    return(0);
}

/* darshan_log_getmod()
 *
 * get a chunk of module data from the darshan log file
 *
 * fd         - open log descriptor
 * mod_id     - module whose data is requested; must be within
 *              [0, DARSHAN_MAX_MODS)
 * mod_buf    - destination buffer
 * mod_buf_sz - capacity of mod_buf in bytes
 *
 * returns number of bytes read on success (0 when the log holds no data
 * for this module), -1 on failure
 */
int darshan_log_getmod(darshan_fd fd, darshan_module_id mod_id,
    void *mod_buf, int mod_buf_sz)
{
    struct darshan_fd_int_state *state = fd->state;
    int nbytes;

    assert(state);

    if(!(mod_id >= 0 && mod_id < DARSHAN_MAX_MODS))
    {
        fprintf(stderr, "Error: invalid Darshan module id.\n");
        return(-1);
    }

    /* an empty map entry means there is nothing stored for this module */
    if(fd->mod_map[mod_id].len == 0)
        return(0);

    /* the module id doubles as the region id for the read */
    nbytes = darshan_log_dzread(fd, mod_id, mod_buf, mod_buf_sz);
    if(nbytes >= 0)
        return(nbytes);

    fprintf(stderr,
        "Error: failed to read module %s data from darshan log file.\n",
        darshan_module_names[mod_id]);
    return(-1);
}

716 717 718
/* darshan_log_putmod()
 *
 * write a chunk of module data to the darshan log file
 * NOTE: this function call should be called directly after the
 * put_hash() function, as it expects the file pointer to be
 * positioned directly past the record hash location. Also,
 * for a set of modules with data to write to file, this function
 * should be called in order of increasing module identifiers,
 * as the darshan log file format expects this ordering.
 *
 * fd         - log descriptor created for writing
 * mod_id     - module the data belongs to; must be within
 *              [0, DARSHAN_MAX_MODS)
 * mod_buf    - data to write
 * mod_buf_sz - number of bytes in mod_buf
 * ver        - version number recorded in fd->mod_ver[mod_id] after a
 *              successful write
 *
 * returns 0 on success, -1 on failure (state->err is also set to -1)
 */
int darshan_log_putmod(darshan_fd fd, darshan_module_id mod_id,
    void *mod_buf, int mod_buf_sz, int ver)
{
    struct darshan_fd_int_state *state = fd->state;
    int wrote;

    assert(state);

    if(!(mod_id >= 0 && mod_id < DARSHAN_MAX_MODS))
    {
        state->err = -1;
        fprintf(stderr, "Error: invalid Darshan module id.\n");
        return(-1);
    }

    /* the module id doubles as the region id for the write */
    wrote = darshan_log_dzwrite(fd, mod_id, mod_buf, mod_buf_sz);
    if(wrote == mod_buf_sz)
    {
        /* remember which format version this module's data uses */
        fd->mod_ver[mod_id] = ver;
        return(0);
    }

    state->err = -1;
    fprintf(stderr,
        "Error: failed to write module %s data to darshan log file.\n",
        darshan_module_names[mod_id]);
    return(-1);
}

760 761
/* darshan_log_close()
 *
 * close an open darshan file descriptor, freeing any resources
 *
 * For a log that was created for writing, the last region is flushed
 * with the flush routine matching the log's compression type and, if no
 * error has been recorded so far, the header is written. If any error
 * was recorded (by this function or by an earlier put* call), the
 * output file is unlinked after it has been closed.
 *
 * fd - descriptor to close; fd and its internal state are freed and must
 *      not be used afterwards
 */
void darshan_log_close(darshan_fd fd)
{
    struct darshan_fd_int_state *state = fd->state;
    int ret;

    assert(state);

    /* if the file was created for writing */
    if(state->creat_flag)
    {
        /* flush the last region of the log to file; every case ends in
         * its own break so a failed flush can never fall through into
         * the flush routine of a different compression library
         */
        switch(state->comp_type)
        {
            case DARSHAN_ZLIB_COMP:
                ret = darshan_log_libz_flush(fd, state->dz.prev_reg_id);
                break;
#ifdef HAVE_LIBBZ2
            case DARSHAN_BZIP2_COMP:
                ret = darshan_log_bzip2_flush(fd, state->dz.prev_reg_id);
                break;
#endif
            default:
                /* unknown or unsupported compression type */
                ret = -1;
                break;
        }

        if(ret != 0)
        {
            /* if flush fails, remove the output log file */
            state->err = -1;
            fprintf(stderr, "Error: final flush to log file failed.\n");
        }

        /* if no errors flushing, write the log header before closing */
        if(state->err != -1)
        {
            ret = darshan_log_putheader(fd);
            if(ret < 0)
                state->err = -1;
        }
    }

    close(state->fildes);

    /* remove output log file if error writing to it */
    if((state->creat_flag) && (state->err == -1))
    {
        fprintf(stderr, "Unlinking darshan log file %s ...\n",
            state->logfile_path);
        unlink(state->logfile_path);
    }

    darshan_log_dzdestroy(state);
    free(state->exe_mnt_data);
    free(state);
    free(fd);

    return;
}

823
/* **************************************************** */
824

825 826 827 828 829 830 831 832 833 834 835 836 837
static int darshan_mnt_info_cmp(const void *a, const void *b)
{
    struct darshan_mnt_info *m_a = (struct darshan_mnt_info *)a;
    struct darshan_mnt_info *m_b = (struct darshan_mnt_info *)b;

    if(strlen(m_a->mnt_path) > strlen(m_b->mnt_path))
        return(-1);
    else if(strlen(m_a->mnt_path) < strlen(m_b->mnt_path))
        return(1);
    else
        return(0);
}

838 839 840 841 842 843 844
/* read the header of the darshan log and set internal fd data structures
 * NOTE: this is the only portion of the darshan log that is uncompressed
 *
 * returns 0 on success, -1 on failure
 */
static int darshan_log_getheader(darshan_fd fd)
{
845
    struct darshan_fd_int_state *state = fd->state;
846 847 848 849 850 851 852 853 854 855 856
    struct darshan_header header;
    int i;
    int ret;

    ret = darshan_log_seek(fd, 0);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }

857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880
    /* read the version number so we know how to process this log */
    ret = darshan_log_read(fd, &fd->version, 8);
    if(ret < 8)
    {
        fprintf(stderr, "Error: invalid log file (failed to read version).\n");
        return(-1);
    }

    /* other log file versions can be detected and handled here */
    if(strcmp(fd->version, "3.00"))
    {
        fprintf(stderr, "Error: incompatible darshan file.\n");
        fprintf(stderr, "Error: expected version %s\n", DARSHAN_LOG_VERSION);
        return(-1);
    }

    /* seek back so we can read the entire header */
    ret = darshan_log_seek(fd, 0);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }

881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918
    /* read uncompressed header from log file */
    ret = darshan_log_read(fd, &header, sizeof(header));
    if(ret != sizeof(header))
    {
        fprintf(stderr, "Error: failed to read darshan log file header.\n");
        return(-1);
    }

    if(header.magic_nr == DARSHAN_MAGIC_NR)
    {
        /* no byte swapping needed, this file is in host format already */
        fd->swap_flag = 0;
    }
    else
    {
        /* try byte swapping */
        DARSHAN_BSWAP64(&(header.magic_nr));
        if(header.magic_nr == DARSHAN_MAGIC_NR)
        {
            fd->swap_flag = 1;

            /* swap the log map variables in the header */
            DARSHAN_BSWAP64(&(header.rec_map.off));
            DARSHAN_BSWAP64(&(header.rec_map.len));
            for(i = 0; i < DARSHAN_MAX_MODS; i++)
            {
                DARSHAN_BSWAP64(&(header.mod_map[i].off));
                DARSHAN_BSWAP64(&(header.mod_map[i].len));
            }
        }
        else
        {
            /* otherwise this file is just broken */
            fprintf(stderr, "Error: bad magic number in darshan log file.\n");
            return(-1);
        }
    }

919
    /* set some fd fields based on what's stored in the header */
920
    state->comp_type = header.comp_type;