darshan-logutils.c 17.1 KB
Newer Older
1
2
3
4
5
/*
 *  (C) 2009 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

6
#define _GNU_SOURCE
7
#include "darshan-util-config.h"
8
9
#include <stdio.h>
#include <string.h>
10
#include <assert.h>
11
#include <stdlib.h>
12
#include <unistd.h>
13
#include <inttypes.h>
14
15
16
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
17
#include <errno.h>
18

19
20
21
22
23
#include <zlib.h>
#ifdef HAVE_LIBBZ2
#include <bzlib.h>
#endif

24
25
#include "darshan-logutils.h"

26
27
28
/* default to a decompression buffer size of 4 MiB */
#define DARSHAN_DEF_DECOMP_BUF_SZ (4*1024*1024)

29
30
struct darshan_fd_s
{
31
    int pf;
32
    int64_t pos;
33
    char version[8];
34
35
    int swap_flag;
    char *exe_mnt_data;
36
37
38
    struct darshan_log_map job_map;
    struct darshan_log_map rec_map;
    struct darshan_log_map mod_map[DARSHAN_MAX_MODS];
39
40
};

41
42
43
static int darshan_log_seek(darshan_fd fd, off_t offset);
static int darshan_log_read(darshan_fd fd, void *buf, int len);
static int darshan_log_write(darshan_fd fd, void *buf, int len);
44
45
static int darshan_decompress_buffer(char *comp_buf, int comp_buf_sz,
    char *decomp_buf, int *inout_decomp_buf_sz);
46
47
48
49
50
51
52

/* darshan_log_open()
 *
 * open a darshan log file for reading/writing
 *
 * returns 0 on success, -1 on failure
 */
53
darshan_fd darshan_log_open(const char *name, const char *mode)
54
{
55
    int o_flags;
56

57
58
59
    /* we only allows "w" or "r" modes, nothing fancy */
    assert(strlen(mode) == 1);
    assert(mode[0] == 'r' || mode[0] == 'w');
60
61
62
63
    if(mode[0] == 'r')
        o_flags = O_RDONLY;
    else
        o_flags = O_WRONLY;
64

65
66
67
68
69
    darshan_fd tmp_fd = malloc(sizeof(*tmp_fd));
    if(!tmp_fd)
        return(NULL);
    memset(tmp_fd, 0, sizeof(*tmp_fd));

70
71
    tmp_fd->pf = open(name, o_flags);
    if(tmp_fd->pf < 0)
72
73
    {
        free(tmp_fd);
74
        return(NULL);
75
    }
76
77

    return(tmp_fd);
78
79
}

80
81
82
83
84
85
86
87
/* darshan_log_getheader()
 *
 * read the header of the darshan log and set internal data structures
 * NOTE: this function must be called before reading other portions of the log
 *
 * fd     - open darshan file descriptor; its version string, swap flag and
 *          job/record/module maps are filled in from the header
 * header - receives the header as stored in the log; when the log turns out
 *          to be in the opposite byte order, the magic number and the log
 *          map offsets/lengths are swapped to host order before returning
 *
 * returns 0 on success, -1 on failure
 */
int darshan_log_getheader(darshan_fd fd, struct darshan_header *header)
{
    int i;
    int ret;

    ret = darshan_log_seek(fd, 0);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(ret);
    }

    /* read header from log file */
    ret = darshan_log_read(fd, header, sizeof(*header));
    /* test for a negative return explicitly: comparing it directly against
     * the unsigned sizeof() would convert -1 to a huge value and let a read
     * error slip through
     */
    if(ret < 0 || (size_t)ret < sizeof(*header))
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read header).\n");
        return(-1);
    }

    /* save the version string, making sure our copy is always terminated */
    strncpy(fd->version, header->version_string, sizeof(fd->version) - 1);
    fd->version[sizeof(fd->version) - 1] = '\0';

    if(header->magic_nr == DARSHAN_MAGIC_NR)
    {
        /* no byte swapping needed, this file is in host format already */
        fd->swap_flag = 0;
    }
    else
    {
        /* try byte swapping */
        DARSHAN_BSWAP64(&header->magic_nr);
        if(header->magic_nr == DARSHAN_MAGIC_NR)
        {
            fd->swap_flag = 1;

            /* swap the log map variables in the header */
            DARSHAN_BSWAP64(&header->rec_map.off);
            DARSHAN_BSWAP64(&header->rec_map.len);
            for(i=0;i<DARSHAN_MAX_MODS;i++)
            {
                DARSHAN_BSWAP64(&header->mod_map[i].off);
                DARSHAN_BSWAP64(&header->mod_map[i].len);
            }
        }
        else
        {
            /* otherwise this file is just broken */
            fprintf(stderr, "Error: bad magic number in darshan log file.\n");
            return(-1);
        }
    }

    /* the job data sits between the header and the record hash, so the
     * record hash cannot start inside the header; reject such a log rather
     * than computing a bogus job data length below
     */
    if(header->rec_map.off < sizeof(struct darshan_header))
    {
        fprintf(stderr, "Error: invalid darshan log file (bad record map offset).\n");
        return(-1);
    }

    /* save the mapping of data within log file to this file descriptor */
    fd->job_map.off = sizeof(struct darshan_header);
    fd->job_map.len = header->rec_map.off - fd->job_map.off;
    memcpy(&fd->rec_map, &header->rec_map, sizeof(struct darshan_log_map));
    memcpy(&fd->mod_map, &header->mod_map, DARSHAN_MAX_MODS * sizeof(struct darshan_log_map));

    return(0);
}
148

149
/* darshan_log_getjob()
150
151
 *
 * read job level metadata from the darshan log file
152
 *
153
 * returns 0 on success, -1 on failure
154
 */
155
int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
156
{
157
    char *comp_buf;
158
    char job_buf[DARSHAN_JOB_RECORD_SIZE] = {0};
159
    int job_buf_sz = DARSHAN_JOB_RECORD_SIZE;
160
    int ret;
161

162
163
164
165
166
    /* allocate a buffer to store the (compressed) darshan job info */
    comp_buf = malloc(fd->job_map.len);
    if(!comp_buf)
        return(-1);

167
    ret = darshan_log_seek(fd, fd->job_map.off);
168
    if(ret < 0)
169
    {
170
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
171
        free(comp_buf);
172
        return(ret);
173
174
    }

175
    /* read the job data from the log file */
176
    ret = darshan_log_read(fd, comp_buf, fd->job_map.len);
177
    if(ret < fd->job_map.len)
178
    {
179
        fprintf(stderr, "Error: invalid darshan log file (failed to read job data).\n");
180
        free(comp_buf);
181
182
183
        return(-1);
    }

184
185
186
187
188
189
190
191
192
193
194
195
    /* decompress the job data */
    ret = darshan_decompress_buffer(comp_buf, fd->job_map.len,
        job_buf, &job_buf_sz);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to decompress darshan job data.\n");
        free(comp_buf);
        return(-1);
    }
    assert(job_buf_sz == DARSHAN_JOB_RECORD_SIZE);
    free(comp_buf);

196
197
    memcpy(job, job_buf, sizeof(*job));

198
    if(fd->swap_flag)
199
    {
200
201
202
203
204
205
        /* swap bytes if necessary */
        DARSHAN_BSWAP64(&job->uid);
        DARSHAN_BSWAP64(&job->start_time);
        DARSHAN_BSWAP64(&job->end_time);
        DARSHAN_BSWAP64(&job->nprocs);
        DARSHAN_BSWAP64(&job->jobid);
206
207
    }

208
    /* save trailing job data, so exe and mount information can be retrieved later */
209
210
    if(!fd->exe_mnt_data)
        fd->exe_mnt_data = malloc(DARSHAN_EXE_LEN+1);
211
212
    if(!fd->exe_mnt_data)
        return(-1);
213
    memcpy(fd->exe_mnt_data, &job_buf[sizeof(*job)], DARSHAN_EXE_LEN+1);
214
215
216
217
218
219
220
221

    return(0);
}

/* darshan_log_getexe()
 *
 * retrieves the exe (command line) string saved with the job data, i.e.
 * the first line of the exe/mount text
 *
 * fd  - open darshan file descriptor
 * buf - receives the NUL-terminated string; up to DARSHAN_EXE_LEN+1 bytes
 *       are written (NOTE(review): callers are assumed to pass a buffer of
 *       at least that size -- confirm)
 *
 * returns 0 on success, -1 on failure
 */
int darshan_log_getexe(darshan_fd fd, char *buf)
{
    size_t exe_len;

    /* if the exe/mount info has not been saved yet, read in the job
     * header to get this data.
     */
    if(!fd->exe_mnt_data)
    {
        struct darshan_job job;
        (void)darshan_log_getjob(fd, &job);

        if(!fd->exe_mnt_data)
            return(-1);
    }

    /* the exe string ends at the first newline, or at the end of the saved
     * text when no mount entries follow it; the scan is bounded by the size
     * of the saved buffer rather than relying on a terminator
     */
    for(exe_len = 0; exe_len < DARSHAN_EXE_LEN; exe_len++)
    {
        if(fd->exe_mnt_data[exe_len] == '\n' ||
           fd->exe_mnt_data[exe_len] == '\0')
            break;
    }

    /* copy over the exe string and terminate it for the caller */
    memcpy(buf, fd->exe_mnt_data, exe_len);
    buf[exe_len] = '\0';

    return (0);
}

/* darshan_log_getmounts()
 * 
245
246
247
 * retrieves mount table information from the log.  Note that mnt_pts and
 * fs_types are arrays that will be allocated by the function and must be
 * freed by the caller.  count will indicate the size of the arrays
248
 */
249
int darshan_log_getmounts(darshan_fd fd, char*** mnt_pts,
250
251
252
253
254
255
    char*** fs_types, int* count)
{
    int ret;
    char *pos;
    int array_index = 0;

256
257
258
    /* if the exe/mount info has not been saved yet, read in the job
     * header to get this data.
     */
259
    if(!fd->exe_mnt_data)
260
261
262
263
264
265
266
    {
        struct darshan_job job;
        (void)darshan_log_getjob(fd, &job);

        if(!fd->exe_mnt_data)
            return(-1);
    }
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293

    *count = 0;
    pos = fd->exe_mnt_data;
    while((pos = strchr(pos, '\n')) != NULL)
    {
        pos++;
        (*count)++;
    }

    if(*count == 0)
    {
        /* no mount entries present */
        return(0);
    }

    /* allocate output arrays */
    *mnt_pts = malloc((*count)*sizeof(char*));
    assert(*mnt_pts);
    *fs_types = malloc((*count)*sizeof(char*));
    assert(*fs_types);

    /* work backwards through the table and parse each line (except for
     * first, which holds command line information)
     */
    while((pos = strrchr(fd->exe_mnt_data, '\n')) != NULL)
    {
        /* overestimate string lengths */
294
        (*mnt_pts)[array_index] = malloc(DARSHAN_EXE_LEN);
295
        assert((*mnt_pts)[array_index]);
296
        (*fs_types)[array_index] = malloc(DARSHAN_EXE_LEN);
297
298
        assert((*fs_types)[array_index]);

299
300
301
        ret = sscanf(++pos, "%s\t%s", (*fs_types)[array_index],
            (*mnt_pts)[array_index]);
        if(ret != 2)
302
303
304
305
306
307
308
309
310
        {
            fprintf(stderr, "Error: poorly formatted mount table in log file.\n");
            return(-1);
        }
        pos--;
        *pos = '\0';
        array_index++;
    }

311
312
313
    return(0);
}

314
/* darshan_log_gethash()
315
 *
316
 * read the hash of records from the darshan log file
317
318
319
 *
 * returns 0 on success, -1 on failure
 */
320
int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash)
321
{
322
323
324
325
    char *comp_buf;
    char hash_buf[DARSHAN_DEF_DECOMP_BUF_SZ] = {0};
    int hash_buf_sz = DARSHAN_DEF_DECOMP_BUF_SZ;
    char *buf_ptr;
326
327
328
329
    darshan_record_id *rec_id_ptr;
    uint32_t *path_len_ptr;
    char *path_ptr;
    struct darshan_record_ref *ref;
330
331
    int ret;

332
333
334
335
336
    /* allocate a buffer to store the (compressed, serialized) darshan record hash */
    comp_buf = malloc(fd->rec_map.len);
    if(!comp_buf)
        return(-1);

337
    ret = darshan_log_seek(fd, fd->rec_map.off);
338
    if(ret < 0)
339
340
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
341
        free(comp_buf);
342
        return(ret);
343
    }
344

345
346
    /* read the record hash from the log file */
    ret = darshan_log_read(fd, comp_buf, fd->rec_map.len);
347
    if(ret < fd->rec_map.len)
348
    {
349
        fprintf(stderr, "Error: invalid darshan log file (failed to read record hash).\n");
350
        free(comp_buf);
351
        return(-1);
352
353
    }

354
355
356
357
358
359
360
361
362
363
364
    /* decompress the record hash buffer */
    ret = darshan_decompress_buffer(comp_buf, fd->rec_map.len,
        hash_buf, &hash_buf_sz);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to decompress darshan job data.\n");
        free(comp_buf);
        return(-1);
    }
    free(comp_buf);

365
    buf_ptr = hash_buf;
366
    while(buf_ptr < (hash_buf + hash_buf_sz))
367
    {
368
        /* get pointers for each field of this darshan record */
369
        /* NOTE: darshan record hash serialization method: 
370
371
         *          ... darshan_record_id | (uint32_t) path_len | path ...
         */
372
373
374
375
376
377
378
379
        rec_id_ptr = (darshan_record_id *)buf_ptr;
        buf_ptr += sizeof(darshan_record_id);
        path_len_ptr = (uint32_t *)buf_ptr;
        buf_ptr += sizeof(uint32_t);
        path_ptr = (char *)buf_ptr;
        buf_ptr += *path_len_ptr;

        if(fd->swap_flag)
380
        {
381
382
383
            /* we need to sort out endianness issues before deserializing */
            DARSHAN_BSWAP64(rec_id_ptr);
            DARSHAN_BSWAP32(path_len_ptr);
384
385
        }

386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
        HASH_FIND(hlink, *hash, rec_id_ptr, sizeof(darshan_record_id), ref);
        if(!ref)
        {
            ref = malloc(sizeof(*ref));
            if(!ref)
            {
                return(-1);
            }
            ref->rec.name = malloc(*path_len_ptr + 1);
            if(!ref->rec.name)
            {
                free(ref);
                return(-1);
            }

            /* set the fields for this record */
            ref->rec.id = *rec_id_ptr;
            memcpy(ref->rec.name, path_ptr, *path_len_ptr);
            ref->rec.name[*path_len_ptr] = '\0';
405

406
407
408
            /* add this record to the hash */
            HASH_ADD(hlink, *hash, rec.id, sizeof(darshan_record_id), ref);
        }
409
410
    }

411
412
413
    return(0);
}

414
415
/* darshan_log_getmod()
 *
 * read and decompress the data a given module stored in the darshan log
 *
 * fd         - open darshan file descriptor whose header has already been read
 * mod_id     - module whose data is wanted
 * mod_buf    - on success points to a malloc'd buffer holding the
 *              decompressed data, which the caller must free; NULL when the
 *              module has no data in the log or on failure
 * mod_buf_sz - size in bytes of the decompressed data; 0 when *mod_buf is NULL
 *
 * returns 0 on success (including "no data"), -1 on failure
 */
int darshan_log_getmod(darshan_fd fd, darshan_module_id mod_id,
    void **mod_buf, int *mod_buf_sz)
{
    char *comp_buf;
    char *tmp_buf;
    int tmp_buf_sz;
    int ret;

    *mod_buf = NULL;
    *mod_buf_sz = 0;

    if(mod_id < 0 || mod_id >= DARSHAN_MAX_MODS)
    {
        fprintf(stderr, "Error: invalid Darshan module id.\n");
        return(-1);
    }

    if(fd->mod_map[mod_id].len == 0)
    {
        /* this module has no data in the log */
        return(0);
    }

    comp_buf = malloc(fd->mod_map[mod_id].len);
    if(!comp_buf)
        return(-1);

    ret = darshan_log_seek(fd, fd->mod_map[mod_id].off);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        free(comp_buf);
        return(ret);
    }

    /* read the given module's (compressed) data from the log file */
    ret = darshan_log_read(fd, comp_buf, fd->mod_map[mod_id].len);
    /* check the sign first: a negative int compared directly against the
     * map length would be promoted to a huge unsigned value
     */
    if(ret < 0 || (uint64_t)ret < (uint64_t)fd->mod_map[mod_id].len)
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read module %s data).\n",
            darshan_module_names[mod_id]);
        free(comp_buf);
        return(-1);
    }

    tmp_buf_sz = DARSHAN_DEF_DECOMP_BUF_SZ;
    tmp_buf = malloc(DARSHAN_DEF_DECOMP_BUF_SZ);
    if(!tmp_buf)
    {
        free(comp_buf);
        return(-1);
    }

    /* decompress this module's data */
    ret = darshan_decompress_buffer(comp_buf, fd->mod_map[mod_id].len, tmp_buf,
        &tmp_buf_sz);
    /* the compressed copy is not needed past this point, whatever the outcome */
    free(comp_buf);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to decompress module %s data.\n",
            darshan_module_names[mod_id]);
        free(tmp_buf);
        return(-1);
    }

    /* pass back the final decompressed data pointer */
    *mod_buf = tmp_buf;
    *mod_buf_sz = tmp_buf_sz;

    return(0);
}
484

485
486
/* TODO: hardcoded for posix -- what can we do generally?
 *       different function for each module and a way to map to this function?
487
 */
488
/* TODO: we need bswaps here, too */
489
int darshan_log_getfile(darshan_fd fd, struct darshan_posix_file *file)
490
{
491
492
    char *comp_buf;
    char hash_buf[DARSHAN_DEF_DECOMP_BUF_SZ] = {0};
493
494
495
    int ret;
    const char* err_string;
    int i;
496

497
    if(fd->pos < fd->mod_map[0].off)
498
    {
499
        ret = darshan_log_seek(fd, fd->mod_map[0].off);
500
        if(ret < 0)
501
502
        {
            fprintf(stderr, "Error: unable to seek in darshan log file.\n");
503
            return(ret);
504
        }
505
    }
506
507
508
509

    /* reset file record, so that diff compares against a zero'd out record
     * if file is missing
     */
510
    memset(file, 0, sizeof(*file));
511

512
    ret = darshan_log_read(fd, file, sizeof(*file));
513
514
515
516
517
518
    if(ret == sizeof(*file))
    {
        /* got exactly one, correct size record */
        if(fd->swap_flag)
        {
            /* swap bytes if necessary */
519
            DARSHAN_BSWAP64(&file->f_id);
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
            DARSHAN_BSWAP64(&file->rank);
            for(i=0; i<CP_NUM_INDICES; i++)
                DARSHAN_BSWAP64(&file->counters[i]);
            for(i=0; i<CP_F_NUM_INDICES; i++)
                DARSHAN_BSWAP64(&file->fcounters[i]);
        }
        return(1);
    }


    if(ret > 0)
    {
        /* got a short read */
        fprintf(stderr, "Error: invalid file record (too small)\n");
        return(-1);
    }

537
    if(ret == 0)
538
539
540
541
542
543
    {
        /* hit end of file */
        return(0);
    }

    /* all other errors */
544
    err_string = strerror(errno);
545
546
547
548
    fprintf(stderr, "Error: %s\n", err_string);
    return(-1);
}

549
550
551
552
553
554
555
/* darshan_log_close()
 *
 * close an open darshan file descriptor and release the memory attached to
 * it; the handle must not be used afterwards.  A NULL fd is ignored.
 */
void darshan_log_close(darshan_fd fd)
{
    if(!fd)
        return;

    /* 0 is a valid descriptor number, so test for >= 0 rather than nonzero */
    if(fd->pf >= 0)
        close(fd->pf);

    /* free(NULL) is a no-op, no guard needed */
    free(fd->exe_mnt_data);
    free(fd);
}

566
/* ******************************************* */
567

568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
/* move the log file to the given absolute offset, skipping the system call
 * when the tracked position already matches
 *
 * return 0 on successful seek to offset, -1 on failure.
 */
static int darshan_log_seek(darshan_fd fd, off_t offset)
{
    off_t new_off;

    /* nothing to do if our bookkeeping says we are already there */
    if(fd->pos == offset)
        return(0);

    new_off = lseek(fd->pf, offset, SEEK_SET);
    if(new_off != offset)
        return(-1);

    fd->pos = offset;
    return(0);
}

587
588
589
590
591
592
/* write len bytes from buf at the current log position, retrying after
 * interruptions and partial writes, and advance the tracked position by
 * whatever was written
 *
 * return amount written on success, -1 on failure.
 * NOTE: when a failure follows a partial write, -1 is returned but the
 * tracked position still reflects the bytes that made it out.
 */
static int darshan_log_write(darshan_fd fd, void* buf, int len)
{
    const char *cur = buf;
    int total = 0;
    ssize_t ret;

    /* 0 is a valid descriptor number, so only negative values are rejected */
    if(fd->pf < 0 || len < 0)
        return(-1);

    while(total < len)
    {
        ret = write(fd->pf, cur + total, len - total);
        if(ret < 0)
        {
            if(errno == EINTR)
                continue;
            return(-1);
        }
        if(ret == 0)
        {
            /* no progress possible; report what was written so far */
            break;
        }

        fd->pos += ret;
        total += ret;
    }

    return(total);
}

604
605
606
607
608
609
/* read up to len bytes into buf from the current log position, retrying
 * after interruptions and partial reads so that a short count only happens
 * at end of file, and advance the tracked position by whatever was read
 *
 * return amount read on success, 0 on EOF, -1 on failure.
 * NOTE: when a failure follows a partial read, -1 is returned but the
 * tracked position still reflects the bytes that were consumed.
 */
static int darshan_log_read(darshan_fd fd, void* buf, int len)
{
    char *cur = buf;
    int total = 0;
    ssize_t ret;

    /* 0 is a valid descriptor number, so only negative values are rejected */
    if(fd->pf < 0 || len < 0)
        return(-1);

    while(total < len)
    {
        ret = read(fd->pf, cur + total, len - total);
        if(ret < 0)
        {
            if(errno == EINTR)
                continue;
            return(-1);
        }
        if(ret == 0)
        {
            /* end of file; report what was read so far (possibly nothing) */
            break;
        }

        fd->pos += ret;
        total += ret;
    }

    return(total);
}

621
622
623
/* TODO bz2 compression support */
static int darshan_decompress_buffer(char *comp_buf, int comp_buf_sz,
    char *decomp_buf, int *inout_decomp_buf_sz)
624
{
625
    int ret;
626
    int total_out = 0;
627
628
629
630
631
632
    z_stream tmp_stream;

    memset(&tmp_stream, 0, sizeof(tmp_stream));
    tmp_stream.zalloc = Z_NULL;
    tmp_stream.zfree = Z_NULL;
    tmp_stream.opaque = Z_NULL;
633
634
635
636
    tmp_stream.next_in = comp_buf;
    tmp_stream.avail_in = comp_buf_sz;
    tmp_stream.next_out = decomp_buf;
    tmp_stream.avail_out = *inout_decomp_buf_sz;
637
638
639
640
641
642
643
644
645

    /* initialize the zlib decompression parameters */
    /* TODO: check these parameters? */
    //ret = inflateInit2(&tmp_stream, 31);
    ret = inflateInit(&tmp_stream);
    if(ret != Z_OK)
    {
        return(-1);
    }
646

647
    /* while we have not finished consuming all of the compressed input data */
648
    while(tmp_stream.avail_in)
649
    {
650
651
652
653
654
655
656
657
658
659
660
        if(tmp_stream.avail_out == 0)
        {
            /* We ran out of buffer space for compression.  In theory,
             * we could just alloc more space, but probably just easier
             * to bump up the default size of the output buffer.
             */
            inflateEnd(&tmp_stream);
            return(-1);
        }

        /* decompress data */
661
        ret = inflate(&tmp_stream, Z_FINISH);
662
663
664
665
666
        if(ret != Z_STREAM_END)
        {
            inflateEnd(&tmp_stream);
            return(-1);
        }
667
668
669
670

        total_out += tmp_stream.total_out;
        if(tmp_stream.avail_in)
            inflateReset(&tmp_stream);
671
    }
672
    inflateEnd(&tmp_stream);
673

674
    *inout_decomp_buf_sz = total_out;
675
    return(0);
676
}
677
678
679
680
681
682
683
684
685

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */