/*
 *  (C) 2009 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#define _GNU_SOURCE
#include "darshan-util-config.h"
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <unistd.h>
#include <inttypes.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>

#include <zlib.h>
#ifdef HAVE_LIBBZ2
#include <bzlib.h>
#endif

#include "darshan-logutils.h"

/* default to a decompression buffer size of 4 MiB; this is the capacity of
 * the output buffers handed to darshan_decompress_buffer()
 */
#define DARSHAN_DEF_DECOMP_BUF_SZ (4*1024*1024)

struct darshan_fd_s
{
31
    int pf;
32
    int64_t pos;
33
    char version[8];
34
35
    int swap_flag;
    char *exe_mnt_data;
36
37
38
    struct darshan_log_map job_map;
    struct darshan_log_map rec_map;
    struct darshan_log_map mod_map[DARSHAN_MAX_MODS];
39
40
};

41
42
43
/* internal helpers, defined at the bottom of this file */
static int darshan_log_seek(darshan_fd fd, off_t offset);
static int darshan_log_read(darshan_fd fd, void *buf, int len);
static int darshan_log_write(darshan_fd fd, void *buf, int len);
static int darshan_decompress_buffer(char *comp_buf, int comp_buf_sz,
    char *decomp_buf, int *inout_decomp_buf_sz);

/* darshan_log_open()
 *
 * open a darshan log file for reading/writing
 *
 * returns 0 on success, -1 on failure
 */
53
darshan_fd darshan_log_open(const char *name, const char *mode)
54
{
55
    int o_flags;
56

57
58
59
    /* we only allows "w" or "r" modes, nothing fancy */
    assert(strlen(mode) == 1);
    assert(mode[0] == 'r' || mode[0] == 'w');
60
61
62
63
    if(mode[0] == 'r')
        o_flags = O_RDONLY;
    else
        o_flags = O_WRONLY;
64

65
66
67
68
69
    darshan_fd tmp_fd = malloc(sizeof(*tmp_fd));
    if(!tmp_fd)
        return(NULL);
    memset(tmp_fd, 0, sizeof(*tmp_fd));

70
71
    tmp_fd->pf = open(name, o_flags);
    if(tmp_fd->pf < 0)
72
73
    {
        free(tmp_fd);
74
        return(NULL);
75
    }
76
77

    return(tmp_fd);
78
79
}

80
81
82
83
84
85
86
87
/* darshan_log_getheader()
 *
 * read the header of the darshan log and set internal data structures
 * NOTE: this function must be called before reading other portions of the log
 *
 * fd     - open log handle; its version, swap_flag and region maps are set
 * header - output; receives the header as stored in the file.  If the log
 *          was written with the opposite byte order, the magic number and
 *          the rec_map/mod_map offsets and lengths are byte swapped in place
 *
 * returns 0 on success, -1 on failure
 */
int darshan_log_getheader(darshan_fd fd, struct darshan_header *header)
{
    int i;
    int ret;

    ret = darshan_log_seek(fd, 0);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(ret);
    }

    /* read header from log file */
    ret = darshan_log_read(fd, header, sizeof(*header));
    /* test for a negative result first: comparing a negative int directly
     * against sizeof() converts it to a huge unsigned value, which would
     * let a read error slip through as "enough data"
     */
    if(ret < 0 || (size_t)ret < sizeof(*header))
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read header).\n");
        return(-1);
    }

    /* save the version string (not NUL terminated if it fills all 8 bytes) */
    strncpy(fd->version, header->version_string, sizeof(fd->version));

    if(header->magic_nr == DARSHAN_MAGIC_NR)
    {
        /* no byte swapping needed, this file is in host format already */
        fd->swap_flag = 0;
    }
    else
    {
        /* try byte swapping */
        DARSHAN_BSWAP64(&header->magic_nr);
        if(header->magic_nr != DARSHAN_MAGIC_NR)
        {
            /* otherwise this file is just broken */
            fprintf(stderr, "Error: bad magic number in darshan log file.\n");
            return(-1);
        }

        fd->swap_flag = 1;

        /* swap the log map variables in the header */
        DARSHAN_BSWAP64(&header->rec_map.off);
        DARSHAN_BSWAP64(&header->rec_map.len);
        for(i=0;i<DARSHAN_MAX_MODS;i++)
        {
            DARSHAN_BSWAP64(&header->mod_map[i].off);
            DARSHAN_BSWAP64(&header->mod_map[i].len);
        }
    }

    /* save the mapping of data within log file to this file descriptor;
     * the job data occupies everything between the header and the record hash
     */
    fd->job_map.off = sizeof(struct darshan_header);
    fd->job_map.len = header->rec_map.off - fd->job_map.off;
    memcpy(&fd->rec_map, &header->rec_map, sizeof(struct darshan_log_map));
    memcpy(&fd->mod_map, &header->mod_map, DARSHAN_MAX_MODS * sizeof(struct darshan_log_map));

    return(0);
}

/* darshan_log_getjob()
150
151
 *
 * read job level metadata from the darshan log file
152
 *
153
 * returns 0 on success, -1 on failure
154
 */
155
int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
156
{
157
    char *comp_buf;
158
    char job_buf[DARSHAN_JOB_RECORD_SIZE] = {0};
159
    int job_buf_sz = DARSHAN_JOB_RECORD_SIZE;
160
    int ret;
161

162
163
164
165
166
    /* allocate a buffer to store the (compressed) darshan job info */
    comp_buf = malloc(fd->job_map.len);
    if(!comp_buf)
        return(-1);

167
    ret = darshan_log_seek(fd, fd->job_map.off);
168
    if(ret < 0)
169
    {
170
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
171
        free(comp_buf);
172
        return(ret);
173
174
    }

175
    /* read the job data from the log file */
176
    ret = darshan_log_read(fd, comp_buf, fd->job_map.len);
177
    if(ret < fd->job_map.len)
178
    {
179
        fprintf(stderr, "Error: invalid darshan log file (failed to read job data).\n");
180
        free(comp_buf);
181
182
183
        return(-1);
    }

184
185
186
187
188
189
190
191
192
193
194
195
    /* decompress the job data */
    ret = darshan_decompress_buffer(comp_buf, fd->job_map.len,
        job_buf, &job_buf_sz);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to decompress darshan job data.\n");
        free(comp_buf);
        return(-1);
    }
    assert(job_buf_sz == DARSHAN_JOB_RECORD_SIZE);
    free(comp_buf);

196
197
    memcpy(job, job_buf, sizeof(*job));

198
    if(fd->swap_flag)
199
    {
200
201
202
203
204
205
        /* swap bytes if necessary */
        DARSHAN_BSWAP64(&job->uid);
        DARSHAN_BSWAP64(&job->start_time);
        DARSHAN_BSWAP64(&job->end_time);
        DARSHAN_BSWAP64(&job->nprocs);
        DARSHAN_BSWAP64(&job->jobid);
206
207
    }

208
    /* save trailing job data, so exe and mount information can be retrieved later */
209
210
    if(!fd->exe_mnt_data)
        fd->exe_mnt_data = malloc(DARSHAN_EXE_LEN+1);
211
212
    if(!fd->exe_mnt_data)
        return(-1);
213
    memcpy(fd->exe_mnt_data, &job_buf[sizeof(*job)], DARSHAN_EXE_LEN+1);
214
215
216
217
218
219
220
221

    return(0);
}

/* darshan_log_getexe()
 *
 * copy the executable/command line string of the job into buf
 *
 * fd  - open log handle; the job record is read first if the exe/mount
 *       text has not been cached yet
 * buf - output; receives the NUL terminated exe string.  The caller must
 *       provide room for the string plus its terminator (the cached text
 *       is allocated as DARSHAN_EXE_LEN+1 bytes, so a buffer of that size
 *       is presumably sufficient -- confirm against callers)
 *
 * returns 0 on success, -1 on failure
 */
int darshan_log_getexe(darshan_fd fd, char *buf)
{
    const char *newline;
    size_t exe_len;

    /* if the exe/mount info has not been saved yet, read in the job
     * header to get this data.
     */
    if(!fd->exe_mnt_data)
    {
        struct darshan_job job;
        (void)darshan_log_getjob(fd, &job);

        if(!fd->exe_mnt_data)
            return(-1);
    }

    /* the exe string is everything before the first line break; when there
     * is no line break the entire text is the exe string
     */
    newline = strchr(fd->exe_mnt_data, '\n');
    if(newline)
        exe_len = (size_t)(newline - fd->exe_mnt_data);
    else
        exe_len = strlen(fd->exe_mnt_data);

    /* copy over the exe string and terminate it */
    memcpy(buf, fd->exe_mnt_data, exe_len);
    buf[exe_len] = '\0';

    return (0);
}

/* darshan_log_getmounts()
 * 
245
246
247
 * retrieves mount table information from the log.  Note that mnt_pts and
 * fs_types are arrays that will be allocated by the function and must be
 * freed by the caller.  count will indicate the size of the arrays
248
 */
249
int darshan_log_getmounts(darshan_fd fd, char*** mnt_pts,
250
251
252
253
254
255
    char*** fs_types, int* count)
{
    int ret;
    char *pos;
    int array_index = 0;

256
257
258
    /* if the exe/mount info has not been saved yet, read in the job
     * header to get this data.
     */
259
    if(!fd->exe_mnt_data)
260
261
262
263
264
265
266
    {
        struct darshan_job job;
        (void)darshan_log_getjob(fd, &job);

        if(!fd->exe_mnt_data)
            return(-1);
    }
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293

    *count = 0;
    pos = fd->exe_mnt_data;
    while((pos = strchr(pos, '\n')) != NULL)
    {
        pos++;
        (*count)++;
    }

    if(*count == 0)
    {
        /* no mount entries present */
        return(0);
    }

    /* allocate output arrays */
    *mnt_pts = malloc((*count)*sizeof(char*));
    assert(*mnt_pts);
    *fs_types = malloc((*count)*sizeof(char*));
    assert(*fs_types);

    /* work backwards through the table and parse each line (except for
     * first, which holds command line information)
     */
    while((pos = strrchr(fd->exe_mnt_data, '\n')) != NULL)
    {
        /* overestimate string lengths */
294
        (*mnt_pts)[array_index] = malloc(DARSHAN_EXE_LEN);
295
        assert((*mnt_pts)[array_index]);
296
        (*fs_types)[array_index] = malloc(DARSHAN_EXE_LEN);
297
298
        assert((*fs_types)[array_index]);

299
300
301
        ret = sscanf(++pos, "%s\t%s", (*fs_types)[array_index],
            (*mnt_pts)[array_index]);
        if(ret != 2)
302
303
304
305
306
307
308
309
310
        {
            fprintf(stderr, "Error: poorly formatted mount table in log file.\n");
            return(-1);
        }
        pos--;
        *pos = '\0';
        array_index++;
    }

311
312
313
    return(0);
}

314
/* darshan_log_gethash()
315
 *
316
 * read the hash of records from the darshan log file
317
318
319
 *
 * returns 0 on success, -1 on failure
 */
320
int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash)
321
{
322
323
324
325
    char *comp_buf;
    char hash_buf[DARSHAN_DEF_DECOMP_BUF_SZ] = {0};
    int hash_buf_sz = DARSHAN_DEF_DECOMP_BUF_SZ;
    char *buf_ptr;
326
327
328
329
    darshan_record_id *rec_id_ptr;
    uint32_t *path_len_ptr;
    char *path_ptr;
    struct darshan_record_ref *ref;
330
331
    int ret;

332
333
334
335
336
    /* allocate a buffer to store the (compressed, serialized) darshan record hash */
    comp_buf = malloc(fd->rec_map.len);
    if(!comp_buf)
        return(-1);

337
    ret = darshan_log_seek(fd, fd->rec_map.off);
338
    if(ret < 0)
339
340
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
341
        free(comp_buf);
342
        return(ret);
343
    }
344

345
346
    /* read the record hash from the log file */
    ret = darshan_log_read(fd, comp_buf, fd->rec_map.len);
347
    if(ret < fd->rec_map.len)
348
    {
349
        fprintf(stderr, "Error: invalid darshan log file (failed to read record hash).\n");
350
        free(comp_buf);
351
        return(-1);
352
353
    }

354
355
356
357
358
359
360
361
362
363
364
    /* decompress the record hash buffer */
    ret = darshan_decompress_buffer(comp_buf, fd->rec_map.len,
        hash_buf, &hash_buf_sz);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to decompress darshan job data.\n");
        free(comp_buf);
        return(-1);
    }
    free(comp_buf);

365
    buf_ptr = hash_buf;
366
    while(buf_ptr < (hash_buf + hash_buf_sz))
367
    {
368
        /* get pointers for each field of this darshan record */
369
        /* NOTE: darshan record hash serialization method: 
370
371
         *          ... darshan_record_id | (uint32_t) path_len | path ...
         */
372
373
374
375
376
377
378
379
380
        rec_id_ptr = (darshan_record_id *)buf_ptr;
        buf_ptr += sizeof(darshan_record_id);
        path_len_ptr = (uint32_t *)buf_ptr;
        buf_ptr += sizeof(uint32_t);
        path_ptr = (char *)buf_ptr;
        buf_ptr += *path_len_ptr;

        ref = malloc(sizeof(*ref));
        if(!ref)
381
        {
382
            return(-1);
383
        }
384
385
        ref->rec.name = malloc(*path_len_ptr + 1);
        if(!ref->rec.name)
386
        {
387
388
            free(ref);
            return(-1);
389
390
        }

391
        if(fd->swap_flag)
392
        {
393
394
395
            /* we need to sort out endianness issues before deserializing */
            DARSHAN_BSWAP64(rec_id_ptr);
            DARSHAN_BSWAP32(path_len_ptr);
396
397
        }

398
399
400
401
        /* set the fields for this record */
        ref->rec.id = *rec_id_ptr;
        memcpy(ref->rec.name, path_ptr, *path_len_ptr);
        ref->rec.name[*path_len_ptr] = '\0';
402

403
        /* add this record to the hash */
404
        HASH_ADD(hlink, *hash, rec.id, sizeof(darshan_record_id), ref);
405
406
    }

407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
    return(0);
}

/* darshan_log_getmod()
 *
 * read and decompress the data that one module stored in the log
 *
 * fd         - open log handle whose header has already been read
 * mod_id     - module identifier, 0 <= mod_id < DARSHAN_MAX_MODS
 * mod_buf    - output; heap buffer holding the decompressed module data,
 *              to be freed by the caller.  NULL if the module has no data
 *              in the log or on failure.
 * mod_buf_sz - output; number of valid bytes in *mod_buf (0 if none)
 *
 * returns 0 on success (including "no data for this module"), -1 on failure
 */
int darshan_log_getmod(darshan_fd fd, int mod_id, void **mod_buf,
    int *mod_buf_sz)
{
    char *comp_buf;
    char *tmp_buf;
    int tmp_buf_sz = DARSHAN_DEF_DECOMP_BUF_SZ;
    int comp_len;
    int ret;

    *mod_buf = NULL;
    *mod_buf_sz = 0;

    if(mod_id < 0 || mod_id >= DARSHAN_MAX_MODS)
    {
        fprintf(stderr, "Error: invalid Darshan module id.\n");
        return(-1);
    }

    if(fd->mod_map[mod_id].len == 0)
    {
        /* this module has no data in the log */
        return(0);
    }

    /* the read and decompress helpers take int lengths, so refuse a region
     * size (taken from the log file) that does not fit in an int
     */
    if((uint64_t)fd->mod_map[mod_id].len > (uint64_t)INT_MAX)
    {
        fprintf(stderr, "Error: invalid darshan log file (module %s data too large).\n",
            darshan_module_names[mod_id]);
        return(-1);
    }
    comp_len = (int)fd->mod_map[mod_id].len;

    comp_buf = malloc(comp_len);
    if(!comp_buf)
        return(-1);

    ret = darshan_log_seek(fd, fd->mod_map[mod_id].off);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        free(comp_buf);
        return(ret);
    }

    /* read the given module's (compressed) data from the log file */
    ret = darshan_log_read(fd, comp_buf, comp_len);
    if(ret < comp_len)
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read module %s data).\n",
            darshan_module_names[mod_id]);
        free(comp_buf);
        return(-1);
    }

    tmp_buf = malloc(DARSHAN_DEF_DECOMP_BUF_SZ);
    if(!tmp_buf)
    {
        free(comp_buf);
        return(-1);
    }

    /* decompress this module's data; the compressed copy is not needed
     * afterwards whether or not decompression succeeded
     */
    ret = darshan_decompress_buffer(comp_buf, comp_len, tmp_buf, &tmp_buf_sz);
    free(comp_buf);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to decompress module %s data.\n",
            darshan_module_names[mod_id]);
        free(tmp_buf);
        return(-1);
    }

    /* pass back the final decompressed data pointer */
    *mod_buf = tmp_buf;
    *mod_buf_sz = tmp_buf_sz;

    return(0);
}

#if 0
/* NOTE: this function is compiled out and kept for reference only.
 *
 * TODO: hardcoded for posix -- what can we do generally?
 *       different function for each module and a way to map to this function?
 *
 * reads the next file record from the log into *file
 *
 * returns 1 if a record was read, 0 at end of file, -1 on failure
 */
int darshan_log_getfile(darshan_fd fd, struct darshan_posix_file *file)
{
    int ret;
    const char* err_string;
    int i;

    if(fd->pos < fd->mod_map[0].off)
    {
        ret = darshan_log_seek(fd, fd->mod_map[0].off);
        if(ret < 0)
        {
            fprintf(stderr, "Error: unable to seek in darshan log file.\n");
            return(ret);
        }
    }

    /* reset file record, so that diff compares against a zero'd out record
     * if file is missing
     */
    memset(file, 0, sizeof(*file));

    ret = darshan_log_read(fd, file, sizeof(*file));
    if(ret == sizeof(*file))
    {
        /* got exactly one, correct size record */
        if(fd->swap_flag)
        {
            /* swap bytes if necessary */
            DARSHAN_BSWAP64(&file->f_id);
            DARSHAN_BSWAP64(&file->rank);
            for(i=0; i<CP_NUM_INDICES; i++)
                DARSHAN_BSWAP64(&file->counters[i]);
            for(i=0; i<CP_F_NUM_INDICES; i++)
                DARSHAN_BSWAP64(&file->fcounters[i]);
        }
        return(1);
    }

    if(ret > 0)
    {
        /* got a short read */
        fprintf(stderr, "Error: invalid file record (too small)\n");
        return(-1);
    }

    if(ret == 0)
    {
        /* hit end of file */
        return(0);
    }

    /* all other errors */
    err_string = strerror(errno);
    fprintf(stderr, "Error: %s\n", err_string);
    return(-1);
}
#endif

/* darshan_log_close()
 *
 * close an open darshan file descriptor and release all memory held by
 * the handle; fd must not be used afterwards.  Passing NULL is a no-op.
 *
 * no return value
 */
void darshan_log_close(darshan_fd fd)
{
    if(!fd)
        return;

    /* darshan_log_open() only hands out handles whose descriptor is valid,
     * and 0 is a valid descriptor, so test for "negative" rather than "zero"
     */
    if(fd->pf >= 0)
        close(fd->pf);

    /* free(NULL) is harmless, no need to test exe_mnt_data first */
    free(fd->exe_mnt_data);
    free(fd);
}

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/* darshan_log_seek()
 *
 * position the log file at an absolute byte offset.  No system call is
 * made when the tracked position already equals the requested offset.
 *
 * return 0 on successful seek to offset, -1 on failure.
 */
static int darshan_log_seek(darshan_fd fd, off_t offset)
{
    off_t ret_off;

    if(fd->pos == offset)
        return(0);

    ret_off = lseek(fd->pf, offset, SEEK_SET);
    if(ret_off != offset)
        return(-1);

    /* remember where we are so later seeks to the same spot are free */
    fd->pos = offset;
    return(0);
}

/* darshan_log_write()
 *
 * write len bytes from buf at the current position of the log file,
 * retrying after short writes and interrupted system calls.  The tracked
 * file position is advanced by every byte actually written.
 *
 * return amount written on success, -1 on failure.
 */
static int darshan_log_write(darshan_fd fd, void* buf, int len)
{
    const char *src = buf;
    int total = 0;
    ssize_t ret;

    /* 0 is a valid descriptor; only negative values mean "not open" */
    if(fd->pf < 0 || len < 0)
        return(-1);

    while(total < len)
    {
        ret = write(fd->pf, src + total, len - total);
        if(ret < 0)
        {
            if(errno == EINTR)
                continue;
            return(-1);
        }
        if(ret == 0)
            break;

        total += (int)ret;
        fd->pos += ret;
    }

    return(total);
}

/* darshan_log_read()
 *
 * read up to len bytes from the current position of the log file into
 * buf, retrying after short reads and interrupted system calls so that
 * fewer than len bytes are returned only when end of file is reached.
 * The tracked file position is advanced by every byte actually read.
 *
 * return amount read on success, 0 on EOF, -1 on failure.
 */
static int darshan_log_read(darshan_fd fd, void* buf, int len)
{
    char *dst = buf;
    int total = 0;
    ssize_t ret;

    /* 0 is a valid descriptor; only negative values mean "not open" */
    if(fd->pf < 0 || len < 0)
        return(-1);

    while(total < len)
    {
        ret = read(fd->pf, dst + total, len - total);
        if(ret < 0)
        {
            if(errno == EINTR)
                continue;
            return(-1);
        }
        if(ret == 0)
        {
            /* end of file */
            break;
        }

        total += (int)ret;
        fd->pos += ret;
    }

    return(total);
}

/* TODO bz2 compression support */
static int darshan_decompress_buffer(char *comp_buf, int comp_buf_sz,
    char *decomp_buf, int *inout_decomp_buf_sz)
621
{
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
    int ret;
    z_stream tmp_stream;

    memset(&tmp_stream, 0, sizeof(tmp_stream));
    tmp_stream.zalloc = Z_NULL;
    tmp_stream.zfree = Z_NULL;
    tmp_stream.opaque = Z_NULL;

    /* initialize the zlib decompression parameters */
    /* TODO: check these parameters? */
    //ret = inflateInit2(&tmp_stream, 31);
    ret = inflateInit(&tmp_stream);
    if(ret != Z_OK)
    {
        return(-1);
    }
638

639
640
641
642
    tmp_stream.next_in = comp_buf;
    tmp_stream.avail_in = comp_buf_sz;
    tmp_stream.next_out = decomp_buf;
    tmp_stream.avail_out = *inout_decomp_buf_sz;
643

644
645
    /* while we have not finished consuming all of the compressed input data */
    while(tmp_stream.total_in < comp_buf_sz)
646
    {
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
        if(tmp_stream.avail_out == 0)
        {
            /* We ran out of buffer space for compression.  In theory,
             * we could just alloc more space, but probably just easier
             * to bump up the default size of the output buffer.
             */
            inflateEnd(&tmp_stream);
            return(-1);
        }

        /* decompress data */
        ret = inflate(&tmp_stream, Z_NO_FLUSH);
        if(ret != Z_STREAM_END)
        {
            inflateEnd(&tmp_stream);
            return(-1);
        }
664
    }
665
    inflateEnd(&tmp_stream);
666

667
668
    *inout_decomp_buf_sz = tmp_stream.total_out;
    return(0);
669
}
670
671
672
673
674
675
676
677
678

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */