darshan-logutils.c 12.1 KB
Newer Older
1
2
3
4
5
/*
 *  (C) 2009 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

6
#define _GNU_SOURCE
7
#include "darshan-util-config.h"
8
9
#include <stdio.h>
#include <string.h>
10
#include <assert.h>
11
#include <stdlib.h>
12
#include <unistd.h>
13
#include <inttypes.h>
14
15
16
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
17
#include <errno.h>
18

19
20
#include "darshan-logutils.h"

21
22
23
static int darshan_log_seek(darshan_fd fd, off_t offset);
static int darshan_log_read(darshan_fd fd, void *buf, int len);
static int darshan_log_write(darshan_fd fd, void *buf, int len);
24
25
static int darshan_decompress_buffer(char *comp_buf, int comp_buf_sz,
    char *decomp_buf, int *inout_decomp_buf_sz);
26
27
28
29
30
31
32

/* darshan_log_open()
 *
 * open a darshan log file for reading/writing
 *
 * returns 0 on success, -1 on failure
 */
33
darshan_fd darshan_log_open(const char *name, const char *mode)
34
{
35
    darshan_fd tmp_fd;
36

37
38
39
40
    /* we only allows "w" or "r" modes, nothing fancy */
    assert(strlen(mode) == 1);
    assert(mode[0] == 'r' || mode[0] == 'w');

41
    tmp_fd = malloc(sizeof(*tmp_fd));
42
43
44
45
    if(!tmp_fd)
        return(NULL);
    memset(tmp_fd, 0, sizeof(*tmp_fd));

46
47
48
49
50
51
52
53
54
55
56
57
    if(strcmp(mode, "r") == 0)
    {
        /* TODO: check for bz2 */
    }
    
    if(strcmp(mode, "w") == 0)
    {
        /* TODO: check for bz2 */
    }

    tmp_fd->gzf = gzopen(name, mode);
    if(!tmp_fd->gzf)
58
59
    {
        free(tmp_fd);
60
        tmp_fd = NULL;
61
    }
62
63

    return(tmp_fd);
64
65
}

66
67
68
69
70
71
72
73
/* darshan_log_getheader()
 *
 * read the header of the darshan log and set internal data structures
 * NOTE: this function must be called before reading other portions of the log
 *
 * fd:     open log handle; on success its version string, swap_flag,
 *         rec_map and mod_map fields are filled in from the header
 * header: output buffer for the header; when the log was written with the
 *         opposite byte order, the magic number and the record/module map
 *         offsets and lengths are byte swapped in place
 *
 * returns 0 on success, -1 on failure
 */
int darshan_log_getheader(darshan_fd fd, struct darshan_header *header)
{
    int i;
    int ret;

    ret = darshan_log_seek(fd, 0);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }

    /* read header from log file */
    ret = darshan_log_read(fd, header, sizeof(*header));
    /* compare as int: against a bare sizeof, a -1 error return would be
     * converted to a huge unsigned value and slip past a "<" check
     */
    if(ret != (int)sizeof(*header))
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read header).\n");
        return(-1);
    }

    /* save the version string */
    /* NOTE(review): assumes fd->version holds more than 8 bytes so that a
     * terminator remains after the copy -- confirm against darshan-logutils.h
     */
    strncpy(fd->version, header->version_string, 8);

    if(header->magic_nr == DARSHAN_MAGIC_NR)
    {
        /* no byte swapping needed, this file is in host format already */
        fd->swap_flag = 0;
    }
    else
    {
        /* try byte swapping */
        DARSHAN_BSWAP64(&header->magic_nr);
        if(header->magic_nr == DARSHAN_MAGIC_NR)
        {
            fd->swap_flag = 1;

            /* swap the log map variables in the header */
            DARSHAN_BSWAP64(&header->rec_map.off);
            DARSHAN_BSWAP64(&header->rec_map.len);
            for(i = 0; i < DARSHAN_MAX_MODS; i++)
            {
                DARSHAN_BSWAP64(&header->mod_map[i].off);
                DARSHAN_BSWAP64(&header->mod_map[i].len);
            }
        }
        else
        {
            /* otherwise this file is just broken */
            fprintf(stderr, "Error: bad magic number in darshan log file.\n");
            return(-1);
        }
    }

    /* save the mapping of data within log file to this file descriptor */
    memcpy(&fd->rec_map, &header->rec_map, sizeof(struct darshan_log_map));
    memcpy(&fd->mod_map, &header->mod_map, DARSHAN_MAX_MODS * sizeof(struct darshan_log_map));

    return(0);
}
132

133
/* darshan_log_getjob()
134
135
 *
 * read job level metadata from the darshan log file
136
 *
137
 * returns 0 on success, -1 on failure
138
 */
139
int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
140
{
141
    int ret;
142

143
    ret = darshan_log_seek(fd, sizeof(struct darshan_header));
144
    if(ret < 0)
145
    {
146
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
147
        return(-1);
148
149
    }

150
    /* read the job data from the log file */
151
152
    ret = darshan_log_read(fd, job, sizeof(*job));
    if(ret < sizeof(*job))
153
    {
154
        fprintf(stderr, "Error: invalid darshan log file (failed to read job data).\n");
155
156
157
        return(-1);
    }

158
    if(fd->swap_flag)
159
    {
160
161
162
163
164
165
        /* swap bytes if necessary */
        DARSHAN_BSWAP64(&job->uid);
        DARSHAN_BSWAP64(&job->start_time);
        DARSHAN_BSWAP64(&job->end_time);
        DARSHAN_BSWAP64(&job->nprocs);
        DARSHAN_BSWAP64(&job->jobid);
166
167
    }

168
169
170
171
172
    return(0);
}

/* darshan_log_getexe()
 *
 * read the exe string from the darshan log file
 *
 * fd:  open log handle
 * buf: caller-provided buffer of at least DARSHAN_EXE_LEN+1 bytes; on
 *      success it holds a NUL-terminated string cut at the first newline
 *
 * returns 0 on success, -1 on failure
 */
int darshan_log_getexe(darshan_fd fd, char *buf)
{
    /* the exe/mount data follows the header and the job record */
    int tmp_off = sizeof(struct darshan_header) + sizeof(struct darshan_job);
    int ret;
    char *newline;

    ret = darshan_log_seek(fd, tmp_off);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }

    /* read the trailing exe data from the darshan log */
    ret = darshan_log_read(fd, buf, DARSHAN_EXE_LEN+1);
    if(ret < DARSHAN_EXE_LEN+1)
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read exe string).\n");
        return(-1);
    }

    /* the bytes come straight from the file, so force termination before
     * treating them as a C string
     */
    buf[DARSHAN_EXE_LEN] = '\0';

    /* mount info is stored after the exe string, so truncate there */
    newline = strchr(buf, '\n');
    if(newline)
        *newline = '\0';

    return (0);
}

/* darshan_log_getmounts()
 * 
202
203
204
 * retrieves mount table information from the log.  Note that mnt_pts and
 * fs_types are arrays that will be allocated by the function and must be
 * freed by the caller.  count will indicate the size of the arrays
205
 */
206
int darshan_log_getmounts(darshan_fd fd, char*** mnt_pts,
207
208
    char*** fs_types, int* count)
{
209
    int tmp_off = sizeof(struct darshan_header) + sizeof(struct darshan_job);
210
211
212
    int ret;
    char *pos;
    int array_index = 0;
213
    char buf[DARSHAN_EXE_LEN+1];
214

215
216
    ret = darshan_log_seek(fd, tmp_off);
    if(ret < 0)
217
    {
218
219
220
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }
221

222
223
224
225
226
227
    /* read the trailing mount data from the darshan log */
    ret = darshan_log_read(fd, buf, DARSHAN_EXE_LEN+1);
    if(ret < DARSHAN_EXE_LEN+1)
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read mount info).\n");
        return(-1);
228
    }
229

230
    /* count entries */
231
    *count = 0;
232
    pos = buf;
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
    while((pos = strchr(pos, '\n')) != NULL)
    {
        pos++;
        (*count)++;
    }

    if(*count == 0)
    {
        /* no mount entries present */
        return(0);
    }

    /* allocate output arrays */
    *mnt_pts = malloc((*count)*sizeof(char*));
    assert(*mnt_pts);
    *fs_types = malloc((*count)*sizeof(char*));
    assert(*fs_types);

    /* work backwards through the table and parse each line (except for
     * first, which holds command line information)
     */
254
    while((pos = strrchr(buf, '\n')) != NULL)
255
256
    {
        /* overestimate string lengths */
257
        (*mnt_pts)[array_index] = malloc(DARSHAN_EXE_LEN);
258
        assert((*mnt_pts)[array_index]);
259
        (*fs_types)[array_index] = malloc(DARSHAN_EXE_LEN);
260
261
        assert((*fs_types)[array_index]);

262
263
264
        ret = sscanf(++pos, "%s\t%s", (*fs_types)[array_index],
            (*mnt_pts)[array_index]);
        if(ret != 2)
265
266
267
268
269
270
271
272
273
        {
            fprintf(stderr, "Error: poorly formatted mount table in log file.\n");
            return(-1);
        }
        pos--;
        *pos = '\0';
        array_index++;
    }

274
275
276
    return(0);
}

277
/* darshan_log_gethash()
278
 *
279
 * read the hash of records from the darshan log file
280
281
282
 *
 * returns 0 on success, -1 on failure
 */
283
int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash)
284
{
285
286
    char *hash_buf;
    int hash_buf_sz = fd->rec_map.len;
287
    char *buf_ptr;
288
289
290
291
    darshan_record_id *rec_id_ptr;
    uint32_t *path_len_ptr;
    char *path_ptr;
    struct darshan_record_ref *ref;
292
293
    int ret;

294
295
    hash_buf = malloc(hash_buf_sz);
    if(!hash_buf)
296
297
        return(-1);

298
    ret = darshan_log_seek(fd, fd->rec_map.off);
299
    if(ret < 0)
300
301
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
302
        return(-1);
303
    }
304

305
    /* read the record hash from the log file */
306
    ret = darshan_log_read(fd, hash_buf, fd->rec_map.len);
307
    if(ret < fd->rec_map.len)
308
    {
309
        fprintf(stderr, "Error: invalid darshan log file (failed to read record hash).\n");
310
311
312
        return(-1);
    }

313
    buf_ptr = hash_buf;
314
    while(buf_ptr < (hash_buf + hash_buf_sz))
315
    {
316
        /* get pointers for each field of this darshan record */
317
        /* NOTE: darshan record hash serialization method: 
318
319
         *          ... darshan_record_id | (uint32_t) path_len | path ...
         */
320
321
322
323
324
325
326
327
        rec_id_ptr = (darshan_record_id *)buf_ptr;
        buf_ptr += sizeof(darshan_record_id);
        path_len_ptr = (uint32_t *)buf_ptr;
        buf_ptr += sizeof(uint32_t);
        path_ptr = (char *)buf_ptr;
        buf_ptr += *path_len_ptr;

        if(fd->swap_flag)
328
        {
329
330
331
            /* we need to sort out endianness issues before deserializing */
            DARSHAN_BSWAP64(rec_id_ptr);
            DARSHAN_BSWAP32(path_len_ptr);
332
333
        }

334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
        HASH_FIND(hlink, *hash, rec_id_ptr, sizeof(darshan_record_id), ref);
        if(!ref)
        {
            ref = malloc(sizeof(*ref));
            if(!ref)
            {
                return(-1);
            }
            ref->rec.name = malloc(*path_len_ptr + 1);
            if(!ref->rec.name)
            {
                free(ref);
                return(-1);
            }

            /* set the fields for this record */
            ref->rec.id = *rec_id_ptr;
            memcpy(ref->rec.name, path_ptr, *path_len_ptr);
            ref->rec.name[*path_len_ptr] = '\0';
353

354
355
356
            /* add this record to the hash */
            HASH_ADD(hlink, *hash, rec.id, sizeof(darshan_record_id), ref);
        }
357
358
    }

359
360
361
    return(0);
}

362
363
/* darshan_log_get_moddat()
 *
 * read the next moddat_buf_sz bytes of a module's data from the log
 *
 * fd:            open log handle whose mod_map has been filled in
 * mod_id:        module whose data is wanted
 * moddat_buf:    output buffer of at least moddat_buf_sz bytes
 * moddat_buf_sz: number of bytes to read in this call
 *
 * returns 1 when moddat_buf_sz bytes were read, 0 when the module has no
 * (more) data, -1 on failure
 */
int darshan_log_get_moddat(darshan_fd fd, darshan_module_id mod_id,
    void *moddat_buf, int moddat_buf_sz)
{
    int64_t mod_buf_end;
    int ret;

    /* mod_id indexes fd->mod_map, which holds DARSHAN_MAX_MODS entries */
    /* NOTE(review): darshan_module_names is assumed to have at least
     * DARSHAN_MAX_MODS entries too -- confirm
     */
    if((int)mod_id < 0 || (int)mod_id >= DARSHAN_MAX_MODS)
    {
        fprintf(stderr, "Error: invalid darshan module id.\n");
        return(-1);
    }

    /* 64-bit so that offsets beyond the range of int are not truncated */
    mod_buf_end = fd->mod_map[mod_id].off + fd->mod_map[mod_id].len;

    if(!fd->mod_map[mod_id].len || fd->pos == mod_buf_end)
        return(0); /* no (more) data corresponding to this mod_id */

    /* only seek to start of module data if current log file position 
     * is not within the given mod_id's range. This allows one to
     * repeatedly call this function and get chunks of a module's
     * data piecemeal.
     */
    if((fd->pos < fd->mod_map[mod_id].off) || (fd->pos > mod_buf_end))
    {
        ret = darshan_log_seek(fd, fd->mod_map[mod_id].off);
        if(ret < 0)
        {
            fprintf(stderr, "Error: unable to seek in darshan log file.\n");
            return(-1);
        }
    }

    /* read the next chunk of this module's data from the log file */
    ret = darshan_log_read(fd, moddat_buf, moddat_buf_sz);
    if(ret != moddat_buf_sz)
    {
        fprintf(stderr,
            "Error: invalid darshan log file (failed to read module %s data).\n",
            darshan_module_names[mod_id]);
        return(-1);
    }

    return(1);
}

399
400
401
402
403
404
405
/* darshan_log_close()
 *
 * close an open darshan file descriptor and free the handle
 *
 * fd: handle to close; NULL is ignored.  The handle must not be used
 *     after this call.
 *
 * no return value
 */
void darshan_log_close(darshan_fd fd)
{
    /* tolerate NULL, like free() does */
    if(!fd)
        return;

    if(fd->gzf)
        gzclose(fd->gzf);

    /* TODO: check bz2 */

    free(fd);

    return;
}

417
/* **************************************************** */
418

419
420
421
422
/* darshan_log_seek()
 *
 * position the log at an absolute offset; no seek is issued when fd->pos
 * already equals the requested offset
 *
 * returns 0 when the log is positioned at offset, -1 otherwise
 */
static int darshan_log_seek(darshan_fd fd, off_t offset)
{
    z_off_t target;

    /* already there, nothing to do */
    if(fd->pos == offset)
        return(0);

    /* TODO: check bz2 */
    if(!fd->gzf)
        return(-1);

    target = offset;
    if(gzseek(fd->gzf, target, SEEK_SET) != target)
        return(-1);

    /* record the new position only once the seek has succeeded */
    fd->pos = offset;
    return(0);
}

446
447
448
449
450
451
/* darshan_log_write()
 *
 * write len bytes from buf to the log and advance fd->pos by the amount
 * actually written
 *
 * returns the value reported by gzwrite(), or -1 when the handle has no
 * gzip stream
 */
static int darshan_log_write(darshan_fd fd, void* buf, int len)
{
    int nwritten = -1;

    if(fd->gzf)
    {
        nwritten = gzwrite(fd->gzf, buf, len);

        /* only a positive count moves the tracked position */
        if(nwritten > 0)
            fd->pos += nwritten;
    }

    /* TODO: check bz2 */

    return(nwritten);
}

465
466
467
468
469
470
/* darshan_log_read()
 *
 * read up to len bytes from the log into buf and advance fd->pos by the
 * amount actually read
 *
 * returns the value reported by gzread() (amount read, 0 on EOF, -1 on
 * failure), or -1 when the handle has no gzip stream
 */
static int darshan_log_read(darshan_fd fd, void* buf, int len)
{
    int nread;

    /* TODO: check bz2 */
    if(!fd->gzf)
        return(-1);

    nread = gzread(fd->gzf, buf, len);

    /* only a positive count moves the tracked position */
    if(nread > 0)
        fd->pos += nread;

    return(nread);
}
483
484
485
486
487
488
489
490
491

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */