darshan-logutils.c 12.9 KB
Newer Older
1
2
3
4
5
/*
 *  (C) 2009 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

6
#define _GNU_SOURCE
7
#include "darshan-util-config.h"
8
9
#include <stdio.h>
#include <string.h>
10
#include <assert.h>
11
#include <stdlib.h>
12
#include <unistd.h>
13
#include <inttypes.h>
14
15
16
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
17
#include <errno.h>
18

19
20
21
22
23
#include <zlib.h>
#ifdef HAVE_LIBBZ2
#include <bzlib.h>
#endif

24
25
#include "darshan-logutils.h"

26
27
struct darshan_fd_s
{
28
    int pf;
29
    int64_t pos;
30
    char version[8];
31
32
    int swap_flag;
    char *exe_mnt_data;
33
34
35
    struct darshan_log_map job_map;
    struct darshan_log_map rec_map;
    struct darshan_log_map mod_map[DARSHAN_MAX_MODS];
36
37
};

38
39
40
/* file-local I/O helpers; definitions appear later in this file */
static int darshan_log_read(darshan_fd fd, void *buf, int len);
static int darshan_log_write(darshan_fd fd, void *buf, int len);
static int darshan_log_seek(darshan_fd fd, off_t offset);
41

42
43
44
45
46
47
48

/* darshan_log_open()
 *
 * open a darshan log file for reading/writing
 *
 * returns 0 on success, -1 on failure
 */
49
darshan_fd darshan_log_open(const char *name, const char *mode)
50
{
51
    int o_flags;
52

53
54
55
    /* we only allows "w" or "r" modes, nothing fancy */
    assert(strlen(mode) == 1);
    assert(mode[0] == 'r' || mode[0] == 'w');
56
57
58
59
    if(mode[0] == 'r')
        o_flags = O_RDONLY;
    else
        o_flags = O_WRONLY;
60

61
62
63
64
65
    darshan_fd tmp_fd = malloc(sizeof(*tmp_fd));
    if(!tmp_fd)
        return(NULL);
    memset(tmp_fd, 0, sizeof(*tmp_fd));

66
67
    tmp_fd->pf = open(name, o_flags);
    if(tmp_fd->pf < 0)
68
69
    {
        free(tmp_fd);
70
        return(NULL);
71
    }
72
73

    return(tmp_fd);
74
75
}

76
77
78
79
80
81
82
83
/* darshan_log_getheader()
 *
 * read the header of the darshan log and set internal data structures
 * (version string, byte-swap flag, and the job/record/module maps)
 * NOTE: this function must be called before reading other portions of the log
 *
 * returns 0 on success, -1 on failure
 */
int darshan_log_getheader(darshan_fd fd, struct darshan_header *header)
{
    int i;
    int ret;

    ret = darshan_log_seek(fd, 0);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(ret);
    }

    /* read header from log file */
    ret = darshan_log_read(fd, header, sizeof(*header));
    /* test the sign first: comparing a negative int directly against a
     * size_t would convert it to a huge unsigned value and hide the error
     */
    if(ret < 0 || (size_t)ret < sizeof(*header))
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read header).\n");
        return(-1);
    }

    /* save the version string, always leaving it NUL-terminated */
    strncpy(fd->version, header->version_string, sizeof(fd->version) - 1);
    fd->version[sizeof(fd->version) - 1] = '\0';

    if(header->magic_nr == CP_MAGIC_NR)
    {
        /* no byte swapping needed, this file is in host format already */
        fd->swap_flag = 0;
    }
    else
    {
        /* try byte swapping */
        DARSHAN_BSWAP64(&header->magic_nr);
        if(header->magic_nr == CP_MAGIC_NR)
        {
            fd->swap_flag = 1;

            /* swap the log map variables in the header */
            DARSHAN_BSWAP64(&header->rec_map.off);
            DARSHAN_BSWAP64(&header->rec_map.len);
            for(i=0;i<DARSHAN_MAX_MODS;i++)
            {
                DARSHAN_BSWAP64(&header->mod_map[i].off);
                DARSHAN_BSWAP64(&header->mod_map[i].len);
            }
        }
        else
        {
            /* otherwise this file is just broken */
            fprintf(stderr, "Error: bad magic number in darshan log file.\n");
            return(-1);
        }
    }

    /* the job data sits between the header and the record map, so a record
     * map that starts inside the header would give a negative job length
     */
    if((uint64_t)header->rec_map.off < sizeof(struct darshan_header))
    {
        fprintf(stderr, "Error: invalid darshan log file (bad record map offset).\n");
        return(-1);
    }

    /* save the mapping of data within log file to this file descriptor */
    fd->job_map.off = sizeof(struct darshan_header);
    fd->job_map.len = header->rec_map.off - fd->job_map.off;
    memcpy(&fd->rec_map, &header->rec_map, sizeof(struct darshan_log_map));
    memcpy(&fd->mod_map, &header->mod_map, DARSHAN_MAX_MODS * sizeof(struct darshan_log_map));

    return(0);
}
144

145
/* darshan_log_getjob()
146
147
 *
 * read job level metadata from the darshan log file
148
 *
149
 * returns 0 on success, -1 on failure
150
 */
151
int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
152
{
153
    char job_buf[CP_JOB_RECORD_SIZE] = {0};
154
    int ret;
155

156
    ret = darshan_log_seek(fd, fd->job_map.off);
157
    if(ret < 0)
158
    {
159
160
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(ret);
161
162
    }

163
    /* read the job data from the log file */
164
    ret = darshan_log_read(fd, job_buf, fd->job_map.len);
165
    if(ret < fd->job_map.len)
166
    {
167
        fprintf(stderr, "Error: invalid darshan log file (failed to read job data).\n");
168
169
170
        return(-1);
    }

171
172
    memcpy(job, job_buf, sizeof(*job));

173
    if(fd->swap_flag)
174
    {
175
176
177
178
179
180
        /* swap bytes if necessary */
        DARSHAN_BSWAP64(&job->uid);
        DARSHAN_BSWAP64(&job->start_time);
        DARSHAN_BSWAP64(&job->end_time);
        DARSHAN_BSWAP64(&job->nprocs);
        DARSHAN_BSWAP64(&job->jobid);
181
182
    }

183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
    /* save trailing job data, so exe and mount information can be retrieved later */
    fd->exe_mnt_data = malloc(CP_EXE_LEN+1);
    if(!fd->exe_mnt_data)
        return(-1);
    memcpy(fd->exe_mnt_data, &job_buf[sizeof(*job)], CP_EXE_LEN+1);

    return(0);
}

/* NOTE(review): disabled fragment.  It sits outside any function and uses
 * names (metadata, buffer, job) that are not declared at this scope, so it
 * cannot be enabled as-is.  It walks the newline-separated key=value pairs
 * in job->metadata and copies the value stored under "prev_ver" into
 * job->version_string.
 */
#if 0
#ifdef HAVE_STRNDUP
    metadata = strndup(job->metadata, sizeof(job->metadata));
#else
    metadata = strdup(job->metadata);
#endif
    char *kv;
    char *key;
    char *value;
    char *save;

    for(kv=strtok_r(metadata, "\n", &save);
        kv != NULL;
        kv=strtok_r(NULL, "\n", &save))
    {
        /* NOTE: we intentionally only split on the first = character.
         * There may be additional = characters in the value portion
         * (for example, when storing mpi-io hints).
         */
        strcpy(buffer, kv);
        key = buffer;
        value = index(buffer, '=');
        if(!value)
            continue;
        /* convert = to a null terminator to split key and value */
        value[0] = '\0';
        value++;
        if (strcmp(key, "prev_ver") == 0)
        {
            strncpy(job->version_string, value, sizeof(job->version_string));
        }
    }
    free(metadata);
#endif

/* darshan_log_getexe()
 *
 * copy the exe string into buf as a NUL-terminated string.  The exe string
 * is everything before the first newline of the data saved by
 * darshan_log_getjob(), or all of that data when it holds no newline.
 * That data is at most CP_EXE_LEN characters long, so buf needs room for
 * CP_EXE_LEN+1 bytes.
 *
 * returns 0 on success, -1 if the job data has not been read yet
 */
int darshan_log_getexe(darshan_fd fd, char *buf)
{
    char *newline;
    size_t exe_len;

    /* TODO: try reading log job one more time to set this buffer up */
    if(!fd->exe_mnt_data)
        return(-1);

    newline = strchr(fd->exe_mnt_data, '\n');
    if(newline)
        exe_len = (size_t)(newline - fd->exe_mnt_data);
    else
        exe_len = strlen(fd->exe_mnt_data);

    /* copy over the exe string and terminate it, so the caller does not
     * have to pre-zero buf to get a usable string
     */
    memcpy(buf, fd->exe_mnt_data, exe_len);
    buf[exe_len] = '\0';

    return (0);
}

/* darshan_log_getmounts()
 *
 * retrieves mount table information from the log.  Note that devs, mnt_pts,
 * and fs_types are arrays that will be allocated by the function and must
 * be freed by the caller (each string in mnt_pts and fs_types is its own
 * allocation as well).  count will indicate the size of the arrays.
 * Entries are returned in reverse order: index 0 holds the last line of
 * the table.
 *
 * The data saved by darshan_log_getjob() is left unmodified, so this
 * function can be called repeatedly and in any order relative to
 * darshan_log_getexe().
 *
 * returns 0 on success (possibly with *count == 0), -1 on failure; on
 * failure nothing is left allocated, the three arrays are NULL and
 * *count is 0
 */
int darshan_log_getmounts(darshan_fd fd, int64_t** devs, char*** mnt_pts,
    char*** fs_types, int* count)
{
    char *scratch = NULL;
    char *pos;
    size_t data_len;
    int n_entries = 0;
    int array_index = 0;
    int ret;
    int i;

    /* give the outputs a defined value on every return path */
    *devs = NULL;
    *mnt_pts = NULL;
    *fs_types = NULL;
    *count = 0;

    /* TODO: try reading log job one more time to set this buffer up */
    if(!fd->exe_mnt_data)
        return(-1);

    /* count entries: every newline introduces one mount table line */
    for(pos = strchr(fd->exe_mnt_data, '\n'); pos != NULL;
        pos = strchr(pos + 1, '\n'))
    {
        n_entries++;
    }

    if(n_entries == 0)
    {
        /* no mount entries present */
        return(0);
    }

    /* the parse below chops lines off the end of the buffer, so run it on
     * a private copy
     */
    data_len = strlen(fd->exe_mnt_data);
    scratch = malloc(data_len + 1);
    if(scratch)
        memcpy(scratch, fd->exe_mnt_data, data_len + 1);

    /* allocate output arrays; the pointer arrays start out all-NULL so the
     * failure path can free every slot unconditionally
     */
    *devs = malloc((size_t)n_entries * sizeof(**devs));
    *mnt_pts = calloc((size_t)n_entries, sizeof(**mnt_pts));
    *fs_types = calloc((size_t)n_entries, sizeof(**fs_types));
    if(!scratch || !*devs || !*mnt_pts || !*fs_types)
        goto fail;

    /* work backwards through the table and parse each line (except for
     * first, which holds command line information)
     */
    while((pos = strrchr(scratch, '\n')) != NULL)
    {
        /* no token can be longer than the whole buffer, so data_len + 1
         * bytes always has room for it plus the terminator
         */
        (*mnt_pts)[array_index] = malloc(data_len + 1);
        (*fs_types)[array_index] = malloc(data_len + 1);
        if(!(*mnt_pts)[array_index] || !(*fs_types)[array_index])
            goto fail;

        ret = sscanf(pos + 1, "%" SCNd64 "\t%s\t%s", &(*devs)[array_index],
            (*fs_types)[array_index], (*mnt_pts)[array_index]);

        if(ret != 3)
        {
            fprintf(stderr, "Error: poorly formatted mount table in log file.\n");
            goto fail;
        }

        /* cut this line off so the next strrchr finds the one before it */
        *pos = '\0';
        array_index++;
    }

    free(scratch);
    *count = n_entries;
    return(0);

fail:
    for(i = 0; i < n_entries; i++)
    {
        if(*mnt_pts)
            free((*mnt_pts)[i]);
        if(*fs_types)
            free((*fs_types)[i]);
    }
    free(*devs);
    free(*mnt_pts);
    free(*fs_types);
    free(scratch);
    *devs = NULL;
    *mnt_pts = NULL;
    *fs_types = NULL;
    return(-1);
}

311
/* darshan_log_gethash()
312
 *
313
 * read the hash of records from the darshan log file
314
315
316
 *
 * returns 0 on success, -1 on failure
 */
317
int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash)
318
{
319
    unsigned char *hash_buf;
320
321
322
323
324
    unsigned char *buf_ptr;
    darshan_record_id *rec_id_ptr;
    uint32_t *path_len_ptr;
    char *path_ptr;
    struct darshan_record_ref *ref;
325
326
    int ret;

327
    ret = darshan_log_seek(fd, fd->rec_map.off);
328
    if(ret < 0)
329
330
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
331
        return(ret);
332
    }
333

334
335
336
    /* allocate a buffer to store the (serialized) darshan record hash */
    hash_buf = malloc(fd->rec_map.len);
    if(!hash_buf)
337
338
        return(-1);

339
    /* read the record map from the log file */
340
341
    ret = darshan_log_read(fd, hash_buf, fd->rec_map.len);
    if(ret < fd->rec_map.len)
342
    {
343
344
        fprintf(stderr, "Error: invalid darshan log file (failed to read record hash).\n");
        free(hash_buf);
345
        return(-1);
346
347
    }

348
349
    buf_ptr = hash_buf;
    while(buf_ptr < (hash_buf + fd->rec_map.len))
350
    {
351
        /* get pointers for each field of this darshan record */
352
        /* NOTE: darshan record hash serialization method: 
353
354
         *          ... darshan_record_id | (uint32_t) path_len | path ...
         */
355
356
357
358
359
360
361
362
363
        rec_id_ptr = (darshan_record_id *)buf_ptr;
        buf_ptr += sizeof(darshan_record_id);
        path_len_ptr = (uint32_t *)buf_ptr;
        buf_ptr += sizeof(uint32_t);
        path_ptr = (char *)buf_ptr;
        buf_ptr += *path_len_ptr;

        ref = malloc(sizeof(*ref));
        if(!ref)
364
        {
365
            free(hash_buf);
366
            return(-1);
367
        }
368
369
        ref->rec.name = malloc(*path_len_ptr + 1);
        if(!ref->rec.name)
370
        {
371
            free(hash_buf);
372
373
            free(ref);
            return(-1);
374
375
        }

376
        if(fd->swap_flag)
377
        {
378
379
380
            /* we need to sort out endianness issues before deserializing */
            DARSHAN_BSWAP64(rec_id_ptr);
            DARSHAN_BSWAP32(path_len_ptr);
381
382
        }

383
384
385
386
        /* set the fields for this record */
        ref->rec.id = *rec_id_ptr;
        memcpy(ref->rec.name, path_ptr, *path_len_ptr);
        ref->rec.name[*path_len_ptr] = '\0';
387

388
        /* add this record to the hash */
389
        HASH_ADD(hlink, *hash, rec.id, sizeof(darshan_record_id), ref);
390
391
    }

392
    free(hash_buf);
393

394
    return(0);
395
}
396

397
398
/* TODO: hardcoded for posix -- what can we do generally?
 *       different function for each module and a way to map to this function?
399
 */
400
int darshan_log_getfile(darshan_fd fd, struct darshan_posix_file *file)
401
402
403
404
{
    int ret;
    const char* err_string;
    int i;
405

406
    if(fd->pos < fd->mod_map[0].off)
407
    {
408
        ret = darshan_log_seek(fd, fd->mod_map[0].off);
409
        if(ret < 0)
410
411
        {
            fprintf(stderr, "Error: unable to seek in darshan log file.\n");
412
            return(ret);
413
        }
414
    }
415
416
417
418

    /* reset file record, so that diff compares against a zero'd out record
     * if file is missing
     */
419
    memset(file, 0, sizeof(*file));
420

421
    ret = darshan_log_read(fd, file, sizeof(*file));
422
423
424
425
426
427
    if(ret == sizeof(*file))
    {
        /* got exactly one, correct size record */
        if(fd->swap_flag)
        {
            /* swap bytes if necessary */
428
            DARSHAN_BSWAP64(&file->f_id);
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
            DARSHAN_BSWAP64(&file->rank);
            for(i=0; i<CP_NUM_INDICES; i++)
                DARSHAN_BSWAP64(&file->counters[i]);
            for(i=0; i<CP_F_NUM_INDICES; i++)
                DARSHAN_BSWAP64(&file->fcounters[i]);
        }
        return(1);
    }


    if(ret > 0)
    {
        /* got a short read */
        fprintf(stderr, "Error: invalid file record (too small)\n");
        return(-1);
    }

446
    if(ret == 0)
447
448
449
450
451
452
    {
        /* hit end of file */
        return(0);
    }

    /* all other errors */
453
    err_string = strerror(errno);
454
455
456
457
    fprintf(stderr, "Error: %s\n", err_string);
    return(-1);
}

458
459
460
461
462
463
464
/* darshan_log_close()
 *
 * close an open darshan file descriptor and release everything attached
 * to it; the handle must not be used afterwards.  Passing NULL is a no-op.
 *
 * no return value
 */
void darshan_log_close(darshan_fd fd)
{
    if(!fd)
        return;

    /* 0 is a valid descriptor; only a negative value means "not open" */
    if(fd->pf >= 0)
        close(fd->pf);

    /* free(NULL) is a no-op, so no guard is needed */
    free(fd->exe_mnt_data);

    free(fd);
}

475
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
476

477
478
479
480
481
482
/* write len bytes from buf at the current position of the log, advancing
 * fd->pos by the number of bytes actually written.  Short writes and
 * EINTR are retried until everything has been written or an error occurs.
 *
 * return amount written on success, -1 on failure.
 */
static int darshan_log_write(darshan_fd fd, void* buf, int len)
{
    const char *src = buf;
    int total = 0;
    ssize_t ret;

    /* 0 is a valid descriptor; only a negative value means "not open" */
    if(fd->pf < 0 || len < 0)
        return(-1);

    while(total < len)
    {
        ret = write(fd->pf, src + total, (size_t)(len - total));
        if(ret < 0)
        {
            if(errno == EINTR)
                continue;
            return(-1);
        }
        if(ret == 0)
        {
            /* no progress; report what was written instead of spinning */
            break;
        }

        /* keep pos in step with the descriptor even if a later pass fails */
        fd->pos += ret;
        total += (int)ret;
    }

    return(total);
}

494
495
496
497
498
499
/* read up to len bytes into buf from the current position of the log,
 * advancing fd->pos by the number of bytes actually read.  Short reads and
 * EINTR are retried, so a result smaller than len means end of file was
 * reached.
 *
 * return amount read on success, 0 on EOF, -1 on failure.
 */
static int darshan_log_read(darshan_fd fd, void* buf, int len)
{
    char *dst = buf;
    int total = 0;
    ssize_t ret;

    /* 0 is a valid descriptor; only a negative value means "not open" */
    if(fd->pf < 0 || len < 0)
        return(-1);

    while(total < len)
    {
        ret = read(fd->pf, dst + total, (size_t)(len - total));
        if(ret < 0)
        {
            if(errno == EINTR)
                continue;
            return(-1);
        }
        if(ret == 0)
        {
            /* end of file */
            break;
        }

        /* keep pos in step with the descriptor even if a later pass fails */
        fd->pos += ret;
        total += (int)ret;
    }

    return(total);
}

/* return 0 on successful seek to offset, -1 on failure.
 */
513
static int darshan_log_seek(darshan_fd fd, off_t offset)
514
{
515
516
    off_t ret_off;

517
518
519
    if(fd->pos == offset)
        return(0);

520
521
    ret_off = lseek(fd->pf, offset, SEEK_SET);
    if(ret_off == offset)
522
    {
523
        fd->pos = offset;
524
525
526
527
528
        return(0);
    }

    return(-1);
}