/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

7
#define _GNU_SOURCE
8
#include "darshan-util-config.h"
9
10
#include <stdio.h>
#include <string.h>
11
#include <assert.h>
12
#include <stdlib.h>
13
#include <unistd.h>
14
#include <inttypes.h>
15
16
17
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
18
#include <errno.h>
19

20
21
#include "darshan-logutils.h"

22
23
/* forward declarations for the file-local helpers that reposition within
 * and read from an open log; their definitions appear later in this file
 */
static int darshan_log_seek(darshan_fd fd, off_t offset);
static int darshan_log_read(darshan_fd fd, void *buf, int len);
//static int darshan_log_write(darshan_fd fd, void *buf, int len);

/* table of per-module log utility function sets, one slot per module
 * (presumably indexed by darshan module id -- confirm against callers).
 * only the POSIX and MPI-IO slots are populated here.
 *
 * TODO: can we make this s.t. we don't care about ordering (i.e., X macro it )
 */
struct darshan_mod_logutil_funcs *mod_logutils[DARSHAN_MAX_MODS] =
{
    [1] = &posix_logutils,  /* POSIX */
    [2] = &mpiio_logutils,  /* MPI-IO */
    /* slot 0 (NULL), slot 3 (HDF5), slot 4 (PNETCDF) and every remaining
     * slot are left out on purpose: omitted elements are initialized to NULL
     */
};
46
47
48
49
50
51
52

/* darshan_log_open()
 *
 * open a darshan log file for reading/writing
 *
 * returns 0 on success, -1 on failure
 */
53
darshan_fd darshan_log_open(const char *name, const char *mode)
54
{
55
    darshan_fd tmp_fd;
56

57
58
59
60
    /* we only allows "w" or "r" modes, nothing fancy */
    assert(strlen(mode) == 1);
    assert(mode[0] == 'r' || mode[0] == 'w');

61
    tmp_fd = malloc(sizeof(*tmp_fd));
62
63
64
65
    if(!tmp_fd)
        return(NULL);
    memset(tmp_fd, 0, sizeof(*tmp_fd));

66
67
    tmp_fd->gzf = gzopen(name, mode);
    if(!tmp_fd->gzf)
68
69
    {
        free(tmp_fd);
70
        tmp_fd = NULL;
71
    }
72
73

    return(tmp_fd);
74
75
}

76
77
78
79
80
81
82
83
/* darshan_log_getheader()
 *
 * read the header of the darshan log and set internal data structures
 * NOTE: this function must be called before reading other portions of the log
 *
 * fd:     open log handle; on success its version string, swap flag and
 *         record/module maps are filled in from the header
 * header: output; receives the header read from offset 0 of the log.  if
 *         the log turns out to be byte swapped, the magic number and the
 *         record/module map fields are converted in place
 *
 * returns 0 on success, -1 on failure
 */
int darshan_log_getheader(darshan_fd fd, struct darshan_header *header)
{
    int i;
    int ret;

    ret = darshan_log_seek(fd, 0);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }

    /* read header from log file */
    ret = darshan_log_read(fd, header, sizeof(*header));
    /* compare as int: against a bare size_t, a -1 error return would be
     * converted to a huge unsigned value and the failure would go unnoticed
     */
    if(ret < (int)sizeof(*header))
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read header).\n");
        return(-1);
    }

    /* save the version string.
     * NOTE(review): strncpy leaves the destination unterminated when the
     * source fills all 8 bytes -- confirm readers of fd->version treat it
     * as a fixed-width field
     */
    strncpy(fd->version, header->version_string, 8);

    if(header->magic_nr == DARSHAN_MAGIC_NR)
    {
        /* no byte swapping needed, this file is in host format already */
        fd->swap_flag = 0;
    }
    else
    {
        /* try byte swapping */
        DARSHAN_BSWAP64(&header->magic_nr);
        if(header->magic_nr == DARSHAN_MAGIC_NR)
        {
            fd->swap_flag = 1;

            /* swap the log map variables in the header */
            DARSHAN_BSWAP64(&header->rec_map.off);
            DARSHAN_BSWAP64(&header->rec_map.len);
            for(i = 0; i < DARSHAN_MAX_MODS; i++)
            {
                DARSHAN_BSWAP64(&header->mod_map[i].off);
                DARSHAN_BSWAP64(&header->mod_map[i].len);
            }
        }
        else
        {
            /* otherwise this file is just broken */
            fprintf(stderr, "Error: bad magic number in darshan log file.\n");
            return(-1);
        }
    }

    /* save the mapping of data within log file to this file descriptor */
    memcpy(&fd->rec_map, &header->rec_map, sizeof(struct darshan_log_map));
    memcpy(&fd->mod_map, &header->mod_map, DARSHAN_MAX_MODS * sizeof(struct darshan_log_map));

    return(0);
}
142

143
/* darshan_log_getjob()
144
145
 *
 * read job level metadata from the darshan log file
146
 *
147
 * returns 0 on success, -1 on failure
148
 */
149
int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
150
{
151
    int ret;
152

153
    ret = darshan_log_seek(fd, sizeof(struct darshan_header));
154
    if(ret < 0)
155
    {
156
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
157
        return(-1);
158
159
    }

160
    /* read the job data from the log file */
161
162
    ret = darshan_log_read(fd, job, sizeof(*job));
    if(ret < sizeof(*job))
163
    {
164
        fprintf(stderr, "Error: invalid darshan log file (failed to read job data).\n");
165
166
167
        return(-1);
    }

168
    if(fd->swap_flag)
169
    {
170
171
172
173
174
175
        /* swap bytes if necessary */
        DARSHAN_BSWAP64(&job->uid);
        DARSHAN_BSWAP64(&job->start_time);
        DARSHAN_BSWAP64(&job->end_time);
        DARSHAN_BSWAP64(&job->nprocs);
        DARSHAN_BSWAP64(&job->jobid);
176
177
    }

178
179
180
181
182
    return(0);
}

/* darshan_log_getexe()
 *
 * read the exe string that trails the job record in the darshan log file
 *
 * fd:  open log handle
 * buf: output buffer; DARSHAN_EXE_LEN+1 bytes are written into it, so it
 *      must be at least that large.  on success it holds a NUL-terminated
 *      string cut off at the first newline
 *
 * returns 0 on success, -1 on failure
 */
int darshan_log_getexe(darshan_fd fd, char *buf)
{
    int tmp_off = sizeof(struct darshan_header) + sizeof(struct darshan_job);
    int ret;
    char *newline;

    ret = darshan_log_seek(fd, tmp_off);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }

    /* read the trailing exe data from the darshan log */
    ret = darshan_log_read(fd, buf, DARSHAN_EXE_LEN+1);
    if(ret < (int)(DARSHAN_EXE_LEN+1))
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read exe string).\n");
        return(-1);
    }

    /* the bytes come straight from the file; force termination so the
     * search below can never run past the end of the buffer
     */
    buf[DARSHAN_EXE_LEN] = '\0';

    /* mount info is stored after the exe string, so truncate there */
    newline = strchr(buf, '\n');
    if(newline)
        *newline = '\0';

    return (0);
}

/* darshan_log_getmounts()
 * 
212
213
214
 * retrieves mount table information from the log.  Note that mnt_pts and
 * fs_types are arrays that will be allocated by the function and must be
 * freed by the caller.  count will indicate the size of the arrays
215
 */
216
int darshan_log_getmounts(darshan_fd fd, char*** mnt_pts,
217
218
    char*** fs_types, int* count)
{
219
    int tmp_off = sizeof(struct darshan_header) + sizeof(struct darshan_job);
220
221
222
    int ret;
    char *pos;
    int array_index = 0;
223
    char buf[DARSHAN_EXE_LEN+1];
224

225
226
    ret = darshan_log_seek(fd, tmp_off);
    if(ret < 0)
227
    {
228
229
230
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }
231

232
233
234
235
236
237
    /* read the trailing mount data from the darshan log */
    ret = darshan_log_read(fd, buf, DARSHAN_EXE_LEN+1);
    if(ret < DARSHAN_EXE_LEN+1)
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read mount info).\n");
        return(-1);
238
    }
239

240
    /* count entries */
241
    *count = 0;
242
    pos = buf;
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
    while((pos = strchr(pos, '\n')) != NULL)
    {
        pos++;
        (*count)++;
    }

    if(*count == 0)
    {
        /* no mount entries present */
        return(0);
    }

    /* allocate output arrays */
    *mnt_pts = malloc((*count)*sizeof(char*));
    assert(*mnt_pts);
    *fs_types = malloc((*count)*sizeof(char*));
    assert(*fs_types);

    /* work backwards through the table and parse each line (except for
     * first, which holds command line information)
     */
264
    while((pos = strrchr(buf, '\n')) != NULL)
265
266
    {
        /* overestimate string lengths */
267
        (*mnt_pts)[array_index] = malloc(DARSHAN_EXE_LEN);
268
        assert((*mnt_pts)[array_index]);
269
        (*fs_types)[array_index] = malloc(DARSHAN_EXE_LEN);
270
271
        assert((*fs_types)[array_index]);

272
273
274
        ret = sscanf(++pos, "%s\t%s", (*fs_types)[array_index],
            (*mnt_pts)[array_index]);
        if(ret != 2)
275
276
277
278
279
280
281
282
283
        {
            fprintf(stderr, "Error: poorly formatted mount table in log file.\n");
            return(-1);
        }
        pos--;
        *pos = '\0';
        array_index++;
    }

284
285
286
    return(0);
}

287
/* darshan_log_gethash()
288
 *
289
 * read the hash of records from the darshan log file
290
291
292
 *
 * returns 0 on success, -1 on failure
 */
293
int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash)
294
{
295
296
    char *hash_buf;
    int hash_buf_sz = fd->rec_map.len;
297
    char *buf_ptr;
298
299
300
301
    darshan_record_id *rec_id_ptr;
    uint32_t *path_len_ptr;
    char *path_ptr;
    struct darshan_record_ref *ref;
302
303
    int ret;

304
305
    hash_buf = malloc(hash_buf_sz);
    if(!hash_buf)
306
307
        return(-1);

308
    ret = darshan_log_seek(fd, fd->rec_map.off);
309
    if(ret < 0)
310
311
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
312
        return(-1);
313
    }
314

315
    /* read the record hash from the log file */
316
    ret = darshan_log_read(fd, hash_buf, fd->rec_map.len);
317
    if(ret < fd->rec_map.len)
318
    {
319
        fprintf(stderr, "Error: invalid darshan log file (failed to read record hash).\n");
320
321
322
        return(-1);
    }

323
    buf_ptr = hash_buf;
324
    while(buf_ptr < (hash_buf + hash_buf_sz))
325
    {
326
        /* get pointers for each field of this darshan record */
327
        /* NOTE: darshan record hash serialization method: 
328
329
         *          ... darshan_record_id | (uint32_t) path_len | path ...
         */
330
331
332
333
334
335
336
337
        rec_id_ptr = (darshan_record_id *)buf_ptr;
        buf_ptr += sizeof(darshan_record_id);
        path_len_ptr = (uint32_t *)buf_ptr;
        buf_ptr += sizeof(uint32_t);
        path_ptr = (char *)buf_ptr;
        buf_ptr += *path_len_ptr;

        if(fd->swap_flag)
338
        {
339
340
341
            /* we need to sort out endianness issues before deserializing */
            DARSHAN_BSWAP64(rec_id_ptr);
            DARSHAN_BSWAP32(path_len_ptr);
342
343
        }

344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
        HASH_FIND(hlink, *hash, rec_id_ptr, sizeof(darshan_record_id), ref);
        if(!ref)
        {
            ref = malloc(sizeof(*ref));
            if(!ref)
            {
                return(-1);
            }
            ref->rec.name = malloc(*path_len_ptr + 1);
            if(!ref->rec.name)
            {
                free(ref);
                return(-1);
            }

            /* set the fields for this record */
            ref->rec.id = *rec_id_ptr;
            memcpy(ref->rec.name, path_ptr, *path_len_ptr);
            ref->rec.name[*path_len_ptr] = '\0';
363

364
365
366
            /* add this record to the hash */
            HASH_ADD(hlink, *hash, rec.id, sizeof(darshan_record_id), ref);
        }
367
368
    }

369
370
371
    return(0);
}

372
373
/* darshan_log_get_moddat()
 *
 * read the next chunk of a module's data from the darshan log file.
 * repeated calls with the same mod_id continue where the previous call
 * stopped
 *
 * fd:            open log handle; fd->mod_map gives each module's extent
 * mod_id:        module whose data is wanted
 * moddat_buf:    output buffer
 * moddat_buf_sz: number of bytes to read into moddat_buf
 *
 * returns 1 if moddat_buf_sz bytes were read, 0 if the module has no
 * (more) data, -1 on failure
 */
int darshan_log_get_moddat(darshan_fd fd, darshan_module_id mod_id,
    void *moddat_buf, int moddat_buf_sz)
{
    int64_t mod_buf_end;
    int ret;

    /* mod_id indexes fixed-size tables; refuse anything outside them */
    if((unsigned int)mod_id >= DARSHAN_MAX_MODS)
    {
        fprintf(stderr, "Error: invalid darshan module id.\n");
        return(-1);
    }

    /* 64-bit on purpose: offset plus length can exceed the range of int */
    mod_buf_end = fd->mod_map[mod_id].off + fd->mod_map[mod_id].len;

    if(!fd->mod_map[mod_id].len || fd->pos == mod_buf_end)
        return(0); /* no (more) data corresponding to this mod_id */

    /* only seek to start of module data if current log file position
     * is not within the given mod_id's range. This allows one to
     * repeatedly call this function and get chunks of a module's
     * data piecemeal.
     */
    if((fd->pos < fd->mod_map[mod_id].off) || (fd->pos > mod_buf_end))
    {
        ret = darshan_log_seek(fd, fd->mod_map[mod_id].off);
        if(ret < 0)
        {
            fprintf(stderr, "Error: unable to seek in darshan log file.\n");
            return(-1);
        }
    }

    /* read this chunk of module data from the log file */
    ret = darshan_log_read(fd, moddat_buf, moddat_buf_sz);
    if(ret != moddat_buf_sz)
    {
        fprintf(stderr,
            "Error: invalid darshan log file (failed to read module %s data).\n",
            darshan_module_names[mod_id]);
        return(-1);
    }

    return(1);
}

409
410
411
412
413
414
415
/* darshan_log_close()
 *
 * close an open darshan file descriptor and free the handle
 *
 * fd: handle to close; NULL is accepted and ignored.  the handle must not
 *     be used after this call
 *
 * no return value
 */
void darshan_log_close(darshan_fd fd)
{
    /* tolerate NULL, in the same way free() does */
    if(!fd)
        return;

    if(fd->gzf)
        gzclose(fd->gzf);

    free(fd);

    return;
}

425
/* **************************************************** */
426

427
428
429
430
/* darshan_log_seek()
 *
 * move the log's read position to an absolute offset, skipping the
 * underlying seek when the handle is already positioned there
 *
 * returns 0 if the handle ends up at offset, -1 otherwise
 */
static int darshan_log_seek(darshan_fd fd, off_t offset)
{
    z_off_t target;
    z_off_t landed;

    /* already where we need to be */
    if(fd->pos == offset)
        return(0);

    /* no open file behind this handle */
    if(!fd->gzf)
        return(-1);

    target = offset;
    landed = gzseek(fd->gzf, target, SEEK_SET);
    if(landed != target)
        return(-1);

    /* only record the new position once the seek has landed on it */
    fd->pos = offset;
    return(0);
}

452
#if 0
/* darshan_log_write()
 *
 * write len bytes from buf to the log and advance the tracked position
 * by the amount actually written (currently compiled out)
 *
 * returns the gzwrite() result, or -1 if no file is open
 */
static int darshan_log_write(darshan_fd fd, void* buf, int len)
{
    int nwritten;

    if(!fd->gzf)
        return(-1);

    nwritten = gzwrite(fd->gzf, buf, len);
    if(nwritten > 0)
        fd->pos += nwritten;

    return(nwritten);
}
#endif
470

471
472
473
474
475
476
/* darshan_log_read()
 *
 * read up to len bytes from the log into buf and advance the tracked
 * position by the number of bytes actually read
 *
 * returns the gzread() result (amount read on success, 0 on EOF, -1 on
 * failure), or -1 if no file is open
 */
static int darshan_log_read(darshan_fd fd, void* buf, int len)
{
    int nread;

    if(!fd->gzf)
        return(-1);

    nread = gzread(fd->gzf, buf, len);
    if(nread > 0)
        fd->pos += nread;

    return(nread);
}
487
488
489
490
491
492
493
494
495

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */