/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

7
#define _GNU_SOURCE
8
#include "darshan-util-config.h"
9 10
#include <stdio.h>
#include <string.h>
11
#include <assert.h>
12
#include <stdlib.h>
13
#include <unistd.h>
14
#include <inttypes.h>
15 16 17
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
18
#include <errno.h>
19

20 21
#include "darshan-logutils.h"

22 23 24
/* forward declarations of the internal I/O helpers defined at the bottom
 * of this file; each one operates on the descriptor's compressed stream
 * and keeps fd->pos in step with the amount of data consumed/produced
 */
static int darshan_log_seek(darshan_fd fd, off_t offset);
static int darshan_log_read(darshan_fd fd, void *buf, int len);
static int darshan_log_write(darshan_fd fd, void *buf, int len);
25 26 27 28 29 30 31

/* darshan_log_open()
 *
 * open a darshan log file for reading/writing
 *
 * returns 0 on success, -1 on failure
 */
32
darshan_fd darshan_log_open(const char *name, const char *mode)
33
{
34
    darshan_fd tmp_fd;
35

36 37 38 39
    /* we only allows "w" or "r" modes, nothing fancy */
    assert(strlen(mode) == 1);
    assert(mode[0] == 'r' || mode[0] == 'w');

40
    tmp_fd = malloc(sizeof(*tmp_fd));
41 42 43 44
    if(!tmp_fd)
        return(NULL);
    memset(tmp_fd, 0, sizeof(*tmp_fd));

45 46 47 48 49 50 51 52 53 54 55 56
    if(strcmp(mode, "r") == 0)
    {
        /* TODO: check for bz2 */
    }
    
    if(strcmp(mode, "w") == 0)
    {
        /* TODO: check for bz2 */
    }

    tmp_fd->gzf = gzopen(name, mode);
    if(!tmp_fd->gzf)
57 58
    {
        free(tmp_fd);
59
        tmp_fd = NULL;
60
    }
61 62

    return(tmp_fd);
63 64
}

65 66 67 68 69 70 71 72
/* darshan_log_getheader()
 *
 * read the header of the darshan log and set internal data structures
 * NOTE: this function must be called before reading other portions of the
 * log, since it records the byte-swap flag and the record/module maps in
 * the descriptor
 *
 * fd:     open log descriptor
 * header: filled with the header read from offset 0; when the log was
 *         written with the opposite endianness, the magic number and the
 *         map offsets/lengths are byte swapped in place
 *
 * returns 0 on success, -1 on failure
 */
int darshan_log_getheader(darshan_fd fd, struct darshan_header *header)
{
    int i;
    int ret;

    ret = darshan_log_seek(fd, 0);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }

    /* read header from log file.  The size is cast to int for the
     * comparison: comparing against a bare sizeof would convert a negative
     * error return to a huge unsigned value and let the failure slip by.
     */
    ret = darshan_log_read(fd, header, sizeof(*header));
    if(ret < (int)sizeof(*header))
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read header).\n");
        return(-1);
    }

    /* save the version string */
    /* NOTE(review): strncpy() does not NUL-terminate when the source holds
     * 8 non-NUL bytes -- confirm fd->version is sized/zeroed accordingly
     */
    strncpy(fd->version, header->version_string, 8);

    if(header->magic_nr == DARSHAN_MAGIC_NR)
    {
        /* no byte swapping needed, this file is in host format already */
        fd->swap_flag = 0;
    }
    else
    {
        /* try byte swapping */
        DARSHAN_BSWAP64(&header->magic_nr);
        if(header->magic_nr == DARSHAN_MAGIC_NR)
        {
            fd->swap_flag = 1;

            /* swap the log map variables in the header */
            DARSHAN_BSWAP64(&header->rec_map.off);
            DARSHAN_BSWAP64(&header->rec_map.len);
            for(i = 0; i < DARSHAN_MAX_MODS; i++)
            {
                DARSHAN_BSWAP64(&header->mod_map[i].off);
                DARSHAN_BSWAP64(&header->mod_map[i].len);
            }
        }
        else
        {
            /* otherwise this file is just broken */
            fprintf(stderr, "Error: bad magic number in darshan log file.\n");
            return(-1);
        }
    }

    /* save the mapping of data within log file to this file descriptor */
    memcpy(&fd->rec_map, &header->rec_map, sizeof(struct darshan_log_map));
    memcpy(&fd->mod_map, &header->mod_map, DARSHAN_MAX_MODS * sizeof(struct darshan_log_map));

    return(0);
}
131

132
/* darshan_log_getjob()
133 134
 *
 * read job level metadata from the darshan log file
135
 *
136
 * returns 0 on success, -1 on failure
137
 */
138
int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
139
{
140
    int ret;
141

142
    ret = darshan_log_seek(fd, sizeof(struct darshan_header));
143
    if(ret < 0)
144
    {
145
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
146
        return(-1);
147 148
    }

149
    /* read the job data from the log file */
150 151
    ret = darshan_log_read(fd, job, sizeof(*job));
    if(ret < sizeof(*job))
152
    {
153
        fprintf(stderr, "Error: invalid darshan log file (failed to read job data).\n");
154 155 156
        return(-1);
    }

157
    if(fd->swap_flag)
158
    {
159 160 161 162 163 164
        /* swap bytes if necessary */
        DARSHAN_BSWAP64(&job->uid);
        DARSHAN_BSWAP64(&job->start_time);
        DARSHAN_BSWAP64(&job->end_time);
        DARSHAN_BSWAP64(&job->nprocs);
        DARSHAN_BSWAP64(&job->jobid);
165 166
    }

167 168 169 170 171
    return(0);
}

/* darshan_log_getexe()
 *
 * read the exe string that is stored after the header and job record
 *
 * fd:  open log descriptor
 * buf: destination buffer; DARSHAN_EXE_LEN+1 bytes are read into it, so it
 *      must be at least that large.  On success it holds a NUL-terminated
 *      string cut at the first newline.
 *
 * returns 0 on success, -1 on failure
 */
int darshan_log_getexe(darshan_fd fd, char *buf)
{
    int tmp_off = sizeof(struct darshan_header) + sizeof(struct darshan_job);
    int ret;
    char *newline;

    ret = darshan_log_seek(fd, tmp_off);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }

    /* read the trailing exe data from the darshan log */
    ret = darshan_log_read(fd, buf, DARSHAN_EXE_LEN+1);
    if(ret < DARSHAN_EXE_LEN+1)
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read exe string).\n");
        return(-1);
    }

    /* the bytes come straight from the file, so do not trust them to be
     * NUL-terminated before handing the buffer to string functions
     */
    buf[DARSHAN_EXE_LEN] = '\0';

    /* mount info is stored after the exe string, so truncate there */
    newline = strchr(buf, '\n');
    if(newline)
        *newline = '\0';

    return (0);
}

/* darshan_log_getmounts()
 * 
201 202 203
 * retrieves mount table information from the log.  Note that mnt_pts and
 * fs_types are arrays that will be allocated by the function and must be
 * freed by the caller.  count will indicate the size of the arrays
204
 */
205
int darshan_log_getmounts(darshan_fd fd, char*** mnt_pts,
206 207
    char*** fs_types, int* count)
{
208
    int tmp_off = sizeof(struct darshan_header) + sizeof(struct darshan_job);
209 210 211
    int ret;
    char *pos;
    int array_index = 0;
212
    char buf[DARSHAN_EXE_LEN+1];
213

214 215
    ret = darshan_log_seek(fd, tmp_off);
    if(ret < 0)
216
    {
217 218 219
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }
220

221 222 223 224 225 226
    /* read the trailing mount data from the darshan log */
    ret = darshan_log_read(fd, buf, DARSHAN_EXE_LEN+1);
    if(ret < DARSHAN_EXE_LEN+1)
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read mount info).\n");
        return(-1);
227
    }
228

229
    /* count entries */
230
    *count = 0;
231
    pos = buf;
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
    while((pos = strchr(pos, '\n')) != NULL)
    {
        pos++;
        (*count)++;
    }

    if(*count == 0)
    {
        /* no mount entries present */
        return(0);
    }

    /* allocate output arrays */
    *mnt_pts = malloc((*count)*sizeof(char*));
    assert(*mnt_pts);
    *fs_types = malloc((*count)*sizeof(char*));
    assert(*fs_types);

    /* work backwards through the table and parse each line (except for
     * first, which holds command line information)
     */
253
    while((pos = strrchr(buf, '\n')) != NULL)
254 255
    {
        /* overestimate string lengths */
256
        (*mnt_pts)[array_index] = malloc(DARSHAN_EXE_LEN);
257
        assert((*mnt_pts)[array_index]);
258
        (*fs_types)[array_index] = malloc(DARSHAN_EXE_LEN);
259 260
        assert((*fs_types)[array_index]);

261 262 263
        ret = sscanf(++pos, "%s\t%s", (*fs_types)[array_index],
            (*mnt_pts)[array_index]);
        if(ret != 2)
264 265 266 267 268 269 270 271 272
        {
            fprintf(stderr, "Error: poorly formatted mount table in log file.\n");
            return(-1);
        }
        pos--;
        *pos = '\0';
        array_index++;
    }

273 274 275
    return(0);
}

276
/* darshan_log_gethash()
277
 *
278
 * read the hash of records from the darshan log file
279 280 281
 *
 * returns 0 on success, -1 on failure
 */
282
int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash)
283
{
284 285
    char *hash_buf;
    int hash_buf_sz = fd->rec_map.len;
286
    char *buf_ptr;
287 288 289 290
    darshan_record_id *rec_id_ptr;
    uint32_t *path_len_ptr;
    char *path_ptr;
    struct darshan_record_ref *ref;
291 292
    int ret;

293 294
    hash_buf = malloc(hash_buf_sz);
    if(!hash_buf)
295 296
        return(-1);

297
    ret = darshan_log_seek(fd, fd->rec_map.off);
298
    if(ret < 0)
299 300
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
301
        return(-1);
302
    }
303

304
    /* read the record hash from the log file */
305
    ret = darshan_log_read(fd, hash_buf, fd->rec_map.len);
306
    if(ret < fd->rec_map.len)
307
    {
308
        fprintf(stderr, "Error: invalid darshan log file (failed to read record hash).\n");
309 310 311
        return(-1);
    }

312
    buf_ptr = hash_buf;
313
    while(buf_ptr < (hash_buf + hash_buf_sz))
314
    {
315
        /* get pointers for each field of this darshan record */
316
        /* NOTE: darshan record hash serialization method: 
317 318
         *          ... darshan_record_id | (uint32_t) path_len | path ...
         */
319 320 321 322 323 324 325 326
        rec_id_ptr = (darshan_record_id *)buf_ptr;
        buf_ptr += sizeof(darshan_record_id);
        path_len_ptr = (uint32_t *)buf_ptr;
        buf_ptr += sizeof(uint32_t);
        path_ptr = (char *)buf_ptr;
        buf_ptr += *path_len_ptr;

        if(fd->swap_flag)
327
        {
328 329 330
            /* we need to sort out endianness issues before deserializing */
            DARSHAN_BSWAP64(rec_id_ptr);
            DARSHAN_BSWAP32(path_len_ptr);
331 332
        }

333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351
        HASH_FIND(hlink, *hash, rec_id_ptr, sizeof(darshan_record_id), ref);
        if(!ref)
        {
            ref = malloc(sizeof(*ref));
            if(!ref)
            {
                return(-1);
            }
            ref->rec.name = malloc(*path_len_ptr + 1);
            if(!ref->rec.name)
            {
                free(ref);
                return(-1);
            }

            /* set the fields for this record */
            ref->rec.id = *rec_id_ptr;
            memcpy(ref->rec.name, path_ptr, *path_len_ptr);
            ref->rec.name[*path_len_ptr] = '\0';
352

353 354 355
            /* add this record to the hash */
            HASH_ADD(hlink, *hash, rec.id, sizeof(darshan_record_id), ref);
        }
356 357
    }

358 359 360
    return(0);
}

361 362
/* darshan_log_get_moddat()
 *
 * read the next moddat_buf_sz bytes of the given module's data into
 * moddat_buf.  Successive calls continue from the current log position as
 * long as it lies inside the module's region, so a module's data can be
 * fetched in chunks.
 *
 * returns 1 when a chunk was read, 0 when the module has no (more) data,
 * -1 on failure
 */
int darshan_log_get_moddat(darshan_fd fd, darshan_module_id mod_id,
    void *moddat_buf, int moddat_buf_sz)
{
    int region_end = fd->mod_map[mod_id].off + fd->mod_map[mod_id].len;
    int nread;

    /* nothing is stored for this module at all */
    if(!fd->mod_map[mod_id].len)
        return(0);

    /* earlier calls already consumed the module's whole region */
    if(fd->pos == region_end)
        return(0);

    /* reposition to the start of the module's region only when the
     * current log position is outside of it; otherwise keep reading from
     * where the previous call stopped
     */
    if((fd->pos > region_end) || (fd->pos < fd->mod_map[mod_id].off))
    {
        if(darshan_log_seek(fd, fd->mod_map[mod_id].off) < 0)
        {
            fprintf(stderr, "Error: unable to seek in darshan log file.\n");
            return(-1);
        }
    }

    /* fetch the requested chunk of module data; anything other than a
     * full chunk is treated as an error
     */
    nread = darshan_log_read(fd, moddat_buf, moddat_buf_sz);
    if(nread == moddat_buf_sz)
        return(1);

    fprintf(stderr,
        "Error: invalid darshan log file (failed to read module %s data).\n",
        darshan_module_names[mod_id]);
    return(-1);
}

398 399 400 401 402 403 404
/* darshan_log_close()
 *
 * close an open darshan file descriptor: closes the gzip stream if one is
 * attached and frees the descriptor itself, so fd must not be used
 * afterwards.  Passing NULL is a no-op.
 *
 * this function does not return a value
 */
void darshan_log_close(darshan_fd fd)
{
    /* tolerate NULL the same way free() does */
    if(!fd)
        return;

    if(fd->gzf)
        gzclose(fd->gzf);

    /* TODO: check bz2 */

    free(fd);

    return;
}

416
/* **************************************************** */
417

418 419 420 421
/* move the log's read/write position to the absolute offset given.
 * Nothing is done when fd->pos already equals offset; otherwise the gzip
 * stream is repositioned and fd->pos updated.
 *
 * return 0 on successful seek to offset, -1 on failure.
 */
static int darshan_log_seek(darshan_fd fd, off_t offset)
{
    z_off_t target;

    /* already there */
    if(fd->pos == offset)
        return(0);

    if(!fd->gzf)
    {
        /* TODO: check bz2 */
        return(-1);
    }

    target = offset;
    if(gzseek(fd->gzf, target, SEEK_SET) != target)
        return(-1);

    fd->pos = offset;
    return(0);
}

445 446 447 448 449 450
/* write len bytes from buf to the log's gzip stream, advancing fd->pos by
 * the amount gzwrite() reports when that amount is positive.
 *
 * returns the value reported by gzwrite(), or -1 when the descriptor has
 * no gzip stream attached.
 */
static int darshan_log_write(darshan_fd fd, void* buf, int len)
{
    int nwritten;

    if(!fd->gzf)
    {
        /* TODO: check bz2 */
        return(-1);
    }

    nwritten = gzwrite(fd->gzf, buf, len);
    if(nwritten > 0)
        fd->pos += nwritten;

    return(nwritten);
}

464 465 466 467 468 469
/* read up to len bytes from the log's gzip stream into buf, advancing
 * fd->pos by the amount gzread() reports when that amount is positive.
 *
 * returns the value reported by gzread() (amount read on success, 0 on
 * EOF, -1 on failure), or -1 when the descriptor has no gzip stream
 * attached.
 */
static int darshan_log_read(darshan_fd fd, void* buf, int len)
{
    int nread;

    if(!fd->gzf)
    {
        /* TODO: check bz2 */
        return(-1);
    }

    nread = gzread(fd->gzf, buf, len);
    if(nread > 0)
        fd->pos += nread;

    return(nread);
}
482 483 484 485 486 487 488 489 490

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */