darshan-logutils.c 12.1 KB
Newer Older
1 2 3 4 5
/*
 *  (C) 2009 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

6
#define _GNU_SOURCE
7
#include "darshan-util-config.h"
8 9
#include <stdio.h>
#include <string.h>
10
#include <assert.h>
11
#include <stdlib.h>
12
#include <unistd.h>
13
#include <inttypes.h>
14 15 16
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
17
#include <errno.h>
18

19 20
#include "darshan-logutils.h"

21 22 23
/* internal helpers that position within, read from, and write to the
 * underlying (gzip-backed) log file; defined at the bottom of this file
 */
static int darshan_log_seek(darshan_fd fd, off_t offset);
static int darshan_log_read(darshan_fd fd, void *buf, int len);
static int darshan_log_write(darshan_fd fd, void *buf, int len);
/* NOTE(review): no definition of darshan_decompress_buffer() is visible in
 * this file -- confirm it is still needed
 */
static int darshan_decompress_buffer(char *comp_buf, int comp_buf_sz,
    char *decomp_buf, int *inout_decomp_buf_sz);
26 27 28 29 30 31 32

/* darshan_log_open()
 *
 * open a darshan log file for reading/writing
 *
 * returns 0 on success, -1 on failure
 */
33
darshan_fd darshan_log_open(const char *name, const char *mode)
34
{
35
    darshan_fd tmp_fd;
36

37 38 39 40
    /* we only allows "w" or "r" modes, nothing fancy */
    assert(strlen(mode) == 1);
    assert(mode[0] == 'r' || mode[0] == 'w');

41
    tmp_fd = malloc(sizeof(*tmp_fd));
42 43 44 45
    if(!tmp_fd)
        return(NULL);
    memset(tmp_fd, 0, sizeof(*tmp_fd));

46 47 48 49 50 51 52 53 54 55 56 57
    if(strcmp(mode, "r") == 0)
    {
        /* TODO: check for bz2 */
    }
    
    if(strcmp(mode, "w") == 0)
    {
        /* TODO: check for bz2 */
    }

    tmp_fd->gzf = gzopen(name, mode);
    if(!tmp_fd->gzf)
58 59
    {
        free(tmp_fd);
60
        tmp_fd = NULL;
61
    }
62 63

    return(tmp_fd);
64 65
}

66 67 68 69 70 71 72 73
/* darshan_log_getheader()
 *
 * read the header of the darshan log and set internal data structures
 * NOTE: this function must be called before reading other portions of the log
 *
 * fd:     open log descriptor; its version, swap_flag, rec_map and mod_map
 *         fields are filled in from the header
 * header: output, receives the header in host byte order
 *
 * returns 0 on success, -1 on failure
 */
int darshan_log_getheader(darshan_fd fd, struct darshan_header *header)
{
    int i;
    int ret;

    ret = darshan_log_seek(fd, 0);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }

    /* read header from log file */
    ret = darshan_log_read(fd, header, sizeof(*header));
    /* test for a negative result explicitly: comparing ret directly against
     * sizeof() would convert -1 to a huge unsigned value and let a read
     * error pass as success
     */
    if(ret < 0 || (size_t)ret < sizeof(*header))
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read header).\n");
        return(-1);
    }

    /* save the version string */
    /* NOTE(review): strncpy() does not NUL-terminate when all 8 bytes are
     * used -- confirm fd->version is larger than 8 bytes
     */
    strncpy(fd->version, header->version_string, 8);

    if(header->magic_nr == DARSHAN_MAGIC_NR)
    {
        /* no byte swapping needed, this file is in host format already */
        fd->swap_flag = 0;
    }
    else
    {
        /* try byte swapping */
        DARSHAN_BSWAP64(&header->magic_nr);
        if(header->magic_nr == DARSHAN_MAGIC_NR)
        {
            fd->swap_flag = 1;

            /* swap the log map variables in the header */
            DARSHAN_BSWAP64(&header->rec_map.off);
            DARSHAN_BSWAP64(&header->rec_map.len);
            for(i = 0; i < DARSHAN_MAX_MODS; i++)
            {
                DARSHAN_BSWAP64(&header->mod_map[i].off);
                DARSHAN_BSWAP64(&header->mod_map[i].len);
            }
        }
        else
        {
            /* otherwise this file is just broken */
            fprintf(stderr, "Error: bad magic number in darshan log file.\n");
            return(-1);
        }
    }

    /* save the mapping of data within log file to this file descriptor */
    memcpy(&fd->rec_map, &header->rec_map, sizeof(struct darshan_log_map));
    memcpy(&fd->mod_map, &header->mod_map, DARSHAN_MAX_MODS * sizeof(struct darshan_log_map));

    return(0);
}
132

133
/* darshan_log_getjob()
134 135
 *
 * read job level metadata from the darshan log file
136
 *
137
 * returns 0 on success, -1 on failure
138
 */
139
int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
140
{
141
    int ret;
142

143
    ret = darshan_log_seek(fd, sizeof(struct darshan_header));
144
    if(ret < 0)
145
    {
146
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
147
        return(-1);
148 149
    }

150
    /* read the job data from the log file */
151 152
    ret = darshan_log_read(fd, job, sizeof(*job));
    if(ret < sizeof(*job))
153
    {
154
        fprintf(stderr, "Error: invalid darshan log file (failed to read job data).\n");
155 156 157
        return(-1);
    }

158
    if(fd->swap_flag)
159
    {
160 161 162 163 164 165
        /* swap bytes if necessary */
        DARSHAN_BSWAP64(&job->uid);
        DARSHAN_BSWAP64(&job->start_time);
        DARSHAN_BSWAP64(&job->end_time);
        DARSHAN_BSWAP64(&job->nprocs);
        DARSHAN_BSWAP64(&job->jobid);
166 167
    }

168 169 170 171 172
    return(0);
}

/* darshan_log_getexe()
 *
 * read the executable / command line string from the darshan log file
 *
 * fd:  open log descriptor
 * buf: output buffer; DARSHAN_EXE_LEN+1 bytes are read into it, so it
 *      must be at least that large
 *
 * returns 0 on success, -1 on failure
 */
int darshan_log_getexe(darshan_fd fd, char *buf)
{
    int tmp_off = sizeof(struct darshan_header) + sizeof(struct darshan_job);
    int ret;
    char *newline;

    ret = darshan_log_seek(fd, tmp_off);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }

    /* read the trailing exe data from the darshan log */
    ret = darshan_log_read(fd, buf, DARSHAN_EXE_LEN+1);
    if(ret < DARSHAN_EXE_LEN+1)
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read exe string).\n");
        return(-1);
    }

    /* the data comes from a file, so do not trust it to be terminated;
     * guarantee that the string search below stays inside the buffer
     */
    buf[DARSHAN_EXE_LEN] = '\0';

    /* mount info is stored after the exe string, so truncate there */
    newline = strchr(buf, '\n');
    if(newline)
        *newline = '\0';

    return (0);
}

/* darshan_log_getmounts()
 * 
202 203 204
 * retrieves mount table information from the log.  Note that mnt_pts and
 * fs_types are arrays that will be allocated by the function and must be
 * freed by the caller.  count will indicate the size of the arrays
205
 */
206
int darshan_log_getmounts(darshan_fd fd, char*** mnt_pts,
207 208
    char*** fs_types, int* count)
{
209
    int tmp_off = sizeof(struct darshan_header) + sizeof(struct darshan_job);
210 211 212
    int ret;
    char *pos;
    int array_index = 0;
213
    char buf[DARSHAN_EXE_LEN+1];
214

215 216
    ret = darshan_log_seek(fd, tmp_off);
    if(ret < 0)
217
    {
218 219 220
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }
221

222 223 224 225 226 227
    /* read the trailing mount data from the darshan log */
    ret = darshan_log_read(fd, buf, DARSHAN_EXE_LEN+1);
    if(ret < DARSHAN_EXE_LEN+1)
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read mount info).\n");
        return(-1);
228
    }
229

230
    /* count entries */
231
    *count = 0;
232
    pos = buf;
233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
    while((pos = strchr(pos, '\n')) != NULL)
    {
        pos++;
        (*count)++;
    }

    if(*count == 0)
    {
        /* no mount entries present */
        return(0);
    }

    /* allocate output arrays */
    *mnt_pts = malloc((*count)*sizeof(char*));
    assert(*mnt_pts);
    *fs_types = malloc((*count)*sizeof(char*));
    assert(*fs_types);

    /* work backwards through the table and parse each line (except for
     * first, which holds command line information)
     */
254
    while((pos = strrchr(buf, '\n')) != NULL)
255 256
    {
        /* overestimate string lengths */
257
        (*mnt_pts)[array_index] = malloc(DARSHAN_EXE_LEN);
258
        assert((*mnt_pts)[array_index]);
259
        (*fs_types)[array_index] = malloc(DARSHAN_EXE_LEN);
260 261
        assert((*fs_types)[array_index]);

262 263 264
        ret = sscanf(++pos, "%s\t%s", (*fs_types)[array_index],
            (*mnt_pts)[array_index]);
        if(ret != 2)
265 266 267 268 269 270 271 272 273
        {
            fprintf(stderr, "Error: poorly formatted mount table in log file.\n");
            return(-1);
        }
        pos--;
        *pos = '\0';
        array_index++;
    }

274 275 276
    return(0);
}

277
/* darshan_log_gethash()
278
 *
279
 * read the hash of records from the darshan log file
280 281 282
 *
 * returns 0 on success, -1 on failure
 */
283
int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash)
284
{
285 286
    char *hash_buf;
    int hash_buf_sz = fd->rec_map.len;
287
    char *buf_ptr;
288 289 290 291
    darshan_record_id *rec_id_ptr;
    uint32_t *path_len_ptr;
    char *path_ptr;
    struct darshan_record_ref *ref;
292 293
    int ret;

294 295
    hash_buf = malloc(hash_buf_sz);
    if(!hash_buf)
296 297
        return(-1);

298
    ret = darshan_log_seek(fd, fd->rec_map.off);
299
    if(ret < 0)
300 301
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
302
        return(-1);
303
    }
304

305
    /* read the record hash from the log file */
306
    ret = darshan_log_read(fd, hash_buf, fd->rec_map.len);
307
    if(ret < fd->rec_map.len)
308
    {
309
        fprintf(stderr, "Error: invalid darshan log file (failed to read record hash).\n");
310 311 312
        return(-1);
    }

313
    buf_ptr = hash_buf;
314
    while(buf_ptr < (hash_buf + hash_buf_sz))
315
    {
316
        /* get pointers for each field of this darshan record */
317
        /* NOTE: darshan record hash serialization method: 
318 319
         *          ... darshan_record_id | (uint32_t) path_len | path ...
         */
320 321 322 323 324 325 326 327
        rec_id_ptr = (darshan_record_id *)buf_ptr;
        buf_ptr += sizeof(darshan_record_id);
        path_len_ptr = (uint32_t *)buf_ptr;
        buf_ptr += sizeof(uint32_t);
        path_ptr = (char *)buf_ptr;
        buf_ptr += *path_len_ptr;

        if(fd->swap_flag)
328
        {
329 330 331
            /* we need to sort out endianness issues before deserializing */
            DARSHAN_BSWAP64(rec_id_ptr);
            DARSHAN_BSWAP32(path_len_ptr);
332 333
        }

334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352
        HASH_FIND(hlink, *hash, rec_id_ptr, sizeof(darshan_record_id), ref);
        if(!ref)
        {
            ref = malloc(sizeof(*ref));
            if(!ref)
            {
                return(-1);
            }
            ref->rec.name = malloc(*path_len_ptr + 1);
            if(!ref->rec.name)
            {
                free(ref);
                return(-1);
            }

            /* set the fields for this record */
            ref->rec.id = *rec_id_ptr;
            memcpy(ref->rec.name, path_ptr, *path_len_ptr);
            ref->rec.name[*path_len_ptr] = '\0';
353

354 355 356
            /* add this record to the hash */
            HASH_ADD(hlink, *hash, rec.id, sizeof(darshan_record_id), ref);
        }
357 358
    }

359 360 361
    return(0);
}

362 363
/* darshan_log_get_moddat()
 *
 * read the next moddat_buf_sz bytes of a module's data from the log file
 *
 * fd:            open log descriptor; fd->mod_map locates each module's data
 * mod_id:        module whose data is requested
 * moddat_buf:    output buffer of at least moddat_buf_sz bytes
 * moddat_buf_sz: exact number of bytes to read in this call
 *
 * returns 1 if data was read, 0 if there is no (more) data for this
 * module, -1 on failure
 */
int darshan_log_get_moddat(darshan_fd fd, darshan_module_id mod_id,
    void *moddat_buf, int moddat_buf_sz)
{
    int mod_buf_end;
    int ret;

    /* mod_id indexes fd->mod_map, so reject ids outside the table */
    if((int)mod_id < 0 || (int)mod_id >= DARSHAN_MAX_MODS)
    {
        fprintf(stderr, "Error: invalid darshan module id.\n");
        return(-1);
    }

    /* NOTE(review): offsets are byte swapped as 64-bit values elsewhere in
     * this file; an int may be too narrow for very large logs -- confirm
     */
    mod_buf_end = fd->mod_map[mod_id].off + fd->mod_map[mod_id].len;

    if(!fd->mod_map[mod_id].len || fd->pos == mod_buf_end)
        return(0); /* no (more) data corresponding to this mod_id */

    /* only seek to start of module data if current log file position 
     * is not within the given mod_id's range. This allows one to
     * repeatedly call this function and get chunks of a module's
     * data piecemeal.
     */
    if((fd->pos < fd->mod_map[mod_id].off) || (fd->pos > mod_buf_end))
    {
        ret = darshan_log_seek(fd, fd->mod_map[mod_id].off);
        if(ret < 0)
        {
            fprintf(stderr, "Error: unable to seek in darshan log file.\n");
            return(-1);
        }
    }

    /* read the next chunk of this module's data from the log file */
    ret = darshan_log_read(fd, moddat_buf, moddat_buf_sz);
    if(ret != moddat_buf_sz)
    {
        fprintf(stderr,
            "Error: invalid darshan log file (failed to read module %s data).\n",
            darshan_module_names[mod_id]);
        return(-1);
    }

    return(1);
}

399 400 401 402 403 404 405
/* darshan_log_close()
 *
 * close an open darshan file descriptor and release its memory
 *
 * fd: descriptor returned by darshan_log_open(); passing NULL is a no-op
 *
 * no return value
 */
void darshan_log_close(darshan_fd fd)
{
    /* tolerate NULL (e.g. the result of a failed open), like free() does */
    if(!fd)
        return;

    if(fd->gzf)
        gzclose(fd->gzf);

    /* TODO: check bz2 */

    free(fd);

    return;
}

417
/* **************************************************** */
418

419 420 421 422
/* move the log file position to the absolute (uncompressed) offset given.
 * fd->pos is updated only when the seek succeeds.
 *
 * returns 0 on success (including when already at offset), -1 on failure
 */
static int darshan_log_seek(darshan_fd fd, off_t offset)
{
    z_off_t target;
    z_off_t landed;

    /* nothing to do if we are already there */
    if(fd->pos == offset)
        return(0);

    /* TODO: check bz2 */
    if(!fd->gzf)
        return(-1);

    target = offset;
    landed = gzseek(fd->gzf, target, SEEK_SET);
    if(landed != target)
        return(-1);

    fd->pos = offset;
    return(0);
}

446 447 448 449 450 451
/* write len bytes from buf at the current log file position and advance
 * fd->pos by the amount written.
 *
 * return amount written on success, -1 on failure.
 */
static int darshan_log_write(darshan_fd fd, void* buf, int len)
{
    int ret;

    if(fd->gzf)
    {
        ret = gzwrite(fd->gzf, buf, len);
        if(ret > 0)
        {
            fd->pos += ret;
            return(ret);
        }

        /* gzwrite() signals an error by returning 0, so translate that to
         * the documented -1; a zero-length request is not an error
         */
        return((len == 0) ? 0 : -1);
    }

    /* TODO: check bz2 */

    return(-1);
}

465 466 467 468 469 470
/* read up to len bytes into buf from the current log file position and
 * advance fd->pos by the number of bytes actually read.
 *
 * returns the gzread() result when the descriptor is gzip-backed
 * (amount read on success, 0 on EOF, -1 on failure), otherwise -1.
 */
static int darshan_log_read(darshan_fd fd, void* buf, int len)
{
    int nread;

    /* TODO: check bz2 */
    if(!fd->gzf)
        return(-1);

    nread = gzread(fd->gzf, buf, len);
    if(nread > 0)
        fd->pos += nread;

    return(nread);
}
483 484 485 486 487 488 489 490 491

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */