/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

7
#define _GNU_SOURCE
8
#include "darshan-util-config.h"
9 10
#include <stdio.h>
#include <string.h>
11
#include <assert.h>
12
#include <stdlib.h>
13
#include <unistd.h>
14
#include <inttypes.h>
15 16 17
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
18
#include <errno.h>
19

20 21
#include "darshan-logutils.h"

22 23
static int darshan_log_seek(darshan_fd fd, off_t offset);
static int darshan_log_read(darshan_fd fd, void *buf, int len);
24 25 26 27 28
//static int darshan_log_write(darshan_fd fd, void *buf, int len);

/* TODO: can we make this s.t. we don't care about ordering (i.e., X macro it ) */
struct darshan_mod_logutil_funcs *mod_logutils[DARSHAN_MAX_MODS] =
{
29
    NULL,               /* NULL */
30
    &posix_logutils,    /* POSIX */
31 32 33
    &mpiio_logutils,    /* MPI-IO */
    &hdf5_logutils,     /* HDF5 */
    &pnetcdf_logutils,  /* PNETCDF */
34 35 36 37 38 39 40 41 42 43 44 45
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL
};
46 47 48 49 50 51 52

/* darshan_log_open()
 *
 * open a darshan log file for reading/writing
 *
 * returns 0 on success, -1 on failure
 */
53
darshan_fd darshan_log_open(const char *name, const char *mode)
54
{
55
    darshan_fd tmp_fd;
56

57 58 59 60
    /* we only allows "w" or "r" modes, nothing fancy */
    assert(strlen(mode) == 1);
    assert(mode[0] == 'r' || mode[0] == 'w');

61
    tmp_fd = malloc(sizeof(*tmp_fd));
62 63 64 65
    if(!tmp_fd)
        return(NULL);
    memset(tmp_fd, 0, sizeof(*tmp_fd));

66 67
    tmp_fd->gzf = gzopen(name, mode);
    if(!tmp_fd->gzf)
68 69
    {
        free(tmp_fd);
70
        tmp_fd = NULL;
71
    }
72 73

    return(tmp_fd);
74 75
}

76 77 78 79 80 81 82 83
/* darshan_log_getheader()
 *
 * read the header of the darshan log and set internal data structures
 * NOTE: this function must be called before reading other portions of the log
 *
 * fd:     open log file descriptor; its version, swap_flag, rec_map and
 *         mod_map fields are filled in from the header
 * header: output buffer for the header.  When the log was written with the
 *         opposite byte order, the magic number and the record/module map
 *         entries are byte swapped in place.
 *
 * returns 0 on success, -1 on failure
 */
int darshan_log_getheader(darshan_fd fd, struct darshan_header *header)
{
    int i;
    int ret;

    ret = darshan_log_seek(fd, 0);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }

    /* read header from log file */
    ret = darshan_log_read(fd, header, sizeof(*header));
    /* compare as int: comparing ret against the size_t produced by sizeof
     * would convert a -1 error return into a huge unsigned value, and the
     * failed read would go unnoticed
     */
    if(ret < (int)sizeof(*header))
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read header).\n");
        return(-1);
    }

    /* save the version string */
    /* NOTE(review): strncpy does not NUL-terminate when the source fills all
     * 8 bytes; this presumably relies on fd->version being larger than 8
     * bytes and zeroed by darshan_log_open() -- confirm against the
     * descriptor definition
     */
    strncpy(fd->version, header->version_string, 8);

    if(header->magic_nr == DARSHAN_MAGIC_NR)
    {
        /* no byte swapping needed, this file is in host format already */
        fd->swap_flag = 0;
    }
    else
    {
        /* try byte swapping */
        DARSHAN_BSWAP64(&header->magic_nr);
        if(header->magic_nr == DARSHAN_MAGIC_NR)
        {
            fd->swap_flag = 1;

            /* swap the log map variables in the header */
            DARSHAN_BSWAP64(&header->rec_map.off);
            DARSHAN_BSWAP64(&header->rec_map.len);
            for(i = 0; i < DARSHAN_MAX_MODS; i++)
            {
                DARSHAN_BSWAP64(&header->mod_map[i].off);
                DARSHAN_BSWAP64(&header->mod_map[i].len);
            }
        }
        else
        {
            /* otherwise this file is just broken */
            fprintf(stderr, "Error: bad magic number in darshan log file.\n");
            return(-1);
        }
    }

    /* save the mapping of data within log file to this file descriptor */
    memcpy(&fd->rec_map, &header->rec_map, sizeof(struct darshan_log_map));
    memcpy(&fd->mod_map, &header->mod_map, DARSHAN_MAX_MODS * sizeof(struct darshan_log_map));

    return(0);
}
142

143
/* darshan_log_getjob()
144 145
 *
 * read job level metadata from the darshan log file
146
 *
147
 * returns 0 on success, -1 on failure
148
 */
149
int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
150
{
151
    int ret;
152

153
    ret = darshan_log_seek(fd, sizeof(struct darshan_header));
154
    if(ret < 0)
155
    {
156
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
157
        return(-1);
158 159
    }

160
    /* read the job data from the log file */
161 162
    ret = darshan_log_read(fd, job, sizeof(*job));
    if(ret < sizeof(*job))
163
    {
164
        fprintf(stderr, "Error: invalid darshan log file (failed to read job data).\n");
165 166 167
        return(-1);
    }

168
    if(fd->swap_flag)
169
    {
170 171 172 173 174 175
        /* swap bytes if necessary */
        DARSHAN_BSWAP64(&job->uid);
        DARSHAN_BSWAP64(&job->start_time);
        DARSHAN_BSWAP64(&job->end_time);
        DARSHAN_BSWAP64(&job->nprocs);
        DARSHAN_BSWAP64(&job->jobid);
176 177
    }

178 179 180 181 182
    return(0);
}

/* darshan_log_getexe()
 *
 * read the exe string stored after the job record in the darshan log file
 *
 * fd:  open log file descriptor
 * buf: caller-provided output buffer; DARSHAN_EXE_LEN+1 bytes are written
 *      to it, so it must be at least that large.  On success it holds a
 *      NUL-terminated string cut at the first newline.
 *
 * returns 0 on success, -1 on failure
 */
int darshan_log_getexe(darshan_fd fd, char *buf)
{
    int tmp_off = sizeof(struct darshan_header) + sizeof(struct darshan_job);
    int ret;
    char *newline;

    ret = darshan_log_seek(fd, tmp_off);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }

    /* read the trailing exe data from the darshan log */
    ret = darshan_log_read(fd, buf, DARSHAN_EXE_LEN+1);
    /* the cast keeps this comparison signed even if DARSHAN_EXE_LEN expands
     * to an unsigned (sizeof-based) expression, so a -1 error return is
     * still caught
     */
    if(ret < (int)(DARSHAN_EXE_LEN+1))
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read exe string).\n");
        return(-1);
    }

    /* never trust the file to supply the terminator: strchr() below must
     * not run past the end of the buffer
     */
    buf[DARSHAN_EXE_LEN] = '\0';

    /* mount info is stored after the exe string, so truncate there */
    newline = strchr(buf, '\n');
    if(newline)
        *newline = '\0';

    return (0);
}

/* darshan_log_getmounts()
 * 
212 213 214
 * retrieves mount table information from the log.  Note that mnt_pts and
 * fs_types are arrays that will be allocated by the function and must be
 * freed by the caller.  count will indicate the size of the arrays
215
 */
216
int darshan_log_getmounts(darshan_fd fd, char*** mnt_pts,
217 218
    char*** fs_types, int* count)
{
219
    int tmp_off = sizeof(struct darshan_header) + sizeof(struct darshan_job);
220 221 222
    int ret;
    char *pos;
    int array_index = 0;
223
    char buf[DARSHAN_EXE_LEN+1];
224

225 226
    ret = darshan_log_seek(fd, tmp_off);
    if(ret < 0)
227
    {
228 229 230
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
        return(-1);
    }
231

232 233 234 235 236 237
    /* read the trailing mount data from the darshan log */
    ret = darshan_log_read(fd, buf, DARSHAN_EXE_LEN+1);
    if(ret < DARSHAN_EXE_LEN+1)
    {
        fprintf(stderr, "Error: invalid darshan log file (failed to read mount info).\n");
        return(-1);
238
    }
239

240
    /* count entries */
241
    *count = 0;
242
    pos = buf;
243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
    while((pos = strchr(pos, '\n')) != NULL)
    {
        pos++;
        (*count)++;
    }

    if(*count == 0)
    {
        /* no mount entries present */
        return(0);
    }

    /* allocate output arrays */
    *mnt_pts = malloc((*count)*sizeof(char*));
    assert(*mnt_pts);
    *fs_types = malloc((*count)*sizeof(char*));
    assert(*fs_types);

    /* work backwards through the table and parse each line (except for
     * first, which holds command line information)
     */
264
    while((pos = strrchr(buf, '\n')) != NULL)
265 266
    {
        /* overestimate string lengths */
267
        (*mnt_pts)[array_index] = malloc(DARSHAN_EXE_LEN);
268
        assert((*mnt_pts)[array_index]);
269
        (*fs_types)[array_index] = malloc(DARSHAN_EXE_LEN);
270 271
        assert((*fs_types)[array_index]);

272 273 274
        ret = sscanf(++pos, "%s\t%s", (*fs_types)[array_index],
            (*mnt_pts)[array_index]);
        if(ret != 2)
275 276 277 278 279 280 281 282 283
        {
            fprintf(stderr, "Error: poorly formatted mount table in log file.\n");
            return(-1);
        }
        pos--;
        *pos = '\0';
        array_index++;
    }

284 285 286
    return(0);
}

287
/* darshan_log_gethash()
288
 *
289
 * read the hash of records from the darshan log file
290 291 292
 *
 * returns 0 on success, -1 on failure
 */
293
int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash)
294
{
295 296
    char *hash_buf;
    int hash_buf_sz = fd->rec_map.len;
297
    char *buf_ptr;
298 299 300 301
    darshan_record_id *rec_id_ptr;
    uint32_t *path_len_ptr;
    char *path_ptr;
    struct darshan_record_ref *ref;
302 303
    int ret;

304 305
    hash_buf = malloc(hash_buf_sz);
    if(!hash_buf)
306 307
        return(-1);

308
    ret = darshan_log_seek(fd, fd->rec_map.off);
309
    if(ret < 0)
310 311
    {
        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
312
        return(-1);
313
    }
314

315
    /* read the record hash from the log file */
316
    ret = darshan_log_read(fd, hash_buf, fd->rec_map.len);
317
    if(ret < fd->rec_map.len)
318
    {
319
        fprintf(stderr, "Error: invalid darshan log file (failed to read record hash).\n");
320 321 322
        return(-1);
    }

323
    buf_ptr = hash_buf;
324
    while(buf_ptr < (hash_buf + hash_buf_sz))
325
    {
326
        /* get pointers for each field of this darshan record */
327
        /* NOTE: darshan record hash serialization method: 
328 329
         *          ... darshan_record_id | (uint32_t) path_len | path ...
         */
330 331 332 333 334 335 336 337
        rec_id_ptr = (darshan_record_id *)buf_ptr;
        buf_ptr += sizeof(darshan_record_id);
        path_len_ptr = (uint32_t *)buf_ptr;
        buf_ptr += sizeof(uint32_t);
        path_ptr = (char *)buf_ptr;
        buf_ptr += *path_len_ptr;

        if(fd->swap_flag)
338
        {
339 340 341
            /* we need to sort out endianness issues before deserializing */
            DARSHAN_BSWAP64(rec_id_ptr);
            DARSHAN_BSWAP32(path_len_ptr);
342 343
        }

344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362
        HASH_FIND(hlink, *hash, rec_id_ptr, sizeof(darshan_record_id), ref);
        if(!ref)
        {
            ref = malloc(sizeof(*ref));
            if(!ref)
            {
                return(-1);
            }
            ref->rec.name = malloc(*path_len_ptr + 1);
            if(!ref->rec.name)
            {
                free(ref);
                return(-1);
            }

            /* set the fields for this record */
            ref->rec.id = *rec_id_ptr;
            memcpy(ref->rec.name, path_ptr, *path_len_ptr);
            ref->rec.name[*path_len_ptr] = '\0';
363

364 365 366
            /* add this record to the hash */
            HASH_ADD(hlink, *hash, rec.id, sizeof(darshan_record_id), ref);
        }
367 368
    }

369 370 371
    return(0);
}

372 373
/* darshan_log_get_moddat()
 *
 * read the next moddat_buf_sz bytes of the given module's data from the
 * darshan log file into moddat_buf
 *
 * The read starts at the beginning of the module's region unless the
 * current log position already lies inside that region, in which case it
 * continues from the current position.
 *
 * returns 1 when moddat_buf_sz bytes were read, 0 when there is no (more)
 * data for this module, -1 on failure
 */
int darshan_log_get_moddat(darshan_fd fd, darshan_module_id mod_id,
    void *moddat_buf, int moddat_buf_sz)
{
    int64_t mod_buf_end;
    int ret;

    /* mod_id indexes fixed-size tables below; reject anything out of range */
    if((int)mod_id < 0 || (int)mod_id >= DARSHAN_MAX_MODS)
    {
        fprintf(stderr, "Error: invalid darshan module id.\n");
        return(-1);
    }

    /* the map offset and length are 64-bit values, so the end offset is
     * computed in 64 bits as well to avoid truncating it for large logs
     */
    mod_buf_end = fd->mod_map[mod_id].off + fd->mod_map[mod_id].len;

    if(!fd->mod_map[mod_id].len || fd->pos == mod_buf_end)
        return(0); /* no (more) data corresponding to this mod_id */

    /* only seek to start of module data if current log file position 
     * is not within the given mod_id's range. This allows one to
     * repeatedly call this function and get chunks of a module's
     * data piecemeal.
     */
    if((fd->pos < fd->mod_map[mod_id].off) || (fd->pos > mod_buf_end))
    {
        ret = darshan_log_seek(fd, fd->mod_map[mod_id].off);
        if(ret < 0)
        {
            fprintf(stderr, "Error: unable to seek in darshan log file.\n");
            return(-1);
        }
    }

    /* read the next chunk of this module's data from the log file */
    ret = darshan_log_read(fd, moddat_buf, moddat_buf_sz);
    if(ret != moddat_buf_sz)
    {
        fprintf(stderr,
            "Error: invalid darshan log file (failed to read module %s data).\n",
            darshan_module_names[mod_id]);
        return(-1);
    }

    return(1);
}

409 410 411 412 413 414 415
/* darshan_log_close()
 *
 * close an open darshan file descriptor and release the descriptor itself
 *
 * fd: descriptor returned by darshan_log_open(); passing NULL is a no-op,
 *     so callers can use this unconditionally on cleanup paths.  fd must
 *     not be used after this call.
 *
 * no return value
 */
void darshan_log_close(darshan_fd fd)
{
    if(!fd)
        return;

    if(fd->gzf)
        gzclose(fd->gzf);

    free(fd);
}

425
/* **************************************************** */
426

427 428 429 430
/* Move the log to absolute position 'offset'.
 *
 * Nothing is done when the descriptor's recorded position already equals
 * the requested offset.  fd->pos is only updated when gzseek() reports
 * exactly the requested position.
 *
 * return 0 on successful seek to offset, -1 on failure.
 */
static int darshan_log_seek(darshan_fd fd, off_t offset)
{
    z_off_t target;

    /* already there */
    if(fd->pos == offset)
        return(0);

    /* no underlying gz stream to seek in */
    if(!fd->gzf)
        return(-1);

    target = (z_off_t)offset;
    if(gzseek(fd->gzf, target, SEEK_SET) != target)
        return(-1);

    fd->pos = offset;
    return(0);
}

452
#if 0
/* Write len bytes from buf at the current position of the log (currently
 * compiled out).  The recorded position advances by the amount gzwrite()
 * reports whenever that amount is positive.
 *
 * return amount written on success, -1 on failure.
 */
static int darshan_log_write(darshan_fd fd, void* buf, int len)
{
    int nwritten;

    /* no underlying gz stream to write to */
    if(!fd->gzf)
        return(-1);

    nwritten = gzwrite(fd->gzf, buf, len);
    if(nwritten > 0)
        fd->pos += nwritten;

    return(nwritten);
}
#endif
470

471 472 473 474 475 476
/* Read up to len bytes from the current position of the log into buf.
 * The recorded position advances by the amount gzread() reports whenever
 * that amount is positive.
 *
 * return amount read on success, 0 on EOF, -1 on failure.
 */
static int darshan_log_read(darshan_fd fd, void* buf, int len)
{
    int nread;

    /* no underlying gz stream to read from */
    if(!fd->gzf)
        return(-1);

    nread = gzread(fd->gzf, buf, len);
    if(nread > 0)
        fd->pos += nread;

    return(nread);
}
487 488 489 490 491 492 493 494 495

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */