/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <zlib.h>
#include <time.h>
#include <stdlib.h>
#include <getopt.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
19 20 21

#include "darshan-logutils.h"

22
extern uint32_t darshan_hashlittle(const void *key, size_t length, uint32_t initval);
23 24 25 26 27 28

/*
 * usage()
 *
 * Print the command-line synopsis and the list of supported options to
 * stderr, then terminate the process with exit status 1.  Control never
 * returns to the caller, so no value is ever produced despite the int
 * return type.
 *
 * exename: program name shown in the synopsis line
 */
int usage (char *exename)
{
    fprintf(stderr, "Usage: %s [options] <infile> <outfile>\n", exename);
    fprintf(stderr, "       Converts darshan log from infile to outfile.\n");
    fprintf(stderr, "       rewrites the log file into the newest format.\n");
    fprintf(stderr, "       --bzip2 Use bzip2 compression instead of zlib.\n");
    fprintf(stderr, "       --obfuscate Obfuscate items in the log.\n");
    fprintf(stderr, "       --key <key> Key to use when obfuscating.\n");
    fprintf(stderr, "       --annotate <string> Additional metadata to add.\n");
    fprintf(stderr, "       --file <hash> Limit output to specified (hashed) file only.\n");
    fprintf(stderr, "       --reset-md Reset old metadata during conversion.\n");

    exit(1);
}

39
/*
 * parse_args()
 *
 * Parse the command line (long options only) followed by exactly two
 * positional arguments.  Any parse error, the --help option, or a wrong
 * number of positional arguments ends in usage(), which exits.
 *
 * argc, argv: command line as passed to main()
 * infile:     set to the first positional argument
 * outfile:    set to the second positional argument
 * bzip2:      set to 1 if --bzip2 was given, otherwise 0
 * obfuscate:  set to 1 if --obfuscate was given, otherwise 0
 * reset_md:   set to 1 if --reset-md was given, otherwise 0
 * key:        set to the decimal value of --key, otherwise 0
 * annotate:   set to the --annotate argument; left untouched when the
 *             option is absent, so the caller must pre-initialize it
 * hash:       set to the unsigned value of --file, otherwise 0
 */
void parse_args (int argc, char **argv, char **infile, char **outfile,
                 int *bzip2, int *obfuscate, int *reset_md, int *key,
                 char **annotate, uint64_t* hash)
{
    int index = 0;
    int ret;
    long key_val;
    char *key_end;

    static struct option long_opts[] =
    {
        {"bzip2", 0, NULL, 'b'},
        {"annotate", 1, NULL, 'a'},
        {"obfuscate", 0, NULL, 'o'},
        {"reset-md", 0, NULL, 'r'},
        {"key", 1, NULL, 'k'},
        {"file", 1, NULL, 'f'},
        {"help",  0, NULL, 0},
        { 0, 0, 0, 0 }
    };

    *bzip2 = 0;
    *obfuscate = 0;
    *reset_md = 0;
    *key = 0;
    *hash = 0;

    while(1)
    {
        int c = getopt_long(argc, argv, "", long_opts, &index);

        if (c == -1) break;

        switch(c)
        {
            case 'b':
                *bzip2 = 1;
                break;
            case 'a':
                *annotate = optarg;
                break;
            case 'o':
                *obfuscate = 1;
                break;
            case 'r':
                *reset_md = 1;
                break;
            case 'k':
                /* reject empty, non-numeric, trailing-garbage and
                 * out-of-range keys rather than silently using a
                 * different key than the one the user typed
                 */
                errno = 0;
                key_val = strtol(optarg, &key_end, 10);
                if(key_end == optarg || *key_end != '\0' || errno == ERANGE ||
                   key_val < INT_MIN || key_val > INT_MAX)
                    usage(argv[0]);
                *key = (int)key_val;
                break;
            case 'f':
                ret = sscanf(optarg, "%" PRIu64, hash);
                if(ret != 1)
                    usage(argv[0]);
                break;
            case 0:   /* --help is registered with val 0 */
            case '?':
            default:
                usage(argv[0]);
                break;
        }
    }

    /* exactly two positional arguments must remain: infile and outfile */
    if (optind + 2 == argc)
    {
        *infile = argv[optind];
        *outfile = argv[optind+1];
    }
    else
    {
        usage(argv[0]);
    }

    return;
}

113 114 115 116 117 118
/*
 * reset_md_job()
 *
 * Discard the job's existing metadata by truncating the metadata string
 * to the empty string.
 */
static void reset_md_job(struct darshan_job *job)
{
    *job->metadata = '\0';
}

119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
/*
 * obfuscate_job()
 *
 * Replace the job's uid, and its jobid when that is non-zero, with the
 * darshan_hashlittle() hash of the field's own bytes, seeded with key.
 */
void obfuscate_job(int key, struct darshan_job *job)
{
    uint32_t uid_hash = darshan_hashlittle(&job->uid, sizeof(job->uid), key);

    job->uid = (int64_t) uid_hash;

    /* a jobid of zero is left as-is */
    if (job->jobid == 0)
        return;

    job->jobid = (int64_t) darshan_hashlittle(&job->jobid, sizeof(job->jobid), key);
}

/*
 * obfuscate_exe()
 *
 * Overwrite the executable string in place with the decimal text of its
 * darshan_hashlittle() hash, seeded with key.  The old contents are
 * zeroed first so that none of the original text survives past the end
 * of the (possibly shorter) hash string.
 *
 * NOTE: the caller's buffer must be able to hold the printed hash and
 * its terminator; this function has no way to check that.
 */
void obfuscate_exe(int key, char *exe)
{
    size_t exe_len = strlen(exe);
    uint32_t digest = darshan_hashlittle(exe, exe_len, key);

    memset(exe, 0, exe_len);
    sprintf(exe, "%u", digest);
}

141
void obfuscate_filenames(int key, struct darshan_record_ref *rec_hash)
142
{
143
    struct darshan_record_ref *ref, *tmp;
144
    uint32_t hashed;
145
    char tmp_string[32];
146

147 148 149 150 151 152 153 154 155
    HASH_ITER(hlink, rec_hash, ref, tmp)
    {
        hashed = darshan_hashlittle(ref->rec.name, strlen(ref->rec.name), key);
        sprintf(tmp_string, "%u", hashed);
        free(ref->rec.name);
        ref->rec.name = malloc(strlen(tmp_string));
        assert(ref->rec.name);
        memcpy(ref->rec.name, tmp_string, strlen(tmp_string));
    }
156 157 158 159

    return;
}

160 161 162 163 164
/*
 * add_annotation()
 *
 * Append annotation tokens to the job's metadata string, one token per
 * line.
 *
 * annotation: tab-separated list of tokens; it is modified in place by
 *             strtok_r()
 * job:        job record whose metadata buffer receives the tokens
 *
 * A token that does not fit in the space left in job->metadata is
 * skipped: a diagnostic is printed to stderr and processing continues
 * with the next token.
 */
void add_annotation (char *annotation,
                     struct darshan_job *job)
{
    char *token;
    char *save;
    size_t len;
    size_t token_len;
    int remaining;

    /* check for newline in existing metadata, insert if needed.  The
     * insert writes indices len and len+1, so both must lie inside the
     * buffer.
     */
    len = strlen(job->metadata);
    if(len > 0 && len + 1 < sizeof(job->metadata))
    {
        if(job->metadata[len-1] != '\n')
        {
            job->metadata[len] = '\n';
            job->metadata[len+1] = '\0';
        }
    }

    /* determine remaining space in metadata string; this count includes
     * the byte occupied by the terminator
     */
    remaining = sizeof(job->metadata) - strlen(job->metadata);

    for(token=strtok_r(annotation, "\t", &save);
        token != NULL;
        token=strtok_r(NULL, "\t", &save))
    {
        token_len = strlen(token);

        /* token + newline must fit and still leave room for the terminator */
        if (token_len + 1 < (size_t)remaining)
        {
            strcat(job->metadata, token);
            strcat(job->metadata, "\n");
            remaining -= (int)(token_len + 1);
        }
        else
        {
            fprintf(stderr,
                    "not enough space left in metadata for: current=%s token=%s (remain=%d:need=%d)\n",
                    job->metadata, token, remaining-1, (int)token_len+1);
        }
    }

    return;
}

/*
 * remove_hash_recs()
 *
 * Delete and free every record in *rec_hash whose id differs from hash,
 * leaving at most the one matching record in the table.
 *
 * rec_hash: address of the table head; the head is updated as entries
 *           are deleted
 * hash:     id of the record to keep
 */
static void remove_hash_recs(struct darshan_record_ref **rec_hash, darshan_record_id hash)
{
    struct darshan_record_ref *ref, *tmp;

    HASH_ITER(hlink, *rec_hash, ref, tmp)
    {
        /* keep the record the caller asked for */
        if(ref->rec.id == hash)
            continue;

        HASH_DELETE(hlink, *rec_hash, ref);
        free(ref->rec.name);
        free(ref);
    }

    return;
}

219 220 221 222 223
int main(int argc, char **argv)
{
    int ret;
    char *infile_name;
    char *outfile_name;
224
    struct darshan_header header;
225
    struct darshan_job job;
226
    char tmp_string[4096] = {0};
227 228 229 230 231
    darshan_fd infile;
    darshan_fd outfile;
    int i;
    int mount_count;
    char** mnt_pts;
232
    char** fs_types;
233 234 235 236
    struct darshan_record_ref *rec_hash = NULL;
    struct darshan_record_ref *ref, *tmp;
    char *mod_buf;
    int mod_buf_sz;
237
    enum darshan_comp_type comp_type;
238 239 240
    int bzip2;
    int obfuscate;
    int key;
241
    char *annotation = NULL;
242 243
    darshan_record_id hash;
    int reset_md;
244

245 246
    parse_args(argc, argv, &infile_name, &outfile_name, &bzip2, &obfuscate,
               &reset_md, &key, &annotation, &hash);
247

248
    infile = darshan_log_open(infile_name);
249 250
    if(!infile)
    {
251
        fprintf(stderr, "darshan_log_open() failed to open %s\n.", infile_name);
252 253 254
        return(-1);
    }
 
255 256
    comp_type = bzip2 ? comp_type = DARSHAN_BZIP2_COMP : DARSHAN_ZLIB_COMP;
    outfile = darshan_log_create(outfile_name, comp_type);
257 258
    if(!outfile)
    {
259
        fprintf(stderr, "darshan_log_create() failed to create %s\n.", outfile_name);
260 261 262 263 264 265 266 267 268 269 270
        darshan_log_close(infile);
        return(-1);
    }

    /* read header from input file */
    ret = darshan_log_getheader(infile, &header);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to read header from input log file %s.\n", infile_name);
        darshan_log_close(infile);
        darshan_log_close(outfile);
271 272 273
        return(-1);
    }

274 275 276
    /* NOTE: we do not write the header to the output file until the end, as
     * the mapping data stored in this structure may change in the conversion
     * process (particularly, if we are converting between libz/bz2 compression)
277
     */
278

279 280 281 282 283 284
    /* read job info */
    ret = darshan_log_getjob(infile, &job);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to read job information from log file.\n");
        darshan_log_close(infile);
285
        darshan_log_close(outfile);
286 287 288
        return(-1);
    }

289
    if (reset_md) reset_md_job(&job);
290
    if (obfuscate) obfuscate_job(key, &job);
291
    if (annotation) add_annotation(annotation, &job);
292

293 294 295 296
    ret = darshan_log_putjob(outfile, &job);
    if (ret < 0)
    {
        fprintf(stderr, "Error: unable to write job information to log file.\n");
297
        darshan_log_close(infile);
298 299 300 301
        darshan_log_close(outfile);
        return(-1);
    }

302
    ret = darshan_log_getexe(infile, tmp_string);
303 304 305 306
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to read trailing job information.\n");
        darshan_log_close(infile);
307
        darshan_log_close(outfile);
308 309
        return(-1);
    }
310

311 312
    if (obfuscate) obfuscate_exe(key, tmp_string);

313 314 315 316
    ret = darshan_log_putexe(outfile, tmp_string);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to write trailing job information.\n");
317
        darshan_log_close(infile);
318 319 320
        darshan_log_close(outfile);
        return(-1);
    }
321 322

    ret = darshan_log_getmounts(infile, &mnt_pts, &fs_types, &mount_count);
323 324 325 326
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to read trailing job information.\n");
        darshan_log_close(infile);
327
        darshan_log_close(outfile);
328 329 330
        return(-1);
    }

331
    ret = darshan_log_putmounts(outfile, mnt_pts, fs_types, mount_count);
332 333 334
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to write mount information.\n");
335
        darshan_log_close(infile);
336 337 338 339
        darshan_log_close(outfile);
        return(-1);
    }

340
    ret = darshan_log_gethash(infile, &rec_hash);
341 342
    if(ret < 0)
    {
343 344 345 346
        fprintf(stderr, "Error: unable to read darshan record hash.\n");
        darshan_log_close(infile);
        darshan_log_close(outfile);
        return(-1);
347
    }
348

349 350 351 352 353 354
    /* NOTE: obfuscating filepaths breaks the ability to map files
     * to the corresponding FS & mount info maintained by darshan
     */
    if(obfuscate) obfuscate_filenames(key, rec_hash);
    if(hash) remove_hash_recs(&rec_hash, hash);

355 356
    ret = darshan_log_puthash(outfile, rec_hash);
    if(ret < 0)
357
    {
358 359 360 361
        fprintf(stderr, "Error: unable to write darshan record hash.\n");
        darshan_log_close(infile);
        darshan_log_close(outfile);
        return(-1);
362 363
    }

364 365 366 367 368
    mod_buf = malloc(DARSHAN_DEF_COMP_BUF_SZ);
    if(!mod_buf)
        return(-1);

    for(i=0; i<DARSHAN_MAX_MODS; i++)
369
    {
370 371 372 373 374 375
        int mod_bytes_left;
        int mod_bytes_left_save;
        void *mod_buf_p;
        void *rec_p = NULL;
        darshan_record_id rec_id;

376 377 378 379 380 381
        memset(mod_buf, 0, DARSHAN_DEF_COMP_BUF_SZ);
        mod_buf_sz = DARSHAN_DEF_COMP_BUF_SZ;

        /* check each module for any data */
        ret = darshan_log_getmod(infile, i, mod_buf, &mod_buf_sz);
        if(ret < 0)
382
        {
383 384 385 386
            fprintf(stderr, "Error: failed to get module %s data.\n",
                darshan_module_names[i]);
            darshan_log_close(infile);
            darshan_log_close(outfile);
387 388
            return(-1);
        }
389
        else if(ret == 0)
390
        {
391 392
            /* skip modules not present in log file */
            continue;
393
        }
394

395 396 397 398 399 400 401 402
        /* skip modules with no defined logutil handlers */
        if(!mod_logutils[i])
        {
            fprintf(stderr, "Warning: no log utility handlers defined "
                "for module %s, SKIPPING\n", darshan_module_names[i]);
            continue;
        }

403
        /* we have module data to convert */
404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436
        /* NOTE: it is necessary to iterate through each module's
         * records to correct any endianness issues before writing
         * this data back to file
         */
        mod_bytes_left = mod_buf_sz;
        mod_buf_p = mod_buf;
        while(mod_bytes_left > 0)
        {
            mod_bytes_left_save = mod_bytes_left;
            ret = mod_logutils[i]->log_get_record(&mod_buf_p, &mod_bytes_left,
                &rec_p, &rec_id, infile->swap_flag);
            if(ret < 0)
            {
                fprintf(stderr, "Error: failed to parse module %s data record\n",
                    darshan_module_names[i]);
                darshan_log_close(infile);
                darshan_log_close(outfile);
                return(-1);
            }

            if(hash == rec_id)
            {
                mod_buf_p = rec_p;
                mod_buf_sz = mod_bytes_left_save - mod_bytes_left;
                break;
            }
            else if(mod_bytes_left == 0)
            {
                mod_buf_p = mod_buf;
            }
        }

        ret = darshan_log_putmod(outfile, i, mod_buf_p, mod_buf_sz);
437 438 439 440 441 442 443 444
        if(ret < 0)
        {
            fprintf(stderr, "Error: failed to put module %s data.\n",
                darshan_module_names[i]);
            darshan_log_close(infile);
            darshan_log_close(outfile);
            return(-1);
        }
445
    }
446
    free(mod_buf);
447

448 449 450 451 452 453 454 455 456 457 458 459 460
    /* write header to output file */
    ret = darshan_log_putheader(outfile);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to write header to output log file %s.\n", outfile_name);
        darshan_log_close(infile);
        darshan_log_close(outfile);
        return(-1);
    }

    darshan_log_close(infile);
    darshan_log_close(outfile);

461 462 463 464 465 466 467 468 469 470
    for(i=0; i<mount_count; i++)
    {
        free(mnt_pts[i]);
        free(fs_types[i]);
    }
    if(mount_count > 0)
    {
        free(mnt_pts);
        free(fs_types);
    }
471 472 473 474 475 476 477 478

    HASH_ITER(hlink, rec_hash, ref, tmp)
    {
        HASH_DELETE(hlink, rec_hash, ref);
        free(ref->rec.name);
        free(ref);
    }

479
    return(ret);
480 481
}

482 483 484 485 486 487 488 489
/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */