darshan-convert.c 10.7 KB
Newer Older
1
/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <zlib.h>
#include <time.h>
#include <stdlib.h>
#include <getopt.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>

#include "darshan-logutils.h"

22 23
/* Size in bytes of the scratch buffer used to hold one module record at a
 * time; 1 KiB is enough for all current mod records ...
 */
#define DEF_MOD_BUF_SIZE 1024

/* Hash routine used by the obfuscation helpers below.  Only declared here;
 * no definition appears in this file.
 */
extern uint32_t darshan_hashlittle(const void *key, size_t length, uint32_t initval);
25 26 27 28 29 30

/*
 * usage()
 *
 * Prints the command-line synopsis and option summary to stderr, then
 * terminates the process with exit status 1.  Control never returns to
 * the caller, which is why no value is returned despite the int return type.
 *
 * exename: program name shown in the synopsis line
 */
int usage (char *exename)
{
    /* every line after the synopsis is fixed text, emitted in this order */
    static const char *const help_lines[] =
    {
        "       Converts darshan log from infile to outfile.\n",
        "       rewrites the log file into the newest format.\n",
        "       --bzip2 Use bzip2 compression instead of zlib.\n",
        "       --obfuscate Obfuscate items in the log.\n",
        "       --key <key> Key to use when obfuscating.\n",
        "       --annotate <string> Additional metadata to add.\n",
        "       --file <hash> Limit output to specified (hashed) file only.\n",
        "       --reset-md Reset old metadata during conversion.\n",
    };
    size_t line;

    fprintf(stderr, "Usage: %s [options] <infile> <outfile>\n", exename);
    for (line = 0; line < sizeof(help_lines) / sizeof(help_lines[0]); line++)
    {
        fputs(help_lines[line], stderr);
    }

    exit(1);
}

41
/*
 * parse_args()
 *
 * Parses the long-only command line options and the two positional
 * arguments.  Any malformed option, "--help", or a wrong number of
 * positional arguments calls usage(), which prints help and exits.
 *
 * argc, argv:  command line as passed to main()
 * infile:      out; set to the first positional argument
 * outfile:     out; set to the second positional argument
 * bzip2:       out; 1 if --bzip2 was given, otherwise 0
 * obfuscate:   out; 1 if --obfuscate was given, otherwise 0
 * reset_md:    out; 1 if --reset-md was given, otherwise 0
 * key:         out; value of --key, otherwise 0
 * annotate:    out; points at the --annotate argument when given.  It is
 *              NOT modified when the option is absent, so the caller must
 *              initialize it beforehand.
 * hash:        out; value of --file, otherwise 0
 */
void parse_args (int argc, char **argv, char **infile, char **outfile,
                 int *bzip2, int *obfuscate, int *reset_md, int *key,
                 char **annotate, uint64_t* hash)
{
    int index;
    int ret;
    long key_val;
    char *key_end;

    static struct option long_opts[] =
    {
        {"bzip2", 0, NULL, 'b'},
        {"annotate", 1, NULL, 'a'},
        {"obfuscate", 0, NULL, 'o'},
        {"reset-md", 0, NULL, 'r'},
        {"key", 1, NULL, 'k'},
        {"file", 1, NULL, 'f'},
        {"help",  0, NULL, 0},
        { 0, 0, 0, 0 }
    };

    *bzip2 = 0;
    *obfuscate = 0;
    *reset_md = 0;
    *key = 0;
    *hash = 0;

    while(1)
    {
        int c = getopt_long(argc, argv, "", long_opts, &index);

        if (c == -1) break;

        switch(c)
        {
            case 'b':
                *bzip2 = 1;
                break;
            case 'a':
                *annotate = optarg;
                break;
            case 'o':
                *obfuscate = 1;
                break;
            case 'r':
                *reset_md = 1;
                break;
            case 'k':
                /* strtol rather than atoi: a mistyped key must be rejected
                 * instead of silently obfuscating with 0 or a truncated value
                 */
                errno = 0;
                key_val = strtol(optarg, &key_end, 10);
                if(key_end == optarg || *key_end != '\0' || errno == ERANGE ||
                   key_val < INT_MIN || key_val > INT_MAX)
                    usage(argv[0]);
                *key = (int)key_val;
                break;
            case 'f':
                ret = sscanf(optarg, "%" PRIu64, hash);
                if(ret != 1)
                    usage(argv[0]);
                break;
            case 0:     /* --help is registered with a value of 0 */
            case '?':
            default:
                usage(argv[0]);
                break;
        }
    }

    /* exactly two positional arguments must remain: <infile> <outfile> */
    if (optind + 2 == argc)
    {
        *infile = argv[optind];
        *outfile = argv[optind+1];
    }
    else
    {
        usage(argv[0]);
    }

    return;
}

115 116 117 118 119 120
/*
 * reset_md_job()
 *
 * Discards the job's existing metadata by turning it into an empty string.
 * Only the first byte is written; the rest of the buffer is left as it was.
 */
static void reset_md_job(struct darshan_job *job)
{
    *job->metadata = '\0';
}

121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
/*
 * obfuscate_job()
 *
 * Replaces the uid, and the jobid when it is non-zero, with a hash of the
 * field's own bytes seeded by key.  A jobid of 0 is left untouched.
 *
 * key: seed passed to darshan_hashlittle()
 * job: job record modified in place
 */
void obfuscate_job(int key, struct darshan_job *job)
{
    uint32_t uid_digest = darshan_hashlittle(&job->uid, sizeof(job->uid), key);

    job->uid = (int64_t) uid_digest;

    if (job->jobid == 0)
        return;

    job->jobid = (int64_t) darshan_hashlittle(&job->jobid, sizeof(job->jobid), key);
}

/*
 * obfuscate_exe()
 *
 * Overwrites the NUL-terminated string in exe with the decimal text of its
 * hash (seeded by key).
 *
 * NOTE: the decimal form of a 32-bit hash needs up to 10 digits plus the
 * terminator, and that may be longer than the original string.  The
 * function has no way to know the buffer capacity, so the caller must
 * supply a buffer with at least 11 bytes of room.
 */
void obfuscate_exe(int key, char *exe)
{
    const size_t exe_len = strlen(exe);
    const uint32_t digest = darshan_hashlittle(exe, exe_len, key);

    /* clear the old text first so nothing survives after a shorter digest */
    memset(exe, 0, exe_len);
    sprintf(exe, "%u", digest);
}

143
void obfuscate_filenames(int key, struct darshan_name_record_ref *name_hash)
144
{
145
    struct darshan_name_record_ref *ref, *tmp;
146
    uint32_t hashed;
147
    char tmp_string[128] = {0};
148
    darshan_record_id tmp_id;
149

150
    HASH_ITER(hlink, name_hash, ref, tmp)
151
    {
152 153 154
        tmp_id = ref->name_record->id;
        hashed = darshan_hashlittle(ref->name_record->name,
            strlen(ref->name_record->name), key);
155
        sprintf(tmp_string, "%u", hashed);
156 157 158 159 160 161
        free(ref->name_record);
        ref->name_record = malloc(sizeof(struct darshan_name_record) +
            strlen(tmp_string));
        assert(ref->name_record);
        ref->name_record->id = tmp_id;
        strcpy(ref->name_record->name, tmp_string);
162
    }
163 164 165 166

    return;
}

167 168 169 170 171
/*
 * add_annotation()
 *
 * Appends each tab-separated token of annotation to the job's metadata
 * string, one token per line.  A token that does not fit in the space left
 * in job->metadata is skipped and reported on stderr; later, shorter tokens
 * may still be added.
 *
 * annotation: tab-separated list of entries; modified in place by strtok_r()
 * job:        job record whose NUL-terminated metadata buffer is extended
 */
void add_annotation (char *annotation,
                     struct darshan_job *job)
{
    char *token;
    char *save;
    size_t used;        /* current length of the metadata string */
    size_t remaining;   /* bytes left in the buffer, terminator included */

    /* Existing metadata must end in a newline before new lines are added.
     * Inserting one takes two bytes (newline + terminator), so it is only
     * done when both fit; writing at used+1 unconditionally would run one
     * byte past the buffer when the metadata already fills it.
     */
    used = strlen(job->metadata);
    if(used > 0 && job->metadata[used-1] != '\n' &&
       used + 1 < sizeof(job->metadata))
    {
        job->metadata[used] = '\n';
        job->metadata[used+1] = '\0';
        used++;
    }

    /* determine remaining space in metadata string */
    remaining = sizeof(job->metadata) - used;

    for(token=strtok_r(annotation, "\t", &save);
        token != NULL;
        token=strtok_r(NULL, "\t", &save))
    {
        size_t token_len = strlen(token);

        /* token + newline + terminator must all fit in what is left */
        if (token_len + 1 < remaining)
        {
            memcpy(job->metadata + used, token, token_len);
            job->metadata[used + token_len] = '\n';
            job->metadata[used + token_len + 1] = '\0';
            used += token_len + 1;
            remaining -= token_len + 1;
        }
        else
        {
            fprintf(stderr,
                    "not enough space left in metadata for: current=%s token=%s (remain=%d:need=%d)\n",
                    job->metadata, token, (int)remaining - 1, (int)token_len + 1);
        }
    }

    return;
}

209 210
/*
 * remove_hash_recs()
 *
 * Prunes the name hash so that only entries whose record id equals hash
 * remain.  Every other entry is unlinked from the table and both its name
 * record and the entry itself are freed.
 *
 * name_hash: in/out; head of the hash table, updated as entries are removed
 * hash:      record id to keep
 */
static void remove_hash_recs(struct darshan_name_record_ref **name_hash,
    darshan_record_id hash)
{
    struct darshan_name_record_ref *cur, *next;

    HASH_ITER(hlink, *name_hash, cur, next)
    {
        /* the requested record stays in the table */
        if(cur->name_record->id == hash)
            continue;

        HASH_DELETE(hlink, *name_hash, cur);
        free(cur->name_record);
        free(cur);
    }
}

227 228 229 230 231 232
int main(int argc, char **argv)
{
    int ret;
    char *infile_name;
    char *outfile_name;
    struct darshan_job job;
233
    char tmp_string[4096] = {0};
234 235 236 237 238
    darshan_fd infile;
    darshan_fd outfile;
    int i;
    int mount_count;
    char** mnt_pts;
239
    char** fs_types;
240 241
    struct darshan_name_record_ref *name_hash = NULL;
    struct darshan_name_record_ref *ref, *tmp;
242
    char mod_buf[DEF_MOD_BUF_SIZE];
243
    enum darshan_comp_type comp_type;
244 245 246
    int bzip2;
    int obfuscate;
    int key;
247
    char *annotation = NULL;
248 249
    darshan_record_id hash;
    int reset_md;
250

251 252
    parse_args(argc, argv, &infile_name, &outfile_name, &bzip2, &obfuscate,
               &reset_md, &key, &annotation, &hash);
253

254
    infile = darshan_log_open(infile_name);
255 256 257
    if(!infile)
        return(-1);
 
258
    comp_type = bzip2 ? DARSHAN_BZIP2_COMP : DARSHAN_ZLIB_COMP;
259
    outfile = darshan_log_create(outfile_name, comp_type, infile->partial_flag);
260 261
    if(!outfile)
    {
262 263 264 265
        darshan_log_close(infile);
        return(-1);
    }

266
    /* read job info */
267
    ret = darshan_log_get_job(infile, &job);
268 269 270
    if(ret < 0)
    {
        darshan_log_close(infile);
271
        darshan_log_close(outfile);
272
        unlink(outfile_name);
273 274 275
        return(-1);
    }

276
    if (reset_md) reset_md_job(&job);
277
    if (obfuscate) obfuscate_job(key, &job);
278
    if (annotation) add_annotation(annotation, &job);
279

280
    ret = darshan_log_put_job(outfile, &job);
281 282
    if (ret < 0)
    {
283
        darshan_log_close(infile);
284 285 286 287
        darshan_log_close(outfile);
        return(-1);
    }

288
    ret = darshan_log_get_exe(infile, tmp_string);
289 290 291
    if(ret < 0)
    {
        darshan_log_close(infile);
292
        darshan_log_close(outfile);
293
        unlink(outfile_name);
294 295
        return(-1);
    }
296

297 298
    if (obfuscate) obfuscate_exe(key, tmp_string);

299
    ret = darshan_log_put_exe(outfile, tmp_string);
300 301
    if(ret < 0)
    {
302
        darshan_log_close(infile);
303 304 305
        darshan_log_close(outfile);
        return(-1);
    }
306

307
    ret = darshan_log_get_mounts(infile, &mnt_pts, &fs_types, &mount_count);
308 309 310
    if(ret < 0)
    {
        darshan_log_close(infile);
311
        darshan_log_close(outfile);
312
        unlink(outfile_name);
313 314 315
        return(-1);
    }

316
    ret = darshan_log_put_mounts(outfile, mnt_pts, fs_types, mount_count);
317 318
    if(ret < 0)
    {
319
        darshan_log_close(infile);
320 321 322 323
        darshan_log_close(outfile);
        return(-1);
    }

324
    ret = darshan_log_get_namehash(infile, &name_hash);
325 326
    if(ret < 0)
    {
327 328
        darshan_log_close(infile);
        darshan_log_close(outfile);
329
        unlink(outfile_name);
330
        return(-1);
331
    }
332

333 334 335
    /* NOTE: obfuscating filepaths breaks the ability to map files
     * to the corresponding FS & mount info maintained by darshan
     */
336 337
    if(obfuscate) obfuscate_filenames(key, name_hash);
    if(hash) remove_hash_recs(&name_hash, hash);
338

339
    ret = darshan_log_put_namehash(outfile, name_hash);
340
    if(ret < 0)
341
    {
342 343 344
        darshan_log_close(infile);
        darshan_log_close(outfile);
        return(-1);
345 346
    }

347
    /* loop over each module and convert it's data to the new format */
348
    for(i=0; i<DARSHAN_MAX_MODS; i++)
349
    {
350
        struct darshan_base_record *base_rec;
351

352
        /* check each module for any data */
353
        if(infile->mod_map[i].len == 0)
354
            continue;
355
        else if(!mod_logutils[i])
356 357
        {
            fprintf(stderr, "Warning: no log utility handlers defined "
358
                "for module %s, SKIPPING.\n", darshan_module_names[i]);
359 360 361
            continue;
        }

362
        /* we have module data to convert */
363
        memset(mod_buf, 0, DEF_MOD_BUF_SIZE);
364

365
        ret = mod_logutils[i]->log_get_record(infile, mod_buf);
366
        if(ret != 1)
367
        {
368
            fprintf(stderr, "Error: failed to parse the first %s module record.\n",
369 370 371
                darshan_module_names[i]);
            darshan_log_close(infile);
            darshan_log_close(outfile);
372
            unlink(outfile_name);
373 374
            return(-1);
        }
375

376 377 378
        /* loop over each of the module's records and convert */
        do
        {
379 380 381
            base_rec = (struct darshan_base_record *)mod_buf;

            if(!hash || hash == base_rec->id)
382
            {
383
                ret = mod_logutils[i]->log_put_record(outfile, mod_buf, infile->mod_ver[i]);
384 385 386 387 388 389 390 391 392
                if(ret < 0)
                {
                    darshan_log_close(infile);
                    darshan_log_close(outfile);
                    return(-1);
                }

                memset(mod_buf, 0, DEF_MOD_BUF_SIZE);
            }
393
        } while((ret = mod_logutils[i]->log_get_record(infile, mod_buf)) == 1);
394 395 396 397 398
    }

    darshan_log_close(infile);
    darshan_log_close(outfile);

399 400 401 402 403 404 405 406 407 408
    for(i=0; i<mount_count; i++)
    {
        free(mnt_pts[i]);
        free(fs_types[i]);
    }
    if(mount_count > 0)
    {
        free(mnt_pts);
        free(fs_types);
    }
409

410
    HASH_ITER(hlink, name_hash, ref, tmp)
411
    {
412 413
        HASH_DELETE(hlink, name_hash, ref);
        free(ref->name_record);
414 415 416
        free(ref);
    }

417
    return(ret);
418 419
}

420 421 422 423 424 425 426 427
/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */