/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <zlib.h>
#include <time.h>
#include <stdlib.h>
#include <getopt.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>

#include "darshan-logutils.h"

22
extern uint32_t darshan_hashlittle(const void *key, size_t length, uint32_t initval);
23
24
25
26
27
28

/*
 * Print the command-line help text to stderr and terminate the process
 * with exit status 1.  The function is declared to return int but never
 * returns to its caller.
 *
 * exename: program name substituted into the first line of the help text.
 */
int usage (char *exename)
{
    /* fixed help lines that follow the "Usage:" line, in output order */
    static const char *const help_lines[] =
    {
        "       Converts darshan log from infile to outfile.\n",
        "       rewrites the log file into the newest format.\n",
        "       --bzip2 Use bzip2 compression instead of zlib.\n",
        "       --obfuscate Obfuscate items in the log.\n",
        "       --key <key> Key to use when obfuscating.\n",
        "       --annotate <string> Additional metadata to add.\n",
        "       --file <hash> Limit output to specified (hashed) file only.\n",
        "       --reset-md Reset old metadata during conversion.\n",
    };
    size_t line;

    fprintf(stderr, "Usage: %s [options] <infile> <outfile>\n", exename);

    for(line = 0; line < sizeof(help_lines) / sizeof(help_lines[0]); line++)
    {
        fputs(help_lines[line], stderr);
    }

    exit(1);
}

39
/*
 * Parse the command line into the caller-supplied output locations.
 *
 * Recognised long options: --bzip2, --annotate <string>, --obfuscate,
 * --reset-md, --key <key>, --file <hash>.  Exactly two positional
 * arguments (input log, output log) must follow the options.
 *
 * Outputs:
 *   infile, outfile - set to the two positional arguments
 *   bzip2, obfuscate, reset_md - set to 1 if the flag was given, else 0
 *   key      - integer obfuscation key, 0 if --key was not given
 *   annotate - set to the --annotate argument only when the option is
 *              present; it is NOT given a default here, so the caller
 *              must initialise it before the call
 *   hash     - record id given with --file, 0 if not given
 *
 * Any unrecognised option, --help, a malformed --key or --file value, or
 * a wrong number of positional arguments ends in usage(), which prints
 * the help text and exits.
 */
void parse_args (int argc, char **argv, char **infile, char **outfile,
                 int *bzip2, int *obfuscate, int *reset_md, int *key,
                 char **annotate, uint64_t* hash)
{
    int index = 0;
    int ret;
    long key_val;
    char *end;

    static struct option long_opts[] =
    {
        {"bzip2", 0, NULL, 'b'},
        {"annotate", 1, NULL, 'a'},
        {"obfuscate", 0, NULL, 'o'},
        {"reset-md", 0, NULL, 'r'},
        {"key", 1, NULL, 'k'},
        {"file", 1, NULL, 'f'},
        {"help",  0, NULL, 0},
        { 0, 0, 0, 0 }
    };

    *bzip2 = 0;
    *obfuscate = 0;
    *reset_md = 0;
    *key = 0;
    *hash = 0;

    while(1)
    {
        int c = getopt_long(argc, argv, "", long_opts, &index);

        if (c == -1) break;

        switch(c)
        {
            case 'b':
                *bzip2 = 1;
                break;
            case 'a':
                *annotate = optarg;
                break;
            case 'o':
                *obfuscate = 1;
                break;
            case 'r':
                *reset_md = 1;
                break;
            case 'k':
                /* strtol (unlike atoi) lets us reject empty, non-numeric,
                 * trailing-garbage and out-of-range keys instead of
                 * silently obfuscating with an unintended key
                 */
                errno = 0;
                key_val = strtol(optarg, &end, 10);
                if(end == optarg || *end != '\0' || errno == ERANGE ||
                   key_val < INT_MIN || key_val > INT_MAX)
                    usage(argv[0]);
                *key = (int)key_val;
                break;
            case 'f':
                ret = sscanf(optarg, "%" PRIu64, hash);
                if(ret != 1)
                    usage(argv[0]);
                break;
            /* "help" maps to 0 in long_opts, so it lands here too */
            case 0:
            case '?':
            default:
                usage(argv[0]);
                break;
        }
    }

    if (optind + 2 == argc)
    {
        *infile = argv[optind];
        *outfile = argv[optind+1];
    }
    else
    {
        usage(argv[0]);
    }

    return;
}

113
114
115
116
117
118
/* Truncate the job's metadata string to zero length; only the first byte
 * of the buffer is written.
 */
static void reset_md_job(struct darshan_job *job)
{
    *job->metadata = '\0';
}

119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
/*
 * Replace the identifying fields of a job record with keyed hashes.
 *
 * The uid is always replaced by the hash of its own bytes.  The jobid is
 * replaced the same way, except that a jobid of 0 is left untouched.
 *
 * key: seed passed to darshan_hashlittle
 * job: job record modified in place
 */
void obfuscate_job(int key, struct darshan_job *job)
{
    uint32_t uid_digest;

    uid_digest = darshan_hashlittle(&job->uid, sizeof(job->uid), key);
    job->uid = (int64_t) uid_digest;

    /* a zero jobid is kept as-is */
    if (job->jobid == 0)
        return;

    job->jobid = (int64_t) darshan_hashlittle(&job->jobid, sizeof(job->jobid), key);
}

/*
 * Overwrite an executable/command-line string, in place, with the decimal
 * text of its keyed hash.
 *
 * key: seed passed to darshan_hashlittle
 * exe: NUL-terminated string; its original characters are zeroed and the
 *      hash is then printed at the start of the same buffer
 *
 * NOTE(review): the printed hash can be up to 10 digits plus a terminator.
 * When the original string is shorter than that, this writes past the old
 * string's end and so relies on the caller's buffer being larger than the
 * string it holds -- confirm against callers.
 */
void obfuscate_exe(int key, char *exe)
{
    size_t exe_len = strlen(exe);
    uint32_t digest = darshan_hashlittle(exe, exe_len, key);

    memset(exe, 0, exe_len);
    sprintf(exe, "%u", digest);
}

141
void obfuscate_filenames(int key, struct darshan_name_record_ref *name_hash, struct darshan_mnt_info *mnt_data_array, int mount_count )
142
{
143
    struct darshan_name_record_ref *ref, *tmp;
144
    uint32_t hashed;
145
    char tmp_string[PATH_MAX+128] = {0};
146
    darshan_record_id tmp_id;
147

148
    HASH_ITER(hlink, name_hash, ref, tmp)
149
    {
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
        /* find file system */
        int j;
        char *mnt_pt = NULL;

        /* get mount point and fs type associated with this record */
        for(j=0; j<mount_count; j++)
        {
            if(strncmp(mnt_data_array[j].mnt_path, ref->name_record->name,
                strlen(mnt_data_array[j].mnt_path)) == 0)
            {
                mnt_pt = mnt_data_array[j].mnt_path;
                break;
            }
        }

165
166
167
        tmp_id = ref->name_record->id;
        hashed = darshan_hashlittle(ref->name_record->name,
            strlen(ref->name_record->name), key);
168
169
170
171
172
173
174
175
        if ( mnt_pt != NULL ) 
        {
            sprintf(tmp_string, "%s/%u", mnt_pt, hashed);
        }
        else 
        {
            sprintf(tmp_string, "%u", hashed);
        }
176
177
178
179
180
181
        free(ref->name_record);
        ref->name_record = malloc(sizeof(struct darshan_name_record) +
            strlen(tmp_string));
        assert(ref->name_record);
        ref->name_record->id = tmp_id;
        strcpy(ref->name_record->name, tmp_string);
182
    }
183
184
185
186

    return;
}

187
188
189
190
191
/*
 * Append user-supplied annotations to the job's metadata string.
 *
 * annotation: tab-separated list of entries; it is split in place with
 *             strtok_r, so the caller's string is modified
 * job:        job record whose metadata buffer receives the entries
 *
 * Each entry is appended followed by a newline.  If the existing metadata
 * is non-empty and does not end in a newline, one is inserted first when
 * there is room for it.  An entry that does not fit in the remaining
 * space is skipped and reported on stderr; later (shorter) entries are
 * still tried.
 */
void add_annotation (char *annotation,
                     struct darshan_job *job)
{
    char *token;
    char *save;
    size_t len;
    size_t remaining;
    size_t need;

    /* check for newline in existing metadata, insert if needed.  Writing
     * the newline plus terminator needs indices len and len+1, so both
     * must lie inside the buffer.
     */
    len = strlen(job->metadata);
    if(len > 0 && job->metadata[len-1] != '\n' &&
       len + 1 < sizeof(job->metadata))
    {
        job->metadata[len] = '\n';
        job->metadata[len+1] = '\0';
        len++;
    }

    /* determine remaining space in metadata string (includes the byte
     * currently holding the terminator, so it is always at least 1)
     */
    remaining = sizeof(job->metadata) - len;

    for(token=strtok_r(annotation, "\t", &save);
        token != NULL;
        token=strtok_r(NULL, "\t", &save))
    {
        /* entry text plus its trailing newline */
        need = strlen(token) + 1;

        if (need < remaining)
        {
            strcat(job->metadata, token);
            strcat(job->metadata, "\n");
            remaining -= need;
        }
        else
        {
            fprintf(stderr,
                    "not enough space left in metadata for: current=%s token=%s (remain=%d:need=%d)\n",
                    job->metadata, token, (int)remaining-1, (int)need);
        }
    }

    return;
}

229
230
/*
 * Remove from the name-record hash every entry whose record id differs
 * from `hash`, freeing both the entry and its name record.  Entries with
 * a matching id are left in place.
 *
 * name_hash: address of the hash table head; updated by the deletions
 * hash:      record id to keep
 */
static void remove_hash_recs(struct darshan_name_record_ref **name_hash,
    darshan_record_id hash)
{
    struct darshan_name_record_ref *entry, *next;

    HASH_ITER(hlink, *name_hash, entry, next)
    {
        /* keep the record that was asked for */
        if(entry->name_record->id == hash)
            continue;

        HASH_DELETE(hlink, *name_hash, entry);
        free(entry->name_record);
        free(entry);
    }
}

247
248
249
250
251
252
int main(int argc, char **argv)
{
    int ret;
    char *infile_name;
    char *outfile_name;
    struct darshan_job job;
253
    char tmp_string[4096] = {0};
254
255
256
257
    darshan_fd infile;
    darshan_fd outfile;
    int i;
    int mount_count;
258
    struct darshan_mnt_info *mnt_data_array;
259
260
    struct darshan_name_record_ref *name_hash = NULL;
    struct darshan_name_record_ref *ref, *tmp;
261
    char *mod_buf, *tmp_mod_buf;
262
    enum darshan_comp_type comp_type;
263
264
265
    int bzip2;
    int obfuscate;
    int key;
266
    char *annotation = NULL;
267
268
    darshan_record_id hash;
    int reset_md;
269

270
271
    parse_args(argc, argv, &infile_name, &outfile_name, &bzip2, &obfuscate,
               &reset_md, &key, &annotation, &hash);
272

273
    infile = darshan_log_open(infile_name);
274
275
276
    if(!infile)
        return(-1);
 
277
    comp_type = bzip2 ? DARSHAN_BZIP2_COMP : DARSHAN_ZLIB_COMP;
278
    outfile = darshan_log_create(outfile_name, comp_type, infile->partial_flag);
279
280
    if(!outfile)
    {
281
282
283
284
        darshan_log_close(infile);
        return(-1);
    }

285
    /* read job info */
286
    ret = darshan_log_get_job(infile, &job);
287
288
289
    if(ret < 0)
    {
        darshan_log_close(infile);
290
        darshan_log_close(outfile);
291
        unlink(outfile_name);
292
293
294
        return(-1);
    }

295
    if (reset_md) reset_md_job(&job);
296
    if (obfuscate) obfuscate_job(key, &job);
297
    if (annotation) add_annotation(annotation, &job);
298

299
    ret = darshan_log_put_job(outfile, &job);
300
301
    if (ret < 0)
    {
302
        darshan_log_close(infile);
303
304
305
306
        darshan_log_close(outfile);
        return(-1);
    }

307
    ret = darshan_log_get_exe(infile, tmp_string);
308
309
310
    if(ret < 0)
    {
        darshan_log_close(infile);
311
        darshan_log_close(outfile);
312
        unlink(outfile_name);
313
314
        return(-1);
    }
315

316
317
    if (obfuscate) obfuscate_exe(key, tmp_string);

318
    ret = darshan_log_put_exe(outfile, tmp_string);
319
320
    if(ret < 0)
    {
321
        darshan_log_close(infile);
322
323
324
        darshan_log_close(outfile);
        return(-1);
    }
325

326
    ret = darshan_log_get_mounts(infile, &mnt_data_array, &mount_count);
327
328
329
    if(ret < 0)
    {
        darshan_log_close(infile);
330
        darshan_log_close(outfile);
331
        unlink(outfile_name);
332
333
334
        return(-1);
    }

335
    ret = darshan_log_put_mounts(outfile, mnt_data_array, mount_count);
336
337
    if(ret < 0)
    {
338
        darshan_log_close(infile);
339
340
341
342
        darshan_log_close(outfile);
        return(-1);
    }

343
    ret = darshan_log_get_namehash(infile, &name_hash);
344
345
    if(ret < 0)
    {
346
347
        darshan_log_close(infile);
        darshan_log_close(outfile);
348
        unlink(outfile_name);
349
        return(-1);
350
    }
351

352
353
354
    /* NOTE: obfuscating filepaths breaks the ability to map files
     * to the corresponding FS & mount info maintained by darshan
     */
355
    if(obfuscate) obfuscate_filenames(key, name_hash, mnt_data_array, mount_count );
356
    if(hash) remove_hash_recs(&name_hash, hash);
357

358
    ret = darshan_log_put_namehash(outfile, name_hash);
359
    if(ret < 0)
360
    {
361
362
        darshan_log_close(infile);
        darshan_log_close(outfile);
363
        unlink(outfile_name);
364
        return(-1);
365
366
    }

367
368
369
370
371
    mod_buf = malloc(DEF_MOD_BUF_SIZE);
    if(!mod_buf)
    {
        darshan_log_close(infile);
        darshan_log_close(outfile);
372
        unlink(outfile_name);
373
374
375
        return(-1);
    }

376
    /* loop over each module and convert it's data to the new format */
377
    for(i=0; i<DARSHAN_MAX_MODS; i++)
378
    {
379
        struct darshan_base_record *base_rec;
380

381
        /* check each module for any data */
382
        if(infile->mod_map[i].len == 0)
383
            continue;
384
        else if(!mod_logutils[i])
385
386
        {
            fprintf(stderr, "Warning: no log utility handlers defined "
387
                "for module %s, SKIPPING.\n", darshan_module_names[i]);
388
389
390
            continue;
        }

391
392
393
394
395
396
397
398
399
400
401
402
        /* for dxt, don't use static record buffer and instead have
         * darshan-logutils malloc us memory for the trace data
         */
        if(i == DXT_POSIX_MOD || i == DXT_MPIIO_MOD)
        {
            tmp_mod_buf = NULL;
        }
        else
        {
            tmp_mod_buf = mod_buf;
            memset(tmp_mod_buf, 0, DEF_MOD_BUF_SIZE);
        }
403

404
        /* loop over each of the module's records and convert */
405
        while((ret = mod_logutils[i]->log_get_record(infile, (void **)&tmp_mod_buf)) == 1)
406
        {
407
            base_rec = (struct darshan_base_record *)tmp_mod_buf;
408
409

            if(!hash || hash == base_rec->id)
410
            {
411
                ret = mod_logutils[i]->log_put_record(outfile, tmp_mod_buf);
412
413
                if(ret < 0)
                {
414
415
                    if(i == DXT_POSIX_MOD || i == DXT_MPIIO_MOD)
                        free(tmp_mod_buf);
416
417
                    darshan_log_close(infile);
                    darshan_log_close(outfile);
418
                    unlink(outfile_name);
419
420
                    return(-1);
                }
421
            }
422

423
424
425
426
427
428
429
430
            if(i == DXT_POSIX_MOD || i == DXT_MPIIO_MOD)
            {
                free(tmp_mod_buf);
                tmp_mod_buf = NULL;
            }
            else
            {
                memset(tmp_mod_buf, 0, DEF_MOD_BUF_SIZE);
431
            }
432
433
434
435
436
437
438
439
440
441
        }
        if(ret < 0)
        {
            fprintf(stderr, "Error: failed to parse %s module record.\n",
                darshan_module_names[i]);
            darshan_log_close(infile);
            darshan_log_close(outfile);
            unlink(outfile_name);
            return(-1);
        }
442
443
444
445
446
    }

    darshan_log_close(infile);
    darshan_log_close(outfile);

447
    if(mount_count > 0)
448
        free(mnt_data_array);
449

450
    HASH_ITER(hlink, name_hash, ref, tmp)
451
    {
452
453
        HASH_DELETE(hlink, name_hash, ref);
        free(ref->name_record);
454
455
456
        free(ref);
    }

457
458
    free(mod_buf);

459
    return(ret);
460
461
}

462
463
464
465
466
467
468
469
/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */