darshan-convert.c 10.5 KB
Newer Older
1
/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <zlib.h>
#include <time.h>
#include <stdlib.h>
#include <getopt.h>
#include <assert.h>
18
#include <errno.h>
19
20
21

#include "darshan-logutils.h"

22
extern uint32_t darshan_hashlittle(const void *key, size_t length, uint32_t initval);
23
24
25
26
27
28

/*
 * usage()
 *
 * Print the command synopsis followed by one help line per supported
 * option to stderr, then terminate the process with exit status 1.
 * Although declared to return int, this function never returns.
 *
 * exename: program name shown in the synopsis line.
 */
int usage (char *exename)
{
    /* help text emitted verbatim after the synopsis line */
    static const char *const help_lines[] =
    {
        "       Converts darshan log from infile to outfile.\n",
        "       rewrites the log file into the newest format.\n",
        "       --bzip2 Use bzip2 compression instead of zlib.\n",
        "       --obfuscate Obfuscate items in the log.\n",
        "       --key <key> Key to use when obfuscating.\n",
        "       --annotate <string> Additional metadata to add.\n",
        "       --file <hash> Limit output to specified (hashed) file only.\n",
        "       --reset-md Reset old metadata during conversion.\n",
    };
    size_t line;

    fprintf(stderr, "Usage: %s [options] <infile> <outfile>\n", exename);
    for(line = 0; line < sizeof(help_lines) / sizeof(help_lines[0]); line++)
    {
        fputs(help_lines[line], stderr);
    }

    exit(1);
}

39
/*
 * parse_args()
 *
 * Parse the command line using long options only and store the results
 * through the output pointers.
 *
 * argc, argv: command line as passed to main().
 * infile:     set to the first positional argument.
 * outfile:    set to the second positional argument.
 * bzip2:      reset to 0; set to 1 if --bzip2 is given.
 * obfuscate:  set to 1 if --obfuscate is given (not reset here; the caller
 *             must initialize it).
 * reset_md:   reset to 0; set to 1 if --reset-md is given.
 * key:        set to the decimal integer given with --key (not reset here;
 *             the caller must initialize it).
 * annotate:   set to the argument of --annotate (not reset here; the caller
 *             must initialize it).  Points into argv.
 * hash:       reset to 0; set to the unsigned decimal value given with
 *             --file.
 *
 * Any unknown option, --help, a malformed or out-of-range numeric argument,
 * or a positional argument count other than two results in a call to
 * usage(), which prints the help text and exits.
 */
void parse_args (int argc, char **argv, char **infile, char **outfile,
                 int *bzip2, int *obfuscate, int *reset_md, int *key,
                 char **annotate, uint64_t* hash)
{
    int index;
    char *end;
    long key_val;
    unsigned long long hash_val;

    static struct option long_opts[] =
    {
        {"bzip2", 0, NULL, 'b'},
        {"annotate", 1, NULL, 'a'},
        {"obfuscate", 0, NULL, 'o'},
        {"reset-md", 0, NULL, 'r'},
        {"key", 1, NULL, 'k'},
        {"file", 1, NULL, 'f'},
        {"help",  0, NULL, 0},
        { 0, 0, 0, 0 }
    };

    *bzip2 = 0;
    *reset_md = 0;
    *hash = 0;

    while(1)
    {
        int c = getopt_long(argc, argv, "", long_opts, &index);

        if (c == -1) break;

        switch(c)
        {
            case 'b':
                *bzip2 = 1;
                break;
            case 'a':
                *annotate = optarg;
                break;
            case 'o':
                *obfuscate = 1;
                break;
            case 'r':
                *reset_md = 1;
                break;
            case 'k':
                /* strtol (unlike atoi) lets us reject empty, non-numeric,
                 * and out-of-range keys instead of silently using 0
                 */
                errno = 0;
                key_val = strtol(optarg, &end, 10);
                if(end == optarg || *end != '\0' || errno == ERANGE ||
                   key_val != (long)(int)key_val)
                    usage(argv[0]);
                *key = (int)key_val;
                break;
            case 'f':
                /* parse the full argument as an unsigned decimal value;
                 * trailing garbage and overflow are treated as errors
                 */
                errno = 0;
                hash_val = strtoull(optarg, &end, 10);
                if(end == optarg || *end != '\0' || errno == ERANGE)
                    usage(argv[0]);
                *hash = (uint64_t)hash_val;
                break;
            case 0:     /* --help */
            case '?':   /* unrecognized option or missing argument */
            default:
                usage(argv[0]);
                break;
        }
    }

    /* exactly two positional arguments are required: <infile> <outfile> */
    if (optind + 2 == argc)
    {
        *infile = argv[optind];
        *outfile = argv[optind+1];
    }
    else
    {
        usage(argv[0]);
    }

    return;
}

111
#if 0
112
113
114
115
116
117
/* Discard any existing job metadata by truncating the metadata string
 * to the empty string.
 */
static void reset_md_job(struct darshan_job *job)
{
    *job->metadata = '\0';
}

118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
/* Replace the job's uid, and its jobid when that is nonzero, with a hash
 * of the original value computed by darshan_hashlittle() using key as the
 * initial value.
 */
void obfuscate_job(int key, struct darshan_job *job)
{
    uint32_t uid_digest;

    uid_digest = darshan_hashlittle(&job->uid, sizeof(job->uid), key);
    job->uid = (int64_t) uid_digest;

    /* a jobid of zero is left untouched */
    if (job->jobid == 0)
        return;

    job->jobid = (int64_t) darshan_hashlittle(&job->jobid, sizeof(job->jobid), key);
}

/* Overwrite the executable string in place with the decimal text of its
 * darshan_hashlittle() hash (key is the initial hash value).
 *
 * NOTE(review): the decimal hash can need up to 11 bytes including the
 * terminator; the capacity of exe is not known here, so the caller must
 * provide a buffer at least that large.
 */
void obfuscate_exe(int key, char *exe)
{
    size_t exe_len = strlen(exe);
    uint32_t digest = darshan_hashlittle(exe, exe_len, key);

    /* wipe the original text before writing the replacement */
    memset(exe, 0, exe_len);
    sprintf(exe, "%u", digest);
}

/* Replace the file record's name_suffix field with the decimal text of
 * the darshan_hashlittle() hash of the entire field (key is the initial
 * hash value).  The whole field is zeroed before the hash text is written.
 */
void obfuscate_file(int key, struct darshan_file *file)
{
    const size_t suffix_sz = sizeof(file->name_suffix);
    uint32_t digest;

    digest = darshan_hashlittle(file->name_suffix, suffix_sz, key);
    memset(file->name_suffix, 0, suffix_sz);
    sprintf(file->name_suffix, "%u", digest);
}

151
152
153
154
155
/*
 * add_annotation()
 *
 * Append tab-separated annotation entries to the job's metadata string,
 * one entry per line.
 *
 * annotation: tab-delimited list of entries; modified in place by
 *             strtok_r().
 * job:        job record whose metadata string is extended.
 *
 * An entry that does not fit in the remaining metadata space is skipped
 * and reported on stderr; later (shorter) entries may still be added.
 */
void add_annotation (char *annotation,
                     struct darshan_job *job)
{
    char *token;
    char *save;
    size_t len;
    size_t remaining;

    /* check for newline in existing metadata, insert if needed; both the
     * newline and the terminator must fit, hence len + 1 < size
     */
    len = strlen(job->metadata);
    if(len > 0 && len + 1 < sizeof(job->metadata))
    {
        if(job->metadata[len-1] != '\n')
        {
            job->metadata[len] = '\n';
            job->metadata[len+1] = '\0';
        }
    }

    /* determine remaining space in metadata string (this count includes
     * the byte occupied by the terminator)
     */
    remaining = sizeof(job->metadata) - strlen(job->metadata);

    for(token=strtok_r(annotation, "\t", &save);
        token != NULL;
        token=strtok_r(NULL, "\t", &save))
    {
        /* bytes needed for the token plus its trailing newline */
        size_t need = strlen(token) + 1;

        if (need < remaining)
        {
            strcat(job->metadata, token);
            strcat(job->metadata, "\n");
            remaining -= need;
        }
        else
        {
            fprintf(stderr,
                    "not enough space left in metadata for: current=%s token=%s (remain=%zu:need=%zu)\n",
                    job->metadata, token, remaining-1, need);
        }
    }

    return;
}
192
#endif
193

194
195
196
197
198
int main(int argc, char **argv)
{
    int ret;
    char *infile_name;
    char *outfile_name;
199
    struct darshan_header header;
200
    struct darshan_job job;
201
    char tmp_string[4096];
202
203
204
205
206
    darshan_fd infile;
    darshan_fd outfile;
    int i;
    int mount_count;
    char** mnt_pts;
207
208
209
210
    struct darshan_record_ref *rec_hash = NULL;
    struct darshan_record_ref *ref, *tmp;
    char *mod_buf;
    int mod_buf_sz;
211
    char** fs_types;
212
213
214
    int bzip2;
    enum darshan_comp_type comp_type;

215
216
    int obfuscate = 0;
    int key = 0;
217
    char *annotation = NULL;
218
    uint64_t hash;
219
    int reset_md = 0;
220

221
222
    parse_args(argc, argv, &infile_name, &outfile_name, &bzip2, &obfuscate,
               &reset_md, &key, &annotation, &hash);
223

224
    infile = darshan_log_open(infile_name);
225
226
    if(!infile)
    {
227
        fprintf(stderr, "darshan_log_open() failed to open %s\n.", infile_name);
228
229
230
        return(-1);
    }
 
231
232
    comp_type = bzip2 ? comp_type = DARSHAN_BZIP2_COMP : DARSHAN_ZLIB_COMP;
    outfile = darshan_log_create(outfile_name, comp_type);
233
234
    if(!outfile)
    {
235
        fprintf(stderr, "darshan_log_create() failed to create %s\n.", outfile_name);
236
237
238
239
240
241
242
243
244
245
246
        darshan_log_close(infile);
        return(-1);
    }

    /* read header from input file */
    ret = darshan_log_getheader(infile, &header);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to read header from input log file %s.\n", infile_name);
        darshan_log_close(infile);
        darshan_log_close(outfile);
247
248
249
        return(-1);
    }

250
251
252
    /* NOTE: we do not write the header to the output file until the end, as
     * the mapping data stored in this structure may change in the conversion
     * process (particularly, if we are converting between libz/bz2 compression)
253
     */
254

255
256
257
258
259
260
    /* read job info */
    ret = darshan_log_getjob(infile, &job);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to read job information from log file.\n");
        darshan_log_close(infile);
261
        darshan_log_close(outfile);
262
263
264
        return(-1);
    }

265
#if 0
266
    if (reset_md) reset_md_job(&job);
267
    if (obfuscate) obfuscate_job(key, &job);
268
    if (annotation) add_annotation(annotation, &job);
269
#endif
270

271
272
273
274
    ret = darshan_log_putjob(outfile, &job);
    if (ret < 0)
    {
        fprintf(stderr, "Error: unable to write job information to log file.\n");
275
        darshan_log_close(infile);
276
277
278
279
        darshan_log_close(outfile);
        return(-1);
    }

280
    ret = darshan_log_getexe(infile, tmp_string);
281
282
283
284
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to read trailing job information.\n");
        darshan_log_close(infile);
285
        darshan_log_close(outfile);
286
287
        return(-1);
    }
288

289
#if 0
290
    if (obfuscate) obfuscate_exe(key, tmp_string);
291
#endif
292

293
294
295
296
    ret = darshan_log_putexe(outfile, tmp_string);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to write trailing job information.\n");
297
        darshan_log_close(infile);
298
299
300
        darshan_log_close(outfile);
        return(-1);
    }
301
302

    ret = darshan_log_getmounts(infile, &mnt_pts, &fs_types, &mount_count);
303
304
305
306
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to read trailing job information.\n");
        darshan_log_close(infile);
307
        darshan_log_close(outfile);
308
309
310
        return(-1);
    }

311
    ret = darshan_log_putmounts(outfile, mnt_pts, fs_types, mount_count);
312
313
314
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to write mount information.\n");
315
        darshan_log_close(infile);
316
317
318
319
        darshan_log_close(outfile);
        return(-1);
    }

320
    ret = darshan_log_gethash(infile, &rec_hash);
321
322
    if(ret < 0)
    {
323
324
325
326
        fprintf(stderr, "Error: unable to read darshan record hash.\n");
        darshan_log_close(infile);
        darshan_log_close(outfile);
        return(-1);
327
    }
328
329
330

    ret = darshan_log_puthash(outfile, rec_hash);
    if(ret < 0)
331
    {
332
333
334
335
        fprintf(stderr, "Error: unable to write darshan record hash.\n");
        darshan_log_close(infile);
        darshan_log_close(outfile);
        return(-1);
336
337
    }

338
339
340
341
342
    mod_buf = malloc(DARSHAN_DEF_COMP_BUF_SZ);
    if(!mod_buf)
        return(-1);

    for(i=0; i<DARSHAN_MAX_MODS; i++)
343
    {
344
345
346
347
348
349
        memset(mod_buf, 0, DARSHAN_DEF_COMP_BUF_SZ);
        mod_buf_sz = DARSHAN_DEF_COMP_BUF_SZ;

        /* check each module for any data */
        ret = darshan_log_getmod(infile, i, mod_buf, &mod_buf_sz);
        if(ret < 0)
350
        {
351
352
353
354
            fprintf(stderr, "Error: failed to get module %s data.\n",
                darshan_module_names[i]);
            darshan_log_close(infile);
            darshan_log_close(outfile);
355
356
            return(-1);
        }
357
        else if(ret == 0)
358
        {
359
360
            /* skip modules not present in log file */
            continue;
361
        }
362

363
364
365
366
367
368
369
370
371
372
        /* we have module data to convert */
        ret = darshan_log_putmod(outfile, i, mod_buf, mod_buf_sz);
        if(ret < 0)
        {
            fprintf(stderr, "Error: failed to put module %s data.\n",
                darshan_module_names[i]);
            darshan_log_close(infile);
            darshan_log_close(outfile);
            return(-1);
        }
373
    }
374
    free(mod_buf);
375
376
377
378
379
380
381
382
383
384
385

    for(i=0; i<mount_count; i++)
    {
        free(mnt_pts[i]);
        free(fs_types[i]);
    }
    if(mount_count > 0)
    {
        free(mnt_pts);
        free(fs_types);
    }
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403

    HASH_ITER(hlink, rec_hash, ref, tmp)
    {
        HASH_DELETE(hlink, rec_hash, ref);
        free(ref->rec.name);
        free(ref);
    }

    /* write header to output file */
    ret = darshan_log_putheader(outfile);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to write header to output log file %s.\n", outfile_name);
        darshan_log_close(infile);
        darshan_log_close(outfile);
        return(-1);
    }

404
405
406
    darshan_log_close(infile);
    darshan_log_close(outfile);

407
    return(ret);
408
409
}

410
411
412
413
414
415
416
417
/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */