/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <zlib.h>
#include <time.h>
#include <stdlib.h>
#include <limits.h>
#include <getopt.h>
#include <assert.h>
#include <errno.h>
19
20
21

#include "darshan-logutils.h"

22
/* Hash routine defined outside this file.  The obfuscation helpers in this
 * file call it with the user-supplied key as `initval`, hashing `length`
 * bytes starting at `key` into a 32-bit value.
 */
extern uint32_t darshan_hashlittle(const void *key, size_t length, uint32_t initval);
23
24
25
26
27
28

/*
 * usage()
 *
 * Writes the command synopsis and the option summary to stderr and then
 * terminates the process with exit status 1.  The function is declared as
 * returning int, but control never returns to the caller.
 *
 * exename: program name substituted into the synopsis line
 */
int usage (char *exename)
{
    /* fixed help text that follows the synopsis line, one entry per line */
    static const char *const help_lines[] =
    {
        "       Converts darshan log from infile to outfile.\n",
        "       rewrites the log file into the newest format.\n",
        "       --obfuscate Obfuscate items in the log.\n",
        "       --key <key> Key to use when obfuscating.\n",
        "       --annotate <string> Additional metadata to add.\n",
        "       --file <hash> Limit output to specified (hashed) file only.\n",
        "       --reset-md Reset old metadata during conversion.\n",
    };
    size_t line;

    fprintf(stderr, "Usage: %s [options] <infile> <outfile>\n", exename);
    for(line = 0; line < sizeof(help_lines) / sizeof(help_lines[0]); line++)
    {
        fputs(help_lines[line], stderr);
    }

    exit(1);
}

38
/*
 * parse_args()
 *
 * Parses the command line with getopt_long() (long options only) and stores
 * the results through the output pointers.  Any unrecognized option,
 * --help, a malformed option argument, or a positional argument count other
 * than two results in a call to usage(), which prints help and exits.
 *
 * argc, argv: command line as passed to main()
 * infile:     set to the first positional argument
 * outfile:    set to the second positional argument
 * obfuscate:  set to 1 if --obfuscate is given; otherwise left untouched
 * reset_md:   reset to 0, then set to 1 if --reset-md is given
 * key:        set to the integer value of --key; otherwise left untouched
 * annotate:   set to the argument of --annotate; otherwise left untouched
 * hash:       reset to 0, then set to the value of --file if given
 *
 * Because obfuscate, key and annotate are only written when the matching
 * option appears, the caller must initialize them to its defaults.
 */
void parse_args (int argc, char **argv, char **infile, char **outfile,
                 int *obfuscate, int *reset_md, int *key, char **annotate, uint64_t* hash)
{
    int opt_index;
    int ret;
    long key_val;
    char *key_end;

    static struct option long_opts[] =
    {
        {"annotate", 1, NULL, 'a'},
        {"obfuscate", 0, NULL, 'o'},
        {"reset-md", 0, NULL, 'r'},
        {"key", 1, NULL, 'k'},
        {"file", 1, NULL, 'f'},
        {"help",  0, NULL, 0},
        { 0, 0, 0, 0 }
    };

    *reset_md = 0;
    *hash = 0;

    while(1)
    {
        int c = getopt_long(argc, argv, "", long_opts, &opt_index);

        if (c == -1) break;

        switch(c)
        {
            case 'a':
                *annotate = optarg;
                break;
            case 'o':
                *obfuscate = 1;
                break;
            case 'r':
                *reset_md = 1;
                break;
            case 'k':
                /* strtol() instead of atoi(): atoi() cannot report garbage
                 * input and has undefined behavior on out-of-range values
                 */
                errno = 0;
                key_val = strtol(optarg, &key_end, 10);
                if(key_end == optarg || *key_end != '\0' || errno == ERANGE ||
                   key_val < INT_MIN || key_val > INT_MAX)
                {
                    usage(argv[0]);
                }
                *key = (int)key_val;
                break;
            case 'f':
                ret = sscanf(optarg, "%" PRIu64, hash);
                if(ret != 1)
                {
                    usage(argv[0]);
                }
                break;
            case 0:   /* --help */
            case '?': /* unknown option or missing argument */
            default:
                usage(argv[0]);
                break;
        }
    }

    /* exactly two positional arguments are required: <infile> <outfile> */
    if (optind + 2 == argc)
    {
        *infile = argv[optind];
        *outfile = argv[optind+1];
    }
    else
    {
        usage(argv[0]);
    }

    return;
}

104
#if 0
105
106
107
108
109
110
/*
 * reset_md_job()
 *
 * Discards any metadata carried by the job record by truncating
 * job->metadata to the empty string.
 */
static void reset_md_job(struct darshan_job *job)
{
    *job->metadata = '\0';
}

111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
/*
 * obfuscate_job()
 *
 * Replaces the uid stored in the job record with a keyed hash of its bytes.
 * The jobid is replaced the same way, except that a jobid of 0 is left
 * as is.
 *
 * key: seed handed to darshan_hashlittle()
 * job: job record modified in place
 */
void obfuscate_job(int key, struct darshan_job *job)
{
    job->uid = (int64_t) darshan_hashlittle(&job->uid, sizeof(job->uid), key);

    /* a zero jobid is never hashed */
    if (job->jobid == 0)
    {
        return;
    }

    job->jobid = (int64_t) darshan_hashlittle(&job->jobid, sizeof(job->jobid), key);
}

/*
 * obfuscate_exe()
 *
 * Overwrites the string in `exe` with the decimal text of its keyed 32-bit
 * hash.  The old characters are zeroed first, then the hash is printed at
 * the start of the buffer.
 *
 * key: seed handed to darshan_hashlittle()
 * exe: NUL-terminated string, rewritten in place
 *
 * NOTE(review): the buffer size is not passed in; sprintf() may write more
 * bytes than the original string occupied, so the caller's buffer must be
 * large enough for the printed hash plus the terminating NUL.
 */
void obfuscate_exe(int key, char *exe)
{
    const size_t exe_len = strlen(exe);
    const uint32_t digest = darshan_hashlittle(exe, exe_len, key);

    memset(exe, 0, exe_len);
    sprintf(exe, "%u", digest);
}

/*
 * obfuscate_file()
 *
 * Replaces file->name_suffix with the decimal text of a keyed 32-bit hash
 * computed over the whole name_suffix field (its full sizeof, not just the
 * string length).  The field is zero-filled before the hash is printed.
 *
 * key:  seed handed to darshan_hashlittle()
 * file: file record modified in place
 */
void obfuscate_file(int key, struct darshan_file *file)
{
    const uint32_t digest =
        darshan_hashlittle(file->name_suffix, sizeof(file->name_suffix), key);

    memset(file->name_suffix, 0, sizeof(file->name_suffix));
    sprintf(file->name_suffix, "%u", digest);
}

144
145
146
147
148
/*
 * add_annotation()
 *
 * Appends the tab-separated tokens of `annotation` to job->metadata, one
 * token per line (each token is followed by a newline).  If the existing
 * metadata is non-empty and does not already end in a newline, one is
 * inserted first when there is room for it.  Tokens that do not fit in the
 * remaining space are skipped and reported on stderr.
 *
 * annotation: tab-separated list of entries; modified by strtok_r()
 * job:        job record whose metadata field is extended in place
 */
void add_annotation (char *annotation,
                     struct darshan_job *job)
{
    char *token;
    char *save;
    size_t len;
    size_t remaining;

    /* check for newline in existing metadata, insert if needed.  Both the
     * newline and the terminating NUL must fit, hence len + 1 < size; a
     * plain len < size test would write one byte past the array when the
     * field is exactly full.
     */
    len = strlen(job->metadata);
    if(len > 0 && len + 1 < sizeof(job->metadata))
    {
        if(job->metadata[len-1] != '\n')
        {
            job->metadata[len] = '\n';
            job->metadata[len+1] = '\0';
            len++;
        }
    }

    /* determine remaining space in metadata string (this count includes the
     * byte currently holding the terminating NUL)
     */
    remaining = sizeof(job->metadata) - len;

    for(token=strtok_r(annotation, "\t", &save);
        token != NULL;
        token=strtok_r(NULL, "\t", &save))
    {
        /* bytes appended for this token: its characters plus a newline */
        size_t need = strlen(token) + 1;

        /* strictly less than, so the terminating NUL still fits */
        if (need < remaining)
        {
            strcat(job->metadata, token);
            strcat(job->metadata, "\n");
            remaining -= need;
        }
        else
        {
            fprintf(stderr,
                    "not enough space left in metadata for: current=%s token=%s (remain=%zu:need=%zu)\n",
                    job->metadata, token, remaining-1, need);
        }
    }

    return;
}
185
#endif
186

187
188
189
190
191
int main(int argc, char **argv)
{
    int ret;
    char *infile_name;
    char *outfile_name;
192
    struct darshan_header header;
193
    struct darshan_job job;
194
    char tmp_string[4096];
195
196
197
198
199
    darshan_fd infile;
    darshan_fd outfile;
    int i;
    int mount_count;
    char** mnt_pts;
200
201
202
203
    struct darshan_record_ref *rec_hash = NULL;
    struct darshan_record_ref *ref, *tmp;
    char *mod_buf;
    int mod_buf_sz;
204
    char** fs_types;
205
206
    int obfuscate = 0;
    int key = 0;
207
    char *annotation = NULL;
208
    uint64_t hash;
209
    int reset_md = 0;
210

211
    parse_args(argc, argv, &infile_name, &outfile_name, &obfuscate, &reset_md, &key, &annotation, &hash);
212
213
214
215

    infile = darshan_log_open(infile_name, "r");
    if(!infile)
    {
216
        fprintf(stderr, "darshan_log_open() failed to open %s\n.", infile_name);
217
218
219
220
221
222
        return(-1);
    }
 
    outfile = darshan_log_open(outfile_name, "w");
    if(!outfile)
    {
223
        fprintf(stderr, "darshan_log_open() failed to open %s\n.", outfile_name);
224
225
226
227
228
229
230
231
232
233
234
        darshan_log_close(infile);
        return(-1);
    }

    /* read header from input file */
    ret = darshan_log_getheader(infile, &header);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to read header from input log file %s.\n", infile_name);
        darshan_log_close(infile);
        darshan_log_close(outfile);
235
236
237
        return(-1);
    }

238
239
240
    /* NOTE: we do not write the header to the output file until the end, as
     * the mapping data stored in this structure may change in the conversion
     * process (particularly, if we are converting between libz/bz2 compression)
241
     */
242

243
244
245
246
247
248
    /* read job info */
    ret = darshan_log_getjob(infile, &job);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to read job information from log file.\n");
        darshan_log_close(infile);
249
        darshan_log_close(outfile);
250
251
252
        return(-1);
    }

253
#if 0
254
    if (reset_md) reset_md_job(&job);
255
    if (obfuscate) obfuscate_job(key, &job);
256
    if (annotation) add_annotation(annotation, &job);
257
#endif
258

259
260
261
262
    ret = darshan_log_putjob(outfile, &job);
    if (ret < 0)
    {
        fprintf(stderr, "Error: unable to write job information to log file.\n");
263
        darshan_log_close(infile);
264
265
266
267
        darshan_log_close(outfile);
        return(-1);
    }

268
    ret = darshan_log_getexe(infile, tmp_string);
269
270
271
272
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to read trailing job information.\n");
        darshan_log_close(infile);
273
        darshan_log_close(outfile);
274
275
        return(-1);
    }
276

277
#if 0
278
    if (obfuscate) obfuscate_exe(key, tmp_string);
279
#endif
280

281
282
283
284
    ret = darshan_log_putexe(outfile, tmp_string);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to write trailing job information.\n");
285
        darshan_log_close(infile);
286
287
288
        darshan_log_close(outfile);
        return(-1);
    }
289
290

    ret = darshan_log_getmounts(infile, &mnt_pts, &fs_types, &mount_count);
291
292
293
294
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to read trailing job information.\n");
        darshan_log_close(infile);
295
        darshan_log_close(outfile);
296
297
298
        return(-1);
    }

299
    ret = darshan_log_putmounts(outfile, mnt_pts, fs_types, mount_count);
300
301
302
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to write mount information.\n");
303
        darshan_log_close(infile);
304
305
306
307
        darshan_log_close(outfile);
        return(-1);
    }

308
    ret = darshan_log_gethash(infile, &rec_hash);
309
310
    if(ret < 0)
    {
311
312
313
314
        fprintf(stderr, "Error: unable to read darshan record hash.\n");
        darshan_log_close(infile);
        darshan_log_close(outfile);
        return(-1);
315
    }
316
317
318

    ret = darshan_log_puthash(outfile, rec_hash);
    if(ret < 0)
319
    {
320
321
322
323
        fprintf(stderr, "Error: unable to write darshan record hash.\n");
        darshan_log_close(infile);
        darshan_log_close(outfile);
        return(-1);
324
325
    }

326
327
328
329
330
    mod_buf = malloc(DARSHAN_DEF_COMP_BUF_SZ);
    if(!mod_buf)
        return(-1);

    for(i=0; i<DARSHAN_MAX_MODS; i++)
331
    {
332
333
334
335
336
337
        memset(mod_buf, 0, DARSHAN_DEF_COMP_BUF_SZ);
        mod_buf_sz = DARSHAN_DEF_COMP_BUF_SZ;

        /* check each module for any data */
        ret = darshan_log_getmod(infile, i, mod_buf, &mod_buf_sz);
        if(ret < 0)
338
        {
339
340
341
342
            fprintf(stderr, "Error: failed to get module %s data.\n",
                darshan_module_names[i]);
            darshan_log_close(infile);
            darshan_log_close(outfile);
343
344
            return(-1);
        }
345
        else if(ret == 0)
346
        {
347
348
            /* skip modules not present in log file */
            continue;
349
        }
350

351
352
353
354
355
356
357
358
359
360
        /* we have module data to convert */
        ret = darshan_log_putmod(outfile, i, mod_buf, mod_buf_sz);
        if(ret < 0)
        {
            fprintf(stderr, "Error: failed to put module %s data.\n",
                darshan_module_names[i]);
            darshan_log_close(infile);
            darshan_log_close(outfile);
            return(-1);
        }
361
    }
362
    free(mod_buf);
363
364
365
366
367
368
369
370
371
372
373

    for(i=0; i<mount_count; i++)
    {
        free(mnt_pts[i]);
        free(fs_types[i]);
    }
    if(mount_count > 0)
    {
        free(mnt_pts);
        free(fs_types);
    }
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391

    HASH_ITER(hlink, rec_hash, ref, tmp)
    {
        HASH_DELETE(hlink, rec_hash, ref);
        free(ref->rec.name);
        free(ref);
    }

    /* write header to output file */
    ret = darshan_log_putheader(outfile);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to write header to output log file %s.\n", outfile_name);
        darshan_log_close(infile);
        darshan_log_close(outfile);
        return(-1);
    }

392
393
394
    darshan_log_close(infile);
    darshan_log_close(outfile);

395
    return(ret);
396
397
}

398
399
400
401
402
403
404
405
/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */