/* darshan-merge.c */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <string.h>
#include <getopt.h>
#include <glob.h>
#include <unistd.h>

#include "uthash-1.9.2/src/uthash.h"

#include "darshan-logutils.h"

/* Size, in bytes, of the fixed buffers in this file that hold one module
 * record (main's mod_buf and each shared-record aggregate buffer).
 */
#define DEF_MOD_BUF_SIZE 1024 /* 1 KiB is enough for all current mod records ... */

/* TODO: set job end timestamp? */

/* Hash-table entry tracking one candidate "shared" record of a module.
 * Entries are keyed by record id and linked together through hlink (uthash).
 */
struct darshan_shared_record_ref
{
    darshan_record_id id;            /* hash key: id of the tracked record */
    int ref_cnt;                     /* number of input records folded into agg_rec so far;
                                      * compared against nprocs to decide if the record is shared */
    char agg_rec[DEF_MOD_BUF_SIZE];  /* module-specific aggregate record, filled by log_agg_records() */
    UT_hash_handle hlink;            /* uthash bookkeeping handle */
};

/* Print the command-line help text to stderr and terminate the process.
 *
 * exename: program name shown in the "Usage:" line (callers pass argv[0]).
 *
 * This function does not return; it always exits with status 1.
 */
void usage(char *exename)
{
    fprintf(stderr, "Usage: %s --output-dir <output_dir> [options] <input-logs>\n", exename);
    fprintf(stderr, "This utility merges multiple Darshan log files into a single output log file.\n");
    fprintf(stderr, "Options:\n");
    fprintf(stderr, "\t--output-dir\t(REQUIRED) Output directory to store output log file in.\n");
    fprintf(stderr, "\t--output-name\tOutput log file name. If unspecified, name generated automatically.\n");
    fprintf(stderr, "\t--shared-redux\tReduce globally shared records into a single record.\n");

    exit(1);
}

/* Parse the command line of the merge utility.
 *
 * argc, argv:    the arguments as received by main().
 * infile_list:   out - points at the first non-option argument inside argv
 *                (the list of input log paths; no copy is made).
 * n_files:       out - number of entries in *infile_list.
 * outlog_dir:    out - set to the --output-dir argument. Only written when
 *                the option is present, so the caller must initialize it to
 *                NULL beforehand.
 * outlog_name:   out - set to the --output-name argument. Only written when
 *                the option is present; the caller must initialize it.
 * shared_redux:  out - 1 if --shared-redux was given, 0 otherwise.
 *
 * On any invalid invocation (unknown option, missing --output-dir, or no
 * input logs) usage() is called, which prints help and exits the process.
 */
void parse_args(int argc, char **argv, char ***infile_list, int *n_files,
    char **outlog_dir, char **outlog_name, int *shared_redux)
{
    int opt_index = 0;
    static struct option long_opts[] =
    {
        {"shared-redux", no_argument, NULL, 's'},
        {"output-dir", required_argument, NULL, 'd'},
        {"output-name", required_argument, NULL, 'n'},
        {0, 0, 0, 0}
    };

    *shared_redux = 0;

    /* only long options are accepted, hence the empty short-option string */
    while(1)
    {
        int c = getopt_long(argc, argv, "", long_opts, &opt_index);

        if(c == -1) break;

        switch(c)
        {
            case 's':
                *shared_redux = 1;
                break;
            case 'd':
                *outlog_dir = optarg;
                break;
            case 'n':
                *outlog_name = optarg;
                break;
            case '?':
            default:
                usage(argv[0]);
                break;
        }
    }

    if(*outlog_dir == NULL)
    {
        usage(argv[0]);
    }

    /* everything after the options is treated as an input log path */
    *infile_list = &argv[optind];
    *n_files = argc - optind;

    /* merging needs at least one input log; without this check the caller
     * would go on to write job/exe/mount data that was never read
     */
    if(*n_files < 1)
    {
        usage(argv[0]);
    }

    return;
}

/* Build the table of records of one module that appear in every rank's log.
 *
 * infile_list, n_infiles: input log paths and their count.
 * mod_id:           module whose records are scanned (index into mod_logutils).
 * nprocs:           number of times a record must be seen to count as shared.
 * mod_buf:          caller-provided scratch buffer that receives each record
 *                   read through the module's log_get_record().
 * shared_rec_hash:  in/out - uthash table (keyed by record id) that receives
 *                   one aggregate entry per shared record. Entries are
 *                   heap-allocated here and must be freed by the caller.
 *
 * Every record of the first input log seeds a candidate entry; records with
 * the same id found later are folded into that entry with log_agg_records()
 * and bump its ref_cnt. Candidates whose ref_cnt differs from nprocs are
 * removed before returning.
 *
 * Returns 0 on success. Returns -1 if a log cannot be opened, a record cannot
 * be read, or memory cannot be allocated; in that case all entries are
 * released and *shared_rec_hash is left empty.
 */
int build_mod_shared_rec_hash(char **infile_list, int n_infiles,
    darshan_module_id mod_id, int nprocs, char *mod_buf,
    struct darshan_shared_record_ref **shared_rec_hash)
{
    darshan_fd in_fd;
    struct darshan_base_record *base_rec;
    struct darshan_base_record *agg_base;
    struct darshan_shared_record_ref *ref, *tmp;
    int status = 0;
    int ret;
    int i;

    /* loop over each input log file */
    for(i = 0; i < n_infiles; i++)
    {
        in_fd = darshan_log_open(infile_list[i]);
        if(in_fd == NULL)
        {
            fprintf(stderr,
                "Error: unable to open input Darshan log file %s.\n",
                infile_list[i]);
            status = -1;
            break;
        }

        while((ret = mod_logutils[mod_id]->log_get_record(in_fd, mod_buf)) == 1)
        {
            base_rec = (struct darshan_base_record *)mod_buf;

            /* search for this record in shared record hash */
            HASH_FIND(hlink, *shared_rec_hash, &(base_rec->id),
                sizeof(darshan_record_id), ref);
            if(ref)
            {
                /* if found, aggregate this rank's record into the shared record */
                mod_logutils[mod_id]->log_agg_records(mod_buf, ref->agg_rec, 0);
                ref->ref_cnt++;
                continue;
            }

            /* only the first log seeds candidates: a record that is missing
             * from the first log cannot be shared by all ranks
             */
            if(i > 0)
                continue;

            /* create a new zeroed ref and add it to the hash */
            ref = calloc(1, sizeof(*ref));
            if(!ref)
            {
                fprintf(stderr,
                    "Error: unable to allocate memory for shared record.\n");
                status = -1;
                break;
            }

            /* initialize the aggregate record with this rank's record;
             * rank -1 marks the aggregate as not belonging to a single rank
             */
            agg_base = (struct darshan_base_record *)ref->agg_rec;
            agg_base->id = base_rec->id;
            agg_base->rank = -1;
            mod_logutils[mod_id]->log_agg_records(mod_buf, ref->agg_rec, 1);

            ref->id = base_rec->id;
            ref->ref_cnt = 1;
            HASH_ADD(hlink, *shared_rec_hash, id, sizeof(darshan_record_id), ref);
        }
        if(ret < 0)
        {
            fprintf(stderr,
                "Error: unable to read %s module record from input log file %s.\n",
                darshan_module_names[mod_id], infile_list[i]);
            status = -1;
        }

        darshan_log_close(in_fd);

        if(status < 0)
            break;
    }

    /* prune non-shared records; on failure drop everything so the caller
     * never sees partially aggregated entries
     */
    HASH_ITER(hlink, *shared_rec_hash, ref, tmp)
    {
        if(status < 0 || ref->ref_cnt != nprocs)
        {
            HASH_DELETE(hlink, *shared_rec_hash, ref);
            free(ref);
        }
    }

    return(status);
}

int main(int argc, char *argv[])
{
Shane Snyder's avatar
Shane Snyder committed
177 178
    char **infile_list;
    int n_infiles;
179
    int shared_redux;
Shane Snyder's avatar
Shane Snyder committed
180 181 182 183 184 185 186 187 188
    char *outlog_dir = NULL;
    char *outlog_name = NULL;
    char outlog_path[512];
    darshan_fd in_fd, merge_fd;
    struct darshan_job in_job, merge_job;
    char merge_exe[DARSHAN_EXE_LEN+1];
    char **merge_mnt_pts;
    char **merge_fs_types;
    int merge_mnt_count = 0;
189
    struct darshan_record_ref *in_hash = NULL;
Shane Snyder's avatar
Shane Snyder committed
190
    struct darshan_record_ref *merge_hash = NULL;
191
    struct darshan_record_ref *ref, *tmp, *found;
192 193 194 195
    struct darshan_shared_record_ref *shared_rec_hash = NULL;
    struct darshan_shared_record_ref *sref, *stmp;
    struct darshan_base_record *base_rec;
    char mod_buf[DEF_MOD_BUF_SIZE];
196 197
    int i, j;
    int ret;
198 199

    /* grab command line arguments */
Shane Snyder's avatar
Shane Snyder committed
200 201
    parse_args(argc, argv, &infile_list, &n_infiles, &outlog_dir,
        &outlog_name, &shared_redux);
202

Shane Snyder's avatar
Shane Snyder committed
203
    memset(&merge_job, 0, sizeof(struct darshan_job));
204

Shane Snyder's avatar
Shane Snyder committed
205
    /* first pass at merging together logs:
206 207
     *      - compose output job-level metadata structure (including exe & mount data)
     *      - compose output record_id->file_name mapping 
208
     */
Shane Snyder's avatar
Shane Snyder committed
209
    for(i = 0; i < n_infiles; i++)
210 211 212
    {
        memset(&in_job, 0, sizeof(struct darshan_job));

Shane Snyder's avatar
Shane Snyder committed
213
        in_fd = darshan_log_open(infile_list[i]);
214
        if(in_fd == NULL)
215 216
        {
            fprintf(stderr,
217
                "Error: unable to open input Darshan log file %s.\n",
Shane Snyder's avatar
Shane Snyder committed
218
                infile_list[i]);
219 220 221 222
            return(-1);
        }

        /* read job-level metadata from the input file */
223 224
        ret = darshan_log_getjob(in_fd, &in_job);
        if(ret < 0)
225 226
        {
            fprintf(stderr,
227
                "Error: unable to read job data from input Darshan log file %s.\n",
Shane Snyder's avatar
Shane Snyder committed
228
                infile_list[i]);
229
            darshan_log_close(in_fd);
230 231 232
            return(-1);
        }

233 234 235 236 237 238 239 240 241
        /* if the input darshan log has metadata set indicating the darshan
         * shutdown procedure was called on the log, then we error out. if the
         * shutdown procedure was started, then it's possible the log has
         * incomplete or corrupt data, so we just throw out the data for now.
         */
        if(strstr(in_job.metadata, "darshan_shutdown=yes"))
        {
            fprintf(stderr,
                "Error: potentially corrupt data found in input log file %s.\n",
Shane Snyder's avatar
Shane Snyder committed
242
                infile_list[i]);
243 244 245 246
            darshan_log_close(in_fd);
            return(-1);
        }

247 248
        if(i == 0)
        {
249
            /* get job data, exe, & mounts directly from the first input log */
Shane Snyder's avatar
Shane Snyder committed
250
            memcpy(&merge_job, &in_job, sizeof(struct darshan_job));
251

Shane Snyder's avatar
Shane Snyder committed
252
            ret = darshan_log_getexe(in_fd, merge_exe);
253
            if(ret < 0)
254 255
            {
                fprintf(stderr,
Shane Snyder's avatar
Shane Snyder committed
256 257
                    "Error: unable to read exe string from input Darshan log file %s.\n",
                    infile_list[i]);
258
                darshan_log_close(in_fd);
259 260 261
                return(-1);
            }

Shane Snyder's avatar
Shane Snyder committed
262 263
            ret = darshan_log_getmounts(in_fd, &merge_mnt_pts,
                &merge_fs_types, &merge_mnt_count);
264
            if(ret < 0)
265
            {
266
                fprintf(stderr,
Shane Snyder's avatar
Shane Snyder committed
267 268
                    "Error: unable to read mount info from input Darshan log file %s.\n",
                    infile_list[i]);
269 270
                darshan_log_close(in_fd);
                return(-1);
271 272 273 274
            }
        }
        else
        {
275
            /* potentially update job timestamps using remaining logs */
Shane Snyder's avatar
Shane Snyder committed
276 277 278 279
            if(in_job.start_time < merge_job.start_time)
                merge_job.start_time = in_job.start_time;
            if(in_job.end_time > merge_job.end_time)
                merge_job.end_time = in_job.end_time;
280 281
        }

282
        /* read the hash of ids->names for the input log */
283 284 285 286 287
        ret = darshan_log_gethash(in_fd, &in_hash);
        if(ret < 0)
        {
            fprintf(stderr,
                "Error: unable to read job data from input Darshan log file %s.\n",
Shane Snyder's avatar
Shane Snyder committed
288
                infile_list[i]);
289 290 291 292 293 294 295 296 297
            darshan_log_close(in_fd);
            return(-1);
        }

        /* iterate the input hash, copying over record_id->file_name mappings
         * that have not already been copied to the output hash
         */
        HASH_ITER(hlink, in_hash, ref, tmp)
        {
Shane Snyder's avatar
Shane Snyder committed
298
            HASH_FIND(hlink, merge_hash, &(ref->id), sizeof(darshan_record_id), found);
299 300
            if(!found)
            {
Shane Snyder's avatar
Shane Snyder committed
301
                HASH_ADD(hlink, merge_hash, id, sizeof(darshan_record_id), ref);
302
            }
303
            else if(strcmp(ref->name, found->name))
304 305 306 307 308 309 310 311 312
            {
                fprintf(stderr,
                    "Error: invalid Darshan record table entry.\n");
                darshan_log_close(in_fd);
                return(-1);
            }
        }

        darshan_log_close(in_fd);
313 314
    }

Shane Snyder's avatar
Shane Snyder committed
315 316 317 318 319 320 321 322 323 324
    if(!outlog_name)
    {
        outlog_name = "test123.darshan";
    }

    sprintf(outlog_path, "%s/%s", outlog_dir, outlog_name); 

    /* create the output "merged" log */
    merge_fd = darshan_log_create(outlog_path, DARSHAN_ZLIB_COMP, 1);
    if(merge_fd == NULL)
325 326 327 328 329 330
    {
        fprintf(stderr, "Error: unable to create output darshan log.\n");
        return(-1);
    }

    /* write the darshan job info, exe string, and mount data to output file */
Shane Snyder's avatar
Shane Snyder committed
331
    ret = darshan_log_putjob(merge_fd, &merge_job);
332 333 334
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to write job data to output darshan log.\n");
Shane Snyder's avatar
Shane Snyder committed
335 336
        darshan_log_close(merge_fd);
        unlink(outlog_path);
337 338 339
        return(-1);
    }

Shane Snyder's avatar
Shane Snyder committed
340
    ret = darshan_log_putexe(merge_fd, merge_exe);
341 342 343
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to write exe string to output darshan log.\n");
Shane Snyder's avatar
Shane Snyder committed
344 345
        darshan_log_close(merge_fd);
        unlink(outlog_path);
346 347 348
        return(-1);
    }

Shane Snyder's avatar
Shane Snyder committed
349
    ret = darshan_log_putmounts(merge_fd, merge_mnt_pts, merge_fs_types, merge_mnt_count);
350 351 352
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to write mount data to output darshan log.\n");
Shane Snyder's avatar
Shane Snyder committed
353 354
        darshan_log_close(merge_fd);
        unlink(outlog_path);
355 356 357
        return(-1);
    }

Shane Snyder's avatar
Shane Snyder committed
358 359
    /* write the merged table of records to output file */
    ret = darshan_log_puthash(merge_fd, merge_hash);
360 361 362
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to write record table to output darshan log.\n");
Shane Snyder's avatar
Shane Snyder committed
363 364
        darshan_log_close(merge_fd);
        unlink(outlog_path);
365 366
        return(-1);
    }
367

368
    /* iterate over active darshan modules and gather module data to write
Shane Snyder's avatar
Shane Snyder committed
369
     * to the merged output log
370
     */
Shane Snyder's avatar
Shane Snyder committed
371
    for(i = 0; i < DARSHAN_MAX_MODS; i++)
372 373 374
    {
        if(!mod_logutils[i]) continue;

375
        if(shared_redux)
376
        {
377
            /* build the hash of records shared globally by this module */
Shane Snyder's avatar
Shane Snyder committed
378 379
            ret = build_mod_shared_rec_hash(infile_list, n_infiles, i,
                merge_job.nprocs, mod_buf, &shared_rec_hash);
380
            if(ret < 0)
381
            {
382 383 384
                fprintf(stderr,
                    "Error: unable to build list of %s module's shared records.\n",
                    darshan_module_names[i]);
Shane Snyder's avatar
Shane Snyder committed
385 386
                darshan_log_close(merge_fd);
                unlink(outlog_path);
387
                return(-1);
388
            }
389 390 391

        }

Shane Snyder's avatar
Shane Snyder committed
392
        for(j = 0; j < n_infiles; j++)
393
        {
Shane Snyder's avatar
Shane Snyder committed
394
            in_fd = darshan_log_open(infile_list[j]);
395 396 397 398
            if(in_fd == NULL)
            {
                fprintf(stderr,
                    "Error: unable to open input Darshan log file %s.\n",
Shane Snyder's avatar
Shane Snyder committed
399 400 401
                    infile_list[j]);
                darshan_log_close(merge_fd);
                unlink(outlog_path);
402 403 404
                return(-1);
            }

Shane Snyder's avatar
Shane Snyder committed
405 406 407 408 409
            if(j == 0 && shared_rec_hash)
            {
                /* write out the shared records first */
                HASH_ITER(hlink, shared_rec_hash, sref, stmp)
                {
Shane Snyder's avatar
Shane Snyder committed
410
                    ret = mod_logutils[i]->log_put_record(merge_fd, sref->agg_rec, in_fd->mod_ver[i]);
Shane Snyder's avatar
Shane Snyder committed
411 412 413 414 415
                    if(ret < 0)
                    {
                        fprintf(stderr,
                            "Error: unable to write %s module record to output darshan log.\n",
                            darshan_module_names[i]);
Shane Snyder's avatar
Shane Snyder committed
416 417 418
                        darshan_log_close(in_fd);
                        darshan_log_close(merge_fd);
                        unlink(outlog_path);
Shane Snyder's avatar
Shane Snyder committed
419 420 421 422 423
                        return(-1);
                    }
                }
            }

424
            /* loop over module records and write them to output file */
425
            while((ret = mod_logutils[i]->log_get_record(in_fd, mod_buf)) == 1)
426
            {
427 428 429 430 431 432
                base_rec = (struct darshan_base_record *)mod_buf;

                HASH_FIND(hlink, shared_rec_hash, &(base_rec->id), sizeof(darshan_record_id), sref);
                if(sref)
                    continue; /* skip shared records */

Shane Snyder's avatar
Shane Snyder committed
433
                ret = mod_logutils[i]->log_put_record(merge_fd, mod_buf, in_fd->mod_ver[i]);
434 435 436 437
                if(ret < 0)
                {
                    fprintf(stderr,
                        "Error: unable to write %s module record to output log file %s.\n",
Shane Snyder's avatar
Shane Snyder committed
438
                        darshan_module_names[i], infile_list[j]);
439
                    darshan_log_close(in_fd);
Shane Snyder's avatar
Shane Snyder committed
440 441
                    darshan_log_close(merge_fd);
                    unlink(outlog_path);
442 443 444
                    return(-1);
                }
            }
445 446 447
            if(ret < 0)
            {
                fprintf(stderr,
448
                    "Error: unable to read %s module record from input log file %s.\n",
Shane Snyder's avatar
Shane Snyder committed
449
                    darshan_module_names[i], infile_list[j]);
450
                darshan_log_close(in_fd);
Shane Snyder's avatar
Shane Snyder committed
451 452
                darshan_log_close(merge_fd);
                unlink(outlog_path);
453 454 455
                return(-1);
            }

456
            darshan_log_close(in_fd);
457
        }
458 459 460 461 462 463 464 465 466 467

        /* clear the shared record hash for the next module */
        if(shared_redux)
        {
            HASH_ITER(hlink, shared_rec_hash, sref, stmp)
            {
                HASH_DELETE(hlink, shared_rec_hash, sref);
                free(sref);
            }
        }
468 469
    }

Shane Snyder's avatar
Shane Snyder committed
470
    darshan_log_close(merge_fd);
471 472 473

    return(0);
}

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */