/*
 * Copyright (C) 2014 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#include <assert.h>
#include <getopt.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <codes/codes-workload.h>
#include <codes/codes.h>

static char type[128] = {'\0'};
15
static darshan_params d_params = {"", 0}; 
16
static iolang_params i_params = {0, 0, "", ""};
17
static recorder_params r_params = {"", 0};
18
static dumpi_trace_params du_params = {"", 0};
19
static online_comm_params oc_params = {"", "", 0};
20
static checkpoint_wrkld_params c_params = {0, 0, 0, 0, 0};
21
static iomock_params im_params = {0, 0, 1, 0, 0, 0};
22
static int n = -1;
23
static int start_rank = 0;
24 25 26 27 28

/*
 * Long command-line options understood by codes-workload-dump.  The last
 * field of each entry is the value getopt_long() returns for that option
 * and is what the switch in main() dispatches on.  The table is terminated
 * by an all-zero entry, as getopt_long() requires.
 */
static struct option long_opts[] =
{
    /* general options */
    {"type", required_argument, NULL, 't'},
    {"num-ranks", required_argument, NULL, 'n'},
    {"start-rank", required_argument, NULL, 'r'},
    /* darshan workload */
    {"d-log", required_argument, NULL, 'l'},
    {"d-aggregator-cnt", required_argument, NULL, 'a'},
    /* iolang workload */
    {"i-meta", required_argument, NULL, 'm'},
    {"i-use-relpath", no_argument, NULL, 'p'},
    /* recorder workload */
    {"r-trace-dir", required_argument, NULL, 'd'},
    {"r-nprocs", required_argument, NULL, 'x'},
    /* dumpi trace workload */
    {"dumpi-log", required_argument, NULL, 'w'},
    /* online communication workload */
    {"workload-name", required_argument, NULL, 'b'},
    /* checkpoint workload */
    {"chkpoint-size", required_argument, NULL, 'S'},
    {"chkpoint-bw", required_argument, NULL, 'B'},
    {"chkpoint-iters", required_argument, NULL, 'i'},
    {"chkpoint-mtti", required_argument, NULL, 'M'},
    /* mock I/O workload */
    {"iomock-request-type", required_argument, NULL, 'Q'},
    {"iomock-num-requests", required_argument, NULL, 'N'},
    {"iomock-request-size", required_argument, NULL, 'z'},
    {"iomock-file-id", required_argument, NULL, 'f'},
    {"iomock-use-uniq-file-ids", no_argument, NULL, 'u'},
    /* terminator */
    {NULL, 0, NULL, 0}
};

/*
 * Print the command-line help text to stderr.  Called by main() whenever a
 * required argument is missing or invalid.
 */
void usage(void){
    fputs(
            "Usage: codes-workload-dump --type TYPE --num-ranks N [OPTION...]\n"
            "--type: type of workload (\"darshan_io_workload\", \"iolang_workload\", \"dumpi-trace-workload\" etc.)\n"
            "--num-ranks: number of ranks to process (if not set, it is set by the workload)\n"
            "--start-rank: first rank to process (default 0)\n"
            "-s: print final workload stats\n"
            "DARSHAN OPTIONS (darshan_io_workload)\n"
            "--d-log: darshan log file\n"
            "--d-aggregator-cnt: number of aggregators for collective I/O in darshan\n"
            "IOLANG OPTIONS (iolang_workload)\n"
            "--i-meta: i/o language kernel meta file path\n"
            "--i-use-relpath: use i/o kernel path relative to the meta file path\n"
            "RECORDER OPTIONS (recorder_io_workload)\n"
            "--r-trace-dir: directory containing recorder trace files\n"
            "--r-nprocs: number of ranks in original recorder workload\n"
            "DUMPI TRACE OPTIONS (dumpi-trace-workload) \n"
            "--dumpi-log: dumpi log file \n"
            "ONLINE COMM OPTIONS (online_comm_workload) \n"
            "--workload-name : name of the workload (lammps or nekbone) \n"
            "CHECKPOINT OPTIONS (checkpoint_io_workload)\n"
            "--chkpoint-size: size of aggregate checkpoint to write\n"
            "--chkpoint-bw: checkpointing bandwidth\n"
            "--chkpoint-iters: iteration count for checkpoint workload\n"
            "--chkpoint-mtti: mean time to interrupt\n"
            "MOCK IO OPTIONS (iomock_workload)\n"
            "--iomock-request-type: whether to write or read\n"
            "--iomock-num-requests: number of writes/reads\n"
            "--iomock-request-size: size of each request\n"
            "--iomock-file-id: file id to use for requests\n"
            "--iomock-use-uniq-file-ids: whether to offset file ids by rank\n",
            stderr
            );
}

int main(int argc, char *argv[])
{
86 87 88
#ifdef USE_ONLINE
    ABT_init(argc, argv);
#endif
89 90 91 92
    int print_stats = 0;
    double total_delay = 0.0;
    int64_t num_barriers = 0;
    int64_t num_opens = 0;
93
    int64_t num_closes = 0;
94 95 96 97
    int64_t num_reads = 0;
    int64_t read_size = 0;
    int64_t num_writes = 0;
    int64_t write_size = 0;
98
    int64_t num_sends = 0;
99
    int64_t num_frees = 0;
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
    int64_t send_size = 0;
    int64_t num_recvs = 0;
    int64_t recv_size = 0;
    int64_t num_isends = 0;
    int64_t isend_size = 0;
    int64_t num_irecvs = 0;
    int64_t irecv_size = 0;
    int64_t num_bcasts = 0;
    int64_t bcast_size = 0;
    int64_t num_allgathers = 0;
    int64_t allgather_size = 0;
    int64_t num_allgathervs = 0;
    int64_t allgatherv_size = 0;
    int64_t num_alltoalls = 0;
    int64_t alltoall_size = 0;
    int64_t num_alltoallvs = 0;
    int64_t alltoallv_size = 0;
    int64_t num_reduces = 0;
    int64_t reduce_size = 0;
    int64_t num_allreduces = 0;
    int64_t allreduce_size = 0;
    int64_t num_collectives = 0;
    int64_t collective_size = 0;
    int64_t num_waitalls = 0;
    int64_t num_waits = 0;
    int64_t num_waitsomes = 0;
    int64_t num_waitanys = 0;
    int64_t num_testalls = 0;
128

129
    char ch;
130
    while ((ch = getopt_long(argc, argv, "t:n:l:b:a:m:sp:wr:S:B:R:M:Q:N:z:f:u",
131
                    long_opts, NULL)) != -1){
132 133 134 135 136 137 138 139 140 141 142
        switch (ch){
            case 't':
                strcpy(type, optarg);
                break;
            case 'n':
                n = atoi(optarg);
                assert(n>0);
                break;
            case 'l':
                strcpy(d_params.log_file_path, optarg);
                break;
143 144 145
            case 'b':
                strcpy(oc_params.workload_name, optarg);
            break;
146 147 148
            case 'a':
                d_params.aggregator_cnt = atol(optarg);
                break;
149
            case 'm':
150
                strcpy(i_params.io_kernel_meta_path, optarg);
151
                break;
152
            case 'p':
153
                i_params.use_relpath = 1;
154
                break;
155 156 157
            case 'd':
                strcpy(r_params.trace_dir_path, optarg);
                break;
158 159 160
            case 'x':
                r_params.nprocs = atol(optarg);
                break;
161 162 163
            case 'w':
                strcpy(du_params.file_name, optarg);
                break;
164 165 166
            case 's':
                print_stats = 1;
                break;
167 168 169 170
            case 'r':
                start_rank = atoi(optarg);
                assert(n>0);
                break;
171 172 173 174 175 176
            case 'S':
                c_params.checkpoint_sz = atof(optarg);
                break;
            case 'B':
                c_params.checkpoint_wr_bw = atof(optarg);
                break;
177 178
            case 'i':
                c_params.total_checkpoints = atoi(optarg);
179 180 181 182
                break;
            case 'M':
                c_params.mtti = atof(optarg);
                break;
183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
            case 'Q':
                im_params.is_write = (strcmp("write", optarg) == 0);
                break;
            case 'N':
                im_params.num_requests = atoi(optarg);
                break;
            case 'z':
                im_params.request_size = atoi(optarg);
                break;
            case 'f':
                im_params.file_id = (uint64_t) atoll(optarg);
                break;
            case 'u':
                im_params.use_uniq_file_ids = 1;
                break;
198 199 200
        }
    }

201 202 203 204 205 206
    if (type[0] == '\0'){
        fprintf(stderr, "Expected \"--type\" argument\n");
        usage();
        return 1;
    }

207 208
    int i;
    char *wparams;
209 210 211 212 213
    if (strcmp(type, "iomock_workload") == 0) {
        // TODO: more involved input checking
        wparams = (char*) &im_params;
    }
    else if (strcmp(type, "darshan_io_workload") == 0){
214 215 216 217 218 219 220 221 222 223 224 225 226
        if (d_params.log_file_path[0] == '\0'){
            fprintf(stderr, "Expected \"--d-log\" argument for darshan workload\n");
            usage();
            return 1;
        }
        else if (d_params.aggregator_cnt == 0){
            fprintf(stderr, "Expected \"--d-aggregator-cnt\" argument for darshan workload\n");
            usage();
            return 1;
        }
        else{
            wparams = (char*)&d_params;
        }
227
    }
228 229 230 231 232 233 234 235 236 237 238 239
    else if(strcmp(type, "online_comm_workload") == 0){
        if (n == -1){
            fprintf(stderr,
                    "Expected \"--num-ranks\" argument for online workload\n");
            usage();
            return 1;
        }
        else{
            oc_params.nprocs = n;
        }
        wparams = (char *)&oc_params;
    }
240
    else if (strcmp(type, "iolang_workload") == 0){
241 242 243 244 245 246 247 248
        if (n == -1){
            fprintf(stderr,
                    "Expected \"--num-ranks\" argument for iolang workload\n");
            usage();
            return 1;
        }
        else{
            i_params.num_cns = n;
249
        }
250 251 252
        if (i_params.io_kernel_meta_path[0] == '\0'){
            fprintf(stderr,
                    "Expected \"--i-meta\" argument for iolang workload\n");
253 254 255
            usage();
            return 1;
        }
256

257
        wparams = (char *)&i_params;
258
    }
259 260 261 262 263 264
    else if (strcmp(type, "recorder_io_workload") == 0){
        if (r_params.trace_dir_path[0] == '\0'){
            fprintf(stderr, "Expected \"--r-trace-dir\" argument for recorder workload\n");
            usage();
            return 1;
        }
265 266 267 268 269
        if (r_params.nprocs == 0){
            fprintf(stderr, "Expected \"--r-nprocs\" argument for recorder workload\n");
            usage();
            return 1;
        }
270 271 272 273
        else{
            wparams = (char *)&r_params;
        }
    }
274 275 276 277
   else if(strcmp(type, "dumpi-trace-workload") == 0)
	{
	if(n == -1){
            fprintf(stderr,
Jonathan Jenkins's avatar
Jonathan Jenkins committed
278
                    "Expected \"--num-ranks\" argument for dumpi workload\n");
279 280 281 282 283 284 285 286
            usage();
            return 1;		
	}
	else{
	    du_params.num_net_traces = n;	
	}

	if(du_params.file_name[0] == '\0' ){
287
            fprintf(stderr, "Expected \"--dumpi-log\" argument for dumpi workload\n");
288 289 290 291 292 293 294 295
            usage();
            return 1;
	}
	else
	{
	  wparams = (char*)&du_params;
	}
	}
296 297 298
    else if(strcmp(type, "checkpoint_io_workload") == 0)
    {
        if(c_params.checkpoint_sz == 0 || c_params.checkpoint_wr_bw == 0 ||
299
           c_params.total_checkpoints == 0 || c_params.mtti == 0)
300 301 302 303 304 305 306 307 308 309 310
        {
            fprintf(stderr, "All checkpoint workload arguments are required\n");
            usage();
            return 1;
        }
        else
        {
            c_params.nprocs = n;
            wparams = (char *)&c_params;
        }
    }
311
    else {
312 313 314
        fprintf(stderr, "Invalid type argument\n");
        usage();
        return 1;
315
    }
316 317 318

    /* if num_ranks not set, pull it from the workload */
    if (n == -1){
319
        n = codes_workload_get_rank_cnt(type, wparams, 0);
Jonathan Jenkins's avatar
Jonathan Jenkins committed
320 321 322 323 324 325
        if (n == -1) {
            fprintf(stderr,
                    "Unable to get rank count from workload. "
                    "Specify option --num-ranks\n");
            return 1;
        }
326 327
    }

328
    for (i = start_rank ; i < start_rank+n; i++){
329 330
        struct codes_workload_op op;
        printf("loading %s, %d\n", type, i);
331
        int id = codes_workload_load(type, wparams, 0, i);
332 333
        assert(id != -1);
        do {
334
            codes_workload_get_next(id, 0, i, &op);
335
//            codes_workload_print_op(stdout, &op, 0, i);
336 337 338

            switch(op.op_type)
            {
339 340
                case CODES_WK_DELAY:
                    total_delay += op.u.delay.seconds;
341 342 343 344
                    break;
                case CODES_WK_BARRIER:
                    num_barriers++;
                    break;
345 346
                case CODES_WK_OPEN:
                    num_opens++;
347
                    break;
348 349
                case CODES_WK_CLOSE:
                    num_closes++;
350 351 352 353 354
                    break;
                case CODES_WK_WRITE:
                    num_writes++;
                    write_size += op.u.write.size;
                    break;
355 356 357 358 359 360 361
                case CODES_WK_READ:
                    num_reads++;
                    read_size += op.u.write.size;
                    break;
                case CODES_WK_SEND:
                    num_sends++;
                    send_size += op.u.send.num_bytes;
362
                    break;
363 364 365 366
                case CODES_WK_REQ_FREE:
                    num_frees++;
                    break;

367 368 369 370 371
                case CODES_WK_RECV:
                    num_recvs++;
                    recv_size += op.u.recv.num_bytes;
                    break;
                case CODES_WK_ISEND:
372 373
                    num_isends++;
                    isend_size += op.u.send.num_bytes;
374 375 376 377 378 379 380 381 382 383
                    break;
                case CODES_WK_IRECV:
                    num_irecvs++;
                    irecv_size += op.u.recv.num_bytes;
                    break;
                /* NOTE: all collectives are currently represented as the
                 * generic "collective" type */
                case CODES_WK_BCAST:
                    num_bcasts++;
                    bcast_size += op.u.collective.num_bytes;
384
                    break;
385
                case CODES_WK_ALLGATHER:
386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413
                    num_allgathers++;
                    allgather_size += op.u.collective.num_bytes;
                    break;
                case CODES_WK_ALLGATHERV:
                    num_allgathervs++;
                    allgatherv_size += op.u.collective.num_bytes;
                    break;
                case CODES_WK_ALLTOALL:
                    num_alltoalls++;
                    alltoall_size += op.u.collective.num_bytes;
                    break;
                case CODES_WK_ALLTOALLV:
                    num_alltoallvs++;
                    alltoallv_size += op.u.collective.num_bytes;
                    break;
                case CODES_WK_REDUCE:
                    num_reduces++;
                    reduce_size += op.u.collective.num_bytes;
                    break;
                case CODES_WK_ALLREDUCE:
                    num_allreduces++;
                    allreduce_size += op.u.collective.num_bytes;
                    break;
                case CODES_WK_COL:
                    num_collectives++;
                    collective_size += op.u.collective.num_bytes;
                    break;
                case CODES_WK_WAITALL:
414 415 416 417
                    {
                        if(i == 0)
                        {
                    int j;
418
                    num_waitalls++;
419 420
                        }
                    }
421 422 423 424 425 426 427 428 429 430 431 432 433
                    break;
                case CODES_WK_WAIT:
                    num_waits++;
                    break;
                case CODES_WK_WAITSOME:
                    num_waitsomes++;
                    break;
                case CODES_WK_WAITANY:
                    num_waitanys++;
                    break;
                case CODES_WK_TESTALL:
                    num_testalls++;
                    break;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
434 435 436 437
                case CODES_WK_END:
                    break;
                case CODES_WK_IGNORE:
                    break;
438 439 440 441
                default:
                    fprintf(stderr,
                            "WARNING: unknown workload op type (code %d)\n",
                            op.op_type);
442
            }
443
        } while (op.op_type != CODES_WK_END);
444 445 446 447 448

    if(strcmp(type, "online_comm_workload") == 0)
    {
        codes_workload_finalize(type, wparams, 0, i);
    }
449 450
    }

451 452 453
    if (print_stats)
    {
        fprintf(stderr, "\n* * * * * FINAL STATS * * * * * *\n");
454 455 456 457 458 459 460 461 462
        fprintf(stderr, "NUM_OPENS:       %"PRId64"\n", num_opens);
        fprintf(stderr, "NUM_CLOSES:      %"PRId64"\n", num_closes);
        fprintf(stderr, "NUM_BARRIERS:    %"PRId64"\n", num_barriers);
        fprintf(stderr, "TOTAL_DELAY:     %.4lf\n", total_delay);
        fprintf(stderr, "NUM_READS:       %"PRId64"\n", num_reads);
        fprintf(stderr, "READ_SIZE:       %"PRId64"\n", read_size);
        fprintf(stderr, "NUM_WRITES:      %"PRId64"\n", num_writes);
        fprintf(stderr, "WRITE_SIZE:      %"PRId64"\n", write_size);
        fprintf(stderr, "NUM_SENDS:       %"PRId64"\n", num_sends);
463
        fprintf(stderr, "NUM_FREES:       %"PRId64"\n", num_frees);
464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491
        fprintf(stderr, "SEND_SIZE:       %"PRId64"\n", send_size);
        fprintf(stderr, "NUM_RECVS:       %"PRId64"\n", num_recvs);
        fprintf(stderr, "RECV_SIZE:       %"PRId64"\n", recv_size);
        fprintf(stderr, "NUM_ISENDS:      %"PRId64"\n", num_isends);
        fprintf(stderr, "ISEND_SIZE:      %"PRId64"\n", isend_size);
        fprintf(stderr, "NUM_IRECVS:      %"PRId64"\n", num_irecvs);
        fprintf(stderr, "IRECV_SIZE:      %"PRId64"\n", irecv_size);
        fprintf(stderr, "NUM_BCASTS:      %"PRId64"\n", num_bcasts);
        fprintf(stderr, "BCAST_SIZE:      %"PRId64"\n", bcast_size);
        fprintf(stderr, "NUM_ALLGATHERS:  %"PRId64"\n", num_allgathers);
        fprintf(stderr, "ALLGATHER_SIZE:  %"PRId64"\n", allgather_size);
        fprintf(stderr, "NUM_ALLGATHERVS: %"PRId64"\n", num_allgathervs);
        fprintf(stderr, "ALLGATHERV_SIZE: %"PRId64"\n", allgatherv_size);
        fprintf(stderr, "NUM_ALLTOALLS:   %"PRId64"\n", num_alltoalls);
        fprintf(stderr, "ALLTOALL_SIZE:   %"PRId64"\n", alltoall_size);
        fprintf(stderr, "NUM_ALLTOALLVS:  %"PRId64"\n", num_alltoallvs);
        fprintf(stderr, "ALLTOALLV_SIZE:  %"PRId64"\n", alltoallv_size);
        fprintf(stderr, "NUM_REDUCES:     %"PRId64"\n", num_reduces);
        fprintf(stderr, "REDUCE_SIZE:     %"PRId64"\n", reduce_size);
        fprintf(stderr, "NUM_ALLREDUCE:   %"PRId64"\n", num_allreduces);
        fprintf(stderr, "ALLREDUCE_SIZE:  %"PRId64"\n", allreduce_size);
        fprintf(stderr, "NUM_COLLECTIVE:  %"PRId64"\n", num_collectives);
        fprintf(stderr, "COLLECTIVE_SIZE: %"PRId64"\n", collective_size);
        fprintf(stderr, "NUM_WAITALLS:    %"PRId64"\n", num_waitalls);
        fprintf(stderr, "NUM_WAITS:       %"PRId64"\n", num_waits);
        fprintf(stderr, "NUM_WAITSOMES:   %"PRId64"\n", num_waitsomes);
        fprintf(stderr, "NUM_WAITANYS:    %"PRId64"\n", num_waitanys);
        fprintf(stderr, "NUM_TESTALLS:    %"PRId64"\n", num_testalls);
492
    }
493

494 495 496
#ifdef USE_ONLINE
    ABT_finalize();
#endif
497 498 499 500 501 502 503 504 505 506 507
    return 0;
}

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ft=c ts=8 sts=4 sw=4 expandtab
 */