GitLab maintenance scheduled for Tomorrow, 2020-08-11, from 17:00 to 18:00 CT - Services will be unavailable during this time.

codes-workload-dump.c 18.1 KB
Newer Older
1 2 3 4 5 6 7 8 9
/*
 * Copyright (C) 2014 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#include <assert.h>
#include <getopt.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <codes/codes-workload.h>
#include <codes/codes.h>

static char type[128] = {'\0'};
15
static darshan_params d_params = {"", 0}; 
16
static iolang_params i_params = {0, 0, "", ""};
17
static recorder_params r_params = {"", 0};
18
static dumpi_trace_params du_params = {"", 0};
19
static online_comm_params oc_params = {"", "", 0};
20
static checkpoint_wrkld_params c_params = {0, 0, 0, 0, 0};
21
static iomock_params im_params = {0, 0, 1, 0, 0, 0};
22
static int n = -1;
23
static int start_rank = 0;
24 25 26 27 28

/* Long-option table for getopt_long(). Each entry maps a long option to the
 * single-character code handled in main()'s option switch; the table is
 * terminated by an all-zero entry. */
static struct option long_opts[] =
{
    {"type", required_argument, NULL, 't'},
    {"num-ranks", required_argument, NULL, 'n'},
    {"start-rank", required_argument, NULL, 'r'},
    {"d-log", required_argument, NULL, 'l'},
    {"i-meta", required_argument, NULL, 'm'},
    {"i-use-relpath", no_argument, NULL, 'p'},
    {"r-trace-dir", required_argument, NULL, 'd'},
    {"r-nprocs", required_argument, NULL, 'x'},
    {"dumpi-log", required_argument, NULL, 'w'},
    {"workload-name", required_argument, NULL, 'b'},
    {"chkpoint-size", required_argument, NULL, 'S'},
    {"chkpoint-bw", required_argument, NULL, 'B'},
    {"chkpoint-iters", required_argument, NULL, 'i'},
    {"chkpoint-mtti", required_argument, NULL, 'M'},
    {"iomock-request-type", required_argument, NULL, 'Q'},
    {"iomock-num-requests", required_argument, NULL, 'N'},
    {"iomock-request-size", required_argument, NULL, 'z'},
    {"iomock-file-id", required_argument, NULL, 'f'},
    {"iomock-use-uniq-file-ids", no_argument, NULL, 'u'},
    {NULL, 0, NULL, 0}
};

/*
 * Print the command-line help text to stderr.
 *
 * Takes no arguments and returns nothing; the text lists the general options
 * followed by the options specific to each workload type.
 */
void usage(void){
    fputs(
            "Usage: codes-workload-dump --type TYPE --num-ranks N [OPTION...]\n"
            "--type: type of workload (\"darshan_io_workload\", \"iolang_workload\", \"dumpi-trace-workload\" etc.)\n"
            "--num-ranks: number of ranks to process (if not set, it is set by the workload)\n"
            "--start-rank: first rank to process (default: 0)\n"
            "-s: print final workload stats\n"
            "DARSHAN OPTIONS (darshan_io_workload)\n"
            "--d-log: darshan log file\n"
            "IOLANG OPTIONS (iolang_workload)\n"
            "--i-meta: i/o language kernel meta file path\n"
            "--i-use-relpath: use i/o kernel path relative meta file path\n"
            "RECORDER OPTIONS (recorder_io_workload)\n"
            "--r-trace-dir: directory containing recorder trace files\n"
            "--r-nprocs: number of ranks in original recorder workload\n"
            "DUMPI TRACE OPTIONS (dumpi-trace-workload) \n"
            "--dumpi-log: dumpi log file \n"
            "ONLINE COMM OPTIONS (online_comm_workload) \n"
            "--workload-name : name of the workload (lammps or nekbone) \n"
            "CHECKPOINT OPTIONS (checkpoint_io_workload)\n"
            "--chkpoint-size: size of aggregate checkpoint to write\n"
            "--chkpoint-bw: checkpointing bandwidth\n"
            "--chkpoint-iters: iteration count for checkpoint workload\n"
            "--chkpoint-mtti: mean time to interrupt\n"
            "MOCK IO OPTIONS (iomock_workload)\n"
            "--iomock-request-type: whether to write or read\n"
            "--iomock-num-requests: number of writes/reads\n"
            "--iomock-request-size: size of each request\n"
            "--iomock-file-id: file id to use for requests\n"
            "--iomock-use-uniq-file-ids: whether to offset file ids by rank\n",
            stderr
            );
}

int main(int argc, char *argv[])
{
84 85 86
#ifdef USE_ONLINE
    ABT_init(argc, argv);
#endif
87 88 89 90
    int print_stats = 0;
    double total_delay = 0.0;
    int64_t num_barriers = 0;
    int64_t num_opens = 0;
91
    int64_t num_closes = 0;
92 93 94 95
    int64_t num_reads = 0;
    int64_t read_size = 0;
    int64_t num_writes = 0;
    int64_t write_size = 0;
96
    int64_t num_sends = 0;
97
    int64_t num_frees = 0;
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
    int64_t send_size = 0;
    int64_t num_recvs = 0;
    int64_t recv_size = 0;
    int64_t num_isends = 0;
    int64_t isend_size = 0;
    int64_t num_irecvs = 0;
    int64_t irecv_size = 0;
    int64_t num_bcasts = 0;
    int64_t bcast_size = 0;
    int64_t num_allgathers = 0;
    int64_t allgather_size = 0;
    int64_t num_allgathervs = 0;
    int64_t allgatherv_size = 0;
    int64_t num_alltoalls = 0;
    int64_t alltoall_size = 0;
    int64_t num_alltoallvs = 0;
    int64_t alltoallv_size = 0;
    int64_t num_reduces = 0;
    int64_t reduce_size = 0;
    int64_t num_allreduces = 0;
    int64_t allreduce_size = 0;
    int64_t num_collectives = 0;
    int64_t collective_size = 0;
    int64_t num_waitalls = 0;
    int64_t num_waits = 0;
    int64_t num_waitsomes = 0;
    int64_t num_waitanys = 0;
    int64_t num_testalls = 0;
126

127
    char ch;
128
    while ((ch = getopt_long(argc, argv, "t:n:l:b:a:m:sp:wr:S:B:R:M:Q:N:z:f:u",
129
                    long_opts, NULL)) != -1){
130 131 132 133 134 135 136 137 138 139 140
        switch (ch){
            case 't':
                strcpy(type, optarg);
                break;
            case 'n':
                n = atoi(optarg);
                assert(n>0);
                break;
            case 'l':
                strcpy(d_params.log_file_path, optarg);
                break;
141 142
            case 'b':
                strcpy(oc_params.workload_name, optarg);
143
                break;
144
            case 'm':
145
                strcpy(i_params.io_kernel_meta_path, optarg);
146
                break;
147
            case 'p':
148
                i_params.use_relpath = 1;
149
                break;
150 151 152
            case 'd':
                strcpy(r_params.trace_dir_path, optarg);
                break;
153 154 155
            case 'x':
                r_params.nprocs = atol(optarg);
                break;
156 157 158
            case 'w':
                strcpy(du_params.file_name, optarg);
                break;
159 160 161
            case 's':
                print_stats = 1;
                break;
162 163 164 165
            case 'r':
                start_rank = atoi(optarg);
                assert(n>0);
                break;
166 167 168 169 170 171
            case 'S':
                c_params.checkpoint_sz = atof(optarg);
                break;
            case 'B':
                c_params.checkpoint_wr_bw = atof(optarg);
                break;
172 173
            case 'i':
                c_params.total_checkpoints = atoi(optarg);
174 175 176 177
                break;
            case 'M':
                c_params.mtti = atof(optarg);
                break;
178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
            case 'Q':
                im_params.is_write = (strcmp("write", optarg) == 0);
                break;
            case 'N':
                im_params.num_requests = atoi(optarg);
                break;
            case 'z':
                im_params.request_size = atoi(optarg);
                break;
            case 'f':
                im_params.file_id = (uint64_t) atoll(optarg);
                break;
            case 'u':
                im_params.use_uniq_file_ids = 1;
                break;
193 194 195
        }
    }

196 197 198 199 200 201
    if (type[0] == '\0'){
        fprintf(stderr, "Expected \"--type\" argument\n");
        usage();
        return 1;
    }

202 203
    int i;
    char *wparams;
204 205 206 207 208
    if (strcmp(type, "iomock_workload") == 0) {
        // TODO: more involved input checking
        wparams = (char*) &im_params;
    }
    else if (strcmp(type, "darshan_io_workload") == 0){
209 210 211 212 213 214 215 216
        if (d_params.log_file_path[0] == '\0'){
            fprintf(stderr, "Expected \"--d-log\" argument for darshan workload\n");
            usage();
            return 1;
        }
        else{
            wparams = (char*)&d_params;
        }
217
    }
218 219 220 221 222 223 224 225 226 227 228 229
    else if(strcmp(type, "online_comm_workload") == 0){
        if (n == -1){
            fprintf(stderr,
                    "Expected \"--num-ranks\" argument for online workload\n");
            usage();
            return 1;
        }
        else{
            oc_params.nprocs = n;
        }
        wparams = (char *)&oc_params;
    }
230
    else if (strcmp(type, "iolang_workload") == 0){
231 232 233 234 235 236 237 238
        if (n == -1){
            fprintf(stderr,
                    "Expected \"--num-ranks\" argument for iolang workload\n");
            usage();
            return 1;
        }
        else{
            i_params.num_cns = n;
239
        }
240 241 242
        if (i_params.io_kernel_meta_path[0] == '\0'){
            fprintf(stderr,
                    "Expected \"--i-meta\" argument for iolang workload\n");
243 244 245
            usage();
            return 1;
        }
246

247
        wparams = (char *)&i_params;
248
    }
249 250 251 252 253 254
    else if (strcmp(type, "recorder_io_workload") == 0){
        if (r_params.trace_dir_path[0] == '\0'){
            fprintf(stderr, "Expected \"--r-trace-dir\" argument for recorder workload\n");
            usage();
            return 1;
        }
255 256 257 258 259
        if (r_params.nprocs == 0){
            fprintf(stderr, "Expected \"--r-nprocs\" argument for recorder workload\n");
            usage();
            return 1;
        }
260 261 262 263
        else{
            wparams = (char *)&r_params;
        }
    }
264 265 266 267
   else if(strcmp(type, "dumpi-trace-workload") == 0)
	{
	if(n == -1){
            fprintf(stderr,
Jonathan Jenkins's avatar
Jonathan Jenkins committed
268
                    "Expected \"--num-ranks\" argument for dumpi workload\n");
269 270 271 272 273 274 275 276
            usage();
            return 1;		
	}
	else{
	    du_params.num_net_traces = n;	
	}

	if(du_params.file_name[0] == '\0' ){
277
            fprintf(stderr, "Expected \"--dumpi-log\" argument for dumpi workload\n");
278 279 280 281 282 283 284 285
            usage();
            return 1;
	}
	else
	{
	  wparams = (char*)&du_params;
	}
	}
286 287 288
    else if(strcmp(type, "checkpoint_io_workload") == 0)
    {
        if(c_params.checkpoint_sz == 0 || c_params.checkpoint_wr_bw == 0 ||
289
           c_params.total_checkpoints == 0 || c_params.mtti == 0)
290 291 292 293 294 295 296 297 298 299 300
        {
            fprintf(stderr, "All checkpoint workload arguments are required\n");
            usage();
            return 1;
        }
        else
        {
            c_params.nprocs = n;
            wparams = (char *)&c_params;
        }
    }
301
    else {
302 303 304
        fprintf(stderr, "Invalid type argument\n");
        usage();
        return 1;
305
    }
306 307 308

    /* if num_ranks not set, pull it from the workload */
    if (n == -1){
309
    	//printf("Getting rank count\n");
310
        n = codes_workload_get_rank_cnt(type, wparams, 0);
Jonathan Jenkins's avatar
Jonathan Jenkins committed
311 312 313 314 315 316
        if (n == -1) {
            fprintf(stderr,
                    "Unable to get rank count from workload. "
                    "Specify option --num-ranks\n");
            return 1;
        }
317
        printf("rank count = %d\n", n);
318 319
    }

320
    for (i = start_rank ; i < start_rank+n; i++){
321
        struct codes_workload_op op;
322
        //printf("loading %s, %d\n", type, i);
323
        int id = codes_workload_load(type, wparams, 0, i);
324 325 326 327
        double total_read_time = 0.0, total_write_time = 0.0;
        int64_t total_read_bytes = 0, total_written_bytes = 0;
        codes_workload_get_time(type, wparams, 0, i, &total_read_time, &total_write_time, &total_read_bytes, &total_written_bytes);
        printf("total_read_time = %f, total_write_time = %f\n", total_read_time, total_write_time);
328 329
        assert(id != -1);
        do {
330
            codes_workload_get_next(id, 0, i, &op);
331
//            codes_workload_print_op(stdout, &op, 0, i);
332 333 334

            switch(op.op_type)
            {
335 336
                case CODES_WK_DELAY:
                    total_delay += op.u.delay.seconds;
337 338 339 340
                    break;
                case CODES_WK_BARRIER:
                    num_barriers++;
                    break;
341
                case CODES_WK_OPEN:
342 343
                case CODES_WK_MPI_OPEN:
                case CODES_WK_MPI_COLL_OPEN:
344
                    num_opens++;
345
                    break;
346
                case CODES_WK_CLOSE:
347
                case CODES_WK_MPI_CLOSE:
348
                    num_closes++;
349 350
                    break;
                case CODES_WK_WRITE:
351 352
                case CODES_WK_MPI_WRITE:
                case CODES_WK_MPI_COLL_WRITE:
353 354 355
                    num_writes++;
                    write_size += op.u.write.size;
                    break;
356
                case CODES_WK_READ:
357 358
                case CODES_WK_MPI_READ:
                case CODES_WK_MPI_COLL_READ:
359 360 361 362 363 364
                    num_reads++;
                    read_size += op.u.write.size;
                    break;
                case CODES_WK_SEND:
                    num_sends++;
                    send_size += op.u.send.num_bytes;
365
                    break;
366 367 368 369
                case CODES_WK_REQ_FREE:
                    num_frees++;
                    break;

370 371 372 373 374
                case CODES_WK_RECV:
                    num_recvs++;
                    recv_size += op.u.recv.num_bytes;
                    break;
                case CODES_WK_ISEND:
375 376
                    num_isends++;
                    isend_size += op.u.send.num_bytes;
377 378 379 380 381 382 383 384 385 386
                    break;
                case CODES_WK_IRECV:
                    num_irecvs++;
                    irecv_size += op.u.recv.num_bytes;
                    break;
                /* NOTE: all collectives are currently represented as the
                 * generic "collective" type */
                case CODES_WK_BCAST:
                    num_bcasts++;
                    bcast_size += op.u.collective.num_bytes;
387
                    break;
388
                case CODES_WK_ALLGATHER:
389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416
                    num_allgathers++;
                    allgather_size += op.u.collective.num_bytes;
                    break;
                case CODES_WK_ALLGATHERV:
                    num_allgathervs++;
                    allgatherv_size += op.u.collective.num_bytes;
                    break;
                case CODES_WK_ALLTOALL:
                    num_alltoalls++;
                    alltoall_size += op.u.collective.num_bytes;
                    break;
                case CODES_WK_ALLTOALLV:
                    num_alltoallvs++;
                    alltoallv_size += op.u.collective.num_bytes;
                    break;
                case CODES_WK_REDUCE:
                    num_reduces++;
                    reduce_size += op.u.collective.num_bytes;
                    break;
                case CODES_WK_ALLREDUCE:
                    num_allreduces++;
                    allreduce_size += op.u.collective.num_bytes;
                    break;
                case CODES_WK_COL:
                    num_collectives++;
                    collective_size += op.u.collective.num_bytes;
                    break;
                case CODES_WK_WAITALL:
417 418 419
                    {
                        if(i == 0)
                        {
420 421 422 423 424
							int j;
							printf("\n rank %d wait_all: ", i);
							for(j = 0; j < op.u.waits.count; j++)
								printf(" %d ", op.u.waits.req_ids[j]);
							num_waitalls++;
425 426
                        }
                    }
427 428 429 430 431 432 433 434 435 436 437 438 439
                    break;
                case CODES_WK_WAIT:
                    num_waits++;
                    break;
                case CODES_WK_WAITSOME:
                    num_waitsomes++;
                    break;
                case CODES_WK_WAITANY:
                    num_waitanys++;
                    break;
                case CODES_WK_TESTALL:
                    num_testalls++;
                    break;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
440 441 442 443
                case CODES_WK_END:
                    break;
                case CODES_WK_IGNORE:
                    break;
444 445 446 447
                default:
                    fprintf(stderr,
                            "WARNING: unknown workload op type (code %d)\n",
                            op.op_type);
448
            }
449
        } while (op.op_type != CODES_WK_END);
450 451 452 453 454

    if(strcmp(type, "online_comm_workload") == 0)
    {
        codes_workload_finalize(type, wparams, 0, i);
    }
455 456
    }

457 458 459
    if (print_stats)
    {
        fprintf(stderr, "\n* * * * * FINAL STATS * * * * * *\n");
460 461 462 463 464 465 466 467 468
        fprintf(stderr, "NUM_OPENS:       %"PRId64"\n", num_opens);
        fprintf(stderr, "NUM_CLOSES:      %"PRId64"\n", num_closes);
        fprintf(stderr, "NUM_BARRIERS:    %"PRId64"\n", num_barriers);
        fprintf(stderr, "TOTAL_DELAY:     %.4lf\n", total_delay);
        fprintf(stderr, "NUM_READS:       %"PRId64"\n", num_reads);
        fprintf(stderr, "READ_SIZE:       %"PRId64"\n", read_size);
        fprintf(stderr, "NUM_WRITES:      %"PRId64"\n", num_writes);
        fprintf(stderr, "WRITE_SIZE:      %"PRId64"\n", write_size);
        fprintf(stderr, "NUM_SENDS:       %"PRId64"\n", num_sends);
469
        fprintf(stderr, "NUM_FREES:       %"PRId64"\n", num_frees);
470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497
        fprintf(stderr, "SEND_SIZE:       %"PRId64"\n", send_size);
        fprintf(stderr, "NUM_RECVS:       %"PRId64"\n", num_recvs);
        fprintf(stderr, "RECV_SIZE:       %"PRId64"\n", recv_size);
        fprintf(stderr, "NUM_ISENDS:      %"PRId64"\n", num_isends);
        fprintf(stderr, "ISEND_SIZE:      %"PRId64"\n", isend_size);
        fprintf(stderr, "NUM_IRECVS:      %"PRId64"\n", num_irecvs);
        fprintf(stderr, "IRECV_SIZE:      %"PRId64"\n", irecv_size);
        fprintf(stderr, "NUM_BCASTS:      %"PRId64"\n", num_bcasts);
        fprintf(stderr, "BCAST_SIZE:      %"PRId64"\n", bcast_size);
        fprintf(stderr, "NUM_ALLGATHERS:  %"PRId64"\n", num_allgathers);
        fprintf(stderr, "ALLGATHER_SIZE:  %"PRId64"\n", allgather_size);
        fprintf(stderr, "NUM_ALLGATHERVS: %"PRId64"\n", num_allgathervs);
        fprintf(stderr, "ALLGATHERV_SIZE: %"PRId64"\n", allgatherv_size);
        fprintf(stderr, "NUM_ALLTOALLS:   %"PRId64"\n", num_alltoalls);
        fprintf(stderr, "ALLTOALL_SIZE:   %"PRId64"\n", alltoall_size);
        fprintf(stderr, "NUM_ALLTOALLVS:  %"PRId64"\n", num_alltoallvs);
        fprintf(stderr, "ALLTOALLV_SIZE:  %"PRId64"\n", alltoallv_size);
        fprintf(stderr, "NUM_REDUCES:     %"PRId64"\n", num_reduces);
        fprintf(stderr, "REDUCE_SIZE:     %"PRId64"\n", reduce_size);
        fprintf(stderr, "NUM_ALLREDUCE:   %"PRId64"\n", num_allreduces);
        fprintf(stderr, "ALLREDUCE_SIZE:  %"PRId64"\n", allreduce_size);
        fprintf(stderr, "NUM_COLLECTIVE:  %"PRId64"\n", num_collectives);
        fprintf(stderr, "COLLECTIVE_SIZE: %"PRId64"\n", collective_size);
        fprintf(stderr, "NUM_WAITALLS:    %"PRId64"\n", num_waitalls);
        fprintf(stderr, "NUM_WAITS:       %"PRId64"\n", num_waits);
        fprintf(stderr, "NUM_WAITSOMES:   %"PRId64"\n", num_waitsomes);
        fprintf(stderr, "NUM_WAITANYS:    %"PRId64"\n", num_waitanys);
        fprintf(stderr, "NUM_TESTALLS:    %"PRId64"\n", num_testalls);
498
    }
499

500 501 502
#ifdef USE_ONLINE
    ABT_finalize();
#endif
503 504 505 506 507 508 509 510 511 512 513
    return 0;
}

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ft=c ts=8 sts=4 sw=4 expandtab
 */