/*
 *  (C) 2009 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <zlib.h>
#include <time.h>
#include <stdlib.h>
#include <getopt.h>
#include <assert.h>
#include <inttypes.h>

#include "darshan-logutils.h"

#include "uthash-1.9.2/src/uthash.h"

/*
 * Options
 *
 * Every OPTION_* value occupies its own bit so that several selections can
 * be OR-ed together into one mask.  parse_args() also uses these values as
 * the getopt_long() return codes for the matching long options.
 */
#define OPTION_BASE       (1 << 0)  /* darshan log fields */
#define OPTION_TOTAL      (1 << 1)  /* aggregated fields */
#define OPTION_PERF       (1 << 2)  /* derived performance */
#define OPTION_FILE       (1 << 3)  /* file count totals */
#define OPTION_FILE_LIST  (1 << 4)  /* per-file summaries */

/* union of every individual option bit */
#define OPTION_ALL (\
  OPTION_BASE|\
  OPTION_TOTAL|\
  OPTION_PERF|\
  OPTION_FILE|\
  OPTION_FILE_LIST)

/*
 * Bit flags stored in hash_entry_t.type by accum_file():
 *   FILETYPE_SHARED     - a record with rank == -1 was seen for the file
 *   FILETYPE_UNIQUE     - exactly one record (rank != -1) seen so far
 *   FILETYPE_PARTSHARED - more than one record with rank != -1 was seen
 */
#define FILETYPE_SHARED (1 << 0)
#define FILETYPE_UNIQUE (1 << 1)
#define FILETYPE_PARTSHARED (1 << 2)

/*
 * Largest of two / three values.  These are plain macros: each argument
 * may be evaluated more than once, so do not pass expressions with side
 * effects.
 */
#define max(a,b) (((a) > (b)) ? (a) : (b))
#define max3(a,b,c) (((a) > (b)) ? (((a) > (c)) ? (a) : (c)) : (((b) > (c)) ? (b) : (c)))

/*
 * Datatypes
 */
typedef struct hash_entry_s
{
    UT_hash_handle hlink;
50
    uint64_t hash;
51 52 53 54 55 56
    int64_t type;
    int64_t procs;
    int64_t counters[CP_NUM_INDICES];
    double  fcounters[CP_F_NUM_INDICES];
    double cumul_time;
    double meta_time;
Philip Carns's avatar
Philip Carns committed
57
    char name_suffix[CP_NAME_SUFFIX_LEN+1];
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
} hash_entry_t;

/*
 * Running and derived performance figures for the whole job.
 *
 * accum_perf() adds to the totals while records are read; calc_perf() then
 * normalises the shared-file times and fills in the agg_perf_* rates.
 */
typedef struct perf_data_s
{
    int64_t total_bytes;                /* bytes read + written over all records */
    double slowest_rank_time;           /* largest per-rank cumulative I/O time */
    double slowest_rank_meta_time;      /* metadata time of that slowest rank */
    double shared_time_by_cumul;        /* shared-file time: summed time counters */
    double shared_time_by_open;         /* shared-file time: close - open timestamps */
    double shared_time_by_open_lastio;  /* shared-file time: last I/O - open timestamps */
    double shared_time_by_slowest;      /* shared-file time: slowest-rank counter */
    double shared_meta_time;            /* shared-file metadata time */
    double agg_perf_by_cumul;           /* MiB/s using shared_time_by_cumul */
    double agg_perf_by_open;            /* MiB/s using shared_time_by_open */
    double agg_perf_by_open_lastio;     /* MiB/s using shared_time_by_open_lastio */
    double agg_perf_by_slowest;         /* MiB/s using shared_time_by_slowest */
    double *rank_cumul_io_time;         /* per-rank I/O time, indexed by rank */
    double *rank_cumul_md_time;         /* per-rank metadata time, indexed by rank */
} perf_data_t;

/*
 * File-count summary produced by calc_file().
 *
 * Each category carries three numbers: how many files fell into it, the
 * sum of their sizes, and the largest single size.  "Size" here is the
 * largest of the size at open, the highest byte read and the highest byte
 * written for the file.
 */
typedef struct file_data_s
{
    /* every file in the log */
    int64_t total, total_size, total_max;
    /* files that were read but never written */
    int64_t read_only, read_only_size, read_only_max;
    /* files that were written but never read */
    int64_t write_only, write_only_size, write_only_max;
    /* files that were both read and written */
    int64_t read_write, read_write_size, read_write_max;
    /* files flagged FILETYPE_UNIQUE */
    int64_t unique, unique_size, unique_max;
    /* files flagged FILETYPE_SHARED or FILETYPE_PARTSHARED */
    int64_t shared, shared_size, shared_max;
} file_data_t;

/*
 * Prototypes
 */

/* add one log record to the job-wide performance accumulators */
void accum_perf(struct darshan_file *dfile, hash_entry_t *hfile, perf_data_t *pdata);
/* derive the aggregate performance figures once all records are read */
void calc_perf(struct darshan_job *djob, hash_entry_t *hash_rank_uniq, perf_data_t *pdata);

/* fold one log record into a per-file (or grand total) accumulator */
void accum_file(struct darshan_file *dfile, hash_entry_t *hfile, file_data_t *fdata);
/* build the file-count summary from the per-file table */
void calc_file(struct darshan_job *djob, hash_entry_t *file_hash, file_data_t *fdata);
/* print one line per file in the per-file table */
void file_list(struct darshan_job *djob, hash_entry_t *file_hash);

/*
 * usage()
 *
 * Prints the command line synopsis to stderr and terminates the process
 * with exit status 1.  This function does not return to its caller.
 *
 * exename: program name to show in the synopsis (normally argv[0]).
 */
int usage (char *exename)
{
    fprintf(stderr, "Usage: %s [options] <filename>\n", exename);
    fputs("    --all   : all sub-options are enabled\n"
          "    --base  : darshan log field data [default]\n"
          "    --file  : total file counts\n"
          "    --file-list  : per-file summaries\n"
          "    --perf  : derived perf data\n"
          "    --total : aggregated darshan field data\n",
          stderr);

    exit(1);
}

int parse_args (int argc, char **argv, char **filename)
{
    int index;
    int mask;
    static struct option long_opts[] =
    {
        {"all",   0, NULL, OPTION_ALL},
        {"base",  0, NULL, OPTION_BASE},
        {"file",  0, NULL, OPTION_FILE},
132
        {"file-list",  0, NULL, OPTION_FILE_LIST},
133 134
        {"perf",  0, NULL, OPTION_PERF},
        {"total", 0, NULL, OPTION_TOTAL},
135 136
        {"help",  0, NULL, 0},
        {0, 0, 0, 0}
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
    };

    mask = 0;

    while(1)
    {
        int c = getopt_long(argc, argv, "", long_opts, &index);

        if (c == -1) break;

        switch(c)
        {
            case OPTION_ALL:
            case OPTION_BASE:
            case OPTION_FILE:
152
            case OPTION_FILE_LIST:
153 154 155 156 157
            case OPTION_PERF:
            case OPTION_TOTAL:
                mask |= c;
                break;
            case 0:
158
            case '?':
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
            default:
                usage(argv[0]);
                break;
        }
    }

    if (optind < argc)
    {
        *filename = argv[optind];
    }
    else
    {
        usage(argv[0]);
    }

    /* default mask value if none specified */
    if (mask == 0)
    {
        mask = OPTION_BASE;
    }

    return mask;
}

183 184 185
int main(int argc, char **argv)
{
    int ret;
186 187
    int mask;
    char *filename;
188 189 190 191
    struct darshan_job job;
    struct darshan_file cp_file;
    char tmp_string[1024];
    time_t tmp_time = 0;
192
    darshan_fd file;
193
    int i;
194
    int mount_count;
195
    int64_t* devs;
196 197
    char** mnt_pts;
    char** fs_types;
198
    int last_rank = 0;
199 200
    char *token;
    char *save;
201
    char buffer[DARSHAN_JOB_METADATA_LEN];
202

203 204 205 206 207 208 209 210 211 212 213
    hash_entry_t *file_hash = NULL;
    hash_entry_t *curr = NULL;
    hash_entry_t *tmp = NULL;
    hash_entry_t total;
    perf_data_t pdata;
    file_data_t fdata;

    memset(&pdata, 0, sizeof(pdata));
    memset(&total, 0, sizeof(total));

    mask = parse_args(argc, argv, &filename);
214

215
    file = darshan_log_open(filename, "r");
216 217
    if(!file)
    {
218
        fprintf(stderr, "darshan_log_open() failed to open %s\n.", filename);
219 220 221 222
        return(-1);
    }
   
    /* read job info */
223
    ret = darshan_log_getjob(file, &job);
224
    if(ret < 0)
225
    {
226
        fprintf(stderr, "Error: unable to read job information from log file.\n");
227
        darshan_log_close(file);
228 229 230
        return(-1);
    }

231 232 233
    /* warn user about any missing information in this log format */
    darshan_log_print_version_warnings(&job);

234
    ret = darshan_log_getexe(file, tmp_string);
235
    if(ret < 0)
236
    {
237
        fprintf(stderr, "Error: unable to read trailing job information.\n");
238
        darshan_log_close(file);
239 240 241
        return(-1);
    }

242
    /* print job summary */
243
    printf("# darshan log version: %s\n", job.version_string);
244 245
    printf("# size of file statistics: %zu bytes\n", sizeof(cp_file));
    printf("# size of job statistics: %zu bytes\n", sizeof(job));
246
    printf("# exe: %s\n", tmp_string);
Philip Carns's avatar
Philip Carns committed
247 248 249
    printf("# uid: %" PRId64 "\n", job.uid);
    printf("# jobid: %" PRId64 "\n", job.jobid);
    printf("# start_time: %" PRId64 "\n", job.start_time);
250
    tmp_time += job.start_time;
251
    printf("# start_time_asci: %s", ctime(&tmp_time));
Philip Carns's avatar
Philip Carns committed
252
    printf("# end_time: %" PRId64 "\n", job.end_time);
253 254
    tmp_time = 0;
    tmp_time += job.end_time;
255
    printf("# end_time_asci: %s", ctime(&tmp_time));
Philip Carns's avatar
Philip Carns committed
256 257
    printf("# nprocs: %" PRId64 "\n", job.nprocs);
    printf("# run time: %" PRId64 "\n", job.end_time - job.start_time + 1);
258 259 260 261 262
    for(token=strtok_r(job.metadata, "\n", &save);
        token != NULL;
        token=strtok_r(NULL, "\n", &save))
    {
        char *key;
263 264 265 266 267 268 269 270 271 272 273 274 275 276
        char *value;
        /* NOTE: we intentionally only split on the first = character.
         * There may be additional = characters in the value portion
         * (for example, when storing mpi-io hints).
         */
        strcpy(buffer, token);
        key = buffer;
        value = index(buffer, '=');
        if(!value)
            continue;
        /* convert = to a null terminator to split key and value */
        value[0] = '\0';
        value++;
        printf("# metadata: %s = %s\n", key, value);
277
    }
278 279
 
    /* print table of mounted file systems */
280
    ret = darshan_log_getmounts(file, &devs, &mnt_pts, &fs_types, &mount_count);
281
    printf("\n# mounted file systems (device, mount point, and fs type)\n");
282 283 284
    printf("# -------------------------------------------------------\n");
    for(i=0; i<mount_count; i++)
    {
Philip Carns's avatar
Philip Carns committed
285
        printf("# mount entry: %" PRId64 "\t%s\t%s\n", devs[i], mnt_pts[i], fs_types[i]);
286 287
    }
  
288 289 290 291 292 293 294 295 296
    /* try to retrieve first record (may not exist) */
    ret = darshan_log_getfile(file, &job, &cp_file);
    if(ret < 0)
    {
        fprintf(stderr, "Error: failed to parse log file.\n");
        fflush(stderr);
        return(-1);
    }
    if(ret == 0)
297 298 299
    {
        /* it looks like the app didn't open any files */
        printf("# no files opened.\n");
300
        darshan_log_close(file);
301 302 303
        return(0);
    }

304 305 306 307 308 309 310 311 312
    if ((mask & OPTION_BASE))
    {
        printf("\n# description of columns:\n");
        printf("#   <rank>: MPI rank.  -1 indicates that the file is shared\n");
        printf("#      across all processes and statistics are aggregated.\n");
        printf("#   <file>: hash of file path.  0 indicates that statistics\n");
        printf("#      are condensed to refer to all files opened at the given\n");
        printf("#      process.\n");
        printf("#   <counter> and <value>: statistical counters.\n");
313 314
        printf("#      A value of -1 indicates that Darshan could not monitor\n");
        printf("#      that counter, and its value should be ignored.\n");
315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345
        printf("#   <name suffix>: last %d characters of file name.\n", CP_NAME_SUFFIX_LEN);
        printf("#   <mount pt>: mount point that the file resides on.\n");
        printf("#   <fs type>: type of file system that the file resides on.\n");
        printf("\n# description of counters:\n");
        printf("#   CP_POSIX_*: posix operation counts.\n");
        printf("#   CP_COLL_*: MPI collective operation counts.\n");
        printf("#   CP_INDEP_*: MPI independent operation counts.\n");
        printf("#   CP_SPIT_*: MPI split collective operation counts.\n");
        printf("#   CP_NB_*: MPI non blocking operation counts.\n");
        printf("#   READS,WRITES,OPENS,SEEKS,STATS, and MMAPS are types of operations.\n");
        printf("#   CP_*_NC_OPENS: number of indep. and collective pnetcdf opens.\n");
        printf("#   CP_HDF5_OPENS: number of hdf5 opens.\n");
        printf("#   CP_COMBINER_*: combiner counts for MPI mem and file datatypes.\n");
        printf("#   CP_HINTS: number of times MPI hints were used.\n");
        printf("#   CP_VIEWS: number of times MPI file views were used.\n");
        printf("#   CP_MODE: mode that file was opened in.\n");
        printf("#   CP_BYTES_*: total bytes read and written.\n");
        printf("#   CP_MAX_BYTE_*: highest offset byte read and written.\n");
        printf("#   CP_CONSEC_*: number of exactly adjacent reads and writes.\n");
        printf("#   CP_SEQ_*: number of reads and writes from increasing offsets.\n");
        printf("#   CP_RW_SWITCHES: number of times access alternated between read and write.\n");
        printf("#   CP_*_ALIGNMENT: memory and file alignment.\n");
        printf("#   CP_*_NOT_ALIGNED: number of reads and writes that were not aligned.\n");
        printf("#   CP_MAX_*_TIME_SIZE: size of the slowest read and write operations.\n");
        printf("#   CP_SIZE_READ_*: histogram of read access sizes.\n");
        printf("#   CP_SIZE_READ_AGG_*: histogram of MPI datatype total sizes.\n");
        printf("#   CP_EXTENT_READ_*: histogram of MPI datatype extents.\n");
        printf("#   CP_STRIDE*_STRIDE: the four most common strides detected.\n");
        printf("#   CP_STRIDE*_COUNT: count of the four most common strides.\n");
        printf("#   CP_ACCESS*_ACCESS: the four most common access sizes.\n");
        printf("#   CP_ACCESS*_COUNT: count of the four most common access sizes.\n");
346
        printf("#   CP_DEVICE: File system identifier.\n");
347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362
        printf("#   CP_SIZE_AT_OPEN: size of file when first opened.\n");
        printf("#   CP_*_RANK_BYTES: fastest, slowest and variance of bytes transfer.\n");
        printf("#   CP_F_OPEN_TIMESTAMP: timestamp of first open (mpi or posix).\n");
        printf("#   CP_F_*_START_TIMESTAMP: timestamp of first read/write (mpi or posix).\n");
        printf("#   CP_F_*_END_TIMESTAMP: timestamp of last read/write (mpi or posix).\n");
        printf("#   CP_F_CLOSE_TIMESTAMP: timestamp of last close (mpi or posix).\n");
        printf("#   CP_F_POSIX_READ/WRITE_TIME: cumulative time spent in posix reads or writes.\n");
        printf("#   CP_F_MPI_READ/WRITE_TIME: cumulative time spent in mpi-io reads or writes.\n");
        printf("#   CP_F_POSIX_META_TIME: cumulative time spent in posix open, close, fsync, stat and seek, .\n");
        printf("#   CP_F_MPI_META_TIME: cumulative time spent in mpi-io open, close, set_view, and sync.\n");
        printf("#   CP_MAX_*_TIME: duration of the slowest read and write operations.\n");
        printf("#   CP_*_RANK_TIME: fastest, slowest variance of transfer time.\n");

        printf("\n");
        CP_PRINT_HEADER();
    }
363

364 365 366 367 368 369 370 371 372 373 374 375 376 377
    pdata.rank_cumul_io_time = malloc(sizeof(double)*job.nprocs);
    pdata.rank_cumul_md_time = malloc(sizeof(double)*job.nprocs);
    if (!pdata.rank_cumul_io_time || !pdata.rank_cumul_md_time)
    {
        perror("malloc failed");
        darshan_log_close(file);
        return(-1);
    }
    else
    {
        memset(pdata.rank_cumul_io_time, 0, sizeof(double)*job.nprocs);
        memset(pdata.rank_cumul_md_time, 0, sizeof(double)*job.nprocs);
    }

378
    do
379
    {
380 381
        char* mnt_pt = NULL;
        char* fs_type = NULL;
382
        hash_entry_t *hfile = NULL;
383 384 385 386

        if(cp_file.rank != -1 && cp_file.rank < last_rank)
        {
            fprintf(stderr, "Error: log file contains out of order rank data.\n");
387
            fflush(stderr);
388 389 390 391
            return(-1);
        }
        if(cp_file.rank != -1)
            last_rank = cp_file.rank;
392 393 394 395 396 397 398 399 400 401 402 403 404 405 406
        
        for(i=0; i<mount_count; i++)
        {
            if(cp_file.counters[CP_DEVICE] == devs[i])
            {
                mnt_pt = mnt_pts[i];
                fs_type = fs_types[i];
                break;
            }
        }
        if(!mnt_pt)
            mnt_pt = "UNKNOWN";
        if(!fs_type)
            fs_type = "UNKNOWN";

407 408 409 410 411 412 413 414 415 416 417 418 419
        HASH_FIND(hlink,file_hash,&cp_file.hash,sizeof(int64_t),hfile);
        if (!hfile)
        {
            hfile = (hash_entry_t*) malloc(sizeof(*hfile));
            if (!hfile)
            {
                fprintf(stderr,"malloc failure");
                exit(1);
            }

            /* init */
            memset(hfile, 0, sizeof(*hfile));
            hfile->hash          = cp_file.hash;
Philip Carns's avatar
Philip Carns committed
420
            memcpy(hfile->name_suffix, cp_file.name_suffix, CP_NAME_SUFFIX_LEN+1);
421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443
            hfile->type          = 0;
            hfile->procs         = 0;
            hfile->cumul_time    = 0.0;
            hfile->meta_time     = 0.0;

            HASH_ADD(hlink,file_hash,hash,sizeof(int64_t),hfile);
        }

        accum_file(&cp_file, &total, NULL);
        accum_file(&cp_file, hfile, &fdata);
        accum_perf(&cp_file, hfile, &pdata);

        if ((mask & OPTION_BASE))
        {
            for(i=0; i<CP_NUM_INDICES; i++)
            {
                CP_PRINT(&job, &cp_file, i, mnt_pt, fs_type);
            }
            for(i=0; i<CP_F_NUM_INDICES; i++)
            {
                CP_F_PRINT(&job, &cp_file, i, mnt_pt, fs_type);
            }
        }
444
    }while((ret = darshan_log_getfile(file, &job, &cp_file)) == 1);
445 446 447 448

    /* Total Calc */
    if ((mask & OPTION_TOTAL))
    {
449 450
        for(i=0; i<CP_NUM_INDICES; i++)
        {
Philip Carns's avatar
Philip Carns committed
451
            printf("total_%s: %" PRId64 "\n",
452
                   darshan_names[i], total.counters[i]);
453 454 455
        }
        for(i=0; i<CP_F_NUM_INDICES; i++)
        {
456 457
            printf("total_%s: %lf\n",
                   darshan_f_names[i], total.fcounters[i]);
458
        }
459 460
    }

461 462 463 464 465 466
    /* Perf Calc */
    calc_perf(&job, file_hash, &pdata);
    if ((mask & OPTION_PERF))
    {
        printf("\n# performance\n");
        printf("# -----------\n");
Philip Carns's avatar
Philip Carns committed
467
        printf("# total_bytes: %" PRId64 "\n", pdata.total_bytes);
468 469 470 471 472 473 474 475 476 477 478
        printf("# slowest_rank_time: %lf\n", pdata.slowest_rank_time);
        printf("# slowest_rank_meta_time: %lf\n", pdata.slowest_rank_meta_time);
        printf("# shared_time_by_cumul: %lf\n", pdata.shared_time_by_cumul);
        printf("# shared_time_by_open: %lf\n", pdata.shared_time_by_open);
        printf("# shared_time_by_open_lastio: %lf\n", pdata.shared_time_by_open_lastio);
        printf("# shared_meta_time: %lf\n", pdata.shared_meta_time);
        printf("# agg_perf_by_cumul: %lf\n", pdata.agg_perf_by_cumul);
        printf("# agg_perf_by_open: %lf\n", pdata.agg_perf_by_open);
        printf("# agg_perf_by_open_lastio: %lf\n", pdata.agg_perf_by_open_lastio);
        printf("# agg_perf_by_slowest: %lf\n", pdata.agg_perf_by_slowest);
    }
479

480 481 482 483 484 485
    /* File Calc */
    calc_file(&job, file_hash, &fdata);
    if ((mask & OPTION_FILE))
    {
        printf("\n# files\n");
        printf("# -----\n");
Philip Carns's avatar
Philip Carns committed
486
        printf("# total: %" PRId64 " %" PRId64 " %" PRId64 "\n",
487 488 489
               fdata.total,
               fdata.total_size,
               fdata.total_max);
Philip Carns's avatar
Philip Carns committed
490
        printf("# read_only: %" PRId64 " %" PRId64 " %" PRId64 "\n",
491 492 493
               fdata.read_only,
               fdata.read_only_size,
               fdata.read_only_max);
Philip Carns's avatar
Philip Carns committed
494
        printf("# write_only: %" PRId64 " %" PRId64 " %" PRId64 "\n",
495 496 497
               fdata.write_only,
               fdata.write_only_size,
               fdata.write_only_max);
Philip Carns's avatar
Philip Carns committed
498
        printf("# read_write: %" PRId64 " %" PRId64 " %" PRId64 "\n",
499 500 501
               fdata.read_write,
               fdata.read_write_size,
               fdata.read_write_max);
Philip Carns's avatar
Philip Carns committed
502
        printf("# unique: %" PRId64 " %" PRId64 " %" PRId64 "\n",
503 504 505
               fdata.unique,
               fdata.unique_size,
               fdata.unique_max);
Philip Carns's avatar
Philip Carns committed
506
        printf("# shared: %" PRId64 " %" PRId64 " %" PRId64 "\n",
507 508 509
               fdata.shared,
               fdata.shared_size,
               fdata.shared_max);
510 511
    }

512 513 514 515 516 517
    if ((mask & OPTION_FILE_LIST))
    {
        printf("\n# Per-file summary of I/O activity.\n");
        file_list(&job, file_hash);
    }

518
    if(ret < 0)
519
    {
520
        fprintf(stderr, "Error: failed to parse log file.\n");
521
        fflush(stderr);
522 523 524
        return(-1);
    }

525 526 527 528 529
    for(i=0; i<mount_count; i++)
    {
        free(mnt_pts[i]);
        free(fs_types[i]);
    }
530 531 532 533 534 535
    if(mount_count > 0)
    {
        free(devs);
        free(mnt_pts);
        free(fs_types);
    }
536
 
537
    darshan_log_close(file);
538 539 540 541 542 543 544

    HASH_ITER(hlink, file_hash, curr, tmp)
    {
        HASH_DELETE(hlink, file_hash, curr);
        free(curr);
    }

545 546
    return(0);
}
547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577

/*
 * accum_file()
 *
 * Folds one log record into an accumulator entry.  main() calls this once
 * per record for the grand total and once for the per-file hash entry.
 *
 * dfile: record just read from the log.
 * hfile: accumulator to update; its procs count, type flags, counters and
 *        fcounters are modified.
 * fdata: not used by this function (main() passes NULL for the total).
 *
 * Most counters are summed.  The exceptions are handled case by case below:
 * some are copied, some keep a minimum or maximum, and the rank statistics
 * are reset to zero.
 */
void accum_file(struct darshan_file *dfile,
                hash_entry_t *hfile,
                file_data_t *fdata)
{
    int i;

    (void)fdata;

    hfile->procs += 1;

    /* classify the file by how many records (and which ranks) touched it */
    if (dfile->rank == -1)
    {
        hfile->type |= FILETYPE_SHARED;
    }
    else if (hfile->procs > 1)
    {
        hfile->type &= (~FILETYPE_UNIQUE);
        hfile->type |= FILETYPE_PARTSHARED;
    }
    else
    {
        hfile->type |= FILETYPE_UNIQUE;
    }

    for (i = 0; i < CP_NUM_INDICES; i++)
    {
        switch(i)
        {
        case CP_DEVICE:
        case CP_MODE:
        case CP_MEM_ALIGNMENT:
        case CP_FILE_ALIGNMENT:
            /* descriptive values: copied, never summed */
            if(CP_FILE_PARTIAL(hfile))
                hfile->counters[i] = dfile->counters[i];
            break;
        case CP_SIZE_AT_OPEN:
            if (hfile->counters[i] == -1)
            {
                hfile->counters[i] = dfile->counters[i];
            }
            /* keep the smaller size, ignoring partial records */
            if (hfile->counters[i] > dfile->counters[i] && !CP_FILE_PARTIAL(dfile))
            {
                hfile->counters[i] = dfile->counters[i];
            }
            break;
        case CP_MAX_BYTE_READ:
        case CP_MAX_BYTE_WRITTEN:
            /* keep the largest offset seen */
            if (hfile->counters[i] < dfile->counters[i])
            {
                hfile->counters[i] = dfile->counters[i];
            }
            break;

        case CP_STRIDE1_STRIDE:
        case CP_STRIDE2_STRIDE:
        case CP_STRIDE3_STRIDE:
        case CP_STRIDE4_STRIDE:
        case CP_ACCESS1_ACCESS:
        case CP_ACCESS2_ACCESS:
        case CP_ACCESS3_ACCESS:
        case CP_ACCESS4_ACCESS:
           /*
            * do nothing here because these will be stored
            * when the _COUNT is accessed.
            */
           break;

        case CP_STRIDE1_COUNT:
        case CP_STRIDE2_COUNT:
        case CP_STRIDE3_COUNT:
        case CP_STRIDE4_COUNT:
        case CP_ACCESS1_COUNT:
        case CP_ACCESS2_COUNT:
        case CP_ACCESS3_COUNT:
        case CP_ACCESS4_COUNT:
            /*
             * Keep the larger count together with its value.
             * NOTE(review): i-4 assumes each *_COUNT index sits exactly four
             * slots after its matching *_STRIDE / *_ACCESS index; the enum is
             * declared outside this file - confirm.
             */
            if (hfile->counters[i] < dfile->counters[i])
            {
                hfile->counters[i]   = dfile->counters[i];
                hfile->counters[i-4] = dfile->counters[i-4];
            }
            break;
        case CP_FASTEST_RANK:
        case CP_SLOWEST_RANK:
        case CP_FASTEST_RANK_BYTES:
        case CP_SLOWEST_RANK_BYTES:
            /* per-rank statistics are not combined across records */
            hfile->counters[i] = 0;
            break;
        case CP_MAX_READ_TIME_SIZE:
        case CP_MAX_WRITE_TIME_SIZE:
            /* stored alongside CP_F_MAX_*_TIME in the loop below */
            break;
        default:
            hfile->counters[i] += dfile->counters[i];
            break;
        }
    }

    for (i = 0; i < CP_F_NUM_INDICES; i++)
    {
        switch(i)
        {
            case CP_F_FASTEST_RANK_TIME:
            case CP_F_SLOWEST_RANK_TIME:
            case CP_F_VARIANCE_RANK_TIME:
            case CP_F_VARIANCE_RANK_BYTES:
                /* per-rank statistics are not combined across records */
                hfile->fcounters[i] = 0;
                break;
            case CP_F_MAX_READ_TIME:
                /*
                 * Keep the slowest read and the size that went with it.
                 * The accumulator starts at zero, so the record must win
                 * when it is larger for a maximum to ever be stored.
                 */
                if (hfile->fcounters[i] < dfile->fcounters[i])
                {
                    hfile->fcounters[i] = dfile->fcounters[i];
                    hfile->counters[CP_MAX_READ_TIME_SIZE] =
                        dfile->counters[CP_MAX_READ_TIME_SIZE];
                }
                break;
            case CP_F_MAX_WRITE_TIME:
                /* same rule as CP_F_MAX_READ_TIME, for writes */
                if (hfile->fcounters[i] < dfile->fcounters[i])
                {
                    hfile->fcounters[i] = dfile->fcounters[i];
                    hfile->counters[CP_MAX_WRITE_TIME_SIZE] =
                        dfile->counters[CP_MAX_WRITE_TIME_SIZE];
                }
                break;
            default:
                hfile->fcounters[i] += dfile->fcounters[i];
                break;
        }
    }

    return;
}

/*
 * file_list()
 *
 * Prints a short legend followed by one tab-separated line (hash and name
 * suffix) for every entry in the per-file hash table.
 *
 * djob:      not used by this function.
 * file_hash: head of the uthash table of per-file entries; may be NULL, in
 *            which case only the legend is printed.
 */
void file_list(struct darshan_job *djob, hash_entry_t *file_hash)
{
    hash_entry_t *curr = NULL;
    hash_entry_t *tmp = NULL;

    (void)djob;

    printf("# <hash>: hash of file name\n");
    printf("# <suffix>: last %d characters of file name\n", CP_NAME_SUFFIX_LEN);

    printf("\n# <hash>\t<suffix>\n");
    HASH_ITER(hlink, file_hash, curr, tmp)
    {
        printf("%" PRIu64 "\t%s\n",
            curr->hash,
            curr->name_suffix);
    }

    return;
}

/* Add one file of the given size to a (count, size sum, largest) triple. */
static void calc_file_tally(int64_t *count, int64_t *size_sum,
                            int64_t *largest, int64_t size)
{
    *count += 1;
    *size_sum += size;
    if (size > *largest)
    {
        *largest = size;
    }
}

/*
 * calc_file()
 *
 * Rebuilds the file-count summary from the per-file hash table.
 *
 * djob:      not used by this function.
 * file_hash: head of the uthash table of per-file entries.
 * fdata:     summary to fill in; it is cleared first.
 *
 * A file's size is taken as the largest of its size at open, highest byte
 * read and highest byte written.  Every file counts toward the total, toward
 * at most one of read-only / write-only / read-write according to its summed
 * read and write operation counts, and toward shared and/or unique according
 * to its type flags.
 */
void calc_file(struct darshan_job *djob,
               hash_entry_t *file_hash, 
               file_data_t *fdata)
{
    hash_entry_t *entry = NULL;
    hash_entry_t *next = NULL;

    memset(fdata, 0, sizeof(*fdata));

    HASH_ITER(hlink, file_hash, entry, next)
    {
        const int64_t *c = entry->counters;
        int64_t extent;
        int64_t reads;
        int64_t writes;

        extent = max3(c[CP_SIZE_AT_OPEN],
                      c[CP_MAX_BYTE_READ],
                      c[CP_MAX_BYTE_WRITTEN]);

        reads = (c[CP_POSIX_READS]+
                 c[CP_POSIX_FREADS]+
                 c[CP_INDEP_READS]+
                 c[CP_COLL_READS]+
                 c[CP_SPLIT_READS]+
                 c[CP_NB_READS]);

        writes = (c[CP_POSIX_WRITES]+
                  c[CP_POSIX_FWRITES]+
                  c[CP_INDEP_WRITES]+
                  c[CP_COLL_WRITES]+
                  c[CP_SPLIT_WRITES]+
                  c[CP_NB_WRITES]);

        calc_file_tally(&fdata->total, &fdata->total_size,
                        &fdata->total_max, extent);

        /* access mode: a file with neither reads nor writes joins no bucket */
        if (reads)
        {
            if (writes)
            {
                calc_file_tally(&fdata->read_write, &fdata->read_write_size,
                                &fdata->read_write_max, extent);
            }
            else
            {
                calc_file_tally(&fdata->read_only, &fdata->read_only_size,
                                &fdata->read_only_max, extent);
            }
        }
        else if (writes)
        {
            calc_file_tally(&fdata->write_only, &fdata->write_only_size,
                            &fdata->write_only_max, extent);
        }

        /* sharing: the two tests are independent of each other */
        if ((entry->type & (FILETYPE_SHARED|FILETYPE_PARTSHARED)))
        {
            calc_file_tally(&fdata->shared, &fdata->shared_size,
                            &fdata->shared_max, extent);
        }

        if ((entry->type & (FILETYPE_UNIQUE)))
        {
            calc_file_tally(&fdata->unique, &fdata->unique_size,
                            &fdata->unique_max, extent);
        }
    }

    return;
}

/*
 * accum_perf()
 *
 * Adds one log record to the job-wide performance accumulators.
 *
 * dfile: record just read from the log.
 * hfile: per-file entry for the record; not used by this function.
 * pdata: accumulators to update.  For records with rank != -1 the arrays
 *        rank_cumul_io_time and rank_cumul_md_time are indexed by
 *        dfile->rank, so the caller must make sure the rank is within the
 *        bounds of those arrays.
 *
 * The record is treated as MPI-IO when it has any independent or collective
 * opens; the MPI time counters are then used, otherwise the POSIX ones.
 */
void accum_perf(struct darshan_file *dfile,
                hash_entry_t *hfile,
                perf_data_t *pdata)
{
    int64_t mpi_file;
    double meta_time;
    double io_time;

    (void)hfile;

    pdata->total_bytes += dfile->counters[CP_BYTES_READ] +
                          dfile->counters[CP_BYTES_WRITTEN];

    mpi_file = dfile->counters[CP_INDEP_OPENS] +
               dfile->counters[CP_COLL_OPENS];

    /* cumulative time for this record: metadata + read + write */
    if (mpi_file)
    {
        meta_time = dfile->fcounters[CP_F_MPI_META_TIME];
        io_time = dfile->fcounters[CP_F_MPI_META_TIME] +
                  dfile->fcounters[CP_F_MPI_READ_TIME] +
                  dfile->fcounters[CP_F_MPI_WRITE_TIME];
    }
    else
    {
        meta_time = dfile->fcounters[CP_F_POSIX_META_TIME];
        io_time = dfile->fcounters[CP_F_POSIX_META_TIME] +
                  dfile->fcounters[CP_F_POSIX_READ_TIME] +
                  dfile->fcounters[CP_F_POSIX_WRITE_TIME];
    }

    /*
     * Calculation of Shared File Time
     *   Four Methods!!!!
     *     by_cumul: sum time counters and divide by nprocs
     *               (inaccurate if lots of variance between procs)
     *     by_open: difference between timestamp of open and close
     *              (inaccurate if file is left open without i/o happening)
     *     by_open_lastio: difference between timestamp of open and the
     *                     timestamp of last i/o
     *                     (similar to above but fixes case where file is left
     *                      open after io is complete)
     *     by_slowest: use slowest rank time from log data
     *                 (most accurate but requires newer log version)
     */
    if (dfile->rank == -1)
    {
        double open_ts   = dfile->fcounters[CP_F_OPEN_TIMESTAMP];
        double close_ts  = dfile->fcounters[CP_F_CLOSE_TIMESTAMP];
        double read_end  = dfile->fcounters[CP_F_READ_END_TIMESTAMP];
        double write_end = dfile->fcounters[CP_F_WRITE_END_TIMESTAMP];
        double last_io;

        /* by_open (same for MPI or POSIX) */
        if (close_ts > open_ts)
        {
            pdata->shared_time_by_open += close_ts - open_ts;
        }

        /* by_open_lastio (same for MPI or POSIX) */
        last_io = (read_end > write_end) ? read_end : write_end;
        /* be careful: file may have been opened but not read or written */
        if (last_io > open_ts)
        {
            pdata->shared_time_by_open_lastio += last_io - open_ts;
        }

        /* by_cumul */
        pdata->shared_time_by_cumul += io_time;
        pdata->shared_meta_time += meta_time;

        /* by_slowest (same for MPI or POSIX) */
        pdata->shared_time_by_slowest +=
            dfile->fcounters[CP_F_SLOWEST_RANK_TIME];
    }

    /*
     * Calculation of Unique File Time
     *   record the data for each rank and sum it
     */
    else
    {
        pdata->rank_cumul_io_time[dfile->rank] += io_time;
        pdata->rank_cumul_md_time[dfile->rank] += meta_time;
    }

    return;
}

/*
 * calc_perf()
 *
 * Derives the aggregate performance figures after every record has been
 * passed to accum_perf().
 *
 * djob:           job record; only nprocs is read.
 * hash_rank_uniq: not used by this function.
 * pdata:          accumulators to finalise.  shared_time_by_cumul and
 *                 shared_meta_time are divided by nprocs in place, so this
 *                 function must be called only once per pdata.
 *
 * The slowest rank is the one with the largest cumulative I/O time.  Each
 * agg_perf_* value is total_bytes in MiB divided by (slowest rank time +
 * the matching shared-file time); a value is left untouched when that
 * denominator is zero.
 */
void calc_perf(struct darshan_job *djob,
               hash_entry_t *hash_rank_uniq,
               perf_data_t *pdata)
{
    int64_t i;
    double total_mib;
    double elapsed;

    (void)hash_rank_uniq;

    pdata->shared_time_by_cumul =
        pdata->shared_time_by_cumul / (double)djob->nprocs;

    pdata->shared_meta_time = pdata->shared_meta_time / (double)djob->nprocs;

    for (i=0; i<djob->nprocs; i++)
    {
        if (pdata->rank_cumul_io_time[i] > pdata->slowest_rank_time)
        {
            pdata->slowest_rank_time = pdata->rank_cumul_io_time[i];
            pdata->slowest_rank_meta_time = pdata->rank_cumul_md_time[i];
        }
    }

    total_mib = (double)pdata->total_bytes / 1048576.0;

    elapsed = pdata->slowest_rank_time + pdata->shared_time_by_cumul;
    if (elapsed)
    {
        pdata->agg_perf_by_cumul = total_mib / elapsed;
    }

    elapsed = pdata->slowest_rank_time + pdata->shared_time_by_open;
    if (elapsed)
    {
        pdata->agg_perf_by_open = total_mib / elapsed;
    }

    elapsed = pdata->slowest_rank_time + pdata->shared_time_by_open_lastio;
    if (elapsed)
    {
        pdata->agg_perf_by_open_lastio = total_mib / elapsed;
    }

    elapsed = pdata->slowest_rank_time + pdata->shared_time_by_slowest;
    if (elapsed)
    {
        pdata->agg_perf_by_slowest = total_mib / elapsed;
    }

    return;
}