/* darshan-parser.c */
/*
 *  (C) 2009 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <zlib.h>
#include <time.h>
#include <stdlib.h>
#include "darshan-logutils.h"

int main(int argc, char **argv)
{
    int ret;
    struct darshan_job job;
    struct darshan_file cp_file;
    char tmp_string[1024];
    int no_files_flag = 0;
    time_t tmp_time = 0;
26
    darshan_fd file;
27
    int i;
28
    int mount_count;
29
    int64_t* devs;
30 31
    char** mnt_pts;
    char** fs_types;
32
    int last_rank = 0;
33 34 35 36 37 38 39

    if(argc != 2)
    {
        fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
        return(-1);
    }

40
    file = darshan_log_open(argv[1]);
41 42
    if(!file)
    {
43
        perror("darshan_log_open");
44 45 46 47
        return(-1);
    }
   
    /* read job info */
48
    ret = darshan_log_getjob(file, &job);
49
    if(ret < 0)
50
    {
51
        fprintf(stderr, "Error: unable to read job information from log file.\n");
52
        darshan_log_close(file);
53 54 55
        return(-1);
    }

56 57 58
    /* warn user about any missing information in this log format */
    darshan_log_print_version_warnings(&job);

59
    ret = darshan_log_getexe(file, tmp_string, &no_files_flag);
60
    if(ret < 0)
61
    {
62
        fprintf(stderr, "Error: unable to read trailing job information.\n");
63
        darshan_log_close(file);
64 65 66
        return(-1);
    }

67
    /* print job summary */
68
    printf("# darshan log version: %s\n", job.version_string);
69 70
    printf("# size of file statistics: %zu bytes\n", sizeof(cp_file));
    printf("# size of job statistics: %zu bytes\n", sizeof(job));
71 72 73 74 75 76 77 78 79 80
    printf("# exe: %s\n", tmp_string);
    printf("# uid: %d\n", job.uid);
    printf("# start_time: %ld\n", (long)job.start_time);
    tmp_time = (time_t)job.start_time;
    printf("# start_time_asci: %s", ctime(&tmp_time));
    printf("# end_time: %ld\n", (long)job.end_time);
    tmp_time = (time_t)job.end_time;
    printf("# end_time_asci: %s", ctime(&tmp_time));
    printf("# nprocs: %d\n", job.nprocs);
    printf("# run time: %ld\n", (long)(job.end_time - job.start_time + 1));
81 82 83 84
 
    /* print table of mounted file systems */
    ret = darshan_log_getmounts(file, &devs, &mnt_pts, &fs_types, &mount_count,
        &no_files_flag);
85
    printf("\n# mounted file systems (device, mount point, and fs type)\n");
86 87 88
    printf("# -------------------------------------------------------\n");
    for(i=0; i<mount_count; i++)
    {
89
        printf("# mount entry: %lld\t%s\t%s\n", lld(devs[i]), mnt_pts[i], fs_types[i]);
90 91
    }
  
92 93 94 95
    if(no_files_flag)
    {
        /* it looks like the app didn't open any files */
        printf("# no files opened.\n");
96
        darshan_log_close(file);
97 98 99 100 101 102 103 104 105 106 107
        return(0);
    }

    printf("\n# description of columns:\n");
    printf("#   <rank>: MPI rank.  -1 indicates that the file is shared\n");
    printf("#      across all processes and statistics are aggregated.\n");
    printf("#   <file>: hash of file path.  0 indicates that statistics\n");
    printf("#      are condensed to refer to all files opened at the given\n");
    printf("#      process.\n");
    printf("#   <counter> and <value>: statistical counters.\n");
    printf("#   <name suffix>: last %d characters of file name.\n", CP_NAME_SUFFIX_LEN);
108 109
    printf("#   <mount pt>: mount point that the file resides on.\n");
    printf("#   <fs type>: type of file system that the file resides on.\n");
110 111 112 113 114 115 116
    printf("\n# description of counters:\n");
    printf("#   CP_POSIX_*: posix operation counts.\n");
    printf("#   CP_COLL_*: MPI collective operation counts.\n");
    printf("#   CP_INDEP_*: MPI independent operation counts.\n");
    printf("#   CP_SPIT_*: MPI split collective operation counts.\n");
    printf("#   CP_NB_*: MPI non blocking operation counts.\n");
    printf("#   READS,WRITES,OPENS,SEEKS,STATS, and MMAPS are types of operations.\n");
117
    printf("#   CP_*_NC_OPENS: number of indep. and collective pnetcdf opens.\n");
118
    printf("#   CP_HDF5_OPENS: number of hdf5 opens.\n");
119 120 121 122 123 124 125 126 127 128 129
    printf("#   CP_COMBINER_*: combiner counts for MPI mem and file datatypes.\n");
    printf("#   CP_HINTS: number of times MPI hints were used.\n");
    printf("#   CP_VIEWS: number of times MPI file views were used.\n");
    printf("#   CP_MODE: mode that file was opened in.\n");
    printf("#   CP_BYTES_*: total bytes read and written.\n");
    printf("#   CP_MAX_BYTE_*: highest offset byte read and written.\n");
    printf("#   CP_CONSEC_*: number of exactly adjacent reads and writes.\n");
    printf("#   CP_SEQ_*: number of reads and writes from increasing offsets.\n");
    printf("#   CP_RW_SWITCHES: number of times access alternated between read and write.\n");
    printf("#   CP_*_ALIGNMENT: memory and file alignment.\n");
    printf("#   CP_*_NOT_ALIGNED: number of reads and writes that were not aligned.\n");
130
    printf("#   CP_MAX_*_TIME_SIZE: size of the slowest read and write operations.\n");
131 132 133 134 135 136 137
    printf("#   CP_SIZE_READ_*: histogram of read access sizes.\n");
    printf("#   CP_SIZE_READ_AGG_*: histogram of MPI datatype total sizes.\n");
    printf("#   CP_EXTENT_READ_*: histogram of MPI datatype extents.\n");
    printf("#   CP_STRIDE*_STRIDE: the four most common strides detected.\n");
    printf("#   CP_STRIDE*_COUNT: count of the four most common strides.\n");
    printf("#   CP_ACCESS*_ACCESS: the four most common access sizes.\n");
    printf("#   CP_ACCESS*_COUNT: count of the four most common access sizes.\n");
138
    printf("#   CP_DEVICE: device id reported by stat().\n");
139
    printf("#   CP_SIZE_AT_OPEN: size of file when first opened.\n");
140 141 142 143
    printf("#   CP_F_OPEN_TIMESTAMP: timestamp of first open (mpi or posix).\n");
    printf("#   CP_F_*_START_TIMESTAMP: timestamp of first read/write (mpi or posix).\n");
    printf("#   CP_F_*_END_TIMESTAMP: timestamp of last read/write (mpi or posix).\n");
    printf("#   CP_F_CLOSE_TIMESTAMP: timestamp of last close (mpi or posix).\n");
144 145 146 147
    printf("#   CP_F_POSIX_READ/WRITE_TIME: cumulative time spent in posix reads or writes.\n");
    printf("#   CP_F_MPI_READ/WRITE_TIME: cumulative time spent in mpi-io reads or writes.\n");
    printf("#   CP_F_POSIX_META_TIME: cumulative time spent in posix open, close, fsync, stat and seek, .\n");
    printf("#   CP_F_MPI_META_TIME: cumulative time spent in mpi-io open, close, set_view, and sync.\n");
148
    printf("#   CP_MAX_*_TIME: duration of the slowest read and write operations.\n");
149 150 151 152 153

    printf("\n");

    CP_PRINT_HEADER();

154
    while((ret = darshan_log_getfile(file, &job, &cp_file)) == 1)
155
    {
156 157
        char* mnt_pt = NULL;
        char* fs_type = NULL;
158 159 160 161 162 163 164 165

        if(cp_file.rank != -1 && cp_file.rank < last_rank)
        {
            fprintf(stderr, "Error: log file contains out of order rank data.\n");
            return(-1);
        }
        if(cp_file.rank != -1)
            last_rank = cp_file.rank;
166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
        
        for(i=0; i<mount_count; i++)
        {
            if(cp_file.counters[CP_DEVICE] == devs[i])
            {
                mnt_pt = mnt_pts[i];
                fs_type = fs_types[i];
                break;
            }
        }
        if(!mnt_pt)
            mnt_pt = "UNKNOWN";
        if(!fs_type)
            fs_type = "UNKNOWN";

181 182
        for(i=0; i<CP_NUM_INDICES; i++)
        {
183
            CP_PRINT(&job, &cp_file, i, mnt_pt, fs_type);
184 185 186
        }
        for(i=0; i<CP_F_NUM_INDICES; i++)
        {
187
            CP_F_PRINT(&job, &cp_file, i, mnt_pt, fs_type);
188
        }
189 190
    }

191
    if(ret < 0)
192
    {
193
        fprintf(stderr, "Error: failed to parse log file.\n");
194 195 196
        return(-1);
    }

197 198 199 200 201 202 203 204 205
    for(i=0; i<mount_count; i++)
    {
        free(mnt_pts[i]);
        free(fs_types[i]);
    }
    free(devs);
    free(mnt_pts);
    free(fs_types);
 
206
    darshan_log_close(file);
207 208
    return(0);
}