Commit 9b021c1b authored by Shane Snyder's avatar Shane Snyder

update modules to print counter descriptions

parent 31e0fb03
......@@ -11,24 +11,38 @@
#define DARSHAN_BGQ_VER 1
#define BGQ_COUNTERS \
X(BGQ_CSJOBID, "control system jobid") \
X(BGQ_NNODES, "number of BGQ compute nodes") \
X(BGQ_RANKSPERNODE, "number of MPI ranks per node") \
X(BGQ_DDRPERNODE, "size in MB of DDR3 per node") \
X(BGQ_INODES, "number of i/o nodes") \
X(BGQ_ANODES, "dimension of A torus") \
X(BGQ_BNODES, "dimension of B torus") \
X(BGQ_CNODES, "dimension of C torus") \
X(BGQ_DNODES, "dimension of D torus") \
X(BGQ_ENODES, "dimension of E torus") \
X(BGQ_TORUSENABLED, "which dimensions are torus") \
X(BGQ_NUM_INDICES, "end of counters")
/* control system jobid*/\
X(BGQ_CSJOBID) \
/* number of BGQ compute nodes */\
X(BGQ_NNODES) \
/* number of MPI ranks per node */\
X(BGQ_RANKSPERNODE) \
/* size in MB of DDR3 per node */\
X(BGQ_DDRPERNODE) \
/* number of i/o nodes */\
X(BGQ_INODES) \
/* dimension of A torus */\
X(BGQ_ANODES) \
/* dimension of B torus */\
X(BGQ_BNODES) \
/* dimension of C torus */\
X(BGQ_CNODES) \
/* dimension of D torus */\
X(BGQ_DNODES) \
/* dimension of E torus */\
X(BGQ_ENODES) \
/* which dimensions are torus */\
X(BGQ_TORUSENABLED) \
/* end of counters */\
X(BGQ_NUM_INDICES)
#define BGQ_F_COUNTERS \
X(BGQ_F_TIMESTAMP, "timestamp when data was collected") \
X(BGQ_F_NUM_INDICES, "end of counters")
/* timestamp when data was collected */\
X(BGQ_F_TIMESTAMP) \
/* end of counters */\
X(BGQ_F_NUM_INDICES)
#define X(a, b) a,
#define X(a) a,
/* integer counters for the "BGQ" example module */
enum darshan_bgq_indices
{
......
......@@ -20,7 +20,7 @@
#include "darshan-logutils.h"
/* counter name strings for the BGQ module */
#define X(a, b) #a,
#define X(a) #a,
char *bgq_counter_names[] = {
BGQ_COUNTERS
};
......@@ -35,12 +35,14 @@ static int darshan_log_get_bgq_rec(darshan_fd fd, void* bgq_buf,
static int darshan_log_put_bgq_rec(darshan_fd fd, void* bgq_buf, int ver);
static void darshan_log_print_bgq_rec(void *file_rec,
char *file_name, char *mnt_pt, char *fs_type, int ver);
static void darshan_log_print_bgq_description(void);
/* logutil function table for the BGQ module: one entry per operation
 * (get, put, print record, print counter description)
 */
struct darshan_mod_logutil_funcs bgq_logutils = {
    .log_get_record        = darshan_log_get_bgq_rec,
    .log_put_record        = darshan_log_put_bgq_rec,
    .log_print_record      = darshan_log_print_bgq_rec,
    .log_print_description = darshan_log_print_bgq_description,
};
static int darshan_log_get_bgq_rec(darshan_fd fd, void* bgq_buf,
......@@ -112,6 +114,23 @@ static void darshan_log_print_bgq_rec(void *file_rec, char *file_name,
return;
}
/* Print a '#'-prefixed legend describing the BGQ module's counters to
 * stdout, then invoke DARSHAN_PRINT_HEADER() (defined outside this view;
 * presumably emits the per-record column header -- confirm).
 *
 * Takes no arguments and returns nothing; the parameter list is spelled
 * (void) so the definition matches its prototype.
 */
static void darshan_log_print_bgq_description(void)
{
    printf("\n# description of BGQ counters:\n");
    printf("# BGQ_CSJOBID: BGQ control system job ID.\n");
    printf("# BGQ_NNODES: number of BGQ compute nodes for this job.\n");
    printf("# BGQ_RANKSPERNODE: number of MPI ranks per compute node.\n");
    printf("# BGQ_DDRPERNODE: size in MB of DDR3 per compute node.\n");
    printf("# BGQ_INODES: number of BGQ I/O nodes for this job.\n");
    printf("# BGQ_*NODES: dimension of A, B, C, D, & E dimensions of torus.\n");
    printf("# BGQ_TORUSENABLED: which dimensions of the torus are enabled.\n");
    printf("# BGQ_F_TIMESTAMP: timestamp when the BGQ data was collected.\n");

    DARSHAN_PRINT_HEADER();
}
/*
* Local variables:
* c-indent-level: 4
......
......@@ -35,12 +35,14 @@ static int darshan_log_get_hdf5_file(darshan_fd fd, void* hdf5_buf,
static int darshan_log_put_hdf5_file(darshan_fd fd, void* hdf5_buf, int ver);
static void darshan_log_print_hdf5_file(void *file_rec,
char *file_name, char *mnt_pt, char *fs_type, int ver);
static void darshan_log_print_hdf5_description(void);
/* logutil function table for the HDF5 module: one entry per operation
 * (get, put, print record, print counter description)
 */
struct darshan_mod_logutil_funcs hdf5_logutils = {
    .log_get_record        = darshan_log_get_hdf5_file,
    .log_put_record        = darshan_log_put_hdf5_file,
    .log_print_record      = darshan_log_print_hdf5_file,
    .log_print_description = darshan_log_print_hdf5_description,
};
static int darshan_log_get_hdf5_file(darshan_fd fd, void* hdf5_buf,
......@@ -112,6 +114,18 @@ static void darshan_log_print_hdf5_file(void *file_rec, char *file_name,
return;
}
/* Print a '#'-prefixed legend describing the HDF5 module's counters to
 * stdout, then invoke DARSHAN_PRINT_HEADER() (defined outside this view;
 * presumably emits the per-record column header -- confirm).
 *
 * Takes no arguments and returns nothing; the parameter list is spelled
 * (void) so the definition matches its prototype.
 */
static void darshan_log_print_hdf5_description(void)
{
    printf("\n# description of HDF5 counters:\n");
    printf("# HDF5_OPENS: HDF5 file open operation counts.\n");
    printf("# HDF5_F_OPEN_TIMESTAMP: timestamp of first HDF5 file open.\n");
    printf("# HDF5_F_CLOSE_TIMESTAMP: timestamp of last HDF5 file close.\n");

    DARSHAN_PRINT_HEADER();
}
/*
* Local variables:
* c-indent-level: 4
......
......@@ -89,6 +89,8 @@ struct darshan_mod_logutil_funcs
char *fs_type,
int ver
);
/* print module-specific description of I/O characterization data */
void (*log_print_description)(void);
};
extern struct darshan_mod_logutil_funcs *mod_logutils[];
......
......@@ -35,12 +35,14 @@ static int darshan_log_get_mpiio_file(darshan_fd fd, void* mpiio_buf,
static int darshan_log_put_mpiio_file(darshan_fd fd, void* mpiio_buf, int ver);
static void darshan_log_print_mpiio_file(void *file_rec,
char *file_name, char *mnt_pt, char *fs_type, int ver);
static void darshan_log_print_mpiio_description(void);
/* logutil function table for the MPI-IO module: one entry per operation
 * (get, put, print record, print counter description)
 */
struct darshan_mod_logutil_funcs mpiio_logutils = {
    .log_get_record        = darshan_log_get_mpiio_file,
    .log_put_record        = darshan_log_put_mpiio_file,
    .log_print_record      = darshan_log_print_mpiio_file,
    .log_print_description = darshan_log_print_mpiio_description,
};
static int darshan_log_get_mpiio_file(darshan_fd fd, void* mpiio_buf,
......@@ -112,6 +114,40 @@ static void darshan_log_print_mpiio_file(void *file_rec, char *file_name,
return;
}
/* Print a '#'-prefixed legend describing the MPI-IO module's counters to
 * stdout, then invoke DARSHAN_PRINT_HEADER() (defined outside this view;
 * presumably emits the per-record column header -- confirm).
 *
 * Takes no arguments and returns nothing; the parameter list is spelled
 * (void) so the definition matches its prototype.
 */
static void darshan_log_print_mpiio_description(void)
{
    printf("\n# description of MPIIO counters:\n");

    /* integer counters */
    printf("# MPIIO_INDEP_*: MPI independent operation counts.\n");
    printf("# MPIIO_COLL_*: MPI collective operation counts.\n");
    printf("# MPIIO_SPLIT_*: MPI split collective operation counts.\n");
    printf("# MPIIO_NB_*: MPI non blocking operation counts.\n");
    printf("# READS,WRITES,and OPENS are types of operations.\n");
    printf("# MPIIO_SYNCS: MPI file sync operation counts.\n");
    printf("# MPIIO_HINTS: number of times MPI hints were used.\n");
    printf("# MPIIO_VIEWS: number of times MPI file views were used.\n");
    printf("# MPIIO_MODE: MPI-IO access mode that file was opened with.\n");
    printf("# MPIIO_BYTES_*: total bytes read and written at MPI-IO layer.\n");
    printf("# MPIIO_RW_SWITCHES: number of times access alternated between read and write.\n");
    printf("# MPIIO_MAX_*_TIME_SIZE: size of the slowest read and write operations.\n");
    printf("# MPIIO_SIZE_*_AGG_*: histogram of MPI datatype total sizes for read and write operations.\n");
    printf("# MPIIO_ACCESS*_ACCESS: the four most common total access sizes.\n");
    printf("# MPIIO_ACCESS*_COUNT: count of the four most common total access sizes.\n");
    printf("# MPIIO_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared files).\n");
    printf("# MPIIO_*_RANK_BYTES: total bytes transferred at MPI-IO layer by the fastest and slowest ranks (for shared files).\n");

    /* floating point (MPIIO_F_*) counters */
    printf("# MPIIO_F_OPEN_TIMESTAMP: timestamp of first open.\n");
    printf("# MPIIO_F_*_START_TIMESTAMP: timestamp of first MPI-IO read/write.\n");
    printf("# MPIIO_F_*_END_TIMESTAMP: timestamp of last MPI-IO read/write.\n");
    printf("# MPIIO_F_CLOSE_TIMESTAMP: timestamp of last close.\n");
    printf("# MPIIO_F_READ/WRITE/META_TIME: cumulative time spent in MPI-IO read, write, or metadata operations.\n");
    printf("# MPIIO_F_MAX_*_TIME: duration of the slowest MPI-IO read and write operations.\n");
    printf("# MPIIO_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared files).\n");
    printf("# MPIIO_F_VARIANCE_RANK_*: variance of total I/O time and bytes moved for all ranks (for shared files).\n");

    DARSHAN_PRINT_HEADER();
}
/*
* Local variables:
* c-indent-level: 4
......
......@@ -37,6 +37,7 @@ static int darshan_log_get_null_record(darshan_fd fd, void* null_buf,
static int darshan_log_put_null_record(darshan_fd fd, void* null_buf, int ver);
static void darshan_log_print_null_record(void *file_rec,
char *file_name, char *mnt_pt, char *fs_type, int ver);
static void darshan_log_print_null_description(void);
/* structure storing each function needed for implementing the darshan
* logutil interface. these functions are used for reading, writing, and
......@@ -47,6 +48,7 @@ struct darshan_mod_logutil_funcs null_logutils =
.log_get_record = &darshan_log_get_null_record,
.log_put_record = &darshan_log_put_null_record,
.log_print_record = &darshan_log_print_null_record,
.log_print_description = &darshan_log_print_null_description
};
/* retrieve a NULL record from log file descriptor 'fd', storing the
......@@ -134,6 +136,18 @@ static void darshan_log_print_null_record(void *file_rec, char *file_name,
return;
}
/* print out a description of the NULL module record fields
 *
 * Writes a '#'-prefixed legend for the NULL module's counters to stdout.
 * Takes no arguments and returns nothing; the parameter list is spelled
 * (void) so the definition matches its prototype.
 *
 * NOTE(review): the description routines of the other modules end by
 * invoking DARSHAN_PRINT_HEADER(); this one does not. Confirm whether the
 * header is intentionally omitted for the NULL module.
 */
static void darshan_log_print_null_description(void)
{
    printf("\n# description of NULL counters:\n");
    printf("# NULL_BARS: number of 'bar' function calls.\n");
    printf("# NULL_BAR_DAT: value set by last call to function 'bar'.\n");
    printf("# NULL_F_BAR_TIMESTAMP: timestamp of the first call to function 'bar'.\n");
    printf("# NULL_F_BAR_DURATION: duration of the last call to function 'bar'.\n");
}
/*
* Local variables:
* c-indent-level: 4
......
......@@ -321,6 +321,21 @@ int main(int argc, char **argv)
printf("# mount entry:\t%s\t%s\n", mnt_pts[i], fs_types[i]);
}
if(mask & OPTION_BASE)
{
printf("\n# description of columns:\n");
printf("# <module>: module responsible for this I/O record.\n");
printf("# <rank>: MPI rank. -1 indicates that the file is shared\n");
printf("# across all processes and statistics are aggregated.\n");
printf("# <record id>: hash of the record's file path\n");
printf("# <counter name> and <counter value>: statistical counters.\n");
printf("# A value of -1 indicates that Darshan could not monitor\n");
printf("# that counter, and its value should be ignored.\n");
printf("# <file name>: full file path for the record.\n");
printf("# <mount pt>: mount point that the file resides on.\n");
printf("# <fs type>: type of file system that the file resides on.\n");
}
/* warn user if this log file is incomplete */
pdata.rank_cumul_io_time = malloc(sizeof(double)*job.nprocs);
pdata.rank_cumul_md_time = malloc(sizeof(double)*job.nprocs);
......@@ -374,8 +389,9 @@ int main(int argc, char **argv)
if(mask & OPTION_BASE)
{
/* TODO: does each module print header of what each counter means??? */
DARSHAN_PRINT_HEADER();
/* print a header describing the module's I/O characterization data */
if(mod_logutils[i]->log_print_description)
mod_logutils[i]->log_print_description();
}
ret = mod_logutils[i]->log_get_record(fd, mod_buf, &rec_id);
......
......@@ -35,12 +35,14 @@ static int darshan_log_get_pnetcdf_file(darshan_fd fd, void* pnetcdf_buf,
static int darshan_log_put_pnetcdf_file(darshan_fd fd, void* pnetcdf_buf, int ver);
static void darshan_log_print_pnetcdf_file(void *file_rec,
char *file_name, char *mnt_pt, char *fs_type, int ver);
static void darshan_log_print_pnetcdf_description(void);
/* logutil function table for the PNETCDF module: one entry per operation
 * (get, put, print record, print counter description)
 */
struct darshan_mod_logutil_funcs pnetcdf_logutils = {
    .log_get_record        = darshan_log_get_pnetcdf_file,
    .log_put_record        = darshan_log_put_pnetcdf_file,
    .log_print_record      = darshan_log_print_pnetcdf_file,
    .log_print_description = darshan_log_print_pnetcdf_description,
};
static int darshan_log_get_pnetcdf_file(darshan_fd fd, void* pnetcdf_buf,
......@@ -112,6 +114,19 @@ static void darshan_log_print_pnetcdf_file(void *file_rec, char *file_name,
return;
}
/* Print a '#'-prefixed legend describing the PNETCDF module's counters to
 * stdout, then invoke DARSHAN_PRINT_HEADER() (defined outside this view;
 * presumably emits the per-record column header -- confirm).
 *
 * Takes no arguments and returns nothing; the parameter list is spelled
 * (void) so the definition matches its prototype.
 */
static void darshan_log_print_pnetcdf_description(void)
{
    printf("\n# description of PNETCDF counters:\n");
    printf("# PNETCDF_INDEP_OPENS: PNETCDF independent file open operation counts.\n");
    printf("# PNETCDF_COLL_OPENS: PNETCDF collective file open operation counts.\n");
    printf("# PNETCDF_F_OPEN_TIMESTAMP: timestamp of first PNETCDF file open.\n");
    printf("# PNETCDF_F_CLOSE_TIMESTAMP: timestamp of last PNETCDF file close.\n");

    DARSHAN_PRINT_HEADER();
}
/*
* Local variables:
* c-indent-level: 4
......
......@@ -35,12 +35,14 @@ static int darshan_log_get_posix_file(darshan_fd fd, void* posix_buf,
static int darshan_log_put_posix_file(darshan_fd fd, void* posix_buf, int ver);
static void darshan_log_print_posix_file(void *file_rec,
char *file_name, char *mnt_pt, char *fs_type, int ver);
static void darshan_log_print_posix_description(void);
/* logutil function table for the POSIX module: one entry per operation
 * (get, put, print record, print counter description)
 */
struct darshan_mod_logutil_funcs posix_logutils = {
    .log_get_record        = darshan_log_get_posix_file,
    .log_put_record        = darshan_log_put_posix_file,
    .log_print_record      = darshan_log_print_posix_file,
    .log_print_description = darshan_log_print_posix_description,
};
static int darshan_log_get_posix_file(darshan_fd fd, void* posix_buf,
......@@ -112,6 +114,41 @@ static void darshan_log_print_posix_file(void *file_rec, char *file_name,
return;
}
/* Print a '#'-prefixed legend describing the POSIX module's counters to
 * stdout, then invoke DARSHAN_PRINT_HEADER() (defined outside this view;
 * presumably emits the per-record column header -- confirm).
 *
 * Takes no arguments and returns nothing; the parameter list is spelled
 * (void) so the definition matches its prototype.
 */
static void darshan_log_print_posix_description(void)
{
    printf("\n# description of POSIX counters:\n");

    /* integer counters */
    printf("# POSIX_*: posix operation counts.\n");
    printf("# READS,WRITES,OPENS,SEEKS,STATS, and MMAPS are types of operations.\n");
    printf("# POSIX_MODE: mode that file was opened in.\n");
    printf("# POSIX_BYTES_*: total bytes read and written.\n");
    printf("# POSIX_MAX_BYTE_*: highest offset byte read and written.\n");
    printf("# POSIX_CONSEC_*: number of exactly adjacent reads and writes.\n");
    printf("# POSIX_SEQ_*: number of reads and writes from increasing offsets.\n");
    printf("# POSIX_RW_SWITCHES: number of times access alternated between read and write.\n");
    printf("# POSIX_*_ALIGNMENT: memory and file alignment.\n");
    printf("# POSIX_*_NOT_ALIGNED: number of reads and writes that were not aligned.\n");
    printf("# POSIX_MAX_*_TIME_SIZE: size of the slowest read and write operations.\n");
    printf("# POSIX_SIZE_*_*: histogram of read and write access sizes.\n");
    printf("# POSIX_STRIDE*_STRIDE: the four most common strides detected.\n");
    printf("# POSIX_STRIDE*_COUNT: count of the four most common strides.\n");
    printf("# POSIX_ACCESS*_ACCESS: the four most common access sizes.\n");
    printf("# POSIX_ACCESS*_COUNT: count of the four most common access sizes.\n");
    printf("# POSIX_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared files).\n");
    printf("# POSIX_*_RANK_BYTES: bytes transferred by the fastest and slowest ranks (for shared files).\n");

    /* floating point (POSIX_F_*) counters */
    printf("# POSIX_F_OPEN_TIMESTAMP: timestamp of first open.\n");
    printf("# POSIX_F_*_START_TIMESTAMP: timestamp of first read/write.\n");
    printf("# POSIX_F_*_END_TIMESTAMP: timestamp of last read/write.\n");
    printf("# POSIX_F_CLOSE_TIMESTAMP: timestamp of last close.\n");
    printf("# POSIX_F_READ/WRITE/META_TIME: cumulative time spent in read, write, or metadata operations.\n");
    printf("# POSIX_F_MAX_*_TIME: duration of the slowest read and write operations.\n");
    printf("# POSIX_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared files).\n");
    printf("# POSIX_F_VARIANCE_RANK_*: variance of total I/O time and bytes moved for all ranks (for shared files).\n");

    DARSHAN_PRINT_HEADER();
}
/*
* Local variables:
* c-indent-level: 4
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment