Commit 598a2e85 authored by Shane Snyder

add close_start and open_end timers

for MPIIO, PNETCDF, and HDF5 modules.
parent d20ef28f
......@@ -8,7 +8,7 @@
#define __DARSHAN_HDF5_LOG_FORMAT_H
/* current HDF5 log format version */
#define DARSHAN_HDF5_VER 1
#define DARSHAN_HDF5_VER 2
#define HDF5_COUNTERS \
/* count of HDF5 opens */\
......@@ -18,9 +18,13 @@
#define HDF5_F_COUNTERS \
/* timestamp of first open */\
X(HDF5_F_OPEN_TIMESTAMP) \
X(HDF5_F_OPEN_START_TIMESTAMP) \
/* timestamp of first close */\
X(HDF5_F_CLOSE_START_TIMESTAMP) \
/* timestamp of last open */\
X(HDF5_F_OPEN_END_TIMESTAMP) \
/* timestamp of last close */\
X(HDF5_F_CLOSE_TIMESTAMP) \
X(HDF5_F_CLOSE_END_TIMESTAMP) \
/* end of counters*/\
X(HDF5_F_NUM_INDICES)
......
......@@ -8,7 +8,7 @@
#define __DARSHAN_MPIIO_LOG_FORMAT_H
/* current MPI-IO log format version */
#define DARSHAN_MPIIO_VER 2
#define DARSHAN_MPIIO_VER 3
/* TODO: maybe use a counter to track cases in which a derived datatype is used? */
......@@ -92,17 +92,21 @@
#define MPIIO_F_COUNTERS \
/* timestamp of first open */\
X(MPIIO_F_OPEN_TIMESTAMP) \
X(MPIIO_F_OPEN_START_TIMESTAMP) \
/* timestamp of first read */\
X(MPIIO_F_READ_START_TIMESTAMP) \
/* timestamp of first write */\
X(MPIIO_F_WRITE_START_TIMESTAMP) \
/* timestamp of first close */\
X(MPIIO_F_CLOSE_START_TIMESTAMP) \
/* timestamp of last open */\
X(MPIIO_F_OPEN_END_TIMESTAMP) \
/* timestamp of last read */\
X(MPIIO_F_READ_END_TIMESTAMP) \
/* timestamp of last write */\
X(MPIIO_F_WRITE_END_TIMESTAMP) \
/* timestamp of last close */\
X(MPIIO_F_CLOSE_TIMESTAMP) \
X(MPIIO_F_CLOSE_END_TIMESTAMP) \
/* cumulative MPI-IO read time */\
X(MPIIO_F_READ_TIME) \
/* cumulative MPI-IO write time */\
......
......@@ -8,7 +8,7 @@
#define __DARSHAN_PNETCDF_LOG_FORMAT_H
/* current PNETCDF log format version */
#define DARSHAN_PNETCDF_VER 1
#define DARSHAN_PNETCDF_VER 2
#define PNETCDF_COUNTERS \
/* count of PNETCDF independent opens */\
......@@ -20,9 +20,13 @@
#define PNETCDF_F_COUNTERS \
/* timestamp of first open */\
X(PNETCDF_F_OPEN_TIMESTAMP) \
X(PNETCDF_F_OPEN_START_TIMESTAMP) \
/* timestamp of first close */\
X(PNETCDF_F_CLOSE_START_TIMESTAMP) \
/* timestamp of last open */\
X(PNETCDF_F_OPEN_END_TIMESTAMP) \
/* timestamp of last close */\
X(PNETCDF_F_CLOSE_TIMESTAMP) \
X(PNETCDF_F_CLOSE_END_TIMESTAMP) \
/* end of counters*/\
X(PNETCDF_F_NUM_INDICES)
......
......@@ -90,7 +90,7 @@ static int my_rank = -1;
HDF5_UNLOCK(); \
} while(0)
#define HDF5_RECORD_OPEN(__ret, __path, __tm1) do { \
#define HDF5_RECORD_OPEN(__ret, __path, __tm1, __tm2) do { \
darshan_record_id rec_id; \
struct hdf5_file_record_ref *rec_ref; \
char *newpath; \
......@@ -107,8 +107,10 @@ static int my_rank = -1;
if(newpath != __path) free(newpath); \
break; \
} \
if(rec_ref->file_rec->fcounters[HDF5_F_OPEN_TIMESTAMP] == 0) \
rec_ref->file_rec->fcounters[HDF5_F_OPEN_TIMESTAMP] = __tm1; \
if(rec_ref->file_rec->fcounters[HDF5_F_OPEN_START_TIMESTAMP] == 0 || \
rec_ref->file_rec->fcounters[HDF5_F_OPEN_START_TIMESTAMP] > __tm1) \
rec_ref->file_rec->fcounters[HDF5_F_OPEN_START_TIMESTAMP] = __tm1; \
rec_ref->file_rec->fcounters[HDF5_F_OPEN_END_TIMESTAMP] = __tm2; \
rec_ref->file_rec->counters[HDF5_OPENS] += 1; \
darshan_add_record_ref(&(hdf5_runtime->hid_hash), &__ret, sizeof(hid_t), rec_ref); \
if(newpath != __path) free(newpath); \
......@@ -123,7 +125,7 @@ hid_t DARSHAN_DECL(H5Fcreate)(const char *filename, unsigned flags,
{
hid_t ret;
char* tmp;
double tm1;
double tm1, tm2;
unsigned majnum, minnum, relnum;
H5get_libversion(&majnum, &minnum, &relnum);
......@@ -146,6 +148,7 @@ hid_t DARSHAN_DECL(H5Fcreate)(const char *filename, unsigned flags,
tm1 = darshan_core_wtime();
ret = __real_H5Fcreate(filename, flags, create_plist, access_plist);
tm2 = darshan_core_wtime();
if(ret >= 0)
{
/* use ROMIO approach to strip prefix if present */
......@@ -159,7 +162,7 @@ hid_t DARSHAN_DECL(H5Fcreate)(const char *filename, unsigned flags,
}
HDF5_PRE_RECORD();
HDF5_RECORD_OPEN(ret, filename, tm1);
HDF5_RECORD_OPEN(ret, filename, tm1, tm2);
HDF5_POST_RECORD();
}
......@@ -171,7 +174,7 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags,
{
hid_t ret;
char* tmp;
double tm1;
double tm1, tm2;
unsigned majnum, minnum, relnum;
H5get_libversion(&majnum, &minnum, &relnum);
......@@ -194,6 +197,7 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags,
tm1 = darshan_core_wtime();
ret = __real_H5Fopen(filename, flags, access_plist);
tm2 = darshan_core_wtime();
if(ret >= 0)
{
/* use ROMIO approach to strip prefix if present */
......@@ -207,7 +211,7 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags,
}
HDF5_PRE_RECORD();
HDF5_RECORD_OPEN(ret, filename, tm1);
HDF5_RECORD_OPEN(ret, filename, tm1, tm2);
HDF5_POST_RECORD();
}
......@@ -218,19 +222,24 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags,
herr_t DARSHAN_DECL(H5Fclose)(hid_t file_id)
{
struct hdf5_file_record_ref *rec_ref;
double tm1, tm2;
herr_t ret;
MAP_OR_FAIL(H5Fclose);
tm1 = darshan_core_wtime();
ret = __real_H5Fclose(file_id);
tm2 = darshan_core_wtime();
HDF5_PRE_RECORD();
rec_ref = darshan_lookup_record_ref(hdf5_runtime->hid_hash,
&file_id, sizeof(hid_t));
if(rec_ref)
{
rec_ref->file_rec->fcounters[HDF5_F_CLOSE_TIMESTAMP] =
darshan_core_wtime();
if(rec_ref->file_rec->fcounters[HDF5_F_CLOSE_START_TIMESTAMP] == 0 ||
rec_ref->file_rec->fcounters[HDF5_F_CLOSE_START_TIMESTAMP] > tm1)
rec_ref->file_rec->fcounters[HDF5_F_CLOSE_START_TIMESTAMP] = tm1;
rec_ref->file_rec->fcounters[HDF5_F_CLOSE_END_TIMESTAMP] = tm2;
darshan_delete_record_ref(&(hdf5_runtime->hid_hash),
&file_id, sizeof(hid_t));
}
......@@ -349,7 +358,7 @@ static void hdf5_record_reduction_op(void* infile_v, void* inoutfile_v,
}
/* min non-zero (if available) value */
for(j=HDF5_F_OPEN_TIMESTAMP; j<=HDF5_F_OPEN_TIMESTAMP; j++)
for(j=HDF5_F_OPEN_START_TIMESTAMP; j<=HDF5_F_CLOSE_START_TIMESTAMP; j++)
{
if((infile->fcounters[j] < inoutfile->fcounters[j] &&
infile->fcounters[j] > 0) || inoutfile->fcounters[j] == 0)
......@@ -359,7 +368,7 @@ static void hdf5_record_reduction_op(void* infile_v, void* inoutfile_v,
}
/* max */
for(j=HDF5_F_CLOSE_TIMESTAMP; j<=HDF5_F_CLOSE_TIMESTAMP; j++)
for(j=HDF5_F_OPEN_END_TIMESTAMP; j<=HDF5_F_CLOSE_END_TIMESTAMP; j++)
{
if(infile->fcounters[j] > inoutfile->fcounters[j])
tmp_file.fcounters[j] = infile->fcounters[j];
......
......@@ -229,9 +229,10 @@ static int enable_dxt_io_trace = 0;
rec_ref->file_rec->counters[MPIIO_COLL_OPENS] += 1; \
if(__info != MPI_INFO_NULL) \
rec_ref->file_rec->counters[MPIIO_HINTS] += 1; \
if(rec_ref->file_rec->fcounters[MPIIO_F_OPEN_TIMESTAMP] == 0 || \
rec_ref->file_rec->fcounters[MPIIO_F_OPEN_TIMESTAMP] > __tm1) \
rec_ref->file_rec->fcounters[MPIIO_F_OPEN_TIMESTAMP] = __tm1; \
if(rec_ref->file_rec->fcounters[MPIIO_F_OPEN_START_TIMESTAMP] == 0 || \
rec_ref->file_rec->fcounters[MPIIO_F_OPEN_START_TIMESTAMP] > __tm1) \
rec_ref->file_rec->fcounters[MPIIO_F_OPEN_START_TIMESTAMP] = __tm1; \
rec_ref->file_rec->fcounters[MPIIO_F_OPEN_END_TIMESTAMP] = __tm2; \
DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[MPIIO_F_META_TIME], \
__tm1, __tm2, rec_ref->last_meta_end); \
darshan_add_record_ref(&(mpiio_runtime->fh_hash), &__fh, sizeof(MPI_File), rec_ref); \
......@@ -1090,8 +1091,10 @@ int DARSHAN_DECL(MPI_File_close)(MPI_File *fh)
&tmp_fh, sizeof(MPI_File));
if(rec_ref)
{
rec_ref->file_rec->fcounters[MPIIO_F_CLOSE_TIMESTAMP] =
darshan_core_wtime();
if(rec_ref->file_rec->fcounters[MPIIO_F_CLOSE_START_TIMESTAMP] == 0 ||
rec_ref->file_rec->fcounters[MPIIO_F_CLOSE_START_TIMESTAMP] > tm1)
rec_ref->file_rec->fcounters[MPIIO_F_CLOSE_START_TIMESTAMP] = tm1;
rec_ref->file_rec->fcounters[MPIIO_F_CLOSE_END_TIMESTAMP] = tm2;
DARSHAN_TIMER_INC_NO_OVERLAP(
rec_ref->file_rec->fcounters[MPIIO_F_META_TIME],
tm1, tm2, rec_ref->last_meta_end);
......@@ -1273,7 +1276,7 @@ static void mpiio_record_reduction_op(
}
/* min non-zero (if available) value */
for(j=MPIIO_F_OPEN_TIMESTAMP; j<=MPIIO_F_WRITE_START_TIMESTAMP; j++)
for(j=MPIIO_F_OPEN_START_TIMESTAMP; j<=MPIIO_F_CLOSE_START_TIMESTAMP; j++)
{
if((infile->fcounters[j] < inoutfile->fcounters[j] &&
infile->fcounters[j] > 0) || inoutfile->fcounters[j] == 0)
......@@ -1283,7 +1286,7 @@ static void mpiio_record_reduction_op(
}
/* max */
for(j=MPIIO_F_READ_END_TIMESTAMP; j<= MPIIO_F_CLOSE_TIMESTAMP; j++)
for(j=MPIIO_F_OPEN_END_TIMESTAMP; j<= MPIIO_F_CLOSE_END_TIMESTAMP; j++)
{
if(infile->fcounters[j] > inoutfile->fcounters[j])
tmp_file.fcounters[j] = infile->fcounters[j];
......
......@@ -76,7 +76,7 @@ static int my_rank = -1;
PNETCDF_UNLOCK(); \
} while(0)
#define PNETCDF_RECORD_OPEN(__ncidp, __path, __comm, __tm1) do { \
#define PNETCDF_RECORD_OPEN(__ncidp, __path, __comm, __tm1, __tm2) do { \
darshan_record_id rec_id; \
struct pnetcdf_file_record_ref *rec_ref; \
char *newpath; \
......@@ -95,8 +95,10 @@ static int my_rank = -1;
break; \
} \
PMPI_Comm_size(__comm, &comm_size); \
if(rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_TIMESTAMP] == 0) \
rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_TIMESTAMP] = __tm1; \
if(rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_START_TIMESTAMP] == 0 || \
rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_START_TIMESTAMP] > __tm1) \
rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_START_TIMESTAMP] = __tm1; \
rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_END_TIMESTAMP] = __tm2; \
if(comm_size == 1) rec_ref->file_rec->counters[PNETCDF_INDEP_OPENS] += 1; \
else rec_ref->file_rec->counters[PNETCDF_COLL_OPENS] += 1; \
darshan_add_record_ref(&(pnetcdf_runtime->ncid_hash), __ncidp, sizeof(int), rec_ref); \
......@@ -112,12 +114,13 @@ int DARSHAN_DECL(ncmpi_create)(MPI_Comm comm, const char *path,
{
int ret;
char* tmp;
double tm1;
double tm1, tm2;
MAP_OR_FAIL(ncmpi_create);
tm1 = darshan_core_wtime();
ret = __real_ncmpi_create(comm, path, cmode, info, ncidp);
tm2 = darshan_core_wtime();
if(ret == 0)
{
/* use ROMIO approach to strip prefix if present */
......@@ -131,7 +134,7 @@ int DARSHAN_DECL(ncmpi_create)(MPI_Comm comm, const char *path,
}
PNETCDF_PRE_RECORD();
PNETCDF_RECORD_OPEN(ncidp, path, comm, tm1);
PNETCDF_RECORD_OPEN(ncidp, path, comm, tm1, tm2);
PNETCDF_POST_RECORD();
}
......@@ -143,12 +146,13 @@ int DARSHAN_DECL(ncmpi_open)(MPI_Comm comm, const char *path,
{
int ret;
char* tmp;
double tm1;
double tm1, tm2;
MAP_OR_FAIL(ncmpi_open);
tm1 = darshan_core_wtime();
ret = __real_ncmpi_open(comm, path, omode, info, ncidp);
tm2 = darshan_core_wtime();
if(ret == 0)
{
/* use ROMIO approach to strip prefix if present */
......@@ -162,7 +166,7 @@ int DARSHAN_DECL(ncmpi_open)(MPI_Comm comm, const char *path,
}
PNETCDF_PRE_RECORD();
PNETCDF_RECORD_OPEN(ncidp, path, comm, tm1);
PNETCDF_RECORD_OPEN(ncidp, path, comm, tm1, tm2);
PNETCDF_POST_RECORD();
}
......@@ -173,18 +177,23 @@ int DARSHAN_DECL(ncmpi_close)(int ncid)
{
struct pnetcdf_file_record_ref *rec_ref;
int ret;
double tm1, tm2;
MAP_OR_FAIL(ncmpi_close);
tm1 = darshan_core_wtime();
ret = __real_ncmpi_close(ncid);
tm2 = darshan_core_wtime();
PNETCDF_PRE_RECORD();
rec_ref = darshan_lookup_record_ref(pnetcdf_runtime->ncid_hash,
&ncid, sizeof(int));
if(rec_ref)
{
rec_ref->file_rec->fcounters[PNETCDF_F_CLOSE_TIMESTAMP] =
darshan_core_wtime();
if(rec_ref->file_rec->fcounters[PNETCDF_F_CLOSE_START_TIMESTAMP] == 0 ||
rec_ref->file_rec->fcounters[PNETCDF_F_CLOSE_START_TIMESTAMP] > tm1)
rec_ref->file_rec->fcounters[PNETCDF_F_CLOSE_START_TIMESTAMP] = tm1;
rec_ref->file_rec->fcounters[PNETCDF_F_CLOSE_END_TIMESTAMP] = tm2;
darshan_delete_record_ref(&(pnetcdf_runtime->ncid_hash),
&ncid, sizeof(int));
}
......@@ -302,7 +311,7 @@ static void pnetcdf_record_reduction_op(void* infile_v, void* inoutfile_v,
}
/* min non-zero (if available) value */
for(j=PNETCDF_F_OPEN_TIMESTAMP; j<=PNETCDF_F_OPEN_TIMESTAMP; j++)
for(j=PNETCDF_F_OPEN_START_TIMESTAMP; j<=PNETCDF_F_CLOSE_START_TIMESTAMP; j++)
{
if((infile->fcounters[j] < inoutfile->fcounters[j] &&
infile->fcounters[j] > 0) || inoutfile->fcounters[j] == 0)
......@@ -312,7 +321,7 @@ static void pnetcdf_record_reduction_op(void* infile_v, void* inoutfile_v,
}
/* max */
for(j=PNETCDF_F_CLOSE_TIMESTAMP; j<=PNETCDF_F_CLOSE_TIMESTAMP; j++)
for(j=PNETCDF_F_OPEN_END_TIMESTAMP; j<=PNETCDF_F_CLOSE_END_TIMESTAMP; j++)
{
if(infile->fcounters[j] > inoutfile->fcounters[j])
tmp_file.fcounters[j] = infile->fcounters[j];
......
......@@ -30,6 +30,8 @@ char *hdf5_f_counter_names[] = {
};
#undef X
#define DARSHAN_HDF5_FILE_SIZE_1 40
static int darshan_log_get_hdf5_file(darshan_fd fd, void** hdf5_buf_p);
static int darshan_log_put_hdf5_file(darshan_fd fd, void* hdf5_buf);
static void darshan_log_print_hdf5_file(void *file_rec,
......@@ -52,6 +54,7 @@ struct darshan_mod_logutil_funcs hdf5_logutils =
static int darshan_log_get_hdf5_file(darshan_fd fd, void** hdf5_buf_p)
{
struct darshan_hdf5_file *file = *((struct darshan_hdf5_file **)hdf5_buf_p);
int rec_len;
int i;
int ret;
......@@ -65,12 +68,42 @@ static int darshan_log_get_hdf5_file(darshan_fd fd, void** hdf5_buf_p)
return(-1);
}
ret = darshan_log_get_mod(fd, DARSHAN_HDF5_MOD, file,
sizeof(struct darshan_hdf5_file));
if(fd->mod_ver[DARSHAN_HDF5_MOD] == DARSHAN_HDF5_VER)
{
/* log format is in current version, so we don't need to do any
* translation of counters while reading
*/
rec_len = sizeof(struct darshan_hdf5_file);
ret = darshan_log_get_mod(fd, DARSHAN_HDF5_MOD, file, rec_len);
}
else
{
char scratch[1024] = {0};
char *src_p, *dest_p;
int len;
rec_len = DARSHAN_HDF5_FILE_SIZE_1;
ret = darshan_log_get_mod(fd, DARSHAN_HDF5_MOD, scratch, rec_len);
if(ret != rec_len)
goto exit;
/* upconvert version 1 to version 2 in-place */
dest_p = scratch + (sizeof(struct darshan_base_record) +
(1 * sizeof(int64_t)) + (3 * sizeof(double)));
src_p = dest_p - (2 * sizeof(double));
len = sizeof(double);
memmove(dest_p, src_p, len);
/* set F_CLOSE_START and F_OPEN_END to -1 */
*((double *)src_p) = -1;
*((double *)(src_p + sizeof(double))) = -1;
memcpy(file, scratch, sizeof(struct darshan_hdf5_file));
}
exit:
if(*hdf5_buf_p == NULL)
{
if(ret == sizeof(struct darshan_hdf5_file))
if(ret == rec_len)
*hdf5_buf_p = file;
else
free(file);
......@@ -78,7 +111,7 @@ static int darshan_log_get_hdf5_file(darshan_fd fd, void** hdf5_buf_p)
if(ret < 0)
return(-1);
else if(ret < sizeof(struct darshan_hdf5_file))
else if(ret < rec_len)
return(0);
else
{
......@@ -90,7 +123,16 @@ static int darshan_log_get_hdf5_file(darshan_fd fd, void** hdf5_buf_p)
for(i=0; i<HDF5_NUM_INDICES; i++)
DARSHAN_BSWAP64(&file->counters[i]);
for(i=0; i<HDF5_F_NUM_INDICES; i++)
{
/* skip counters we explicitly set to -1 since they don't
* need to be byte swapped
*/
if((fd->mod_ver[DARSHAN_HDF5_MOD] == 1) &&
((i == HDF5_F_CLOSE_START_TIMESTAMP) ||
(i == HDF5_F_OPEN_END_TIMESTAMP)))
continue;
DARSHAN_BSWAP64(&file->fcounters[i]);
}
}
return(1);
......@@ -140,8 +182,15 @@ static void darshan_log_print_hdf5_description(int ver)
{
printf("\n# description of HDF5 counters:\n");
printf("# HDF5_OPENS: HDF5 file open operation counts.\n");
printf("# HDF5_F_OPEN_TIMESTAMP: timestamp of first HDF5 file open.\n");
printf("# HDF5_F_CLOSE_TIMESTAMP: timestamp of last HDF5 file close.\n");
printf("# HDF5_F_*_START_TIMESTAMP: timestamp of first HDF5 file open/close.\n");
printf("# HDF5_F_*_END_TIMESTAMP: timestamp of last HDF5 file open/close.\n");
if(ver == 1)
{
printf("\n# WARNING: HDF5 module log format version 1 does not support the following counters:\n");
printf("# - HDF5_F_CLOSE_START_TIMESTAMP\n");
printf("# - HDF5_F_OPEN_END_TIMESTAMP\n");
}
return;
}
......@@ -242,7 +291,8 @@ static void darshan_log_agg_hdf5_files(void *rec, void *agg_rec, int init_flag)
{
switch(i)
{
case HDF5_F_OPEN_TIMESTAMP:
case HDF5_F_OPEN_START_TIMESTAMP:
case HDF5_F_CLOSE_START_TIMESTAMP:
/* minimum non-zero */
if((hdf5_rec->fcounters[i] > 0) &&
((agg_hdf5_rec->fcounters[i] == 0) ||
......@@ -251,7 +301,8 @@ static void darshan_log_agg_hdf5_files(void *rec, void *agg_rec, int init_flag)
agg_hdf5_rec->fcounters[i] = hdf5_rec->fcounters[i];
}
break;
case HDF5_F_CLOSE_TIMESTAMP:
case HDF5_F_OPEN_END_TIMESTAMP:
case HDF5_F_CLOSE_END_TIMESTAMP:
/* maximum */
if(hdf5_rec->fcounters[i] > agg_hdf5_rec->fcounters[i])
{
......
......@@ -30,6 +30,8 @@ char *mpiio_f_counter_names[] = {
};
#undef X
#define DARSHAN_MPIIO_FILE_SIZE_1 544
static int darshan_log_get_mpiio_file(darshan_fd fd, void** mpiio_buf_p);
static int darshan_log_put_mpiio_file(darshan_fd fd, void* mpiio_buf);
static void darshan_log_print_mpiio_file(void *file_rec,
......@@ -52,6 +54,7 @@ struct darshan_mod_logutil_funcs mpiio_logutils =
static int darshan_log_get_mpiio_file(darshan_fd fd, void** mpiio_buf_p)
{
struct darshan_mpiio_file *file = *((struct darshan_mpiio_file **)mpiio_buf_p);
int rec_len;
int i;
int ret;
......@@ -64,13 +67,43 @@ static int darshan_log_get_mpiio_file(darshan_fd fd, void** mpiio_buf_p)
if(!file)
return(-1);
}
ret = darshan_log_get_mod(fd, DARSHAN_MPIIO_MOD, file,
sizeof(struct darshan_mpiio_file));
if(fd->mod_ver[DARSHAN_MPIIO_MOD] == DARSHAN_MPIIO_VER)
{
/* log format is in current version, so we don't need to do any
* translation of counters while reading
*/
rec_len = sizeof(struct darshan_mpiio_file);
ret = darshan_log_get_mod(fd, DARSHAN_MPIIO_MOD, file, rec_len);
}
else
{
char scratch[1024] = {0};
char *src_p, *dest_p;
int len;
rec_len = DARSHAN_MPIIO_FILE_SIZE_1;
ret = darshan_log_get_mod(fd, DARSHAN_MPIIO_MOD, scratch, rec_len);
if(ret != rec_len)
goto exit;
/* upconvert versions 1/2 to version 3 in-place */
dest_p = scratch + (sizeof(struct darshan_base_record) +
(51 * sizeof(int64_t)) + (5 * sizeof(double)));
src_p = dest_p - (2 * sizeof(double));
len = (12 * sizeof(double));
memmove(dest_p, src_p, len);
/* set F_CLOSE_START and F_OPEN_END to -1 */
*((double *)src_p) = -1;
*((double *)(src_p + sizeof(double))) = -1;
memcpy(file, scratch, sizeof(struct darshan_mpiio_file));
}
exit:
if(*mpiio_buf_p == NULL)
{
if(ret == sizeof(struct darshan_mpiio_file))
if(ret == rec_len)
*mpiio_buf_p = file;
else
free(file);
......@@ -78,7 +111,7 @@ static int darshan_log_get_mpiio_file(darshan_fd fd, void** mpiio_buf_p)
if(ret < 0)
return(-1);
else if(ret < sizeof(struct darshan_mpiio_file))
else if(ret < rec_len)
return(0);
else
{
......@@ -90,7 +123,16 @@ static int darshan_log_get_mpiio_file(darshan_fd fd, void** mpiio_buf_p)
for(i=0; i<MPIIO_NUM_INDICES; i++)
DARSHAN_BSWAP64(&file->counters[i]);
for(i=0; i<MPIIO_F_NUM_INDICES; i++)
{
/* skip counters we explicitly set to -1 since they don't
* need to be byte swapped
*/
if((fd->mod_ver[DARSHAN_MPIIO_MOD] < 3) &&
((i == MPIIO_F_CLOSE_START_TIMESTAMP) ||
(i == MPIIO_F_OPEN_END_TIMESTAMP)))
continue;
DARSHAN_BSWAP64(&file->fcounters[i]);
}
}
return(1);
......@@ -156,20 +198,24 @@ static void darshan_log_print_mpiio_description(int ver)
printf("# MPIIO_ACCESS*_COUNT: count of the four most common total access sizes.\n");
printf("# MPIIO_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared files).\n");
printf("# MPIIO_*_RANK_BYTES: total bytes transferred at MPI-IO layer by the fastest and slowest ranks (for shared files).\n");
printf("# MPIIO_F_OPEN_TIMESTAMP: timestamp of first open.\n");
printf("# MPIIO_F_*_START_TIMESTAMP: timestamp of first MPI-IO read/write.\n");
printf("# MPIIO_F_*_END_TIMESTAMP: timestamp of last MPI-IO read/write.\n");
printf("# MPIIO_F_CLOSE_TIMESTAMP: timestamp of last close.\n");
printf("# MPIIO_F_*_START_TIMESTAMP: timestamp of first MPI-IO open/read/write/close.\n");
printf("# MPIIO_F_*_END_TIMESTAMP: timestamp of last MPI-IO open/read/write/close.\n");
printf("# MPIIO_F_READ/WRITE/META_TIME: cumulative time spent in MPI-IO read, write, or metadata operations.\n");
printf("# MPIIO_F_MAX_*_TIME: duration of the slowest MPI-IO read and write operations.\n");
printf("# MPIIO_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared files).\n");
printf("# MPIIO_F_VARIANCE_RANK_*: variance of total I/O time and bytes moved for all ranks (for shared files).\n");
if(ver < 2)
if(ver == 1)
{
printf("\n# WARNING: MPIIO module log format version 1 has the following limitations:\n");
printf("# - MPIIO_F_WRITE_START_TIMESTAMP may not be accurate.\n");
}
if(ver <= 2)
{
printf("\n# WARNING: MPIIO module log format version <=2 does not support the following counters:\n");
printf("# - MPIIO_F_CLOSE_START_TIMESTAMP\n");
printf("# - MPIIO_F_OPEN_END_TIMESTAMP\n");
}
return;
}
......@@ -422,9 +468,10 @@ static void darshan_log_agg_mpiio_files(void *rec, void *agg_rec, int init_flag)
/* sum */
agg_mpi_rec->fcounters[i] += mpi_rec->fcounters[i];
break;
case MPIIO_F_OPEN_TIMESTAMP:
case MPIIO_F_OPEN_START_TIMESTAMP:
case MPIIO_F_READ_START_TIMESTAMP:
case MPIIO_F_WRITE_START_TIMESTAMP:
case MPIIO_F_CLOSE_START_TIMESTAMP:
/* minimum non-zero */
if((mpi_rec->fcounters[i] > 0) &&
((agg_mpi_rec->fcounters[i] == 0) ||
......@@ -433,9 +480,10 @@ static void darshan_log_agg_mpiio_files(void *rec, void *agg_rec, int init_flag)
agg_mpi_rec->fcounters[i] = mpi_rec->fcounters[i];
}
break;
case MPIIO_F_OPEN_END_TIMESTAMP:
case MPIIO_F_READ_END_TIMESTAMP:
case MPIIO_F_WRITE_END_TIMESTAMP:
case MPIIO_F_CLOSE_TIMESTAMP:
case MPIIO_F_CLOSE_END_TIMESTAMP:
/* maximum */
if(mpi_rec->fcounters[i] > agg_mpi_rec->fcounters[i])
{
......
......@@ -1149,18 +1149,20 @@ void mpiio_accum_file(struct darshan_mpiio_file *mfile,
{
switch(i)
{
case MPIIO_F_OPEN_TIMESTAMP:
case MPIIO_F_OPEN_START_TIMESTAMP:
case MPIIO_F_READ_START_TIMESTAMP:
case MPIIO_F_WRITE_START_TIMESTAMP:
case MPIIO_F_CLOSE_START_TIMESTAMP:
if(tmp->fcounters[i] == 0 ||
tmp->fcounters[i] > mfile->fcounters[i])
{
tmp->fcounters[i] = mfile->fcounters[i];
}
break;
case MPIIO_F_OPEN_END_TIMESTAMP:
case MPIIO_F_READ_END_TIMESTAMP:
case MPIIO_F_WRITE_END_TIMESTAMP:
case MPIIO_F_CLOSE_TIMESTAMP:
case MPIIO_F_CLOSE_END_TIMESTAMP:
if(tmp->fcounters[i] == 0 ||
tmp->fcounters[i] < mfile->fcounters[i])
{
......@@ -1386,12 +1388,12 @@ void mpiio_accum_perf(struct darshan_mpiio_file *mfile,
if(mfile->base_rec.rank == -1)
{
/* by_open */
if(mfile->fcounters[MPIIO_F_CLOSE_TIMESTAMP] >
mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP])
if(mfile->fcounters[MPIIO_F_CLOSE_END_TIMESTAMP] >
mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP])
{
pdata->shared_time_by_open +=
mfile->fcounters[MPIIO_F_CLOSE_TIMESTAMP] -
mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP];
mfile->fcounters[MPIIO_F_CLOSE_END_TIMESTAMP] -
mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP];
}
/* by_open_lastio */
......@@ -1399,21 +1401,21 @@ void mpiio_accum_perf(struct darshan_mpiio_file *mfile,
mfile->fcounters[MPIIO_F_WRITE_END_TIMESTAMP])
{
/* be careful: file may have been opened but not read or written */
if(mfile->fcounters[MPIIO_F_READ_END_TIMESTAMP] > mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP])
if(mfile->fcounters[MPIIO_F_READ_END_TIMESTAMP] > mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP])
{
pdata->shared_time_by_open_lastio +=
mfile->fcounters[MPIIO_F_READ_END_TIMESTAMP] -
mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP];
mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP];
}
}
else
{
/* be careful: file may have been opened but not read or written */
if(mfile->fcounters[MPIIO_F_WRITE_END_TIMESTAMP] > mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP])
if(mfile->fcounters[MPIIO_F_WRITE_END_TIMESTAMP] > mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP])
{
pdata->shared_time_by_open_lastio +=
mfile->fcounters[MPIIO_F_WRITE_END_TIMESTAMP] -
mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP];
mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP];
}
}
......@@ -2011,7 +2013,7 @@ void mpiio_file_list(hash_entry_t *file_hash,
if(detail_flag)
{
for(i=MPIIO_F_OPEN_TIMESTAMP; i<=MPIIO_F_CLOSE_TIMESTAMP; i++)
for(i=MPIIO_F_OPEN_START_TIMESTAMP; i<=MPIIO_F_CLOSE_END_TIMESTAMP; i++)
{
printf("\t%f", file_rec->fcounters[i]);
}
......
......@@ -30,6 +30,8 @@ char *pnetcdf_f_counter_names[] = {
};
#undef X
#define DARSHAN_PNETCDF_FILE_SIZE_1 48
static int darshan_log_get_pnetcdf_file(darshan_fd fd, void** pnetcdf_buf_p);
static int darshan_log_put_pnetcdf_file(darshan_fd fd, void* pnetcdf_buf);
static void darshan_log_print_pnetcdf_file(void *file_rec,
......@@ -52,6 +54,7 @@ struct darshan_mod_logutil_funcs pnetcdf_logutils =
static int darshan_log_get_pnetcdf_file(darshan_fd fd, void** pnetcdf_buf_p)
{
struct darshan_pnetcdf_file *file = *((struct darshan_pnetcdf_file **)pnetcdf_buf_p);
int rec_len;
int i;
int ret;
......@@ -65,12 +68,42 @@ static int darshan_log_get_pnetcdf_file(darshan_fd fd, void** pnetcdf_buf_p)
return(-1);
}
ret = darshan_log_get_mod(fd, DARSHAN_PNETCDF_MOD, file,
sizeof(struct darshan_pnetcdf_file));
if(fd->mod_ver[DARSHAN_PNETCDF_MOD] == DARSHAN_PNETCDF_VER)
{
/* log format is in current version, so we don't need to do any
* translation of counters while reading
*/
rec_len = sizeof(struct darshan_pnetcdf_file);
ret = darshan_log_get_mod(fd, DARSHAN_PNETCDF_MOD, file, rec_len);
}
else
{
char scratch[1024] = {0};
char *src_p, *dest_p;