GitLab maintenance scheduled for Today, 2019-12-05, from 17:00 to 18:00 CT - Services will be unavailable during this time.

Commit 7e432c02 authored by Philip Carns

added tracking for time and size of slowest posix operations


git-svn-id: https://svn.mcs.anl.gov/repos/darshan/trunk@98 3b7491f3-a168-0410-bf4b-c445ed680a29
parent 2b243814
......@@ -92,6 +92,8 @@ enum darshan_indices
CP_MEM_ALIGNMENT, /* mem alignment in bytes */
CP_FILE_NOT_ALIGNED, /* count of accesses not file aligned */
CP_FILE_ALIGNMENT, /* file alignment in bytes */
CP_MAX_READ_TIME_SIZE,
CP_MAX_WRITE_TIME_SIZE,
/* buckets */
CP_SIZE_READ_0_100, /* count of posix read size ranges */
CP_SIZE_READ_100_1K,
......@@ -194,6 +196,8 @@ enum f_darshan_indices
CP_F_MPI_META_TIME, /* cumulative mpi-io meta time */
CP_F_MPI_READ_TIME, /* cumulative mpi-io read time */
CP_F_MPI_WRITE_TIME, /* cumulative mpi-io write time */
CP_F_MAX_READ_TIME,
CP_F_MAX_WRITE_TIME,
CP_F_NUM_INDICES,
};
......
......@@ -71,6 +71,8 @@ char *darshan_names[] = {
"CP_MEM_ALIGNMENT", /* mem alignment in bytes */
"CP_FILE_NOT_ALIGNED", /* count of accesses not file aligned */
"CP_FILE_ALIGNMENT", /* file alignment in bytes */
"CP_MAX_READ_TIME_SIZE",
"CP_MAX_WRITE_TIME_SIZE",
"CP_SIZE_READ_0_100", /* count of posix read size ranges */
"CP_SIZE_READ_100_1K",
"CP_SIZE_READ_1K_10K",
......@@ -165,6 +167,8 @@ char *darshan_f_names[] = {
"CP_F_MPI_META_TIME", /* cumulative mpi-io metadata time */
"CP_F_MPI_READ_TIME", /* cumulative mpi-io read time */
"CP_F_MPI_WRITE_TIME", /* cumulative mpi-io write time */
"CP_F_MAX_READ_TIME",
"CP_F_MAX_WRITE_TIME",
"CP_F_NUM_INDICES",
};
......
......@@ -124,6 +124,7 @@ int main(int argc, char **argv)
printf("# CP_RW_SWITCHES: number of times access alternated between read and write.\n");
printf("# CP_*_ALIGNMENT: memory and file alignment.\n");
printf("# CP_*_NOT_ALIGNED: number of reads and writes that were not aligned.\n");
printf("# CP_MAX_*_TIME_SIZE: size of the slowest read and write operations.\n");
printf("# CP_SIZE_READ_*: histogram of read access sizes.\n");
printf("# CP_SIZE_READ_AGG_*: histogram of MPI datatype total sizes.\n");
printf("# CP_EXTENT_READ_*: histogram of MPI datatype extents.\n");
......@@ -139,6 +140,7 @@ int main(int argc, char **argv)
printf("# CP_F_MPI_READ/WRITE_TIME: cumulative time spent in mpi-io reads or writes.\n");
printf("# CP_F_POSIX_META_TIME: cumulative time spent in posix open, close, fsync, stat and seek, .\n");
printf("# CP_F_MPI_META_TIME: cumulative time spent in mpi-io open, close, set_view, and sync.\n");
printf("# CP_MAX_*_TIME: duration of the slowest read and write operations.\n");
printf("\n");
......@@ -206,6 +208,8 @@ int main(int argc, char **argv)
CP_PRINT(&job, &cp_file, CP_MEM_ALIGNMENT);
CP_PRINT(&job, &cp_file, CP_FILE_NOT_ALIGNED);
CP_PRINT(&job, &cp_file, CP_FILE_ALIGNMENT);
CP_PRINT(&job, &cp_file, CP_MAX_READ_TIME_SIZE);
CP_PRINT(&job, &cp_file, CP_MAX_WRITE_TIME_SIZE);
CP_PRINT(&job, &cp_file, CP_SIZE_READ_0_100);
CP_PRINT(&job, &cp_file, CP_SIZE_READ_100_1K);
CP_PRINT(&job, &cp_file, CP_SIZE_READ_1K_10K);
......@@ -295,6 +299,8 @@ int main(int argc, char **argv)
CP_F_PRINT(&job, &cp_file, CP_F_MPI_META_TIME);
CP_F_PRINT(&job, &cp_file, CP_F_MPI_READ_TIME);
CP_F_PRINT(&job, &cp_file, CP_F_MPI_WRITE_TIME);
CP_F_PRINT(&job, &cp_file, CP_F_MAX_READ_TIME);
CP_F_PRINT(&job, &cp_file, CP_F_MAX_WRITE_TIME);
}
if(ret > 0 && ret < sizeof(cp_file))
......
......@@ -1147,6 +1147,8 @@ static void darshan_file_reduce(void* infile_v,
/* pick one */
tmp_file.counters[CP_FILE_ALIGNMENT] = infile->counters[CP_FILE_ALIGNMENT];
/* skip CP_MAX_*_TIME_SIZE; handled in floating point section */
/* sum */
for(j=CP_SIZE_READ_0_100; j<=CP_EXTENT_WRITE_1G_PLUS; j++)
{
......@@ -1242,6 +1244,39 @@ static void darshan_file_reduce(void* infile_v,
inoutfile->fcounters[j];
}
/* max (special case) */
if(infile->fcounters[CP_F_MAX_WRITE_TIME] >
inoutfile->fcounters[CP_F_MAX_WRITE_TIME])
{
tmp_file.fcounters[CP_F_MAX_WRITE_TIME] =
infile->fcounters[CP_F_MAX_WRITE_TIME];
tmp_file.counters[CP_MAX_WRITE_TIME_SIZE] =
infile->counters[CP_MAX_WRITE_TIME_SIZE];
}
else
{
tmp_file.fcounters[CP_F_MAX_WRITE_TIME] =
inoutfile->fcounters[CP_F_MAX_WRITE_TIME];
tmp_file.counters[CP_MAX_WRITE_TIME_SIZE] =
inoutfile->counters[CP_MAX_WRITE_TIME_SIZE];
}
if(infile->fcounters[CP_F_MAX_READ_TIME] >
inoutfile->fcounters[CP_F_MAX_READ_TIME])
{
tmp_file.fcounters[CP_F_MAX_READ_TIME] =
infile->fcounters[CP_F_MAX_READ_TIME];
tmp_file.counters[CP_MAX_READ_TIME_SIZE] =
infile->counters[CP_MAX_READ_TIME_SIZE];
}
else
{
tmp_file.fcounters[CP_F_MAX_READ_TIME] =
inoutfile->fcounters[CP_F_MAX_READ_TIME];
tmp_file.counters[CP_MAX_READ_TIME_SIZE] =
inoutfile->counters[CP_MAX_READ_TIME_SIZE];
}
/* pick one name suffix */
strcpy(tmp_file.name_suffix, infile->name_suffix);
......
......@@ -84,6 +84,7 @@ static void cp_access_counter(struct darshan_file_runtime* file, ssize_t size,
off_t old_offset; \
int64_t file_alignment; \
struct darshan_file_runtime* file; \
double __elapsed = __tm2-__tm1; \
if(__ret < 0) break; \
file = darshan_file_by_fd(__fd); \
if(!file) break; \
......@@ -117,10 +118,13 @@ static void cp_access_counter(struct darshan_file_runtime* file, ssize_t size,
if(file->last_io_type == CP_READ) \
CP_INC(file, CP_RW_SWITCHES, 1); \
file->last_io_type = CP_WRITE; \
CP_F_INC(file, CP_F_POSIX_WRITE_TIME, (__tm2-__tm1)); \
CP_F_INC(file, CP_F_POSIX_WRITE_TIME, (__elapsed)); \
if(CP_F_VALUE(file, CP_F_WRITE_START_TIMESTAMP) == 0) \
CP_F_SET(file, CP_F_WRITE_START_TIMESTAMP, __tm1); \
CP_F_SET(file, CP_F_WRITE_END_TIMESTAMP, __tm2); \
if(CP_F_VALUE(file, CP_F_MAX_WRITE_TIME) < __elapsed){ \
CP_F_SET(file, CP_F_MAX_WRITE_TIME, __elapsed); \
CP_SET(file, CP_MAX_WRITE_TIME_SIZE, __ret); } \
} while(0)
#define CP_RECORD_READ(__ret, __fd, __count, __update_offset, __aligned, __stream_flag, __tm1, __tm2) do{ \
......@@ -128,6 +132,7 @@ static void cp_access_counter(struct darshan_file_runtime* file, ssize_t size,
off_t old_offset; \
struct darshan_file_runtime* file; \
int64_t file_alignment; \
double __elapsed = __tm2-__tm1; \
if(__ret < 0) break; \
file = darshan_file_by_fd(__fd); \
if(!file) break; \
......@@ -161,10 +166,13 @@ static void cp_access_counter(struct darshan_file_runtime* file, ssize_t size,
if(file->last_io_type == CP_WRITE) \
CP_INC(file, CP_RW_SWITCHES, 1); \
file->last_io_type = CP_READ; \
CP_F_INC(file, CP_F_POSIX_READ_TIME, (__tm2-__tm1)); \
CP_F_INC(file, CP_F_POSIX_READ_TIME, (__elapsed)); \
if(CP_F_VALUE(file, CP_F_READ_START_TIMESTAMP) == 0) \
CP_F_SET(file, CP_F_READ_START_TIMESTAMP, __tm1); \
CP_F_SET(file, CP_F_READ_END_TIMESTAMP, __tm2); \
if(CP_F_VALUE(file, CP_F_MAX_READ_TIME) < __elapsed){ \
CP_F_SET(file, CP_F_MAX_READ_TIME, __elapsed); \
CP_SET(file, CP_MAX_READ_TIME_SIZE, __ret); } \
} while(0)
#define CP_RECORD_OPEN(__ret, __path, __mode, __stream_flag, __tm1, __tm2) do { \
......@@ -1059,6 +1067,13 @@ void darshan_condense(void)
CP_MAX(base_file, i, CP_VALUE(iter_file, i));
break;
/* do nothing with these; they are handled in the floating
* point loop
*/
case CP_MAX_WRITE_TIME_SIZE:
case CP_MAX_READ_TIME_SIZE:
break;
/* most records can simply be added */
default:
CP_INC(base_file, i, CP_VALUE(iter_file, i));
......@@ -1067,7 +1082,28 @@ void darshan_condense(void)
}
for(i=0; i<CP_F_NUM_INDICES; i++)
{
CP_F_SET(base_file, i, CP_F_VALUE(iter_file, i) + CP_F_VALUE(base_file, i));
switch(i)
{
case CP_F_MAX_WRITE_TIME:
if(CP_F_VALUE(iter_file, i) > CP_F_VALUE(base_file, i))
{
CP_F_SET(base_file, i, CP_F_VALUE(iter_file, i));
CP_SET(base_file, CP_MAX_WRITE_TIME_SIZE,
CP_VALUE(iter_file, CP_MAX_WRITE_TIME_SIZE));
}
break;
case CP_F_MAX_READ_TIME:
if(CP_F_VALUE(iter_file, i) > CP_F_VALUE(base_file, i))
{
CP_F_SET(base_file, i, CP_F_VALUE(iter_file, i));
CP_SET(base_file, CP_MAX_READ_TIME_SIZE,
CP_VALUE(iter_file, CP_MAX_READ_TIME_SIZE));
}
break;
default:
CP_F_SET(base_file, i, CP_F_VALUE(iter_file, i) + CP_F_VALUE(base_file, i));
break;
}
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment