Commit b5b3f202 authored by Philip Carns's avatar Philip Carns

fixes #132

Darshan now considers MPI-IO time if available when calculating
CP_F_FASTEST_RANK_TIME and CP_F_SLOWEST_RANK_TIME


git-svn-id: https://svn.mcs.anl.gov/repos/darshan/trunk@860 3b7491f3-a168-0410-bf4b-c445ed680a29
parent 9aeaecab
......@@ -27,6 +27,8 @@ darshan-2.2.9-pre1
- More thorough output from darshan-parser --perf, suggested by Huong Luu.
- Increased uncompressed log header size from 1 KiB to 4 KiB to support
additional annotations.
- CP_F_{FASTEST/SLOWEST}_RANK_TIME counters now take MPI-IO time into
account, not just POSIX time, issue reported by Huong Luu.
darshan-2.2.8
=============
......
......@@ -19,7 +19,7 @@
#endif
/* update this on file format changes */
#define CP_VERSION "2.04"
#define CP_VERSION "2.05"
/* magic number for validating output files and checking byte order */
#define CP_MAGIC_NR 6567223
......@@ -217,7 +217,11 @@ enum f_darshan_indices
CP_F_MPI_WRITE_TIME, /* cumulative mpi-io write time */
CP_F_MAX_READ_TIME,
CP_F_MAX_WRITE_TIME,
CP_F_FASTEST_RANK_TIME,
/* Total I/O and meta time consumed by fastest and slowest ranks,
* reported in either MPI or POSIX time depending on how the file
* was accessed.
*/
CP_F_FASTEST_RANK_TIME,
CP_F_SLOWEST_RANK_TIME,
CP_F_VARIANCE_RANK_TIME,
CP_F_VARIANCE_RANK_BYTES,
......
......@@ -1262,6 +1262,7 @@ static int cp_log_reduction(struct darshan_job_runtime* final_job, int rank,
MPI_Datatype rtype;
struct darshan_file* tmp_array = NULL;
int shared_count = 0;
double mpi_time, posix_time;
/* register a reduction operation */
ret = DARSHAN_MPI_CALL(PMPI_Op_create)(darshan_file_reduce, 1, &reduce_op);
......@@ -1333,6 +1334,14 @@ static int cp_log_reduction(struct darshan_job_runtime* final_job, int rank,
{
if(final_job->file_array[j].hash == hash_array[i])
{
posix_time =
final_job->file_array[j].fcounters[CP_F_POSIX_META_TIME] +
final_job->file_array[j].fcounters[CP_F_POSIX_READ_TIME] +
final_job->file_array[j].fcounters[CP_F_POSIX_WRITE_TIME];
mpi_time =
final_job->file_array[j].fcounters[CP_F_MPI_META_TIME] +
final_job->file_array[j].fcounters[CP_F_MPI_READ_TIME] +
final_job->file_array[j].fcounters[CP_F_MPI_WRITE_TIME];
/*
* Initialize fastest/slowest info prior
......@@ -1343,20 +1352,29 @@ static int cp_log_reduction(struct darshan_job_runtime* final_job, int rank,
final_job->file_array[j].counters[CP_FASTEST_RANK_BYTES] =
final_job->file_array[j].counters[CP_BYTES_READ] +
final_job->file_array[j].counters[CP_BYTES_WRITTEN];
final_job->file_array[j].fcounters[CP_F_FASTEST_RANK_TIME] =
final_job->file_array[j].fcounters[CP_F_POSIX_META_TIME] +
final_job->file_array[j].fcounters[CP_F_POSIX_READ_TIME] +
final_job->file_array[j].fcounters[CP_F_POSIX_WRITE_TIME];
/* use MPI timing if this file was accessed with MPI */
if(mpi_time > 0)
{
final_job->file_array[j].fcounters[CP_F_FASTEST_RANK_TIME] =
mpi_time;
}
else
{
final_job->file_array[j].fcounters[CP_F_FASTEST_RANK_TIME] =
posix_time;
}
/* Until reduction occurs, we assume that this rank is
* both the fastest and slowest. It is up to the
* reduction operator to find the true min and max if it
* is a shared file.
*/
final_job->file_array[j].counters[CP_SLOWEST_RANK] =
final_job->file_array[j].rank;
final_job->file_array[j].counters[CP_FASTEST_RANK];
final_job->file_array[j].counters[CP_SLOWEST_RANK_BYTES] =
final_job->file_array[j].counters[CP_BYTES_READ] +
final_job->file_array[j].counters[CP_BYTES_WRITTEN];
final_job->file_array[j].counters[CP_FASTEST_RANK_BYTES];
final_job->file_array[j].fcounters[CP_F_SLOWEST_RANK_TIME] =
final_job->file_array[j].fcounters[CP_F_POSIX_META_TIME] +
final_job->file_array[j].fcounters[CP_F_POSIX_READ_TIME] +
final_job->file_array[j].fcounters[CP_F_POSIX_WRITE_TIME];
final_job->file_array[j].fcounters[CP_F_FASTEST_RANK_TIME];
final_job->file_array[j].rank = -1;
break;
......
......@@ -374,7 +374,14 @@ int darshan_log_getjob(darshan_fd file, struct darshan_job *job)
return(-1);
}
if(strcmp(file->version, "2.04") == 0)
if(strcmp(file->version, "2.05") == 0)
{
getjob_internal = getjob_internal_201;
getfile_internal = getfile_internal_204;
file->job_struct_size = sizeof(*job);
file->COMPAT_CP_EXE_LEN = CP_EXE_LEN;
}
else if(strcmp(file->version, "2.04") == 0)
{
getjob_internal = getjob_internal_201;
getfile_internal = getfile_internal_204;
......@@ -755,17 +762,26 @@ void darshan_log_close(darshan_fd file)
*/
void darshan_log_print_version_warnings(struct darshan_job *job)
{
if(strcmp(job->version_string, "2.04") == 0)
if(strcmp(job->version_string, "2.05") == 0)
{
/* current version */
return;
}
if(strcmp(job->version_string, "2.04") == 0)
{
printf("# WARNING: version 2.04 log format has the following limitations:\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return;
}
if(strcmp(job->version_string, "2.03") == 0)
{
/* no meaningful change to interpretation of log file, 2.04 just
/* no meaningful change to interpretation of log file, 2.03 just
* increased the header space available for annotations.
*/
printf("# WARNING: version 2.03 log format has the following limitations:\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return;
}
......@@ -773,6 +789,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
{
printf("# WARNING: version 2.01 log format has the following limitations:\n");
printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return;
}
......@@ -781,6 +798,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
printf("# WARNING: version 2.01 log format has the following limitations:\n");
printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n");
printf("# - inaccurate statistics in some multi-threaded cases.\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return;
}
......@@ -789,6 +807,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
printf("# WARNING: version 2.00 log format has the following limitations:\n");
printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n");
printf("# - inaccurate statistics in some multi-threaded cases.\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return;
}
......@@ -807,6 +826,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n");
printf("# - does not store the job id in the file.\n");
printf("# - inaccurate statistics in some multi-threaded cases.\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return;
}
......@@ -826,6 +846,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
printf("# - may have incorrect mount point mappings for files with rank > 0.\n");
printf("# - does not store the job id in the file.\n");
printf("# - inaccurate statistics in some multi-threaded cases.\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return;
}
......@@ -848,6 +869,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
printf("# - attributes syncs to cumulative metadata time, rather than cumulative write time.\n");
printf("# - does not store the job id in the file.\n");
printf("# - inaccurate statistics in some multi-threaded cases.\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return;
}
......@@ -877,6 +899,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
printf("# - attributes syncs to cumulative metadata time, rather than cumulative write time.\n");
printf("# - does not store the job id in the file.\n");
printf("# - inaccurate statistics in some multi-threaded cases.\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return;
}
......
......@@ -361,7 +361,7 @@ int main(int argc, char **argv)
printf("# CP_F_POSIX_META_TIME: cumulative time spent in posix open, close, fsync, stat and seek, .\n");
printf("# CP_F_MPI_META_TIME: cumulative time spent in mpi-io open, close, set_view, and sync.\n");
printf("# CP_MAX_*_TIME: duration of the slowest read and write operations.\n");
printf("# CP_*_RANK_TIME: fastest, slowest variance of transfer time.\n");
printf("# CP_*_RANK_TIME: fastest, slowest variance of transfer time. Note that these counters show MPI-IO time for files accessed using MPI-IO, and POSIX time otherwise.\n");
printf("\n");
CP_PRINT_HEADER();
......
......@@ -297,7 +297,7 @@ each file:
| CP_F_MPI_WRITE_TIME | Cumulative time spent write and sync at the MPI-IO level
| CP_F_MAX_READ_TIME | Duration of the slowest individual POSIX read operation
| CP_F_MAX_WRITE_TIME | Duration of the slowest individual POSIX write operation
| CP_F_FASTEST_RANK_TIME | The time of the rank which had the smallest amount of time spent in I/O (CP_F_POSIX_READ_TIME + CP_F_POSIX_WRITE_TIME + CP_F_POSIX_META_TIME)
| CP_F_FASTEST_RANK_TIME | The time of the rank which had the smallest amount of time spent in I/O. If the file was accessed usign MPI-IO it combines the MPI meta, read, and write time. If the file was not accessed with MPI-IO then it combines the posix meta, read, and write time.
| CP_F_SLOWEST_RANK_TIME | The time of the rank which had the largest amount of time spent in I/O
| CP_F_VARIANCE_RANK_TIME | The population variance for I/O time of all the ranks
| CP_F_VARIANCE_RANK_BYTES | The population variance for bytes transferred of all the ranks
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment