Commit b5b3f202 authored by Philip Carns's avatar Philip Carns
Browse files

fixes #132

Darshan now considers MPI-IO time if available when calculating
CP_F_FASTEST_RANK_TIME and CP_F_SLOWEST_RANK_TIME


git-svn-id: https://svn.mcs.anl.gov/repos/darshan/trunk@860 3b7491f3-a168-0410-bf4b-c445ed680a29
parent 9aeaecab
...@@ -27,6 +27,8 @@ darshan-2.2.9-pre1 ...@@ -27,6 +27,8 @@ darshan-2.2.9-pre1
- More thorough output from darshan-parser --perf, suggested by Huong Luu. - More thorough output from darshan-parser --perf, suggested by Huong Luu.
- Increased uncompressed log header size from 1 KiB to 4 KiB to support - Increased uncompressed log header size from 1 KiB to 4 KiB to support
additional annotations. additional annotations.
- CP_F_{FASTEST/SLOWEST}_RANK_TIME counters now take MPI-IO time into
account, not just POSIX time, issue reported by Huong Luu.
darshan-2.2.8 darshan-2.2.8
============= =============
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#endif #endif
/* update this on file format changes */ /* update this on file format changes */
#define CP_VERSION "2.04" #define CP_VERSION "2.05"
/* magic number for validating output files and checking byte order */ /* magic number for validating output files and checking byte order */
#define CP_MAGIC_NR 6567223 #define CP_MAGIC_NR 6567223
...@@ -217,7 +217,11 @@ enum f_darshan_indices ...@@ -217,7 +217,11 @@ enum f_darshan_indices
CP_F_MPI_WRITE_TIME, /* cumulative mpi-io write time */ CP_F_MPI_WRITE_TIME, /* cumulative mpi-io write time */
CP_F_MAX_READ_TIME, CP_F_MAX_READ_TIME,
CP_F_MAX_WRITE_TIME, CP_F_MAX_WRITE_TIME,
CP_F_FASTEST_RANK_TIME, /* Total I/O and meta time consumed by fastest and slowest ranks,
* reported in either MPI or POSIX time depending on how the file
* was accessed.
*/
CP_F_FASTEST_RANK_TIME,
CP_F_SLOWEST_RANK_TIME, CP_F_SLOWEST_RANK_TIME,
CP_F_VARIANCE_RANK_TIME, CP_F_VARIANCE_RANK_TIME,
CP_F_VARIANCE_RANK_BYTES, CP_F_VARIANCE_RANK_BYTES,
......
...@@ -1262,6 +1262,7 @@ static int cp_log_reduction(struct darshan_job_runtime* final_job, int rank, ...@@ -1262,6 +1262,7 @@ static int cp_log_reduction(struct darshan_job_runtime* final_job, int rank,
MPI_Datatype rtype; MPI_Datatype rtype;
struct darshan_file* tmp_array = NULL; struct darshan_file* tmp_array = NULL;
int shared_count = 0; int shared_count = 0;
double mpi_time, posix_time;
/* register a reduction operation */ /* register a reduction operation */
ret = DARSHAN_MPI_CALL(PMPI_Op_create)(darshan_file_reduce, 1, &reduce_op); ret = DARSHAN_MPI_CALL(PMPI_Op_create)(darshan_file_reduce, 1, &reduce_op);
...@@ -1333,6 +1334,14 @@ static int cp_log_reduction(struct darshan_job_runtime* final_job, int rank, ...@@ -1333,6 +1334,14 @@ static int cp_log_reduction(struct darshan_job_runtime* final_job, int rank,
{ {
if(final_job->file_array[j].hash == hash_array[i]) if(final_job->file_array[j].hash == hash_array[i])
{ {
posix_time =
final_job->file_array[j].fcounters[CP_F_POSIX_META_TIME] +
final_job->file_array[j].fcounters[CP_F_POSIX_READ_TIME] +
final_job->file_array[j].fcounters[CP_F_POSIX_WRITE_TIME];
mpi_time =
final_job->file_array[j].fcounters[CP_F_MPI_META_TIME] +
final_job->file_array[j].fcounters[CP_F_MPI_READ_TIME] +
final_job->file_array[j].fcounters[CP_F_MPI_WRITE_TIME];
/* /*
* Initialize fastest/slowest info prior * Initialize fastest/slowest info prior
...@@ -1343,20 +1352,29 @@ static int cp_log_reduction(struct darshan_job_runtime* final_job, int rank, ...@@ -1343,20 +1352,29 @@ static int cp_log_reduction(struct darshan_job_runtime* final_job, int rank,
final_job->file_array[j].counters[CP_FASTEST_RANK_BYTES] = final_job->file_array[j].counters[CP_FASTEST_RANK_BYTES] =
final_job->file_array[j].counters[CP_BYTES_READ] + final_job->file_array[j].counters[CP_BYTES_READ] +
final_job->file_array[j].counters[CP_BYTES_WRITTEN]; final_job->file_array[j].counters[CP_BYTES_WRITTEN];
final_job->file_array[j].fcounters[CP_F_FASTEST_RANK_TIME] = /* use MPI timing if this file was accessed with MPI */
final_job->file_array[j].fcounters[CP_F_POSIX_META_TIME] + if(mpi_time > 0)
final_job->file_array[j].fcounters[CP_F_POSIX_READ_TIME] + {
final_job->file_array[j].fcounters[CP_F_POSIX_WRITE_TIME]; final_job->file_array[j].fcounters[CP_F_FASTEST_RANK_TIME] =
mpi_time;
}
else
{
final_job->file_array[j].fcounters[CP_F_FASTEST_RANK_TIME] =
posix_time;
}
/* Until reduction occurs, we assume that this rank is
* both the fastest and slowest. It is up to the
* reduction operator to find the true min and max if it
* is a shared file.
*/
final_job->file_array[j].counters[CP_SLOWEST_RANK] = final_job->file_array[j].counters[CP_SLOWEST_RANK] =
final_job->file_array[j].rank; final_job->file_array[j].counters[CP_FASTEST_RANK];
final_job->file_array[j].counters[CP_SLOWEST_RANK_BYTES] = final_job->file_array[j].counters[CP_SLOWEST_RANK_BYTES] =
final_job->file_array[j].counters[CP_BYTES_READ] + final_job->file_array[j].counters[CP_FASTEST_RANK_BYTES];
final_job->file_array[j].counters[CP_BYTES_WRITTEN];
final_job->file_array[j].fcounters[CP_F_SLOWEST_RANK_TIME] = final_job->file_array[j].fcounters[CP_F_SLOWEST_RANK_TIME] =
final_job->file_array[j].fcounters[CP_F_POSIX_META_TIME] + final_job->file_array[j].fcounters[CP_F_FASTEST_RANK_TIME];
final_job->file_array[j].fcounters[CP_F_POSIX_READ_TIME] +
final_job->file_array[j].fcounters[CP_F_POSIX_WRITE_TIME];
final_job->file_array[j].rank = -1; final_job->file_array[j].rank = -1;
break; break;
......
...@@ -374,7 +374,14 @@ int darshan_log_getjob(darshan_fd file, struct darshan_job *job) ...@@ -374,7 +374,14 @@ int darshan_log_getjob(darshan_fd file, struct darshan_job *job)
return(-1); return(-1);
} }
if(strcmp(file->version, "2.04") == 0) if(strcmp(file->version, "2.05") == 0)
{
getjob_internal = getjob_internal_201;
getfile_internal = getfile_internal_204;
file->job_struct_size = sizeof(*job);
file->COMPAT_CP_EXE_LEN = CP_EXE_LEN;
}
else if(strcmp(file->version, "2.04") == 0)
{ {
getjob_internal = getjob_internal_201; getjob_internal = getjob_internal_201;
getfile_internal = getfile_internal_204; getfile_internal = getfile_internal_204;
...@@ -755,17 +762,26 @@ void darshan_log_close(darshan_fd file) ...@@ -755,17 +762,26 @@ void darshan_log_close(darshan_fd file)
*/ */
void darshan_log_print_version_warnings(struct darshan_job *job) void darshan_log_print_version_warnings(struct darshan_job *job)
{ {
if(strcmp(job->version_string, "2.04") == 0) if(strcmp(job->version_string, "2.05") == 0)
{ {
/* current version */ /* current version */
return; return;
} }
if(strcmp(job->version_string, "2.04") == 0)
{
printf("# WARNING: version 2.04 log format has the following limitations:\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return;
}
if(strcmp(job->version_string, "2.03") == 0) if(strcmp(job->version_string, "2.03") == 0)
{ {
/* no meaningful change to interpretation of log file, 2.04 just /* no meaningful change to interpretation of log file, 2.03 just
* increased the header space available for annotations. * increased the header space available for annotations.
*/ */
printf("# WARNING: version 2.03 log format has the following limitations:\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return; return;
} }
...@@ -773,6 +789,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job) ...@@ -773,6 +789,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
{ {
printf("# WARNING: version 2.01 log format has the following limitations:\n"); printf("# WARNING: version 2.01 log format has the following limitations:\n");
printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n"); printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return; return;
} }
...@@ -781,6 +798,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job) ...@@ -781,6 +798,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
printf("# WARNING: version 2.01 log format has the following limitations:\n"); printf("# WARNING: version 2.01 log format has the following limitations:\n");
printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n"); printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n");
printf("# - inaccurate statistics in some multi-threaded cases.\n"); printf("# - inaccurate statistics in some multi-threaded cases.\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return; return;
} }
...@@ -789,6 +807,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job) ...@@ -789,6 +807,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
printf("# WARNING: version 2.00 log format has the following limitations:\n"); printf("# WARNING: version 2.00 log format has the following limitations:\n");
printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n"); printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n");
printf("# - inaccurate statistics in some multi-threaded cases.\n"); printf("# - inaccurate statistics in some multi-threaded cases.\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return; return;
} }
...@@ -807,6 +826,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job) ...@@ -807,6 +826,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n"); printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n");
printf("# - does not store the job id in the file.\n"); printf("# - does not store the job id in the file.\n");
printf("# - inaccurate statistics in some multi-threaded cases.\n"); printf("# - inaccurate statistics in some multi-threaded cases.\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return; return;
} }
...@@ -826,6 +846,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job) ...@@ -826,6 +846,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
printf("# - may have incorrect mount point mappings for files with rank > 0.\n"); printf("# - may have incorrect mount point mappings for files with rank > 0.\n");
printf("# - does not store the job id in the file.\n"); printf("# - does not store the job id in the file.\n");
printf("# - inaccurate statistics in some multi-threaded cases.\n"); printf("# - inaccurate statistics in some multi-threaded cases.\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return; return;
} }
...@@ -848,6 +869,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job) ...@@ -848,6 +869,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
printf("# - attributes syncs to cumulative metadata time, rather than cumulative write time.\n"); printf("# - attributes syncs to cumulative metadata time, rather than cumulative write time.\n");
printf("# - does not store the job id in the file.\n"); printf("# - does not store the job id in the file.\n");
printf("# - inaccurate statistics in some multi-threaded cases.\n"); printf("# - inaccurate statistics in some multi-threaded cases.\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return; return;
} }
...@@ -877,6 +899,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job) ...@@ -877,6 +899,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
printf("# - attributes syncs to cumulative metadata time, rather than cumulative write time.\n"); printf("# - attributes syncs to cumulative metadata time, rather than cumulative write time.\n");
printf("# - does not store the job id in the file.\n"); printf("# - does not store the job id in the file.\n");
printf("# - inaccurate statistics in some multi-threaded cases.\n"); printf("# - inaccurate statistics in some multi-threaded cases.\n");
printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
return; return;
} }
......
...@@ -361,7 +361,7 @@ int main(int argc, char **argv) ...@@ -361,7 +361,7 @@ int main(int argc, char **argv)
printf("# CP_F_POSIX_META_TIME: cumulative time spent in posix open, close, fsync, stat and seek, .\n"); printf("# CP_F_POSIX_META_TIME: cumulative time spent in posix open, close, fsync, stat and seek, .\n");
printf("# CP_F_MPI_META_TIME: cumulative time spent in mpi-io open, close, set_view, and sync.\n"); printf("# CP_F_MPI_META_TIME: cumulative time spent in mpi-io open, close, set_view, and sync.\n");
printf("# CP_MAX_*_TIME: duration of the slowest read and write operations.\n"); printf("# CP_MAX_*_TIME: duration of the slowest read and write operations.\n");
printf("# CP_*_RANK_TIME: fastest, slowest variance of transfer time.\n"); printf("# CP_*_RANK_TIME: fastest, slowest variance of transfer time. Note that these counters show MPI-IO time for files accessed using MPI-IO, and POSIX time otherwise.\n");
printf("\n"); printf("\n");
CP_PRINT_HEADER(); CP_PRINT_HEADER();
......
...@@ -297,7 +297,7 @@ each file: ...@@ -297,7 +297,7 @@ each file:
| CP_F_MPI_WRITE_TIME | Cumulative time spent write and sync at the MPI-IO level | CP_F_MPI_WRITE_TIME | Cumulative time spent write and sync at the MPI-IO level
| CP_F_MAX_READ_TIME | Duration of the slowest individual POSIX read operation | CP_F_MAX_READ_TIME | Duration of the slowest individual POSIX read operation
| CP_F_MAX_WRITE_TIME | Duration of the slowest individual POSIX write operation | CP_F_MAX_WRITE_TIME | Duration of the slowest individual POSIX write operation
| CP_F_FASTEST_RANK_TIME | The time of the rank which had the smallest amount of time spent in I/O (CP_F_POSIX_READ_TIME + CP_F_POSIX_WRITE_TIME + CP_F_POSIX_META_TIME) | CP_F_FASTEST_RANK_TIME | The time of the rank which had the smallest amount of time spent in I/O. If the file was accessed usign MPI-IO it combines the MPI meta, read, and write time. If the file was not accessed with MPI-IO then it combines the posix meta, read, and write time.
| CP_F_SLOWEST_RANK_TIME | The time of the rank which had the largest amount of time spent in I/O | CP_F_SLOWEST_RANK_TIME | The time of the rank which had the largest amount of time spent in I/O
| CP_F_VARIANCE_RANK_TIME | The population variance for I/O time of all the ranks | CP_F_VARIANCE_RANK_TIME | The population variance for I/O time of all the ranks
| CP_F_VARIANCE_RANK_BYTES | The population variance for bytes transferred of all the ranks | CP_F_VARIANCE_RANK_BYTES | The population variance for bytes transferred of all the ranks
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment