Commit 4b7e381c authored by Philip Carns

implement slowest and variance counters in stdio

- untested
parent cc619189
...@@ -160,6 +160,9 @@ static void stdio_shutdown( ...@@ -160,6 +160,9 @@ static void stdio_shutdown(
int *stdio_buf_sz); int *stdio_buf_sz);
static void stdio_record_reduction_op(void* infile_v, void* inoutfile_v, static void stdio_record_reduction_op(void* infile_v, void* inoutfile_v,
int *len, MPI_Datatype *datatype); int *len, MPI_Datatype *datatype);
/* For each of the shared_rec_count shared records, reduces per-rank total
 * I/O time (read + write + meta) and total bytes moved (read + written)
 * over mod_comm, and stores the resulting variance counters
 * (STDIO_F_VARIANCE_RANK_TIME / STDIO_F_VARIANCE_RANK_BYTES) into
 * outrec_array on rank 0. inrec_array holds this rank's local records.
 */
static void stdio_shared_record_variance(
MPI_Comm mod_comm, struct darshan_stdio_file *inrec_array,
struct darshan_stdio_file *outrec_array, int shared_rec_count);
static struct stdio_file_record_ref *stdio_track_new_file_record( static struct stdio_file_record_ref *stdio_track_new_file_record(
darshan_record_id rec_id, const char *path); darshan_record_id rec_id, const char *path);
static void stdio_cleanup_runtime(); static void stdio_cleanup_runtime();
...@@ -1006,6 +1009,48 @@ static void stdio_record_reduction_op(void* infile_v, void* inoutfile_v, ...@@ -1006,6 +1009,48 @@ static void stdio_record_reduction_op(void* infile_v, void* inoutfile_v,
tmp_file.fcounters[j] = inoutfile->fcounters[j]; tmp_file.fcounters[j] = inoutfile->fcounters[j];
} }
/* min (zeroes are ok here; some procs don't do I/O) */
if(infile->fcounters[STDIO_F_FASTEST_RANK_TIME] <
inoutfile->fcounters[STDIO_F_FASTEST_RANK_TIME])
{
tmp_file.counters[STDIO_FASTEST_RANK] =
infile->counters[STDIO_FASTEST_RANK];
tmp_file.counters[STDIO_FASTEST_RANK_BYTES] =
infile->counters[STDIO_FASTEST_RANK_BYTES];
tmp_file.fcounters[STDIO_F_FASTEST_RANK_TIME] =
infile->fcounters[STDIO_F_FASTEST_RANK_TIME];
}
else
{
tmp_file.counters[STDIO_FASTEST_RANK] =
inoutfile->counters[STDIO_FASTEST_RANK];
tmp_file.counters[STDIO_FASTEST_RANK_BYTES] =
inoutfile->counters[STDIO_FASTEST_RANK_BYTES];
tmp_file.fcounters[STDIO_F_FASTEST_RANK_TIME] =
inoutfile->fcounters[STDIO_F_FASTEST_RANK_TIME];
}
/* max */
if(infile->fcounters[STDIO_F_SLOWEST_RANK_TIME] >
inoutfile->fcounters[STDIO_F_SLOWEST_RANK_TIME])
{
tmp_file.counters[STDIO_SLOWEST_RANK] =
infile->counters[STDIO_SLOWEST_RANK];
tmp_file.counters[STDIO_SLOWEST_RANK_BYTES] =
infile->counters[STDIO_SLOWEST_RANK_BYTES];
tmp_file.fcounters[STDIO_F_SLOWEST_RANK_TIME] =
infile->fcounters[STDIO_F_SLOWEST_RANK_TIME];
}
else
{
tmp_file.counters[STDIO_SLOWEST_RANK] =
inoutfile->counters[STDIO_SLOWEST_RANK];
tmp_file.counters[STDIO_SLOWEST_RANK_BYTES] =
inoutfile->counters[STDIO_SLOWEST_RANK_BYTES];
tmp_file.fcounters[STDIO_F_SLOWEST_RANK_TIME] =
inoutfile->fcounters[STDIO_F_SLOWEST_RANK_TIME];
}
/* update pointers */ /* update pointers */
*inoutfile = tmp_file; *inoutfile = tmp_file;
inoutfile++; inoutfile++;
...@@ -1030,6 +1075,7 @@ static void stdio_shutdown( ...@@ -1030,6 +1075,7 @@ static void stdio_shutdown(
MPI_Datatype red_type; MPI_Datatype red_type;
MPI_Op red_op; MPI_Op red_op;
int stdio_rec_count; int stdio_rec_count;
double stdio_time;
STDIO_LOCK(); STDIO_LOCK();
assert(stdio_runtime); assert(stdio_runtime);
...@@ -1048,6 +1094,31 @@ static void stdio_shutdown( ...@@ -1048,6 +1094,31 @@ static void stdio_shutdown(
&shared_recs[i], sizeof(darshan_record_id)); &shared_recs[i], sizeof(darshan_record_id));
assert(rec_ref); assert(rec_ref);
stdio_time =
rec_ref->file_rec->fcounters[STDIO_F_READ_TIME] +
rec_ref->file_rec->fcounters[STDIO_F_WRITE_TIME] +
rec_ref->file_rec->fcounters[STDIO_F_META_TIME];
/* initialize fastest/slowest info prior to the reduction */
rec_ref->file_rec->counters[STDIO_FASTEST_RANK] =
rec_ref->file_rec->base_rec.rank;
rec_ref->file_rec->counters[STDIO_FASTEST_RANK_BYTES] =
rec_ref->file_rec->counters[STDIO_BYTES_READ] +
rec_ref->file_rec->counters[STDIO_BYTES_WRITTEN];
rec_ref->file_rec->fcounters[STDIO_F_FASTEST_RANK_TIME] =
stdio_time;
/* until reduction occurs, we assume that this rank is both
* the fastest and slowest. It is up to the reduction operator
* to find the true min and max.
*/
rec_ref->file_rec->counters[STDIO_SLOWEST_RANK] =
rec_ref->file_rec->counters[STDIO_FASTEST_RANK];
rec_ref->file_rec->counters[STDIO_SLOWEST_RANK_BYTES] =
rec_ref->file_rec->counters[STDIO_FASTEST_RANK_BYTES];
rec_ref->file_rec->fcounters[STDIO_F_SLOWEST_RANK_TIME] =
rec_ref->file_rec->fcounters[STDIO_F_FASTEST_RANK_TIME];
rec_ref->file_rec->base_rec.rank = -1; rec_ref->file_rec->base_rec.rank = -1;
} }
...@@ -1084,6 +1155,10 @@ static void stdio_shutdown( ...@@ -1084,6 +1155,10 @@ static void stdio_shutdown(
DARSHAN_MPI_CALL(PMPI_Reduce)(red_send_buf, red_recv_buf, DARSHAN_MPI_CALL(PMPI_Reduce)(red_send_buf, red_recv_buf,
shared_rec_count, red_type, red_op, 0, mod_comm); shared_rec_count, red_type, red_op, 0, mod_comm);
/* get the time and byte variances for shared files */
stdio_shared_record_variance(mod_comm, red_send_buf, red_recv_buf,
shared_rec_count);
/* clean up reduction state */ /* clean up reduction state */
if(my_rank == 0) if(my_rank == 0)
{ {
...@@ -1175,6 +1250,89 @@ static void stdio_cleanup_runtime() ...@@ -1175,6 +1250,89 @@ static void stdio_cleanup_runtime()
return; return;
} }
/*
 * stdio_shared_record_variance()
 *
 * Computes two per-record variance counters for shared records by reducing
 * over all ranks of mod_comm with the darshan_variance_reduce operator:
 *   - STDIO_F_VARIANCE_RANK_TIME:  total I/O time per rank
 *                                  (read + write + meta time)
 *   - STDIO_F_VARIANCE_RANK_BYTES: total bytes moved per rank
 *                                  (bytes read + bytes written)
 *
 * mod_comm         - communicator the two reductions are performed on
 * inrec_array      - this rank's local shared records (input to reduction)
 * outrec_array     - records that receive the variance counters; only
 *                    written on rank 0 (the root of the reductions)
 * shared_rec_count - number of records in each array
 *
 * On allocation failure the function returns without touching outrec_array,
 * after releasing every resource it has created so far.
 *
 * NOTE(review): if an allocation fails on only some ranks, those ranks skip
 * the PMPI_Reduce calls while the others still enter them; this matches the
 * pre-existing behavior and is not addressed here.
 */
static void stdio_shared_record_variance(MPI_Comm mod_comm,
    struct darshan_stdio_file *inrec_array, struct darshan_stdio_file *outrec_array,
    int shared_rec_count)
{
    MPI_Datatype var_dt;
    MPI_Op var_op;
    int i;
    struct darshan_variance_dt *var_send_buf = NULL;
    struct darshan_variance_dt *var_recv_buf = NULL;

    /* the variance struct is shipped as an opaque run of bytes */
    DARSHAN_MPI_CALL(PMPI_Type_contiguous)(sizeof(struct darshan_variance_dt),
        MPI_BYTE, &var_dt);
    DARSHAN_MPI_CALL(PMPI_Type_commit)(&var_dt);

    DARSHAN_MPI_CALL(PMPI_Op_create)(darshan_variance_reduce, 1, &var_op);

    var_send_buf = malloc(shared_rec_count * sizeof(*var_send_buf));
    if(!var_send_buf)
        goto cleanup;

    /* only the root of the reduction needs a receive buffer */
    if(my_rank == 0)
    {
        var_recv_buf = malloc(shared_rec_count * sizeof(*var_recv_buf));
        if(!var_recv_buf)
            goto cleanup;
    }

    /* get total i/o time variances for shared records */
    for(i=0; i<shared_rec_count; i++)
    {
        /* each rank contributes a single sample (n = 1, S = 0) */
        var_send_buf[i].n = 1;
        var_send_buf[i].S = 0;
        var_send_buf[i].T = inrec_array[i].fcounters[STDIO_F_READ_TIME] +
                            inrec_array[i].fcounters[STDIO_F_WRITE_TIME] +
                            inrec_array[i].fcounters[STDIO_F_META_TIME];
    }

    DARSHAN_MPI_CALL(PMPI_Reduce)(var_send_buf, var_recv_buf, shared_rec_count,
        var_dt, var_op, 0, mod_comm);

    if(my_rank == 0)
    {
        for(i=0; i<shared_rec_count; i++)
        {
            outrec_array[i].fcounters[STDIO_F_VARIANCE_RANK_TIME] =
                (var_recv_buf[i].S / var_recv_buf[i].n);
        }
    }

    /* get total bytes moved variances for shared records */
    for(i=0; i<shared_rec_count; i++)
    {
        var_send_buf[i].n = 1;
        var_send_buf[i].S = 0;
        var_send_buf[i].T = (double)
                            inrec_array[i].counters[STDIO_BYTES_READ] +
                            inrec_array[i].counters[STDIO_BYTES_WRITTEN];
    }

    DARSHAN_MPI_CALL(PMPI_Reduce)(var_send_buf, var_recv_buf, shared_rec_count,
        var_dt, var_op, 0, mod_comm);

    if(my_rank == 0)
    {
        for(i=0; i<shared_rec_count; i++)
        {
            outrec_array[i].fcounters[STDIO_F_VARIANCE_RANK_BYTES] =
                (var_recv_buf[i].S / var_recv_buf[i].n);
        }
    }

cleanup:
    /* single exit path: the MPI type/op and both buffers are released on
     * success and on allocation failure alike (free(NULL) is a no-op)
     */
    DARSHAN_MPI_CALL(PMPI_Type_free)(&var_dt);
    DARSHAN_MPI_CALL(PMPI_Op_free)(&var_op);
    free(var_send_buf);
    free(var_recv_buf);

    return;
}
/* /*
* Local variables: * Local variables:
* c-indent-level: 4 * c-indent-level: 4
......
...@@ -29,6 +29,11 @@ ...@@ -29,6 +29,11 @@
X(STDIO_MAX_BYTE_READ) \ X(STDIO_MAX_BYTE_READ) \
/* maximum byte (offset) written */\ /* maximum byte (offset) written */\
X(STDIO_MAX_BYTE_WRITTEN) \ X(STDIO_MAX_BYTE_WRITTEN) \
/* rank and number of bytes moved for fastest/slowest ranks */\
X(STDIO_FASTEST_RANK) \
X(STDIO_FASTEST_RANK_BYTES) \
X(STDIO_SLOWEST_RANK) \
X(STDIO_SLOWEST_RANK_BYTES) \
/* end of counters */\ /* end of counters */\
X(STDIO_NUM_INDICES) X(STDIO_NUM_INDICES)
...@@ -55,6 +60,13 @@ ...@@ -55,6 +60,13 @@
X(STDIO_F_WRITE_END_TIMESTAMP) \ X(STDIO_F_WRITE_END_TIMESTAMP) \
/* timestamp of last read completion */\ /* timestamp of last read completion */\
X(STDIO_F_READ_END_TIMESTAMP) \ X(STDIO_F_READ_END_TIMESTAMP) \
/* total i/o and meta time consumed for fastest/slowest ranks */\
X(STDIO_F_FASTEST_RANK_TIME) \
X(STDIO_F_SLOWEST_RANK_TIME) \
/* variance of total i/o time and bytes moved across all ranks */\
/* NOTE: for shared records only */\
X(STDIO_F_VARIANCE_RANK_TIME) \
X(STDIO_F_VARIANCE_RANK_BYTES) \
/* end of counters */\ /* end of counters */\
X(STDIO_F_NUM_INDICES) X(STDIO_F_NUM_INDICES)
......
...@@ -141,9 +141,15 @@ static void darshan_log_print_stdio_description() ...@@ -141,9 +141,15 @@ static void darshan_log_print_stdio_description()
printf("# STDIO_{OPENS|WRITES|READS|SEEKS|FLUSHES} are types of operations.\n"); printf("# STDIO_{OPENS|WRITES|READS|SEEKS|FLUSHES} are types of operations.\n");
printf("# STDIO_BYTES_*: total bytes read and written.\n"); printf("# STDIO_BYTES_*: total bytes read and written.\n");
printf("# STDIO_MAX_BYTE_*: highest offset byte read and written.\n"); printf("# STDIO_MAX_BYTE_*: highest offset byte read and written.\n");
printf("# STDIO_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared files).\n");
printf("# STDIO_*_RANK_BYTES: bytes transferred by the fastest and slowest ranks (for shared files).\n");
printf("# STDIO_F_*_START_TIMESTAMP: timestamp of the first call to that type of function.\n"); printf("# STDIO_F_*_START_TIMESTAMP: timestamp of the first call to that type of function.\n");
printf("# STDIO_F_*_END_TIMESTAMP: timestamp of the completion of the last call to that type of function.\n"); printf("# STDIO_F_*_END_TIMESTAMP: timestamp of the completion of the last call to that type of function.\n");
printf("# STDIO_F_*_TIME: cumulative time spent in different types of functions.\n"); printf("# STDIO_F_*_TIME: cumulative time spent in different types of functions.\n");
printf("# STDIO_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared files).\n");
printf("# STDIO_F_VARIANCE_RANK_*: variance of total I/O time and bytes moved for all ranks (for shared files).\n");
DARSHAN_PRINT_HEADER();
return; return;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment