Commit 31d45b45 authored by Xin Wang

output allreduce statistics

parent ba682917
......@@ -30,6 +30,7 @@
do {if (CS_LP_DBG) printf(_fmt, __VA_ARGS__);} while (0)
#define MAX_STATS 65536
#define PAYLOAD_SZ 1024
/* Capacity of the per-rank col_times[] and col_msizes[] arrays in
 * struct nw_state; one slot is filled per recorded all-reduce.
 * NOTE(review): the recording site indexes these arrays with
 * num_all_reduce and no visible bounds check against MAX_COL -- confirm. */
#define MAX_COL 500
static int msg_size_hash_compare(
void *key, struct qhash_head *link);
......@@ -223,6 +224,8 @@ struct nw_state
double all_reduce_time;
int num_all_reduce;
double col_times[MAX_COL];
uint64_t col_msizes[MAX_COL];
double elapsed_time;
/* time spent in compute operations */
......@@ -1947,6 +1950,8 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t
{
if(bf->c1)
{
s->col_times[s->num_all_reduce] = 0;
s->col_msizes[s->num_all_reduce] = 0;
s->num_all_reduce--;
s->col_time = m->rc.saved_send_time;
s->all_reduce_time -= s->col_time;
......@@ -2092,6 +2097,8 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
{
bf->c1 = 1;
m->rc.saved_delay = s->all_reduce_time;
s->col_times[s->num_all_reduce] = (tw_now(lp) - s->col_time);
s->col_msizes[s->num_all_reduce] = mpi_op.u.collective.num_bytes;
s->all_reduce_time += (tw_now(lp) - s->col_time);
m->rc.saved_send_time = s->col_time;
s->col_time = 0;
......@@ -2209,9 +2216,21 @@ void nw_test_finalize(nw_state* s, tw_lp* lp)
if(s->recv_time > max_recv_time)
max_recv_time = s->recv_time;
memset(s->output_buf, 0, sizeof(s->output_buf));
if(s->local_rank == 0 && debug_cols)
written2 = sprintf(s->output_buf, "rank_id nrep msize runtime_ns");
if(debug_cols)
{
printf("\n Rank %lld Avg all reduce time %lf ", s->nw_id, ns_to_s(s->all_reduce_time / s->num_all_reduce));
for (int i=0; i< s->num_all_reduce; i++)
{
written2 += sprintf(s->output_buf + written2, "\n %d %d %lld %lf",
s->local_rank, i, s->col_msizes[i], s->col_times[i]);
}
lp_io_write(lp->gid, "mpi-col-stats", written2, s->output_buf);
}
avg_time += s->elapsed_time;
avg_comm_time += (s->elapsed_time - s->compute_time);
avg_wait_time += s->wait_time;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment