Commit a3f129b4 authored by Caitlin Ross

Fixing more MPI replay RNG reverse-computation errors

parent b8cece6d
...@@ -352,7 +352,7 @@ static void codes_exec_mpi_recv_rc( ...@@ -352,7 +352,7 @@ static void codes_exec_mpi_recv_rc(
nw_state* s, tw_bf * bf, nw_message* m, tw_lp* lp); nw_state* s, tw_bf * bf, nw_message* m, tw_lp* lp);
/* execute the computational delay */ /* execute the computational delay */
static void codes_exec_comp_delay( static void codes_exec_comp_delay(
nw_state* s, nw_message * m, tw_lp* lp, struct codes_workload_op * mpi_op); nw_state* s, tw_bf *bf, nw_message * m, tw_lp* lp, struct codes_workload_op * mpi_op);
/* gets the next MPI operation from the network-workloads API. */ /* gets the next MPI operation from the network-workloads API. */
static void get_next_mpi_operation( static void get_next_mpi_operation(
nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp); nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp);
...@@ -1296,7 +1296,7 @@ static int rm_matching_send(nw_state * ns, ...@@ -1296,7 +1296,7 @@ static int rm_matching_send(nw_state * ns,
if(qitem->op_type == CODES_WK_IRECV && !is_rend) if(qitem->op_type == CODES_WK_IRECV && !is_rend)
{ {
bf->c9 = 1; bf->c29 = 1;
update_completed_queue(ns, bf, m, lp, qitem->req_id); update_completed_queue(ns, bf, m, lp, qitem->req_id);
} }
else else
...@@ -1338,8 +1338,9 @@ static void codes_issue_next_event(tw_lp* lp) ...@@ -1338,8 +1338,9 @@ static void codes_issue_next_event(tw_lp* lp)
/* Simulate delays between MPI operations */ /* Simulate delays between MPI operations */
static void codes_exec_comp_delay( static void codes_exec_comp_delay(
nw_state* s, nw_message * m, tw_lp* lp, struct codes_workload_op * mpi_op) nw_state* s, tw_bf *bf, nw_message * m, tw_lp* lp, struct codes_workload_op * mpi_op)
{ {
bf->c28 = 0;
tw_event* e; tw_event* e;
tw_stime ts; tw_stime ts;
nw_message* msg; nw_message* msg;
...@@ -1348,7 +1349,10 @@ static void codes_exec_comp_delay( ...@@ -1348,7 +1349,10 @@ static void codes_exec_comp_delay(
s->compute_time += mpi_op->u.delay.nsecs; s->compute_time += mpi_op->u.delay.nsecs;
ts = mpi_op->u.delay.nsecs; ts = mpi_op->u.delay.nsecs;
if(ts <= g_tw_lookahead) if(ts <= g_tw_lookahead)
{
bf->c28 = 1;
ts = g_tw_lookahead + 0.1 + tw_rand_exponential(lp->rng, noise); ts = g_tw_lookahead + 0.1 + tw_rand_exponential(lp->rng, noise);
}
//ts += g_tw_lookahead + 0.1 + tw_rand_exponential(lp->rng, noise); //ts += g_tw_lookahead + 0.1 + tw_rand_exponential(lp->rng, noise);
assert(ts > 0); assert(ts > 0);
...@@ -1372,6 +1376,8 @@ static void codes_exec_mpi_recv_rc( ...@@ -1372,6 +1376,8 @@ static void codes_exec_mpi_recv_rc(
if(bf->c11) if(bf->c11)
codes_issue_next_event_rc(lp); codes_issue_next_event_rc(lp);
if(bf->c6)
codes_issue_next_event_rc(lp);
if(m->fwd.found_match >= 0) if(m->fwd.found_match >= 0)
{ {
ns->recv_time = m->rc.saved_recv_time; ns->recv_time = m->rc.saved_recv_time;
...@@ -1399,12 +1405,10 @@ static void codes_exec_mpi_recv_rc( ...@@ -1399,12 +1405,10 @@ static void codes_exec_mpi_recv_rc(
index++; index++;
} }
} }
if(bf->c9) if(bf->c29)
{ {
update_completed_queue_rc(ns, bf, m, lp); update_completed_queue_rc(ns, bf, m, lp);
} }
if(bf->c6)
codes_issue_next_event_rc(lp);
} }
else if(m->fwd.found_match < 0) else if(m->fwd.found_match < 0)
{ {
...@@ -1492,6 +1496,11 @@ static void codes_exec_mpi_send_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_ ...@@ -1492,6 +1496,11 @@ static void codes_exec_mpi_send_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_
if(bf->c15 || bf->c16) if(bf->c15 || bf->c16)
s->num_sends--; s->num_sends--;
if (bf->c15)
model_net_event_rc2(lp, &m->event_rc);
if (bf->c16)
model_net_event_rc2(lp, &m->event_rc);
if (bf->c17)
model_net_event_rc2(lp, &m->event_rc); model_net_event_rc2(lp, &m->event_rc);
if(bf->c4) if(bf->c4)
...@@ -1606,6 +1615,7 @@ static void codes_exec_mpi_send(nw_state* s, ...@@ -1606,6 +1615,7 @@ static void codes_exec_mpi_send(nw_state* s,
} }
else if(is_rend == 1) else if(is_rend == 1)
{ {
bf->c17 = 1;
/* initiate the actual data transfer, local completion message is sent /* initiate the actual data transfer, local completion message is sent
* for any blocking sends. */ * for any blocking sends. */
local_m.fwd.sim_start_time = mpi_op->sim_start_time; local_m.fwd.sim_start_time = mpi_op->sim_start_time;
...@@ -2229,6 +2239,7 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t ...@@ -2229,6 +2239,7 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t
codes_issue_next_event_rc(lp); codes_issue_next_event_rc(lp);
else else
{ {
if (bf->c28)
tw_rand_reverse_unif(lp->rng); tw_rand_reverse_unif(lp->rng);
s->compute_time = m->rc.saved_delay; s->compute_time = m->rc.saved_delay;
} }
...@@ -2236,7 +2247,7 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t ...@@ -2236,7 +2247,7 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t
break; break;
case CODES_WK_ALLREDUCE: case CODES_WK_ALLREDUCE:
{ {
if(bf->c1) if(bf->c27)
{ {
s->num_all_reduce--; s->num_all_reduce--;
s->col_time = m->rc.saved_send_time; s->col_time = m->rc.saved_send_time;
...@@ -2350,7 +2361,7 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l ...@@ -2350,7 +2361,7 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
if(disable_delay) if(disable_delay)
codes_issue_next_event(lp); codes_issue_next_event(lp);
else else
codes_exec_comp_delay(s, m, lp, mpi_op); codes_exec_comp_delay(s, bf, m, lp, mpi_op);
} }
break; break;
...@@ -2384,7 +2395,7 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l ...@@ -2384,7 +2395,7 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
s->num_cols++; s->num_cols++;
if(s->col_time > 0) if(s->col_time > 0)
{ {
bf->c1 = 1; bf->c27 = 1;
m->rc.saved_delay = s->all_reduce_time; m->rc.saved_delay = s->all_reduce_time;
s->all_reduce_time += (tw_now(lp) - s->col_time); s->all_reduce_time += (tw_now(lp) - s->col_time);
m->rc.saved_send_time = s->col_time; m->rc.saved_send_time = s->col_time;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment