GitLab maintenance scheduled from Friday, 2021-06-18 5:00pm to Saturday, 2021-06-19 10:00pm CT - Services will be unavailable during this time.

Commit 56ec9353 authored by mubarak

Adding updated hash on dragonfly, MPI sim layer updates

parent e94c7507
...@@ -89,6 +89,8 @@ struct terminal_message ...@@ -89,6 +89,8 @@ struct terminal_message
/* LP ID of the sending node, has to be a network node in the dragonfly */ /* LP ID of the sending node, has to be a network node in the dragonfly */
tw_lpid sender_node; tw_lpid sender_node;
tw_lpid next_stop; tw_lpid next_stop;
struct dfly_qhash_entry * saved_hash;
}; };
#endif /* end of include guard: DRAGONFLY_H */ #endif /* end of include guard: DRAGONFLY_H */
......
...@@ -127,6 +127,11 @@ struct nw_state ...@@ -127,6 +127,11 @@ struct nw_state
struct pending_waits * pending_waits; struct pending_waits * pending_waits;
/* List of completed send/receive requests */ /* List of completed send/receive requests */
struct completed_requests * completed_reqs; struct completed_requests * completed_reqs;
unsigned long num_bytes_sent;
unsigned long num_bytes_recvd;
char output_buf[512];
}; };
/* data for handling reverse computation. /* data for handling reverse computation.
...@@ -276,12 +281,12 @@ static void printQueue(tw_lpid lpid, struct mpi_queue_ptrs* mpi_queue, char* msg ...@@ -276,12 +281,12 @@ static void printQueue(tw_lpid lpid, struct mpi_queue_ptrs* mpi_queue, char* msg
while(tmp) while(tmp)
{ {
if(tmp->mpi_op->op_type == CODES_WK_SEND || tmp->mpi_op->op_type == CODES_WK_ISEND) if(tmp->mpi_op->op_type == CODES_WK_SEND || tmp->mpi_op->op_type == CODES_WK_ISEND)
printf("\n lpid %llu send operation data type %d count %d tag %d source %d", printf("\n lpid %llu send operation count %d tag %d source %d",
lpid, tmp->mpi_op->u.send.data_type, tmp->mpi_op->u.send.count, lpid, tmp->mpi_op->u.send.num_bytes,
tmp->mpi_op->u.send.tag, tmp->mpi_op->u.send.source_rank); tmp->mpi_op->u.send.tag, tmp->mpi_op->u.send.source_rank);
else if(tmp->mpi_op->op_type == CODES_WK_IRECV || tmp->mpi_op->op_type == CODES_WK_RECV) else if(tmp->mpi_op->op_type == CODES_WK_IRECV || tmp->mpi_op->op_type == CODES_WK_RECV)
printf("\n lpid %llu recv operation data type %d count %d tag %d source %d", printf("\n lpid %llu recv operation count %d num bytes %d tag %d source %d",
lpid, tmp->mpi_op->u.recv.data_type, tmp->mpi_op->u.recv.count, lpid, tmp->mpi_op->u.recv.count, tmp->mpi_op->u.recv.num_bytes,
tmp->mpi_op->u.recv.tag, tmp->mpi_op->u.recv.source_rank ); tmp->mpi_op->u.recv.tag, tmp->mpi_op->u.recv.source_rank );
else else
printf("\n Invalid data type in the queue %d ", tmp->mpi_op->op_type); printf("\n Invalid data type in the queue %d ", tmp->mpi_op->op_type);
...@@ -825,6 +830,7 @@ static void codes_exec_comp_delay( ...@@ -825,6 +830,7 @@ static void codes_exec_comp_delay(
static void codes_exec_mpi_recv_rc(nw_state* s, nw_message* m, tw_lp* lp, struct codes_workload_op * mpi_op) static void codes_exec_mpi_recv_rc(nw_state* s, nw_message* m, tw_lp* lp, struct codes_workload_op * mpi_op)
{ {
num_bytes_recvd -= mpi_op->u.recv.num_bytes; num_bytes_recvd -= mpi_op->u.recv.num_bytes;
s->num_bytes_recvd -= mpi_op->u.recv.num_bytes;
s->recv_time = m->saved_recv_time; s->recv_time = m->saved_recv_time;
if(m->found_match >= 0) if(m->found_match >= 0)
{ {
...@@ -851,6 +857,7 @@ static void codes_exec_mpi_recv(nw_state* s, tw_lp* lp, nw_message * m, struct c ...@@ -851,6 +857,7 @@ static void codes_exec_mpi_recv(nw_state* s, tw_lp* lp, nw_message * m, struct c
m->saved_recv_time = s->recv_time; m->saved_recv_time = s->recv_time;
mpi_op->sim_start_time = tw_now(lp); mpi_op->sim_start_time = tw_now(lp);
num_bytes_recvd += mpi_op->u.recv.num_bytes; num_bytes_recvd += mpi_op->u.recv.num_bytes;
s->num_bytes_recvd += mpi_op->u.recv.num_bytes;
if(lp->gid == TRACE) if(lp->gid == TRACE)
printf("\n %lf codes exec mpi recv req id %d", tw_now(lp), (int)mpi_op->u.recv.req_id); printf("\n %lf codes exec mpi recv req id %d", tw_now(lp), (int)mpi_op->u.recv.req_id);
...@@ -907,7 +914,7 @@ static void codes_exec_mpi_send(nw_state* s, tw_lp* lp, struct codes_workload_op ...@@ -907,7 +914,7 @@ static void codes_exec_mpi_send(nw_state* s, tw_lp* lp, struct codes_workload_op
} }
num_bytes_sent += mpi_op->u.send.num_bytes; num_bytes_sent += mpi_op->u.send.num_bytes;
s->num_bytes_sent += mpi_op->u.send.num_bytes;
nw_message local_m; nw_message local_m;
nw_message remote_m; nw_message remote_m;
...@@ -1164,6 +1171,7 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t ...@@ -1164,6 +1171,7 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t
tw_rand_reverse_unif(lp->rng); tw_rand_reverse_unif(lp->rng);
s->num_sends--; s->num_sends--;
num_bytes_sent -= mpi_op->u.send.num_bytes; num_bytes_sent -= mpi_op->u.send.num_bytes;
s->num_bytes_sent -= mpi_op->u.send.num_bytes;
} }
break; break;
...@@ -1308,15 +1316,23 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l ...@@ -1308,15 +1316,23 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
void nw_test_finalize(nw_state* s, tw_lp* lp) void nw_test_finalize(nw_state* s, tw_lp* lp)
{ {
int written = 0;
if(!s->nw_id)
written = sprintf(s->output_buf, "# Format <LP ID> <Terminal ID> <Total sends> <Total Recvs> <Bytes sent> <Bytes recvd> <Send time> <Comm. time> <Compute time>");
if(s->nw_id < num_net_traces) if(s->nw_id < num_net_traces)
{ {
printf("\n LP %llu unmatched irecvs %d unmatched sends %d Total sends %ld receives %ld collectives %ld delays %ld wait alls %ld waits %ld send time %lf wait %lf", printf("\n LP %llu unmatched irecvs %d unmatched sends %d Total sends %ld receives %ld collectives %ld delays %ld wait alls %ld waits %ld send time %lf wait %lf",
lp->gid, s->pending_recvs_queue->num_elems, s->arrival_queue->num_elems, s->num_sends, s->num_recvs, s->num_cols, s->num_delays, s->num_waitall, s->num_wait, s->send_time, s->wait_time); lp->gid, s->pending_recvs_queue->num_elems, s->arrival_queue->num_elems, s->num_sends, s->num_recvs, s->num_cols, s->num_delays, s->num_waitall, s->num_wait, s->send_time, s->wait_time);
if(lp->gid == TRACE) //if(lp->gid == TRACE)
{ //{
printQueue(lp->gid, s->pending_recvs_queue, "irecv "); printQueue(lp->gid, s->pending_recvs_queue, "irecv ");
printQueue(lp->gid, s->arrival_queue, "isend"); printQueue(lp->gid, s->arrival_queue, "isend");
} //}
written += sprintf(s->output_buf + written, "\n %lu %lu %ld %ld %ld %ld %lf %lf %lf", lp->gid, s->nw_id, s->num_sends, s->num_recvs, s->num_bytes_sent,
s->num_bytes_recvd, s->send_time, s->elapsed_time - s->compute_time, s->compute_time);
lp_io_write(lp->gid, "mpi-replay-stats", written, s->output_buf);
if(s->elapsed_time - s->compute_time > max_comm_time) if(s->elapsed_time - s->compute_time > max_comm_time)
max_comm_time = s->elapsed_time - s->compute_time; max_comm_time = s->elapsed_time - s->compute_time;
......
...@@ -298,7 +298,7 @@ static int dragonfly_hash_func(void *k, int table_size) ...@@ -298,7 +298,7 @@ static int dragonfly_hash_func(void *k, int table_size)
struct dfly_hash_key *tmp = (struct dfly_hash_key *)k; struct dfly_hash_key *tmp = (struct dfly_hash_key *)k;
uint64_t key = (~tmp->message_id) + (tmp->message_id << 18); uint64_t key = (~tmp->message_id) + (tmp->message_id << 18);
key = key * 21; key = key * 21;
key = key ^ (tmp->sender_id << 6); key = ~key ^ (tmp->sender_id >> 4);
key = key * tmp->sender_id; key = key * tmp->sender_id;
return (int)(key & (uint64_t)(table_size - 1)); return (int)(key & (uint64_t)(table_size - 1));
} }
...@@ -1320,7 +1320,8 @@ void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_message * msg, tw ...@@ -1320,7 +1320,8 @@ void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_message * msg, tw
N_finished_msgs--; N_finished_msgs--;
s->total_msg_size -= msg->total_size; s->total_msg_size -= msg->total_size;
struct dfly_qhash_entry * d_entry_pop = (struct dfly_qhash_entry*)rc_stack_pop(s->st); // struct dfly_qhash_entry * d_entry_pop = (struct dfly_qhash_entry*)rc_stack_pop(s->st);
struct dfly_qhash_entry * d_entry_pop = msg->saved_hash;
qhash_add(s->rank_tbl, &key, &(d_entry_pop->hash_link)); qhash_add(s->rank_tbl, &key, &(d_entry_pop->hash_link));
s->rank_tbl_pop++; s->rank_tbl_pop++;
...@@ -1555,7 +1556,8 @@ void packet_arrive(terminal_state * s, tw_bf * bf, terminal_message * msg, ...@@ -1555,7 +1556,8 @@ void packet_arrive(terminal_state * s, tw_bf * bf, terminal_message * msg,
send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size); send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size);
/* Remove the hash entry */ /* Remove the hash entry */
qhash_del(hash_link); qhash_del(hash_link);
rc_stack_push(lp, tmp, free_tmp, s->st); msg->saved_hash = tmp;
//rc_stack_push(lp, tmp, free_tmp, s->st);
s->rank_tbl_pop--; s->rank_tbl_pop--;
} }
return; return;
...@@ -2062,11 +2064,11 @@ get_next_stop(router_state * s, ...@@ -2062,11 +2064,11 @@ get_next_stop(router_state * s,
/* It means the packet has arrived at the destination group. Now divert it to the destination router. */ /* It means the packet has arrived at the destination group. Now divert it to the destination router. */
if(s->group_id == dest_group_id) if(s->group_id == dest_group_id)
{ {
if(msg->last_hop == TERMINAL && path == NON_MINIMAL) { //if(msg->last_hop == TERMINAL && path == NON_MINIMAL) {
dest_lp = (s->group_id * s->params->num_routers) + intm_id % s->params->num_routers; // dest_lp = (s->group_id * s->params->num_routers) + intm_id % s->params->num_routers;
} else { //} else {
dest_lp = dest_router_id; dest_lp = dest_router_id;
} //}
} }
else else
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment