Commit ddf608ee authored by Misbah Mubarak

Merge branch 'multi-ranks-per-node' into 'master'

Fixing mapping-context selection for multiple ranks per node

See merge request !71
parents 8d81f7f3 b82824c0
@@ -133,7 +133,17 @@ static double sampling_end_time = 3000000000;
static int enable_debug = 0;
/* set group context */
-struct codes_mctx group_ratio;
+struct codes_mctx mapping_context;
+enum MAPPING_CONTEXTS
+{
+    GROUP_RATIO=1,
+    GROUP_RATIO_REVERSE,
+    GROUP_DIRECT,
+    GROUP_MODULO,
+    GROUP_MODULO_REVERSE,
+    UNKNOWN
+};
+static int map_ctxt = GROUP_MODULO;
/* MPI_OP_GET_NEXT is for getting next MPI operation when the previous operation completes.
* MPI_SEND_ARRIVED is issued when a MPI message arrives at its destination (the message is transported by model-net and an event is invoked when it arrives.
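The new MAPPING_CONTEXTS enum enumerates the available codes_mctx group mappings, with UNKNOWN as a sentinel and GROUP_MODULO as the default selection. The commit does not show how map_ctxt gets populated; below is a minimal sketch of one way a configuration string could be translated into the enum. The function name and the string spellings are assumptions for illustration, not part of this commit.

#include <string.h>

/* Hypothetical helper (not in this commit): map a configuration string
 * onto MAPPING_CONTEXTS, returning the UNKNOWN sentinel for
 * unrecognized input. */
static int parse_mapping_context(const char * name)
{
    if(strcmp(name, "GROUP_RATIO") == 0)          return GROUP_RATIO;
    if(strcmp(name, "GROUP_RATIO_REVERSE") == 0)  return GROUP_RATIO_REVERSE;
    if(strcmp(name, "GROUP_DIRECT") == 0)         return GROUP_DIRECT;
    if(strcmp(name, "GROUP_MODULO") == 0)         return GROUP_MODULO;
    if(strcmp(name, "GROUP_MODULO_REVERSE") == 0) return GROUP_MODULO_REVERSE;
    return UNKNOWN;
}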
@@ -1706,7 +1716,7 @@ static void codes_exec_mpi_send(nw_state* s,
remote_m = local_m;
remote_m.msg_type = MPI_SEND_ARRIVED;
-m->event_rc = model_net_event_mctx(net_id, &group_ratio, &group_ratio,
+m->event_rc = model_net_event_mctx(net_id, &mapping_context, &mapping_context,
prio, dest_rank, mpi_op->u.send.num_bytes, (self_overhead + copy_overhead + soft_delay_mpi + nic_delay),
sizeof(nw_message), (const void*)&remote_m, sizeof(nw_message), (const void*)&local_m, lp);
}
@@ -1727,7 +1737,7 @@ static void codes_exec_mpi_send(nw_state* s,
remote_m.fwd.req_id = mpi_op->u.send.req_id;
remote_m.fwd.app_id = s->app_id;
-m->event_rc = model_net_event_mctx(net_id, &group_ratio, &group_ratio,
+m->event_rc = model_net_event_mctx(net_id, &mapping_context, &mapping_context,
prio, dest_rank, CONTROL_MSG_SZ, (self_overhead + soft_delay_mpi + nic_delay),
sizeof(nw_message), (const void*)&remote_m, 0, NULL, lp);
}
@@ -1741,7 +1751,7 @@ static void codes_exec_mpi_send(nw_state* s,
remote_m = local_m;
remote_m.msg_type = MPI_REND_ARRIVED;
-m->event_rc = model_net_event_mctx(net_id, &group_ratio, &group_ratio,
+m->event_rc = model_net_event_mctx(net_id, &mapping_context, &mapping_context,
prio, dest_rank, mpi_op->u.send.num_bytes, (self_overhead + soft_delay_mpi + nic_delay),
sizeof(nw_message), (const void*)&remote_m, sizeof(nw_message), (const void*)&local_m, lp);
}
@@ -1900,7 +1910,7 @@ static void send_ack_back(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp, m
else
tw_error(TW_LOC, "\n Invalid app id");
-m->event_rc = model_net_event_mctx(net_id, &group_ratio, &group_ratio,
+m->event_rc = model_net_event_mctx(net_id, &mapping_context, &mapping_context,
prio, dest_rank, CONTROL_MSG_SZ, (self_overhead + soft_delay_mpi + nic_delay),
sizeof(nw_message), (const void*)&remote_m, 0, NULL, lp);
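All four call sites in this commit make the same substitution: model_net_event_mctx takes two mapping-context arguments, and both now point at the configurable mapping_context instead of the hard-coded group_ratio, so eager sends, rendezvous traffic, and acks all resolve MPI ranks to LPs the same way. An annotated sketch of the shared call shape, assuming (from the argument order above) that the first context serves the sending side and the second the receiving side:

/* Sketch of the pattern above; the comments are annotations, not part
 * of the commit. */
m->event_rc = model_net_event_mctx(net_id,
        &mapping_context,   /* assumed: context for the sending side   */
        &mapping_context,   /* assumed: context for the receiving side */
        prio, dest_rank, CONTROL_MSG_SZ,
        (self_overhead + soft_delay_mpi + nic_delay),
        sizeof(nw_message), (const void*)&remote_m,
        0, NULL, lp);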
@@ -3037,37 +3047,54 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
}
if(enable_msg_tracking)
{
char log_name[512];
sprintf(log_name, "%s/mpi-msg-sz-logs-%s-syn-sz-%d-mean-%f-%d",
        mpi_msg_dir,
        file_name_of_job[0],
        payload_sz,
        mean_interval,
        rand());

msg_size_log = fopen(log_name, "w+");
if(!msg_size_log)
{
    printf("\n Error logging MPI operations... quitting ");
    MPI_Finalize();
    return -1;
}
char agg_log_name[512];
sprintf(agg_log_name, "%s/mpi-aggregate-logs-%d.bin", sampling_dir, rank);
workload_agg_log = fopen(agg_log_name, "w+");
workload_meta_log = fopen("mpi-workload-meta-log", "w+");
if(!workload_agg_log || !workload_meta_log)
{
    printf("\n Error logging MPI operations... quitting ");
    MPI_Finalize();
    return -1;
}
}
-group_ratio = codes_mctx_set_group_ratio(NULL, true);
+switch(map_ctxt)
+{
+    case GROUP_RATIO:
+        mapping_context = codes_mctx_set_group_ratio(NULL, true);
+        break;
+    case GROUP_RATIO_REVERSE:
+        mapping_context = codes_mctx_set_group_ratio_reverse(NULL, true);
+        break;
+    case GROUP_DIRECT:
+        mapping_context = codes_mctx_set_group_direct(1, NULL, true);
+        break;
+    case GROUP_MODULO:
+        mapping_context = codes_mctx_set_group_modulo(NULL, true);
+        break;
+    case GROUP_MODULO_REVERSE:
+        mapping_context = codes_mctx_set_group_modulo_reverse(NULL, true);
+        break;
+}
if(enable_sampling)
model_net_enable_sampling(sampling_interval, sampling_end_time);