Commit ceaebc7a authored by Xin Wang's avatar Xin Wang

update conceptual implementation, add a few examples

parent 46cdfffc
......@@ -71,6 +71,9 @@ if USE_CONC
src_libcodes_la_SOURCES += src/workload/methods/codes-conc-online-comm-wrkld.C
AM_CPPFLAGS += ${CONCEPTUAL_CFLAGS} -DUSE_CONC=1
LDADD += ${CONCEPTUAL_LIBS}
src_libcodes_la_SOURCES += src/workload/conceputal-skeleton-apps/conc-bisect.c
src_libcodes_la_SOURCES += src/workload/conceputal-skeleton-apps/conc-cosmoflow.c
src_libcodes_la_SOURCES += src/workload/conceputal-skeleton-apps/conc-hotpotato.c
src_libcodes_la_SOURCES += src/workload/conceputal-skeleton-apps/conc-latencyall.c
src_libcodes_la_SOURCES += src/workload/conceputal-skeleton-apps/conc-latency.c
endif
......
###################################################
# Measure random bisection-bandwidth patterns #
# By Scott Pakin <pakin@lanl.gov> #
# #
# Inspired by Hoefler, Schneider, and Lumsdaine's #
# "Multistage Switches are not Crossbars" paper #
###################################################
Require language version "1.5".
nummsgs is "Number of messages per trial" and comes from "--nummsgs" or "-n" with default 100.
wups is "Number of warmup messages" and comes from "--wups" or "-w" with default 3.
msgsize is "Message size in bytes" and comes from "--bytes" or "-b" with default 1M.
numtrials is "Number of bisection patterns" and comes from "--trials" or "-t" with default 5000.
rounding is "Round measurements to the nearest N" and comes from "--round" or "-r" with default 5.
For each trial in {1, ..., numtrials} {
task 0 is assigned to processor 0 then
task 0 outputs "Testing random bisection pattern " and trial and "/" and numtrials then
all tasks are assigned to a random processor then
tasks src such that src is even send wups msgsize-byte messages to task src+1 then
all tasks synchronize then
all tasks reset their counters then
tasks src such that src is even send nummsgs msgsize-byte messages to task src+1 then
all tasks synchronize then
all tasks log a histogram of rounding*round(total_bytes*1E6/(elapsed_usecs*1M)/rounding) as "Bandwidth (MiB/s)"
}
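The histogram key above snaps each bandwidth sample to the nearest multiple of rounding (default 5). A minimal C sketch of the same idiom, with a hypothetical helper name:

#include <math.h>

/* Snap v to the nearest multiple of `rounding`, as the histogram
 * expression above does; e.g. snap(123.4, 5.0) == 125.0. */
static double snap(double v, double rounding)
{
    return rounding * round(v / rounding);
}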
# Measure the performance of MPI_Allreduce()
# By Scott Pakin <pakin@lanl.gov>
#
# N.B. Requires the c_mpi backend.
Require language version "1.5".
# Parse the command line.
numwords is "Message size (words)" and comes from "--msgsize" or "-s" with default 28825K.
reps is "Number of repetitions" and comes from "--reps" or "-r" with default 100.
computetime is "Computation time (ms)" and comes from "--compute" or "-c" with default 129.
# Allocate a send buffer and a receive buffer.
Task 0 multicasts a numwords*num_tasks word message from buffer 0 to all other tasks.
Task 0 multicasts a numwords*num_tasks word message from buffer 1 to all other tasks.
# Measure the performance of MPI_Allreduce().
Task 0 resets its counters then
for reps repetitions {
all tasks compute for computetime milliseconds then
all tasks backend execute "
MPI_Allreduce([MESSAGE BUFFER 0], [MESSAGE BUFFER 1], (int)" and numwords and ",
MPI_INT, MPI_SUM, MPI_COMM_WORLD);
" then
all tasks backend execute "
MPI_Allreduce([MESSAGE BUFFER 0], [MESSAGE BUFFER 1], (int)" and numwords and ",
MPI_INT, MPI_SUM, MPI_COMM_WORLD);
"
} then
task 0 logs elapsed_usecs/1000 as "Elapsed time (ms)".
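In plain C + MPI, the measured loop above amounts roughly to the sketch below; simulate_compute is a hypothetical stand-in for the script's compute phase, and the code the c_mpi backend actually generates is considerably more elaborate:

#include <mpi.h>

extern void simulate_compute(double ms);   /* hypothetical compute stand-in */

/* Sketch: one compute phase followed by two back-to-back allreduces per
 * repetition, mirroring the script above. */
static void measured_loop(int *sendbuf, int *recvbuf, int numwords,
                          int reps, double computetime_ms)
{
    for (int r = 0; r < reps; r++) {
        simulate_compute(computetime_ms);
        MPI_Allreduce(sendbuf, recvbuf, numwords, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
        MPI_Allreduce(sendbuf, recvbuf, numwords, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    }
}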
# Virtual ring "hot potato" test
Require language version "1.5".
trials is "Number of trials to perform" and comes from "--trials" or
"-t" with default 100000.
Assert that "the hot-potato test requires at least two tasks" with num_tasks>=2.
Let len be 0 while {
for each task_count in {2, ..., num_tasks} {
task 0 outputs "Performing " and trials and " " and
task_count and "-task runs...." then
for trials repetitions plus 5 warmup repetitions and a synchronization {
task 0 resets its counters then
task 0 sends a len byte message to unsuspecting task 1 then
task (n+1) mod task_count receives a len byte message from task n such that n<task_count then
task n such that n>0 /\ n<task_count sends a len byte message to unsuspecting task (n+1) mod task_count then
task 0 logs the task_count as "# of tasks" and
the minimum of elapsed_usecs/task_count as "Latency (usecs)" and
the mean of elapsed_usecs/task_count as "Latency (usecs)" and
the variance of elapsed_usecs/task_count as "Latency (usecs)"
} then
task 0 computes aggregates
}
}
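For orientation, one lap of the pattern in plain C + MPI looks roughly like this sketch (assuming at least two ranks, per the assertion above; this is not the code the c_mpi backend emits):

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank, size;
    char potato = 0;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);   /* assumes size >= 2 */
    double t0 = MPI_Wtime();
    if (rank == 0) {
        /* rank 0 starts the lap and waits for the potato to come back */
        MPI_Send(&potato, 1, MPI_CHAR, 1, 0, MPI_COMM_WORLD);
        MPI_Recv(&potato, 1, MPI_CHAR, size - 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("per-hop latency: %g usecs\n", (MPI_Wtime() - t0) * 1e6 / size);
    } else {
        /* every other rank forwards the potato to its right neighbor */
        MPI_Recv(&potato, 1, MPI_CHAR, rank - 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Send(&potato, 1, MPI_CHAR, (rank + 1) % size, 0, MPI_COMM_WORLD);
    }
    MPI_Finalize();
    return 0;
}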
......@@ -15,10 +15,7 @@ def eliminate_logging(inLines):
inLines[idx+i] = "//"+inLines[idx+i]
elif 'ncptl_free (logfile_uuid)' in line:
for i in range(0, 12):
inLines[idx-i] = "//"+inLines[idx-i]
elif 'case 1' in line:
for i in range(5, 9):
inLines[idx+i] = "//"+inLines[idx+i]
inLines[idx-i] = "//"+inLines[idx-i]
elif 'int mpiresult' in line:
for i in range(0,30):
inLines[idx+i] = "//"+inLines[idx+i]
......
......@@ -53,7 +53,7 @@ char offset_file[8192];
static int wrkld_id;
static int num_net_traces = 0;
static int num_dumpi_traces = 0;
-static int64_t EAGER_THRESHOLD = 8192;
+static int64_t EAGER_THRESHOLD = INT64_MAX;
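/* Note: with INT64_MAX every message falls under the eager threshold, which
 * effectively disables the rendezvous handshake path (assumption: this
 * constant gates the eager-vs-rendezvous choice in codes_exec_mpi_send). */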
static long num_ops = 0;
static int upper_threshold = 1048576;
......@@ -101,6 +101,7 @@ static float noise = 1.0;
static int num_nw_lps = 0, num_mpi_lps = 0;
static int num_syn_clients;
static int finished_syn_clients = 0;
static int syn_type = 0;
FILE * workload_log = NULL;
......@@ -478,18 +479,19 @@ static void notify_background_traffic(
jid = codes_jobmap_to_local_id(ns->nw_id, jobmap_ctx);
int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx);
-for(int other_id = 0; other_id < num_jobs; other_id++)
+// Assumption: synthetic jobs are at the bottom of the workload configuration file
+for(int other_id = num_jobs - 1; other_id >= 0; other_id--)
{
if(other_id == jid.job)
continue;
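/* once every synthetic client has been notified, stop scanning the remaining jobs */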
if(finished_syn_clients == num_syn_clients)
break;
struct codes_jobmap_id other_jid;
other_jid.job = other_id;
int num_other_ranks = codes_jobmap_get_num_ranks(other_id, jobmap_ctx);
-lprintf("\n Other ranks %d ", num_other_ranks);
+// lprintf("\n Other ranks %d ", num_other_ranks);
tw_stime ts = (1.1 * g_tw_lookahead) + tw_rand_exponential(lp->rng, mean_interval/10000);
tw_lpid global_dest_id;
......@@ -505,6 +507,7 @@ static void notify_background_traffic(
m_new = (struct nw_message*)tw_event_data(e);
m_new->msg_type = CLI_BCKGND_FIN;
tw_event_send(e);
finished_syn_clients += 1;
}
}
return;
......@@ -532,13 +535,21 @@ static void notify_neighbor(
tw_bf * bf,
struct nw_message * m)
{
-if(ns->local_rank == num_dumpi_traces - 1
+int num_ranks = codes_jobmap_get_num_ranks(ns->app_id, jobmap_ctx);
+// if all application workloads finish, notify the background traffic to stop
+if(ns->local_rank == num_ranks - 1
&& ns->is_finished == 1
&& ns->neighbor_completed == 1)
{
// printf("\n All workloads completed, notifying background traffic ");
bf->c0 = 1;
notify_background_traffic(ns, lp, bf, m);
// printf("\n All ranks completed");
num_dumpi_traces -= num_ranks;
if(num_dumpi_traces == 0) {
// printf("All app traces finished, notify background traffic to stop\n");
bf->c0 = 1;
notify_background_traffic(ns, lp, bf, m);
}
return;
}
......@@ -548,9 +559,8 @@ static void notify_neighbor(
if(ns->is_finished == 1 && (ns->neighbor_completed == 1 || ns->local_rank == 0))
{
-bf->c1 = 1;
-// printf("\n Local rank %d notifying neighbor %d ", ns->local_rank, ns->local_rank+1);
+bf->c1 = 1;
+// printf("\n Job %d Local rank %d notifying neighbor %d ", ns->app_id, ns->local_rank, ns->local_rank+1);
tw_stime ts = (1.1 * g_tw_lookahead) + tw_rand_exponential(lp->rng, mean_interval/10000);
nbr_jid.rank = ns->local_rank + 1;
......@@ -599,7 +609,6 @@ void finish_nbr_wkld_rc(
tw_lp * lp)
{
ns->neighbor_completed = 0;
-notify_neighbor_rc(ns, lp, b, msg);
}
......@@ -610,7 +619,6 @@ void finish_nbr_wkld(
tw_lp * lp)
{
ns->neighbor_completed = 1;
-notify_neighbor(ns, lp, b, msg);
}
static void gen_synthetic_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp)
......@@ -772,7 +780,7 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l
}
}
/* New event after MEAN_INTERVAL */
tw_stime ts = mean_interval + tw_rand_exponential(lp->rng, noise);
tw_event * e;
nw_message * m_new;
e = tw_event_new(lp->gid, ts, lp);
......@@ -1240,8 +1248,10 @@ static int rm_matching_rcv(nw_state * ns,
qlist_del(&qi->ql);
rc_stack_push(lp, qi, free, ns->processed_ops);
// printf("\nrm_matching_rcv: return index\n");
return index;
}
// printf("\nrm_matching_rcv: return -1\n");
return -1;
}
......@@ -1310,8 +1320,10 @@ static int rm_matching_send(nw_state * ns,
qlist_del(&qi->ql);
rc_stack_push(lp, qi, free, ns->processed_ops);
// printf("\nrm_matching_send: return index\n");
return index;
}
// printf("\nrm_matching_send: return -1\n");
return -1;
}
static void codes_issue_next_event_rc(tw_lp * lp)
......@@ -1439,13 +1451,15 @@ static void codes_exec_mpi_recv(
recv_op->req_id = mpi_op->u.recv.req_id;
//printf("\n Req id %d bytes %d source %d tag %d ", recv_op->req_id, recv_op->num_bytes, recv_op->source_rank, recv_op->tag);
// printf("\n codes_exec_mpi_recv: Req id %d bytes %d source %d dest %d tag %d \n",
// recv_op->req_id, recv_op->num_bytes, recv_op->source_rank, recv_op->dest_rank, recv_op->tag);
// if(s->nw_id == (tw_lpid)TRACK_LP)
// printf("\n Receive op posted num bytes %llu source %d ", recv_op->num_bytes,
// recv_op->source_rank);
int found_matching_sends = rm_matching_send(s, bf, m, lp, recv_op);
/* for mpi irecvs, this is a non-blocking receive so just post it and move on with the trace read. */
if(mpi_op->op_type == CODES_WK_IRECV)
{
......@@ -1455,6 +1469,7 @@ static void codes_exec_mpi_recv(
/* save the req id inserted in the completed queue for reverse computation. */
if(found_matching_sends < 0)
{
// printf("\ncodes_exec_mpi_recv: Not found matched send.\n");
m->fwd.found_match = -1;
qlist_add_tail(&recv_op->ql, &s->pending_recvs_queue);
......@@ -1462,6 +1477,7 @@ static void codes_exec_mpi_recv(
else
{
//bf->c6 = 1;
// printf("\ncodes_exec_mpi_recv: Found match send.\n");
m->fwd.found_match = found_matching_sends;
}
}
......@@ -1525,7 +1541,7 @@ static void codes_exec_mpi_send(nw_state* s,
}
if(lp->gid == TRACK_LP)
printf("\n Sender rank %llu global dest rank %d dest-rank %d bytes %"PRIu64" Tag %d", LLU(s->nw_id), global_dest_rank, mpi_op->u.send.dest_rank, mpi_op->u.send.num_bytes, mpi_op->u.send.tag);
printf("\n codes_exec_mpi_send: Sender rank %llu global dest rank %d dest-rank %d bytes %"PRIu64" Tag %d\n", LLU(s->nw_id), global_dest_rank, mpi_op->u.send.dest_rank, mpi_op->u.send.num_bytes, mpi_op->u.send.tag);
m->rc.saved_num_bytes = mpi_op->u.send.num_bytes;
/* model-net event */
tw_lpid dest_rank = codes_mapping_get_lpid_from_relative(global_dest_rank, NULL, "nw-lp", NULL, 0);
......@@ -1588,6 +1604,7 @@ static void codes_exec_mpi_send(nw_state* s,
{
/* Initiate the handshake. Issue a control message to the destination first. No local message,
* only remote message sent. */
// printf("\ncodes_exec_mpi_send: MPI_Send Rend, Initiate the handshake, MPI_SEND_ARRIVED\n");
bf->c16 = 1;
s->num_sends++;
remote_m.fwd.sim_start_time = tw_now(lp);
......@@ -1599,6 +1616,8 @@ static void codes_exec_mpi_send(nw_state* s,
remote_m.fwd.num_bytes = mpi_op->u.send.num_bytes;
remote_m.fwd.req_id = mpi_op->u.send.req_id;
remote_m.fwd.app_id = s->app_id;
// printf("\n codes_exec_mpi_send: Req id %d bytes %d source %d dest %d tag %d \n",
// mpi_op->u.send.req_id, mpi_op->u.send.num_bytes, mpi_op->u.send.source_rank, dest_rank, mpi_op->u.send.tag);
m->event_rc = model_net_event_mctx(net_id, &group_ratio, &group_ratio,
"mpi-workload", dest_rank, CONTROL_MSG_SZ, (self_overhead + soft_delay_mpi + nic_delay),
......@@ -1608,6 +1627,7 @@ static void codes_exec_mpi_send(nw_state* s,
{
/* initiate the actual data transfer, local completion message is sent
* for any blocking sends. */
// printf("\ncodes_exec_mpi_send: MPI_Send Rend, initiate the actual data transfer, MPI_REND_ARRIVED\n");
local_m.fwd.sim_start_time = mpi_op->sim_start_time;
local_m.fwd.rend_send = 1;
remote_m = local_m;
......@@ -1690,6 +1710,7 @@ static void update_completed_queue(nw_state* s,
if(!waiting)
{
// printf("\nupdate_completed_queue: not waiting\n");
bf->c0 = 1;
completed_requests * req = (completed_requests*)malloc(sizeof(completed_requests));
req->req_id = req_id;
......@@ -1703,6 +1724,7 @@ static void update_completed_queue(nw_state* s,
}
else
{
// printf("\nupdate_completed_queue: waiting\n");
bf->c1 = 1;
m->fwd.num_matched = clear_completed_reqs(s, lp, s->wait_op->req_ids, s->wait_op->count);
......@@ -1747,6 +1769,9 @@ static void send_ack_back(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp, m
remote_m.fwd.num_bytes = mpi_op->num_bytes;
remote_m.fwd.req_id = mpi_op->req_id;
remote_m.fwd.matched_req = matched_req;
// printf("\nsend_ack_back: Send a message back to sender indicating availability\n");
// printf("\n send_ack_back: Req id %d bytes %d source %d dest %d tag %d \n",
// mpi_op->req_id, mpi_op->num_bytes, mpi_op->source_rank, mpi_op->dest_rank, mpi_op->tag);
m->event_rc = model_net_event_mctx(net_id, &group_ratio, &group_ratio,
"test", dest_rank, CONTROL_MSG_SZ, (self_overhead + soft_delay_mpi + nic_delay),
......@@ -1811,8 +1836,9 @@ static void update_arrival_queue_rc(nw_state* s,
/* once an isend operation arrives, the pending receives queue is checked to find out if there is an irecv that has already been posted. If no matching irecv has been posted, the arrived send is queued in the arrival queue. */
static void update_arrival_queue(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
{
// printf("\nupdate_arrival_queue\n" );
if(s->app_id != m->fwd.app_id)
printf("\n Received message for app %d my id %d my rank %llu ",
printf("\n Received message for app %d my id %d my rank %llu \n",
m->fwd.app_id, s->app_id, LLU(s->nw_id));
assert(s->app_id == m->fwd.app_id);
......@@ -1843,6 +1869,7 @@ static void update_arrival_queue(nw_state* s, tw_bf * bf, nw_message * m, tw_lp
m_callback->fwd.msg_send_time = tw_now(lp) - m->fwd.sim_start_time;
tw_event_send(e_callback);
}
/* Now reconstruct the queue item */
mpi_msgs_queue * arrived_op = (mpi_msgs_queue *) malloc(sizeof(mpi_msgs_queue));
arrived_op->req_init_time = m->fwd.sim_start_time;
......@@ -1854,18 +1881,20 @@ static void update_arrival_queue(nw_state* s, tw_bf * bf, nw_message * m, tw_lp
arrived_op->dest_rank = m->fwd.dest_rank;
-// if(s->nw_id == (tw_lpid)TRACK_LP)
-//     printf("\n Send op arrived source rank %d num bytes %llu", arrived_op->source_rank,
-//         arrived_op->num_bytes);
+// printf("\nupdate_arrival_queue: Send op arrived source rank %d num bytes %llu\n", arrived_op->source_rank,
+//     arrived_op->num_bytes);
int found_matching_recv = rm_matching_rcv(s, bf, m, lp, arrived_op);
if(found_matching_recv < 0)
{
m->fwd.found_match = -1;
// printf("\nupdate_arrival_queue: not found match\n");
qlist_add_tail(&arrived_op->ql, &s->arrival_queue);
}
else
{
// printf("\nupdate_arrival_queue: found match\n");
m->fwd.found_match = found_matching_recv;
free(arrived_op);
}
......@@ -2120,17 +2149,20 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
nw_message *m_callback = (nw_message*)tw_event_data(e_callback);
m_callback->msg_type = MPI_SEND_ARRIVED_CB;
m_callback->fwd.msg_send_time = tw_now(lp) - m->fwd.sim_start_time;
// printf("App %d Rank %d issue MPI_SEND_ARRIVED_CB\n",s->app_id, s->local_rank);
tw_event_send(e_callback);
/* request id pending completion */
if(m->fwd.matched_req >= 0)
{
bf->c8 = 1;
// printf("request id pending completion\n");
update_completed_queue(s, bf, m, lp, m->fwd.matched_req);
}
else /* blocking receive pending completion*/
{
bf->c10 = 1;
// printf("blocking receive pending completion\n");
codes_issue_next_event(lp);
}
......@@ -2317,30 +2349,36 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
codes_workload_get_next(wrkld_id, s->app_id, s->local_rank, mpi_op);
m->mpi_op = mpi_op;
m->op_type = mpi_op->op_type;
// printf("CODES issue ops: %d \n", mpi_op->op_type);
if(mpi_op->op_type == CODES_WK_END)
{
// printf("CODES_WK_END signal found.\n");
s->elapsed_time = tw_now(lp) - s->start_time;
s->is_finished = 1;
if(!alloc_spec)
{
bf->c9 = 1;
// printf("not alloc spec, finished workload.\n");
return;
}
/* Notify ranks from other job that checkpoint traffic has
* completed */
int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx);
if(num_jobs <= 1 || is_synthetic == 0)
{
// printf("Only one job or no synthetic, finished workload.\n");
bf->c19 = 1;
return;
}
-notify_neighbor(s, lp, bf, m);
-// printf("Client rank %llu completed workload, local rank %d .\n", s->nw_id, s->local_rank);
-/* Notify ranks from other job that checkpoint traffic has
- * completed */
+// printf("Local rank %d (nwid: %d) finished workload, notify neighbor.\n", s->local_rank, s->nw_id);
+notify_neighbor(s, lp, bf, m);
return;
}
switch(mpi_op->op_type)
......@@ -2348,7 +2386,7 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
case CODES_WK_SEND:
case CODES_WK_ISEND:
{
//printf("\n MPI SEND ");
// printf("CODES issue MPI SEND \n");
codes_exec_mpi_send(s, bf, m, lp, mpi_op, 0);
}
break;
......@@ -2357,7 +2395,7 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
case CODES_WK_IRECV:
{
s->num_recvs++;
//printf("\n MPI RECV ");
// printf("CODES issue MPI RECV \n");
codes_exec_mpi_recv(s, bf, m, lp, mpi_op);
}
break;
......@@ -2438,6 +2476,7 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
void nw_test_finalize(nw_state* s, tw_lp* lp)
{
// printf("\nnw_test_finalize: app %d rank %d \n", s->app_id, s->local_rank);
total_syn_data += s->syn_data;
int written = 0;
......@@ -2467,8 +2506,10 @@ void nw_test_finalize(nw_state* s, tw_lp* lp)
}
if(strcmp(workload_type, "swm-online") == 0)
codes_workload_finalize("swm_online_comm_workload", params, s->app_id, s->local_rank);
if(strcmp(workload_type, "conc-online") == 0)
codes_workload_finalize("conc_online_comm_workload", params, s->app_id, s->local_rank);
if(strcmp(workload_type, "conc-online") == 0) {
// printf("Finalize Conceptual workloads\n");
codes_workload_finalize("conc_online_comm_workload", params, s->app_id, s->local_rank);
}
struct msg_size_info * tmp_msg = NULL;
struct qlist_head * ent = NULL;
......@@ -2792,20 +2833,19 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
if(ref != EOF && strncmp(file_name_of_job[i], "synthetic", 9) == 0)
{
-num_syn_clients = num_traces_of_job[i];
+num_syn_clients += num_traces_of_job[i];
num_net_traces += num_traces_of_job[i];
}
else if(ref!=EOF)
{
if(enable_debug)
printf("\n%d traces of app %s \n", num_traces_of_job[i], file_name_of_job[i]);
num_net_traces += num_traces_of_job[i];
num_dumpi_traces += num_traces_of_job[i];
}
i++;
}
printf("\n num_net_traces %d ", num_net_traces);
printf("\n num_net_traces %d \n", num_net_traces);
fclose(name_file);
assert(strlen(alloc_file) != 0);
alloc_spec = 1;
......@@ -2828,6 +2868,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv )
jobmap_ctx = codes_jobmap_configure(CODES_JOBMAP_LIST, &jobmap_p);
}
}
MPI_Comm_rank(MPI_COMM_CODES, &rank);
MPI_Comm_size(MPI_COMM_CODES, &nprocs);
......
......@@ -2402,8 +2402,8 @@ void dragonfly_custom_router_final(router_state * s,
written = 0;
if(!s->router_id)
{
-written = sprintf(s->output_buf, "# Format <LP ID> <Group ID> <Router ID> <Link Traffic per router port(s)>");
-written += sprintf(s->output_buf + written, "# Router ports in the order: %d green links, %d black links %d global channels \n",
+written = sprintf(s->output_buf2, "# Format <LP ID> <Group ID> <Router ID> <Link Traffic per router port(s)>");
+written += sprintf(s->output_buf2 + written, "# Router ports in the order: %d green links, %d black links %d global channels \n",
p->num_router_cols * p->num_row_chans, p->num_router_rows * p->num_col_chans, p->num_global_channels);
}
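/* header and per-port traffic now share output_buf2, so the running
 * `written` offset below stays consistent */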
written += sprintf(s->output_buf2 + written, "\n %llu %d %d",
......
......@@ -10,6 +10,9 @@
/* list of available benchmarks begin */
extern struct codes_conceptual_bench bisect_bench;
extern struct codes_conceptual_bench cosmoflow_bench;
extern struct codes_conceptual_bench hotpotato_bench;
extern struct codes_conceptual_bench latencyall_bench;
extern struct codes_conceptual_bench latency_bench;
/* list of available benchmarks end */
......@@ -17,6 +20,9 @@ extern struct codes_conceptual_bench latency_bench;
static struct codes_conceptual_bench const * bench_array_default[] =
{
/* default benchmarks begin */
&bisect_bench,
&cosmoflow_bench,
&hotpotato_bench,
&latencyall_bench,
&latency_bench,
/* default benchmarks end */
......
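Each of the five entries above pairs a generated skeleton with its entry point. A hedged sketch of the descriptor involved (layout assumed from this registration pattern; check codes-conc-addon.h for the authoritative definition):

/* assumed layout of the benchmark descriptor (sketch, not verified) */
struct codes_conceptual_bench {
    char *program_name;                          /* e.g. "bisect" */
    int (*conceptual_main)(int argc, char *argv[]);
};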
/**********************************************************************
- * This file was generated by coNCePTuaL on Thu Oct 4 23:46:17 2018
+ * This file was generated by coNCePTuaL on Mon Jan 7 23:09:59 2019
* using the c_mpi backend (C + MPI).
* Do not modify this file; modify /Users/xin/macworkspace/codes-dev/codes/scripts/conceptual_benchmarks/latency.ncptl instead.
*
......
/**********************************************************************
- * This file was generated by coNCePTuaL on Thu Oct 4 23:46:17 2018
+ * This file was generated by coNCePTuaL on Mon Jan 7 23:10:00 2019
* using the c_mpi backend (C + MPI).
* Do not modify this file; modify /Users/xin/macworkspace/codes-dev/codes/scripts/conceptual_benchmarks/latency-all.ncptl instead.
*
......
......@@ -122,9 +122,9 @@ void CODES_MPI_Finalize()
struct shared_context * sctx = static_cast<shared_context*>(arg);
sctx->fifo.push_back(&wrkld_per_rank);
// printf("\n finalize workload for rank %d num_sends %ld num_recvs %ld num_isends %ld num_irecvs %ld num_allreduce %ld num_barrier %ld num_waitalls %ld\n",
// printf("\n rank %d finalize workload: num_sends %ld num_recvs %ld num_isends %ld num_irecvs %ld num_allreduce %ld num_barrier %ld num_waitalls %ld\n",
// sctx->my_rank, num_sends, num_recvs, num_isends, num_irecvs, num_allreduce, num_barriers, num_waitalls);
// printf("Rank %d yield to CODES thread: %p\n", sctx->my_rank, global_prod_thread);
ABT_thread_yield_to(global_prod_thread);
}
......@@ -139,11 +139,14 @@ void CODES_MPI_Send(const void *buf,
// printf("\n Sending to rank %d ", comm_id);
struct codes_workload_op wrkld_per_rank;
int datatypesize;
MPI_Type_size(datatype, &datatypesize);
wrkld_per_rank.op_type = CODES_WK_SEND;
wrkld_per_rank.u.send.tag = tag;
wrkld_per_rank.u.send.count = count;
wrkld_per_rank.u.send.data_type = datatype;
-wrkld_per_rank.u.send.num_bytes = count * sizeof(datatype);
+wrkld_per_rank.u.send.num_bytes = count * datatypesize;
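/* MPI_Type_size() yields the datatype's payload size in bytes; the old
 * sizeof(datatype) only measured the opaque MPI_Datatype handle. */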
wrkld_per_rank.u.send.dest_rank = dest;
/* Retreive the shared context state */
......@@ -156,8 +159,8 @@ void CODES_MPI_Send(const void *buf,
struct shared_context * sctx = static_cast<shared_context*>(arg);
wrkld_per_rank.u.send.source_rank = sctx->my_rank;
sctx->fifo.push_back(&wrkld_per_rank);
// printf("Rank %d Send Event to dest %d: %d * %lu = %lld, fifo size: %lu\n", sctx->my_rank, dest, count,
// sizeof(datatype), wrkld_per_rank.u.send.num_bytes, sctx->fifo.size());
// printf("Rank %d Send Event to dest %d: %lld, fifo size: %lu\n", sctx->my_rank, dest,
// wrkld_per_rank.u.send.num_bytes, sctx->fifo.size());
// printf("Rank %d yield to CODES thread: %p\n", sctx->my_rank, global_prod_thread);
int rc = ABT_thread_yield_to(global_prod_thread);
......@@ -175,12 +178,15 @@ void CODES_MPI_Recv(void *buf,
/* Add an event in the shared queue and then yield */
struct codes_workload_op wrkld_per_rank;
int datatypesize;
MPI_Type_size(datatype, &datatypesize);
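/* same handle-vs-payload fix as in CODES_MPI_Send above */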
wrkld_per_rank.op_type = CODES_WK_RECV;
wrkld_per_rank.u.recv.tag = tag;
wrkld_per_rank.u.recv.source_rank = source;
wrkld_per_rank.u.recv.<