Commit e2ded66e authored by mubarak

Merging the Makefile to resolve conflicts

parents d324b101 2fea9e0a
......@@ -38,18 +38,25 @@ src_libcodes_net_a_SOURCES = \
src/models/networks/model-net/model-net-sched-impl.h \
src/models/networks/model-net/model-net-sched-impl.c \
src/models/mpi-trace-replay/model-net-mpi-wrklds.c \
src/models/mpi-trace-replay/model-net-synthetic.c
src/models/mpi-trace-replay/model-net-synthetic.c \
src/models/mpi-trace-replay/model-net-dumpi-traces-dump.c
bin_PROGRAMS += src/models/mpi-trace-replay/model-net-mpi-wrklds
bin_PROGRAMS += src/models/mpi-trace-replay/model-net-dumpi-traces-dump
bin_PROGRAMS += src/models/mpi-trace-replay/model-net-synthetic
src_models_mpi_trace_replay_model_net_mpi_wrklds_SOURCES = src/models/mpi-trace-replay/model-net-mpi-wrklds.c
src_models_mpi_trace_replay_model_net_mpi_wrklds_LDADD = $(testlib) $(CODES_BASE_LIBS)
src_models_mpi_trace_replay_model_net_mpi_wrklds_LDFLAGS = $(CODES_BASE_LDFLAGS)
src_models_mpi_trace_replay_model_net_mpi_wrklds_CFLAGS = ${CODES_BASE_CFLAGS}
bin_PROGRAMS += src/models/mpi-trace-replay/model-net-synthetic
src_models_mpi_trace_replay_model_net_synthetic_SOURCES = src/models/mpi-trace-replay/model-net-synthetic.c
src_models_mpi_trace_replay_model_net_synthetic_LDADD = $(testlib) $(CODES_BASE_LIBS)
src_models_mpi_trace_replay_model_net_synthetic_LDFLAGS = $(CODES_BASE_LDFLAGS)
src_models_mpi_trace_replay_model_net_synthetic_CFLAGS = ${CODES_BASE_CFLAGS}
src_models_mpi_trace_replay_model_net_dumpi_traces_dump_SOURCES = src/models/mpi-trace-replay/model-net-dumpi-traces-dump.c
src_models_mpi_trace_replay_model_net_dumpi_traces_dump_LDADD = $(testlib) $(CODES_BASE_LIBS)
src_models_mpi_trace_replay_model_net_dumpi_traces_dump_LDFLAGS = $(CODES_BASE_LDFLAGS)
src_models_mpi_trace_replay_model_net_dumpi_traces_dump_CFLAGS = ${CODES_BASE_CFLAGS}
......@@ -2,26 +2,44 @@
instructions available at:
http://sst.sandia.gov/about_dumpi.html
Configure dumpi with the following parameters:
../configure --enable-libdumpi --enable-test --disable-shared --prefix=/home/mubarm/dumpi/dumpi/install CC=mpicc CXX=mpicxx
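After configuring, build and install dumpi (standard autotools steps, assumed here rather than quoted from the dumpi documentation):
make && make install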
2- Configure codes-base with DUMPI. Make sure the CC environment variable
refers to an MPI compiler:
./configure --with-ross=/path/to/ross/install --with-dumpi=/path/to/dumpi/install
--prefix=/path/to/codes-base/install CC=mpicc
3- Build codes-base
3- Build codes-base (See codes-base INSTALL for instructions on building codes-base with dumpi)
make clean && make && make install
4- Configure and build codes-net (See README.txt for instructions on building codes-net).
4- Configure and build codes-net (See INSTALL for instructions on building codes-net).
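A typical sequence, modeled on the codes-base configure line above, might be (the --with-codes-base flag and exact options are assumptions; INSTALL has the authoritative instructions):
./configure --with-codes-base=/path/to/codes-base/install --with-ross=/path/to/ross/install --prefix=/path/to/codes-net/install CC=mpicc
make && make install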
5- Download and untar the design forward DUMPI traces from URL
http://portal.nersc.gov/project/CAL/designforward.htm
6- Configure model-net using its config file (Example .conf files available at src/models/mpi-trace-replay/)
----------------- RUNNING CODES NETWORK WORKLOAD TEST PROGRAM -----------------------
6- Download and untar the DUMPI AMG application trace for 27 MPI ranks using the following download link:
wget http://portal.nersc.gov/project/CAL/doe-miniapps-mpi-traces/AMG/df_AMG_n27_dumpi.tar.gz
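Then extract the downloaded archive, for example:
tar -xzf df_AMG_n27_dumpi.tar.gz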
7- Run the test program for codes-nw-workload using:
mpirun -np 4 ./src/models/mpi-trace-replay/model-net-dumpi-traces-dump --sync=3 --workload_type=dumpi --workload_file=/home/mubarm/df_traces/df_AMG_n27_dumpi/dumpi-2014.03.03.14.55.00- ../src/models/mpi-trace-replay/conf/modelnet-mpi-test.conf
The program shows the number of sends, receives, collectives and wait operations in the DUMPI trace log.
Note: If using a different DUMPI trace file, make sure to update the modelnet-mpi-test.conf file in the config directory.
----------------- RUNNING MODEL-NET WITH CODES NW WORKLOADS -----------------------------
8- Configure model-net using its config file (Example .conf files available at src/models/mpi-trace-replay/)
Make sure the number of nw-lp and model-net LP are the same in the config file.
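For instance, the simplenet example config in this commit keeps the two counts equal by placing one nw-lp and one model-net LP in each repetition:
MODELNET_GRP
{
   repetitions="27";
   nw-lp="1";
   modelnet_simplenet="1";
}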
7- From the main source directory of codes-net, run the DUMPI trace replay simulation on top of
9- From the main source directory of codes-net, run the DUMPI trace replay simulation on top of
model-net using (/dumpi-2014-04-05.22.12.17.37- is the prefix common to all of the DUMPI trace files;
we omit the trailing 4 digits of each DUMPI trace file name).
......@@ -29,7 +47,7 @@
The simulation can run in ROSS serial, conservative, and optimistic modes.
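For example, a replay run on 4 MPI processes might look like the following (the trace directory is a placeholder; the flags mirror the traces-dump command above):
mpirun -np 4 ./src/models/mpi-trace-replay/model-net-mpi-wrklds --sync=3 --workload_type=dumpi --workload_file=/path/to/df_AMG_n27_dumpi/dumpi-2014.03.03.14.55.00- ../src/models/mpi-trace-replay/conf/modelnet-mpi-test.conf
The --sync flag selects the ROSS mode: 1 for serial, 2 for conservative, and 3 for optimistic.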
8- Some example runs with small-scale traces
10- Some example runs with small-scale traces
(i) AMG 8 MPI tasks http://portal.nersc.gov/project/CAL/designforward.htm#AMG
......
......@@ -2,7 +2,7 @@ LPGROUPS
{
MODELNET_GRP
{
repetitions="18";
repetitions="2048";
nw-lp="1";
modelnet_torus="1";
}
......@@ -10,15 +10,15 @@ LPGROUPS
PARAMS
{
packet_size="512";
message_size="296";
message_size="336";
modelnet_order=( "torus" );
# scheduler options
modelnet_scheduler="fcfs";
net_startup_ns="1.5";
net_bw_mbps="20000";
n_dims="3";
dim_length="3,3,2";
link_bandwidth="2.0";
n_dims="5";
dim_length="8,4,4,4,4";
link_bandwidth="10.0";
buffer_size="1310720";
num_vc="1";
chunk_size="64";
......
......@@ -2,15 +2,14 @@ LPGROUPS
{
MODELNET_GRP
{
repetitions="8";
repetitions="27";
nw-lp="1";
modelnet_simplenet="1";
}
}
PARAMS
{
packet_size="512";
message_size="296";
message_size="784";
modelnet_order=( "simplenet" );
# scheduler options
modelnet_scheduler="fcfs";
......
/*
* Copyright (C) 2014 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
*/
#include <ross.h>
#include <inttypes.h>
#include "codes/codes-nw-workload.h"
#include "codes/codes.h"
#include "codes/configuration.h"
#include "codes/codes_mapping.h"
#define TRACE -1
#define DEBUG 0
char workload_type[128];
char workload_file[8192];
char offset_file[8192];
static int wrkld_id;
typedef struct nw_state nw_state;
typedef struct nw_message nw_message;
static int net_id = 0;
static float noise = 5.0;
static int num_net_lps, num_nw_lps;
long long num_bytes_sent=0;
long long num_bytes_recvd=0;
double total_time = 0;
double avg_time = 0;
double avg_comm_time = 0;
double avg_wait_time = 0;
double avg_send_time = 0;
double avg_recv_time = 0;
double avg_col_time = 0;
double avg_compute_time = 0;
long total_waits = 0;
long total_collectives = 0;
long total_sends = 0;
long total_recvs = 0;
long total_delays = 0;
/* global variables for codes mapping */
static char lp_group_name[MAX_NAME_LENGTH], lp_type_name[MAX_NAME_LENGTH], annotation[MAX_NAME_LENGTH];
static int mapping_grp_id, mapping_type_id, mapping_rep_id, mapping_offset;
enum MPI_NW_EVENTS
{
MPI_OP_GET_NEXT=1,
};
/* state of the network LP. It contains the pointers to send/receive lists */
struct nw_state
{
long num_events_per_lp;
tw_lpid nw_id;
short wrkld_end;
/* count of sends, receives, collectives and delays */
unsigned long num_sends;
unsigned long num_recvs;
unsigned long num_cols;
unsigned long num_delays;
unsigned long num_wait;
unsigned long num_waitall;
unsigned long num_waitsome;
unsigned long num_waitany;
/* time spent by the LP in executing the app trace*/
double elapsed_time;
/* time spent in compute operations */
double compute_time;
/* time spent in message send/isend */
double send_time;
/* time spent in message receive */
double recv_time;
/* time spent in wait operation */
double wait_time;
/* time spent in collective operations*/
double col_time;
/* total time spent in operations */
double total_time;
};
/* network event being sent. msg_type is the type of message being sent, op is the MPI event to be executed/reversed */
struct nw_message
{
int msg_type;
struct mpi_event_list op;
};
/* initialize queues, get next operation */
static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp);
/* conversion from seconds to nanoseconds */
static tw_stime s_to_ns(tw_stime ns);
/* issue next event */
static void codes_issue_next_event(tw_lp* lp);
/* Trigger getting next event at LP */
static void codes_issue_next_event(tw_lp* lp)
{
tw_event *e;
nw_message* msg;
tw_stime ts;
ts = g_tw_lookahead + 0.1 + tw_rand_exponential(lp->rng, noise);
e = tw_event_new( lp->gid, ts, lp );
msg = tw_event_data(e);
msg->msg_type = MPI_OP_GET_NEXT;
tw_event_send(e);
}
/* convert seconds to ns */
static tw_stime s_to_ns(tw_stime ns)
{
return(ns * (1000.0 * 1000.0 * 1000.0));
}
/* initializes the network node LP, loads the trace file in the structs, calls the first MPI operation to be executed */
void nw_test_init(nw_state* s, tw_lp* lp)
{
/* initialize the LP's and load the data */
char * params;
scala_trace_params params_sc;
dumpi_trace_params params_d;
s->nw_id = lp->gid;
s->wrkld_end = 0;
s->num_sends = 0;
s->num_recvs = 0;
s->num_cols = 0;
s->num_delays = 0;
s->num_wait = 0;
s->num_waitall = 0;
s->num_waitsome = 0;
s->num_waitany = 0;
s->elapsed_time = 0;
s->compute_time = 0;
if (strcmp(workload_type, "dumpi") == 0){
strcpy(params_d.file_name, workload_file);
params_d.num_net_traces = num_net_lps;
params = (char*)&params_d;
}
/* In this case, the LP will not generate any workload related events*/
if(s->nw_id >= params_d.num_net_traces)
{
//printf("\n network LP not generating events %d ", (int)s->nw_id);
return;
}
wrkld_id = codes_nw_workload_load("dumpi-trace-workload", params, (int)s->nw_id);
/* clock starts ticking */
s->elapsed_time = tw_now(lp);
codes_issue_next_event(lp);
return;
}
void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
{
switch(m->msg_type)
{
case MPI_OP_GET_NEXT:
get_next_mpi_operation(s, bf, m, lp);
break;
default:
printf("\n Incorrect event handler ");
break;
}
}
void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
{
codes_nw_workload_get_next_rc(wrkld_id, (int)s->nw_id, &m->op);
if(m->op.op_type == CODES_NW_END)
return;
s->total_time -= (m->op.end_time - m->op.start_time);
switch(m->op.op_type)
{
case CODES_NW_SEND:
case CODES_NW_ISEND:
{
s->num_sends--;
s->send_time -= (m->op.end_time - m->op.start_time);
num_bytes_sent -= m->op.u.send.num_bytes;
};
break;
case CODES_NW_RECV:
case CODES_NW_IRECV:
{
s->num_recvs--;
s->recv_time -= (m->op.end_time - m->op.start_time);
num_bytes_recvd -= m->op.u.recv.num_bytes;
}
break;
case CODES_NW_DELAY:
{
s->num_delays--;
s->compute_time -= (m->op.end_time - m->op.start_time);
}
break;
case CODES_NW_BCAST:
case CODES_NW_ALLGATHER:
case CODES_NW_ALLGATHERV:
case CODES_NW_ALLTOALL:
case CODES_NW_ALLTOALLV:
case CODES_NW_REDUCE:
case CODES_NW_ALLREDUCE:
case CODES_NW_COL:
{
s->num_cols--;
s->col_time -= (m->op.end_time - m->op.start_time);
}
break;
case CODES_NW_WAIT:
{
s->num_wait--;
s->wait_time -= (m->op.end_time - m->op.start_time);
}
break;
case CODES_NW_WAITALL:
{
s->num_waitall--;
s->wait_time -= (m->op.end_time - m->op.start_time);
}
break;
case CODES_NW_WAITSOME:
{
s->num_waitsome--;
s->wait_time -= (m->op.end_time - m->op.start_time);
}
break;
case CODES_NW_WAITANY:
{
s->num_waitany--;
s->wait_time -= (m->op.end_time - m->op.start_time);
}
break;
default:
{
printf("\n Invalid op type %d", m->op.op_type);
return;
}
}
tw_rand_reverse_unif(lp->rng);
}
static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
{
mpi_event_list mpi_op;
codes_nw_workload_get_next(wrkld_id, (int)s->nw_id, &mpi_op);
memcpy(&m->op, &mpi_op, sizeof(struct mpi_event_list));
if(mpi_op.op_type == CODES_NW_END)
{
return;
}
s->total_time += (mpi_op.end_time - mpi_op.start_time);
switch(mpi_op.op_type)
{
case CODES_NW_SEND:
case CODES_NW_ISEND:
{
s->num_sends++;
s->send_time += (mpi_op.end_time - mpi_op.start_time);
num_bytes_sent += mpi_op.u.send.num_bytes;
}
break;
case CODES_NW_RECV:
case CODES_NW_IRECV:
{
s->num_recvs++;
s->recv_time += (mpi_op.end_time - mpi_op.start_time);
num_bytes_recvd += mpi_op.u.recv.num_bytes;
}
break;
case CODES_NW_DELAY:
{
s->num_delays++;
s->compute_time += (mpi_op.end_time - mpi_op.start_time);
}
break;
case CODES_NW_BCAST:
case CODES_NW_ALLGATHER:
case CODES_NW_ALLGATHERV:
case CODES_NW_ALLTOALL:
case CODES_NW_ALLTOALLV:
case CODES_NW_REDUCE:
case CODES_NW_ALLREDUCE:
case CODES_NW_COL:
{
s->num_cols++;
s->col_time += (mpi_op.end_time - mpi_op.start_time);
}
break;
case CODES_NW_WAIT:
{
s->num_wait++;
s->wait_time += (mpi_op.end_time - mpi_op.start_time);
}
break;
case CODES_NW_WAITALL:
{
s->num_waitall++;
s->wait_time += (mpi_op.end_time - mpi_op.start_time);
}
break;
case CODES_NW_WAITSOME:
{
s->num_waitsome++;
s->wait_time += (mpi_op.end_time - mpi_op.start_time);
}
break;
case CODES_NW_WAITANY:
{
s->num_waitany++;
s->wait_time += (mpi_op.end_time - mpi_op.start_time);
}
break;
default:
{
printf("\n Invalid op type %d ", m->op.op_type);
return;
}
}
codes_issue_next_event(lp);
}
void nw_test_finalize(nw_state* s, tw_lp* lp)
{
total_waits += (s->num_wait + s->num_waitall + s->num_waitsome + s->num_waitany);
total_recvs += (s->num_recvs);
total_sends += (s->num_sends);
total_delays += s->num_delays;
total_collectives += s->num_cols;
printf("\n LP %ld total sends %ld receives %ld wait_alls %ld waits %ld ", lp->gid, s->num_sends,s->num_recvs, s->num_waitall, s->num_wait);
avg_time += s->total_time;
avg_compute_time += s->compute_time;
avg_comm_time += (s->total_time - s->compute_time);
avg_wait_time += s->wait_time;
avg_send_time += s->send_time;
avg_recv_time += s->recv_time;
avg_col_time += s->col_time;
}
const tw_optdef app_opt [] =
{
TWOPT_GROUP("Network workload test"),
TWOPT_CHAR("workload_type", workload_type, "workload type (either \"scalatrace\" or \"dumpi\")"),
TWOPT_CHAR("workload_file", workload_file, "workload file name"),
TWOPT_CHAR("offset_file", offset_file, "offset file name"),
TWOPT_END()
};
tw_lptype nw_lp = {
(init_f) nw_test_init,
(pre_run_f) NULL,
(event_f) nw_test_event_handler,
(revent_f) nw_test_event_handler_rc,
(final_f) nw_test_finalize,
(map_f) codes_mapping,
sizeof(nw_state)
};
const tw_lptype* nw_get_lp_type()
{
return(&nw_lp);
}
static void nw_add_lp_type()
{
lp_type_register("nw-lp", nw_get_lp_type());
}
int main( int argc, char** argv )
{
int rank, nprocs;
int num_nets;
int* net_ids;
g_tw_ts_end = s_to_ns(60*60*24*365); /* one year, in nsecs */
workload_type[0]='\0';
tw_opt_add(app_opt);
tw_init(&argc, &argv);
if(strlen(workload_file) == 0)
{
if(tw_ismaster())
printf("\n Usage: mpirun -np n ./codes-nw-test --sync=1/2/3 --workload_type=type --workload_file=workload-file-name");
tw_end();
return -1;
}
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
configuration_load(argv[2], MPI_COMM_WORLD, &config);
nw_add_lp_type();
codes_mapping_setup();
num_net_lps = codes_mapping_get_lp_count("MODELNET_GRP", 0, "nw-lp", NULL, 0);
tw_run();
long long total_bytes_sent, total_bytes_recvd;
double avg_run_time;
double avg_comm_run_time;
double avg_col_run_time;
double total_avg_send_time;
double total_avg_wait_time;
double total_avg_recv_time;
double total_avg_col_time;
double total_avg_comp_time;
long overall_sends, overall_recvs, overall_waits, overall_cols;
MPI_Reduce(&num_bytes_sent, &total_bytes_sent, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&num_bytes_recvd, &total_bytes_recvd, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&avg_time, &avg_run_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&avg_recv_time, &total_avg_recv_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&avg_comm_time, &avg_comm_run_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&avg_col_time, &avg_col_run_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&avg_wait_time, &total_avg_wait_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&avg_send_time, &total_avg_send_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&avg_compute_time, &total_avg_comp_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&total_sends, &overall_sends, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&total_recvs, &overall_recvs, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&total_waits, &overall_waits, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&total_collectives, &overall_cols, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
if(!g_tw_mynode)
printf("\n Total bytes sent %lld recvd %lld \n avg runtime %lf \n avg comm time %lf avg compute time %lf \n avg collective time %lf avg send time %lf \n avg recv time %lf \n avg wait time %lf \n total sends %ld total recvs %ld total waits %ld total collectives %ld ", total_bytes_sent, total_bytes_recvd,
avg_run_time/num_net_lps,
avg_comm_run_time/num_net_lps,
total_avg_comp_time/num_net_lps,
avg_col_run_time/num_net_lps,
total_avg_send_time/num_net_lps,
total_avg_recv_time/num_net_lps,
total_avg_wait_time/num_net_lps,
overall_sends, overall_recvs, overall_waits, overall_cols);
tw_end();
return 0;
}
......@@ -4,6 +4,7 @@
*
*/
#include <ross.h>
#include <inttypes.h>
#include "codes/codes-nw-workload.h"
#include "codes/codes.h"
......@@ -11,8 +12,7 @@
#include "codes/codes_mapping.h"
#include "codes/model-net.h"
#define TRACE 0
#define DEBUG 0
#define TRACE -1
char workload_type[128];
char workload_file[8192];
......@@ -22,18 +22,23 @@ static int num_net_traces = 0;
typedef struct nw_state nw_state;
typedef struct nw_message nw_message;
typedef int16_t dumpi_req_id;
static int net_id = 0;
static float noise = 5.0;
static int num_net_lps, num_nw_lps;
long long num_bytes_sent=0;
long long num_bytes_recvd=0;
long long max_time = 0;
double max_time = 0, max_comm_time = 0, max_wait_time = 0, max_send_time = 0, max_recv_time = 0;
double avg_time = 0, avg_comm_time = 0, avg_wait_time = 0, avg_send_time = 0, avg_recv_time = 0;
/* global variables for codes mapping */
static char lp_group_name[MAX_NAME_LENGTH], lp_type_name[MAX_NAME_LENGTH], annotation[MAX_NAME_LENGTH];
static int mapping_grp_id, mapping_type_id, mapping_rep_id, mapping_offset;
/* MPI_OP_GET_NEXT is for getting the next MPI operation when the previous operation completes.
* MPI_SEND_ARRIVED is issued when an MPI message arrives at its destination (the message is transported by model-net and an event is invoked when it arrives).
* MPI_SEND_POSTED is issued when an MPI message has left the source LP (the message is transported via model-net). */
enum MPI_NW_EVENTS
{
MPI_OP_GET_NEXT=1,
......@@ -41,13 +46,29 @@ enum MPI_NW_EVENTS
MPI_SEND_POSTED,
};
/* stores pointers to pending MPI operations to be matched with their respective sends/receives. */
struct mpi_msgs_queue
{
mpi_event_list* mpi_op;
struct mpi_msgs_queue* next;
};
/* maintains the head and tail of the queue, as well as the number of elements currently in queue */
/* stores request IDs of completed MPI operations (Isends or Irecvs) */
struct completed_requests
{
dumpi_req_id req_id;
struct completed_requests* next;
};
/* for wait operations, store the pending operation and number of completed waits so far. */
struct pending_waits
{
mpi_event_list* mpi_op;
int num_completed;
tw_stime start_time;
};
/* maintains the head and tail of the queue, as well as the number of elements currently in queue. Queues are pending_recvs queue (holds unmatched MPI recv operations) and arrival_queue (holds unmatched MPI send messages). */
struct mpi_queue_ptrs
{
int num_elems;
......@@ -67,31 +88,93 @@ struct nw_state
unsigned long num_recvs;
unsigned long num_cols;
unsigned long num_delays;
unsigned long num_wait;
unsigned long num_waitall;
unsigned long num_waitsome;
/* time spent by the LP in executing the app trace*/
unsigned long long elapsed_time;
double elapsed_time;
/* time spent in compute operations */
unsigned long long compute_time;
double compute_time;
/* search time */
double search_overhead;
/* time spent in message send/isend */
double send_time;
/* time spent in message receive */
double recv_time;
/* time spent in wait operation */
double wait_time;
/* FIFO for isend messages arrived on destination */
struct mpi_queue_ptrs* arrival_queue;