Commit 92ea64e4 authored by Caitlin Ross's avatar Caitlin Ross
Browse files

Merge branch 'master' into analysis-lp

parents 71b1dd55 b8df46df
http://www.mcs.anl.gov/projects/codes/ # CODES Discrete-event Simulation Framework
CODES is a set of models and utilities to aid in building parallel discrete https://xgitlab.cels.anl.gov/codes/codes/wikis/home
event simulations, on top of the ROSS simulation framework
(https://github.com/carothersc/ROSS).
Documentation can be found in the doc subdirectory. Particularly, Discrete event driven simulation of HPC system architectures and subsystems has emerged as a productive and cost-effective means of evaluating potential HPC designs, along with capabilities for executing simulations of extreme scale systems. The goal of the CODES project is to use highly parallel simulation to explore the design of exascale storage/network architectures and distributed data-intensive science facilities.
doc/BUILD\_STEPS documents the build process and doc/GETTING\_STARTED documents
library features at a high level. Our simulations build upon the Rensselaer Optimistic Simulation System (ROSS), a discrete event simulation framework that allows simulations to be run in parallel, decreasing the simulation run time of massive simulations to hours. We are using ROSS to explore topics including large-scale storage systems, I/O workloads, HPC network fabrics, distributed science systems, and data-intensive computation environments.
The CODES project is a collaboration between the Mathematics and Computer Science department at Argonne National Laboratory and Rensselaer Polytechnic Institute. We collaborate with researchers at University of California at Davis to come up with novel methods for analysis and visualizations of large-scale event driven simulations. We also collaborate with Lawrence Livermore National Laboratory for modeling HPC interconnect systems.
Documentation can be found in the wiki:
https://xgitlab.cels.anl.gov/codes/codes/wikis/home
\ No newline at end of file
...@@ -69,6 +69,7 @@ struct recorder_params ...@@ -69,6 +69,7 @@ struct recorder_params
struct dumpi_trace_params { struct dumpi_trace_params {
char file_name[MAX_NAME_LENGTH_WKLD]; char file_name[MAX_NAME_LENGTH_WKLD];
int num_net_traces; int num_net_traces;
int nprocs;
#ifdef ENABLE_CORTEX_PYTHON #ifdef ENABLE_CORTEX_PYTHON
char cortex_script[MAX_NAME_LENGTH_WKLD]; char cortex_script[MAX_NAME_LENGTH_WKLD];
char cortex_class[MAX_NAME_LENGTH_WKLD]; char cortex_class[MAX_NAME_LENGTH_WKLD];
...@@ -162,6 +163,7 @@ struct codes_workload_op ...@@ -162,6 +163,7 @@ struct codes_workload_op
double end_time; double end_time;
double sim_start_time; double sim_start_time;
int64_t sequence_id;
/* parameters for each operation type */ /* parameters for each operation type */
union union
{ {
...@@ -198,7 +200,7 @@ struct codes_workload_op ...@@ -198,7 +200,7 @@ struct codes_workload_op
int16_t data_type; /* MPI data type to be matched with the recv */ int16_t data_type; /* MPI data type to be matched with the recv */
int count; /* number of elements to be received */ int count; /* number of elements to be received */
int tag; /* tag of the message */ int tag; /* tag of the message */
int req_id; unsigned int req_id;
} send; } send;
struct { struct {
/* TODO: not sure why source rank is here */ /* TODO: not sure why source rank is here */
...@@ -208,7 +210,7 @@ struct codes_workload_op ...@@ -208,7 +210,7 @@ struct codes_workload_op
int16_t data_type; /* MPI data type to be matched with the send */ int16_t data_type; /* MPI data type to be matched with the send */
int count; /* number of elements to be sent */ int count; /* number of elements to be sent */
int tag; /* tag of the message */ int tag; /* tag of the message */
int req_id; unsigned int req_id;
} recv; } recv;
/* TODO: non-stub for other collectives */ /* TODO: non-stub for other collectives */
struct { struct {
...@@ -216,14 +218,14 @@ struct codes_workload_op ...@@ -216,14 +218,14 @@ struct codes_workload_op
} collective; } collective;
struct { struct {
int count; int count;
int* req_ids; unsigned int* req_ids;
} waits; } waits;
struct { struct {
int req_id; unsigned int req_id;
} wait; } wait;
struct struct
{ {
int req_id; unsigned int req_id;
} }
free; free;
}u; }u;
......
...@@ -32,6 +32,7 @@ extern void free_tmp(void * ptr); ...@@ -32,6 +32,7 @@ extern void free_tmp(void * ptr);
typedef struct message_list message_list; typedef struct message_list message_list;
struct message_list { struct message_list {
//CHANGE: add message types for new networks here
union { union {
terminal_message dfly_msg; terminal_message dfly_msg;
em_message em_msg; em_message em_msg;
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
* *
*/ */
//CHANGE: modify to match your header file name
#ifndef EXPRESS_MESH_H #ifndef EXPRESS_MESH_H
#define EXPRESS_MESH_H #define EXPRESS_MESH_H
...@@ -13,76 +14,67 @@ extern "C" { ...@@ -13,76 +14,67 @@ extern "C" {
#include <ross.h> #include <ross.h>
//CHANGE: modify to match the struct
typedef struct net_message net_message;
//CHANGE: modify the struct name - add to message_list union in common-net.h
typedef struct em_message em_message; typedef struct em_message em_message;
struct em_message struct em_message
{ {
/* magic number */ //common entries:
int magic; int magic; /* magic number */
/* flit travel start time*/ short type; /* event type of the flit */
tw_stime travel_start_time;
/* packet ID of the flit */
unsigned long long packet_ID;
/* event type of the flit */
short type;
/* category: comes from codes */
char category[CATEGORY_NAME_MAX];
/* final destination LP ID, this comes from codes can be a server or any other LP type*/
tw_lpid final_dest_gid;
/*sending LP ID from CODES, can be a server or any other LP type */
tw_lpid sender_lp;
tw_lpid sender_mn_lp; // source modelnet id
/* destination terminal ID */
tw_lpid dest_terminal_id;
int dest_terminal;
/* source terminal ID */
tw_lpid src_terminal_id;
short saved_channel;
short my_N_hop;
short hops[8];
/* Intermediate LP ID from which this message is coming */ tw_stime travel_start_time; /* flit travel start time*/
unsigned int intm_lp_id; unsigned long long packet_ID; /* packet ID of the flit */
short saved_vc; char category[CATEGORY_NAME_MAX]; /* category: comes from codes */
short dim_change;
/* last hop of the message, can be a terminal, local router or global router */
int last_hop;
/* For routing */
uint64_t chunk_id;
uint64_t packet_size;
uint64_t message_id;
uint64_t total_size;
int saved_remote_esize; tw_lpid final_dest_gid; /* final destination LP ID, this comes from codes can be a server or any other LP type*/
int remote_event_size_bytes; tw_lpid sender_lp; /*sending LP ID from CODES, can be a server or any other LP type */
int local_event_size_bytes; tw_lpid sender_mn_lp; // source modelnet id (think NIC)
tw_lpid src_terminal_id; /* source terminal ID - mostly same as sender_mn_lp */
tw_lpid dest_terminal_id; /* destination modelnet id */
int dest_terminal; /* logical id of destination modelnet id */
// For buffer message /* packet/message identifier and status */
int vc_index; uint64_t chunk_id; //which chunk of packet I am
int output_chan; uint64_t packet_size; //what is the size of my packet
model_net_event_return event_rc; uint64_t message_id; //seq number at message level - NIC specified
uint64_t total_size; //total size of the message
int remote_event_size_bytes; // data size for target event at destination
int local_event_size_bytes; // data size for event at source
int is_pull; int is_pull;
uint64_t pull_size; uint64_t pull_size;
tw_stime msg_start_time;
/* for reverse computation */ //info for path traversal
short my_N_hop; /* hops traversed so far */
short hops[8]; /* can be used for storing different types of hops */
unsigned int intm_lp_id; /* Intermediate LP ID that sent this packet */
int last_hop; /* last hop of the message, can be a terminal, local router or global router */
int vc_index; /* stores port info */
int output_chan; /* virtual channel within port */
//info for reverse computation
short saved_channel;
short saved_vc;
model_net_event_return event_rc;
tw_stime saved_available_time; tw_stime saved_available_time;
tw_stime saved_avg_time; tw_stime saved_avg_time;
tw_stime saved_rcv_time; tw_stime saved_rcv_time;
tw_stime saved_busy_time; tw_stime saved_busy_time;
tw_stime saved_total_time; tw_stime saved_total_time;
tw_stime saved_hist_start_time; tw_stime saved_hist_start_time;
tw_stime saved_sample_time; tw_stime saved_sample_time;
tw_stime msg_start_time;
int saved_hist_num; //CHANGE: info for specific networks
int saved_occupancy; short dim_change;
}; };
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif #endif
/*
* Copyright (C) 2014 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
*/
//CHANGE: modify to match your header file name
#ifndef NET_TEMPLATE_H
#define NET_TEMPLATE_H
#ifdef __cplusplus
extern "C" {
#endif
#include <ross.h>
//CHANGE: modify to match the struct
typedef struct net_message net_message;
//CHANGE: modify the struct name - add to message_list union in common-net.h
/*
 * Template message structure for a network model. The "CHANGE" markers flag
 * the spots that are meant to be edited when this header is copied for a new
 * network. Members are grouped into: common entries, packet/message
 * identification and status, path traversal, and values saved for reverse
 * computation. Network-specific members go at the end.
 * NOTE(review): member order and types determine the struct layout; keep
 * them stable unless every user of the struct is updated as well.
 */
struct net_message
{
//common entries:
int magic; /* magic number */
short type; /* event type of the flit */
tw_stime travel_start_time; /* flit travel start time */
unsigned long long packet_ID; /* packet ID of the flit */
char category[CATEGORY_NAME_MAX]; /* category: comes from codes */
tw_lpid final_dest_gid; /* final destination LP ID, this comes from codes can be a server or any other LP type */
tw_lpid sender_lp; /* sending LP ID from CODES, can be a server or any other LP type */
tw_lpid sender_mn_lp; // source modelnet id (think NIC)
tw_lpid src_terminal_id; /* source terminal ID - mostly same as sender_mn_lp */
tw_lpid dest_terminal_id; /* destination modelnet id */
int dest_terminal; /* logical id of destination modelnet id */
/* packet/message identifier and status */
uint64_t chunk_id; // which chunk of the packet this is
uint64_t packet_size; // size of the packet this chunk belongs to
uint64_t message_id; // seq number at message level - NIC specified
uint64_t total_size; // total size of the message
int remote_event_size_bytes; // data size for target event at destination
int local_event_size_bytes; // data size for event at source
int is_pull; // NOTE(review): presumably marks a pull-type request - confirm against model-net
uint64_t pull_size; // NOTE(review): presumably the size associated with a pull request - confirm
tw_stime msg_start_time; // NOTE(review): presumably the time the message started - confirm
//info for path traversal
short my_N_hop; /* hops traversed so far */
short hops[8]; /* can be used for storing different types of hops */
unsigned int intm_lp_id; /* Intermediate LP ID that sent this packet */
int last_hop; /* last hop of the message, can be a terminal, local router or global router */
int vc_index; /* stores port info */
int output_chan; /* virtual channel within port */
//info for reverse computation
short saved_channel;
short saved_vc;
model_net_event_return event_rc; // NOTE(review): presumably the handle returned by model_net_event, kept for rollback - confirm
// saved_* members: state snapshots kept for reverse computation (see group heading)
tw_stime saved_available_time;
tw_stime saved_avg_time;
tw_stime saved_rcv_time;
tw_stime saved_busy_time;
tw_stime saved_total_time;
tw_stime saved_hist_start_time;
tw_stime saved_sample_time;
//CHANGE: add info for specific networks
};
#ifdef __cplusplus
}
#endif
#endif
...@@ -113,16 +113,15 @@ AC_SUBST(RECORDER_CPPFLAGS) ...@@ -113,16 +113,15 @@ AC_SUBST(RECORDER_CPPFLAGS)
AC_ARG_WITH([dumpi],[AS_HELP_STRING([--with-dumpi@<:@=DIR@:>@], AC_ARG_WITH([dumpi],[AS_HELP_STRING([--with-dumpi@<:@=DIR@:>@],
[location of Dumpi installation])]) [location of Dumpi installation])])
if test "x${with_dumpi}" != "x" ; then if test "x${with_dumpi}" != "x" ; then
AC_CHECK_FILE([${with_dumpi}/lib/libundumpi.la], CFLAGS="-I${with_dumpi}/include"
AM_CONDITIONAL(USE_DUMPI, true), LIBS="-L${with_dumpi}/lib/ -lundumpi"
AC_MSG_ERROR(Could not find libundumpi.la)) AC_CHECK_LIB([undumpi],
[undumpi_open], [], [AC_MSG_ERROR(Could not find dumpi)])
AM_CONDITIONAL(USE_DUMPI, true)
DUMPI_CFLAGS="-I${with_dumpi}/include" DUMPI_CFLAGS="-I${with_dumpi}/include"
# DUMPI_CFLAGS+=" -I${with_dumpi}/include/dumpi/common"
# DUMPI_CFLAGS+=" -I${with_dumpi}/include/dumpi/libdumpi"
# DUMPI_CFLAGS+=" -I${with_dumpi}/include/dumpi/libundumpi"
DUMPI_LIBS="-L${with_dumpi}/lib/ -lundumpi" DUMPI_LIBS="-L${with_dumpi}/lib/ -lundumpi"
AC_SUBST(DUMPI_LIBS) AC_SUBST(DUMPI_LIBS)
AC_SUBST(DUMPI_CFLAGS) AC_SUBST(DUMPI_CFLAGS)
else else
AM_CONDITIONAL(USE_DUMPI, false) AM_CONDITIONAL(USE_DUMPI, false)
fi fi
......
...@@ -30,8 +30,8 @@ intercon = open(sys.argv[3], "wb") ...@@ -30,8 +30,8 @@ intercon = open(sys.argv[3], "wb")
def router(group, row, col): def router(group, row, col):
return group*96 + row*16 + col return group*96 + row*16 + col
numblack = np.zeros((864,864), dtype=np.int) numblack = np.zeros((960,960), dtype=np.int)
numblue = np.zeros((864,864), dtype=np.int) numblue = np.zeros((960,960), dtype=np.int)
with open(filename) as ofile: with open(filename) as ofile:
matches = re.findall('c\d+-\dc\ds\d+a0l\d+\((\d+):(\d):(\d+)\).(\w+).->.c\d+-\dc\ds\d+a0l\d+\((\d+):(\d):(\d+)\)', ofile.read(), re.MULTILINE) matches = re.findall('c\d+-\dc\ds\d+a0l\d+\((\d+):(\d):(\d+)\).(\w+).->.c\d+-\dc\ds\d+a0l\d+\((\d+):(\d):(\d+)\)', ofile.read(), re.MULTILINE)
......
...@@ -28,7 +28,6 @@ ...@@ -28,7 +28,6 @@
#define lprintf(_fmt, ...) \ #define lprintf(_fmt, ...) \
do {if (CS_LP_DBG) printf(_fmt, __VA_ARGS__);} while (0) do {if (CS_LP_DBG) printf(_fmt, __VA_ARGS__);} while (0)
#define MAX_STATS 65536 #define MAX_STATS 65536
#define PAYLOAD_SZ 1024
static int msg_size_hash_compare( static int msg_size_hash_compare(
void *key, struct qhash_head *link); void *key, struct qhash_head *link);
...@@ -41,7 +40,7 @@ static int preserve_wait_ordering = 0; ...@@ -41,7 +40,7 @@ static int preserve_wait_ordering = 0;
static int enable_msg_tracking = 0; static int enable_msg_tracking = 0;
static int is_synthetic = 0; static int is_synthetic = 0;
tw_lpid TRACK_LP = -1; tw_lpid TRACK_LP = -1;
int nprocs = 0;
static double total_syn_data = 0; static double total_syn_data = 0;
static int unmatched = 0; static int unmatched = 0;
char workload_type[128]; char workload_type[128];
...@@ -55,6 +54,7 @@ static int64_t EAGER_THRESHOLD = 8192; ...@@ -55,6 +54,7 @@ static int64_t EAGER_THRESHOLD = 8192;
static int alloc_spec = 0; static int alloc_spec = 0;
static tw_stime self_overhead = 10.0; static tw_stime self_overhead = 10.0;
static tw_stime mean_interval = 100000; static tw_stime mean_interval = 100000;
static int payload_sz = 1024;
/* Doing LP IO*/ /* Doing LP IO*/
static char lp_io_dir[256] = {'\0'}; static char lp_io_dir[256] = {'\0'};
...@@ -86,7 +86,7 @@ static char cortex_gen[512] = "\0"; ...@@ -86,7 +86,7 @@ static char cortex_gen[512] = "\0";
typedef struct nw_state nw_state; typedef struct nw_state nw_state;
typedef struct nw_message nw_message; typedef struct nw_message nw_message;
typedef int dumpi_req_id; typedef unsigned int dumpi_req_id;
static int net_id = 0; static int net_id = 0;
static float noise = 2.0; static float noise = 2.0;
...@@ -138,6 +138,14 @@ enum MPI_NW_EVENTS ...@@ -138,6 +138,14 @@ enum MPI_NW_EVENTS
CLI_NBR_FINISH, CLI_NBR_FINISH,
}; };
/*
 * Kinds of synthetic traffic a workload LP can generate. The numeric
 * values are spelled out explicitly and must not change.
 */
enum TRAFFIC
{
    /* sends message to a randomly selected node */
    UNIFORM = 1,

    /* sends message to the next node (potentially connected to the same router) */
    NEAREST_NEIGHBOR = 2,

    /* sends message to all other nodes */
    ALLTOALL = 3,

    /* sends message to 4 nearby neighbors */
    STENCIL = 4
};
struct mpi_workload_sample struct mpi_workload_sample
{ {
/* Sampling data */ /* Sampling data */
...@@ -156,6 +164,7 @@ struct mpi_msgs_queue ...@@ -156,6 +164,7 @@ struct mpi_msgs_queue
int source_rank; int source_rank;
int dest_rank; int dest_rank;
int64_t num_bytes; int64_t num_bytes;
int64_t seq_id;
tw_stime req_init_time; tw_stime req_init_time;
dumpi_req_id req_id; dumpi_req_id req_id;
struct qlist_head ql; struct qlist_head ql;
...@@ -164,7 +173,7 @@ struct mpi_msgs_queue ...@@ -164,7 +173,7 @@ struct mpi_msgs_queue
/* stores request IDs of completed MPI operations (Isends or Irecvs) */ /* stores request IDs of completed MPI operations (Isends or Irecvs) */
struct completed_requests struct completed_requests
{ {
int req_id; unsigned int req_id;
struct qlist_head ql; struct qlist_head ql;
int index; int index;
}; };
...@@ -173,7 +182,7 @@ struct completed_requests ...@@ -173,7 +182,7 @@ struct completed_requests
struct pending_waits struct pending_waits
{ {
int op_type; int op_type;
int req_ids[MAX_WAIT_REQS]; unsigned int req_ids[MAX_WAIT_REQS];
int num_completed; int num_completed;
int count; int count;
tw_stime start_time; tw_stime start_time;
...@@ -202,6 +211,7 @@ struct nw_state ...@@ -202,6 +211,7 @@ struct nw_state
int app_id; int app_id;
int local_rank; int local_rank;
int synthetic_pattern;
int is_finished; int is_finished;
int neighbor_completed; int neighbor_completed;
...@@ -293,7 +303,7 @@ struct nw_message ...@@ -293,7 +303,7 @@ struct nw_message
double sim_start_time; double sim_start_time;
// for callbacks - time message was received // for callbacks - time message was received
double msg_send_time; double msg_send_time;
int req_id; unsigned int req_id;
int matched_req; int matched_req;
int tag; int tag;
int app_id; int app_id;
...@@ -308,6 +318,7 @@ struct nw_message ...@@ -308,6 +318,7 @@ struct nw_message
double saved_wait_time; double saved_wait_time;
double saved_delay; double saved_delay;
int64_t saved_num_bytes; int64_t saved_num_bytes;
int saved_syn_length;
} rc; } rc;
}; };
...@@ -590,12 +601,13 @@ static void gen_synthetic_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp ...@@ -590,12 +601,13 @@ static void gen_synthetic_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp
if(bf->c0) if(bf->c0)
return; return;
model_net_event_rc2(lp, &m->event_rc); int i;
s->gen_data -= PAYLOAD_SZ; for (i=0; i < m->rc.saved_syn_length; i++){
model_net_event_rc2(lp, &m->event_rc);
num_syn_bytes_sent -= PAYLOAD_SZ; s->gen_data -= payload_sz;
tw_rand_reverse_unif(lp->rng); num_syn_bytes_sent -= payload_sz;
tw_rand_reverse_unif(lp->rng); }
tw_rand_reverse_unif(lp->rng);
} }
...@@ -610,38 +622,105 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l ...@@ -610,38 +622,105 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l
/* Get job information */ /* Get job information */
tw_lpid global_dest_id; tw_lpid global_dest_id;
int intm_dest_id;
nw_message remote_m;
struct codes_jobmap_id jid; struct codes_jobmap_id jid;
jid = codes_jobmap_to_local_id(s->nw_id, jobmap_ctx); jid = codes_jobmap_to_local_id(s->nw_id, jobmap_ctx);
int num_clients = codes_jobmap_get_num_ranks(jid.job, jobmap_ctx); int num_clients = codes_jobmap_get_num_ranks(jid.job, jobmap_ctx);
int dest_svr = tw_rand_integer(lp->rng, 0, num_clients - 1);
if(dest_svr == s->local_rank) /* Find destination */
int* dest_svr = NULL;
int i, length=0;
switch(s->synthetic_pattern)
{ {
dest_svr = (s->local_rank + 1) % num_clients; case UNIFORM:
} {
length = 1;
jid.rank = dest_svr; dest_svr = (int*) calloc(1, sizeof(int));
dest_svr[0] = tw_rand_integer(lp->rng, 0, num_clients - 1);
int intm_dest_id = codes_jobmap_to_global_id(jid, jobmap_ctx); if(dest_svr[0] == s->local_rank)
global_dest_id = codes_mapping_get_lpid_from_relative(intm_dest_id, NULL, NW_LP_NM, NULL, 0); dest_svr[0] = (s->local_rank + 1) % num_clients;
}
nw_message remote_m; break;
remote_m.fwd.sim_start_time = tw_now(lp); case NEAREST_NEIGHBOR:
remote_m.fwd.dest_rank = dest_svr; {
remote_m.msg_type = CLI_BCKGND_ARRIVE; length = 1;
remote_m.fwd.num_bytes = PAYLOAD_SZ; dest_svr = (int*) calloc(1, sizeof(int));
remote_m.fwd.app_id = s->app_id; dest_svr[0] = (s->local_rank + 1) % num_clients;
remote_m.fwd.src_rank = s->local_rank; }
break;
m->event_rc = model_net_event(net_id, "synthetic-tr", global_dest_id, PAYLOAD_SZ, 0.0, case ALLTOALL:
sizeof(nw_message), (const void*)&remote_m, {
0, NULL, lp); dest_svr = (int*) calloc(num_clients-1, sizeof(int));
int index = 0;
s->gen_data += PAYLOAD_SZ; for (i=0;i<num_clients;i++)
num_syn_bytes_sent += PAYLOAD_SZ; {