...
  View open merge request
Commits (27)
......@@ -66,6 +66,7 @@ typedef struct mn_stats mn_stats;
X(SIMPLEP2P, "modelnet_simplep2p", "simplep2p", &simplep2p_method)\
X(TORUS, "modelnet_torus", "torus", &torus_method)\
X(SLIMFLY, "modelnet_slimfly", "slimfly", &slimfly_method)\
X(SLIMFLY_ROUTER, "modelnet_slimfly_router", "slimfly_router", &slimfly_router_method)\
X(FATTREE, "modelnet_fattree", "fattree", &fattree_method)\
X(DRAGONFLY, "modelnet_dragonfly", "dragonfly", &dragonfly_method)\
X(DRAGONFLY_ROUTER, "modelnet_dragonfly_router", "dragonfly_router", &dragonfly_router_method)\
......
......@@ -63,6 +63,7 @@ struct slim_terminal_message
// For buffer message
short vc_index;
short rail_id;
int sender_radix;
int output_chan;
model_net_event_return event_rc;
......@@ -74,13 +75,16 @@ struct slim_terminal_message
tw_stime saved_available_time;
tw_stime saved_avg_time;
tw_stime saved_rcv_time;
tw_stime saved_busy_time;
tw_stime saved_busy_time;
tw_stime saved_total_time;
int saved_send_loop;
// tw_stime saved_credit_time;
// tw_stime saved_collective_init_time;
tw_stime saved_hist_start_time;
tw_stime msg_start_time;
int rng_calls; //counter for rng calls so they can be rolled back in a single loop
short saved_completed_chunks;
int saved_hist_num;
int saved_occupancy;
......
......@@ -198,6 +198,7 @@ bin_PROGRAMS += src/network-workloads/model-net-dumpi-traces-dump
bin_PROGRAMS += src/network-workloads/model-net-synthetic
bin_PROGRAMS += src/network-workloads/model-net-synthetic-custom-dfly
bin_PROGRAMS += src/network-workloads/model-net-synthetic-slimfly
bin_PROGRAMS += src/network-workloads/model-net-synthetic-all
bin_PROGRAMS += src/network-workloads/model-net-synthetic-fattree
bin_PROGRAMS += src/network-workloads/model-net-synthetic-dfly-plus
bin_PROGRAMS += src/network-workloads/model-net-synthetic-dally-dfly
......
neil@MIMAS-L:~/RossDev/codes/scripts/dragonfly-dally$ python3 dragonfly-dally-topo-gen.py 36 9 dfdally-3k-intra dfdally-3k-inter fitfly-neil!?
Dragonfly (Dally) Network:
Number of Groups: 19
Router Radix: 36
Number Routers Per Group: 18
Number Terminal Per Router: 9
Number GC per Router: 9
Number GC per Group: 162
Number GC between Groups: 9
Total Routers: 342
Total Number Terminals: 3078
Verifying Radix Usage...
Verifying Group Interconnection Counts...
New Dragonfly Plus Network
Dragonfly Plus Network: Generating Local Group Connections
Dragonfly Plus Network: Generating Global Group Connections
Verifying Symmetry...
Verifying Radix Usage...
Verifying Fairness...
Verifying Dragonfly Nature...
Verifying Inter Group Connection Uniformity...
Writing out IntraConnection File 'dfp-3k-intra':
Writing out InterConnection File 'dfp-3k-inter':
Dragonfly Plus Network:
Number of Groups: 10
Router Radix: 36
Number Spine Per Group: 18
Number Leaf Per Group: 18
Number Terminal Per Leaf: 18
Number GC per Spine: 18
Number GC per Group: 324
Number GC between Groups: 36
Total Spine: 180
Total Leaf: 180
Total Routers: 360
Total Number Terminals: 3240
......@@ -5,13 +5,13 @@ LPGROUPS
repetitions="50";
nw-lp="3";
modelnet_slimfly="3";
slimfly_router="1";
modelnet_slimfly_router="1";
}
}
PARAMS
{
packet_size="256";
modelnet_order=( "slimfly" );
modelnet_order=( "slimfly", "slimfly_router");
# scheduler options
modelnet_scheduler="fcfs";
chunk_size="256";
......
......@@ -3,15 +3,15 @@ LPGROUPS
MODELNET_GRP
{
repetitions="50";
server="3";
nw-lp="3";
modelnet_slimfly="3";
slimfly_router="1";
modelnet_slimfly_router="1";
}
}
PARAMS
{
packet_size="256";
modelnet_order=( "slimfly" );
modelnet_order=( "slimfly", "slimfly_router");
# scheduler options
modelnet_scheduler="fcfs";
chunk_size="256";
......
......@@ -30,6 +30,7 @@
#define MAX_STATS 65536
#define COL_TAG 1235
#define BAR_TAG 1234
#define PRINT_SYNTH_TRAFFIC 0
static int msg_size_hash_compare(
void *key, struct qhash_head *link);
......@@ -876,23 +877,24 @@ void arrive_syn_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp)
(void)bf;
(void)lp;
// printf("\n Data arrived %d total data %ld ", m->fwd.num_bytes, s->syn_data);
if(s->local_rank == 0)
{
printf("\n Data arrived %lld rank %llu total data %ld ", m->fwd.num_bytes, s->nw_id, s->syn_data);
/* if(s->syn_data > upper_threshold)
if(s->local_rank == 0)
{
printf("\n Data arrived %lld rank %llu total data %ld ", m->fwd.num_bytes, s->nw_id, s->syn_data);
if(s->syn_data > upper_threshold)
{
struct rusage mem_usage;
int who = RUSAGE_SELF;
int err = getrusage(who, &mem_usage);
printf("\n Memory usage %lf gigabytes", ((double)mem_usage.ru_maxrss / (1024.0 * 1024.0)));
upper_threshold += 1048576;
}*/
}
if(PRINT_SYNTH_TRAFFIC) {
if(s->local_rank == 0)
{
printf("\n Data arrived %lld rank %llu total data %ld ", m->fwd.num_bytes, s->nw_id, s->syn_data);
/* if(s->syn_data > upper_threshold)
if(s->local_rank == 0)
{
printf("\n Data arrived %lld rank %llu total data %ld ", m->fwd.num_bytes, s->nw_id, s->syn_data);
if(s->syn_data > upper_threshold)
{
struct rusage mem_usage;
int who = RUSAGE_SELF;
int err = getrusage(who, &mem_usage);
printf("\n Memory usage %lf gigabytes", ((double)mem_usage.ru_maxrss / (1024.0 * 1024.0)));
upper_threshold += 1048576;
}*/
}
}
m->rc.saved_send_time = s->send_time;
m->rc.saved_send_time_sample = s->ross_sample.send_time;
if((tw_now(lp) - m->fwd.sim_start_time) > s->max_time)
......
This diff is collapsed.
......@@ -15,6 +15,7 @@
#define MN_NAME "model_net_base"
#define DEBUG 0
/**** BEGIN SIMULATION DATA STRUCTURES ****/
int model_net_base_magic;
......@@ -397,6 +398,8 @@ void model_net_base_configure(){
offsetof(model_net_wrap_msg, msg.m_dally_dfly);
msg_offsets[SLIMFLY] =
offsetof(model_net_wrap_msg, msg.m_slim);
msg_offsets[SLIMFLY_ROUTER] =
offsetof(model_net_wrap_msg, msg.m_slim);
msg_offsets[FATTREE] =
offsetof(model_net_wrap_msg, msg.m_fat);
msg_offsets[LOGGP] =
......@@ -553,7 +556,7 @@ void model_net_base_event(
tw_lp * lp){
if(m->h.magic != model_net_base_magic)
printf("\n LP ID mismatched %llu %d ", lp->gid);
printf("\n LP ID mismatched %llu\n", lp->gid);
assert(m->h.magic == model_net_base_magic);
......@@ -632,6 +635,9 @@ void handle_new_msg(
tw_bf *b,
model_net_wrap_msg * m,
tw_lp * lp){
#if DEBUG
printf("%llu Entered handle_new_msg()\n",LLU(tw_now(lp)));
#endif
static int num_servers = -1;
static int servers_per_node = -1;
if(num_servers == -1) {
......@@ -650,8 +656,8 @@ void handle_new_msg(
if(!g_tw_mynode) {
fprintf(stdout, "Set num_servers per router %d, servers per "
"injection queue per router %d, servers per node copy queue "
"per node %d\n", num_servers, servers_per_node,
servers_per_node_queue);
"per node %d, num nics %d\n", num_servers, servers_per_node,
servers_per_node_queue, ns->nics_per_router);
}
}
......@@ -667,6 +673,7 @@ void handle_new_msg(
exp_time -= tw_now(lp);
tw_stime delay = codes_local_latency(lp);
ns->node_copy_next_available_time[queue] = tw_now(lp) + exp_time;
// ns->node_copy_next_available_time[queue] = exp_time;
int remote_event_size = r->remote_event_size;
int self_event_size = r->self_event_size;
void *e_msg = (m+1);
......@@ -709,6 +716,9 @@ void handle_new_msg(
}
m_new->msg.m_base.isQueueReq = 0;
tw_event_send(e);
#if DEBUG
printf("%llu isQueueReq and dropping outof handle_new_msg(\n",LLU(tw_now(lp)));
#endif
return;
}
// simply pass down to the scheduler
......@@ -741,10 +751,20 @@ void handle_new_msg(
} else {
codes_mapping_get_lp_info2(r->src_lp, NULL, NULL, NULL, &rep_id, &offset);
}
queue_offset = offset/servers_per_node;
#if DEBUG
printf("r->src_lp:%llu, num_servers:%d num_queues:%d, offset:%d servers_per_node:%d\n",LLU(r->src_lp), num_servers, ns->params->num_queues, offset, servers_per_node);
#endif
queue_offset = (offset/servers_per_node) % ns->params->num_queues;
}
r->queue_offset = queue_offset;
#if DEBUG
printf("queue_offset:%d\n",queue_offset);
#endif
//printf("num_queues:%d q0_loop:%d q1_loop:%d\n",ns->params->num_queues,ns->in_sched_send_loop[0], ns->in_sched_send_loop[1]);
//for(int j=0; j<ns->params->num_queues; j++){
// queue_offset = j;
// r->queue_offset = j;
// set message-specific params
int is_from_remote = m->msg.m_base.is_from_remote;
model_net_sched *ss = is_from_remote ? ns->sched_recv : ns->sched_send[queue_offset];
......@@ -760,8 +780,11 @@ void handle_new_msg(
/* NOTE: we can do this because the sched rc struct in the event is
* *very* lightly used (there's harmless overlap in usage for the
* priority scheduler) */
#if DEBUG
printf("%llu handle_shed_next() from handle_new_msg()\n",LLU(tw_now(lp)));
#endif
handle_sched_next(ns, b, m, lp);
assert(*in_sched_loop); // we shouldn't have fallen out of the loop
assert(*in_sched_loop); // we shouldn't have fallen out of the loop - TODO(review): a prior note says this assert was commented out to allow injection above 100% bandwidth, but it is still active here; confirm which is intended
}
}
......@@ -804,6 +827,9 @@ void handle_sched_next(
tw_bf *b,
model_net_wrap_msg * m,
tw_lp * lp){
#if DEBUG
printf("%llu handle sched_next function\n",LLU(tw_now(lp)));
#endif
tw_stime poffset;
model_net_request *r = &m->msg.m_base.req;
int is_from_remote = m->msg.m_base.is_from_remote;
......@@ -813,6 +839,9 @@ void handle_sched_next(
int ret = model_net_sched_next(&poffset, ss, m+1, &m->msg.m_base.rc, lp);
// we only need to know whether scheduling is finished or not - if not,
// go to the 'next iteration' of the loop
#if DEBUG
printf("return value from model_net_sched_next(): %d in_sched_loop changing from %d to 0\n",ret,*in_sched_loop);
#endif
if (ret == -1){
b->c0 = 1;
*in_sched_loop = 0;
......@@ -941,6 +970,9 @@ void model_net_method_idle_event2(tw_stime offset_ts, int is_recv_queue,
tw_event *e = tw_event_new(lp->gid, offset_ts, lp);
model_net_wrap_msg *m_wrap = tw_event_data(e);
model_net_request *r_wrap = &m_wrap->msg.m_base.req;
#if DEBUG
printf("%llu handle_sched_next() from model_net_method_idle_event2()\n",LLU(tw_now(lp)));
#endif
msg_set_header(model_net_base_magic, MN_BASE_SCHED_NEXT, lp->gid,
&m_wrap->h);
m_wrap->msg.m_base.is_from_remote = is_recv_queue;
......
......@@ -28,6 +28,7 @@ extern struct model_net_method dragonfly_plus_router_method;
extern struct model_net_method dragonfly_dally_method;
extern struct model_net_method dragonfly_dally_router_method;
extern struct model_net_method slimfly_method;
extern struct model_net_method slimfly_router_method;
extern struct model_net_method fattree_method;
extern struct model_net_method dragonfly_router_method;
extern struct model_net_method dragonfly_custom_router_method;
......@@ -145,7 +146,7 @@ int* model_net_configure(int *id_count){
memset(is_msg_params_set, 0,
MAX_MN_MSG_PARAM_TYPES*sizeof(*is_msg_params_set));
ret = configuration_get_value_double(&config, "PARAMS", "intra_bandwidth", NULL,
ret = configuration_get_value_double(&config, "PARAMS", "cn_bandwidth", NULL,
&cn_bandwidth);
if(ret && !g_tw_mynode) {
fprintf(stderr, "Bandwidth of compute node channels not specified, "
......
This diff is collapsed.
......@@ -5,13 +5,13 @@ LPGROUPS
repetitions="50";
server="3";
modelnet_slimfly="3";
slimfly_router="1";
modelnet_slimfly_router="1";
}
}
PARAMS
{
packet_size="512";
modelnet_order=( "slimfly");
modelnet_order=( "slimfly", "slimfly_router");
# scheduler options
modelnet_scheduler="fcfs";
chunk_size="256";
......
......@@ -185,7 +185,7 @@ int main(
if(net_id == SLIMFLY)
{
strcpy(router_name, "slimfly_router");
strcpy(router_name, "modelnet_slimfly_router");
}
if(net_id == EXPRESS_MESH)
......