Commit 8f92a75b authored by Misbah Mubarak's avatar Misbah Mubarak

Preparing beta version for new dragonfly model, modifications to routings,...

Preparing beta version for new dragonfly model, modifications to routings, adding documentation for generating config files, adding libtools to support C++ code compilation
parent 62564150
Nikhil Jain (UIUC)
Nikhil Jain, Abhinav Bhatele (LLNL)
- Improvements in credit-based flow control of CODES dragonfly and torus network models.
- Addition of direct scheme for setting up dragonfly network topology.
- Network configuration setup for custom dragonfly model.
- Topology generations scripts for custom dragonfly model.
- Bug reporter for CODES network models.
- Fat tree network setup and adaptive routing.
Jens Domke (U. of Dresden)
- Static routing in fat tree network model.
Xu Yang (IIT)
- Added support for running multiple application workloads with CODES MPI
......
......@@ -319,7 +319,7 @@ transparent message passing support between LPs, allowing the underlying
network LPs to do the work of routing while user LPs model their applications.
It consists of a number of both simple and complex network models as well as
configuration utilities and communication APIs. A somewhat stale overview is
also given at src/models/networks/model-net/doc/README.
also given at src/networks/model-net/doc/README.
= Components of model-net
......@@ -407,8 +407,8 @@ for the latency and bandwidth costs: "net_latency_ns_file" and
"net_bw_mbps_file".
More details about the models can be found at
src/models/networks/model-net/doc/README.simplenet.txt and
src/models/networks/model-net/doc/README.simplep2p.txt, respectively.
src/networks/model-net/doc/README.simplenet.txt and
src/networks/model-net/doc/README.simplep2p.txt, respectively.
== LogGP
......@@ -422,7 +422,7 @@ The only configuration entry the LogGP model requires is
configuration file.
For more details on gathering parameters for the LogGP model, as well as its
usage and caveats, see the document src/models/model-net/doc/README.loggp.txt.
usage and caveats, see the document src/model-net/doc/README.loggp.txt.
== Torus
......@@ -435,7 +435,7 @@ performed in:
(PADS), 2014.
The configuration and model setup can be found at:
src/models/model-net/doc/README.torus.txt
src/model-net/doc/README.torus.txt
== Dragonfly
......@@ -443,7 +443,7 @@ The dragonfly model (model-net LP name: "dragonfly") is a network
topology that utilizes the concept of virtual routers to produce systems with
very high virtual radix out of network components with a lower radix. The
topology itself and the simulation model are both described in
src/models/networks/model-net/doc/README.dragonfly.txt.
src/networks/model-net/doc/README.dragonfly.txt.
cite).
The configuration parameters are a little trickier here, as additional LPs
......@@ -453,7 +453,7 @@ represents a physical router. At least one "dragonfly_router" LP must be
present in every LP group with a "modelnet_dragonfly" LP.
Further configuration and model setup can be found at
src/models/model-net/doc/README.dragonfly.txt.
src/model-net/doc/README.dragonfly-custom.txt.
= CODES example model
......
** Generating inter and intra group files for Edison and Theta Interconnects **:
- Edison network config files:
python gen-network-config.py link-edison.txt intra-edison inter-edison
- Theta network config files:
python gen-network-config.py theta intra-theta inter-theta
** Generating customizable dragonfly interconnects **:
mpicc connections_general.c -o connections_general
./connections_general g r c intra-file inter-file
--> g: number of groups in the network
--> r: number of router rows within a group
--> c: number of router columns within a group
--> intra-file: output file for intra-group connections
--> inter-file: output file for inter-group connections
- The scripts and code for translating existing topologies and generating
Cray-style dragonfly topologies have been contributed by Nikhil Jain, Abhinav
Bhatele and Peer-Timo Bremer from LLNL.
- For details on the Cray XC dragonfly network topology, see the following paper:
@inproceedings{faanes2012cray,
title={Cray cascade: a scalable HPC system based on a Dragonfly network},
author={Faanes, Greg and Bataineh, Abdulla and Roweth, Duncan and Froese,
Edwin and Alverson, Bob and Johnson, Tim and Kopnick, Joe and Higgins, Mike
and Reinhard, James and others},
booktitle={Proceedings of the International Conference on High
Performance Computing, Networking, Storage and Analysis},
pages={103},
year={2012},
organization={IEEE Computer Society Press}
}
......@@ -21,16 +21,16 @@
int main(int argc, char **argv) {
if(argc < 3) {
printf("Corret usage: %s <num_g> <intra_file> <inter_file>", argv[0]);
printf("Correct usage: %s <num_g> <num_rows> <num_cols> <intra_file> <inter_file>", argv[0]);
exit(0);
}
int g = atoi(argv[1]);
int r = 6;
int c = 16;
int r = atoi(argv[2]);
int c = atoi(argv[3]);
FILE *intra = fopen(argv[2], "wb");
FILE *inter = fopen(argv[3], "wb");
FILE *intra = fopen(argv[4], "wb");
FILE *inter = fopen(argv[5], "wb");
int router = 0;
int green = 0, black = 1;
......
......@@ -175,6 +175,7 @@ bin_PROGRAMS += src/workload/codes-workload-dump
bin_PROGRAMS += src/network-workloads/model-net-mpi-replay
bin_PROGRAMS += src/network-workloads/model-net-dumpi-traces-dump
bin_PROGRAMS += src/network-workloads/model-net-synthetic
bin_PROGRAMS += src/network-workloads/model-net-synthetic-custom-dfly
bin_PROGRAMS += src/network-workloads/model-net-synthetic-slimfly
src_workload_codes_workload_dump_SOURCES = \
......@@ -182,6 +183,7 @@ src_workload_codes_workload_dump_SOURCES = \
src_network_workloads_model_net_mpi_replay_SOURCES = src/network-workloads/model-net-mpi-replay.c
src_network_workloads_model_net_synthetic_SOURCES = src/network-workloads/model-net-synthetic.c
src_network_workloads_model_net_synthetic_custom_dfly_SOURCES = src/network-workloads/model-net-synthetic-custom-dfly.c
src_network_workloads_model_net_synthetic_slimfly_SOURCES = src/network-workloads/model-net-synthetic-slimfly.c src/network-workloads/dummy.C
src_network_workloads_model_net_dumpi_traces_dump_SOURCES = src/network-workloads/model-net-dumpi-traces-dump.c src/network-workloads/dummy.C
......
LPGROUPS
{
MODELNET_GRP
{
repetitions="1440";
# name of this lp changes according to the model
server="4";
# these lp names will be the same for dragonfly-custom model
modelnet_dragonfly_custom="4";
modelnet_dragonfly_custom_router="1";
}
}
PARAMS
{
# packet size in the network
packet_size="1024";
modelnet_order=( "dragonfly_custom","dragonfly_custom_router" );
# scheduler options
modelnet_scheduler="fcfs";
# chunk size in the network (when chunk size = packet size, packets will not be
# divided into chunks)
chunk_size="1024";
# modelnet_scheduler="round-robin";
# number of routers within each group
# this is dictated by the dragonfly configuration files
num_routers="96";
# number of groups in the network
num_groups="15";
# buffer size in bytes for local virtual channels
local_vc_size="8192";
#buffer size in bytes for global virtual channels
global_vc_size="16384";
#buffer size in bytes for compute node virtual channels
cn_vc_size="8192";
#bandwidth in GiB/s for local channels
local_bandwidth="5.25";
# bandwidth in GiB/s for global channels
global_bandwidth="4.7";
# bandwidth in GiB/s for compute node-router channels
cn_bandwidth="8.0";
# ROSS message size
message_size="584";
# number of compute nodes connected to router, dictated by dragonfly config
# file
num_cns_per_router="4";
# number of global channels per router
num_global_channels="8";
# network config file for intra-group connections
intra-group-connections="/Users/mmubarak/Documents/software_development/dragonfly-cray/codes/scripts/gen-cray-topo/intra-edison";
# network config file for inter-group connections
inter-group-connections="/Users/mmubarak/Documents/software_development/dragonfly-cray/codes/scripts/gen-cray-topo/inter-edison";
# routing protocol to be used
routing="adaptive";
}
......@@ -2,32 +2,53 @@ LPGROUPS
{
MODELNET_GRP
{
repetitions="1440";
nw-lp="1";
modelnet_dragonfly_custom="1";
repetitions="3200";
# name of this lp changes according to the model
nw-lp="4";
# these lp names will be the same for dragonfly-custom model
modelnet_dragonfly_custom="4";
modelnet_dragonfly_custom_router="1";
}
}
PARAMS
{
packet_size="512";
# packet size in the network
packet_size="1024";
modelnet_order=( "dragonfly_custom","dragonfly_custom_router" );
# scheduler options
modelnet_scheduler="fcfs";
chunk_size="256";
# chunk size in the network (when chunk size = packet size, packets will not be
# divided into chunks)
chunk_size="1024";
# modelnet_scheduler="round-robin";
num_routers="96";
num_groups="15";
local_vc_size="2048";
global_vc_size="8192";
cn_vc_size="1024";
# number of routers within each group
# this is dictated by the dragonfly configuration files
num_routers="128";
# number of groups in the network
num_groups="25";
# buffer size in bytes for local virtual channels
local_vc_size="8192";
#buffer size in bytes for global virtual channels
global_vc_size="16384";
#buffer size in bytes for compute node virtual channels
cn_vc_size="8192";
#bandwidth in GiB/s for local channels
local_bandwidth="5.25";
# bandwidth in GiB/s for global channels
global_bandwidth="4.7";
cn_bandwidth="5.25";
# bandwidth in GiB/s for compute node-router channels
cn_bandwidth="8.0";
# ROSS message size
message_size="584";
num_cns_per_router="1";
num_global_channels="4";
intra-group-connections="intracray2";
inter-group-connections="intercray2";
# number of compute nodes connected to router, dictated by dragonfly config
# file
num_cns_per_router="4";
# number of global channels per router
num_global_channels="8";
# network config file for intra-group connections
intra-group-connections="/Users/mmubarak/Documents/software_development/dragonfly-cray/codes/scripts/gen-cray-topo/intra-custom";
# network config file for inter-group connections
inter-group-connections="/Users/mmubarak/Documents/software_development/dragonfly-cray/codes/scripts/gen-cray-topo/inter-custom";
# routing protocol to be used
routing="prog-adaptive";
}
LPGROUPS
{
MODELNET_GRP
{
repetitions="1440";
# name of this lp changes according to the model
nw-lp="4";
# these lp names will be the same for dragonfly-custom model
modelnet_dragonfly_custom="4";
modelnet_dragonfly_custom_router="1";
}
}
PARAMS
{
# packet size in the network
packet_size="1024";
modelnet_order=( "dragonfly_custom","dragonfly_custom_router" );
# scheduler options
modelnet_scheduler="fcfs";
# chunk size in the network (when chunk size = packet size, packets will not be
# divided into chunks)
chunk_size="1024";
# modelnet_scheduler="round-robin";
# number of routers within each group
# this is dictated by the dragonfly configuration files
num_routers="96";
# number of groups in the network
num_groups="15";
# buffer size in bytes for local virtual channels
local_vc_size="8192";
#buffer size in bytes for global virtual channels
global_vc_size="16384";
#buffer size in bytes for compute node virtual channels
cn_vc_size="8192";
#bandwidth in GiB/s for local channels
local_bandwidth="5.25";
# bandwidth in GiB/s for global channels
global_bandwidth="4.7";
# bandwidth in GiB/s for compute node-router channels
cn_bandwidth="8.0";
# ROSS message size
message_size="584";
# number of compute nodes connected to router, dictated by dragonfly config
# file
num_cns_per_router="4";
# number of global channels per router
num_global_channels="8";
# network config file for intra-group connections
intra-group-connections="/Users/mmubarak/Documents/software_development/dragonfly-cray/codes/scripts/gen-cray-topo/intra-edison";
# network config file for inter-group connections
inter-group-connections="/Users/mmubarak/Documents/software_development/dragonfly-cray/codes/scripts/gen-cray-topo/inter-edison";
# routing protocol to be used
routing="adaptive";
}
LPGROUPS
{
MODELNET_GRP
{
repetitions="864";
# name of this lp changes according to the model
nw-lp="4";
# these lp names will be the same for dragonfly-custom model
modelnet_dragonfly_custom="4";
modelnet_dragonfly_custom_router="1";
}
}
PARAMS
{
# packet size in the network
packet_size="1024";
modelnet_order=( "dragonfly_custom","dragonfly_custom_router" );
# scheduler options
modelnet_scheduler="fcfs";
# chunk size in the network (when chunk size = packet size, packets will not be
# divided into chunks)
chunk_size="1024";
# modelnet_scheduler="round-robin";
# number of routers within each group
# this is dictated by the dragonfly configuration files
num_routers="96";
# number of groups in the network
num_groups="9";
# buffer size in bytes for local virtual channels
local_vc_size="8192";
#buffer size in bytes for global virtual channels
global_vc_size="16384";
#buffer size in bytes for compute node virtual channels
cn_vc_size="8192";
#bandwidth in GiB/s for local channels
local_bandwidth="5.25";
# bandwidth in GiB/s for global channels
global_bandwidth="4.7";
# bandwidth in GiB/s for compute node-router channels
cn_bandwidth="16.0";
# ROSS message size
message_size="584";
# number of compute nodes connected to router, dictated by dragonfly config
# file
num_cns_per_router="4";
# number of global channels per router
num_global_channels="12";
# network config file for intra-group connections
intra-group-connections="/Users/mmubarak/Documents/software_development/dragonfly-cray/codes/scripts/gen-cray-topo/intra-theta";
# network config file for inter-group connections
inter-group-connections="/Users/mmubarak/Documents/software_development/dragonfly-cray/codes/scripts/gen-cray-topo/inter-theta";
# routing protocol to be used
routing="prog-adaptive";
}
/*
* Copyright (C) 2015 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
*/
/*
* This test program generates synthetic traffic patterns for the model-net network models.
* Currently it only supports the dragonfly network models, with uniform random, nearest group and nearest neighbor traffic patterns.
*/
#include "codes/model-net.h"
#include "codes/lp-io.h"
#include "codes/codes.h"
#include "codes/codes_mapping.h"
#include "codes/configuration.h"
#include "codes/lp-type-lookup.h"
/* size in bytes of every message handed to model_net_event() */
#define PAYLOAD_SZ 2048
/* model-net network id; the kickoff handler asserts it is DRAGONFLY or DRAGONFLY_CUSTOM */
static int net_id = 0;
/* traffic pattern selector (see enum TRAFFIC); exposed as the "traffic" option, default UNIFORM */
static int traffic = 1;
/* passed to tw_rand_exponential() when spacing kickoff events; exposed as the "arrival_time" option */
static double arrival_time = 1000.0;
/* topology counts: all start at 0 and are presumably filled in from the
 * configuration during start-up (not visible in this part of the file) */
static int num_servers_per_rep = 0;
static int num_routers_per_grp = 0;
static int num_nodes_per_grp = 0;
static int num_nodes_per_cn = 0;
static int num_groups = 0;
static int num_nodes = 0;
/* lp-io output settings; lp_io_dir and lp_io_use_suffix are exposed as the
 * "lp-io-dir" and "lp-io-use-suffix" options */
static char lp_io_dir[256] = {'\0'};
static lp_io_handle io_handle;
static unsigned int lp_io_use_suffix = 0;
static int do_lp_io = 0;
/* number of messages each server sends before its kickoff handler stops sending */
static int num_msgs = 20;
/* sampling settings exposed as the "sampling-interval" and "sampling-end-time" options */
static unsigned int sampling_interval = 800000;
static unsigned int sampling_end_time = 1600000;
typedef struct svr_msg svr_msg;
typedef struct svr_state svr_state;
/* global variables for codes mapping; filled by codes_mapping_get_lp_info() in the kickoff handler */
static char group_name[MAX_NAME_LENGTH];
static char lp_type_name[MAX_NAME_LENGTH];
static int group_index, lp_type_index, rep_id, offset;
/* Kinds of events a server LP can receive; stored in svr_msg.svr_event_type. */
enum svr_event
{
    KICKOFF = 0, /* self-scheduled event that triggers the next send */
    REMOTE  = 1, /* message delivered to the destination server */
    LOCAL   = 2  /* completion notification delivered back to the sender */
};
/* type of synthetic traffic */
enum TRAFFIC
{
UNIFORM = 1, /* sends message to a randomly selected node */
NEAREST_GROUP = 2, /* sends message to the node connected to the neighboring router */
NEAREST_NEIGHBOR = 3 /* sends message to the next node (potentially connected to the same router) */
};
/* Per-LP state of a server: message counters plus the timestamps used by svr_finalize(). */
struct svr_state
{
int msg_sent_count; /* messages sent so far; sending stops once this reaches num_msgs */
int msg_recvd_count; /* REMOTE events received */
int local_recvd_count; /* LOCAL events received */
tw_stime start_ts; /* overwritten with tw_now() on every kickoff that sends a message */
tw_stime end_ts; /* set to tw_now() in svr_finalize() */
};
/* Event payload exchanged between server LPs (also used for the self-scheduled kickoff). */
struct svr_msg
{
enum svr_event svr_event_type; /* which handler svr_event()/svr_rev_event() dispatch to */
tw_lpid src; /* source of this request or ack */
int incremented_flag; /* helper for reverse computation: set to 1 when the kickoff handler returned without sending, 0 when it sent */
};
/* Forward declarations of the server LP callbacks referenced by svr_lp. */
static void svr_init(svr_state * ns, tw_lp * lp);
static void svr_event(svr_state * ns, tw_bf * b, svr_msg * m, tw_lp * lp);
static void svr_rev_event(svr_state * ns, tw_bf * b, svr_msg * m, tw_lp * lp);
static void svr_finalize(svr_state * ns, tw_lp * lp);
/* LP type descriptor for the server LP: wires the svr_* callbacks above into
 * the simulator and tells it how much per-LP state to allocate. The pre-run
 * and commit hooks are not used. */
tw_lptype svr_lp = {
(init_f) svr_init,
(pre_run_f) NULL,
(event_f) svr_event,
(revent_f) svr_rev_event,
(commit_f) NULL,
(final_f) svr_finalize,
(map_f) codes_mapping,
sizeof(svr_state),
};
/*
 * Command-line options of the synthetic traffic generator.
 *
 * The "traffic" help text lists all three patterns defined in enum TRAFFIC
 * (UNIFORM=1, NEAREST_GROUP=2, NEAREST_NEIGHBOR=3) so that the value a user
 * passes matches what the kickoff handler actually does.
 */
const tw_optdef app_opt [] =
{
    TWOPT_GROUP("Model net synthetic traffic " ),
    TWOPT_UINT("traffic", traffic, "UNIFORM RANDOM=1, NEAREST GROUP=2, NEAREST NEIGHBOR=3 "),
    TWOPT_UINT("num_messages", num_msgs, "Number of messages to be generated per terminal "),
    TWOPT_UINT("sampling-interval", sampling_interval, "the sampling interval "),
    TWOPT_UINT("sampling-end-time", sampling_end_time, "sampling end time "),
    TWOPT_STIME("arrival_time", arrival_time, "INTER-ARRIVAL TIME"),
    TWOPT_CHAR("lp-io-dir", lp_io_dir, "Where to place io output (unspecified -> no output)"),
    TWOPT_UINT("lp-io-use-suffix", lp_io_use_suffix, "Whether to append uniq suffix to lp-io directory (default 0)"),
    TWOPT_END()
};
/* Returns the address of the server LP type descriptor (svr_lp). */
const tw_lptype* svr_get_lp_type()
{
    const tw_lptype *server_type = &svr_lp;
    return server_type;
}
static void svr_add_lp_type()
{
lp_type_register("server", svr_get_lp_type());
}
/*
 * Schedules a KICKOFF event from this LP to itself.
 *
 * The delay is 1.1 * g_tw_lookahead plus an exponentially distributed draw
 * from the LP's RNG, which skews the kickoff times of different servers and
 * helps avoid event ties later on. The draw has to be rolled back by the
 * reverse handler of whichever event called this function.
 */
static void issue_event(
    svr_state * ns,
    tw_lp * lp)
{
    (void)ns;

    const tw_stime jitter = tw_rand_exponential(lp->rng, arrival_time);
    const tw_stime delay = 1.1 * g_tw_lookahead + jitter;

    tw_event *kick = tw_event_new(lp->gid, delay, lp);
    svr_msg *payload = tw_event_data(kick);

    payload->svr_event_type = KICKOFF;
    tw_event_send(kick);
}
/* LP init callback: clears the start timestamp and schedules the first kickoff. */
static void svr_init(
    svr_state * ns,
    tw_lp * lp)
{
    ns->start_ts = 0.0;
    issue_event(ns, lp);
}
/*
 * Reverse handler for a KICKOFF event: undoes what handle_kickoff_event() did.
 * NOTE(review): ns->start_ts, which the forward handler overwrites, is not
 * restored here - confirm that is acceptable.
 */
static void handle_kickoff_rev_event(
svr_state * ns,
tw_bf * b,
svr_msg * m,
tw_lp * lp)
{
/* incremented_flag == 1 means the forward handler returned before sending
 * anything (message quota reached), so there is nothing to undo */
if(m->incremented_flag)
return;
/* c1 is set by the forward handler only for UNIFORM traffic, where it drew a
 * random destination from the RNG */
if(b->c1)
tw_rand_reverse_unif(lp->rng);
/* undo the model_net_event() call and the send counter increment */
model_net_event_rc(net_id, lp, PAYLOAD_SZ);
ns->msg_sent_count--;
/* undo the RNG draw made by issue_event() when it scheduled the next kickoff
 * (presumably tw_rand_exponential consumes exactly one draw - TODO confirm) */
tw_rand_reverse_unif(lp->rng);
}
/*
 * Forward handler for a KICKOFF event: sends one PAYLOAD_SZ-byte message to a
 * destination chosen by the configured traffic pattern and schedules the next
 * kickoff.
 *
 * ns - server state; msg_sent_count is incremented and start_ts overwritten
 * b  - event bitfield; c1 is set when a random destination was drawn so the
 *      reverse handler knows to roll that draw back
 * m  - the kickoff event; incremented_flag records whether anything was sent
 * lp - the sending LP
 *
 * The remote/local payloads live on the stack instead of being malloc'd and
 * never freed: model_net_event() takes them as const void* together with
 * their sizes and copies them into the event it creates, so no heap storage
 * is needed and nothing leaks per message. They are also fully initialized,
 * so no indeterminate bytes are copied into the events.
 */
static void handle_kickoff_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    /* quota reached: send nothing and tell the reverse handler so */
    if(ns->msg_sent_count >= num_msgs)
    {
        m->incremented_flag = 1;
        return;
    }

    m->incremented_flag = 0;

    char anno[MAX_NAME_LENGTH];
    tw_lpid local_dest = -1, global_dest = -1;

    /* LOCAL is delivered back to this LP, REMOTE to the destination server */
    svr_msg m_local = {
        .svr_event_type = LOCAL,
        .src = lp->gid,
        .incremented_flag = 0
    };
    svr_msg m_remote = m_local;
    m_remote.svr_event_type = REMOTE;

    assert(net_id == DRAGONFLY || net_id == DRAGONFLY_CUSTOM); /* only supported for dragonfly model right now. */

    /* NOTE(review): start_ts is overwritten on every send, so the interval
     * reported by svr_finalize() starts at the last send, not the first -
     * confirm this is intended. */
    ns->start_ts = tw_now(lp);

    codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, anno, &rep_id, &offset);
    int local_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0);

    /* in case of uniform random traffic, send to a random destination. */
    if(traffic == UNIFORM)
    {
        b->c1 = 1;
        local_dest = tw_rand_integer(lp->rng, 0, num_nodes - 1);
    }
    else if(traffic == NEAREST_GROUP)
    {
        /* same position, one group further on (wrapping around) */
        local_dest = (local_id + num_nodes_per_grp) % num_nodes;
    }
    else if(traffic == NEAREST_NEIGHBOR)
    {
        /* next node id (wrapping around) */
        local_dest = (local_id + 1) % num_nodes;
    }

    /* also trips for an unknown traffic value: local_dest is still (tw_lpid)-1 */
    assert(local_dest < num_nodes);

    global_dest = codes_mapping_get_lpid_from_relative(local_dest, group_name, lp_type_name, NULL, 0);

    ns->msg_sent_count++;
    model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)&m_remote, sizeof(svr_msg), (const void*)&m_local, lp);

    issue_event(ns, lp);
    return;
}
/* Reverse handler for a REMOTE event: takes back the receive counted by the forward handler. */
static void handle_remote_rev_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    (void)b;
    (void)m;
    (void)lp;
    ns->msg_recvd_count -= 1;
}
/* Forward handler for a REMOTE event: counts one message received by this server. */
static void handle_remote_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    (void)b;
    (void)m;
    (void)lp;
    ns->msg_recvd_count += 1;
}
/* Reverse handler for a LOCAL event: takes back the count added by the forward handler. */
static void handle_local_rev_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    (void)b;
    (void)m;
    (void)lp;
    ns->local_recvd_count -= 1;
}
/* Forward handler for a LOCAL event: counts one local notification received by this server. */
static void handle_local_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    (void)b;
    (void)m;
    (void)lp;
    ns->local_recvd_count += 1;
}
/* Converts a time in nanoseconds to seconds (divides by 10^9). */
static tw_stime ns_to_s(tw_stime ns)
{
    const double ns_per_s = 1000.0 * 1000.0 * 1000.0;
    return ns / ns_per_s;
}
/* Converts a time in seconds to nanoseconds (multiplies by 10^9).
 * Note: the argument is named "ns" but holds a value in seconds. */
static tw_stime s_to_ns(tw_stime ns)
{
    const double ns_per_s = 1000.0 * 1000.0 * 1000.0;
    return ns * ns_per_s;
}
/*
 * LP finalize callback: records the end timestamp and prints one summary
 * line with this server's counters.
 *
 * NOTE(review): the byte count printed is based on messages *received*,
 * while the MiB/s figure is based on messages *sent* - confirm that mix is
 * intended.
 */
static void svr_finalize(
    svr_state * ns,
    tw_lp * lp)
{
    ns->end_ts = tw_now(lp);

    const tw_stime elapsed_s = ns_to_s(ns->end_ts - ns->start_ts);
    const int bytes_recvd = PAYLOAD_SZ * ns->msg_recvd_count;
    const double mib_sent = (double)(PAYLOAD_SZ * ns->msg_sent_count) / (double)(1024 * 1024);

    printf("server %llu recvd %d bytes in %f seconds, %f MiB/s sent_count %d recvd_count %d local_count %d \n",
           (unsigned long long)lp->gid,
           bytes_recvd,
           elapsed_s,
           mib_sent / elapsed_s,
           ns->msg_sent_count,
           ns->msg_recvd_count,
           ns->local_recvd_count);
}
/*
 * Reverse event callback of the server LP: dispatches to the reverse handler
 * matching the event type and asserts on an unknown type.
 */
static void svr_rev_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    const enum svr_event kind = m->svr_event_type;

    if (kind == REMOTE)
    {
        handle_remote_rev_event(ns, b, m, lp);
    }
    else if (kind == LOCAL)
    {
        handle_local_rev_event(ns, b, m, lp);
    }
    else if (kind == KICKOFF)
    {
        handle_kickoff_rev_event(ns, b, m, lp);
    }
    else
    {
        assert(0);
    }
}
static void svr_event(
svr_state * ns,
tw_bf * b,
svr_msg * m,
tw_lp * lp)
{
switch (m->svr_event_type)
{
case REMOTE:
handle_remote_event(ns, b, m, lp);
break;
case LOCAL:
handle_local_event(ns, b, m, lp);
break;
case KICKOFF:
handle_kickoff_event(ns, b, m, lp);
break;
default:
</