Commit 621ba97b authored by Misbah Mubarak's avatar Misbah Mubarak

Merge branch 'nikhil/ftree-mapfix' into 'master'

Clean up fat-tree a bit

See merge request !44
parents 4796657a 4937ed81
LPGROUPS
{
MODELNET_GRP
{
repetitions="198";
server="384";
modelnet_fattree="24";
fattree_switch="6";
}
}
PARAMS
{
ft_type="0";
packet_size="8192";
chunk_size="8192";
message_size="512";
modelnet_scheduler="fcfs";
modelnet_order=( "fattree" );
num_levels="3";
tapering="2";
num_rails="2";
switch_count="198";
switch_radix="36";
router_delay="90";
soft_delay="200";
nic_delay="400";
nic_seq_delay="100";
num_injection_queues="2";
link_bandwidth="11.9";
cn_bandwidth="24";
vc_size="65536";
cn_vc_size="65536";
node_copy_queues="4";
intra_bandwidth="30";
rdma_delay="1000";
eager_limit="64000";
copy_per_byte="0.01";
node_eager_limit="64000";
rail_select="adaptive";
rail_select_limit="8192";
routing="adaptive";
routing_folder="taper_routes";
dot_file="ftree";
dump_topo="0";
}
LPGROUPS
{
MODELNET_GRP
{
repetitions="252";
server="288";
modelnet_fattree="18";
fattree_switch="6";
}
}
PARAMS
{
ft_type="0";
packet_size="8192";
chunk_size="8192";
message_size="512";
modelnet_scheduler="fcfs";
modelnet_order=( "fattree" );
num_levels="3";
tapering="1";
num_rails="2";
switch_count="252";
switch_radix="36";
router_delay="90";
soft_delay="200";
nic_delay="400";
nic_seq_delay="100";
num_injection_queues="2";
link_bandwidth="11.9";
cn_bandwidth="24";
vc_size="65536";
cn_vc_size="65536";
node_copy_queues="4";
intra_bandwidth="30";
rdma_delay="1000";
eager_limit="64000";
copy_per_byte="0.01";
node_eager_limit="64000";
rail_select="adaptive";
rail_select_limit="8192";
routing="adaptive";
routing_folder="full_routes";
dot_file="ftree";
dump_topo="0";
}
LPGROUPS
{
MODELNET_GRP
{
repetitions="16";
server="4";
modelnet_fattree="4";
fattree_switch="3";
}
}
PARAMS
{
ft_type="1";
packet_size="512";
message_size="512";
chunk_size="32";
modelnet_scheduler="fcfs";
#modelnet_scheduler="round-robin";
modelnet_order=( "fattree" );
num_levels="3";
switch_count="16";
switch_radix="8";
router_delay="60";
soft_delay="1000";
vc_size="65536";
cn_vc_size="65536";
link_bandwidth="4.7";
cn_bandwidth="5.25";
}
*** README file for fattree network model ***
1- Configuring CODES dragonfly network model
CODES dragonfly network model can be configured using the fattree config file (currently
located in codes/src/network-workloads/conf). Below is an example config file:
1- Configuring CODES fat-tree network model
CODES fat-tree network model can be configured using example config files
(located in codes/src/network-workloads/conf/*fattree*). Below is the
bare-minimum example config file:
MODELNET_GRP
{
......@@ -21,38 +22,46 @@ PARAMS
....
}
The first section, MODELNET_GRP specifies the LP types, number of LPs per type and their
configuration. In the above case, there are 12 repetitions of 4 server LPs, 4 fat tree
network node/terminal LPs and 3 fat tree switch LPs. Each repetition represents a leaf
level switch, nodes connected to it, and higher level switches that may be needed to
construct the fat-tree. The 'fattree_switch' parameter indicates there are 3 levels
to this fat tree and each repetition will have one switch from each level. This
configuration will create a total of (fattree_switch)*repetitions=12*3=36 switch LPs,
with 'fattree_switch' many switch LPs per level.
The first section, MODELNET_GRP specifies the LP types, number of LPs per type
and their configuration. In the above case, there are 12 repetitions each with 4
server LPs, 4 fat tree network node/terminal LPs and 3 fat tree switch LPs. Each
repetition represents a leaf level switch, nodes connected to it, and higher
level switches that may be needed to construct the fat-tree. The
'fattree_switch' parameter indicates there are 3 levels to this fat tree and
each repetition will have one switch at each level. This configuration will
create a total of (fattree_switch)*repetitions=12*3=36 switch LPs, with
'fattree_switch' many levels.
Typically, for a balanced, single plane fat-tree:
modelnet_fattree = radix of switch/2
fattree_switch = number of levels in the fattree (2 or 3)
ft_type:
0: Custom- ("Pruned" Fat Tree)
1: Standard Full Fat Tree
The Custom- ft_type is simply a pruned standard full fat tree. This layout type starts
with the standard full fat tree and then removes pods and adjusts L1-L2 switch connections
as needed to drop the total node/terminal count in the system. This approach still maintains
full bisection bandwidth. Knowing a full standard fat tree uses k pods of k/2 switches per
pod (k/2 L1 switches and k/2 L0 switches) and each switch in L0 connects to k/2 terminals,
then each pod connects to (k/2)*(k/2) terminals. Therefore, the number of pods needed to get
N-many terminals using the Custom- ft_type is Np = ceil(N/[(k/2)*(k/2)]). So the config file
should have "repetitions" = "switch_count" = Np*(k/2).
0: General, possibly pruned/multi-railed/tapered, fat-tree
1: Standard Full Fat Tree (deprecated - use type 0 for this case too)
2: Multi-nic fat-tree where each NIC is connected to an independent plane
The general- ft_type is simply a pruned standard full fat tree. This layout
represents a partially filled fat-tree, which is created by removing pods and
adjusting L1-L2 switch connections as needed to reduce the total node/terminal
count in the system. This approach still maintains full bisection bandwidth.
Knowing a full standard fat tree uses k pods of k/2 switches per pod (k/2 L1
switches and k/2 L0 switches) and each switch in L0 connects to k/2 terminals,
then each pod connects to (k/2)*(k/2) terminals. Therefore, the number of pods
needed to get N-many terminals using the general- ft_type is Np =
ceil(N/[(k/2)*(k/2)]). So the config file should have "repetitions" =
"switch_count" = Np*(k/2).
Supported PARAMS:
packet_size, chunk_size (ideally kept same)
modelnet_scheduler - NIC message scheduler
modelnet_order=( "fattree" );
num_levels : number of levels in the fattree (same as fattree_switch)
modelnet_order="fattree";
message_size="512" ;
num_levels : number of levels in the fattree (2 or 3)
tapering : controls division of ports to nodes at leaf level
num_rails : number of rails/planes in the system (fattree_switch = num_levels * num_rails)
switch_count : number of leaf level switches (same as repetitions)
switch_radix : radix of the switches
router_delay : delay caused by switches in ns
......@@ -60,12 +69,22 @@ vc_size : size of switch VCs in bytes
cn_vc_size : size of VC between NIC and switch in bytes
link_bandwidth, cn_bandwidth : in GB/s
routing : {adaptive, static}
num_injection_queues : number of injection queues in NIC (=num_rails)
rail_select : {adaptive, static} rail selection scheme for the packets
rail_select_limit : message size in bytes above which adaptive rail selection algorithm is enabled if chosen
For radix k switches with tapering t, t*k/(t+1) ports are used for connecting
to nodes, and remaining are connected to next level switches. fmod(k, t+1) has
to be 0. Default tapering is 1, i.e. full bandwidth. Note that, even when
tapering is >1, the number of leaf level switches in a Pod is k/2.
If simulation of a system with multiple NICs per node is desired (each NIC has
its own rail/plane), ft_type = 2 should be selected. In this case, num_rails
should be set to the number of NICs/rails desired. Also, for this case,
fattree_switch = num_levels * num_rails
modelnet_fattree = usual_per_switch_nic * num_rails
num_injection_queues = 1
2- Static Routing
If static routing is chosen, two more PARAMS must be provided:
routing_folder : folder that contains lft files generated using method described below.
......
......@@ -730,10 +730,12 @@ static void fattree_read_config(const char * anno, fattree_param *p){
int i;
p->ft_type = 1;
p->ft_type = 0;
configuration_get_value_int(&config, "PARAMS", "ft_type", anno,
&p->ft_type);
if(!g_tw_mynode) printf("FT type is %d\n", p->ft_type);
if(p->ft_type == 1) printf("Use of FT type 1 is deprecated; please use type 0 for similar functionality\n");
if(p->ft_type == 2) printf("You have chosen FT type 2: this is for cases in which different NICs are desired for different rails.\n");
configuration_get_value_int(&config, "PARAMS", "num_levels", anno,
&p->num_levels);
......@@ -866,9 +868,9 @@ static void fattree_read_config(const char * anno, fattree_param *p){
if(!g_tw_mynode) printf("FT num rails is %d\n", p->num_rails);
if(p->ft_type == 2) {
p->ports_per_nic = p->num_rails;
} else {
p->ports_per_nic = 1;
} else {
p->ports_per_nic = p->num_rails;
}
p->router_delay = 50;
......@@ -1022,7 +1024,7 @@ void ft_terminal_init( ft_terminal_state * s, tw_lp * lp )
int num_lps = codes_mapping_get_lp_count(lp_group_name, 1, LP_CONFIG_NM,
s->anno, 0);
if(s->params->ft_type != 2) {
if(s->params->ft_type == 2) {
num_lps /= s->params->num_rails;
}
......@@ -1031,7 +1033,7 @@ void ft_terminal_init( ft_terminal_state * s, tw_lp * lp )
"%d, not the given value of %d\n", s->params->l0_term_size, num_lps);
}
if(s->params->ft_type != 2) {
if(s->params->ft_type == 2) {
s->terminal_id = (mapping_rep_id * num_lps) + (mapping_offset/s->params->num_rails);
s->rail_id = (mapping_offset % s->params->num_rails);
} else {
......@@ -1040,7 +1042,7 @@ void ft_terminal_init( ft_terminal_state * s, tw_lp * lp )
}
s->switch_id = s->terminal_id / s->params->l0_term_size;
s->switch_lp = (tw_lpid*)malloc(s->params->ports_per_nic * sizeof(tw_lpid));
if(s->params->ft_type != 2) {
if(s->params->ft_type == 2) {
codes_mapping_get_lp_id(lp_group_name, "fattree_switch", NULL, 1,
s->switch_id, 0 + s->params->num_levels * s->rail_id, &s->switch_lp[0]);
} else {
......@@ -1215,7 +1217,7 @@ void switch_init(switch_state * r, tw_lp * lp)
//if at level 0, first half ports go to terminals
if(r->switch_level == 0) {
int term_rails, term_railid;
if(p->ft_type != 2) {
if(p->ft_type == 2) {
term_rails = p->num_rails;
term_railid = r->rail_id;
} else {
......@@ -2672,7 +2674,7 @@ int ft_get_output_port( switch_state * s, tw_bf * bf, fattree_message * msg,
fattree_param *p = s->params;
int dest_term_local_id = codes_mapping_get_lp_relative_id(msg->dest_terminal_id, 0, 0);
if(s->params->ft_type != 2) {
if(s->params->ft_type == 2) {
dest_term_local_id /= s->params->num_rails;
}
/* either do static oblivious routing, if set up properly via LFTs */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment