...
 
Commits (51)
......@@ -24,6 +24,7 @@ extern "C" {
#include "net/dragonfly.h"
#include "net/dragonfly-custom.h"
#include "net/dragonfly-plus.h"
#include "net/dragonfly-dally.h"
#include "net/slimfly.h"
#include "net/fattree.h"
#include "net/loggp.h"
......@@ -133,6 +134,7 @@ typedef struct model_net_wrap_msg {
terminal_message m_dfly; // dragonfly
terminal_custom_message m_custom_dfly; // dragonfly-custom
terminal_plus_message m_dfly_plus; // dragonfly plus
terminal_dally_message m_dally_dfly; // dragonfly dally
slim_terminal_message m_slim; // slimfly
fattree_message m_fat; // fattree
loggp_message m_loggp; // loggp
......
......@@ -76,6 +76,8 @@ typedef struct mn_stats mn_stats;
X(EXPRESS_MESH_ROUTER, "modelnet_express_mesh_router", "express_mesh_router", &express_mesh_router_method)\
X(DRAGONFLY_PLUS, "modelnet_dragonfly_plus", "dragonfly_plus", &dragonfly_plus_method)\
X(DRAGONFLY_PLUS_ROUTER, "modelnet_dragonfly_plus_router", "dragonfly_plus_router", &dragonfly_plus_router_method)\
X(DRAGONFLY_DALLY, "modelnet_dragonfly_dally", "dragonfly_dally", &dragonfly_dally_method)\
X(DRAGONFLY_DALLY_ROUTER, "modelnet_dragonfly_dally_router", "dragonfly_dally_router", &dragonfly_dally_router_method)\
X(MAX_NETS, NULL, NULL, NULL)
#define X(a,b,c,d) a,
......
......@@ -75,9 +75,17 @@ struct terminal_custom_message
model_net_event_return event_rc;
int is_pull;
uint32_t pull_size;
int path_type;
/* for reverse computation */
int path_type;
short num_rngs;
short num_cll;
int qos_index;
short last_saved_qos;
short qos_reset1;
short qos_reset2;
tw_stime saved_available_time;
tw_stime saved_avg_time;
tw_stime saved_rcv_time;
......
/*
* Copyright (C) 2014 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
*/
#ifndef DRAGONFLY_DALLY_H
#define DRAGONFLY_DALLY_H
#ifdef __cplusplus
extern "C" {
#endif
#include <ross.h>
typedef struct terminal_dally_message terminal_dally_message;
/*
 * Event payload for the dragonfly-dally network model.
 * The same message layout is used for both dragonfly compute nodes
 * (terminals) and routers.
 *
 * NOTE(review): CATEGORY_NAME_MAX, uint32_t and model_net_event_return are
 * used below, but this header only includes <ross.h>; presumably every
 * includer pulls in the declaring headers first -- confirm.
 */
struct terminal_dally_message
{
/* magic number */
int magic;
/* flit travel start time */
tw_stime travel_start_time;
/* packet ID of the flit */
unsigned long long packet_ID;
/* event type of the flit */
short type;
/* category: comes from CODES */
char category[CATEGORY_NAME_MAX];
/* hash of the category, stored in the event */
uint32_t category_hash;
/* final destination LP ID; this comes from CODES and can be a server or any other LP type */
tw_lpid final_dest_gid;
/* sending LP ID from CODES; can be a server or any other LP type */
tw_lpid sender_lp;
tw_lpid sender_mn_lp; // source modelnet id
/* destination terminal ID of the dragonfly */
tw_lpid dest_terminal_id;
/* source terminal ID of the dragonfly */
unsigned int src_terminal_id;
/* ID of the router at which the message originated.
* MM: can this be calculated from sender_mn_lp? */
unsigned int origin_router_id;
/* number of hops traversed by the packet */
short my_N_hop;
/* presumably the local-channel and global-channel hop counts -- TODO confirm */
short my_l_hop, my_g_hop;
short saved_channel;
short saved_vc;
int next_stop;
short nonmin_done;
/* intermediate LP ID from which this message is coming */
unsigned int intm_lp_id;
/* last hop of the message; can be a terminal, local router or global router */
short last_hop;
/* for routing */
int intm_rtr_id;
int saved_src_dest;
int saved_src_chan;
uint32_t chunk_id;
uint32_t packet_size;
uint32_t message_id;
uint32_t total_size;
int remote_event_size_bytes;
int local_event_size_bytes;
// for buffer messages
short vc_index;
int output_chan;
model_net_event_return event_rc;
int is_pull;
uint32_t pull_size;
int path_type;
/* for reverse computation */
/* NOTE(review): num_rngs / num_cll look like per-event call counters that the
* reverse handler uses to undo work -- confirm against the model source */
short num_rngs;
short num_cll;
int qos_index;
short last_saved_qos;
short qos_reset1;
short qos_reset2;
/* saved_* fields: values stashed in the event, grouped here under reverse computation */
tw_stime saved_available_time;
tw_stime saved_avg_time;
tw_stime saved_rcv_time;
tw_stime saved_busy_time;
tw_stime saved_total_time;
tw_stime saved_sample_time;
tw_stime msg_start_time;
tw_stime saved_busy_time_ross;
tw_stime saved_fin_chunks_ross;
};
#ifdef __cplusplus
}
#endif
#endif /* end of include guard: DRAGONFLY_DALLY_H */
/*
* Local variables:
* c-indent-level: 4
* c-basic-offset: 4
* End:
*
* vim: ft=c ts=8 sts=4 sw=4 expandtab
*/
......@@ -83,6 +83,21 @@ struct terminal_plus_message
int is_pull;
uint32_t pull_size;
/* for counting reverse calls */
short num_rngs;
short num_cll;
/* qos related attributes */
short last_saved_qos;
short qos_reset1;
short qos_reset2;
/* new qos rc - These are calloced in forward events, free'd in RC or commit_f */
/* note: dynamic memory here is OK since it's only accessed by the LP that alloced it in the first place. */
short rc_is_qos_set;
unsigned long long * rc_qos_data;
int * rc_qos_status;
/* for reverse computation */
int path_type;
tw_stime saved_available_time;
......
This diff is collapsed.
4,20,1,0
5,21,1,0
6,22,1,0
7,23,1,0
8,20,1,0
9,21,1,0
10,22,1,0
11,23,1,0
12,20,1,0
13,21,1,0
14,22,1,0
15,23,1,0
16,20,1,0
17,21,1,0
18,22,1,0
19,23,1,0
20,24,0,0
20,25,0,0
20,26,0,0
20,27,0,0
20,4,0,1
20,8,0,1
20,12,0,1
20,16,0,1
21,24,0,0
21,25,0,0
21,26,0,0
21,27,0,0
21,5,0,1
21,9,0,1
21,13,0,1
21,17,0,1
22,24,0,0
22,25,0,0
22,26,0,0
22,27,0,0
22,6,0,1
22,10,0,1
22,14,0,1
22,18,0,1
23,24,0,0
23,25,0,0
23,26,0,0
23,27,0,0
23,7,0,1
23,11,0,1
23,15,0,1
23,19,0,1
24,55,0,0
24,83,0,0
24,111,0,0
24,139,0,0
24,20,0,0
24,21,0,0
24,22,0,0
24,23,0,0
25,54,0,0
25,82,0,0
25,110,0,0
25,138,0,0
25,20,0,0
25,21,0,0
25,22,0,0
25,23,0,0
26,53,0,0
26,81,0,0
26,109,0,0
26,137,0,0
26,20,0,0
26,21,0,0
26,22,0,0
26,23,0,0
27,52,0,0
27,80,0,0
27,108,0,0
27,136,0,0
27,20,0,0
27,21,0,0
27,22,0,0
27,23,0,0
32,48,1,0
33,49,1,0
34,50,1,0
35,51,1,0
36,48,1,0
37,49,1,0
38,50,1,0
39,51,1,0
40,48,1,0
41,49,1,0
42,50,1,0
43,51,1,0
44,48,1,0
45,49,1,0
46,50,1,0
47,51,1,0
48,52,0,0
48,53,0,0
48,54,0,0
48,55,0,0
48,32,0,1
48,36,0,1
48,40,0,1
48,44,0,1
49,52,0,0
49,53,0,0
49,54,0,0
49,55,0,0
49,33,0,1
49,37,0,1
49,41,0,1
49,45,0,1
50,52,0,0
50,53,0,0
50,54,0,0
50,55,0,0
50,34,0,1
50,38,0,1
50,42,0,1
50,46,0,1
51,52,0,0
51,53,0,0
51,54,0,0
51,55,0,0
51,35,0,1
51,39,0,1
51,43,0,1
51,47,0,1
52,83,0,0
52,111,0,0
52,139,0,0
52,27,0,0
52,48,0,0
52,49,0,0
52,50,0,0
52,51,0,0
53,82,0,0
53,110,0,0
53,138,0,0
53,26,0,0
53,48,0,0
53,49,0,0
53,50,0,0
53,51,0,0
54,81,0,0
54,109,0,0
54,137,0,0
54,25,0,0
54,48,0,0
54,49,0,0
54,50,0,0
54,51,0,0
55,80,0,0
55,108,0,0
55,136,0,0
55,24,0,0
55,48,0,0
55,49,0,0
55,50,0,0
55,51,0,0
60,76,1,0
61,77,1,0
62,78,1,0
63,79,1,0
64,76,1,0
65,77,1,0
66,78,1,0
67,79,1,0
68,76,1,0
69,77,1,0
70,78,1,0
71,79,1,0
72,76,1,0
73,77,1,0
74,78,1,0
75,79,1,0
76,80,0,0
76,81,0,0
76,82,0,0
76,83,0,0
76,60,0,1
76,64,0,1
76,68,0,1
76,72,0,1
77,80,0,0
77,81,0,0
77,82,0,0
77,83,0,0
77,61,0,1
77,65,0,1
77,69,0,1
77,73,0,1
78,80,0,0
78,81,0,0
78,82,0,0
78,83,0,0
78,62,0,1
78,66,0,1
78,70,0,1
78,74,0,1
79,80,0,0
79,81,0,0
79,82,0,0
79,83,0,0
79,63,0,1
79,67,0,1
79,71,0,1
79,75,0,1
80,111,0,0
80,139,0,0
80,27,0,0
80,55,0,0
80,76,0,0
80,77,0,0
80,78,0,0
80,79,0,0
81,110,0,0
81,138,0,0
81,26,0,0
81,54,0,0
81,76,0,0
81,77,0,0
81,78,0,0
81,79,0,0
82,109,0,0
82,137,0,0
82,25,0,0
82,53,0,0
82,76,0,0
82,77,0,0
82,78,0,0
82,79,0,0
83,108,0,0
83,136,0,0
83,24,0,0
83,52,0,0
83,76,0,0
83,77,0,0
83,78,0,0
83,79,0,0
88,104,1,0
89,105,1,0
90,106,1,0
91,107,1,0
92,104,1,0
93,105,1,0
94,106,1,0
95,107,1,0
96,104,1,0
97,105,1,0
98,106,1,0
99,107,1,0
100,104,1,0
101,105,1,0
102,106,1,0
103,107,1,0
104,108,0,0
104,109,0,0
104,110,0,0
104,111,0,0
104,88,0,1
104,92,0,1
104,96,0,1
104,100,0,1
105,108,0,0
105,109,0,0
105,110,0,0
105,111,0,0
105,89,0,1
105,93,0,1
105,97,0,1
105,101,0,1
106,108,0,0
106,109,0,0
106,110,0,0
106,111,0,0
106,90,0,1
106,94,0,1
106,98,0,1
106,102,0,1
107,108,0,0
107,109,0,0
107,110,0,0
107,111,0,0
107,91,0,1
107,95,0,1
107,99,0,1
107,103,0,1
108,139,0,0
108,27,0,0
108,55,0,0
108,83,0,0
108,104,0,0
108,105,0,0
108,106,0,0
108,107,0,0
109,138,0,0
109,26,0,0
109,54,0,0
109,82,0,0
109,104,0,0
109,105,0,0
109,106,0,0
109,107,0,0
110,137,0,0
110,25,0,0
110,53,0,0
110,81,0,0
110,104,0,0
110,105,0,0
110,106,0,0
110,107,0,0
111,136,0,0
111,24,0,0
111,52,0,0
111,80,0,0
111,104,0,0
111,105,0,0
111,106,0,0
111,107,0,0
116,132,1,0
117,133,1,0
118,134,1,0
119,135,1,0
120,132,1,0
121,133,1,0
122,134,1,0
123,135,1,0
124,132,1,0
125,133,1,0
126,134,1,0
127,135,1,0
128,132,1,0
129,133,1,0
130,134,1,0
131,135,1,0
132,136,0,0
132,137,0,0
132,138,0,0
132,139,0,0
132,116,0,1
132,120,0,1
132,124,0,1
132,128,0,1
133,136,0,0
133,137,0,0
133,138,0,0
133,139,0,0
133,117,0,1
133,121,0,1
133,125,0,1
133,129,0,1
134,136,0,0
134,137,0,0
134,138,0,0
134,139,0,0
134,118,0,1
134,122,0,1
134,126,0,1
134,130,0,1
135,136,0,0
135,137,0,0
135,138,0,0
135,139,0,0
135,119,0,1
135,123,0,1
135,127,0,1
135,131,0,1
136,27,0,0
136,55,0,0
136,83,0,0
136,111,0,0
136,132,0,0
136,133,0,0
136,134,0,0
136,135,0,0
137,26,0,0
137,54,0,0
137,82,0,0
137,110,0,0
137,132,0,0
137,133,0,0
137,134,0,0
137,135,0,0
138,25,0,0
138,53,0,0
138,81,0,0
138,109,0,0
138,132,0,0
138,133,0,0
138,134,0,0
138,135,0,0
139,24,0,0
139,52,0,0
139,80,0,0
139,108,0,0
139,132,0,0
139,133,0,0
139,134,0,0
139,135,0,0
......@@ -94,6 +94,8 @@ nobase_include_HEADERS = \
codes/net/common-net.h \
codes/net/dragonfly.h \
codes/net/dragonfly-custom.h \
codes/net/dragonfly-dally.h \
codes/net/dragonfly-plus.h \
codes/net/slimfly.h \
codes/net/fattree.h \
codes/net/loggp.h \
......@@ -162,6 +164,7 @@ src_libcodes_la_SOURCES = \
src/networks/model-net/dragonfly.c \
src/networks/model-net/dragonfly-custom.C \
src/networks/model-net/dragonfly-plus.C \
src/networks/model-net/dragonfly-dally.C \
src/networks/model-net/slimfly.c \
src/networks/model-net/fattree.c \
src/networks/model-net/loggp.c \
......@@ -196,6 +199,7 @@ bin_PROGRAMS += src/network-workloads/model-net-synthetic-custom-dfly
bin_PROGRAMS += src/network-workloads/model-net-synthetic-slimfly
bin_PROGRAMS += src/network-workloads/model-net-synthetic-fattree
bin_PROGRAMS += src/network-workloads/model-net-synthetic-dfly-plus
bin_PROGRAMS += src/network-workloads/model-net-synthetic-dally-dfly
src_workload_codes_workload_dump_SOURCES = \
......@@ -210,6 +214,7 @@ src_network_workloads_model_net_mpi_replay_CFLAGS = $(AM_CFLAGS)
src_network_workloads_model_net_synthetic_SOURCES = src/network-workloads/model-net-synthetic.c
src_network_workloads_model_net_synthetic_custom_dfly_SOURCES = src/network-workloads/model-net-synthetic-custom-dfly.c
src_network_workloads_model_net_synthetic_dfly_plus_SOURCES = src/network-workloads/model-net-synthetic-dfly-plus.c
src_network_workloads_model_net_synthetic_dally_dfly_SOURCES = src/network-workloads/model-net-synthetic-dally-dfly.c
src_networks_model_net_topology_test_SOURCES = src/networks/model-net/topology-test.c
#bin_PROGRAMS += src/network-workload/codes-nw-test
......
LPGROUPS
{
MODELNET_GRP
{
repetitions="1056";
# name of this lp changes according to the model
nw-lp="8";
# LP type names used by the dragonfly-plus model
modelnet_dragonfly_plus="8";
modelnet_dragonfly_plus_router="1";
}
}
PARAMS
{
# packet size in the network
packet_size="4096";
modelnet_order=( "dragonfly_plus","dragonfly_plus_router" );
# scheduler options
modelnet_scheduler="fcfs";
# chunk size in the network (when chunk size = packet size, packets will not be
# divided into chunks)
chunk_size="4096";
# modelnet_scheduler="round-robin";
# number of routers within each group
# each router row corresponds to a chassis in Cray systems
num_router_spine="16";
# each router column corresponds to a slot in a chassis
num_router_leaf="16";
# number of links connecting between group levels per router
num_level_chans="1";
# number of groups in the network
num_groups="33";
# buffer size in bytes for local virtual channels
local_vc_size="32768";
#buffer size in bytes for global virtual channels
global_vc_size="32768";
#buffer size in bytes for compute node virtual channels
cn_vc_size="32768";
#bandwidth in GiB/s for local channels
local_bandwidth="25.0";
# bandwidth in GiB/s for global channels
global_bandwidth="25.0";
# bandwidth in GiB/s for compute node-router channels
cn_bandwidth="25.0";
# ROSS message size
message_size="624";
# number of compute nodes connected to router, dictated by dragonfly config
# file
num_cns_per_router="16";
# number of global channels per router
num_global_connections="16";
# network config file for intra-group connections
intra-group-connections="/home/mmubarak/codes-online/codes/src/network-workloads/conf/dragonfly-custom/dfp_8k_intra";
# network config file for inter-group connections
inter-group-connections="/home/mmubarak/codes-online/codes/src/network-workloads/conf/dragonfly-custom/dfp_8k_inter";
# routing protocol to be used
routing="prog-adaptive";
# route scoring protocol to be used - options are 'alpha' or 'beta'
route_scoring_metric="alpha";
}
......@@ -6,18 +6,15 @@ LPGROUPS
# name of this lp changes according to the model
nw-lp="8";
# LP type names used by the dragonfly-dally model
modelnet_dragonfly_custom="8";
modelnet_dragonfly_custom_router="1";
modelnet_dragonfly_dally="8";
modelnet_dragonfly_dally_router="1";
}
}
PARAMS
{
adaptive_threshold="8192";
# minimal-bias="1";
df-dally-vc = "1";
# packet size in the network
packet_size="4096";
modelnet_order=( "dragonfly_custom","dragonfly_custom_router" );
modelnet_order=( "dragonfly_dally","dragonfly_dally_router" );
# scheduler options
modelnet_scheduler="fcfs";
# chunk size in the network (when chunk size = packet size, packets will not be
......@@ -31,32 +28,35 @@ PARAMS
# number of groups in the network
num_groups="65";
# buffer size in bytes for local virtual channels
local_vc_size="32768";
local_vc_size="16384";
#buffer size in bytes for global virtual channels
global_vc_size="32768";
global_vc_size="16384";
#buffer size in bytes for compute node virtual channels
cn_vc_size="32768";
#bandwidth in GiB/s for local channels
local_bandwidth="25.0";
local_bandwidth="2.0";
# bandwidth in GiB/s for global channels
global_bandwidth="25.0";
global_bandwidth="2.0";
# bandwidth in GiB/s for compute node-router channels
cn_bandwidth="25.0";
cn_bandwidth="2.0";
# Number of row channels
num_row_chans="1";
# Number of column channels
num_col_chans="1";
# ROSS message size
message_size="640";
message_size="656";
# number of compute nodes connected to router, dictated by dragonfly config
# file
num_cns_per_router="8";
# number of global channels per router
num_global_channels="8";
# network config file for intra-group connections
intra-group-connections="/home/mubarak/codes-online/codes/src/network-workloads/conf/dragonfly-custom/dfdally_8k_intra";
intra-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally_8k_intra";
# network config file for inter-group connections
inter-group-connections="/home/mubarak/codes-online/codes/src/network-workloads/conf/dragonfly-custom/dfdally_8k_inter";
inter-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally_8k_inter";
# routing protocol to be used
routing="prog-adaptive";
adaptive_threshold="131072";
minimal-bias="1";
df-dally-vc = "1";
}
: Running Time = 22.1632 seconds
TW Library Statistics:
Total Events Processed 27882763
Events Aborted (part of RBs) 0
Events Rolled Back 1623745
Event Ties Detected in PE Queues 0
Efficiency 93.82 %
Total Remote (shared mem) Events Processed 0
Percent Remote Events 0.00 %
Total Remote (network) Events Processed 2239846
Percent Remote Events 8.53 %
Total Roll Backs 213747
Primary Roll Backs 206428
Secondary Roll Backs 7319
Fossil Collect Attempts 108948
Total GVT Computations 27237
Net Events Processed 26259018
Event Rate (events/sec) 1184800.8
Total Events Scheduled Past End Time 0
TW Memory Statistics:
Events Allocated 4510545
Memory Allocated 3342712
Memory Wasted 222
TW Network Statistics:
Remote sends 2517896
Remote recvs 2517896
TW Data Structure sizes in bytes (sizeof):
PE struct 624
KP struct 144
LP struct 128
LP Model struct 32
LP RNGs 80
Total LP 240
Event struct 144
Event struct with Model 752
TW Clock Cycle Statistics (MAX values in secs at 1.0000 GHz):
Priority Queue (enq/deq) 1.2060
AVL Tree (insert/delete) 6.1629
LZ4 (de)compression 0.0000
Buddy system 0.0000
RIO Loading 0.0000
RIO LP Init 0.1464
Event Processing 64.3321
Event Cancel 0.0668
Event Abort 0.0000
GVT 8.9914
Fossil Collect 4.3308
Primary Rollbacks 1.8111
Network Read 10.4349
Statistics Computation 0.0000
Statistics Write 0.0000
Total Time (Note: Using Running Time above for Speedup) 88.8275
TW GVT Statistics: MPI AllReduce
GVT Interval 16
GVT Real Time Interval (cycles) 0
GVT Real Time Interval (sec) 0.00000000
Batch Size 16
Forced GVT 0
Total GVT Computations 27237
Total All Reduce Calls 80374
Average Reduction / GVT 2.95
Average number of router hops traversed: 3.967510; average chunk latency: 3.983703 us; maximum chunk latency: 23.508762 us; avg message size: 2048.000000 bytes; finished messages: 675818; finished chunks: 1351636
Total packets generated: 1351636; finished: 1351636
\ No newline at end of file
: Running Time = 32.7864 seconds
TW Library Statistics:
Total Events Processed 33923697
Events Aborted (part of RBs) 0
Events Rolled Back 2115066
Event Ties Detected in PE Queues 0
Efficiency 93.35 %
Total Remote (shared mem) Events Processed 0
Percent Remote Events 0.00 %
Total Remote (network) Events Processed 3197222
Percent Remote Events 10.05 %
Total Roll Backs 298872
Primary Roll Backs 293249
Secondary Roll Backs 5623
Fossil Collect Attempts 132536
Total GVT Computations 33134
Net Events Processed 31808631
Event Rate (events/sec) 970177.4
Total Events Scheduled Past End Time 0
TW Memory Statistics:
Events Allocated 4510545
Memory Allocated 3342712
Memory Wasted 222
TW Network Statistics:
Remote sends 3630262
Remote recvs 3630262
TW Data Structure sizes in bytes (sizeof):
PE struct 624
KP struct 144
LP struct 128
LP Model struct 32
LP RNGs 80
Total LP 240
Event struct 144
Event struct with Model 752
TW Clock Cycle Statistics (MAX values in secs at 1.0000 GHz):
Priority Queue (enq/deq) 1.6238
AVL Tree (insert/delete) 9.6388
LZ4 (de)compression 0.0000
Buddy system 0.0000
RIO Loading 0.0000
RIO LP Init 0.1587
Event Processing 96.7147
Event Cancel 0.0741
Event Abort 0.0000
GVT 12.5980
Fossil Collect 5.9422
Primary Rollbacks 2.6389
Network Read 15.3961
Statistics Computation 0.0000
Statistics Write 0.0000
Total Time (Note: Using Running Time above for Speedup) 131.4039
TW GVT Statistics: MPI AllReduce
GVT Interval 16
GVT Real Time Interval (cycles) 0
GVT Real Time Interval (sec) 0.00000000
Batch Size 16
Forced GVT 0
Total GVT Computations 33134
Total All Reduce Calls 98239
Average Reduction / GVT 2.96
Average number of router hops traversed: 5.336127; average chunk latency: 8.764690 us; maximum chunk latency: 28.623821 us; avg message size: 2048.000000 bytes; finished messages: 675818; finished chunks: 1351636
Total packets generated: 1351636; finished: 1351636
LPGROUPS
{
MODELNET_GRP
{
repetitions="33";
# name of this lp changes according to the model
nw-lp="1024";
# LP type names used by the dragonfly-plus model
modelnet_dragonfly_plus="1024";
modelnet_dragonfly_plus_router="64";
}
}
PARAMS
{
# packet size in the network
packet_size="1024";
modelnet_order=( "dragonfly_plus","dragonfly_plus_router" );
# scheduler options
modelnet_scheduler="fcfs";
# chunk size in the network (when chunk size = packet size, packets will not be
# divided into chunks)
chunk_size="1024";
# modelnet_scheduler="round-robin";
# number of routers within each group
# each router row corresponds to a chassis in Cray systems
num_router_spine="32";
# each router column corresponds to a slot in a chassis
num_router_leaf="32";
# number of links connecting between group levels per router
num_level_chans="1";
# number of groups in the network
num_groups="33";
# predefined threshold (T) deciding when to reassign packet to a lower priority queue
queue_threshold="50";
# buffer size in bytes for local virtual channels
local_vc_size="8192";
#buffer size in bytes for global virtual channels
global_vc_size="16384";
#buffer size in bytes for compute node virtual channels
cn_vc_size="8192";
#bandwidth in GiB/s for local channels
local_bandwidth="5.25";
# bandwidth in GiB/s for global channels
global_bandwidth="1.5";
# bandwidth in GiB/s for compute node-router channels
cn_bandwidth="8.0";
# ROSS message size
message_size="608";
# number of compute nodes connected to router, dictated by dragonfly config
# file
num_cns_per_router="32";
# number of global ports per router
num_global_connections="32";
# network config file for intra-group connections
intra-group-connections="../src/network-workloads/conf/dragonfly-plus/neil-34k-intra";
# network config file for inter-group connections
inter-group-connections="../src/network-workloads/conf/dragonfly-plus/neil-34k-inter";
# routing protocol to be used
routing="on-the-fly-adaptive";
}
LPGROUPS
{
MODELNET_GRP
{
repetitions="5";
# name of this lp changes according to the model
nw-lp="4";
# LP type names used by the dragonfly-plus model
modelnet_dragonfly_plus="4";
modelnet_dragonfly_plus_router="4";
}
}
PARAMS
{
# packet size in the network
packet_size="1024";
modelnet_order=( "dragonfly_plus","dragonfly_plus_router" );
# scheduler options
modelnet_scheduler="fcfs";
# chunk size in the network (when chunk size = packet size, packets will not be
# divided into chunks)
chunk_size="1024";
# modelnet_scheduler="round-robin";
# number of routers within each group
# each router row corresponds to a chassis in Cray systems
num_router_spine="2";
# each router column corresponds to a slot in a chassis
num_router_leaf="2";
# number of links connecting between group levels per router
num_level_chans="1";
# number of groups in the network
num_groups="5";
# predefined threshold (T) deciding when to reassign packet to a lower priority queue
queue_threshold="50";
# buffer size in bytes for local virtual channels
local_vc_size="8192";
#buffer size in bytes for global virtual channels
global_vc_size="16384";
#buffer size in bytes for compute node virtual channels
cn_vc_size="8192";
#bandwidth in GiB/s for local channels
local_bandwidth="5.25";
# bandwidth in GiB/s for global channels
global_bandwidth="1.5";
# bandwidth in GiB/s for compute node-router channels
cn_bandwidth="8.0";
# ROSS message size
message_size="608";
# number of compute nodes connected to router, dictated by dragonfly config
# file
num_cns_per_router="2";
# number of global ports per router
num_global_connections="2";
# network config file for intra-group connections
intra-group-connections="../src/network-workloads/conf/dragonfly-plus/neil-intra-large";
# network config file for inter-group connections
inter-group-connections="../src/network-workloads/conf/dragonfly-plus/neil-inter-large";
# routing protocol to be used
routing="non-minimal-leaf";
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -108,6 +108,11 @@ static void handle_sched_next_rc(
tw_bf *b,
model_net_wrap_msg * m,
tw_lp * lp);
/* Commit handler of the base model-net LP: installed as the commit_f entry
 * of the model_net_base_lp function table. Takes the base LP state, the
 * event bitfield, the wrapped model-net message and the LP itself. */
static void model_net_commit_event(
model_net_base_state * ns,
tw_bf *b,
model_net_wrap_msg * m,
tw_lp * lp);
/* ROSS function pointer table for this LP */
tw_lptype model_net_base_lp = {
......@@ -115,12 +120,23 @@ tw_lptype model_net_base_lp = {
(pre_run_f) NULL,
(event_f) model_net_base_event,
(revent_f) model_net_base_event_rc,
(commit_f) NULL,
(commit_f) model_net_commit_event,
(final_f) model_net_base_finalize,
(map_f) codes_mapping,
sizeof(model_net_base_state),
};
/*
 * Commit handler for the base model-net LP.
 *
 * Forwards the commit of an MN_BASE_PASS event to the commit callback of the
 * underlying network model, when that model provides one. The sub-model sees
 * only its own message, located inside the wrapper at the offset registered
 * for this LP's network id. All other event types are ignored.
 *
 * ns - base LP state (supplies net_id, sub_type and sub_state)
 * b  - event bitfield, passed through unchanged
 * m  - wrapped model-net message being committed
 * lp - LP the event belongs to
 */
static void model_net_commit_event(model_net_base_state * ns, tw_bf *b, model_net_wrap_msg * m, tw_lp * lp)
{
    /* Only pass-through events are handed to the sub-model. */
    if(m->h.event_type != MN_BASE_PASS)
        return;

    /* The commit callback is optional for a network model. */
    if(ns->sub_type->commit == NULL)
        return;

    /* Step past the wrapper to the model-specific part of the message. */
    void * model_msg = ((char*)m) + msg_offsets[ns->net_id];
    ns->sub_type->commit(ns->sub_state, b, model_msg, lp);
}
/* setup for the ROSS event tracing
*/
void mn_event_collect(model_net_wrap_msg *m, tw_lp *lp, char *buffer, int *collect_flag)
......@@ -375,10 +391,14 @@ void model_net_base_configure(){
offsetof(model_net_wrap_msg, msg.m_dfly_plus);
msg_offsets[DRAGONFLY_PLUS_ROUTER] =
offsetof(model_net_wrap_msg, msg.m_dfly_plus);
msg_offsets[DRAGONFLY_DALLY] =
offsetof(model_net_wrap_msg, msg.m_dally_dfly);
msg_offsets[DRAGONFLY_DALLY_ROUTER] =
offsetof(model_net_wrap_msg, msg.m_dally_dfly);
msg_offsets[SLIMFLY] =
offsetof(model_net_wrap_msg, msg.m_slim);
msg_offsets[FATTREE] =
offsetof(model_net_wrap_msg, msg.m_fat);
offsetof(model_net_wrap_msg, msg.m_fat);
msg_offsets[LOGGP] =
offsetof(model_net_wrap_msg, msg.m_loggp);
msg_offsets[EXPRESS_MESH] =
......@@ -386,6 +406,7 @@ void model_net_base_configure(){
msg_offsets[EXPRESS_MESH_ROUTER] =
offsetof(model_net_wrap_msg, msg.m_em);
// perform the configuration(s)
// This part is tricky, as we basically have to look up all annotations that
// have LP names of the form modelnet_*. For each of those, we need to read
......@@ -532,7 +553,7 @@ void model_net_base_event(
tw_lp * lp){
if(m->h.magic != model_net_base_magic)
printf("\n LP ID mismatched %llu ", lp->gid);
printf("\n LP ID mismatched %llu %d ", lp->gid);
assert(m->h.magic == model_net_base_magic);
......
......@@ -25,6 +25,8 @@ extern struct model_net_method dragonfly_method;
extern struct model_net_method dragonfly_custom_method;
extern struct model_net_method dragonfly_plus_method;
extern struct model_net_method dragonfly_plus_router_method;
extern struct model_net_method dragonfly_dally_method;
extern struct model_net_method dragonfly_dally_router_method;
extern struct model_net_method slimfly_method;
extern struct model_net_method fattree_method;
extern struct model_net_method dragonfly_router_method;
......@@ -271,12 +273,12 @@ static model_net_event_return model_net_noop_event(
model_net_event_return num_rng_calls = 0;
tw_stime poffset = mn_in_sequence ? mn_msg_offset : 0.0;
tw_stime delay = codes_local_latency(sender);
num_rng_calls++; // rng call is in codes_local_latency
tw_stime sendTime = message_size * codes_cn_delay;
if (self_event_size && self_event != NULL) {
poffset += delay;
num_rng_calls++;
tw_event *e = tw_event_new(sender->gid, poffset+offset+sendTime, sender);
memcpy(tw_event_data(e), self_event, self_event_size);
tw_event_send(e);
......@@ -284,7 +286,6 @@ static model_net_event_return model_net_noop_event(
if (remote_event_size && remote_event != NULL) {
poffset += delay;
num_rng_calls++;
/* special case - in a "pull" event, the "remote" message is actually
* to self */
tw_event *e = tw_event_new(is_pull ? sender->gid : final_dest_lp,
......@@ -314,6 +315,7 @@ static model_net_event_return model_net_event_impl_base(
void const * self_event,
tw_lp *sender) {
if (remote_event_size + self_event_size + sizeof(model_net_wrap_msg)
> g_tw_msg_sz){
tw_error(TW_LOC, "Error: model_net trying to transmit an event of size "
......@@ -328,11 +330,12 @@ static model_net_event_return model_net_event_impl_base(
tw_lpid dest_mn_lp = model_net_find_local_device_mctx(net_id, recv_map_ctx,
final_dest_lp);
if (src_mn_lp == dest_mn_lp && message_size < (uint64_t)codes_node_eager_limit)
if ( src_mn_lp == dest_mn_lp && message_size < (uint64_t)codes_node_eager_limit)
{
return model_net_noop_event(final_dest_lp, is_pull, offset, message_size,
remote_event_size, remote_event, self_event_size, self_event,
sender);
}
tw_stime poffset = codes_local_latency(sender);
if (mn_in_sequence){
tw_stime tmp = mn_msg_offset;
......
......@@ -23,6 +23,7 @@
#include "lammps.h"
#include "nekbone_swm_user_code.h"
#include "nearest_neighbor_swm_user_code.h"
// #include "all_to_one_swm_user_code.h"
#define ALLREDUCE_SHORT_MSG_SIZE 2048
......@@ -754,7 +755,6 @@ static void workload_caller(void * arg)
{
shared_context* sctx = static_cast<shared_context*>(arg);
//printf("\n workload name %s ", sctx->workload_name);
if(strcmp(sctx->workload_name, "lammps") == 0)
{
LAMMPS_SWM * lammps_swm = static_cast<LAMMPS_SWM*>(sctx->swm_obj);
......@@ -770,6 +770,11 @@ static void workload_caller(void * arg)
NearestNeighborSWMUserCode * nn_swm = static_cast<NearestNeighborSWMUserCode*>(sctx->swm_obj);
nn_swm->call();
}
// else if(strcmp(sctx->workload_name, "incast") == 0 || strcmp(sctx->workload_name, "incast1") == 0 || strcmp(sctx->workload_name, "incast2") == 0)
// {
// AllToOneSWMUserCode * incast_swm = static_cast<AllToOneSWMUserCode*>(sctx->swm_obj);
// incast_swm->call();
// }
}
static int comm_online_workload_load(const char * params, int app_id, int rank)
{
......@@ -807,10 +812,21 @@ static int comm_online_workload_load(const char * params, int app_id, int rank)
{
path.append("/skeleton.json");
}
// else if(strcmp(o_params->workload_name, "incast") == 0)
// {
// path.append("/incast.json");
// }
// else if(strcmp(o_params->workload_name, "incast1") == 0)
// {
// path.append("/incast1.json");
// }
// else if(strcmp(o_params->workload_name, "incast2") == 0)
// {
// path.append("/incast2.json");
// }
else
tw_error(TW_LOC, "\n Undefined workload type %s ", o_params->workload_name);
//printf("\n path %s ", path.c_str());
try {
std::ifstream jsonFile(path.c_str());
boost::property_tree::json_parser::read_json(jsonFile, root);
......@@ -837,6 +853,11 @@ static int comm_online_workload_load(const char * params, int app_id, int rank)
NearestNeighborSWMUserCode * nn_swm = new NearestNeighborSWMUserCode(root, generic_ptrs);
my_ctx->sctx.swm_obj = (void*)nn_swm;
}
// else if(strcmp(o_params->workload_name, "incast") == 0 || strcmp(o_params->workload_name, "incast1") == 0 || strcmp(o_params->workload_name, "incast2") == 0)
// {
// AllToOneSWMUserCode * incast_swm = new AllToOneSWMUserCode(root, generic_ptrs);
// my_ctx->sctx.swm_obj = (void*)incast_swm;
// }
if(global_prod_thread == NULL)
{
......