Commit aeb2cd91 authored by Neil McGlohon

Merge branch 'dfp-neil-completed' into 'master'

Dragonfly Plus Support

See merge request neil/codes!6
parents b8df46df 1cc15505
@@ -6,11 +6,11 @@
/* This is the base model-net LP that all events pass through before
 * performing any topology-specific work. Packet scheduling, dealing with
 * packet loss (potentially), etc. happens here.
 * Additionally includes wrapper event "send" function that all
 * events for underlying models must go through */
#ifndef MODEL_NET_LP_H
#define MODEL_NET_LP_H
#ifdef __cplusplus
@@ -23,6 +23,7 @@ extern "C" {
#include "model-net-sched.h"
#include "net/dragonfly.h"
#include "net/dragonfly-custom.h"
#include "net/dragonfly-plus.h"
#include "net/slimfly.h"
#include "net/fattree.h"
#include "net/loggp.h"
@@ -43,10 +44,10 @@ void model_net_base_configure();
/// model developers
// Construct a model-net-specific event, analogous to a tw_event_new and
// codes_event_new. The difference here is that we return pointers to
// both the message data (to be cast into the appropriate type) and the
// pointer to the end of the event struct.
//
// This function is expected to be called within each specific model-net
// method - strange and disturbing things will happen otherwise
tw_event * model_net_method_event_new(
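As a quick illustration of the comment above, a minimal sketch of how a network method might allocate and send one of these events. The helper name send_plus_event_sketch and the exact argument order of model_net_method_event_new are assumptions made for illustration; check the prototype in this header before relying on it.

/* Illustrative sketch only: the two out-pointers give the network-specific
 * message (cast to the model's type) and the start of any trailing payload. */
static void send_plus_event_sketch(tw_lpid dest_gid, tw_stime offset_ts,
        int net_id, tw_lp *lp)
{
    void *msg_data = NULL;
    void *extra_data = NULL;
    tw_event *e = model_net_method_event_new(dest_gid, offset_ts, lp, net_id,
            &msg_data, &extra_data);      /* argument order assumed */
    terminal_plus_message *m = (terminal_plus_message *)msg_data;
    m->type = 0;        /* placeholder for a model-specific event type */
    m->magic = 0;       /* placeholder magic number */
    (void)extra_data;   /* points just past the message struct */
    tw_event_send(e);
}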
@@ -131,6 +132,7 @@ typedef struct model_net_wrap_msg {
    model_net_base_msg m_base;              // base lp
    terminal_message m_dfly;                // dragonfly
    terminal_custom_message m_custom_dfly;  // dragonfly-custom
    terminal_plus_message m_dfly_plus;      // dragonfly plus
    slim_terminal_message m_slim;           // slimfly
    fattree_message m_fat;                  // fattree
    loggp_message m_loggp;                  // loggp
...
@@ -41,7 +41,7 @@ extern "C" {
/* HACK: there is currently no scheduling fidelity across multiple
 * model_net_event calls. Hence, problems arise when some LP sends multiple
 * messages as part of an event and expects FCFS ordering. A proper fix which
 * involves model-net LP-level scheduling of requests is ideal, but not
 * feasible for now (would basically have to redesign model-net), so expose
 * explicit start-sequence and stop-sequence markers as a workaround
 */
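The workaround reads roughly as in the sketch below, assuming the markers are exposed as MN_START_SEQ()/MN_END_SEQ() macros and that model_net_event takes the usual self-event/sender tail parameters; verify both names against this header before copying.

/* Sketch: bracket a burst of sends from one LP so they keep FCFS ordering.
 * Macro and parameter names here are assumed, not authoritative. */
static void burst_send_sketch(int net_id, tw_lpid dest1, tw_lpid dest2,
        uint64_t msg_size, tw_lp *lp)
{
    MN_START_SEQ();
    model_net_event(net_id, "burst", dest1, msg_size, 0.0, 0, NULL, 0, NULL, lp);
    model_net_event(net_id, "burst", dest2, msg_size, 0.0, 0, NULL, 0, NULL, lp);
    MN_END_SEQ();
}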
@@ -74,6 +74,8 @@ typedef struct mn_stats mn_stats;
    X(LOGGP, "modelnet_loggp", "loggp", &loggp_method)\
    X(EXPRESS_MESH, "modelnet_express_mesh", "express_mesh", &express_mesh_method)\
    X(EXPRESS_MESH_ROUTER, "modelnet_express_mesh_router", "express_mesh_router", &express_mesh_router_method)\
    X(DRAGONFLY_PLUS, "modelnet_dragonfly_plus", "dragonfly_plus", &dragonfly_plus_method)\
    X(DRAGONFLY_PLUS_ROUTER, "modelnet_dragonfly_plus_router", "dragonfly_plus_router", &dragonfly_plus_router_method)\
    X(MAX_NETS, NULL, NULL, NULL)
#define X(a,b,c,d) a,
@@ -144,7 +146,7 @@ struct mn_stats
    long max_event_size;
};
/* Registers all model-net LPs in ROSS. Should be called after
 * configuration_load, but before codes_mapping_setup */
void model_net_register();
@@ -166,8 +168,8 @@ void model_net_enable_sampling(tw_stime interval, tw_stime end);
int model_net_sampling_enabled(void);
/* Initialize/configure the network(s) based on the CODES configuration.
 * returns an array of the network ids, indexed in the order given by the
 * modelnet_order configuration parameter
 * OUTPUT id_count - the output number of networks */
int* model_net_set_params(int *id_count);
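Putting the two comments above together, the usual setup order in a model's main() looks roughly like this sketch. setup_networks_sketch is a hypothetical helper, and ownership of the returned id array is not shown.

/* Sketch of the documented ordering: register after configuration_load and
 * before codes_mapping_setup, then query the configured networks. */
static int setup_networks_sketch(void)
{
    model_net_register();
    codes_mapping_setup();

    int num_nets = 0;
    int *net_ids = model_net_set_params(&num_nets);
    int net_id = net_ids[0];   /* indexed by the modelnet_order parameter */
    return net_id;
}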
@@ -189,7 +191,7 @@ void model_net_event_collective_rc(
        int message_size,
        tw_lp *sender);
/* allocate and transmit a new event that will pass through model_net to
 * arrive at its destination:
 *
 * - net_id: the type of network to send this message through. The set of
@@ -231,9 +233,9 @@ void model_net_event_collective_rc(
// first argument becomes the network ID
model_net_event_return model_net_event(
        int net_id,
        char const * category,
        tw_lpid final_dest_lp,
        uint64_t message_size,
        tw_stime offset,
        int remote_event_size,
        void const * remote_event,
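For orientation, a hedged example of a call site. The parameters past remote_event (self-event size, self-event payload, and the sending LP) are elided by the hunk above and are assumed here to follow the same pattern as the other variants.

/* Illustrative call only; trailing parameters are assumed. */
static void notify_peer_sketch(int net_id, tw_lpid dest, tw_lp *lp)
{
    struct peer_note { int tag; } rem = { 42 };   /* hypothetical remote payload */
    model_net_event_return rc = model_net_event(
            net_id, "app-traffic", dest,
            2048,               /* message_size in bytes */
            0.0,                /* offset from the current simulation time */
            sizeof(rem), &rem,  /* remote event delivered at dest */
            0, NULL,            /* no self event (assumed tail parameters) */
            lp);
    (void)rc;   /* typically stored in the LP's message for reverse computation */
}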
@@ -251,9 +253,9 @@ model_net_event_return model_net_event(
model_net_event_return model_net_event_annotated(
        int net_id,
        char const * annotation,
        char const * category,
        tw_lpid final_dest_lp,
        uint64_t message_size,
        tw_stime offset,
        int remote_event_size,
        void const * remote_event,
@@ -270,9 +272,9 @@ model_net_event_return model_net_event_mctx(
        int net_id,
        struct codes_mctx const * send_map_ctx,
        struct codes_mctx const * recv_map_ctx,
        char const * category,
        tw_lpid final_dest_lp,
        uint64_t message_size,
        tw_stime offset,
        int remote_event_size,
        void const * remote_event,
@@ -309,7 +311,7 @@ int model_net_get_msg_sz(int net_id);
 * identical to the sender argument to tw_event_new().
 */
/* NOTE: we may end up needing additional arguments here to track state for
 * reverse computation; add as needed
 */
DEPRECATED
void model_net_event_rc(
@@ -333,7 +335,7 @@ void model_net_event_rc2(
 * Parameters are largely the same as model_net_event, with the following
 * exceptions:
 * - final_dest_lp is the lp to pull data from
 * - self_event_size, self_event are applied at the requester upon receipt of
 *   the payload from the dest
 */
model_net_event_return model_net_pull_event(
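A sketch of the pull variant described above, assuming its parameter list mirrors model_net_event minus the remote event pair; verify against the full declaration before use.

/* Sketch: request message_size bytes from data_holder; the self event fires
 * back at this LP when the payload arrives. Parameter list is assumed. */
static void request_data_sketch(int net_id, tw_lpid data_holder, tw_lp *lp)
{
    struct pull_done { int req_id; } done = { 7 };   /* hypothetical self event */
    model_net_event_return rc = model_net_pull_event(
            net_id, "pull-traffic", data_holder,
            4096, 0.0,
            sizeof(done), &done,
            lp);
    (void)rc;
}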
@@ -383,7 +385,7 @@ void model_net_pull_event_rc(
 * model-net implementation (currently implemented as a set of translation-unit
 * globals). Upon a subsequent model_net_*event* call, the context is consumed
 * and reset to an unused state.
 *
 * NOTE: this call MUST be placed in the same calling context as the subsequent
 * model_net_*event* call. Otherwise, the parameters are not guaranteed to work
 * on the intended event, and may possibly be consumed by another, unrelated
...
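The consume-on-next-call contract reads roughly like the sketch below. The parameter-type and sub-type names are recalled from the codes scheduler interface and may not match exactly; treat them as placeholders.

/* Sketch: set a per-message parameter, then issue the event in the same
 * calling context so that event (and only that event) consumes it. */
static void priority_send_sketch(int net_id, tw_lpid dest, tw_lp *lp)
{
    int prio = 0;   /* assumed: smaller value = higher priority under a priority scheduler */
    model_net_set_msg_param(MN_MSG_PARAM_SCHED, MN_SCHED_PARAM_PRIO, &prio);
    model_net_event(net_id, "prio-traffic", dest, 1024, 0.0, 0, NULL, 0, NULL, lp);
}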
/*
* Copyright (C) 2014 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
*/
#ifndef DRAGONFLY_PLUS_H
#define DRAGONFLY_PLUS_H
#ifdef __cplusplus
extern "C" {
#endif
#include <ross.h>
typedef struct terminal_plus_message terminal_plus_message;
/* this message is used for both dragonfly compute nodes and routers */
struct terminal_plus_message
{
/* magic number */
int magic;
/* flit travel start time*/
tw_stime travel_start_time;
/* packet ID of the flit */
unsigned long long packet_ID;
/* event type of the flit */
short type;
/* category: comes from codes */
char category[CATEGORY_NAME_MAX];
/* store category hash in the event */
uint32_t category_hash;
/* final destination LP ID; comes from CODES and can be a server or any other LP type */
tw_lpid final_dest_gid;
/* sending LP ID from CODES, can be a server or any other LP type */
tw_lpid sender_lp;
tw_lpid sender_mn_lp; // source modelnet id
/* destination terminal ID of the dragonfly */
tw_lpid dest_terminal_id;
/* source terminal ID of the dragonfly */
unsigned int src_terminal_id;
/* message originating router id. MM: Can we calculate it through
* sender_mn_lp??*/
unsigned int origin_router_id;
/* number of hops traversed by the packet */
short my_N_hop;
short my_l_hop, my_g_hop;
short saved_channel;
short saved_vc;
int next_stop;
short nonmin_done;
/* Intermediate LP ID from which this message is coming */
unsigned int intm_lp_id;
/* last hop of the message, can be a terminal, local router or global router */
short last_hop;
/* For routing */
int saved_src_dest;
int saved_src_chan;
//DFP Specific Routing
int intm_rtr_id; //Router ID of the intermediate router for nonminimal routes
int intm_group_id; //Group ID of the intermediate router for nonminimal routes
short dfp_upward_channel_flag;
int dfp_dest_terminal_id; //this is the terminal id in the dfp network in range [0-total_num_terminals)
uint32_t chunk_id;
uint32_t packet_size;
uint32_t message_id;
uint32_t total_size;
int remote_event_size_bytes;
int local_event_size_bytes;
// For buffer message
short vc_index;
int output_chan;
model_net_event_return event_rc;
int is_pull;
uint32_t pull_size;
/* for reverse computation */
int path_type;
tw_stime saved_available_time;
tw_stime saved_avg_time;
tw_stime saved_rcv_time;
tw_stime saved_busy_time;
tw_stime saved_total_time;
tw_stime saved_sample_time;
tw_stime msg_start_time;
};
#ifdef __cplusplus
}
#endif
#endif /* end of include guard: DRAGONFLY_PLUS_H */
/*
* Local variables:
* c-indent-level: 4
* c-basic-offset: 4
* End:
*
* vim: ft=c ts=8 sts=4 sw=4 expandtab
*/
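For readers new to the message layout, a short sketch of how the bookkeeping fields of terminal_plus_message relate when a packet is first generated. Field values are placeholders; the authoritative initialization lives in the dragonfly-plus.C source added by this merge request.

/* Illustrative only: a packet of packet_size bytes travels as
 * packet_size/chunk_size chunks, each carrying these identifiers. */
static void init_plus_msg_sketch(terminal_plus_message *m, tw_lpid final_dest,
        tw_lpid dest_term, unsigned int src_term, uint32_t packet_size)
{
    m->final_dest_gid   = final_dest;  /* ultimate LP (e.g. a server) */
    m->dest_terminal_id = dest_term;   /* destination terminal LP */
    m->src_terminal_id  = src_term;    /* source terminal (relative id) */
    m->packet_size      = packet_size;
    m->chunk_id         = 0;           /* incremented per chunk of the packet */
    m->my_N_hop = m->my_l_hop = m->my_g_hop = 0;  /* hop counters start at zero */
    m->dfp_upward_channel_flag = 0;    /* set once the packet is routed upward */
}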
# Copyright 2017 - Neil McGlohon
# mcglon@rpi.edu
import sys
from enum import Enum
import struct
import numpy as np
argv = sys.argv
if sys.version_info[0] < 3:
    raise Exception("Python 3 or a more recent version is required.")

class TopSize(Enum):
    SMALL = 0
    MEDIUM = 1
    LARGE = 2

def main():
    if(len(argv) < 8):
        raise Exception("Correct usage: python %s <num_group> <num_spine> <num_leaf> <topology_size> <redundant_global_cons_per> <intra-file> <inter-file>" % sys.argv[0])

    groups = int(argv[1])
    num_spine_routers = int(argv[2])
    num_leaf_routers = int(argv[3])
    topology_size = TopSize(int(argv[4]))
    redundant_global_cons_per = int(argv[5])

    intra = open(argv[6], "wb")
    inter = open(argv[7], "wb")

    writeIntra(num_spine_routers, num_leaf_routers, intra)
    writeInter(num_spine_routers, num_leaf_routers, topology_size, groups, redundant_global_cons_per, inter)

    intra.close()
    inter.close()

def getRouterGID(localID, groupNumber, routers_per_group):
    return(localID + (groupNumber*routers_per_group))

def writeIntra(num_spine_routers,num_leaf_routers,fd):
    total_routers = num_spine_routers + num_leaf_routers
    A = np.zeros((total_routers,total_routers)) #intra adjacency matrix in case you want one

    #for each leaf router, connect it to all spine routers in group
    for li in range(num_leaf_routers):
        for si in range(num_leaf_routers,total_routers):
            A[li][si] = 1
            fd.write(struct.pack("2i",li,si))
            print("INTRA %d %d"%(li,si))

    #for each spine router, connect it to all leaf routers in group
    for si in range(num_leaf_routers,total_routers):
        for li in range(num_leaf_routers):
            A[si][li] = 1
            fd.write(struct.pack("2i",si,li))
            print("INTRA %d %d"%(si,li))

def writeInter(num_spine_routers,num_leaf_routers, topology_size, num_groups, redundant_global_cons_per,fd):
    total_routers_per_group = num_spine_routers + num_leaf_routers
    global_total_routers = total_routers_per_group * num_groups
    global_cons_per = redundant_global_cons_per + 1

    Ag = np.zeros((global_total_routers,global_total_routers))

    if (topology_size is TopSize.SMALL) or (topology_size is TopSize.MEDIUM):
        #Every spine is connected to every other group
        if (topology_size is TopSize.MEDIUM) and (redundant_global_cons_per > 0):
            raise Exception("Error: redundant connections incompatible with Medium topology")

        for source_gi in range(num_groups):
            for si in range(num_leaf_routers, total_routers_per_group):
                source_id = getRouterGID(si,source_gi,total_routers_per_group)
                for dest_gi in range(num_groups):
                    if source_gi != dest_gi:
                        dest_id = getRouterGID(si, dest_gi,total_routers_per_group)
                        for i in range(global_cons_per):
                            fd.write(struct.pack("2i",source_id,dest_id))
                            print("INTER %d %d srcg %d destg %d"%(source_id,dest_id,source_gi,dest_gi))

    elif topology_size is TopSize.LARGE:
        #Each group is connected to every other group via single connection on individual spines
        ind_radix = 2 * num_leaf_routers #TODO don't assume that the radix is half down half up
        ind_up_radix = int(ind_radix/2)
        num_other_groups = num_groups - 1

        if(num_other_groups%num_spine_routers != 0):
            raise Exception("ERROR: Asymmetrical - num_other_groups%num_spine_routers != 0") #TODO Consider allowing such a setting?
        if(num_other_groups != (num_spine_routers**2)):
            raise Exception("ERROR: Asymmetrical - num_other_groups != num_spine_routers^2")
        if num_other_groups != (num_spine_routers * ind_up_radix):
            raise Exception("Error: Invalid topology - num groups exceeds group upward radix")
        if ind_up_radix > num_groups:
            raise Exception("ERROR: The number of global connections per spine router exceeds the number of groups. Not Large Topology!")
        if ind_up_radix != num_spine_routers:
            raise Exception("ERROR: the upward radix must equal the number of spine routers")

        interlinks = []
        spine_router_ids = [i for i in range(global_total_routers) if (i % total_routers_per_group) >= num_leaf_routers]
        all_groups = [i for i in range(num_groups)]

        for i,source_id in enumerate(spine_router_ids):
            source_group_id = int(source_id / total_routers_per_group)
            spine_local_id = source_id % total_routers_per_group
            xth_spine = spine_local_id - num_leaf_routers #if we were only counting spine routers, this means that I am the xth spine where x is this value
            other_groups = [j for j in range(num_groups) if j != source_group_id] #ids of groups not the source group
            my_other_groups = [] #the specific groups that this spine router will connect to

            for ii in range(ind_up_radix):
                index = (source_group_id+1 + ii + xth_spine*ind_up_radix) % num_groups
                my_other_groups.append(all_groups[index])

            dest_spinal_offset = (num_spine_routers-1) - xth_spine #which xth spine the spine router will connect to in the dest group

            for dest_group_id in my_other_groups:
                dest_group_offset = dest_group_id * total_routers_per_group #starting gid for routers in dest group
                dest_id = dest_group_offset + dest_spinal_offset + num_leaf_routers #gid of destination router
                Ag[source_id,dest_id] = 1
                interlinks.append((source_id, dest_id, source_group_id, dest_group_id))

        interlinks.sort(key=lambda x: x[0])

        for link in interlinks:
            fd.write(struct.pack("2i",link[0], link[1]))
            print("INTER %d %d srcg %d destg %d" % (link[0], link[1], link[2], link[3]))

        #sanity check: no spine router may use more global connections than its upward radix
        for row in Ag:
            row_sum = sum(row)
            if row_sum > ind_up_radix:
                raise Exception("Error: connectivity exceeds radix!")
        for col in Ag.T:
            col_sum = sum(col)
            if col_sum > ind_up_radix:
                raise Exception("Error: Connectivity exceeds radix!")

    else:
        raise Exception("Error: Invalid topology size given. Please use {0 | 1 | 2}")

if __name__ == '__main__':
    main()
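The files written above are flat sequences of native-endian int pairs (struct.pack("2i", src, dst)). Below is a minimal C sketch of a reader, e.g. for sanity-checking a generated intra file; the file name is a placeholder, and the actual dragonfly-plus model consumes these files internally.

#include <stdio.h>

int main(void)
{
    /* placeholder path; use whatever name was given to the script */
    FILE *f = fopen("dfp_intra.bin", "rb");
    if (!f)
        return 1;
    int link[2];   /* assumes 4-byte native int, matching struct.pack("2i", ...) */
    while (fread(link, sizeof(int), 2, f) == 2)
        printf("link %d -> %d\n", link[0], link[1]);
    fclose(f);
    return 0;
}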
@@ -157,6 +157,7 @@ src_libcodes_la_SOURCES = \
  src/networks/model-net/express-mesh.C \
  src/networks/model-net/dragonfly.c \
  src/networks/model-net/dragonfly-custom.C \
  src/networks/model-net/dragonfly-plus.C \
  src/networks/model-net/slimfly.c \
  src/networks/model-net/fattree.c \
  src/networks/model-net/loggp.c \
@@ -164,7 +165,7 @@ src_libcodes_la_SOURCES = \
  src/networks/model-net/model-net-lp.c \
  src/networks/model-net/model-net-sched.c \
  src/networks/model-net/model-net-sched-impl.h \
  src/networks/model-net/model-net-sched-impl.c
src_libcodes_mpi_replay_la_SOURCES = \
  src/network-workloads/model-net-mpi-replay.c
@@ -190,18 +191,21 @@ bin_PROGRAMS += src/network-workloads/model-net-synthetic
bin_PROGRAMS += src/network-workloads/model-net-synthetic-custom-dfly
bin_PROGRAMS += src/network-workloads/model-net-synthetic-slimfly
bin_PROGRAMS += src/network-workloads/model-net-synthetic-fattree
bin_PROGRAMS += src/network-workloads/model-net-synthetic-dfly-plus
src_workload_codes_workload_dump_SOURCES = \
  src/workload/codes-workload-dump.c
src_network_workloads_model_net_dumpi_traces_dump_SOURCES = src/network-workloads/model-net-dumpi-traces-dump.c
src_network_workloads_model_net_synthetic_slimfly_SOURCES = src/network-workloads/model-net-synthetic-slimfly.c
src_network_workloads_model_net_mpi_replay_SOURCES = \
  src/network-workloads/model-net-mpi-replay.c \
  src/network-workloads/model-net-mpi-replay-main.c
src_network_workloads_model_net_mpi_replay_CFLAGS = $(AM_CFLAGS)
src_network_workloads_model_net_synthetic_SOURCES = src/network-workloads/model-net-synthetic.c
src_network_workloads_model_net_synthetic_custom_dfly_SOURCES = src/network-workloads/model-net-synthetic-custom-dfly.c
src_network_workloads_model_net_synthetic_dfly_plus_SOURCES = src/network-workloads/model-net-synthetic-dfly-plus.c
src_networks_model_net_topology_test_SOURCES = src/networks/model-net/topology-test.c
#bin_PROGRAMS += src/network-workload/codes-nw-test
...
: Running Time = 22.1632 seconds
TW Library Statistics:
Total Events Processed 27882763
Events Aborted (part of RBs) 0
Events Rolled Back 1623745
Event Ties Detected in PE Queues 0
Efficiency 93.82 %
Total Remote (shared mem) Events Processed 0
Percent Remote Events 0.00 %
Total Remote (network) Events Processed 2239846
Percent Remote Events 8.53 %
Total Roll Backs 213747
Primary Roll Backs 206428
Secondary Roll Backs 7319
Fossil Collect Attempts 108948
Total GVT Computations 27237
Net Events Processed 26259018
Event Rate (events/sec) 1184800.8
Total Events Scheduled Past End Time 0
TW Memory Statistics:
Events Allocated 4510545
Memory Allocated 3342712
Memory Wasted 222
TW Network Statistics:
Remote sends 2517896
Remote recvs 2517896
TW Data Structure sizes in bytes (sizeof):
PE struct 624
KP struct 144
LP struct 128
LP Model struct 32
LP RNGs 80
Total LP 240
Event struct 144
Event struct with Model 752
TW Clock Cycle Statistics (MAX values in secs at 1.0000 GHz):
Priority Queue (enq/deq) 1.2060
AVL Tree (insert/delete) 6.1629
LZ4 (de)compression 0.0000
Buddy system 0.0000
RIO Loading 0.0000
RIO LP Init 0.1464
Event Processing 64.3321
Event Cancel 0.0668
Event Abort 0.0000
GVT 8.9914
Fossil Collect 4.3308
Primary Rollbacks 1.8111
Network Read 10.4349
Statistics Computation 0.0000
Statistics Write 0.0000
Total Time (Note: Using Running Time above for Speedup) 88.8275
TW GVT Statistics: MPI AllReduce
GVT Interval 16
GVT Real Time Interval (cycles) 0
GVT Real Time Interval (sec) 0.00000000
Batch Size 16
Forced GVT 0
Total GVT Computations 27237
Total All Reduce Calls 80374
Average Reduction / GVT 2.95
Average number of router hops traversed: 3.967510; average chunk latency: 3.983703 us; maximum chunk latency: 23.508762 us; avg message size: 2048.000000 bytes; finished messages: 675818; finished chunks: 1351636
Total packets generated: 1351636; finished: 1351636
\ No newline at end of file
: Running Time = 32.7864 seconds
TW Library Statistics:
Total Events Processed 33923697
Events Aborted (part of RBs) 0
Events Rolled Back 2115066
Event Ties Detected in PE Queues 0
Efficiency 93.35 %
Total Remote (shared mem) Events Processed 0
Percent Remote Events 0.00 %
Total Remote (network) Events Processed 3197222
Percent Remote Events 10.05 %
Total Roll Backs 298872
Primary Roll Backs 293249
Secondary Roll Backs 5623
Fossil Collect Attempts 132536
Total GVT Computations 33134
Net Events Processed 31808631
Event Rate (events/sec) 970177.4
Total Events Scheduled Past End Time 0
TW Memory Statistics:
Events Allocated 4510545
Memory Allocated 3342712
Memory Wasted 222
TW Network Statistics:
Remote sends 3630262
Remote recvs 3630262
TW Data Structure sizes in bytes (sizeof):
PE struct 624
KP struct 144
LP struct 128
LP Model struct 32
LP RNGs 80
Total LP 240
Event struct 144
Event struct with Model 752
TW Clock Cycle Statistics (MAX values in secs at 1.0000 GHz):
Priority Queue (enq/deq) 1.6238
AVL Tree (insert/delete) 9.6388
LZ4 (de)compression 0.0000
Buddy system 0.0000
RIO Loading 0.0000
RIO LP Init 0.1587
Event Processing 96.7147
Event Cancel 0.0741
Event Abort 0.0000
GVT 12.5980
Fossil Collect 5.9422
Primary Rollbacks 2.6389
Network Read 15.3961
Statistics Computation 0.0000
Statistics Write 0.0000
Total Time (Note: Using Running Time above for Speedup) 131.4039
TW GVT Statistics: MPI AllReduce
GVT Interval 16
GVT Real Time Interval (cycles) 0
GVT Real Time Interval (sec) 0.00000000
Batch Size 16
Forced GVT 0
Total GVT Computations 33134
Total All Reduce Calls 98239
Average Reduction / GVT 2.96
Average number of router hops traversed: 5.336127; average chunk latency: 8.764690 us; maximum chunk latency: 28.623821 us; avg message size: 2048.000000 bytes; finished messages: 675818; finished chunks: 1351636
Total packets generated: 1351636; finished: 1351636
LPGROUPS
{
MODELNET_GRP
{
repetitions="5";
# name of this lp changes according to the model
nw-lp="16";
# these lp names will be the same for the dragonfly-plus model
modelnet_dragonfly_plus="16";
modelnet_dragonfly_plus_router="8";
}
}
PARAMS
{
# packet size in the network
packet_size="1024";
modelnet_order=( "dragonfly_plus","dragonfly_plus_router" );
# scheduler options
modelnet_scheduler="fcfs";
# chunk size in the network (when chunk size = packet size, packets will not be
# divided into chunks)
chunk_size="1024";
# modelnet_scheduler="round-robin";
# number of spine routers within each group
num_router_spine="4";
# number of leaf routers within each group
num_router_leaf="4";