Commit aeb2cd91 authored by Neil McGlohon's avatar Neil McGlohon

Merge branch 'dfp-neil-completed' into 'master'

Dragonfly Plus Support

See merge request neil/codes!6
parents b8df46df 1cc15505
#ifndef CONNECTION_MANAGER_H
#define CONNECTION_MANAGER_H
/**
* connection-manager.h -- Simple, Readable, Connection management interface
* Neil McGlohon
*
* Copyright (c) 2018 Rensselaer Polytechnic Institute
*/
#include <cassert>
#include <cstdio>
#include <map>
#include <vector>
#include "codes/codes.h"
#include "codes/model-net.h"
using namespace std;
/**
 * @brief Distinguishes the kinds of links a router can own.
 *
 * Local connections use router IDs in the range [0,num_router_per_group),
 * whereas global connections use router IDs in the range [0,total_routers).
 * Numbering starts at 1, so 0 never names a valid connection type.
 */
enum ConnectionType
{
    CONN_LOCAL    = 1, // link to a router inside the same group
    CONN_GLOBAL   = 2, // link to a router in a different group
    CONN_TERMINAL = 3  // link to a compute node terminal
};
/**
* @brief Struct for connection information.
*
* Describes a single link from the owner of a ConnectionManager (the source) to one
* destination. Instances are filled in by ConnectionManager::add_connection(), which
* derives dest_lid and dest_group_id from dest_gid and the number of routers per group,
* and assigns the port number.
*/
struct Connection
{
int port; //port ID of the connection (assigned by ConnectionManager::add_connection)
int src_lid; //local id (within its group) of the source
int src_gid; //global id of the source
int src_group_id; //group id of the source
int dest_lid; //local id of the destination (dest_gid modulo routers per group)
int dest_gid; //global id of the destination
int dest_group_id; //group id of the destination (for CONN_TERMINAL this is set to the source's group)
ConnectionType conn_type; //type of the connection: CONN_LOCAL, CONN_GLOBAL, or CONN_TERMINAL
};
/**
* @class ConnectionManager
*
* @brief
* This class is meant to make organization of the connections between routers more
* streamlined. It provides a simple, readable interface which helps reduce
* semantic errors during development.
*
* Each instance belongs to one router (the "source"). Connections are registered one at
* a time with add_connection(), which also assigns each of them a port number, and can
* then be queried by destination, by type, by destination group or by port.
*
* @note
* This class was designed with dragonfly type topologies in mind. Certain parts may not
* make sense for other types of topologies, they might work fine, but no guarantees.
*
* @note
* There is the property intermediateRouterToGroupMap and related methods that are implemented but the
* logistics to get this information from the input file are more complicated than it's worth, so they
* have been commented out.
*
* @note
* This class assumes that each router group has the same number of routers in it: _num_routers_per_group.
*/
class ConnectionManager {
map< int, vector< Connection > > intraGroupConnections; //direct connections within a group - keyed by the destination's group-local ID - value is the list of connections to that router
map< int, vector< Connection > > globalConnections; //direct connections between routers not in the same group - keyed by the destination's global router ID - value is the list of connections to that router
map< int, vector< Connection > > terminalConnections; //direct connections between this router and its compute node terminals - keyed by terminal ID - value is the list of connections to that terminal
map< int, Connection > _portMap; //maps a port number to the connection that occupies it
// map< int, vector< Connection > > intermediateRouterToGroupMap; //maps group id to list of routers that connect to it.
// //ex: intermediateRouterToGroupMap[3] returns a vector
// //of connections from this router to routers that have
// //direct connections to group 3
int _source_id_local; //local id (within group) of owner of this connection manager
int _source_id_global; //global id (not lp gid) of owner of this connection manager
int _source_group; //group id of the owner of this connection manager
int _used_intra_ports; //number of used ports for intra connections
int _used_inter_ports; //number of used ports for inter connections
int _used_terminal_ports; //number of used ports for terminal connections
int _max_intra_ports; //maximum number of ports for intra connections
int _max_inter_ports; //maximum number of ports for inter connections
int _max_terminal_ports; //maximum number of ports for terminal connections.
int _num_routers_per_group; //number of routers per group - used for turning global ID into local and back
public:
/**
* @brief creates a manager with no connections registered
* @param src_id_local the local (within group) ID of the router owning this manager
* @param src_id_global the global ID of the router owning this manager
* @param src_group the group ID of the router owning this manager
* @param max_intra the maximum number of ports for intra group connections
* @param max_inter the maximum number of ports for inter group connections
* @param max_term the maximum number of ports for terminal connections
* @param num_router_per_group the number of routers in each group
*/
ConnectionManager(int src_id_local, int src_id_global, int src_group, int max_intra, int max_inter, int max_term, int num_router_per_group);
/**
* @brief Adds a connection to the manager
* @param dest_gid the global ID of the destination router
* @param type the type of the connection, CONN_LOCAL, CONN_GLOBAL, or CONN_TERMINAL
* @note Ports are numbered in the order connections are added: local connections start at 0,
* global connections start at max_intra, and terminal connections start at max_intra + max_inter.
*/
void add_connection(int dest_gid, ConnectionType type);
// /**
// * @brief adds knowledge of what next hop routers have connections to specific groups
// * @param local_intm_id the local intra group id of the router that has the connection to dest_group_id
// * @param dest_group_id the id of the group that the connection goes to
// */
// void add_route_to_group(int local_intm_id, int dest_group_id);
// /**
// * @brief returns a vector of connections to routers that have direct connections to the specified group id
// * @param dest_group_id the id of the destination group that all connections returned have a direct connection to
// */
// vector< Connection > get_intm_conns_to_group(int dest_group_id);
// /**
// * @brief returns a vector of local router ids that have direct connections to the specified group id
// * @param dest_group_id the id of the destination group that all routers returned have a direct connection to
// * @note if a router has multiple intra group connections to a single router and that router has a connection
// * to the dest group then that router will appear multiple times in the returned vector.
// */
// vector< int > get_intm_routers_to_group(int dest_group_id)
/**
* @brief get the source ID of the owner of the manager
* @param type CONN_LOCAL to get the local ID, CONN_GLOBAL to get the global ID
* @note other connection types are not supported and trigger an assertion
*/
int get_source_id(ConnectionType type);
/**
* @brief get the port(s) associated with a specific destination ID
* @param dest_id the ID (local or global depending on type) of the destination
* @param type the type of the connection, CONN_LOCAL, CONN_GLOBAL, or CONN_TERMINAL
* @return one port number per connection to the destination; empty if there is none
*/
vector<int> get_ports(int dest_id, ConnectionType type);
/**
* @brief get the connection associated with a specific port number
* @param port the enumeration of the port in question
* @return a copy of the connection registered on that port
*/
Connection get_connection_on_port(int port);
/**
* @brief returns true if a connection exists in the manager from the source to the specified destination ID BY TYPE
* @param dest_id the ID of the destination depending on the type (group-local ID for CONN_LOCAL, global ID for CONN_GLOBAL, terminal ID for CONN_TERMINAL)
* @param type the type of the connection, CONN_LOCAL, CONN_GLOBAL, or CONN_TERMINAL
* @note Will not return true if dest_id is within own group and type is CONN_GLOBAL, see is_any_connection_to()
*/
bool is_connected_to_by_type(int dest_id, ConnectionType type);
/**
* @brief returns true if any connection exists in the manager from the source to the specified global destination ID
* @param dest_global_id the global id of the destination
* @note This is meant to allow for a developer to determine connectivity just from the global ID, even if the two entities
* are connected by a local or terminal connection.
*/
bool is_any_connection_to(int dest_global_id);
/**
* @brief returns the total number of used ports by the owner of the manager
*/
int get_total_used_ports();
/**
* @brief returns the number of used ports for a specific connection type
* @param type the type of the connection, CONN_LOCAL, CONN_GLOBAL, or CONN_TERMINAL
*/
int get_used_ports_for(ConnectionType type);
/**
* @brief returns the type of connection associated with said port
* @param port_num the number of the port in question
*/
ConnectionType get_port_type(int port_num);
/**
* @brief returns a vector of connections to the destination ID based on the connection type
* @param dest_id the global ID of the destination; for CONN_LOCAL it is reduced to a group-local ID
* (modulo the number of routers per group) before the lookup
* @param type the type of the connection, CONN_LOCAL, CONN_GLOBAL, or CONN_TERMINAL
*/
vector< Connection > get_connections_to_gid(int dest_id, ConnectionType type);
/**
* @brief returns a vector of connections to the destination group. connections will be of type CONN_GLOBAL
* @param dest_group_id the id of the destination group
*/
vector< Connection > get_connections_to_group(int dest_group_id);
/**
* @brief returns a vector of all connections to routers via type specified.
* @param type the type of the connection, CONN_LOCAL, CONN_GLOBAL, or CONN_TERMINAL
* @note this will return connections to same destination on different ports as individual connections
*/
vector< Connection > get_connections_by_type(ConnectionType type);
/**
* @brief prints out the state of the connection manager to standard output, one line per used port
*/
void print_connections();
};
//******************* BEGIN IMPLEMENTATION ********************************************************
//******************* Connection Manager Implementation *******************************************
/**
 * @brief Creates a connection manager with no connections registered.
 *
 * @param src_id_local         local (within group) ID of the owning router
 * @param src_id_global        global ID of the owning router
 * @param src_group            group ID of the owning router
 * @param max_intra            maximum number of ports for intra group connections
 * @param max_inter            maximum number of ports for inter group connections
 * @param max_term             maximum number of ports for terminal connections
 * @param num_router_per_group number of routers in every group
 *
 * Defined `inline` because the implementation lives in this header: without it,
 * including the header from more than one translation unit produces duplicate
 * symbol errors at link time.
 */
inline ConnectionManager::ConnectionManager(int src_id_local, int src_id_global, int src_group, int max_intra, int max_inter, int max_term, int num_router_per_group)
    : _source_id_local(src_id_local),
      _source_id_global(src_id_global),
      _source_group(src_group),
      _used_intra_ports(0),
      _used_inter_ports(0),
      _used_terminal_ports(0),
      _max_intra_ports(max_intra),
      _max_inter_ports(max_inter),
      _max_terminal_ports(max_term),
      _num_routers_per_group(num_router_per_group)
{
    // The connection maps start out empty; nothing else to set up.
}
/**
 * @brief Registers a new connection from the owning router to a destination.
 *
 * The connection is given the next free port in the range reserved for its type:
 * local ports start at 0, global ports start at _max_intra_ports and terminal ports
 * start at _max_intra_ports + _max_inter_ports.
 *
 * @param dest_gid the global ID of the destination
 * @param type     the type of the connection, CONN_LOCAL, CONN_GLOBAL, or CONN_TERMINAL
 *
 * @note An unknown type asserts; when assertions are compiled out the call is a no-op
 *       instead of recording a connection with an uninitialized port number.
 * @note NOTE(review): the used-port counters are never checked against the configured
 *       maxima, so adding more connections of a type than its maximum would reuse port
 *       numbers belonging to the next range - confirm callers never exceed the limits.
 */
inline void ConnectionManager::add_connection(int dest_gid, ConnectionType type)
{
    Connection conn;
    conn.port = -1; // real value assigned below once the type is known to be valid
    conn.src_lid = _source_id_local;
    conn.src_gid = _source_id_global;
    conn.src_group_id = _source_group;
    conn.conn_type = type;
    conn.dest_lid = dest_gid % _num_routers_per_group;
    conn.dest_gid = dest_gid;
    conn.dest_group_id = dest_gid / _num_routers_per_group;

    switch (type)
    {
        case CONN_LOCAL:
            conn.port = this->get_used_ports_for(CONN_LOCAL);
            intraGroupConnections[conn.dest_lid].push_back(conn);
            _used_intra_ports++;
            break;
        case CONN_GLOBAL:
            conn.port = _max_intra_ports + this->get_used_ports_for(CONN_GLOBAL);
            globalConnections[conn.dest_gid].push_back(conn);
            _used_inter_ports++;
            break;
        case CONN_TERMINAL:
            conn.port = _max_intra_ports + _max_inter_ports + this->get_used_ports_for(CONN_TERMINAL);
            // terminals hang off this router, so they are recorded in the router's own group
            conn.dest_group_id = _source_group;
            terminalConnections[conn.dest_gid].push_back(conn);
            _used_terminal_ports++;
            break;
        default:
            assert(false);
            // TW_ERROR(TW_LOC, "add_connection(dest_id, type): Undefined connection type\n");
            return; // never store a connection whose port was not assigned
    }

    _portMap[conn.port] = conn;
}
// void ConnectionManager::add_route_to_group(Connection conn, int dest_group_id)
// {
// intermediateRouterToGroupMap[dest_group_id].push_back(conn);
// }
// vector< Connection > ConnectionManager::get_intm_conns_to_group(int dest_group_id)
// {
// return intermediateRouterToGroupMap[dest_group_id];
// }
// vector< int > ConnectionManager::get_intm_routers_to_group(int dest_group_id)
// {
// vector< Connection > intm_router_conns = get_intm_conns_to_group(dest_group_id);
// vector< int > loc_intm_router_ids;
// vector< Connection >::iterator it;
// for(it = intm_router_conns.begin(); it != intm_router_conns.end(); it++)
// {
// loc_intm_router_ids.push_back((*it).other_id);
// }
// return loc_intm_router_ids;
// }
/**
 * @brief Returns the ID of the router that owns this manager.
 *
 * @param type CONN_LOCAL selects the group-local ID, CONN_GLOBAL the global ID
 * @return the requested ID, or -1 for an unsupported type when assertions are disabled
 *
 * @note Any other type (including CONN_TERMINAL) is unsupported and asserts. The
 *       explicit return keeps the function well defined in NDEBUG builds, where
 *       control previously ran off the end of a non-void function.
 */
inline int ConnectionManager::get_source_id(ConnectionType type)
{
    switch (type)
    {
        case CONN_LOCAL:
            return _source_id_local;
        case CONN_GLOBAL:
            return _source_id_global;
        default:
            assert(false);
            // TW_ERROR(TW_LOC, "get_source_id(type): Unsupported connection type\n");
            return -1;
    }
}
/**
 * @brief Lists the ports that lead to a given destination.
 *
 * @param dest_id the ID of the destination, interpreted as in get_connections_to_gid()
 * @param type    the type of the connection, CONN_LOCAL, CONN_GLOBAL, or CONN_TERMINAL
 * @return one port number per connection to the destination, in the order the
 *         connections were added; empty when there is no such connection
 */
inline vector<int> ConnectionManager::get_ports(int dest_id, ConnectionType type)
{
    const vector< Connection > conns = this->get_connections_to_gid(dest_id, type);

    vector< int > ports_used;
    ports_used.reserve(conns.size()); // exactly one port per connection

    for (vector< Connection >::const_iterator it = conns.begin(); it != conns.end(); ++it) {
        ports_used.push_back(it->port);
    }
    return ports_used;
}
/**
 * @brief Returns the connection registered on a port.
 *
 * @param port the port number in question
 * @return a copy of the connection on that port; a value-initialized (all zero)
 *         Connection when no connection uses the port
 *
 * @note Looks the port up with find() rather than operator[]: the latter inserted a
 *       blank entry for every unknown port queried, which then showed up as a bogus
 *       connection in print_connections(). The returned value is unchanged.
 */
inline Connection ConnectionManager::get_connection_on_port(int port)
{
    map< int, Connection >::const_iterator found = _portMap.find(port);
    if (found == _portMap.end())
        return Connection(); // same zeroed value operator[] used to hand back
    return found->second;
}
bool ConnectionManager::is_connected_to_by_type(int dest_id, ConnectionType type)
{
switch (type)
{
case CONN_LOCAL:
if (intraGroupConnections.find(dest_id) != intraGroupConnections.end())
return true;
break;
case CONN_GLOBAL:
if (globalConnections.find(dest_id) != globalConnections.end())
return true;
break;
case CONN_TERMINAL:
if (terminalConnections.find(dest_id) != terminalConnections.end())
return true;
break;
default:
assert(false);
// TW_ERROR(TW_LOC, "get_used_ports_for(type): Undefined connection type\n");
}
return false;
}
bool ConnectionManager::is_any_connection_to(int dest_global_id)
{
int local_id = dest_global_id % _num_routers_per_group;
if (intraGroupConnections.find(local_id) != intraGroupConnections.end())
return true;
if (globalConnections.find(dest_global_id) != globalConnections.end())
return true;
if (terminalConnections.find(dest_global_id) != terminalConnections.end())
return true;
return false;
}
/**
 * @brief Returns how many ports are in use across all connection types.
 *
 * @return the sum of the used intra group, inter group and terminal ports
 *
 * Defined `inline` so the header can be included from several translation units
 * without duplicate symbol errors at link time.
 */
inline int ConnectionManager::get_total_used_ports()
{
    const int router_ports = _used_intra_ports + _used_inter_ports;
    return router_ports + _used_terminal_ports;
}
/**
 * @brief Returns how many ports are in use for one connection type.
 *
 * @param type the type of the connection, CONN_LOCAL, CONN_GLOBAL, or CONN_TERMINAL
 * @return the number of connections of that type added so far
 *
 * @note An unknown type asserts. With assertions disabled it reports 0 instead of
 *       running off the end of a non-void function, which was undefined behaviour.
 */
inline int ConnectionManager::get_used_ports_for(ConnectionType type)
{
    switch (type)
    {
        case CONN_LOCAL:
            return _used_intra_ports;
        case CONN_GLOBAL:
            return _used_inter_ports;
        case CONN_TERMINAL:
            return _used_terminal_ports;
        default:
            assert(false);
            // TW_ERROR(TW_LOC, "get_used_ports_for(type): Undefined connection type\n");
            return 0;
    }
}
/**
 * @brief Returns the type of the connection registered on a port.
 *
 * @param port_num the number of the port in question
 * @return the connection type on that port; for a port with no connection the result
 *         is the zero value, which is none of CONN_LOCAL / CONN_GLOBAL / CONN_TERMINAL
 *
 * @note Uses find() so that asking about an unused port no longer inserts a blank
 *       entry into the port map. The value returned for unused ports is unchanged.
 */
inline ConnectionType ConnectionManager::get_port_type(int port_num)
{
    map< int, Connection >::const_iterator found = _portMap.find(port_num);
    if (found == _portMap.end())
        return static_cast<ConnectionType>(0); // matches the zeroed entry operator[] used to create
    return found->second.conn_type;
}
/**
 * @brief Returns every connection of a given type to one destination.
 *
 * @param dest_gid the global ID of the destination; for CONN_LOCAL it is reduced to a
 *                 group-local ID (modulo the number of routers per group) first
 * @param type     the type of the connection, CONN_LOCAL, CONN_GLOBAL, or CONN_TERMINAL
 * @return a copy of the connections to that destination; empty if there are none
 *
 * @note The lookup uses find() rather than operator[]. operator[] created an empty
 *       entry for every destination asked about, after which is_connected_to_by_type()
 *       and is_any_connection_to() reported that destination as connected.
 * @note An unknown type asserts; with assertions disabled an empty vector is returned
 *       instead of running off the end of a non-void function.
 */
inline vector< Connection > ConnectionManager::get_connections_to_gid(int dest_gid, ConnectionType type)
{
    const map< int, vector< Connection > > *table;
    int key = dest_gid;

    switch (type)
    {
        case CONN_LOCAL:
            table = &intraGroupConnections;
            key = dest_gid % _num_routers_per_group; // local connections are keyed by group-local ID
            break;
        case CONN_GLOBAL:
            table = &globalConnections;
            break;
        case CONN_TERMINAL:
            table = &terminalConnections;
            break;
        default:
            assert(false);
            // TW_ERROR(TW_LOC, "get_connections(type): Undefined connection type\n");
            return vector< Connection >();
    }

    map< int, vector< Connection > >::const_iterator found = table->find(key);
    if (found == table->end())
        return vector< Connection >();
    return found->second;
}
/**
 * @brief Returns every global connection whose destination lies in a given group.
 *
 * @param dest_group_id the ID of the destination group
 * @return the matching connections (all of type CONN_GLOBAL), ordered by destination
 *         global ID and, for one destination, by the order they were added
 *
 * Defined `inline` so the header can be included from several translation units
 * without duplicate symbol errors at link time.
 */
inline vector< Connection > ConnectionManager::get_connections_to_group(int dest_group_id)
{
    vector< Connection > conns_to_group;

    // outer loop: every router reachable over a global link
    map< int, vector< Connection > >::const_iterator router;
    for (router = globalConnections.begin(); router != globalConnections.end(); ++router)
    {
        const vector< Connection > &links = router->second;

        // inner loop: every individual link to that router
        vector< Connection >::const_iterator link;
        for (link = links.begin(); link != links.end(); ++link)
        {
            if (link->dest_group_id == dest_group_id)
                conns_to_group.push_back(*link);
        }
    }
    return conns_to_group;
}
/**
 * @brief Returns all connections of one type as a flat list.
 *
 * @param type the type of the connection, CONN_LOCAL, CONN_GLOBAL, or CONN_TERMINAL
 * @return every connection of that type, grouped by destination in ascending key
 *         order; several links to the same destination appear as separate entries.
 *         Any other type value yields an empty vector.
 *
 * @note Walks the selected map through a pointer; the whole map used to be copied
 *       into a temporary first just to be iterated.
 */
inline vector< Connection > ConnectionManager::get_connections_by_type(ConnectionType type)
{
    vector< Connection > retVec;

    const map< int, vector< Connection > > *table;
    switch (type)
    {
        case CONN_LOCAL:
            table = &intraGroupConnections;
            break;
        case CONN_GLOBAL:
            table = &globalConnections;
            break;
        case CONN_TERMINAL:
            table = &terminalConnections;
            break;
        default:
            return retVec; // unknown type: nothing to report
    }

    map< int, vector< Connection > >::const_iterator it;
    for (it = table->begin(); it != table->end(); ++it)
    {
        retVec.insert(retVec.end(), it->second.begin(), it->second.end());
    }
    return retVec;
}
void ConnectionManager::print_connections()
{
printf("Connections for Router: %d ---------------------------------------\n",_source_id_global);
int ports_printed = 0;
map<int,Connection>::iterator it = _portMap.begin();
for(; it != _portMap.end(); it++)
{
if ( (ports_printed == 0) && (_used_intra_ports > 0) )
{
printf(" -- Intra-Group Connections -- \n");
printf(" Port | Dest_ID | Group\n");
}
if ( (ports_printed == _used_intra_ports) && (_used_inter_ports > 0) )
{
printf(" -- Inter-Group Connections -- \n");
printf(" Port | Dest_ID | Group\n");
}
if ( (ports_printed == _used_intra_ports + _used_inter_ports) && (_used_terminal_ports > 0) )
{
printf(" -- Terminal Connections -- \n");
printf(" Port | Dest_ID | Group\n");
}
int port_num = it->first;
int group_id = it->second.dest_group_id;
int id,gid;
if( get_port_type(port_num) == CONN_LOCAL )
{
id = it->second.dest_lid;
gid = it->second.dest_gid;
printf(" %d -> (%d,%d) : %d \n", port_num, id, gid, group_id);
}
else {
id = it->second.dest_gid;
printf(" %d -> %d : %d \n", port_num, id, group_id);
}
ports_printed++;
}
}
#endif /* end of include guard: CONNECTION_MANAGER_H */
\ No newline at end of file
......@@ -6,11 +6,11 @@
/* This is the base model-net LP that all events pass through before
* performing any topology-specific work. Packet scheduling, dealing with
* packet loss (potentially), etc. happens here.
* Additionally includes wrapper event "send" function that all
* packet loss (potentially), etc. happens here.
* Additionally includes wrapper event "send" function that all
* events for underlying models must go through */
#ifndef MODEL_NET_LP_H
#ifndef MODEL_NET_LP_H
#define MODEL_NET_LP_H
#ifdef __cplusplus
......@@ -23,6 +23,7 @@ extern "C" {
#include "model-net-sched.h"
#include "net/dragonfly.h"
#include "net/dragonfly-custom.h"
#include "net/dragonfly-plus.h"
#include "net/slimfly.h"
#include "net/fattree.h"
#include "net/loggp.h"
......@@ -43,10 +44,10 @@ void model_net_base_configure();
/// model developers
// Construct a model-net-specific event, analogous to a tw_event_new and
// codes_event_new. The difference here is that we return pointers to
// codes_event_new. The difference here is that we return pointers to
// both the message data (to be cast into the appropriate type) and the
// pointer to the end of the event struct.
//
//
// This function is expected to be called within each specific model-net
// method - strange and disturbing things will happen otherwise
tw_event * model_net_method_event_new(
......@@ -131,6 +132,7 @@ typedef struct model_net_wrap_msg {
model_net_base_msg m_base; // base lp
terminal_message m_dfly; // dragonfly
terminal_custom_message m_custom_dfly; // dragonfly-custom
terminal_plus_message m_dfly_plus; // dragonfly plus
slim_terminal_message m_slim; // slimfly
fattree_message m_fat; // fattree
loggp_message m_loggp; // loggp
......
......@@ -41,7 +41,7 @@ extern "C" {
/* HACK: there is currently no scheduling fidelity across multiple
* model_net_event calls. Hence, problems arise when some LP sends multiple
* messages as part of an event and expects FCFS ordering. A proper fix which
* involves model-net LP-level scheduling of requests is ideal, but not
* involves model-net LP-level scheduling of requests is ideal, but not
* feasible for now (would basically have to redesign model-net), so expose
* explicit start-sequence and stop-sequence markers as a workaround
*/
......@@ -74,6 +74,8 @@ typedef struct mn_stats mn_stats;
X(LOGGP, "modelnet_loggp", "loggp", &loggp_method)\
X(EXPRESS_MESH, "modelnet_express_mesh", "express_mesh", &express_mesh_method)\
X(EXPRESS_MESH_ROUTER, "modelnet_express_mesh_router", "express_mesh_router", &express_mesh_router_method)\
X(DRAGONFLY_PLUS, "modelnet_dragonfly_plus", "dragonfly_plus", &dragonfly_plus_method)\
X(DRAGONFLY_PLUS_ROUTER, "modelnet_dragonfly_plus_router", "dragonfly_plus_router", &dragonfly_plus_router_method)\
X(MAX_NETS, NULL, NULL, NULL)
#define X(a,b,c,d) a,
......@@ -144,7 +146,7 @@ struct mn_stats
long max_event_size;
};
/* Registers all model-net LPs in ROSS. Should be called after
/* Registers all model-net LPs in ROSS. Should be called after
* configuration_load, but before codes_mapping_setup */
void model_net_register();
......@@ -166,8 +168,8 @@ void model_net_enable_sampling(tw_stime interval, tw_stime end);
int model_net_sampling_enabled(void);
/* Initialize/configure the network(s) based on the CODES configuration.
* returns an array of the network ids, indexed in the order given by the
* modelnet_order configuration parameter
* returns an array of the network ids, indexed in the order given by the
* modelnet_order configuration parameter
* OUTPUT id_count - the output number of networks */
int* model_net_set_params(int *id_count);
......@@ -189,7 +191,7 @@ void model_net_event_collective_rc(
int message_size,
tw_lp *sender);
/* allocate and transmit a new event that will pass through model_net to
/* allocate and transmit a new event that will pass through model_net to
* arrive at its destination:
*
* - net_id: the type of network to send this message through. The set of
......@@ -231,9 +233,9 @@ void model_net_event_collective_rc(
// first argument becomes the network ID
model_net_event_return model_net_event(
int net_id,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
tw_stime offset,
int remote_event_size,
void const * remote_event,
......@@ -251,9 +253,9 @@ model_net_event_return model_net_event(
model_net_event_return model_net_event_annotated(
int net_id,
char const * annotation,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
tw_stime offset,
int remote_event_size,
void const * remote_event,
......@@ -270,9 +272,9 @@ model_net_event_return model_net_event_mctx(
int net_id,
struct codes_mctx const * send_map_ctx,
struct codes_mctx const * recv_map_ctx,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
tw_stime offset,
int remote_event_size,
void const * remote_event,
......@@ -309,7 +311,7 @@ int model_net_get_msg_sz(int net_id);
* identical to the sender argument to tw_event_new().
*/
/* NOTE: we may end up needing additional arguments here to track state for
* reverse computation; add as needed
* reverse computation; add as needed
*/
DEPRECATED
void model_net_event_rc(
......@@ -333,7 +335,7 @@ void model_net_event_rc2(
* Parameters are largely the same as model_net_event, with the following
* exceptions:
* - final_dest_lp is the lp to pull data from
* - self_event_size, self_event are applied at the requester upon receipt of
* - self_event_size, self_event are applied at the requester upon receipt of
* the payload from the dest
*/
model_net_event_return model_net_pull_event(
......@@ -383,7 +385,7 @@ void model_net_pull_event_rc(
* model-net implementation (currently implemented as a set of translation-unit
* globals). Upon a subsequent model_net_*event* call, the context is consumed
* and reset to an unused state.
*
*
* NOTE: this call MUST be placed in the same calling context as the subsequent
* model_net_*event* call. Otherwise, the parameters are not guaranteed to work
* on the intended event, and may possibly be consumed by another, unrelated
......
/*
* Copyright (C) 2014 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
*/
#ifndef DRAGONFLY_PLUS_H
#define DRAGONFLY_PLUS_H
#ifdef __cplusplus
extern "C" {
#endif
#include <ross.h>
typedef struct terminal_plus_message terminal_plus_message;
/* Event message of the dragonfly plus model.
 * This message is used for both dragonfly compute nodes (terminals) and routers.
 * NOTE(review): the struct is embedded in model_net_wrap_msg as m_dfly_plus, so field
 * order and sizes are presumably part of the event layout - confirm before changing. */
struct terminal_plus_message
{
/* magic number */
int magic;
/* flit travel start time*/
tw_stime travel_start_time;
/* packet ID of the flit */
unsigned long long packet_ID;
/* event type of the flit */
short type;
/* category: comes from codes */
char category[CATEGORY_NAME_MAX];
/* store category hash in the event */
uint32_t category_hash;
/* final destination LP ID, this comes from codes can be a server or any other LP type*/
tw_lpid final_dest_gid;
/*sending LP ID from CODES, can be a server or any other LP type */
tw_lpid sender_lp;
tw_lpid sender_mn_lp; // source modelnet id
/* destination terminal ID of the dragonfly */
tw_lpid dest_terminal_id;
/* source terminal ID of the dragonfly */
unsigned int src_terminal_id;
/* message originating router id. MM: Can we calculate it through
* sender_mn_lp??*/
unsigned int origin_router_id;
/* number of hops traversed by the packet */
short my_N_hop;
/* presumably the local-hop and global-hop counts of the packet - TODO confirm */
short my_l_hop, my_g_hop;
/* NOTE(review): the saved_* fields below look like state kept so an event can be
 * rolled back - confirm against the model implementation */
short saved_channel;
short saved_vc;
int next_stop;
short nonmin_done;
/* Intermediate LP ID from which this message is coming */
unsigned int intm_lp_id;
/* last hop of the message, can be a terminal, local router or global router */
short last_hop;
/* For routing */
int saved_src_dest;
int saved_src_chan;
//DFP Specific Routing
int intm_rtr_id; //Router ID of the intermediate router for nonminimal routes
int intm_group_id; //Group ID of the intermediate router for nonminimal routes
short dfp_upward_channel_flag;
int dfp_dest_terminal_id; //this is the terminal id in the dfp network in range [0-total_num_terminals)
uint32_t chunk_id;
uint32_t packet_size;
uint32_t message_id;
uint32_t total_size;
int remote_event_size_bytes;
int local_event_size_bytes;
// For buffer message
short vc_index;
int output_chan;
model_net_event_return event_rc;
int is_pull;
uint32_t pull_size;
/* for reverse computation */
int path_type;
tw_stime saved_available_time;
tw_stime saved_avg_time;
tw_stime saved_rcv_time;
tw_stime saved_busy_time;
tw_stime saved_total_time;
tw_stime saved_sample_time;
tw_stime msg_start_time;
};
#ifdef __cplusplus
}
#endif
#endif /* end of include guard: DRAGONFLY_PLUS_H */
/*
* Local variables:
* c-indent-level: 4
<