Commit 6a5bd6bf authored by Neil McGlohon's avatar Neil McGlohon

Dragonfly Plus: Add new model from dfcustom

parent 3435735f
......@@ -6,11 +6,11 @@
/* This is the base model-net LP that all events pass through before
* performing any topology-specific work. Packet scheduling, dealing with
* packet loss (potentially), etc. happens here.
* Additionally includes wrapper event "send" function that all
* packet loss (potentially), etc. happens here.
* Additionally includes wrapper event "send" function that all
* events for underlying models must go through */
#ifndef MODEL_NET_LP_H
#ifndef MODEL_NET_LP_H
#define MODEL_NET_LP_H
#ifdef __cplusplus
......@@ -23,6 +23,7 @@ extern "C" {
#include "model-net-sched.h"
#include "net/dragonfly.h"
#include "net/dragonfly-custom.h"
#include "net/dragonfly-plus.h"
#include "net/slimfly.h"
#include "net/fattree.h"
#include "net/loggp.h"
......@@ -43,10 +44,10 @@ void model_net_base_configure();
/// model developers
// Construct a model-net-specific event, analogous to a tw_event_new and
// codes_event_new. The difference here is that we return pointers to
// codes_event_new. The difference here is that we return pointers to
// both the message data (to be cast into the appropriate type) and the
// pointer to the end of the event struct.
//
//
// This function is expected to be called within each specific model-net
// method - strange and disturbing things will happen otherwise
tw_event * model_net_method_event_new(
......@@ -131,6 +132,7 @@ typedef struct model_net_wrap_msg {
model_net_base_msg m_base; // base lp
terminal_message m_dfly; // dragonfly
terminal_custom_message m_custom_dfly; // dragonfly-custom
terminal_plus_message m_dfly_plus; // dragonfly plus
slim_terminal_message m_slim; // slimfly
fattree_message m_fat; // fattree
loggp_message m_loggp; // loggp
......
......@@ -41,7 +41,7 @@ extern "C" {
/* HACK: there is currently no scheduling fidelity across multiple
* model_net_event calls. Hence, problems arise when some LP sends multiple
* messages as part of an event and expects FCFS ordering. A proper fix which
* involves model-net LP-level scheduling of requests is ideal, but not
* involves model-net LP-level scheduling of requests is ideal, but not
* feasible for now (would basically have to redesign model-net), so expose
* explicit start-sequence and stop-sequence markers as a workaround
*/
......@@ -71,6 +71,8 @@ typedef struct mn_stats mn_stats;
X(DRAGONFLY_ROUTER, "modelnet_dragonfly_router", "dragonfly_router", &dragonfly_router_method)\
X(DRAGONFLY_CUSTOM, "modelnet_dragonfly_custom", "dragonfly_custom", &dragonfly_custom_method)\
X(DRAGONFLY_CUSTOM_ROUTER, "modelnet_dragonfly_custom_router", "dragonfly_custom_router", &dragonfly_custom_router_method)\
X(DRAGONFLY_PLUS, "modelnet_dragonfly_plus", "dragonfly_plus", &dragonfly_plus_method)\
X(DRAGONFLY_PLUS_ROUTER, "modelnet_dragonfly_plus_router", "dragonfly_plus_router", &dragonfly_plus_router_method)\
X(LOGGP, "modelnet_loggp", "loggp", &loggp_method)\
X(EXPRESS_MESH, "modelnet_express_mesh", "express_mesh", &express_mesh_method)\
X(EXPRESS_MESH_ROUTER, "modelnet_express_mesh_router", "express_mesh_router", &express_mesh_router_method)\
......@@ -144,7 +146,7 @@ struct mn_stats
long max_event_size;
};
/* Registers all model-net LPs in ROSS. Should be called after
/* Registers all model-net LPs in ROSS. Should be called after
* configuration_load, but before codes_mapping_setup */
void model_net_register();
......@@ -166,8 +168,8 @@ void model_net_enable_sampling(tw_stime interval, tw_stime end);
int model_net_sampling_enabled(void);
/* Initialize/configure the network(s) based on the CODES configuration.
* returns an array of the network ids, indexed in the order given by the
* modelnet_order configuration parameter
* returns an array of the network ids, indexed in the order given by the
* modelnet_order configuration parameter
* OUTPUT id_count - the output number of networks */
int* model_net_set_params(int *id_count);
......@@ -189,7 +191,7 @@ void model_net_event_collective_rc(
int message_size,
tw_lp *sender);
/* allocate and transmit a new event that will pass through model_net to
/* allocate and transmit a new event that will pass through model_net to
* arrive at its destination:
*
* - net_id: the type of network to send this message through. The set of
......@@ -231,9 +233,9 @@ void model_net_event_collective_rc(
// first argument becomes the network ID
model_net_event_return model_net_event(
int net_id,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
tw_stime offset,
int remote_event_size,
void const * remote_event,
......@@ -251,9 +253,9 @@ model_net_event_return model_net_event(
model_net_event_return model_net_event_annotated(
int net_id,
char const * annotation,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
tw_stime offset,
int remote_event_size,
void const * remote_event,
......@@ -270,9 +272,9 @@ model_net_event_return model_net_event_mctx(
int net_id,
struct codes_mctx const * send_map_ctx,
struct codes_mctx const * recv_map_ctx,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
tw_stime offset,
int remote_event_size,
void const * remote_event,
......@@ -309,7 +311,7 @@ int model_net_get_msg_sz(int net_id);
* identical to the sender argument to tw_event_new().
*/
/* NOTE: we may end up needing additional arguments here to track state for
* reverse computation; add as needed
* reverse computation; add as needed
*/
DEPRECATED
void model_net_event_rc(
......@@ -333,7 +335,7 @@ void model_net_event_rc2(
* Parameters are largely the same as model_net_event, with the following
* exceptions:
* - final_dest_lp is the lp to pull data from
* - self_event_size, self_event are applied at the requester upon receipt of
* - self_event_size, self_event are applied at the requester upon receipt of
* the payload from the dest
*/
model_net_event_return model_net_pull_event(
......@@ -383,7 +385,7 @@ void model_net_pull_event_rc(
* model-net implementation (currently implemented as a set of translation-unit
* globals). Upon a subsequent model_net_*event* call, the context is consumed
* and reset to an unused state.
*
*
* NOTE: this call MUST be placed in the same calling context as the subsequent
* model_net_*event* call. Otherwise, the parameters are not guaranteed to work
* on the intended event, and may possibly be consumed by another, unrelated
......
/*
* Copyright (C) 2014 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
*/
#ifndef DRAGONFLY_PLUS_H
#define DRAGONFLY_PLUS_H
#ifdef __cplusplus
extern "C" {
#endif
#include <ross.h>
typedef struct terminal_plus_message terminal_plus_message;
/* Event payload shared by dragonfly-plus compute-node (terminal) LPs and
 * router LPs; the 'type' field selects which event this message carries.
 * The saved_* fields hold prior state for ROSS reverse computation. */
struct terminal_plus_message
{
/* magic number used to sanity-check that an event belongs to this model */
int magic;
/* simulation time at which the flit started traveling */
tw_stime travel_start_time;
/* packet ID of the flit */
unsigned long long packet_ID;
/* event type of the flit */
short type;
/* traffic category name; comes from codes */
char category[CATEGORY_NAME_MAX];
/* hash of 'category' carried in the event so it need not be recomputed */
uint32_t category_hash;
/* final destination LP ID; comes from codes and can be a server or any other LP type */
tw_lpid final_dest_gid;
/* sending LP ID from CODES, can be a server or any other LP type */
tw_lpid sender_lp;
tw_lpid sender_mn_lp; // source modelnet id
/* destination terminal ID of the dragonfly */
tw_lpid dest_terminal_id;
/* source terminal ID of the dragonfly */
unsigned int src_terminal_id;
/* message originating router id. MM: Can we calculate it through
* sender_mn_lp??*/
unsigned int origin_router_id;
/* number of hops traversed by the packet */
short my_N_hop;
/* local (intra-group) and global (inter-group) hop counts */
short my_l_hop, my_g_hop;
/* previously used channel/VC -- presumably saved for reverse computation;
 * TODO confirm against the event handlers */
short saved_channel;
short saved_vc;
/* id of the next stop this packet is forwarded to */
int next_stop;
/* flag: set once a non-minimal routing decision has been made -- verify */
short nonmin_done;
/* Intermediate LP ID from which this message is coming */
unsigned int intm_lp_id;
/* last hop of the message, can be a terminal, local router or global router */
short last_hop;
/* For routing: intermediate router chosen for the path */
int intm_rtr_id;
/* saved routing state (restored on rollback) */
int saved_src_dest;
int saved_src_chan;
/* chunk index within the packet (packets may be divided into chunks) */
uint32_t chunk_id;
uint32_t packet_size;
uint32_t message_id;
uint32_t total_size;
/* sizes of the optional remote/local completion events that ride along
 * with the message payload */
int remote_event_size_bytes;
int local_event_size_bytes;
// For buffer message
short vc_index;
int output_chan;
model_net_event_return event_rc;
/* nonzero for pull (model_net_pull_event) traffic; pull_size is the
 * amount of data requested from the destination */
int is_pull;
uint32_t pull_size;
/* for reverse computation: copies of LP state overwritten by the forward
 * event handler, restored on rollback */
int path_type;
tw_stime saved_available_time;
tw_stime saved_avg_time;
tw_stime saved_rcv_time;
tw_stime saved_busy_time;
tw_stime saved_total_time;
tw_stime saved_sample_time;
/* start time of the whole message (as opposed to this packet's flit) */
tw_stime msg_start_time;
};
#ifdef __cplusplus
}
#endif
#endif /* end of include guard: DRAGONFLY_PLUS_H */
/*
* Local variables:
* c-indent-level: 4
* c-basic-offset: 4
* End:
*
* vim: ft=c ts=8 sts=4 sw=4 expandtab
*/
LPGROUPS
{
MODELNET_GRP
{
repetitions="1520";
# name of this lp changes according to the model
nw-lp="8";
# NOTE(review): modelnet_order below selects dragonfly_plus/dragonfly_plus_router,
# so this LP name looks copied from the dragonfly-custom model and likely needs
# to be modelnet_dragonfly_plus (with a router LP declared) -- verify
modelnet_dragonfly_custom="8";
}
}
PARAMS
{
# packet size in the network
packet_size="1024";
# chunk size in the network (when chunk size = packet size, packets will not be
# divided into chunks)
chunk_size="1024";
# ROSS message size
message_size="608";
# scheduler options
modelnet_scheduler="fcfs";
modelnet_order=( "dragonfly_plus", "dragonfly_plus_router");
# number of fattree levels in each group - DFP design specs 2
num_levels="2";
# number of groups in the network
num_groups="38";
# buffer size in bytes for local virtual channels
local_vc_size="8192";
#buffer size in bytes for global virtual channels
global_vc_size="16384";
#buffer size in bytes for compute node virtual channels
cn_vc_size="8192";
#bandwidth in GiB/s for local channels
local_bandwidth="5.25";
# bandwidth in GiB/s for global channels
global_bandwidth="1.5";
# bandwidth in GiB/s for compute node-router channels
cn_bandwidth="8.0";
# number of compute nodes connected to router, dictated by dragonfly config
# file
num_cns_per_router="8";
# number of global channels per router
num_global_channels="4";
# network config file for intra-group connections
intra-group-connections="../src/network-workloads/conf/dragonfly-custom/intra-custom";
# network config file for inter-group connections
inter-group-connections="../src/network-workloads/conf/dragonfly-custom/inter-custom";
# routing protocol to be used
routing="prog-adaptive";
}
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -140,7 +140,7 @@ void mn_event_collect(model_net_wrap_msg *m, tw_lp *lp, char *buffer, int *colle
type = 9001;
memcpy(buffer, &type, sizeof(type));
break;
case MN_BASE_SAMPLE:
case MN_BASE_SAMPLE:
type = 9002;
memcpy(buffer, &type, sizeof(type));
break;
......@@ -357,6 +357,10 @@ void model_net_base_configure(){
offsetof(model_net_wrap_msg, msg.m_custom_dfly);
msg_offsets[DRAGONFLY_CUSTOM_ROUTER] =
offsetof(model_net_wrap_msg, msg.m_custom_dfly);
msg_offsets[DRAGONFLY_PLUS] =
offsetof(model_net_wrap_msg, msg.m_dfly_plus);
msg_offsets[DRAGONFLY_PLUS_ROUTER] =
offsetof(model_net_wrap_msg, msg.m_dfly_plus);
msg_offsets[SLIMFLY] =
offsetof(model_net_wrap_msg, msg.m_slim);
msg_offsets[FATTREE] =
......@@ -439,10 +443,10 @@ void model_net_base_lp_init(
break;
}
}
ns->nics_per_router = codes_mapping_get_lp_count(group, 1,
lp_type_name, NULL, 1);
ns->msg_id = 0;
ns->next_available_time = 0;
ns->node_copy_next_available_time = (tw_stime*)malloc(ns->params->node_copy_queues * sizeof(tw_stime));
......@@ -596,7 +600,7 @@ void handle_new_msg(
char const *sender_lpname;
int rep_id, offset;
model_net_request *r = &m->msg.m_base.req;
codes_mapping_get_lp_info2(r->src_lp, &sender_group, &sender_lpname,
codes_mapping_get_lp_info2(r->src_lp, &sender_group, &sender_lpname,
NULL, &rep_id, &offset);
num_servers = codes_mapping_get_lp_count(sender_group, 1,
sender_lpname, NULL, 1);
......@@ -607,10 +611,10 @@ void handle_new_msg(
if(!g_tw_mynode) {
fprintf(stdout, "Set num_servers per router %d, servers per "
"injection queue per router %d, servers per node copy queue "
"per node %d\n", num_servers, servers_per_node,
"per node %d\n", num_servers, servers_per_node,
servers_per_node_queue);
}
}
}
if(lp->gid == m->msg.m_base.req.dest_mn_lp) {
model_net_request *r = &m->msg.m_base.req;
......@@ -618,7 +622,7 @@ void handle_new_msg(
codes_mapping_get_lp_info2(r->src_lp, NULL, NULL, NULL, &rep_id, &offset);
int queue = offset/ns->nics_per_router/servers_per_node_queue;
m->msg.m_base.save_ts = ns->node_copy_next_available_time[queue];
tw_stime exp_time = ((ns->node_copy_next_available_time[queue]
tw_stime exp_time = ((ns->node_copy_next_available_time[queue]
> tw_now(lp)) ? ns->node_copy_next_available_time[queue] : tw_now(lp));
exp_time += r->msg_size * codes_cn_delay;
exp_time -= tw_now(lp);
......@@ -632,7 +636,7 @@ void handle_new_msg(
tw_event *e = tw_event_new(r->final_dest_lp, exp_time, lp);
memcpy(tw_event_data(e), e_msg, remote_event_size);
tw_event_send(e);
e_msg = (char*)e_msg + remote_event_size;
e_msg = (char*)e_msg + remote_event_size;
}
if (self_event_size > 0) {
exp_time += delay;
......@@ -658,8 +662,8 @@ void handle_new_msg(
int self_event_size = r->self_event_size;
if (remote_event_size > 0){
memcpy(e_new_msg, e_msg, remote_event_size);
e_msg = (char*)e_msg + remote_event_size;
e_new_msg = (char*)e_new_msg + remote_event_size;
e_msg = (char*)e_msg + remote_event_size;
e_new_msg = (char*)e_new_msg + remote_event_size;
}
if (self_event_size > 0){
memcpy(e_new_msg, e_msg, self_event_size);
......@@ -667,7 +671,7 @@ void handle_new_msg(
m_new->msg.m_base.isQueueReq = 0;
tw_event_send(e);
return;
}
}
// simply pass down to the scheduler
model_net_request *r = &m->msg.m_base.req;
// don't forget to set packet size, now that we're responsible for it!
......@@ -689,7 +693,7 @@ void handle_new_msg(
if(num_servers == -1) {
char const *sender_group;
char const *sender_lpname;
codes_mapping_get_lp_info2(r->src_lp, &sender_group, &sender_lpname,
codes_mapping_get_lp_info2(r->src_lp, &sender_group, &sender_lpname,
NULL, &rep_id, &offset);
num_servers = codes_mapping_get_lp_count(sender_group, 1,
sender_lpname, NULL, 1);
......@@ -893,7 +897,7 @@ void model_net_method_idle_event(tw_stime offset_ts, int is_recv_queue,
model_net_method_idle_event2(offset_ts, is_recv_queue, 0, lp);
}
void model_net_method_idle_event2(tw_stime offset_ts, int is_recv_queue,
void model_net_method_idle_event2(tw_stime offset_ts, int is_recv_queue,
int queue_offset, tw_lp * lp){
tw_event *e = tw_event_new(lp->gid, offset_ts, lp);
model_net_wrap_msg *m_wrap = tw_event_data(e);
......
......@@ -23,6 +23,8 @@ extern struct model_net_method simplep2p_method;
extern struct model_net_method torus_method;
extern struct model_net_method dragonfly_method;
extern struct model_net_method dragonfly_custom_method;
extern struct model_net_method dragonfly_plus_method;
extern struct model_net_method dragonfly_plus_router_method;
extern struct model_net_method slimfly_method;
extern struct model_net_method fattree_method;
extern struct model_net_method dragonfly_router_method;
......@@ -45,7 +47,7 @@ char * model_net_method_names[] = {
/* Global array initialization, terminated with a NULL entry */
#define X(a,b,c,d) d,
struct model_net_method* method_array[] = {
struct model_net_method* method_array[] = {
NETWORK_DEF
};
#undef X
......@@ -75,7 +77,7 @@ void model_net_register(){
for (int lpt = 0; lpt < lpgroup->lptypes_count; lpt++){
char const *nm = lpgroup->lptypes[lpt].name.ptr;
for (int n = 0; n < MAX_NETS; n++){
if (!do_config_nets[n] &&
if (!do_config_nets[n] &&
strcmp(model_net_lp_config_names[n], nm) == 0){
do_config_nets[n] = 1;
break;
......@@ -140,7 +142,7 @@ int* model_net_configure(int *id_count){
// init the per-msg params here
memset(is_msg_params_set, 0,
MAX_MN_MSG_PARAM_TYPES*sizeof(*is_msg_params_set));
ret = configuration_get_value_double(&config, "PARAMS", "intra_bandwidth", NULL,
&cn_bandwidth);
if(ret && !g_tw_mynode) {
......@@ -152,7 +154,7 @@ int* model_net_configure(int *id_count){
if(!g_tw_mynode) {
printf("within node transfer per byte delay is %f\n", codes_cn_delay);
}
ret = configuration_get_value_int(&config, "PARAMS", "node_eager_limit", NULL,
&codes_node_eager_limit);
if(ret && !g_tw_mynode) {
......@@ -180,7 +182,7 @@ void model_net_write_stats(tw_lpid lpid, struct mn_stats* stat)
char data[1024];
sprintf(id, "model-net-category-%s", stat->category);
sprintf(data, "lp:%ld\tsend_count:%ld\tsend_bytes:%ld\tsend_time:%f\t"
sprintf(data, "lp:%ld\tsend_count:%ld\tsend_bytes:%ld\tsend_time:%f\t"
"recv_count:%ld\trecv_bytes:%ld\trecv_time:%f\tmax_event_size:%ld\n",
(long)lpid,
stat->send_count,
......@@ -301,9 +303,9 @@ static model_net_event_return model_net_event_impl_base(
int net_id,
struct codes_mctx const * send_map_ctx,
struct codes_mctx const * recv_map_ctx,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
int is_pull,
tw_stime offset,
int remote_event_size,
......@@ -312,7 +314,7 @@ static model_net_event_return model_net_event_impl_base(
void const * self_event,
tw_lp *sender) {
if (remote_event_size + self_event_size + sizeof(model_net_wrap_msg)
if (remote_event_size + self_event_size + sizeof(model_net_wrap_msg)
> g_tw_msg_sz){
tw_error(TW_LOC, "Error: model_net trying to transmit an event of size "
"%d but ROSS is configured for events of size %zd\n",
......@@ -343,7 +345,7 @@ static model_net_event_return model_net_event_impl_base(
model_net_wrap_msg *m = tw_event_data(e);
msg_set_header(model_net_base_magic, MN_BASE_NEW_MSG, sender->gid, &m->h);
// set the request struct
// set the request struct
model_net_request *r = &m->msg.m_base.req;
r->final_dest_lp = final_dest_lp;
r->dest_mn_lp = dest_mn_lp;
......@@ -377,14 +379,14 @@ static model_net_event_return model_net_event_impl_base(
m->msg.m_base.sched_params = sched_params;
else // set the default
model_net_sched_set_default_params(&m->msg.m_base.sched_params);
// once params are set, clear the flags
// once params are set, clear the flags
memset(is_msg_params_set, 0,
MAX_MN_MSG_PARAM_TYPES*sizeof(*is_msg_params_set));
void *e_msg = (m+1);
if (remote_event_size > 0){
memcpy(e_msg, remote_event, remote_event_size);
e_msg = (char*)e_msg + remote_event_size;
e_msg = (char*)e_msg + remote_event_size;
}
if (self_event_size > 0){
memcpy(e_msg, self_event, self_event_size);
......@@ -401,9 +403,9 @@ static void model_net_event_impl_base_rc(tw_lp *sender){
model_net_event_return model_net_event(
int net_id,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
tw_stime offset,
int remote_event_size,
void const * remote_event,
......@@ -420,9 +422,9 @@ model_net_event_return model_net_event(
model_net_event_return model_net_event_annotated(
int net_id,
char const * annotation,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
tw_stime offset,
int remote_event_size,
void const * remote_event,
......@@ -439,9 +441,9 @@ model_net_event_return model_net_event_mctx(
int net_id,
struct codes_mctx const * send_map_ctx,
struct codes_mctx const * recv_map_ctx,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
char const * category,
tw_lpid final_dest_lp,
uint64_t message_size,
tw_stime offset,
int remote_event_size,
void const * remote_event,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment