Commit bfc3531f authored by Nikhil's avatar Nikhil
Browse files

Copy dragonfly to dragonfly-custom

Change-Id: I4e6919802940df120b87ff4c709d3825993373ac
parent 2e81c3e0
/*
* Copyright (C) 2013 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
*/
// Local router ID: 0 --- total_router-1
// Router LP ID
// Terminal LP ID
#include <ross.h>
#define DEBUG_LP 892
#include "codes/jenkins-hash.h"
#include "codes/codes_mapping.h"
#include "codes/codes.h"
#include "codes/model-net.h"
#include "codes/model-net-method.h"
#include "codes/model-net-lp.h"
#include "codes/net/dragonfly.h"
#include "sys/file.h"
#include "codes/quickhash.h"
#include "codes/rc-stack.h"
#define CREDIT_SIZE 8
#define MEAN_PROCESS 1.0
/* collective specific parameters */
#define TREE_DEGREE 4
#define LEVEL_DELAY 1000
#define DRAGONFLY_COLLECTIVE_DEBUG 0
#define NUM_COLLECTIVES 1
#define COLLECTIVE_COMPUTATION_DELAY 5700
#define DRAGONFLY_FAN_OUT_DELAY 20.0
#define WINDOW_LENGTH 0
#define DFLY_HASH_TABLE_SIZE 262144
// debugging parameters
#define TRACK -1
#define TRACK_PKT -1
#define TRACK_MSG -1
#define PRINT_ROUTER_TABLE 1
#define DEBUG 0
#define USE_DIRECT_SCHEME 1
#define MAX_STATS 65536
#define LP_CONFIG_NM_TERM (model_net_lp_config_names[DRAGONFLY])
#define LP_METHOD_NM_TERM (model_net_method_names[DRAGONFLY])
#define LP_CONFIG_NM_ROUT (model_net_lp_config_names[DRAGONFLY_ROUTER])
#define LP_METHOD_NM_ROUT (model_net_method_names[DRAGONFLY_ROUTER])
int debug_slot_count = 0;
long term_ecount, router_ecount, term_rev_ecount, router_rev_ecount;
long packet_gen = 0, packet_fin = 0;
static double maxd(double a, double b) { return a < b ? b : a; }
/* minimal and non-minimal packet counts for adaptive routing*/
static int minimal_count=0, nonmin_count=0;
static int num_routers_per_mgrp = 0;
typedef struct dragonfly_param dragonfly_param;
/* annotation-specific parameters (unannotated entry occurs at the
* last index) */
static uint64_t num_params = 0;
static dragonfly_param * all_params = NULL;
static const config_anno_map_t * anno_map = NULL;
/* global variables for codes mapping */
static char lp_group_name[MAX_NAME_LENGTH];
static int mapping_grp_id, mapping_type_id, mapping_rep_id, mapping_offset;
/* router magic number */
int router_magic_num = 0;
/* terminal magic number */
int terminal_magic_num = 0;
FILE * dragonfly_log = NULL;
int sample_bytes_written = 0;
int sample_rtr_bytes_written = 0;
char cn_sample_file[MAX_NAME_LENGTH];
char router_sample_file[MAX_NAME_LENGTH];
typedef struct terminal_message_list terminal_message_list;
struct terminal_message_list {
terminal_message msg;
char* event_data;
terminal_message_list *next;
terminal_message_list *prev;
};
void init_terminal_message_list(terminal_message_list *this,
terminal_message *inmsg) {
this->msg = *inmsg;
this->event_data = NULL;
this->next = NULL;
this->prev = NULL;
}
void delete_terminal_message_list(terminal_message_list *this) {
if(this->event_data != NULL) free(this->event_data);
free(this);
}
struct dragonfly_param
{
// configuration parameters
int num_routers; /*Number of routers in a group*/
double local_bandwidth;/* bandwidth of the router-router channels within a group */
double global_bandwidth;/* bandwidth of the inter-group router connections */
double cn_bandwidth;/* bandwidth of the compute node channels connected to routers */
int num_vcs; /* number of virtual channels */
int local_vc_size; /* buffer size of the router-router channels */
int global_vc_size; /* buffer size of the global channels */
int cn_vc_size; /* buffer size of the compute node channels */
int chunk_size; /* full-sized packets are broken into smaller chunks.*/
// derived parameters
int num_cn;
int num_groups;
int radix;
int total_routers;
int total_terminals;
int num_global_channels;
double cn_delay;
double local_delay;
double global_delay;
double credit_delay;
double router_delay;
};
struct dfly_hash_key
{
uint64_t message_id;
tw_lpid sender_id;
};
struct dfly_router_sample
{
tw_lpid router_id;
tw_stime* busy_time;
int64_t* link_traffic_sample;
tw_stime end_time;
long fwd_events;
long rev_events;
};
struct dfly_cn_sample
{
tw_lpid terminal_id;
long fin_chunks_sample;
long data_size_sample;
double fin_hops_sample;
tw_stime fin_chunks_time;
tw_stime busy_time_sample;
tw_stime end_time;
long fwd_events;
long rev_events;
};
struct dfly_qhash_entry
{
struct dfly_hash_key key;
char * remote_event_data;
int num_chunks;
int remote_event_size;
struct qhash_head hash_link;
};
/* handles terminal and router events like packet generate/send/receive/buffer */
typedef enum event_t event_t;
typedef struct terminal_state terminal_state;
typedef struct router_state router_state;
/* dragonfly compute node data structure */
struct terminal_state
{
uint64_t packet_counter;
int packet_gen;
int packet_fin;
// Dragonfly specific parameters
unsigned int router_id;
unsigned int terminal_id;
// Each terminal will have an input and output channel with the router
int* vc_occupancy; // NUM_VC
int num_vcs;
tw_stime terminal_available_time;
terminal_message_list **terminal_msgs;
terminal_message_list **terminal_msgs_tail;
int in_send_loop;
// Terminal generate, sends and arrival T_SEND, T_ARRIVAL, T_GENERATE
// Router-Router Intra-group sends and receives RR_LSEND, RR_LARRIVE
// Router-Router Inter-group sends and receives RR_GSEND, RR_GARRIVE
struct mn_stats dragonfly_stats_array[CATEGORY_MAX];
/* collective init time */
tw_stime collective_init_time;
/* node ID in the tree */
tw_lpid node_id;
/* messages sent & received in collectives may get interchanged several times so we have to save the
origin server information in the node's state */
tw_lpid origin_svr;
/* parent node ID of the current node */
tw_lpid parent_node_id;
/* array of children to be allocated in terminal_init*/
tw_lpid* children;
/* children of a node can be less than or equal to the tree degree */
int num_children;
short is_root;
short is_leaf;
struct rc_stack * st;
int issueIdle;
int terminal_length;
/* to maintain a count of child nodes that have fanned in at the parent during the collective
fan-in phase*/
int num_fan_nodes;
const char * anno;
const dragonfly_param *params;
struct qhash_table *rank_tbl;
uint64_t rank_tbl_pop;
tw_stime total_time;
uint64_t total_msg_size;
double total_hops;
long finished_msgs;
long finished_chunks;
long finished_packets;
tw_stime last_buf_full;
tw_stime busy_time;
char output_buf[4096];
/* For LP suspend functionality */
int error_ct;
/* For sampling */
long fin_chunks_sample;
long data_size_sample;
double fin_hops_sample;
tw_stime fin_chunks_time;
tw_stime busy_time_sample;
char sample_buf[4096];
struct dfly_cn_sample * sample_stat;
int op_arr_size;
int max_arr_size;
/* for logging forward and reverse events */
long fwd_events;
long rev_events;
};
/* terminal event type (1-4) */
enum event_t
{
T_GENERATE=1,
T_ARRIVE,
T_SEND,
T_BUFFER,
R_SEND,
R_ARRIVE,
R_BUFFER,
D_COLLECTIVE_INIT,
D_COLLECTIVE_FAN_IN,
D_COLLECTIVE_FAN_OUT
};
/* status of a virtual channel can be idle, active, allocated or wait for credit */
enum vc_status
{
VC_IDLE,
VC_ACTIVE,
VC_ALLOC,
VC_CREDIT
};
/* whether the last hop of a packet was global, local or a terminal */
enum last_hop
{
GLOBAL,
LOCAL,
TERMINAL
};
/* three forms of routing algorithms available, adaptive routing is not
* accurate and fully functional in the current version as the formulas
* for detecting load on global channels are not very accurate */
enum ROUTING_ALGO
{
MINIMAL = 0,
NON_MINIMAL,
ADAPTIVE,
PROG_ADAPTIVE
};
struct router_state
{
unsigned int router_id;
int group_id;
int op_arr_size;
int max_arr_size;
int* global_channel;
tw_stime* next_output_available_time;
tw_stime* cur_hist_start_time;
tw_stime* last_buf_full;
tw_stime* busy_time;
tw_stime* busy_time_sample;
terminal_message_list ***pending_msgs;
terminal_message_list ***pending_msgs_tail;
terminal_message_list ***queued_msgs;
terminal_message_list ***queued_msgs_tail;
int *in_send_loop;
int *queued_count;
struct rc_stack * st;
int** vc_occupancy;
int64_t* link_traffic;
int64_t * link_traffic_sample;
const char * anno;
const dragonfly_param *params;
int* prev_hist_num;
int* cur_hist_num;
char output_buf[4096];
char output_buf2[4096];
struct dfly_router_sample * rsamples;
long fwd_events;
long rev_events;
};
static short routing = MINIMAL;
static tw_stime dragonfly_total_time = 0;
static tw_stime dragonfly_max_latency = 0;
static tw_stime max_collective = 0;
static long long total_hops = 0;
static long long N_finished_packets = 0;
static long long total_msg_sz = 0;
static long long N_finished_msgs = 0;
static long long N_finished_chunks = 0;
static int dragonfly_rank_hash_compare(
void *key, struct qhash_head *link)
{
struct dfly_hash_key *message_key = (struct dfly_hash_key *)key;
struct dfly_qhash_entry *tmp = NULL;
tmp = qhash_entry(link, struct dfly_qhash_entry, hash_link);
if (tmp->key.message_id == message_key->message_id
&& tmp->key.sender_id == message_key->sender_id)
return 1;
return 0;
}
static int dragonfly_hash_func(void *k, int table_size)
{
struct dfly_hash_key *tmp = (struct dfly_hash_key *)k;
//uint32_t pc = 0, pb = 0;
//bj_hashlittle2(tmp, sizeof(*tmp), &pc, &pb);
uint64_t key = (~tmp->message_id) + (tmp->message_id << 18);
key = key * 21;
key = ~key ^ (tmp->sender_id >> 4);
key = key * tmp->sender_id;
return (int)(key & (table_size - 1));
//return (int)(pc % (table_size - 1));
}
/* convert GiB/s and bytes to ns */
static tw_stime bytes_to_ns(uint64_t bytes, double GB_p_s)
{
tw_stime time;
/* bytes to GB */
time = ((double)bytes)/(1024.0*1024.0*1024.0);
/* GiB to s */
time = time / GB_p_s;
/* s to ns */
time = time * 1000.0 * 1000.0 * 1000.0;
return(time);
}
/* returns the dragonfly message size */
static int dragonfly_get_msg_sz(void)
{
return sizeof(terminal_message);
}
static void free_tmp(void * ptr)
{
struct dfly_qhash_entry * dfly = ptr;
free(dfly->remote_event_data);
free(dfly);
}
static void append_to_terminal_message_list(
terminal_message_list ** thisq,
terminal_message_list ** thistail,
int index,
terminal_message_list *msg) {
if(thisq[index] == NULL) {
thisq[index] = msg;
} else {
thistail[index]->next = msg;
msg->prev = thistail[index];
}
thistail[index] = msg;
}
static void prepend_to_terminal_message_list(
terminal_message_list ** thisq,
terminal_message_list ** thistail,
int index,
terminal_message_list *msg) {
if(thisq[index] == NULL) {
thistail[index] = msg;
} else {
thisq[index]->prev = msg;
msg->next = thisq[index];
}
thisq[index] = msg;
}
static terminal_message_list* return_head(
terminal_message_list ** thisq,
terminal_message_list ** thistail,
int index) {
terminal_message_list *head = thisq[index];
if(head != NULL) {
thisq[index] = head->next;
if(head->next != NULL) {
head->next->prev = NULL;
head->next = NULL;
} else {
thistail[index] = NULL;
}
}
return head;
}
static terminal_message_list* return_tail(
terminal_message_list ** thisq,
terminal_message_list ** thistail,
int index) {
terminal_message_list *tail = thistail[index];
assert(tail);
if(tail->prev != NULL) {
tail->prev->next = NULL;
thistail[index] = tail->prev;
tail->prev = NULL;
} else {
thistail[index] = NULL;
thisq[index] = NULL;
}
return tail;
}
static void dragonfly_read_config(const char * anno, dragonfly_param *params){
// shorthand
dragonfly_param *p = params;
int rc = configuration_get_value_int(&config, "PARAMS", "num_routers", anno,
&p->num_routers);
if(rc) {
p->num_routers = 4;
fprintf(stderr, "Number of dimensions not specified, setting to %d\n",
p->num_routers);
}
p->num_vcs = 3;
rc = configuration_get_value_int(&config, "PARAMS", "local_vc_size", anno, &p->local_vc_size);
if(rc) {
p->local_vc_size = 1024;
fprintf(stderr, "Buffer size of local channels not specified, setting to %d\n", p->local_vc_size);
}
rc = configuration_get_value_int(&config, "PARAMS", "global_vc_size", anno, &p->global_vc_size);
if(rc) {
p->global_vc_size = 2048;
fprintf(stderr, "Buffer size of global channels not specified, setting to %d\n", p->global_vc_size);
}
rc = configuration_get_value_int(&config, "PARAMS", "cn_vc_size", anno, &p->cn_vc_size);
if(rc) {
p->cn_vc_size = 1024;
fprintf(stderr, "Buffer size of compute node channels not specified, setting to %d\n", p->cn_vc_size);
}
rc = configuration_get_value_int(&config, "PARAMS", "chunk_size", anno, &p->chunk_size);
if(rc) {
p->chunk_size = 512;
fprintf(stderr, "Chunk size for packets is specified, setting to %d\n", p->chunk_size);
}
rc = configuration_get_value_double(&config, "PARAMS", "local_bandwidth", anno, &p->local_bandwidth);
if(rc) {
p->local_bandwidth = 5.25;
fprintf(stderr, "Bandwidth of local channels not specified, setting to %lf\n", p->local_bandwidth);
}
rc = configuration_get_value_double(&config, "PARAMS", "global_bandwidth", anno, &p->global_bandwidth);
if(rc) {
p->global_bandwidth = 4.7;
fprintf(stderr, "Bandwidth of global channels not specified, setting to %lf\n", p->global_bandwidth);
}
rc = configuration_get_value_double(&config, "PARAMS", "cn_bandwidth", anno, &p->cn_bandwidth);
if(rc) {
p->cn_bandwidth = 5.25;
fprintf(stderr, "Bandwidth of compute node channels not specified, setting to %lf\n", p->cn_bandwidth);
}
p->router_delay = 50;
configuration_get_value_double(&config, "PARAMS", "router_delay", anno,
&p->router_delay);
configuration_get_value(&config, "PARAMS", "cn_sample_file", anno, cn_sample_file,
MAX_NAME_LENGTH);
configuration_get_value(&config, "PARAMS", "rt_sample_file", anno, router_sample_file,
MAX_NAME_LENGTH);
char routing_str[MAX_NAME_LENGTH];
configuration_get_value(&config, "PARAMS", "routing", anno, routing_str,
MAX_NAME_LENGTH);
if(strcmp(routing_str, "minimal") == 0)
routing = MINIMAL;
else if(strcmp(routing_str, "nonminimal")==0 ||
strcmp(routing_str,"non-minimal")==0)
routing = NON_MINIMAL;
else if (strcmp(routing_str, "adaptive") == 0)
routing = ADAPTIVE;
else if (strcmp(routing_str, "prog-adaptive") == 0)
routing = PROG_ADAPTIVE;
else
{
fprintf(stderr,
"No routing protocol specified, setting to minimal routing\n");
routing = -1;
}
// set the derived parameters
p->num_cn = p->num_routers/2;
p->num_global_channels = p->num_routers/2;
p->num_groups = p->num_routers * p->num_cn + 1;
p->radix = (p->num_routers + p->num_global_channels + p->num_cn);
p->total_routers = p->num_groups * p->num_routers;
p->total_terminals = p->total_routers * p->num_cn;
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if(!rank) {
printf("\n Total nodes %d routers %d groups %d radix %d \n",
p->num_cn * p->total_routers, p->total_routers, p->num_groups,
p->radix);
}
p->cn_delay = bytes_to_ns(p->chunk_size, p->cn_bandwidth);
p->local_delay = bytes_to_ns(p->chunk_size, p->local_bandwidth);
p->global_delay = bytes_to_ns(p->chunk_size, p->global_bandwidth);
p->credit_delay = bytes_to_ns(8.0, p->local_bandwidth); //assume 8 bytes packet
}
static void dragonfly_configure(){
anno_map = codes_mapping_get_lp_anno_map(LP_CONFIG_NM_TERM);
assert(anno_map);
num_params = anno_map->num_annos + (anno_map->has_unanno_lp > 0);
all_params = malloc(num_params * sizeof(*all_params));
for (int i = 0; i < anno_map->num_annos; i++){
const char * anno = anno_map->annotations[i].ptr;
dragonfly_read_config(anno, &all_params[i]);
}
if (anno_map->has_unanno_lp > 0){
dragonfly_read_config(NULL, &all_params[anno_map->num_annos]);
}
}
/* report dragonfly statistics like average and maximum packet latency, average number of hops traversed */
static void dragonfly_report_stats()
{
long long avg_hops, total_finished_packets, total_finished_chunks;
long long total_finished_msgs, final_msg_sz;
tw_stime avg_time, max_time;
int total_minimal_packets, total_nonmin_packets;
long total_gen, total_fin;
MPI_Reduce( &total_hops, &avg_hops, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce( &N_finished_packets, &total_finished_packets, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce( &N_finished_msgs, &total_finished_msgs, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce( &N_finished_chunks, &total_finished_chunks, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);