Commit d1dad464 authored by Nikhil's avatar Nikhil

Create template to ease adding a network

For the simplest case, only 2 functions and preamble changes should suffice to add a
new network. Update Express Mesh to serve as an example.

Change-Id: I48de25b4c00360201cb23a4a7089ddc08300137d
parent 4ede6937
......@@ -32,6 +32,7 @@ extern void free_tmp(void * ptr);
typedef struct message_list message_list;
struct message_list {
//CHANGE: add message types for new networks here
union {
terminal_message dfly_msg;
em_message em_msg;
......
......@@ -4,6 +4,7 @@
*
*/
//CHANGE: modify to match your header file name
#ifndef EXPRESS_MESH_H
#define EXPRESS_MESH_H
......@@ -13,76 +14,67 @@ extern "C" {
#include <ross.h>
//CHANGE: modify to match the struct
typedef struct net_message net_message;
//CHANGE: modify the struct name - add to message_list union in common-net.h
typedef struct em_message em_message;
struct em_message
{
/* magic number */
int magic;
/* flit travel start time*/
tw_stime travel_start_time;
/* packet ID of the flit */
unsigned long long packet_ID;
/* event type of the flit */
short type;
/* category: comes from codes */
char category[CATEGORY_NAME_MAX];
/* final destination LP ID, this comes from codes can be a server or any other LP type*/
tw_lpid final_dest_gid;
/*sending LP ID from CODES, can be a server or any other LP type */
tw_lpid sender_lp;
tw_lpid sender_mn_lp; // source modelnet id
/* destination terminal ID */
tw_lpid dest_terminal_id;
int dest_terminal;
/* source terminal ID */
tw_lpid src_terminal_id;
short saved_channel;
short my_N_hop;
short hops[8];
//common entries:
int magic; /* magic number */
short type; /* event type of the flit */
/* Intermediate LP ID from which this message is coming */
unsigned int intm_lp_id;
short saved_vc;
short dim_change;
/* last hop of the message, can be a terminal, local router or global router */
int last_hop;
/* For routing */
uint64_t chunk_id;
uint64_t packet_size;
uint64_t message_id;
uint64_t total_size;
tw_stime travel_start_time; /* flit travel start time*/
unsigned long long packet_ID; /* packet ID of the flit */
char category[CATEGORY_NAME_MAX]; /* category: comes from codes */
int saved_remote_esize;
int remote_event_size_bytes;
int local_event_size_bytes;
tw_lpid final_dest_gid; /* final destination LP ID, this comes from codes can be a server or any other LP type*/
tw_lpid sender_lp; /*sending LP ID from CODES, can be a server or any other LP type */
tw_lpid sender_mn_lp; // source modelnet id (think NIC)
tw_lpid src_terminal_id; /* source terminal ID - mostly same as sender_mn_lp */
tw_lpid dest_terminal_id; /* destination modelnet id */
int dest_terminal; /* logical id of destination modelnet id */
// For buffer message
int vc_index;
int output_chan;
model_net_event_return event_rc;
/* packet/message identifier and status */
uint64_t chunk_id; //which chunk of packet I am
uint64_t packet_size; //what is the size of my packet
uint64_t message_id; //seq number at message level - NIC specified
uint64_t total_size; //total size of the message
int remote_event_size_bytes; // data size for target event at destination
int local_event_size_bytes; // data size for event at source
int is_pull;
uint64_t pull_size;
tw_stime msg_start_time;
/* for reverse computation */
//info for path traversal
short my_N_hop; /* hops traversed so far */
short hops[8]; /* can be used for storing different types of hops */
unsigned int intm_lp_id; /* Intermediate LP ID that sent this packet */
int last_hop; /* last hop of the message, can be a terminal, local router or global router */
int vc_index; /* stores port info */
int output_chan; /* virtual channel within port */
//info for reverse computation
short saved_channel;
short saved_vc;
model_net_event_return event_rc;
tw_stime saved_available_time;
tw_stime saved_avg_time;
tw_stime saved_rcv_time;
tw_stime saved_busy_time;
tw_stime saved_busy_time;
tw_stime saved_total_time;
tw_stime saved_hist_start_time;
tw_stime saved_sample_time;
tw_stime msg_start_time;
int saved_hist_num;
int saved_occupancy;
//CHANGE: info for specific networks
short dim_change;
};
#ifdef __cplusplus
}
#endif
#endif
#endif
/*
* Copyright (C) 2014 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
*/
//CHANGE: modify to match your header file name
#ifndef NET_TEMPLATE_H
#define NET_TEMPLATE_H
#ifdef __cplusplus
extern "C" {
#endif
#include <ross.h>
//CHANGE: modify to match the struct
typedef struct net_message net_message;
//CHANGE: modify the struct name - add to message_list union in common-net.h
/*
 * Template message struct for a new network model. Fields are grouped into
 * common entries, packet/message identification, path traversal info and
 * values saved for reverse computation; network-specific fields go at the end.
 */
struct net_message
{
//common entries:
int magic; /* magic number */
short type; /* event type of the flit */
tw_stime travel_start_time; /* flit travel start time */
unsigned long long packet_ID; /* packet ID of the flit */
char category[CATEGORY_NAME_MAX]; /* category: comes from codes */
tw_lpid final_dest_gid; /* final destination LP ID; this comes from codes and can be a server or any other LP type */
tw_lpid sender_lp; /* sending LP ID from CODES; can be a server or any other LP type */
tw_lpid sender_mn_lp; // source modelnet id (think NIC)
tw_lpid src_terminal_id; /* source terminal ID - mostly same as sender_mn_lp */
tw_lpid dest_terminal_id; /* destination modelnet id */
int dest_terminal; /* logical id of destination modelnet id */
/* packet/message identifier and status */
uint64_t chunk_id; // which chunk of the packet this is
uint64_t packet_size; // size of the packet this chunk belongs to
uint64_t message_id; // sequence number at message level - NIC specified
uint64_t total_size; // total size of the message
int remote_event_size_bytes; // data size for target event at destination
int local_event_size_bytes; // data size for event at source
int is_pull; // presumably non-zero when this message is a pull request - TODO confirm
uint64_t pull_size; // presumably the amount of data requested by a pull - TODO confirm
tw_stime msg_start_time; // presumably the time at which the message started - TODO confirm
//info for path traversal
short my_N_hop; /* hops traversed so far */
short hops[8]; /* can be used for storing different types of hops */
unsigned int intm_lp_id; /* Intermediate LP ID that sent this packet */
int last_hop; /* last hop of the message, can be a terminal, local router or global router */
int vc_index; /* stores port info */
int output_chan; /* virtual channel within port */
//info for reverse computation
// NOTE(review): the saved_* fields below look like snapshots of LP state taken
// in the forward handler so the reverse handler can restore them - confirm
// against the event handlers.
short saved_channel;
short saved_vc;
model_net_event_return event_rc;
tw_stime saved_available_time;
tw_stime saved_avg_time;
tw_stime saved_rcv_time;
tw_stime saved_busy_time;
tw_stime saved_total_time;
tw_stime saved_hist_start_time;
tw_stime saved_sample_time;
//CHANGE: add info for specific networks
};
#ifdef __cplusplus
}
#endif
#endif
......@@ -6,7 +6,10 @@
#include "codes/model-net.h"
#include "codes/model-net-method.h"
#include "codes/model-net-lp.h"
//CHANGE: use the network file created
#include "codes/net/express-mesh.h"
#include "codes/net/common-net.h"
#include "sys/file.h"
#include "codes/quickhash.h"
......@@ -14,27 +17,30 @@
#include <vector>
#define CREDIT_SZ 8
#define EM_HASH_TABLE_SIZE 262144
#define HASH_TABLE_SIZE 262144
#define MULT_FACTOR 2
#define DEBUG 0
#define MAX_STATS 65536
#define MULT_FACTOR 2
#define LP_CONFIG_NM_TERM (model_net_lp_config_names[EXPRESS_MESH])
#define LP_METHOD_NM_TERM (model_net_method_names[EXPRESS_MESH])
#define LP_CONFIG_NM_ROUT (model_net_lp_config_names[EXPRESS_MESH_ROUTER])
#define LP_METHOD_NM_ROUT (model_net_method_names[EXPRESS_MESH_ROUTER])
//CHANGE: define them for the local network
#define LOCAL_NETWORK_NAME EXPRESS_MESH
#define LOCAL_NETWORK_ROUTER_NAME EXPRESS_MESH_ROUTER
#define LOCAL_MSG_STRUCT em_message
#define LOCAL_MSG_NAME_FROM_UNION em_msg
#define LP_CONFIG_NM_TERM (model_net_lp_config_names[LOCAL_NETWORK_NAME])
#define LP_METHOD_NM_TERM (model_net_method_names[LOCAL_NETWORK_NAME])
#define LP_CONFIG_NM_ROUT (model_net_lp_config_names[LOCAL_NETWORK_ROUTER_NAME])
#define LP_METHOD_NM_ROUT (model_net_method_names[LOCAL_NETWORK_ROUTER_NAME])
static long packet_gen = 0, packet_fin = 0;
/* Return the larger of two doubles; if a is not less than b, a is returned. */
static double maxd(double a, double b)
{
    if (a < b) {
        return b;
    }
    return a;
}
typedef struct em_param em_param;
/* annotation-specific parameters (unannotated entry occurs at the
* last index) */
typedef struct local_param local_param;
static uint64_t num_params = 0;
static em_param * all_params = NULL;
static local_param * all_params = NULL;
static const config_anno_map_t * anno_map = NULL;
/* global variables for codes mapping */
......@@ -50,12 +56,12 @@ static int terminal_magic_num = 0;
static int sample_bytes_written = 0;
static int sample_rtr_bytes_written = 0;
char em_cn_sample_file[MAX_NAME_LENGTH];
char em_rtr_sample_file[MAX_NAME_LENGTH];
static char local_cn_sample_file[MAX_NAME_LENGTH];
static char local_rtr_sample_file[MAX_NAME_LENGTH];
static void init_message_list(message_list *thism,
em_message *inmsg) {
thism->em_msg = *inmsg;
static void init_message_list(message_list *thism,
LOCAL_MSG_STRUCT *inmsg) {
thism->LOCAL_MSG_NAME_FROM_UNION = *inmsg;
thism->event_data = NULL;
thism->next = NULL;
thism->prev = NULL;
......@@ -64,11 +70,8 @@ static void init_message_list(message_list *thism,
thism->altq_prev = NULL;
}
struct em_param
struct local_param
{
int n_dims; // Dimensions in the base torus layout
int *dim_length;
int gap; // Gap at which nodes are connected (0 for log)
double link_bandwidth;/* bandwidth of each link */
double cn_bandwidth;/* injection bandwidth */
int num_cn; // number of nodes per router
......@@ -76,19 +79,26 @@ struct em_param
int vc_size; /* buffer size of the router-router channels */
int cn_vc_size; /* buffer size of the compute node channels */
int chunk_size; /* full-sized packets are broken into smaller chunks.*/
int router_delay;
int routing;
int router_delay; /* delay at each router */
int routing; /* type of routing */
//derived param
int * factor; /* used in torus coordinate calculation */
int radix, *cons_per_dim, *offset_per_dim;
int total_routers;
int total_terminals;
double cn_delay;
double link_delay;
double credit_delay;
int radix; /* radix of the routers */
int total_routers; /* how many routers in the system */
int total_terminals; /* how many terminals in the system */
double cn_delay; /* bandwidth based time for 1 byte */
double link_delay; /* bandwidth based time for 1 byte */
double credit_delay; /* how long for credit to arrive - all bytes */
//CHANGE: add network specific data here
int n_dims; // Dimensions in the base torus layout
int *dim_length;
int gap; // Gap at which nodes are connected (0 for log)
int * factor; /* used in torus coordinate calculation */
int *cons_per_dim, *offset_per_dim;
};
struct em_router_sample
struct local_router_sample
{
tw_lpid router_id;
tw_stime* busy_time;
......@@ -98,7 +108,7 @@ struct em_router_sample
long rev_events;
};
struct em_cn_sample
struct local_cn_sample
{
tw_lpid terminal_id;
long fin_chunks_sample;
......@@ -115,26 +125,22 @@ struct em_cn_sample
typedef struct terminal_state terminal_state;
typedef struct router_state router_state;
/* EM compute node data structure */
/* compute node data (think NIC) structure */
struct terminal_state
{
uint64_t packet_counter;
int packet_gen;
int packet_fin;
//who am I
unsigned int terminal_id; //what is my local id
const char * anno;
const local_param *params;
//which router I am connected to
unsigned int router_id;
//which router I am connected to
unsigned int router_id;
tw_lpid router_gid;
// Each terminal will have an input/output channel(s) with the router
// Each terminal will have input/output channel(s) with the router
int** vc_occupancy; // NUM_VC
tw_stime terminal_available_time;
//available messages
//available messages to be sent
message_list ***terminal_msgs;
message_list ***terminal_msgs_tail;
int terminal_length;
......@@ -143,14 +149,14 @@ struct terminal_state
//packet aggregation
struct qhash_table *rank_tbl;
//transient storage
//transient storage for reverse computation
struct rc_stack * st;
const char * anno;
const em_param *params;
//stats
struct mn_stats em_stats_array[CATEGORY_MAX];
//stats collection
uint64_t packet_counter;
int packet_gen;
int packet_fin;
struct mn_stats local_stats_array[CATEGORY_MAX];
tw_stime total_time;
uint64_t total_msg_size;
double total_hops;
......@@ -168,16 +174,16 @@ struct terminal_state
tw_stime fin_chunks_time;
tw_stime busy_time_sample;
char sample_buf[4096];
struct em_cn_sample * sample_stat;
struct local_cn_sample * sample_stat;
int op_arr_size;
int max_arr_size;
/* for logging forward and reverse events */
long fwd_events;
long rev_events;
};
/* terminal event type (1-4) */
//CHANGE: may need to change if more functionality is desired
/* event types */
enum event_t
{
T_GENERATE=1,
......@@ -190,6 +196,7 @@ enum event_t
};
typedef enum event_t event_t;
//CHANGE: may need to change if more functionality is desired
/* whether the last hop of a packet was global, local or a terminal */
enum last_hop
{
......@@ -197,6 +204,7 @@ enum last_hop
TERMINAL
};
//CHANGE: may need to change if more functionality is desired
enum ROUTING_ALGO
{
STATIC = 0,
......@@ -207,37 +215,39 @@ struct router_state
{
//who am I
unsigned int router_id;
int* dim_position;
const char * anno;
const local_param *params;
//CHANGE: may need to be changed if linear storage is not desired
//array/linked list based storage of info about ports/vcs
tw_lpid* link_connections;
tw_stime* next_output_available_time;
message_list ***pending_msgs;
message_list ***pending_msgs_tail;
message_list ***queued_msgs;
message_list ***queued_msgs_tail;
int** vc_occupancy;
int *in_send_loop;
int *queued_count;
struct rc_stack * st;
int** vc_occupancy;
int64_t* link_traffic;
const char * anno;
const em_param *params;
//for reverse computation
struct rc_stack * st;
//sampling and stats
int64_t* link_traffic;
tw_stime* last_buf_full;
char output_buf[4096];
char output_buf2[4096];
tw_stime* busy_time;
tw_stime* busy_time_sample;
struct em_router_sample * rsamples;
struct local_router_sample * rsamples;
int op_arr_size;
int max_arr_size;
long fwd_events;
long rev_events;
long fwd_events, rev_events;
int64_t * link_traffic_sample;
char output_buf[4096];
char output_buf2[4096];
//CHANGE: add network specific data here
int* dim_position;
};
struct VC_Entry {
......@@ -246,8 +256,8 @@ struct VC_Entry {
};
//global stats
static tw_stime em_total_time = 0;
static tw_stime em_max_latency = 0;
static tw_stime local_total_time = 0;
static tw_stime local_max_latency = 0;
static long long total_hops = 0;
static long long N_finished_packets = 0;
......@@ -255,10 +265,10 @@ static long long total_msg_sz = 0;
static long long N_finished_msgs = 0;
static long long N_finished_chunks = 0;
/* returns the EM message size */
static int em_get_msg_sz(void)
/* returns the message size */
static int local_get_msg_sz(void)
{
return sizeof(em_message);
return sizeof(LOCAL_MSG_STRUCT);
}
/* helper functions - convert between flat ids and torus n-dimensional ids */
......@@ -288,51 +298,20 @@ static int to_flat_id(
return flat_id;
}
static void em_read_config(const char * anno, em_param *params){
em_param *p = params;
int rc = configuration_get_value_int(&config, "PARAMS", "n_dims", anno,
&p->n_dims);
if(rc) {
tw_error(TW_LOC, "Number of dimensions not specified\n");
}
rc = configuration_get_value_int(&config, "PARAMS", "gap", anno, &p->gap);
if(rc) {
tw_error(TW_LOC, "Gap not specified\n");
}
char dim_length_str[MAX_NAME_LENGTH];
rc = configuration_get_value(&config, "PARAMS", "dim_length", anno,
dim_length_str, MAX_NAME_LENGTH);
if (rc == 0){
tw_error(TW_LOC, "couldn't read PARAMS:dim_length");
}
char* token;
p->dim_length= (int*)malloc(p->n_dims * sizeof(*p->dim_length));
token = strtok(dim_length_str, ",");
int i = 0;
while(token != NULL)
{
sscanf(token, "%d", &p->dim_length[i]);
if(p->dim_length[i] <= 0)
{
tw_error(TW_LOC, "Invalid torus dimension specified "
"(%d at pos %d), exiting... ", p->dim_length[i], i);
}
i++;
token = strtok(NULL,",");
}
rc = configuration_get_value_double(&config, "PARAMS", "link_bandwidth",
//CHANGE: network specific params have to be read here
static void local_read_config(const char * anno, local_param *params){
local_param *p = params;
// general params - do not change unless you intend to modify them
rc = configuration_get_value_double(&config, "PARAMS", "link_bandwidth",
anno, &p->link_bandwidth);
if(rc) {
p->link_bandwidth = 5.25;
fprintf(stderr, "Bandwidth of links not specified, setting to %lf\n",
fprintf(stderr, "Bandwidth of links not specified, setting to %lf\n",
p->link_bandwidth);
}
rc = configuration_get_value_double(&config, "PARAMS", "cn_bandwidth",
rc = configuration_get_value_double(&config, "PARAMS", "cn_bandwidth",
anno, &p->cn_bandwidth);
if(rc) {
p->cn_bandwidth = 5.25;
......@@ -340,35 +319,35 @@ static void em_read_config(const char * anno, em_param *params){
"to %lf\n", p->cn_bandwidth);
}
rc = configuration_get_value_int(&config, "PARAMS", "num_cn", anno,
rc = configuration_get_value_int(&config, "PARAMS", "num_cn", anno,
&p->num_cn);
if(rc) {
tw_error(TW_LOC, "Nodes per router (num_cn) not specified\n");
}
rc = configuration_get_value_int(&config, "PARAMS", "num_vcs", anno,
rc = configuration_get_value_int(&config, "PARAMS", "num_vcs", anno,
&p->num_vcs);
if(rc) {
p->num_vcs = 1;
}
rc = configuration_get_value_int(&config, "PARAMS", "chunk_size", anno,
rc = configuration_get_value_int(&config, "PARAMS", "chunk_size", anno,
&p->chunk_size);
if(rc) {
p->chunk_size = 512;
fprintf(stderr, "Chunk size for packets is specified, setting to %d\n",
fprintf(stderr, "Chunk size for packets is not specified, setting to %d\n",
p->chunk_size);
}
rc = configuration_get_value_int(&config, "PARAMS", "vc_size", anno,
rc = configuration_get_value_int(&config, "PARAMS", "vc_size", anno,
&p->vc_size);
if(rc) {
p->vc_size = 32768;
fprintf(stderr, "Buffer size of link channels not specified, setting to %d\n",
fprintf(stderr, "Buffer size of link channels not specified, setting to %d\n",
p->vc_size);
}
rc = configuration_get_value_int(&config, "PARAMS", "cn_vc_size", anno,
rc = configuration_get_value_int(&config, "PARAMS", "cn_vc_size", anno,
&p->cn_vc_size);
if(rc) {
p->cn_vc_size = 65536;
......@@ -380,10 +359,44 @@ static void em_read_config(const char * anno, em_param *params){
configuration_get_value_int(&config, "PARAMS", "router_delay", anno,
&p->router_delay);
configuration_get_value(&config, "PARAMS", "cn_sample_file", anno,
em_cn_sample_file, MAX_NAME_LENGTH);
configuration_get_value(&config, "PARAMS", "rt_sample_file", anno,
em_rtr_sample_file, MAX_NAME_LENGTH);
configuration_get_value(&config, "PARAMS", "cn_sample_file", anno,
local_cn_sample_file, MAX_NAME_LENGTH);
configuration_get_value(&config, "PARAMS", "rt_sample_file", anno,
local_rtr_sample_file, MAX_NAME_LENGTH);
//CHANGE: add network specific parameters here
int rc = configuration_get_value_int(&config, "PARAMS", "n_dims", anno,
&p->n_dims);
if(rc) {
tw_error(TW_LOC, "Number of dimensions not specified\n");
}
rc = configuration_get_value_int(&config, "PARAMS", "gap", anno, &p->gap);
if(rc) {
tw_error(TW_LOC, "Gap not specified\n");
}
char dim_length_str[MAX_NAME_LENGTH];
rc = configuration_get_value(&config, "PARAMS", "dim_length", anno,
dim_length_str, MAX_NAME_LENGTH);
if (rc == 0){
tw_error(TW_LOC, "couldn't read PARAMS:dim_length");
}
char* token;
p->dim_length= (int*)malloc(p->n_dims * sizeof(*p->dim_length));
token = strtok(dim_length_str, ",");
int i = 0;
while(token != NULL)
{
sscanf(token, "%d", &p->dim_length[i]);
if(p->dim_length[i] <= 0)
{
tw_error(TW_LOC, "Invalid torus dimension specified "
"(%d at pos %d), exiting... ", p->dim_length[i], i);
}
i++;
token = strtok(NULL,",");
}
char routing_str[MAX_NAME_LENGTH];
configuration_get_value(&config, "PARAMS", "routing", anno, routing_str,
......@@ -399,11 +412,11 @@ static void em_read_config(const char * anno, em_param *params){
else
{
p->routing = STATIC;
fprintf(stderr,
fprintf(stderr,
"No routing protocol specified, setting to static routing\n");
}
// set the derived parameters
//CHANGE: derived parameters often are computed based on network specifics
p->radix = 0;
p->total_routers = 1;
p->cons_per_dim = (int *)malloc(p->n_dims * sizeof(int));
......@@ -418,11 +431,6 @@ static void em_read_config(const char * anno, em_param *params){
p->offset_per_dim[i] = p->offset_per_dim[i - 1] + p->cons_per_dim[i - 1];
}
}
if(p->num_cn != (p->radix/(p->n_dims * p->gap))) {
printf("Unbalanced system: expected num_cn - %d, obtained %d\n",
p->radix/(p->n_dims * p->gap), p->num_cn);
}
p->radix += p->num_cn;
p->total_terminals = p->total_routers * p->num_cn;
......@@ -433,28 +441,37 @@ static void em_read_config(const char * anno, em_param *params){
p->total_terminals, p->total_routers, p->radix);
}
//general derived parameters
p->cn_delay = bytes_to_ns(1, p->cn_bandwidth);
p->link_delay = bytes_to_ns(1, p->link_bandwidth);
p->credit_delay = bytes_to_ns(CREDIT_SZ, p->link_bandwidth);
uint32_t h1 = 0, h2 = 0;
bj_hashlittle2(LP_METHOD_NM_TERM, strlen(LP_METHOD_NM_TERM), &h1, &h2);
terminal_magic_num = h1 + h2;
bj_hashlittle2(LP_METHOD_NM_ROUT, strlen(LP_METHOD_NM_ROUT), &h1, &h2);
router_magic_num = h1 + h2;
}
static void em_configure(){
static void local_configure(){
anno_map = codes_mapping_get_lp_anno_map(LP_CONFIG_NM_TERM);
assert(anno_map);
num_params = anno_map->num_annos + (anno_map->has_unanno_lp > 0);
all_params = (em_param *)malloc(num_params * sizeof(*all_params));
all_params = (local_param *)malloc(num_params * sizeof(*all_params));
for (int i = 0; i < anno_map->num_annos; i++){
const char * anno = anno_map->annotations[i].ptr;
em_read_config(anno, &all_params[i]);
local_read_config(anno, &all_params[i]);
}
if (anno_map->has_unanno_lp > 0){
em_read_config(NULL, &all_params[anno_map->num_annos]);
local_read_config(NULL, &all_params[anno_map->num_annos]);
}
}
/* report EM statistics like average and maximum packet latency, average number of hops traversed */
static void em_report_stats()
/* report statistics like average and maximum packet latency, average number of hops traversed */
static void local_report_stats()
{
long long avg_hops, total_finished_packets, total_finished_chunks;
long long total_finished_msgs, final_msg_sz;
......@@ -462,19 +479,19 @@ static void em_report_stats()
int total_minimal_packets, total_nonmin_packets;
long total_gen, total_fin;
MPI_Reduce( &total_hops, &avg_hops, 1, MPI_LONG_LONG, MPI_SUM, 0,
MPI_Reduce( &total_hops, &avg_hops, 1, MPI_LONG_LONG, MPI_SUM, 0,
MPI_COMM_WORLD);
MPI_Reduce( &N_finished_packets, &total_finished_packets, 1, MPI_LONG_LONG,
MPI_Reduce( &N_finished_packets, &total_finished_packets, 1, MPI_LONG_LONG,
MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce( &N_finished_msgs, &total_finished_msgs, 1, MPI_LONG_LONG, MPI_SUM,
MPI_Reduce( &N_finished_msgs, &total_finished_msgs, 1, MPI_LONG_LONG, MPI_SUM,
0, MPI_COMM_WORLD);
MPI_Reduce( &N_finished_chunks, &total_finished_chunks, 1, MPI_LONG_LONG,
MPI_Reduce( &N_finished_chunks, &total_finished_chunks, 1, MPI_LONG_LONG,