Commit f6576fd5 authored by Philip Carns's avatar Philip Carns

Merge remote-tracking branch 'origin/master' into darshan-3x-support

parents 30369685 b8df46df
......@@ -32,6 +32,7 @@ extern void free_tmp(void * ptr);
typedef struct message_list message_list;
struct message_list {
//CHANGE: add message types for new networks here
union {
terminal_message dfly_msg;
em_message em_msg;
......
......@@ -4,6 +4,7 @@
*
*/
//CHANGE: modify to match your header file name
#ifndef EXPRESS_MESH_H
#define EXPRESS_MESH_H
......@@ -13,76 +14,67 @@ extern "C" {
#include <ross.h>
//CHANGE: modify to match the struct
typedef struct net_message net_message;
//CHANGE: modify the struct name - add to message_list union in common-net.h
typedef struct em_message em_message;
struct em_message
{
/* magic number */
int magic;
/* flit travel start time*/
tw_stime travel_start_time;
/* packet ID of the flit */
unsigned long long packet_ID;
/* event type of the flit */
short type;
/* category: comes from codes */
char category[CATEGORY_NAME_MAX];
/* final destination LP ID, this comes from codes can be a server or any other LP type*/
tw_lpid final_dest_gid;
/*sending LP ID from CODES, can be a server or any other LP type */
tw_lpid sender_lp;
tw_lpid sender_mn_lp; // source modelnet id
/* destination terminal ID */
tw_lpid dest_terminal_id;
int dest_terminal;
/* source terminal ID */
tw_lpid src_terminal_id;
short saved_channel;
short my_N_hop;
short hops[8];
//common entries:
int magic; /* magic number */
short type; /* event type of the flit */
/* Intermediate LP ID from which this message is coming */
unsigned int intm_lp_id;
short saved_vc;
short dim_change;
/* last hop of the message, can be a terminal, local router or global router */
int last_hop;
/* For routing */
uint64_t chunk_id;
uint64_t packet_size;
uint64_t message_id;
uint64_t total_size;
tw_stime travel_start_time; /* flit travel start time*/
unsigned long long packet_ID; /* packet ID of the flit */
char category[CATEGORY_NAME_MAX]; /* category: comes from codes */
int saved_remote_esize;
int remote_event_size_bytes;
int local_event_size_bytes;
tw_lpid final_dest_gid; /* final destination LP ID, this comes from codes can be a server or any other LP type*/
tw_lpid sender_lp; /*sending LP ID from CODES, can be a server or any other LP type */
tw_lpid sender_mn_lp; // source modelnet id (think NIC)
tw_lpid src_terminal_id; /* source terminal ID - mostly same as sender_mn_lp */
tw_lpid dest_terminal_id; /* destination modelnet id */
int dest_terminal; /* logical id of destination modelnet id */
// For buffer message
int vc_index;
int output_chan;
model_net_event_return event_rc;
/* packet/message identifier and status */
uint64_t chunk_id; //which chunk of packet I am
uint64_t packet_size; //what is the size of my packet
uint64_t message_id; //seq number at message level - NIC specified
uint64_t total_size; //total size of the message
int remote_event_size_bytes; // data size for target event at destination
int local_event_size_bytes; // data size for event at source
int is_pull;
uint64_t pull_size;
tw_stime msg_start_time;
/* for reverse computation */
//info for path traversal
short my_N_hop; /* hops traversed so far */
short hops[8]; /* can be used for storing different types of hops */
unsigned int intm_lp_id; /* Intermediate LP ID that sent this packet */
int last_hop; /* last hop of the message, can be a terminal, local router or global router */
int vc_index; /* stores port info */
int output_chan; /* virtual channel within port */
//info for reverse computation
short saved_channel;
short saved_vc;
model_net_event_return event_rc;
tw_stime saved_available_time;
tw_stime saved_avg_time;
tw_stime saved_rcv_time;
tw_stime saved_busy_time;
tw_stime saved_busy_time;
tw_stime saved_total_time;
tw_stime saved_hist_start_time;
tw_stime saved_sample_time;
tw_stime msg_start_time;
int saved_hist_num;
int saved_occupancy;
//CHANGE: info for specific networks
short dim_change;
};
#ifdef __cplusplus
}
#endif
#endif
#endif
/*
* Copyright (C) 2014 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
*/
//CHANGE: modify to match your header file name
#ifndef NET_TEMPLATE_H
#define NET_TEMPLATE_H
#ifdef __cplusplus
extern "C" {
#endif
#include <ross.h>
//CHANGE: modify to match the struct
typedef struct net_message net_message;
//CHANGE: modify the struct name - add to message_list union in common-net.h
/*
 * Template message struct for a new network model. Per the CHANGE notes in
 * this header, rename the struct for the new network and add the renamed
 * type to the message_list union in common-net.h.
 * Fields are grouped as: common entries, packet/message identification,
 * path traversal, reverse-computation scratch, and network-specific data.
 */
struct net_message
{
//common entries:
int magic; /* magic number */
short type; /* event type of the flit */
tw_stime travel_start_time; /* flit travel start time */
unsigned long long packet_ID; /* packet ID of the flit */
char category[CATEGORY_NAME_MAX]; /* category: comes from codes */
tw_lpid final_dest_gid; /* final destination LP ID; this comes from codes and can be a server or any other LP type */
tw_lpid sender_lp; /* sending LP ID from CODES, can be a server or any other LP type */
tw_lpid sender_mn_lp; // source modelnet id (think NIC)
tw_lpid src_terminal_id; /* source terminal ID - mostly same as sender_mn_lp */
tw_lpid dest_terminal_id; /* destination modelnet id */
int dest_terminal; /* logical id of destination modelnet id */
/* packet/message identifier and status */
uint64_t chunk_id; //which chunk of packet I am
uint64_t packet_size; //what is the size of my packet
uint64_t message_id; //seq number at message level - NIC specified
uint64_t total_size; //total size of the message
int remote_event_size_bytes; // data size for target event at destination
int local_event_size_bytes; // data size for event at source
int is_pull; /* NOTE(review): presumably non-zero when the message is a pull request - confirm */
uint64_t pull_size; /* NOTE(review): presumably the number of bytes to pull when is_pull is set - confirm */
tw_stime msg_start_time; /* NOTE(review): presumably the time the enclosing message started - confirm */
//info for path traversal
short my_N_hop; /* hops traversed so far */
short hops[8]; /* can be used for storing different types of hops */
unsigned int intm_lp_id; /* Intermediate LP ID that sent this packet */
int last_hop; /* last hop of the message, can be a terminal, local router or global router */
int vc_index; /* stores port info */
int output_chan; /* virtual channel within port */
//info for reverse computation
/* NOTE(review): the saved_* members below look like copies of LP state kept
 * so a reverse handler can restore it; which state each one mirrors is not
 * visible in this header - confirm against the model's handlers. */
short saved_channel;
short saved_vc;
model_net_event_return event_rc; /* NOTE(review): presumably the return of model_net_event(), kept for its reverse call - confirm */
tw_stime saved_available_time;
tw_stime saved_avg_time;
tw_stime saved_rcv_time;
tw_stime saved_busy_time;
tw_stime saved_total_time;
tw_stime saved_hist_start_time;
tw_stime saved_sample_time;
//CHANGE: add info for specific networks
};
#ifdef __cplusplus
}
#endif
#endif
......@@ -30,8 +30,8 @@ intercon = open(sys.argv[3], "wb")
# Flatten a (group, row, col) router coordinate into a single integer index.
# NOTE(review): the strides suggest 96 routers per group and 16 per row - confirm.
def router(group, row, col):
return group*96 + row*16 + col
numblack = np.zeros((864,864), dtype=np.int)
numblue = np.zeros((864,864), dtype=np.int)
numblack = np.zeros((960,960), dtype=np.int)
numblue = np.zeros((960,960), dtype=np.int)
with open(filename) as ofile:
matches = re.findall('c\d+-\dc\ds\d+a0l\d+\((\d+):(\d):(\d+)\).(\w+).->.c\d+-\dc\ds\d+a0l\d+\((\d+):(\d):(\d+)\)', ofile.read(), re.MULTILINE)
......
......@@ -28,7 +28,6 @@
#define lprintf(_fmt, ...) \
do {if (CS_LP_DBG) printf(_fmt, __VA_ARGS__);} while (0)
#define MAX_STATS 65536
#define PAYLOAD_SZ 1024
static int msg_size_hash_compare(
void *key, struct qhash_head *link);
......@@ -55,6 +54,7 @@ static int64_t EAGER_THRESHOLD = 8192;
static int alloc_spec = 0;
static tw_stime self_overhead = 10.0;
static tw_stime mean_interval = 100000;
static int payload_sz = 1024;
/* Doing LP IO*/
static char lp_io_dir[256] = {'\0'};
......@@ -138,6 +138,14 @@ enum MPI_NW_EVENTS
CLI_NBR_FINISH,
};
/*
 * Synthetic traffic patterns.
 * The numeric values matter: nw_test_init() range-checks the integer it
 * parses from the workload name against 1..4 before storing it as the
 * rank's pattern.
 */
enum TRAFFIC
{
    /* sends message to a randomly selected node */
    UNIFORM = 1,

    /* sends message to the next node (potentially connected to the same router) */
    NEAREST_NEIGHBOR = 2,

    /* sends message to all other nodes */
    ALLTOALL = 3,

    /* sends message to 4 nearby neighbors */
    STENCIL = 4
};
struct mpi_workload_sample
{
/* Sampling data */
......@@ -203,6 +211,7 @@ struct nw_state
int app_id;
int local_rank;
int synthetic_pattern;
int is_finished;
int neighbor_completed;
......@@ -309,6 +318,7 @@ struct nw_message
double saved_wait_time;
double saved_delay;
int64_t saved_num_bytes;
int saved_syn_length;
} rc;
};
......@@ -591,12 +601,13 @@ static void gen_synthetic_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp
if(bf->c0)
return;
model_net_event_rc2(lp, &m->event_rc);
s->gen_data -= PAYLOAD_SZ;
num_syn_bytes_sent -= PAYLOAD_SZ;
tw_rand_reverse_unif(lp->rng);
tw_rand_reverse_unif(lp->rng);
int i;
for (i=0; i < m->rc.saved_syn_length; i++){
model_net_event_rc2(lp, &m->event_rc);
s->gen_data -= payload_sz;
num_syn_bytes_sent -= payload_sz;
}
tw_rand_reverse_unif(lp->rng);
}
......@@ -611,38 +622,105 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l
/* Get job information */
tw_lpid global_dest_id;
int intm_dest_id;
nw_message remote_m;
struct codes_jobmap_id jid;
jid = codes_jobmap_to_local_id(s->nw_id, jobmap_ctx);
int num_clients = codes_jobmap_get_num_ranks(jid.job, jobmap_ctx);
int dest_svr = tw_rand_integer(lp->rng, 0, num_clients - 1);
if(dest_svr == s->local_rank)
/* Find destination */
int* dest_svr = NULL;
int i, length=0;
switch(s->synthetic_pattern)
{
dest_svr = (s->local_rank + 1) % num_clients;
}
jid.rank = dest_svr;
int intm_dest_id = codes_jobmap_to_global_id(jid, jobmap_ctx);
global_dest_id = codes_mapping_get_lpid_from_relative(intm_dest_id, NULL, NW_LP_NM, NULL, 0);
nw_message remote_m;
remote_m.fwd.sim_start_time = tw_now(lp);
remote_m.fwd.dest_rank = dest_svr;
remote_m.msg_type = CLI_BCKGND_ARRIVE;
remote_m.fwd.num_bytes = PAYLOAD_SZ;
remote_m.fwd.app_id = s->app_id;
remote_m.fwd.src_rank = s->local_rank;
m->event_rc = model_net_event(net_id, "synthetic-tr", global_dest_id, PAYLOAD_SZ, 0.0,
sizeof(nw_message), (const void*)&remote_m,
0, NULL, lp);
s->gen_data += PAYLOAD_SZ;
num_syn_bytes_sent += PAYLOAD_SZ;
case UNIFORM:
{
length = 1;
dest_svr = (int*) calloc(1, sizeof(int));
dest_svr[0] = tw_rand_integer(lp->rng, 0, num_clients - 1);
if(dest_svr[0] == s->local_rank)
dest_svr[0] = (s->local_rank + 1) % num_clients;
}
break;
case NEAREST_NEIGHBOR:
{
length = 1;
dest_svr = (int*) calloc(1, sizeof(int));
dest_svr[0] = (s->local_rank + 1) % num_clients;
}
break;
case ALLTOALL:
{
dest_svr = (int*) calloc(num_clients-1, sizeof(int));
int index = 0;
for (i=0;i<num_clients;i++)
{
if(i!=s->local_rank)
{
dest_svr[index] = i;
index++;
length++;
}
}
}
break;
case STENCIL: //2D 4-point stencil
{
/* I think this code snippet is coming from the LLNL stencil patterns. */
int digits, x=1, y=1, row, col, temp=num_clients;
length = 4;
dest_svr = (int*) calloc(4, sizeof(int));
for (digits = 0; temp > 0; temp >>= 1)
digits++;
digits = digits/2;
for (i = 0; i < digits; i++)
x = x * 2;
y = num_clients / x;
//printf("\nStencil Syn: x=%d, y=%d", x, y);
row = s->local_rank / y;
col = s->local_rank % y;
dest_svr[0] = row * y + ((col-1+y)%y); /* left neighbor */
dest_svr[1] = row * y + ((col+1+y)%y); /* right neighbor */
dest_svr[2] = ((row-1+x)%x) * y + col; /* bottom neighbor */
dest_svr[3] = ((row+1+x)%x) * y + col; /* up neighbor */
}
break;
default:
tw_error(TW_LOC, "Undefined traffic pattern");
}
/* Record length for reverse handler*/
m->rc.saved_syn_length = length;
if(length > 0)
{
// m->event_array_rc = (model_net_event_return) malloc(length * sizeof(model_net_event_return));
//printf("\nRANK %d Dests %d", s->local_rank, length);
for (i = 0; i < length; i++)
{
/* Generate synthetic traffic */
jid.rank = dest_svr[i];
intm_dest_id = codes_jobmap_to_global_id(jid, jobmap_ctx);
global_dest_id = codes_mapping_get_lpid_from_relative(intm_dest_id, NULL, NW_LP_NM, NULL, 0);
remote_m.fwd.sim_start_time = tw_now(lp);
remote_m.fwd.dest_rank = dest_svr[i];
remote_m.msg_type = CLI_BCKGND_ARRIVE;
remote_m.fwd.num_bytes = payload_sz;
remote_m.fwd.app_id = s->app_id;
remote_m.fwd.src_rank = s->local_rank;
// printf("\nAPP %d SRC %d Dest %d (twid %llu)", jid.job, s->local_rank, dest_svr[i], global_dest_id);
m->event_rc = model_net_event(net_id, "synthetic-tr", global_dest_id, payload_sz, 0.0,
sizeof(nw_message), (const void*)&remote_m,
0, NULL, lp);
s->gen_data += payload_sz;
num_syn_bytes_sent += payload_sz;
}
}
/* New event after MEAN_INTERVAL */
tw_stime ts = mean_interval + tw_rand_exponential(lp->rng, noise);
tw_event * e;
......@@ -651,6 +729,8 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l
m_new = (struct nw_message*)tw_event_data(e);
m_new->msg_type = CLI_BCKGND_GEN;
tw_event_send(e);
free(dest_svr);
}
void arrive_syn_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp)
......@@ -1845,6 +1925,18 @@ void nw_test_init(nw_state* s, tw_lp* lp)
if(strcmp(file_name_of_job[lid.job], "synthetic") == 0)
{
int synthetic_pattern;
sscanf(file_name_of_job[lid.job], "synthetic%d", &synthetic_pattern);
if(synthetic_pattern <=0 || synthetic_pattern > 4)
{
printf("\n Undefined synthetic pattern: setting to uniform random ");
s->synthetic_pattern = 1;
}
else
{
s->synthetic_pattern = synthetic_pattern;
}
tw_event * e;
nw_message * m_new;
tw_stime ts = tw_rand_exponential(lp->rng, mean_interval/1000);
......
This diff is collapsed.
This diff is collapsed.
......@@ -31,6 +31,7 @@ TESTS += tests/lp-io-test.sh \
tests/modelnet-test-torus.sh \
tests/modelnet-test-loggp.sh \
tests/modelnet-test-dragonfly.sh \
tests/modelnet-test-em.sh \
tests/modelnet-test-slimfly.sh \
tests/modelnet-test-dragonfly-synthetic.sh \
tests/modelnet-test-dragonfly-custom-synthetic.sh \
......@@ -66,6 +67,7 @@ EXTRA_DIST += tests/download-traces.sh \
tests/modelnet-test-dragonfly-traces.sh \
tests/modelnet-test-dragonfly-custom-synthetic.sh \
tests/modelnet-test-dragonfly-custom-traces.sh \
tests/modelnet-test-em.sh \
tests/modelnet-test-fattree-synthetic.sh \
tests/modelnet-test-slimfly.sh \
tests/modelnet-test-slimfly-synthetic.sh \
......
LPGROUPS
{
MODELNET_GRP
{
repetitions="64";
server="3";
modelnet_express_mesh="3";
modelnet_express_mesh_router="1";
}
}
PARAMS
{
message_size="512";
packet_size="4096";
chunk_size="4096";
modelnet_order=( "express_mesh", "express_mesh_router" );
#modelnet_scheduler="fcfs";
modelnet_scheduler="round-robin";
n_dims="3";
dim_length="4,4,4";
gap="1";
num_cn="3";
num_vcs="1";
link_bandwidth="12.5";
cn_bandwidth="12.5";
vc_size="65536";
cn_vc_size="65536";
routing="static";
soft_delay="0";
router_delay="90";
}
#!/bin/bash
tests/modelnet-test --sync=1 -- tests/conf/modelnet-test-em.conf
......@@ -187,8 +187,14 @@ int main(
{
strcpy(router_name, "slimfly_router");
}
if(net_id == EXPRESS_MESH)
{
strcpy(router_name, "modelnet_express_mesh_router");
}
if(net_id == SLIMFLY || net_id == DRAGONFLY)
if(net_id == SLIMFLY || net_id == DRAGONFLY || net_id == EXPRESS_MESH)
{
num_routers = codes_mapping_get_lp_count("MODELNET_GRP", 0,
router_name, NULL, 1);
......@@ -356,11 +362,8 @@ static void handle_kickoff_event(
int opt_offset = 0;
int total_lps = num_servers * 2 + num_routers;
if(net_id == DRAGONFLY && (lp->gid % lps_per_rep == num_servers_per_rep - 1))
opt_offset = num_servers_per_rep + num_routers_per_rep; /* optional offset due to dragonfly mapping */
if(net_id == SLIMFLY && (lp->gid % lps_per_rep == num_servers_per_rep -1))
opt_offset = num_servers_per_rep + num_routers_per_rep;
if((net_id == DRAGONFLY || net_id == SLIMFLY || net_id == EXPRESS_MESH) && (lp->gid % lps_per_rep == num_servers_per_rep - 1))
opt_offset = num_servers_per_rep + num_routers_per_rep;
/* each server sends a request to the next highest server */
int dest_id = (lp->gid + offset + opt_offset)%total_lps;
......@@ -451,12 +454,9 @@ static void handle_ack_event(
// printf("\n m->src %d lp->gid %d ", m->src, lp->gid);
int opt_offset = 0;
if(net_id == DRAGONFLY && (lp->gid % lps_per_rep == num_servers_per_rep - 1))
if((net_id == DRAGONFLY || net_id == SLIMFLY || net_id == EXPRESS_MESH) && (lp->gid % lps_per_rep == num_servers_per_rep - 1))
opt_offset = num_servers_per_rep + num_routers_per_rep; /* optional offset due to dragonfly mapping */
if(net_id == SLIMFLY && (lp->gid % lps_per_rep == num_servers_per_rep -1))
opt_offset = num_servers_per_rep + num_routers_per_rep;
tw_lpid dest_id = (lp->gid + offset + opt_offset)%(num_servers*2 + num_routers);
/* in the "pull" case, src should actually be self */
......@@ -512,12 +512,9 @@ static void handle_req_event(
// printf("\n m->src %d lp->gid %d ", m->src, lp->gid);
int opt_offset = 0;
if(net_id == DRAGONFLY && (m->src % lps_per_rep == num_servers_per_rep - 1))
if((net_id == DRAGONFLY || net_id == SLIMFLY || net_id == EXPRESS_MESH) && (m->src % lps_per_rep == num_servers_per_rep - 1))
opt_offset = num_servers_per_rep + num_routers_per_rep; /* optional offset due to dragonfly mapping */
if(net_id == SLIMFLY && (m->src % lps_per_rep == num_servers_per_rep -1))
opt_offset = num_servers_per_rep + num_routers_per_rep;
assert(lp->gid == (m->src + offset + opt_offset)%(num_servers*2 + num_routers));
ns->msg_recvd_count++;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment