Commit 346be3da authored by Misbah Mubarak
Browse files

Merge branch 'merged-branch-v1' into 'master'

Merged branch v1

See merge request !30
parents 9897e59c 0c3bcbcf
/*
* Copyright (C) 2014 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
*/
#include <mpi.h>
#include "codes/codes-mpi-replay.h"
/*
 * Program entry point: initializes MPI, runs the model-net MPI replay on
 * MPI_COMM_WORLD, and finalizes MPI unless it was already finalized by the
 * time the replay call returns.
 */
int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);

    /*
     * NOTE: an earlier sketch split MPI_COMM_WORLD with MPI_Comm_split() and
     * ran the replay only on ranks < 2 using the resulting sub-communicator.
     * The replay currently runs on the full world communicator.
     */
    modelnet_mpi_replay(MPI_COMM_WORLD, &argc, &argv);

    /* Only finalize if nobody has done so already. */
    int already_finalized;
    MPI_Finalized(&already_finalized);
    if (already_finalized == 0) {
        MPI_Finalize();
    }

    return 0;
}
/*
* Local variables:
* c-indent-level: 4
* c-basic-offset: 4
* End:
*
* vim: ft=c ts=8 sts=4 sw=4 expandtab
*/
......@@ -28,7 +28,7 @@ static int num_routers_per_grp = 0;
static int num_nodes_per_grp = 0;
static int num_nodes_per_cn = 0;
static int num_groups = 0;
static int num_nodes = 0;
static unsigned long long num_nodes = 0;
static char lp_io_dir[256] = {'\0'};
static lp_io_handle io_handle;
......@@ -76,6 +76,7 @@ struct svr_msg
enum svr_event svr_event_type;
tw_lpid src; /* source of this request or ack */
int incremented_flag; /* helper for reverse computation */
model_net_event_return event_rc;
};
static void svr_init(
......@@ -133,6 +134,7 @@ static void issue_event(
svr_state * ns,
tw_lp * lp)
{
(void)ns;
tw_event *e;
svr_msg *m;
tw_stime kickoff_time;
......@@ -172,7 +174,7 @@ static void handle_kickoff_rev_event(
if(b->c1)
tw_rand_reverse_unif(lp->rng);
model_net_event_rc(net_id, lp, PAYLOAD_SZ);
model_net_event_rc2(lp, &m->event_rc);
ns->msg_sent_count--;
tw_rand_reverse_unif(lp->rng);
}
......@@ -239,6 +241,9 @@ static void handle_remote_rev_event(
svr_msg * m,
tw_lp * lp)
{
(void)b;
(void)m;
(void)lp;
ns->msg_recvd_count--;
}
......@@ -248,6 +253,9 @@ static void handle_remote_event(
svr_msg * m,
tw_lp * lp)
{
(void)b;
(void)m;
(void)lp;
ns->msg_recvd_count++;
}
......@@ -257,6 +265,9 @@ static void handle_local_rev_event(
svr_msg * m,
tw_lp * lp)
{
(void)b;
(void)m;
(void)lp;
ns->local_recvd_count--;
}
......@@ -266,6 +277,9 @@ static void handle_local_event(
svr_msg * m,
tw_lp * lp)
{
(void)b;
(void)m;
(void)lp;
ns->local_recvd_count++;
}
/* convert ns to seconds */
......@@ -358,10 +372,10 @@ int main(
return 0;
}
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
MPI_Comm_rank(MPI_COMM_CODES, &rank);
MPI_Comm_size(MPI_COMM_CODES, &nprocs);
configuration_load(argv[2], MPI_COMM_WORLD, &config);
configuration_load(argv[2], MPI_COMM_CODES, &config);
model_net_register();
svr_add_lp_type();
......@@ -401,12 +415,12 @@ int main(
{
do_lp_io = 1;
int flags = lp_io_use_suffix ? LP_IO_UNIQ_SUFFIX : 0;
int ret = lp_io_prepare(lp_io_dir, flags, &io_handle, MPI_COMM_WORLD);
int ret = lp_io_prepare(lp_io_dir, flags, &io_handle, MPI_COMM_CODES);
assert(ret == 0 || !"lp_io_prepare failure");
}
tw_run();
if (do_lp_io){
int ret = lp_io_flush(io_handle, MPI_COMM_WORLD);
int ret = lp_io_flush(io_handle, MPI_COMM_CODES);
assert(ret == 0 || !"lp_io_flush failure");
}
model_net_report_stats(net_id);
......
......@@ -90,6 +90,7 @@ struct svr_msg
enum svr_event svr_event_type;
tw_lpid src; /* source of this request or ack */
int incremented_flag; /* helper for reverse computation */
model_net_event_return event_rc;
};
static void svr_init(
......@@ -120,6 +121,53 @@ tw_lptype svr_lp = {
sizeof(svr_state),
};
/* ROSS event-tracing hook for the server LP.
 * The same function is currently installed for both the rbev_trace_f and the
 * ev_trace_f slots, although separate functions could be supplied.
 *
 * Copies the message's event type, converted to int, into the first
 * sizeof(int) bytes of buffer. lp and collect_flag are not used.
 */
void ft_svr_event_collect(svr_msg *m, tw_lp *lp, char *buffer, int *collect_flag)
{
    (void)collect_flag;
    (void)lp;

    const int event_type = (int) m->svr_event_type;
    memcpy(buffer, &event_type, sizeof(event_type));
}
/* Model-level statistics hook for the ROSS instrumentation.
 * Any model data to be saved alongside the simulation-engine data would be
 * written here; when that happens, the last field of ft_svr_model_types[0]
 * must be updated to the number of bytes saved per call.
 *
 * Currently a deliberate no-op: none of the arguments are used.
 */
void ft_svr_model_stat_collect(svr_state *s, tw_lp *lp, char *buffer)
{
    (void)buffer;
    (void)lp;
    (void)s;
}
/* Instrumentation table for the server LP; its first entry is what
 * ft_svr_get_model_stat_types() returns for st_model_type_register(). */
st_model_types ft_svr_model_types[] = {
/* entry 0: the same collector is installed under both the rbev_trace_f and
 * ev_trace_f casts; each call writes one int (the event type) */
{(rbev_trace_f) ft_svr_event_collect,
sizeof(int),
(ev_trace_f) ft_svr_event_collect,
sizeof(int),
/* model-level stat collector currently saves nothing, hence size 0 */
(model_stat_f) ft_svr_model_stat_collect,
0},
/* all-NULL/zero entry; presumably the end-of-table marker -- confirm against ROSS */
{NULL, 0, NULL, 0, NULL, 0}
};
/* Accessor: yields a pointer to the first entry of ft_svr_model_types. */
static const st_model_types *ft_svr_get_model_stat_types(void)
{
    const st_model_types *first_entry = ft_svr_model_types;
    return first_entry;
}
/* Registers the server LP's instrumentation table with ROSS under the LP
 * type name "server".
 *
 * Takes no arguments. The parameter list is spelled (void) so that the
 * definition is a real prototype, matching ft_svr_get_model_stat_types().
 */
void ft_svr_register_model_stats(void)
{
    st_model_type_register("server", ft_svr_get_model_stat_types());
}
const tw_optdef app_opt [] =
{
TWOPT_GROUP("Model net synthetic traffic " ),
......@@ -206,7 +254,7 @@ static void handle_kickoff_rev_event(
(void)b;
(void)m;
ns->msg_sent_count--;
model_net_event_rc(net_id, lp, PAYLOAD_SZ);
model_net_event_rc2(lp, &m->event_rc);
tw_rand_reverse_unif(lp->rng);
}
static void handle_kickoff_event(
......@@ -408,6 +456,9 @@ int main(
svr_add_lp_type();
if (g_st_ev_trace)
ft_svr_register_model_stats();
codes_mapping_setup();
......
......@@ -26,8 +26,6 @@ FILE * slimfly_results_log_2=NULL;
FILE * slimfly_ross_csv_log=NULL;
static int net_id = 0;
static int num_routers = 0;
static int num_servers = 0;
static int offset = 2;
static int traffic = 1;
static double arrival_time = 1000.0;
......@@ -45,14 +43,10 @@ static lp_io_handle io_handle;
static unsigned int lp_io_use_suffix = 0;
static int do_lp_io = 0;
/* whether to pull instead of push */
static int do_pull = 0;
static int num_servers_per_rep = 0;
static int num_routers_per_grp = 0;
static int num_nodes_per_grp = 0;
static int num_reps = 0;
static int num_groups = 0;
static int num_nodes = 0;
......@@ -95,6 +89,7 @@ struct svr_msg
enum svr_event svr_event_type;
tw_lpid src; /* source of this request or ack */
int incremented_flag; /* helper for reverse computation */
model_net_event_return event_rc;
};
static void svr_init(
......@@ -197,6 +192,7 @@ static void issue_event(
svr_state * ns,
tw_lp * lp)
{
(void)ns;
tw_event *e;
svr_msg *m;
tw_stime kickoff_time;
......@@ -256,11 +252,15 @@ static void handle_kickoff_rev_event(
svr_msg * m,
tw_lp * lp)
{
(void)b;
(void)m;
(void)lp;
if(b->c1)
tw_rand_reverse_unif(lp->rng);
ns->msg_sent_count--;
model_net_event_rc(net_id, lp, PAYLOAD_SZ);
model_net_event_rc2(lp, &m->event_rc);
tw_rand_reverse_unif(lp->rng);
}
......@@ -270,6 +270,8 @@ static void handle_kickoff_event(
svr_msg * m,
tw_lp * lp)
{
(void)m;
char anno[MAX_NAME_LENGTH];
tw_lpid local_dest = -1, global_dest = -1;
......@@ -336,6 +338,9 @@ static void handle_remote_rev_event(
svr_msg * m,
tw_lp * lp)
{
(void)b;
(void)m;
(void)lp;
ns->msg_recvd_count--;
}
......@@ -345,6 +350,9 @@ static void handle_remote_event(
svr_msg * m,
tw_lp * lp)
{
(void)b;
(void)m;
(void)lp;
ns->msg_recvd_count++;
}
......@@ -354,6 +362,9 @@ static void handle_local_rev_event(
svr_msg * m,
tw_lp * lp)
{
(void)b;
(void)m;
(void)lp;
ns->local_recvd_count--;
}
......@@ -363,19 +374,11 @@ static void handle_local_event(
svr_msg * m,
tw_lp * lp)
{
(void)b;
(void)m;
(void)lp;
ns->local_recvd_count++;
}
/* Scale a duration given in nanoseconds down to seconds. */
static tw_stime ns_to_s(tw_stime ns)
{
    const double ns_per_second = 1000.0 * 1000.0 * 1000.0;

    return ns / ns_per_second;
}
/* Scale a duration given in seconds up to nanoseconds.
 * Note: the parameter is named ns, but it carries the value in seconds. */
static tw_stime s_to_ns(tw_stime ns)
{
    const double ns_per_second = 1000.0 * 1000.0 * 1000.0;

    return ns * ns_per_second;
}
int index_mine = 0;
......
......@@ -28,7 +28,7 @@ static int num_servers_per_rep = 0;
static int num_routers_per_grp = 0;
static int num_nodes_per_grp = 0;
static int num_groups = 0;
static int num_nodes = 0;
static unsigned long long num_nodes = 0;
static char lp_io_dir[256] = {'\0'};
static lp_io_handle io_handle;
......@@ -76,6 +76,7 @@ struct svr_msg
enum svr_event svr_event_type;
tw_lpid src; /* source of this request or ack */
int incremented_flag; /* helper for reverse computation */
model_net_event_return event_rc; /* model-net event reverse computation flag */
};
static void svr_init(
......@@ -110,28 +111,44 @@ tw_lptype svr_lp = {
* can have a different function for rbev_trace_f and ev_trace_f
* but right now it is set to the same function for both
*/
void svr_event_collect(svr_msg *m, tw_lp *lp, char *buffer)
void svr_event_collect(svr_msg *m, tw_lp *lp, char *buffer, int *collect_flag)
{
    /* Event-trace hook: stores the message's event type, converted to int,
     * in the first sizeof(int) bytes of buffer. lp and collect_flag are
     * not used. */
    (void)collect_flag;
    (void)lp;

    const int event_type = (int) m->svr_event_type;
    memcpy(buffer, &event_type, sizeof(event_type));
}
st_trace_type svr_trace_types[] = {
/* Model-level statistics hook for the ROSS instrumentation.
 * Any model data to be saved alongside the simulation-engine data would be
 * written here; when that happens, the last field of svr_model_types[0]
 * must be updated to the number of bytes saved per call.
 *
 * Currently a deliberate no-op: none of the arguments are used.
 */
void svr_model_stat_collect(svr_state *s, tw_lp *lp, char *buffer)
{
    (void)buffer;
    (void)lp;
    (void)s;
}
st_model_types svr_model_types[] = {
{(rbev_trace_f) svr_event_collect,
sizeof(int),
(ev_trace_f) svr_event_collect,
sizeof(int)},
{0}
sizeof(int),
(model_stat_f) svr_model_stat_collect,
0},
{NULL, 0, NULL, 0, NULL, 0}
};
static const st_trace_type *svr_get_trace_types(void)
static const st_model_types *svr_get_model_stat_types(void)
{
return(&svr_trace_types[0]);
return(&svr_model_types[0]);
}
void svr_register_trace()
void svr_register_model_types()
{
trace_type_register("server", svr_get_trace_types());
st_model_type_register("server", svr_get_model_stat_types());
}
const tw_optdef app_opt [] =
......@@ -161,6 +178,8 @@ static void issue_event(
svr_state * ns,
tw_lp * lp)
{
(void)ns;
tw_event *e;
svr_msg *m;
tw_stime kickoff_time;
......@@ -200,7 +219,7 @@ static void handle_kickoff_rev_event(
if(b->c1)
tw_rand_reverse_unif(lp->rng);
model_net_event_rc(net_id, lp, PAYLOAD_SZ);
model_net_event_rc2(lp, &m->event_rc);
ns->msg_sent_count--;
tw_rand_reverse_unif(lp->rng);
}
......@@ -267,6 +286,9 @@ static void handle_remote_rev_event(
svr_msg * m,
tw_lp * lp)
{
(void)b;
(void)m;
(void)lp;
ns->msg_recvd_count--;
}
......@@ -276,6 +298,9 @@ static void handle_remote_event(
svr_msg * m,
tw_lp * lp)
{
(void)b;
(void)m;
(void)lp;
ns->msg_recvd_count++;
}
......@@ -285,6 +310,9 @@ static void handle_local_rev_event(
svr_msg * m,
tw_lp * lp)
{
(void)b;
(void)m;
(void)lp;
ns->local_recvd_count--;
}
......@@ -294,6 +322,9 @@ static void handle_local_event(
svr_msg * m,
tw_lp * lp)
{
(void)b;
(void)m;
(void)lp;
ns->local_recvd_count++;
}
/* convert ns to seconds */
......@@ -393,8 +424,8 @@ int main(
model_net_register();
svr_add_lp_type();
if (g_st_ev_trace)
svr_register_trace();
if (g_st_ev_trace || g_st_model_stats)
svr_register_model_types();
codes_mapping_setup();
......
......@@ -19,6 +19,12 @@
#include "codes/rc-stack.h"
#include <vector>
#include <map>
#include <set>
#ifdef ENABLE_CORTEX
#include <cortex/cortex.h>
#include <cortex/topology.h>
#endif
#define DUMP_CONNECTIONS 0
#define CREDIT_SIZE 8
......@@ -63,6 +69,13 @@ struct InterGroupLink {
int src, dest;
};
#ifdef ENABLE_CORTEX
/* Forward declaration, with C linkage, of the Cortex topology descriptor for
 * this model. The object itself is defined at the end of the file. It is only
 * declared when the build defines ENABLE_CORTEX. */
extern "C" {
extern cortex_topology dragonfly_custom_cortex_topology;
}
#endif
static int debug_slot_count = 0;
static long term_ecount, router_ecount, term_rev_ecount, router_rev_ecount;
static long packet_gen = 0, packet_fin = 0;
......@@ -482,7 +495,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params){
// shorthand
dragonfly_param *p = params;
int myRank;
MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
MPI_Comm_rank(MPI_COMM_CODES, &myRank);
int rc = configuration_get_value_int(&config, "PARAMS", "local_vc_size", anno, &p->local_vc_size);
if(rc) {
......@@ -564,7 +577,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params){
rc = configuration_get_value_int(&config, "PARAMS", "num_groups", anno, &p->num_groups);
if(rc) {
printf("Number of groups not specified. Aborting");
MPI_Abort(MPI_COMM_WORLD, 1);
MPI_Abort(MPI_COMM_CODES, 1);
}
rc = configuration_get_value_int(&config, "PARAMS", "num_col_chans", anno, &p->num_col_chans);
if(rc) {
......@@ -756,6 +769,9 @@ void dragonfly_custom_configure(){
if (anno_map->has_unanno_lp > 0){
dragonfly_read_config(NULL, &all_params[anno_map->num_annos]);
}
#ifdef ENABLE_CORTEX
model_net_topology = dragonfly_custom_cortex_topology;
#endif
}
/* report dragonfly statistics like average and maximum packet latency, average number of hops traversed */
......@@ -767,20 +783,20 @@ void dragonfly_custom_report_stats()
int total_minimal_packets, total_nonmin_packets;
long total_gen, total_fin;
MPI_Reduce( &total_hops, &avg_hops, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce( &N_finished_packets, &total_finished_packets, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce( &N_finished_msgs, &total_finished_msgs, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce( &N_finished_chunks, &total_finished_chunks, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce( &total_msg_sz, &final_msg_sz, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce( &dragonfly_total_time, &avg_time, 1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce( &dragonfly_max_latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
MPI_Reduce( &total_hops, &avg_hops, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
MPI_Reduce( &N_finished_packets, &total_finished_packets, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
MPI_Reduce( &N_finished_msgs, &total_finished_msgs, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
MPI_Reduce( &N_finished_chunks, &total_finished_chunks, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
MPI_Reduce( &total_msg_sz, &final_msg_sz, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
MPI_Reduce( &dragonfly_total_time, &avg_time, 1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES);
MPI_Reduce( &dragonfly_max_latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_CODES);
MPI_Reduce( &packet_gen, &total_gen, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce( &packet_fin, &total_fin, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce( &packet_gen, &total_gen, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES);
MPI_Reduce( &packet_fin, &total_fin, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES);
if(routing == ADAPTIVE || routing == PROG_ADAPTIVE)
{
MPI_Reduce(&minimal_count, &total_minimal_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&nonmin_count, &total_nonmin_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&minimal_count, &total_minimal_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_CODES);
MPI_Reduce(&nonmin_count, &total_nonmin_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_CODES);
}
/* print statistics */
......@@ -1824,7 +1840,7 @@ void dragonfly_custom_rsample_fin(router_state * s,
"link traffic for each of the %d links (int64_t) \nsample end time (double) forward events per sample \nreverse events per sample ",
p->radix, p->radix);
fprintf(fp, "\n\nOrdering of links \n%d local (router-router same group) channels \n%d global (router-router remote group)"
" channels \n%d terminal channels", p->intra_grp_radix, p->num_global_channels);
" channels \n %d terminal channels", p->intra_grp_radix, p->num_global_channels, p->num_cn);
fclose(fp);
}
char rt_fn[MAX_NAME_LENGTH];
......@@ -3363,4 +3379,305 @@ struct model_net_method dragonfly_custom_router_method =
(init_f)dragonfly_custom_rsample_init,
NULL,//(final_f)dragonfly_custom_rsample_fin
};
#ifdef ENABLE_CORTEX
static int dragonfly_custom_get_number_of_compute_nodes(void* topo) {
const dragonfly_param * params = &all_params[num_params-1];
if(!params)
return -1.0;
return params->total_terminals;
}
static int dragonfly_custom_get_number_of_routers(void* topo) {
// TODO
const dragonfly_param * params = &all_params[num_params-1];
if(!params)
return -1.0;
return params->total_routers;
}
static double dragonfly_custom_get_router_link_bandwidth(void* topo, router_id_t r1, router_id_t r2) {
// TODO: handle this function for multiple cables between the routers.
// Right now it returns the bandwidth of a single cable only.
// Given two router ids r1 and r2, this function should return the bandwidth (double)
// of the link between the two routers, or 0 if such a link does not exist in the topology.
// The function should return -1 if either router id is invalid.
const dragonfly_param * params = &all_params[num_params-1];
if(!params)
return -1.0;
if(r1 > params->total_routers || r2 > params->total_routers)
return -1.0;
if(r1 < 0 || r2 < 0)
return -1.0;
int gid_r1 = r1 / params->num_routers;
int gid_r2 = r2 / params->num_routers;