Commit 66ba3dda authored by Misbah Mubarak
Browse files

Adding statistics collection using lp-io in model-net API (comes from the...

Add statistics collection using lp-io to the model-net API (derived from the simple-net stats collection); works for the simple-net, torus, and dragonfly network models.
parent 36405e04
...@@ -19,6 +19,10 @@ ...@@ -19,6 +19,10 @@
#define MEAN_PROCESS 1.0 #define MEAN_PROCESS 1.0
#define MAX_NAME_LENGTH 256 #define MAX_NAME_LENGTH 256
#define CATEGORY_NAME_MAX 16
#define CATEGORY_MAX 12
// debugging parameters // debugging parameters
#define TRACK 235221 #define TRACK 235221
#define PRINT_ROUTER_TABLE 1 #define PRINT_ROUTER_TABLE 1
...@@ -109,6 +113,7 @@ struct terminal_state ...@@ -109,6 +113,7 @@ struct terminal_state
// Terminal generate, sends and arrival T_SEND, T_ARRIVAL, T_GENERATE // Terminal generate, sends and arrival T_SEND, T_ARRIVAL, T_GENERATE
// Router-Router Intra-group sends and receives RR_LSEND, RR_LARRIVE // Router-Router Intra-group sends and receives RR_LSEND, RR_LARRIVE
// Router-Router Inter-group sends and receives RR_GSEND, RR_GARRIVE // Router-Router Inter-group sends and receives RR_GSEND, RR_GARRIVE
struct mn_stats dragonfly_stats_array[CATEGORY_MAX];
}; };
/* terminal event type (1-4) */ /* terminal event type (1-4) */
enum event_t enum event_t
......
...@@ -10,12 +10,16 @@ ...@@ -10,12 +10,16 @@
#include "ross.h" #include "ross.h"
#include "codes/lp-type-lookup.h" #include "codes/lp-type-lookup.h"
#include "codes/configuration.h" #include "codes/configuration.h"
#include "codes/lp-io.h"
#define MAX_NAME_LENGTH 256 #define MAX_NAME_LENGTH 256
#define CATEGORY_NAME_MAX 16
#define CATEGORY_MAX 12
typedef struct simplenet_param simplenet_param; typedef struct simplenet_param simplenet_param;
typedef struct dragonfly_param dragonfly_param; typedef struct dragonfly_param dragonfly_param;
typedef struct torus_param torus_param; typedef struct torus_param torus_param;
typedef struct mn_stats mn_stats;
enum NETWORKS enum NETWORKS
{ {
...@@ -24,6 +28,19 @@ enum NETWORKS ...@@ -24,6 +28,19 @@ enum NETWORKS
DRAGONFLY DRAGONFLY
}; };
/* data structure for tracking network statistics */
/*
 * One record holds the counters for a single communication category of one
 * LP. The network models keep an array of CATEGORY_MAX of these per LP and
 * look records up by name with model_net_find_stats().
 */
struct mn_stats
{
/* NUL-terminated category name; an empty string marks an unused slot */
char category[CATEGORY_NAME_MAX];
/* number of sends counted for this category */
long send_count;
/* total bytes sent */
long send_bytes;
/* accumulated send time; each network model adds its own per-send estimate */
tw_stime send_time;
/* number of receives counted for this category */
long recv_count;
/* total bytes received */
long recv_bytes;
/* accumulated receive time; each network model adds its own per-receive value */
tw_stime recv_time;
/* largest event size recorded for this category */
long max_event_size;
};
/* structs for initializing a network/ specifying network parameters */ /* structs for initializing a network/ specifying network parameters */
struct simplenet_param struct simplenet_param
{ {
...@@ -130,6 +147,15 @@ int model_net_get_packet_size(int net_id); ...@@ -130,6 +147,15 @@ int model_net_get_packet_size(int net_id);
void model_net_add_lp_type(int net_id); void model_net_add_lp_type(int net_id);
void model_net_report_stats(int net_id); void model_net_report_stats(int net_id);
/* writing model-net statistics */
void model_net_write_stats(tw_lpid lpid, mn_stats* stat);
/* printing model-net statistics */
void model_net_print_stats(tw_lpid lpid, mn_stats mn_stats_array[]);
/* find model-net statistics */
mn_stats* model_net_find_stats(const char* category, mn_stats mn_stats_array[]);
#endif /* MODELNET_H */ #endif /* MODELNET_H */
/* /*
......
/* /*
* Copyright (C) 2013 University of Chicago. * * Copyright (C) 2013, University of Chicago
* See COPYRIGHT notice in top-level directory. * *
* * * See COPYRIGHT notice in top-level directory.
*/ * */
#ifndef INC_torus_h #ifndef INC_torus_h
#define INC_torus_h #define INC_torus_h
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <ross.h> #include <ross.h>
#include <assert.h> #include <assert.h>
#include "codes/lp-io.h"
#include "codes/codes_mapping.h" #include "codes/codes_mapping.h"
#include "codes/codes.h" #include "codes/codes.h"
#include "codes/model-net.h" #include "codes/model-net.h"
...@@ -18,10 +19,12 @@ ...@@ -18,10 +19,12 @@
#define CHUNK_SIZE 32 #define CHUNK_SIZE 32
#define DEBUG 1 #define DEBUG 1
#define MEAN_INTERVAL 100 #define MEAN_INTERVAL 100
#define CATEGORY_NAME_MAX 16
#define MAX_NAME_LENGTH 256 #define MAX_NAME_LENGTH 256
#define TRACE -1 #define TRACE -1
#define CATEGORY_NAME_MAX 16
#define CATEGORY_MAX 12
/* Torus network model implementation of codes, implements the modelnet API */ /* Torus network model implementation of codes, implements the modelnet API */
// Total number of nodes in torus, calculate in main // Total number of nodes in torus, calculate in main
...@@ -115,6 +118,9 @@ struct nodes_state ...@@ -115,6 +118,9 @@ struct nodes_state
/* neighbor LP ids for this torus node */ /* neighbor LP ids for this torus node */
int* neighbour_minus_lpID; int* neighbour_minus_lpID;
int* neighbour_plus_lpID; int* neighbour_plus_lpID;
/* records torus statistics for this LP having different communication categories */
struct mn_stats torus_stats_array[CATEGORY_MAX];
}; };
struct nodes_message struct nodes_message
......
...@@ -202,7 +202,7 @@ void packet_generate(terminal_state * s, tw_bf * bf, terminal_message * msg, tw_ ...@@ -202,7 +202,7 @@ void packet_generate(terminal_state * s, tw_bf * bf, terminal_message * msg, tw_
tw_stime ts; tw_stime ts;
tw_event *e; tw_event *e;
terminal_message *m; terminal_message *m;
int i; int i, total_event_size;
num_chunks = msg->packet_size / CHUNK_SIZE; num_chunks = msg->packet_size / CHUNK_SIZE;
msg->packet_ID = lp->gid + g_tw_nlp * s->packet_counter + tw_rand_integer(lp->rng, 0, lp->gid + g_tw_nlp * s->packet_counter); msg->packet_ID = lp->gid + g_tw_nlp * s->packet_counter + tw_rand_integer(lp->rng, 0, lp->gid + g_tw_nlp * s->packet_counter);
msg->travel_start_time = tw_now(lp); msg->travel_start_time = tw_now(lp);
...@@ -246,6 +246,14 @@ void packet_generate(terminal_state * s, tw_bf * bf, terminal_message * msg, tw_ ...@@ -246,6 +246,14 @@ void packet_generate(terminal_state * s, tw_bf * bf, terminal_message * msg, tw_
exit(-1); exit(-1);
} //else } //else
} // for } // for
total_event_size = dragonfly_get_msg_sz() + msg->remote_event_size_bytes + msg->local_event_size_bytes;
mn_stats* stat;
stat = model_net_find_stats(msg->category, s->dragonfly_stats_array);
stat->send_count++;
stat->send_bytes += msg->packet_size;
stat->send_time += (1/cn_bandwidth) * msg->packet_size;
if(stat->max_event_size < total_event_size)
stat->max_event_size = total_event_size;
return; return;
} }
...@@ -336,6 +344,11 @@ if( msg->packet_ID == TRACK && msg->chunk_id == num_chunks-1) ...@@ -336,6 +344,11 @@ if( msg->packet_ID == TRACK && msg->chunk_id == num_chunks-1)
if(msg->chunk_id == num_chunks-1) if(msg->chunk_id == num_chunks-1)
{ {
bf->c2 = 1; bf->c2 = 1;
mn_stats* stat = model_net_find_stats(msg->category, s->dragonfly_stats_array);
stat->recv_count++;
stat->recv_bytes += msg->packet_size;
stat->recv_time += tw_now(lp) - msg->travel_start_time;
N_finished_packets++; N_finished_packets++;
dragonfly_total_time += tw_now( lp ) - msg->travel_start_time; dragonfly_total_time += tw_now( lp ) - msg->travel_start_time;
total_hops += msg->my_N_hop; total_hops += msg->my_N_hop;
...@@ -455,6 +468,7 @@ void ...@@ -455,6 +468,7 @@ void
dragonfly_terminal_final( terminal_state * s, dragonfly_terminal_final( terminal_state * s,
tw_lp * lp ) tw_lp * lp )
{ {
model_net_print_stats(lp->gid, s->dragonfly_stats_array);
} }
void dragonfly_router_final(router_state * s, void dragonfly_router_final(router_state * s,
...@@ -796,6 +810,11 @@ void terminal_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_message ...@@ -796,6 +810,11 @@ void terminal_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_message
for(i = 0; i < num_chunks; i++) for(i = 0; i < num_chunks; i++)
tw_rand_reverse_unif(lp->rng); tw_rand_reverse_unif(lp->rng);
mn_stats* stat;
stat = model_net_find_stats(msg->category, s->dragonfly_stats_array);
stat->send_count--;
stat->send_bytes -= msg->packet_size;
stat->send_time -= (1/cn_bandwidth) * msg->packet_size;
} }
break; break;
...@@ -816,6 +835,11 @@ void terminal_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_message ...@@ -816,6 +835,11 @@ void terminal_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_message
s->next_credit_available_time = msg->saved_credit_time; s->next_credit_available_time = msg->saved_credit_time;
if(bf->c2) if(bf->c2)
{ {
mn_stats* stat;
stat = model_net_find_stats(msg->category, s->dragonfly_stats_array);
stat->recv_count--;
stat->recv_bytes -= msg->packet_size;
stat->recv_time -= tw_now(lp) - msg->travel_start_time;
N_finished_packets--; N_finished_packets--;
dragonfly_total_time -= (tw_now(lp) - msg->travel_start_time); dragonfly_total_time -= (tw_now(lp) - msg->travel_start_time);
total_hops -= msg->my_N_hop; total_hops -= msg->my_N_hop;
......
...@@ -24,11 +24,13 @@ static struct model_net_method* method_array[] = ...@@ -24,11 +24,13 @@ static struct model_net_method* method_array[] =
static int model_net_get_msg_sz(int net_id); static int model_net_get_msg_sz(int net_id);
static lp_io_handle handle;
int model_net_setup(char* name, int model_net_setup(char* name,
int packet_size, int packet_size,
const void* net_params) const void* net_params)
{ {
int i; int i, ret;
/* find struct for underlying method (according to configuration file) */ /* find struct for underlying method (according to configuration file) */
for(i=0; method_array[i] != NULL; i++) for(i=0; method_array[i] != NULL; i++)
{ {
...@@ -37,6 +39,11 @@ int model_net_setup(char* name, ...@@ -37,6 +39,11 @@ int model_net_setup(char* name,
method_array[i]->mn_setup(net_params); method_array[i]->mn_setup(net_params);
method_array[i]->packet_size = packet_size; method_array[i]->packet_size = packet_size;
model_net_add_lp_type(i); model_net_add_lp_type(i);
ret = lp_io_prepare(name, LP_IO_UNIQ_SUFFIX, &handle, MPI_COMM_WORLD);
if(ret < 0)
{
return -1;
}
return(i); return(i);
} }
} }
...@@ -44,6 +51,88 @@ int model_net_setup(char* name, ...@@ -44,6 +51,88 @@ int model_net_setup(char* name,
return -1; // indicating error return -1; // indicating error
} }
/*
 * Format one statistics record for an LP and hand it to lp-io.
 *
 * lpid: global id of the LP the record belongs to; it is printed in the
 *       record and passed to lp_io_write() as the writing LP.
 * stat: counters to report; stat->category selects the lp-io identifier
 *       "model-net-category-<category>".
 *
 * The record is a single tab-separated, newline-terminated text line.
 * A non-zero return from lp_io_write() trips the assert.
 */
void model_net_write_stats(tw_lpid lpid, struct mn_stats* stat)
{
    int ret;
    /* The 19-character prefix plus a full-length category name does not fit
     * in 32 bytes, so size the identifier from its parts; sizeof of the
     * string literal already accounts for the terminating NUL.
     * NOTE(review): confirm lp-io accepts identifiers of this length. */
    char id[sizeof("model-net-category-") + sizeof(stat->category)];
    char data[1024];

    snprintf(id, sizeof(id), "model-net-category-%s", stat->category);
    snprintf(data, sizeof(data),
        "lp:%ld\tsend_count:%ld\tsend_bytes:%ld\tsend_time:%f\t"
        "recv_count:%ld\trecv_bytes:%ld\trecv_time:%f\tmax_event_size:%ld\n",
        (long)lpid,
        stat->send_count,
        stat->send_bytes,
        stat->send_time,
        stat->recv_count,
        stat->recv_bytes,
        stat->recv_time,
        stat->max_event_size);

    ret = lp_io_write(lpid, id, strlen(data), data);
    assert(ret == 0);

    return;
}
/*
 * Write the statistics of every category in use for one LP, followed by a
 * record named "all" that aggregates them.
 *
 * lpid:           global id of the LP the statistics belong to.
 * mn_stats_array: the LP's table of CATEGORY_MAX records; slots whose
 *                 category name is empty are skipped.
 *
 * Counts, bytes and times are summed into the aggregate; max_event_size is
 * the maximum over the categories.
 */
void model_net_print_stats(tw_lpid lpid, mn_stats mn_stats_array[])
{
    struct mn_stats all;
    int slot;

    memset(&all, 0, sizeof(all));
    sprintf(all.category, "all");

    for(slot = 0; slot < CATEGORY_MAX; slot++)
    {
        struct mn_stats *cur = &mn_stats_array[slot];

        /* an empty name marks a slot that was never claimed */
        if(cur->category[0] == '\0')
            continue;

        all.send_count += cur->send_count;
        all.send_bytes += cur->send_bytes;
        all.send_time += cur->send_time;
        all.recv_count += cur->recv_count;
        all.recv_bytes += cur->recv_bytes;
        all.recv_time += cur->recv_time;
        if(all.max_event_size < cur->max_event_size)
            all.max_event_size = cur->max_event_size;

        model_net_write_stats(lpid, cur);
    }

    model_net_write_stats(lpid, &all);
}
/*
 * Return the statistics record for a category, claiming the first unused
 * slot if the category has not been seen before.
 *
 * category:       NUL-terminated category name.
 * mn_stats_array: the LP's table of CATEGORY_MAX records; a slot with an
 *                 empty name is unused.
 *
 * Names longer than the category field can hold are truncated when stored,
 * and lookups compare only that many characters, so an over-long name always
 * maps to the same slot instead of overrunning the buffer. For names that
 * fit, the behavior is the same as an exact string match.
 *
 * The table being full with no match is a programming error (assert).
 */
struct mn_stats* model_net_find_stats(const char* category, mn_stats mn_stats_array[])
{
    /* longest name the category field can hold, excluding the NUL */
    const size_t name_max = sizeof(mn_stats_array[0].category) - 1;
    int i;
    int new_flag = 0;
    int found_flag = 0;

    for(i=0; i<CATEGORY_MAX; i++)
    {
        /* slots fill in order, so the first empty one ends the search */
        if(mn_stats_array[i].category[0] == '\0')
        {
            found_flag = 1;
            new_flag = 1;
            break;
        }
        if(strncmp(category, mn_stats_array[i].category, name_max) == 0)
        {
            found_flag = 1;
            new_flag = 0;
            break;
        }
    }
    assert(found_flag);

    if(new_flag)
    {
        /* bounded copy: never write past the fixed-size category field */
        size_t len = strlen(category);
        if(len > name_max)
            len = name_max;
        memcpy(mn_stats_array[i].category, category, len);
        mn_stats_array[i].category[len] = '\0';
    }
    return(&mn_stats_array[i]);
}
void model_net_event( void model_net_event(
int net_id, int net_id,
char* category, char* category,
...@@ -348,7 +437,11 @@ void model_net_report_stats(int net_id) ...@@ -348,7 +437,11 @@ void model_net_report_stats(int net_id)
// TODO: ADd checks by network names // TODO: ADd checks by network names
// // Add dragonfly and torus network models // // Add dragonfly and torus network models
return method_array[net_id]->mn_report_stats(); method_array[net_id]->mn_report_stats();
int ret = lp_io_flush(handle, MPI_COMM_WORLD);
assert(ret == 0);
return;
} }
/* registers the lp type */ /* registers the lp type */
void model_net_add_lp_type(int net_id) void model_net_add_lp_type(int net_id)
......
...@@ -30,25 +30,12 @@ enum sn_event_type ...@@ -30,25 +30,12 @@ enum sn_event_type
}; };
/* data structure for tracking network statistics */
/*
 * One record holds the counters for a single communication category of one
 * simplenet LP; sn_state keeps CATEGORY_MAX of them.
 */
struct sn_stats
{
/* NUL-terminated category name; an empty string marks an unused slot */
char category[CATEGORY_NAME_MAX];
/* number of sends counted for this category */
long send_count;
/* total bytes sent */
long send_bytes;
/* accumulated send time */
tw_stime send_time;
/* number of receives counted for this category */
long recv_count;
/* total bytes received */
long recv_bytes;
/* accumulated receive time */
tw_stime recv_time;
/* largest event size recorded for this category */
long max_event_size;
};
struct sn_state struct sn_state
{ {
/* next idle times for network card, both inbound and outbound */ /* next idle times for network card, both inbound and outbound */
tw_stime net_send_next_idle; tw_stime net_send_next_idle;
tw_stime net_recv_next_idle; tw_stime net_recv_next_idle;
struct sn_stats sn_stats_array[CATEGORY_MAX]; struct mn_stats sn_stats_array[CATEGORY_MAX];
}; };
struct sn_message struct sn_message
...@@ -184,7 +171,6 @@ static void handle_msg_start_event( ...@@ -184,7 +171,6 @@ static void handle_msg_start_event(
tw_bf * b, tw_bf * b,
sn_message * m, sn_message * m,
tw_lp * lp); tw_lp * lp);
static struct sn_stats* find_stats(const char* category, sn_state *ns);
/* returns pointer to LP information for simplenet module */ /* returns pointer to LP information for simplenet module */
static const tw_lptype* sn_get_lp_type() static const tw_lptype* sn_get_lp_type()
...@@ -279,59 +265,11 @@ static void sn_rev_event( ...@@ -279,59 +265,11 @@ static void sn_rev_event(
return; return;
} }
/*
 * Format one simplenet statistics record for an LP and pass it to lp-io
 * under the identifier "sn-category-<category>".
 *
 * lp:   the LP the record belongs to; its gid is printed in the record and
 *       passed to lp_io_write().
 * stat: counters to report.
 *
 * A non-zero return from lp_io_write() trips the assert.
 */
static void write_stats(tw_lp* lp, struct sn_stats* stat)
{
    char record[1024];
    char ident[32];
    int rc;

    sprintf(ident, "sn-category-%s", stat->category);

    /* one tab-separated, newline-terminated line per record */
    sprintf(record, "lp:%ld\tsend_count:%ld\tsend_bytes:%ld\tsend_time:%f\t"
        "recv_count:%ld\trecv_bytes:%ld\trecv_time:%f\tmax_event_size:%ld\n",
        (long)lp->gid,
        stat->send_count, stat->send_bytes, stat->send_time,
        stat->recv_count, stat->recv_bytes, stat->recv_time,
        stat->max_event_size);

    rc = lp_io_write(lp->gid, ident, strlen(record), record);
    assert(rc == 0);
}
static void sn_finalize( static void sn_finalize(
sn_state * ns, sn_state * ns,
tw_lp * lp) tw_lp * lp)
{ {
int i; model_net_print_stats(lp->gid, &ns->sn_stats_array[0]);
struct sn_stats all;
memset(&all, 0, sizeof(all));
sprintf(all.category, "all");
for(i=0; i<CATEGORY_MAX; i++)
{
if(strlen(ns->sn_stats_array[i].category) > 0)
{
all.send_count += ns->sn_stats_array[i].send_count;
all.send_bytes += ns->sn_stats_array[i].send_bytes;
all.send_time += ns->sn_stats_array[i].send_time;
all.recv_count += ns->sn_stats_array[i].recv_count;
all.recv_bytes += ns->sn_stats_array[i].recv_bytes;
all.recv_time += ns->sn_stats_array[i].recv_time;
if(ns->sn_stats_array[i].max_event_size > all.max_event_size)
all.max_event_size = ns->sn_stats_array[i].max_event_size;
write_stats(lp, &ns->sn_stats_array[i]);
}
}
write_stats(lp, &all);
return; return;
} }
...@@ -362,11 +300,11 @@ static void handle_msg_ready_rev_event( ...@@ -362,11 +300,11 @@ static void handle_msg_ready_rev_event(
sn_message * m, sn_message * m,
tw_lp * lp) tw_lp * lp)
{ {
struct sn_stats* stat; struct mn_stats* stat;
ns->net_recv_next_idle = m->net_recv_next_idle_saved; ns->net_recv_next_idle = m->net_recv_next_idle_saved;
stat = find_stats(m->category, ns); stat = model_net_find_stats(m->category, ns->sn_stats_array);
stat->recv_count--; stat->recv_count--;
stat->recv_bytes -= m->net_msg_size_bytes; stat->recv_bytes -= m->net_msg_size_bytes;
stat->recv_time -= rate_to_ns(m->net_msg_size_bytes, global_net_bw_mbs); stat->recv_time -= rate_to_ns(m->net_msg_size_bytes, global_net_bw_mbs);
...@@ -386,11 +324,11 @@ static void handle_msg_ready_event( ...@@ -386,11 +324,11 @@ static void handle_msg_ready_event(
tw_stime recv_queue_time = 0; tw_stime recv_queue_time = 0;
tw_event *e_new; tw_event *e_new;
sn_message *m_new; sn_message *m_new;
struct sn_stats* stat; struct mn_stats* stat;
//printf("handle_msg_ready_event(), lp %llu.\n", (unsigned long long)lp->gid); //printf("handle_msg_ready_event(), lp %llu.\n", (unsigned long long)lp->gid);
/* add statistics */ /* add statistics */
stat = find_stats(m->category, ns); stat = model_net_find_stats(m->category, ns->sn_stats_array);
stat->recv_count++; stat->recv_count++;
stat->recv_bytes += m->net_msg_size_bytes; stat->recv_bytes += m->net_msg_size_bytes;
stat->recv_time += rate_to_ns(m->net_msg_size_bytes, global_net_bw_mbs); stat->recv_time += rate_to_ns(m->net_msg_size_bytes, global_net_bw_mbs);
...@@ -431,8 +369,6 @@ static void handle_msg_start_rev_event( ...@@ -431,8 +369,6 @@ static void handle_msg_start_rev_event(
sn_message * m, sn_message * m,
tw_lp * lp) tw_lp * lp)
{ {
struct sn_stats* stat;
ns->net_send_next_idle = m->net_send_next_idle_saved; ns->net_send_next_idle = m->net_send_next_idle_saved;
if(m->local_event_size_bytes > 0) if(m->local_event_size_bytes > 0)
...@@ -440,7 +376,8 @@ static void handle_msg_start_rev_event( ...@@ -440,7 +376,8 @@ static void handle_msg_start_rev_event(
codes_local_latency_reverse(lp); codes_local_latency_reverse(lp);
} }
stat = find_stats(m->category, ns); mn_stats* stat;
stat = model_net_find_stats(m->category, ns->sn_stats_array);
stat->send_count--; stat->send_count--;
stat->send_bytes -= m->net_msg_size_bytes; stat->send_bytes -= m->net_msg_size_bytes;
stat->send_time -= global_net_startup_ns + rate_to_ns(m->net_msg_size_bytes, global_net_bw_mbs); stat->send_time -= global_net_startup_ns + rate_to_ns(m->net_msg_size_bytes, global_net_bw_mbs);
...@@ -460,7 +397,7 @@ static void handle_msg_start_event( ...@@ -460,7 +397,7 @@ static void handle_msg_start_event(
tw_event *e_new; tw_event *e_new;
sn_message *m_new; sn_message *m_new;
tw_stime send_queue_time = 0; tw_stime send_queue_time = 0;
struct sn_stats* stat; mn_stats* stat;
int mapping_grp_id, mapping_type_id, mapping_rep_id, mapping_offset; int mapping_grp_id, mapping_type_id, mapping_rep_id, mapping_offset;
tw_lpid dest_id; tw_lpid dest_id;
char lp_type_name[MAX_NAME_LENGTH], lp_group_name[MAX_NAME_LENGTH]; char lp_type_name[MAX_NAME_LENGTH], lp_group_name[MAX_NAME_LENGTH];
...@@ -470,7 +407,7 @@ static void handle_msg_start_event( ...@@ -470,7 +407,7 @@ static void handle_msg_start_event(
//printf("handle_msg_start_event(), lp %llu.\n", (unsigned long long)lp->gid); //printf("handle_msg_start_event(), lp %llu.\n", (unsigned long long)lp->gid);
/* add statistics */ /* add statistics */
stat = find_stats(m->category, ns); stat = model_net_find_stats(m->category, ns->sn_stats_array);
stat->send_count++; stat->send_count++;
stat->send_bytes += m->net_msg_size_bytes; stat->send_bytes += m->net_msg_size_bytes;