Commit 5fcc75db authored by Misbah Mubarak

Adding functional QoS in sequential mode for the 1-D dragonfly

parent 66290294
...@@ -78,6 +78,10 @@ struct terminal_custom_message ...@@ -78,6 +78,10 @@ struct terminal_custom_message
/* for reverse computation */ /* for reverse computation */
int path_type; int path_type;
short last_saved_qos;
short saved_qos_status;
int saved_qos_data;
tw_stime saved_available_time; tw_stime saved_available_time;
tw_stime saved_avg_time; tw_stime saved_avg_time;
tw_stime saved_rcv_time; tw_stime saved_rcv_time;
......
CONT rand
3456 1024
2 2
1 1
1024 128 512 512
...@@ -31,17 +31,17 @@ PARAMS ...@@ -31,17 +31,17 @@ PARAMS
# number of groups in the network # number of groups in the network
num_groups="65"; num_groups="65";
# buffer size in bytes for local virtual channels # buffer size in bytes for local virtual channels
local_vc_size="32768"; local_vc_size="16384";
#buffer size in bytes for global virtual channels #buffer size in bytes for global virtual channels
global_vc_size="32768"; global_vc_size="16384";
#buffer size in bytes for compute node virtual channels #buffer size in bytes for compute node virtual channels
cn_vc_size="32768"; cn_vc_size="32768";
#bandwidth in GiB/s for local channels #bandwidth in GiB/s for local channels
local_bandwidth="25.0"; local_bandwidth="2.0";
# bandwidth in GiB/s for global channels # bandwidth in GiB/s for global channels
global_bandwidth="25.0"; global_bandwidth="2.0";
# bandwidth in GiB/s for compute node-router channels # bandwidth in GiB/s for compute node-router channels
cn_bandwidth="25.0"; cn_bandwidth="2.0";
# Number of row channels # Number of row channels
num_row_chans="1"; num_row_chans="1";
# Number of column channels # Number of column channels
...@@ -54,9 +54,9 @@ PARAMS ...@@ -54,9 +54,9 @@ PARAMS
# number of global channels per router # number of global channels per router
num_global_channels="8"; num_global_channels="8";
# network config file for intra-group connections # network config file for intra-group connections
intra-group-connections="/home/mubarak/codes-online/codes/src/network-workloads/conf/dragonfly-custom/dfdally_8k_intra"; intra-group-connections="../src/network-workloads/conf/dragonfly-custom/dfdally_8k_intra";
# network config file for inter-group connections # network config file for inter-group connections
inter-group-connections="/home/mubarak/codes-online/codes/src/network-workloads/conf/dragonfly-custom/dfdally_8k_inter"; inter-group-connections="../src/network-workloads/conf/dragonfly-custom/dfdally_8k_inter";
# routing protocol to be used # routing protocol to be used
routing="prog-adaptive"; routing="prog-adaptive";
} }
This diff is collapsed.
...@@ -532,7 +532,7 @@ void model_net_base_event( ...@@ -532,7 +532,7 @@ void model_net_base_event(
tw_lp * lp){ tw_lp * lp){
if(m->h.magic != model_net_base_magic) if(m->h.magic != model_net_base_magic)
printf("\n LP ID mismatched %llu ", lp->gid); printf("\n LP ID mismatched %llu %d ", lp->gid);
assert(m->h.magic == model_net_base_magic); assert(m->h.magic == model_net_base_magic);
......
...@@ -314,6 +314,7 @@ static model_net_event_return model_net_event_impl_base( ...@@ -314,6 +314,7 @@ static model_net_event_return model_net_event_impl_base(
void const * self_event, void const * self_event,
tw_lp *sender) { tw_lp *sender) {
if (remote_event_size + self_event_size + sizeof(model_net_wrap_msg) if (remote_event_size + self_event_size + sizeof(model_net_wrap_msg)
> g_tw_msg_sz){ > g_tw_msg_sz){
tw_error(TW_LOC, "Error: model_net trying to transmit an event of size " tw_error(TW_LOC, "Error: model_net trying to transmit an event of size "
...@@ -328,11 +329,13 @@ static model_net_event_return model_net_event_impl_base( ...@@ -328,11 +329,13 @@ static model_net_event_return model_net_event_impl_base(
tw_lpid dest_mn_lp = model_net_find_local_device_mctx(net_id, recv_map_ctx, tw_lpid dest_mn_lp = model_net_find_local_device_mctx(net_id, recv_map_ctx,
final_dest_lp); final_dest_lp);
if (src_mn_lp == dest_mn_lp && message_size < (uint64_t)codes_node_eager_limit) if ( src_mn_lp == dest_mn_lp && message_size < (uint64_t)codes_node_eager_limit)
{
printf("\n Calling model-net noop event! %d %d %s", src_mn_lp, dest_mn_lp, category);
return model_net_noop_event(final_dest_lp, is_pull, offset, message_size, return model_net_noop_event(final_dest_lp, is_pull, offset, message_size,
remote_event_size, remote_event, self_event_size, self_event, remote_event_size, remote_event, self_event_size, self_event,
sender); sender);
}
tw_stime poffset = codes_local_latency(sender); tw_stime poffset = codes_local_latency(sender);
if (mn_in_sequence){ if (mn_in_sequence){
tw_stime tmp = mn_msg_offset; tw_stime tmp = mn_msg_offset;
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "lammps.h" #include "lammps.h"
#include "nekbone_swm_user_code.h" #include "nekbone_swm_user_code.h"
#include "nearest_neighbor_swm_user_code.h" #include "nearest_neighbor_swm_user_code.h"
#include "all_to_one_swm_user_code.h"
#define ALLREDUCE_SHORT_MSG_SIZE 2048 #define ALLREDUCE_SHORT_MSG_SIZE 2048
...@@ -754,7 +755,6 @@ static void workload_caller(void * arg) ...@@ -754,7 +755,6 @@ static void workload_caller(void * arg)
{ {
shared_context* sctx = static_cast<shared_context*>(arg); shared_context* sctx = static_cast<shared_context*>(arg);
//printf("\n workload name %s ", sctx->workload_name);
if(strcmp(sctx->workload_name, "lammps") == 0) if(strcmp(sctx->workload_name, "lammps") == 0)
{ {
LAMMPS_SWM * lammps_swm = static_cast<LAMMPS_SWM*>(sctx->swm_obj); LAMMPS_SWM * lammps_swm = static_cast<LAMMPS_SWM*>(sctx->swm_obj);
...@@ -770,6 +770,11 @@ static void workload_caller(void * arg) ...@@ -770,6 +770,11 @@ static void workload_caller(void * arg)
NearestNeighborSWMUserCode * nn_swm = static_cast<NearestNeighborSWMUserCode*>(sctx->swm_obj); NearestNeighborSWMUserCode * nn_swm = static_cast<NearestNeighborSWMUserCode*>(sctx->swm_obj);
nn_swm->call(); nn_swm->call();
} }
else if(strcmp(sctx->workload_name, "incast") == 0)
{
AllToOneSWMUserCode * incast_swm = static_cast<AllToOneSWMUserCode*>(sctx->swm_obj);
incast_swm->call();
}
} }
static int comm_online_workload_load(const char * params, int app_id, int rank) static int comm_online_workload_load(const char * params, int app_id, int rank)
{ {
...@@ -807,10 +812,13 @@ static int comm_online_workload_load(const char * params, int app_id, int rank) ...@@ -807,10 +812,13 @@ static int comm_online_workload_load(const char * params, int app_id, int rank)
{ {
path.append("/skeleton.json"); path.append("/skeleton.json");
} }
else if(strcmp(o_params->workload_name, "incast") == 0)
{
path.append("/incast.json");
}
else else
tw_error(TW_LOC, "\n Undefined workload type %s ", o_params->workload_name); tw_error(TW_LOC, "\n Undefined workload type %s ", o_params->workload_name);
//printf("\n path %s ", path.c_str());
try { try {
std::ifstream jsonFile(path.c_str()); std::ifstream jsonFile(path.c_str());
boost::property_tree::json_parser::read_json(jsonFile, root); boost::property_tree::json_parser::read_json(jsonFile, root);
...@@ -837,6 +845,11 @@ static int comm_online_workload_load(const char * params, int app_id, int rank) ...@@ -837,6 +845,11 @@ static int comm_online_workload_load(const char * params, int app_id, int rank)
NearestNeighborSWMUserCode * nn_swm = new NearestNeighborSWMUserCode(root, generic_ptrs); NearestNeighborSWMUserCode * nn_swm = new NearestNeighborSWMUserCode(root, generic_ptrs);
my_ctx->sctx.swm_obj = (void*)nn_swm; my_ctx->sctx.swm_obj = (void*)nn_swm;
} }
else if(strcmp(o_params->workload_name, "incast") == 0)
{
AllToOneSWMUserCode * incast_swm = new AllToOneSWMUserCode(root, generic_ptrs);
my_ctx->sctx.swm_obj = (void*)incast_swm;
}
if(global_prod_thread == NULL) if(global_prod_thread == NULL)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment