diff --git a/codes/codes.h b/codes/codes.h index 002c66eb4ad420dfa7e703d4879e64481c3d227b..a65ce2c6b7273984e31cc44049c9ff9e65a11948 100644 --- a/codes/codes.h +++ b/codes/codes.h @@ -97,6 +97,8 @@ static inline void codes_local_latency_reverse(tw_lp *lp) return; } +void codes_comm_update(); + #ifdef __cplusplus } #endif diff --git a/src/network-workloads/conf/modelnet-mpi-test-dfly-amg-1728.conf b/src/network-workloads/conf/modelnet-mpi-test-dfly-amg-1728.conf index ed57d432bc8a3ad88a2ff4b34129977d7ed5fbc3..f393cbf2db73bbdcd2c55f6ae5ce812b38d14f0d 100644 --- a/src/network-workloads/conf/modelnet-mpi-test-dfly-amg-1728.conf +++ b/src/network-workloads/conf/modelnet-mpi-test-dfly-amg-1728.conf @@ -23,6 +23,6 @@ PARAMS local_bandwidth="5.25"; global_bandwidth="4.7"; cn_bandwidth="5.25"; - message_size="592"; + message_size="608"; routing="adaptive"; } diff --git a/src/network-workloads/model-net-dumpi-traces-dump.c b/src/network-workloads/model-net-dumpi-traces-dump.c index 80fcf981f6b0436cd07d68f0b4a817cbaf390476..c1b3944c56ae8f970c0fb6236b1df575093e090e 100644 --- a/src/network-workloads/model-net-dumpi-traces-dump.c +++ b/src/network-workloads/model-net-dumpi-traces-dump.c @@ -414,6 +414,11 @@ int main( int argc, char** argv ) workload_type[0]='\0'; tw_opt_add(app_opt); tw_init(&argc, &argv); +#ifdef USE_RDAMARIS + if(g_st_ross_rank) + { // keep damaris ranks from running code between here up until tw_end() +#endif + codes_comm_update(); if(strlen(workload_file) == 0) { @@ -423,10 +428,10 @@ int main( int argc, char** argv ) return -1; } - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank(MPI_COMM_CODES, &rank); + MPI_Comm_size(MPI_COMM_CODES, &nprocs); - configuration_load(argv[2], MPI_COMM_WORLD, &config); + configuration_load(argv[2], MPI_COMM_CODES, &config); nw_add_lp_type(); @@ -446,20 +451,20 @@ int main( int argc, char** argv ) double total_avg_comp_time; long overall_sends, overall_recvs, overall_waits, overall_cols; - MPI_Reduce(&num_bytes_sent, &total_bytes_sent, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&num_bytes_recvd, &total_bytes_recvd, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&avg_time, &avg_run_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - - MPI_Reduce(&avg_recv_time, &total_avg_recv_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&avg_comm_time, &avg_comm_run_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&avg_col_time, &avg_col_run_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&avg_wait_time, &total_avg_wait_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&avg_send_time, &total_avg_send_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&avg_compute_time, &total_avg_comp_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_sends, &overall_sends, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_recvs, &overall_recvs, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_waits, &overall_waits, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_collectives, &overall_cols, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_bytes_sent, &total_bytes_sent, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&num_bytes_recvd, &total_bytes_recvd, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&avg_time, &avg_run_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); + + MPI_Reduce(&avg_recv_time, &total_avg_recv_time, 1, MPI_DOUBLE, MPI_SUM, 0, 
MPI_COMM_CODES); + MPI_Reduce(&avg_comm_time, &avg_comm_run_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&avg_col_time, &avg_col_run_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&avg_wait_time, &total_avg_wait_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&avg_send_time, &total_avg_send_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&avg_compute_time, &total_avg_comp_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&total_sends, &overall_sends, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&total_recvs, &overall_recvs, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&total_waits, &overall_waits, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&total_collectives, &overall_cols, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); if(!g_tw_mynode) printf("\n Total bytes sent %lld recvd %lld \n avg runtime %lf \n avg comm time %lf avg compute time %lf \n avg send time %lf \n avg recv time %lf \n avg wait time %lf \n total sends %ld total recvs %ld total waits %ld total collectives %ld ", total_bytes_sent, total_bytes_recvd, @@ -470,6 +475,9 @@ int main( int argc, char** argv ) total_avg_recv_time/num_net_lps, total_avg_wait_time/num_net_lps, overall_sends, overall_recvs, overall_waits, overall_cols); +#ifdef USE_RDAMARIS + } // end if(g_st_ross_rank) +#endif tw_end(); return 0; diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 4053829f1e1532e7813d0ba48bbca289dc4e631f..801ebcb8732e945e425e5c1762c80bb4319557b2 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -2626,6 +2626,11 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) workload_type[0]='\0'; tw_opt_add(app_opt); tw_init(argc, argv); +#ifdef USE_RDAMARIS + if(g_st_ross_rank) + { // keep damaris ranks from running code between here up until tw_end() +#endif + codes_comm_update(); if(strcmp(workload_type, "dumpi") != 0) { @@ -2793,7 +2798,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) MPI_Reduce(&max_recv_time, &total_max_recv_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_CODES); MPI_Reduce(&avg_wait_time, &total_avg_wait_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); MPI_Reduce(&avg_send_time, &total_avg_send_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce(&total_syn_data, &g_total_syn_data, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_syn_data, &g_total_syn_data, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); assert(num_net_traces); @@ -2822,6 +2827,9 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) if(alloc_spec) codes_jobmap_destroy(jobmap_ctx); +#ifdef USE_RDAMARIS + } // end if(g_st_ross_rank) +#endif tw_end(); return 0; diff --git a/src/network-workloads/model-net-synthetic-custom-dfly.c b/src/network-workloads/model-net-synthetic-custom-dfly.c index 43212c1bcb24d69153b9767ef8b4acc6cb90c8e1..a07ebedc4459a7e1a208448e4454b8f6f798a660 100644 --- a/src/network-workloads/model-net-synthetic-custom-dfly.c +++ b/src/network-workloads/model-net-synthetic-custom-dfly.c @@ -407,6 +407,11 @@ int main( tw_opt_add(app_opt); tw_init(&argc, &argv); +#ifdef USE_RDAMARIS + if(g_st_ross_rank) + { // keep damaris ranks from running code between here up until tw_end() +#endif + codes_comm_update(); if(argc < 2) { @@ -470,6 +475,9 @@ int main( assert(ret == 0 || !"lp_io_flush failure"); } model_net_report_stats(net_id); +#ifdef USE_RDAMARIS + } // end 
if(g_st_ross_rank) +#endif tw_end(); return 0; } diff --git a/src/network-workloads/model-net-synthetic-fattree.c b/src/network-workloads/model-net-synthetic-fattree.c index a4283068a07aa4499d5f1f758d25546667d80f0e..1fde570d3d1a1405aa11cd53599fac6c270a2931 100644 --- a/src/network-workloads/model-net-synthetic-fattree.c +++ b/src/network-workloads/model-net-synthetic-fattree.c @@ -436,6 +436,11 @@ int main( tw_opt_add(app_opt); tw_init(&argc, &argv); +#ifdef USE_RDAMARIS + if(g_st_ross_rank) + { // keep damaris ranks from running code between here up until tw_end() +#endif + codes_comm_update(); offset = 1; @@ -446,10 +451,10 @@ int main( return 0; } - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank(MPI_COMM_CODES, &rank); + MPI_Comm_size(MPI_COMM_CODES, &nprocs); - configuration_load(argv[2], MPI_COMM_WORLD, &config); + configuration_load(argv[2], MPI_COMM_CODES, &config); model_net_register(); @@ -484,7 +489,7 @@ int main( printf("num_nodes:%d \n",num_nodes); - if(lp_io_prepare("modelnet-test", LP_IO_UNIQ_SUFFIX, &handle, MPI_COMM_WORLD) < 0) + if(lp_io_prepare("modelnet-test", LP_IO_UNIQ_SUFFIX, &handle, MPI_COMM_CODES) < 0) { return(-1); } @@ -516,11 +521,13 @@ int main( } #endif - if(lp_io_flush(handle, MPI_COMM_WORLD) < 0) + if(lp_io_flush(handle, MPI_COMM_CODES) < 0) { return(-1); } - +#ifdef USE_RDAMARIS + } // end if(g_st_ross_rank) +#endif tw_end(); #if PARAMS_LOG diff --git a/src/network-workloads/model-net-synthetic-slimfly.c b/src/network-workloads/model-net-synthetic-slimfly.c index 098bccbf7d92a877c8e30a9cd42fc1b0a394178f..ab1f1af0a197dbb3c5d70a2900a3e12204f461d3 100644 --- a/src/network-workloads/model-net-synthetic-slimfly.c +++ b/src/network-workloads/model-net-synthetic-slimfly.c @@ -494,6 +494,11 @@ int main( tw_opt_add(app_opt); tw_init(&argc, &argv); +#ifdef USE_RDAMARIS + if(g_st_ross_rank) + { // keep damaris ranks from running code between here up until tw_end() +#endif + codes_comm_update(); if(argc < 2) { @@ -502,10 +507,10 @@ int main( return 0; } - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank(MPI_COMM_CODES, &rank); + MPI_Comm_size(MPI_COMM_CODES, &nprocs); - configuration_load(argv[2], MPI_COMM_WORLD, &config); + configuration_load(argv[2], MPI_COMM_CODES, &config); model_net_register(); svr_add_lp_type(); @@ -526,7 +531,7 @@ int main( num_nodes_per_grp = num_routers_per_grp * num_servers_per_rep; total_routers = num_routers_per_grp * num_routers_per_grp * 2; -/* if(lp_io_prepare("modelnet-test", LP_IO_UNIQ_SUFFIX, &handle, MPI_COMM_WORLD) < 0) +/* if(lp_io_prepare("modelnet-test", LP_IO_UNIQ_SUFFIX, &handle, MPI_COMM_CODES) < 0) { return(-1); } @@ -536,7 +541,7 @@ int main( { do_lp_io = 1; int flags = lp_io_use_suffix ? 
LP_IO_UNIQ_SUFFIX : 0; - int ret = lp_io_prepare(lp_io_dir, flags, &io_handle, MPI_COMM_WORLD); + int ret = lp_io_prepare(lp_io_dir, flags, &io_handle, MPI_COMM_CODES); assert(ret == 0 || !"lp_io_prepare failure"); } @@ -558,7 +563,7 @@ int main( if (do_lp_io){ - int ret = lp_io_flush(io_handle, MPI_COMM_WORLD); + int ret = lp_io_flush(io_handle, MPI_COMM_CODES); assert(ret == 0 || !"lp_io_flush failure"); } @@ -578,12 +583,15 @@ int main( #endif } -/* if(lp_io_flush(handle, MPI_COMM_WORLD) < 0) +/* if(lp_io_flush(handle, MPI_COMM_CODES) < 0) { assert(ret == 0 || !"lp_io_flush failure"); return(-1); } */ +#ifdef USE_RDAMARIS + } // end if(g_st_ross_rank) +#endif tw_end(); if(rank == 0) diff --git a/src/network-workloads/model-net-synthetic.c b/src/network-workloads/model-net-synthetic.c index 65b95a6108f875a7c7352abad2b1f39f7f00cb7b..17728894710f189d6efb2b795fe5c909b1221fd6 100644 --- a/src/network-workloads/model-net-synthetic.c +++ b/src/network-workloads/model-net-synthetic.c @@ -407,6 +407,11 @@ int main( tw_opt_add(app_opt); tw_init(&argc, &argv); +#ifdef USE_RDAMARIS + if(g_st_ross_rank) + { // keep damaris ranks from running code between here up until tw_end() +#endif + codes_comm_update(); if(argc < 2) { @@ -415,10 +420,10 @@ int main( return 0; } - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank(MPI_COMM_CODES, &rank); + MPI_Comm_size(MPI_COMM_CODES, &nprocs); - configuration_load(argv[2], MPI_COMM_WORLD, &config); + configuration_load(argv[2], MPI_COMM_CODES, &config); model_net_register(); svr_add_lp_type(); @@ -457,15 +462,18 @@ int main( { do_lp_io = 1; int flags = lp_io_use_suffix ? LP_IO_UNIQ_SUFFIX : 0; - int ret = lp_io_prepare(lp_io_dir, flags, &io_handle, MPI_COMM_WORLD); + int ret = lp_io_prepare(lp_io_dir, flags, &io_handle, MPI_COMM_CODES); assert(ret == 0 || !"lp_io_prepare failure"); } tw_run(); if (do_lp_io){ - int ret = lp_io_flush(io_handle, MPI_COMM_WORLD); + int ret = lp_io_flush(io_handle, MPI_COMM_CODES); assert(ret == 0 || !"lp_io_flush failure"); } model_net_report_stats(net_id); +#ifdef USE_RDAMARIS + } // end if(g_st_ross_rank) +#endif tw_end(); return 0; } diff --git a/src/networks/model-net/express-mesh.C b/src/networks/model-net/express-mesh.C index 3cb247f4253098c79d9046789a6eac80bc5e8a23..b39b9d1c8796a50fadd64ceb9a314be7f6e5e850 100644 --- a/src/networks/model-net/express-mesh.C +++ b/src/networks/model-net/express-mesh.C @@ -435,7 +435,7 @@ static void local_read_config(const char * anno, local_param *params){ p->total_terminals = p->total_routers * p->num_cn; int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_rank(MPI_COMM_CODES, &rank); if(!rank) { printf("\n Total nodes %d routers %d radix %d \n", p->total_terminals, p->total_routers, p->radix); @@ -480,22 +480,22 @@ static void local_report_stats() long total_gen, total_fin; MPI_Reduce( &total_hops, &avg_hops, 1, MPI_LONG_LONG, MPI_SUM, 0, - MPI_COMM_WORLD); + MPI_COMM_CODES); MPI_Reduce( &N_finished_packets, &total_finished_packets, 1, MPI_LONG_LONG, - MPI_SUM, 0, MPI_COMM_WORLD); + MPI_SUM, 0, MPI_COMM_CODES); MPI_Reduce( &N_finished_msgs, &total_finished_msgs, 1, MPI_LONG_LONG, MPI_SUM, - 0, MPI_COMM_WORLD); + 0, MPI_COMM_CODES); MPI_Reduce( &N_finished_chunks, &total_finished_chunks, 1, MPI_LONG_LONG, - MPI_SUM, 0, MPI_COMM_WORLD); + MPI_SUM, 0, MPI_COMM_CODES); MPI_Reduce( &total_msg_sz, &final_msg_sz, 1, MPI_LONG_LONG, MPI_SUM, 0, - MPI_COMM_WORLD); + MPI_COMM_CODES); MPI_Reduce( &local_total_time, &avg_time, 1,MPI_DOUBLE, MPI_SUM, 
0, - MPI_COMM_WORLD); + MPI_COMM_CODES); MPI_Reduce( &local_max_latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, - MPI_COMM_WORLD); + MPI_COMM_CODES); - MPI_Reduce( &packet_gen, &total_gen, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce( &packet_fin, &total_fin, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce( &packet_gen, &total_gen, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &packet_fin, &total_fin, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); /* print statistics */ if(!g_tw_mynode) diff --git a/src/networks/model-net/net-template.C b/src/networks/model-net/net-template.C index 988ba0359d37bbd4f0ee4f01a007ff18367fe74b..6a3c867c9eca62d259035a7522a155f61758b5df 100644 --- a/src/networks/model-net/net-template.C +++ b/src/networks/model-net/net-template.C @@ -401,22 +401,22 @@ static void local_report_stats() long total_gen, total_fin; MPI_Reduce( &total_hops, &avg_hops, 1, MPI_LONG_LONG, MPI_SUM, 0, - MPI_COMM_WORLD); + MPI_COMM_CODES); MPI_Reduce( &N_finished_packets, &total_finished_packets, 1, MPI_LONG_LONG, - MPI_SUM, 0, MPI_COMM_WORLD); + MPI_SUM, 0, MPI_COMM_CODES); MPI_Reduce( &N_finished_msgs, &total_finished_msgs, 1, MPI_LONG_LONG, MPI_SUM, - 0, MPI_COMM_WORLD); + 0, MPI_COMM_CODES); MPI_Reduce( &N_finished_chunks, &total_finished_chunks, 1, MPI_LONG_LONG, - MPI_SUM, 0, MPI_COMM_WORLD); + MPI_SUM, 0, MPI_COMM_CODES); MPI_Reduce( &total_msg_sz, &final_msg_sz, 1, MPI_LONG_LONG, MPI_SUM, 0, - MPI_COMM_WORLD); + MPI_COMM_CODES); MPI_Reduce( &local_total_time, &avg_time, 1,MPI_DOUBLE, MPI_SUM, 0, - MPI_COMM_WORLD); + MPI_COMM_CODES); MPI_Reduce( &local_max_latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, - MPI_COMM_WORLD); + MPI_COMM_CODES); - MPI_Reduce( &packet_gen, &total_gen, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce( &packet_fin, &total_fin, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce( &packet_gen, &total_gen, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &packet_fin, &total_fin, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); /* print statistics */ if(!g_tw_mynode) diff --git a/src/networks/model-net/slimfly.c b/src/networks/model-net/slimfly.c index 047adcb26cd6db1d9a2e2fe6ba7f4357b54d646c..79441f5e93831b199451fa3f95d9ca5749bd0ccf 100644 --- a/src/networks/model-net/slimfly.c +++ b/src/networks/model-net/slimfly.c @@ -638,19 +638,19 @@ static void slimfly_report_stats() float throughput_avg = 0.0; float throughput_avg2 = 0.0; - MPI_Reduce( &total_hops, &avg_hops, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce( &N_finished_packets, &total_finished_packets, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce( &N_finished_msgs, &total_finished_msgs, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce( &N_finished_chunks, &total_finished_chunks, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce( &total_msg_sz, &final_msg_sz, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce( &slimfly_total_time, &avg_time, 1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce( &slimfly_max_latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); - - MPI_Reduce(&pe_throughput_percent, &throughput_avg, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&pe_throughput, &throughput_avg2, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD); - - MPI_Reduce(&minimal_count, &total_minimal_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&nonmin_count, &total_nonmin_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce( &total_hops, &avg_hops, 1, MPI_LONG_LONG, MPI_SUM, 0, 
MPI_COMM_CODES);
+    MPI_Reduce( &N_finished_packets, &total_finished_packets, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
+    MPI_Reduce( &N_finished_msgs, &total_finished_msgs, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
+    MPI_Reduce( &N_finished_chunks, &total_finished_chunks, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
+    MPI_Reduce( &total_msg_sz, &final_msg_sz, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
+    MPI_Reduce( &slimfly_total_time, &avg_time, 1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES);
+    MPI_Reduce( &slimfly_max_latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_CODES);
+
+    MPI_Reduce(&pe_throughput_percent, &throughput_avg, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_CODES);
+    MPI_Reduce(&pe_throughput, &throughput_avg2, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_CODES);
+
+    MPI_Reduce(&minimal_count, &total_minimal_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_CODES);
+    MPI_Reduce(&nonmin_count, &total_nonmin_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_CODES);
 
     /* print statistics */
     if(!g_tw_mynode)
diff --git a/src/util/codes-comm.c b/src/util/codes-comm.c
index d0bc4159df98334c8950a694462fca5e88635a4a..ef59a753b1556033b762c3d2acf3d00379c81b46 100644
--- a/src/util/codes-comm.c
+++ b/src/util/codes-comm.c
@@ -5,9 +5,19 @@
  */
 
 #include <mpi.h>
+#include <ross.h>
 
 MPI_Comm MPI_COMM_CODES = MPI_COMM_WORLD;
 
+/*
+ * Must be called AFTER tw_init(): ROSS may split the communicator
+ * there (e.g. for Damaris) and exposes the result as MPI_COMM_ROSS.
+ */
+void codes_comm_update()
+{
+    MPI_COMM_CODES = MPI_COMM_ROSS;
+}
+
 /*
  * Local variables:
  * c-indent-level: 4
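
Usage sketch (commentary, not part of the patch): the pattern this patch applies to each driver main() is summarized below as a minimal, hypothetical C example. It assumes a ROSS build that defines USE_RDAMARIS and provides g_st_ross_rank, plus the patched codes.h above (codes_comm_update() and MPI_COMM_CODES); the LP registration and configuration loading a real driver performs before tw_run() are elided.

#include <stdio.h>
#include <mpi.h>
#include <ross.h>
#include <codes/codes.h>

int main(int argc, char **argv)
{
    int rank, nprocs;

    tw_init(&argc, &argv);   /* with Damaris enabled, this may split the communicator */
#ifdef USE_RDAMARIS
    if(g_st_ross_rank)
    {   /* simulation ranks only; Damaris ranks skip ahead to tw_end() */
#endif
    codes_comm_update();     /* sets MPI_COMM_CODES = MPI_COMM_ROSS */

    /* from here on, every collective uses MPI_COMM_CODES, never MPI_COMM_WORLD */
    MPI_Comm_rank(MPI_COMM_CODES, &rank);
    MPI_Comm_size(MPI_COMM_CODES, &nprocs);
    if(!rank)
        printf("running on %d simulation ranks\n", nprocs);

    /* ... model_net_register(), LP setup, stats reporting elided ... */
    tw_run();
#ifdef USE_RDAMARIS
    }   /* end if(g_st_ross_rank) */
#endif
    tw_end();                /* outside the guard: Damaris ranks must also reach it */
    return 0;
}

Note that tw_end() stays outside the guard on purpose: the ranks ROSS sets aside for Damaris must still reach it to shut down cleanly, so only the work between tw_init() and tw_end() is skipped on those ranks.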