From 8274da0d532622dce78d3043948472a02a69f1c8 Mon Sep 17 00:00:00 2001 From: Neil McGlohon Date: Fri, 12 Oct 2018 17:10:25 -0400 Subject: [PATCH] Add DFDally work into its own model for merging --- codes/model-net-lp.h | 2 + codes/model-net.h | 2 + codes/net/dragonfly-dally.h | 113 + src/Makefile.subdir | 5 + .../conf/dragonfly-dally/dfdally_8k.conf | 64 + .../conf/dragonfly-dally/dfdally_8k_inter | Bin 0 -> 66560 bytes .../conf/dragonfly-dally/dfdally_8k_intra | Bin 0 -> 2880 bytes .../model-net-synthetic-dally-dfly.c | 530 ++ src/networks/model-net/dragonfly-dally.C | 5241 +++++++++++++++++ src/networks/model-net/model-net-lp.c | 7 +- src/networks/model-net/model-net.c | 2 + 11 files changed, 5965 insertions(+), 1 deletion(-) create mode 100644 codes/net/dragonfly-dally.h create mode 100644 src/network-workloads/conf/dragonfly-dally/dfdally_8k.conf create mode 100644 src/network-workloads/conf/dragonfly-dally/dfdally_8k_inter create mode 100644 src/network-workloads/conf/dragonfly-dally/dfdally_8k_intra create mode 100644 src/network-workloads/model-net-synthetic-dally-dfly.c create mode 100644 src/networks/model-net/dragonfly-dally.C diff --git a/codes/model-net-lp.h b/codes/model-net-lp.h index d3ca915..5b9ca93 100644 --- a/codes/model-net-lp.h +++ b/codes/model-net-lp.h @@ -24,6 +24,7 @@ extern "C" { #include "net/dragonfly.h" #include "net/dragonfly-custom.h" #include "net/dragonfly-plus.h" +#include "net/dragonfly-dally.h" #include "net/slimfly.h" #include "net/fattree.h" #include "net/loggp.h" @@ -133,6 +134,7 @@ typedef struct model_net_wrap_msg { terminal_message m_dfly; // dragonfly terminal_custom_message m_custom_dfly; // dragonfly-custom terminal_plus_message m_dfly_plus; // dragonfly plus + terminal_dally_message m_dally_dfly; // dragonfly dally slim_terminal_message m_slim; // slimfly fattree_message m_fat; // fattree loggp_message m_loggp; // loggp diff --git a/codes/model-net.h b/codes/model-net.h index a6b13f2..9780a10 100644 --- a/codes/model-net.h +++ b/codes/model-net.h @@ -76,6 +76,8 @@ typedef struct mn_stats mn_stats; X(EXPRESS_MESH_ROUTER, "modelnet_express_mesh_router", "express_mesh_router", &express_mesh_router_method)\ X(DRAGONFLY_PLUS, "modelnet_dragonfly_plus", "dragonfly_plus", &dragonfly_plus_method)\ X(DRAGONFLY_PLUS_ROUTER, "modelnet_dragonfly_plus_router", "dragonfly_plus_router", &dragonfly_plus_router_method)\ + X(DRAGONFLY_DALLY, "modelnet_dragonfly_dally", "dragonfly_dally", &dragonfly_dally_method)\ + X(DRAGONFLY_DALLY_ROUTER, "modelnet_dragonfly_dally_router", "dragonfly_dally_router", &dragonfly_dally_router_method)\ X(MAX_NETS, NULL, NULL, NULL) #define X(a,b,c,d) a, diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h new file mode 100644 index 0000000..2832e1b --- /dev/null +++ b/codes/net/dragonfly-dally.h @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2014 University of Chicago. + * See COPYRIGHT notice in top-level directory. + * + */ + +#ifndef DRAGONFLY_DALLY_H +#define DRAGONFLY_DALLY_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef struct terminal_dally_message terminal_dally_message; + +/* this message is used for both dragonfly compute nodes and routers */ +struct terminal_dally_message +{ + /* magic number */ + int magic; + /* flit travel start time*/ + tw_stime travel_start_time; + /* packet ID of the flit */ + unsigned long long packet_ID; + /* event type of the flit */ + short type; + /* category: comes from codes */ + char category[CATEGORY_NAME_MAX]; + /* store category hash in the event */ + uint32_t category_hash; + /* final destination LP ID, this comes from codes can be a server or any other LP type*/ + tw_lpid final_dest_gid; + /*sending LP ID from CODES, can be a server or any other LP type */ + tw_lpid sender_lp; + tw_lpid sender_mn_lp; // source modelnet id + /* destination terminal ID of the dragonfly */ + tw_lpid dest_terminal_id; + /* source terminal ID of the dragonfly */ + unsigned int src_terminal_id; + /* message originating router id. MM: Can we calculate it through + * sender_mn_lp??*/ + unsigned int origin_router_id; + + /* number of hops traversed by the packet */ + short my_N_hop; + short my_l_hop, my_g_hop; + short saved_channel; + short saved_vc; + + int next_stop; + + short nonmin_done; + /* Intermediate LP ID from which this message is coming */ + unsigned int intm_lp_id; + /* last hop of the message, can be a terminal, local router or global router */ + short last_hop; + /* For routing */ + int intm_rtr_id; + int saved_src_dest; + int saved_src_chan; + + uint32_t chunk_id; + uint32_t packet_size; + uint32_t message_id; + uint32_t total_size; + + int remote_event_size_bytes; + int local_event_size_bytes; + + // For buffer message + short vc_index; + int output_chan; + model_net_event_return event_rc; + int is_pull; + uint32_t pull_size; + int path_type; + + /* for reverse computation */ + short num_rngs; + short num_cll; + + int qos_index; + short last_saved_qos; + short qos_reset1; + short qos_reset2; + + tw_stime saved_available_time; + tw_stime saved_avg_time; + tw_stime saved_rcv_time; + tw_stime saved_busy_time; + tw_stime saved_total_time; + tw_stime saved_sample_time; + tw_stime msg_start_time; + tw_stime saved_busy_time_ross; + tw_stime saved_fin_chunks_ross; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* end of include guard: DRAGONFLY_H */ + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/Makefile.subdir b/src/Makefile.subdir index ac67717..5f1e3c9 100644 --- a/src/Makefile.subdir +++ b/src/Makefile.subdir @@ -94,6 +94,8 @@ nobase_include_HEADERS = \ codes/net/common-net.h \ codes/net/dragonfly.h \ codes/net/dragonfly-custom.h \ + codes/net/dragonfly-dally.h \ + codes/net/dragonfly-plus.h \ codes/net/slimfly.h \ codes/net/fattree.h \ codes/net/loggp.h \ @@ -162,6 +164,7 @@ src_libcodes_la_SOURCES = \ src/networks/model-net/dragonfly.c \ src/networks/model-net/dragonfly-custom.C \ src/networks/model-net/dragonfly-plus.C \ + src/networks/model-net/dragonfly-dally.C \ src/networks/model-net/slimfly.c \ src/networks/model-net/fattree.c \ src/networks/model-net/loggp.c \ @@ -196,6 +199,7 @@ bin_PROGRAMS += src/network-workloads/model-net-synthetic-custom-dfly bin_PROGRAMS += src/network-workloads/model-net-synthetic-slimfly bin_PROGRAMS += src/network-workloads/model-net-synthetic-fattree bin_PROGRAMS += src/network-workloads/model-net-synthetic-dfly-plus +bin_PROGRAMS += src/network-workloads/model-net-synthetic-dally-dfly src_workload_codes_workload_dump_SOURCES = \ @@ -210,6 +214,7 @@ src_network_workloads_model_net_mpi_replay_CFLAGS = $(AM_CFLAGS) src_network_workloads_model_net_synthetic_SOURCES = src/network-workloads/model-net-synthetic.c src_network_workloads_model_net_synthetic_custom_dfly_SOURCES = src/network-workloads/model-net-synthetic-custom-dfly.c src_network_workloads_model_net_synthetic_dfly_plus_SOURCES = src/network-workloads/model-net-synthetic-dfly-plus.c +src_network_workloads_model_net_synthetic_dally_dfly_SOURCES = src/network-workloads/model-net-synthetic-dally-dfly.c src_networks_model_net_topology_test_SOURCES = src/networks/model-net/topology-test.c #bin_PROGRAMS += src/network-workload/codes-nw-test diff --git a/src/network-workloads/conf/dragonfly-dally/dfdally_8k.conf b/src/network-workloads/conf/dragonfly-dally/dfdally_8k.conf new file mode 100644 index 0000000..9c20a6e --- /dev/null +++ b/src/network-workloads/conf/dragonfly-dally/dfdally_8k.conf @@ -0,0 +1,64 @@ +LPGROUPS +{ + MODELNET_GRP + { + repetitions="1040"; +# name of this lp changes according to the model + nw-lp="8"; +# these lp names will be the same for dragonfly-custom model + modelnet_dragonfly_dally="8"; + modelnet_dragonfly_dally_router="1"; + } +} +PARAMS +{ + adaptive_threshold="131072"; + num_qos_levels="2"; + qos_bandwidth="30,70"; + minimal-bias="1"; + df-dally-vc = "1"; +# packet size in the network + packet_size="4096"; + modelnet_order=( "dragonfly_dally","dragonfly_dally_router" ); + # scheduler options + modelnet_scheduler="fcfs"; +# chunk size in the network (when chunk size = packet size, packets will not be +# divided into chunks) + chunk_size="4096"; + # modelnet_scheduler="round-robin"; + + num_router_rows="1"; + # intra-group columns for routers + num_router_cols="16"; + # number of groups in the network + num_groups="65"; +# buffer size in bytes for local virtual channels + local_vc_size="16384"; +#buffer size in bytes for global virtual channels + global_vc_size="16384"; +#buffer size in bytes for compute node virtual channels + cn_vc_size="32768"; +#bandwidth in GiB/s for local channels + local_bandwidth="2.0"; +# bandwidth in GiB/s for global channels + global_bandwidth="2.0"; +# bandwidth in GiB/s for compute node-router channels + cn_bandwidth="2.0"; +# Number of row channels + num_row_chans="1"; +# Number of column channels + num_col_chans="1"; +# ROSS message size + message_size="656"; +# number of compute nodes connected to router, dictated by dragonfly config +# file + num_cns_per_router="8"; +# number of global channels per router + num_global_channels="8"; +# network config file for intra-group connections + intra-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally_8k_intra"; +# network config file for inter-group connections + inter-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally_8k_inter"; +# routing protocol to be used + routing="prog-adaptive"; +} diff --git a/src/network-workloads/conf/dragonfly-dally/dfdally_8k_inter b/src/network-workloads/conf/dragonfly-dally/dfdally_8k_inter new file mode 100644 index 0000000000000000000000000000000000000000..e35277560bddd87e2de7ebb0b6a4448b9b49671c GIT binary patch literal 66560 zcmXWgciff*{r+LxBL|q7s41Yi8e(ZV6I^IRFn6h-qL^BcTN~g+GleEL!HF%grM3hY zG{u%S-C`4(_-*0uJnrA=tIzAW&Oe^;xi8<|e>h~wkRjXT{I$#fXUKkae4SXQ*6DRt zTrKz6dV9U6K2#s8%j>GTQGR&!Jin~Is_t9gQctMwtnaJW#x?TupRQl5->yHbzo~z! z|E(M34_`Aszh!+z-MhZ2zP+AV-&3!NYvt!ZSwCODS-)R@RsT@`UDwaw{Mz~X;dQ6F zXFaqYSKm=Ds#nK#^7Gf%+v?r*yY(0KclH13y7}eT&CkCy#~teH>$rMMol-BTGvj)> zkJnr4opo`2q%Nx~>)Lq_>*x6;_2qTObn5 zc@G=p=U-g6t*@;I)FbLC^_+TH+%P}?;d*2JO1-cCq&`*uR#(q^*eE|gtd6X^*0J^Q zdQyFNy(Dg&pPyal)jR5<`fz=s{-yede`tPwmmGD^@m2Ts)T(=XR-GG%<#bQoQ}@(8 zbx++>_uM*e5_M1AQ}@(8bx++>_xwzJanwC^Pu)}Z)ID`i-E*tBY1BP+Pu)}Z)ID`i z-SboNB~ka(J#|mrQ}@(8bNRlJ#|mrQ}@(8bYlo%?g!?1 z^PKMa`l@^Tn5ujGf~tFaPHuQk_q?;}-hQO&-d_uMpY6?ISDQ}@(8bx++>_xwnFS=2psPu)}Z)ID`i-E)(;b<{m| zPu)}Z)ID`i-Sb0no2Yy0p1P;*se9_4x*wRw5xM=U?(K_tZUg&sWCnqVB1C>Ylo% z?x}m~o?nPBkGiMsse9_4x~J}`d+r>!kGiMsse9_4x~J}`dwwozSNGICbx++>_tgEs zJX6Cv=5)_-RrmIks(X88)xAAGw^L5{TwHZ;FRQw@bx+-M`?zz|J#|mrQ}@(8bx+;% zmiUUOd+MILr|zkH>YlpicJY-__tZUgPu)}Z)ID|2o8qgY?x}m~p1P;*se9_4qvETh z?m4#V-ae`7-oB*j-o7F35_M1AQ}@(8bx++7%p-l5kIs#+y0@oR-P^OO?(GG2Vca$M zP}RM?yz1W8J#|mr^9}JeQTNn6bx++>_tZUg&+o+7M%`2Q)ID`i-Bb6}J@<*bMcq^P z)ID`i-Bb6}J--!qkGiMsse9_4x~J}`d+rrq7j;kFQ}@(8bx++>_xwiGuI{OO>Ylo% z?g!?Xx~GQs$myQPRNdPbRNdQis_yN(a(m`<&qu26?Uhycw(hBWzAo++bx++>_tZUg zPu)}Z{CeCw>Ylo%?x}m~p1P;*xm(;P>Ylo%?x}m~p1P;*`PH~@)ID`i-Bb6}J#|mr z^EL4eQTNn6bx++>_tZUg&o9UQqVB1C>Ylo%?g!?PL+gk*CO5I_-kx4{Z_ll|w-?r> zacu6fs(X7?)xE8I>YlpiVR8Scd+MILr|zkH>YlpikKzGQ_tZUgPu)}Z)ID|2H^&2` z?x}m~p1P;*se9_4KZplK-Bb6}J#|mrQ}@(8-xv>$x~J}`d+MILr|zkHelKcQ_tZUg zPu&m9GdHigw>5l7PWPNrb#KqCy0_<7-P=oYhvsz8WmWgK?x}m~p1SA3@r_aU)ID`i z-Bb6}J$26q_tZV_k8g>(=TlYp_Ud^L z)ICR4-P`-e!=mn~d+MILr|zkH>Yn$;@lp5GJ#|mr56mNV&k@yk`Gnlms(X7@)xEu- z>fT;jACHIUmRH@|x~J}`d+MIL=ZW#HQTNn6bx++>_tZUg&)>!)qVB1C>Ylo%?x}m~ zp2x={qwcAD>Ylo%?x}m~p1+PqMcq^P)ID`i-Bb6}J>M3Oj=HDrse9_4x~J}`d;T(N zSNGICbw4oA)IE2sy0Ylo%?x}m~p1P;*d1O33>Ylo%?x}m~p1P;*`O|np)ID`i z-Bb6}J#|mr^R4m3sC(+3x~J}`d+MIL=a1vWsC(+3x*wQF4y_~V=sG4&%1y7jx93*f z+Y77i?Z@kicv5av)xE8I>Ylo%?x}meE1n#6Pu)}Z)ID`i-Bb7cb37&Lp1P;*se9_4 zx~J}WR-7DlPu)}Z)ID`i-Bb7cV>~tLp1P;*se9_4x~J}WMm#O*p1P;*se9_4x~J~> z`>0*rQ}+Y&%+0Ir?LDgQZ4IBE(>-Ta-P`l4?(HR2_x5wScjR)4&x*R|WmWg~O;z{y{Z;q&)A5~A z_tZUgPu)}Z)ID|2ljFOh?x}m~p1P;*se9_4PsXWH_tgEsJW}@@QFU*RsT1O~+^nj5 zdqLH`y|n7yUQu6&@6PF-x~J}`d+MILr|$Xwcy`o1bx++>_tZUgPu=r>@tml8>Ylo% z?x}m~p1S9IYnGvi=*zTd+MILr|zkH>Yo3M?}@sn?x}m~p1P;*se7Iq-y3yL-Bb6}J#|mrQ}_IP zd|%W(bx++>_tZUgPu=tE`2MJS>Ylo%?x}m~p1S8>_tZVFi_tZT%Ts`kD z>Ylo%?x}m~p1P;*`N4R3)ID`i-Bb6}J#|mr^F?dqpWUc?>Ylo%?x}m~p1S7;;+0YN z)ID`i-Bb6}J$29Z*8KnNuA65KZ(en8?@@JcA6s>AYxt_1?m55e-d<95Z$DRcZ?BX0 zaCOu@bx++>_tZUgPu=s%_<^W<>Ylo%?x}m~p1S8+c@Nh_-E+IDd;7qudwX)#y?uH7 zVAMTzPu)}Z)ID`i-E)n+hijwmse9_4x~J}`d+MH-#_OW)se9_4x~J}`d+MI6<^9i! z{-W+VqUzorQ+01os?*}^+=8lmdui3Zy`t*g);+h0ABwuC?x}m~p1P;*se67lemLr$ zx~J}`d+MILr|$W(_>ri4>Ylo%?x}m~p1S9!<42?Jse9_4x~J}`d+MHB#*anaQ}@(8 zbx++>_tZT<89yF%Pu)}Z)ID`i-Bb4i^Q>LnbH}QC`;e-8`}C@NTf^7qbkDo0?(Ijb z?(OHR?(LVxPek2Q_tZUgPu)}Z)IC2QKN)pT-Bb6}J#|mrQ}=vH{8ZFEbx++>_tZUg zPu=sQ@zYWF)ID`i-Bb6}J$26)$InFFQ}@(8bx++>_tZT<96uX%Pu)}Z)ID`i-Bb4i z^E@{zF#BPO8)Dj5sf+d+MILr|zkH>YlpiF7bw_d+MILr|zkH>Ylpi?eWH_ zd+MILr|zkH>YlpitKv;j_tZUgPu)}Z)ID|2FUFgr?x}m~p1P;*se9_4uZXuq-Bb6} zJ#|mrQ}@(8KOb+6x~J}`d+MILr|t*lnZtA1)jjv9y0?$5y0bHxTUvE*uc*4Wbx++>_uMajIqIIer|zkH>Ylo% z?s-rAO4L1dPu)}Z)ID`i-E-gg)u?;wp1P;*se9_4y63m!*P`yJd+MILr|zkH>YjVY zuSeZe_tZUgPu)}Z)IGl$?~J;q?x}m~p1L2HXX>6i<+Q7N9#VC0pI&uuUsZK)YxwS* z?)hldz5RUEy{&udp0AJJh`Oilse9_4x~J}`d)^tp8Ff$HQ}@(8bx++>_uM^xE9#!Q zr|zkH>Ylo%?)kO&?WlX|p1P;*se9_4y60=-ccSj8d+MILr|zkH>YiVT_e9-O_tZUg zPu&m9Gl$iYaba$B9aAUNNp)JCQD@hAaZyh9)ID`i-Bb6}J$29V@!qI=>Ylo%?x}m~ zp1S8l@xG{g>Ylo%?x}m~p1S8-;{8$g)ID`i-Bb6}J$26?#s{MAse9_4x~J}`d+MHV ziVsHJQ}@(8bx++>_tZVV9~Vd6Q}@(8bw4oA9A0&A@0ruC?s;t0y?tTTy?uSvy{+Nz z=5){Js_t#wQ}@(8bK>T6UJ#|mrQ}@(8bx+;%fcT@Rd+MILr|zkH>Ylpieet2Fd+MIL zr|t*lnY!o5x@+{^J*Mj3o>X;jb$=hZvn!?_hz_qOh-d+MILr|vm1{y6HMx~J}` zd+MILr|$V!{7KY3bx++>_tZUgPu=r`_|vF+>Ylo%?x}m~p1S96;?JV)se9_4x~J}` zd+MHVk3WyPr|zkH>Ylo%?x}nJDn1f*Pu)}Z1M^JXbEm3%`_P_tZUgPu=rp@i$TT)ID`i-Bb6}J$26`;%}qwse9_4x~J}`d+MG)iH}9y zQ}@*Uz&vwU9a(p+W25ix33XDPR%g`NbzZ%rE{ad&bWhz=_tZUgPu)}ZoEo2ux~J}` zd+MILr|zkHE{{(|-Bb6}J#|mrQ}@(8-x;5dx~J}`d+MILr|zkH{wY2abx++>_tZUg zPu)}ZJTpEUbx++>_tZUgPu)}Z{6kz8bx++7%rl2q-P?Os-P_0Ip3CW;7gpWd*H_)! zcURrp8vb2Q_gp9Mfx71oRrmI|s(X7%{C(6tbx++>_tZUgPu+7_{6o||bx++>_tZUg zPu=s>_{XSw>Ylo%?x}m~p1S8V@lR3r)ID`i-Bb6}J$27h;-91Lse9_4x~J}`d+MG~ z#pO}=)cwFbQ}-NMb#ITYhezMtldA6R8CCc8ysCS9QGGc6C8vApp1P;*se9_4y65!x z*Qk5yp1P;*se9_4y639+x2SvSp1P;*se9_4y65}i-=prSd+MILr|zkH>Yo3N|A@M$ z?x}m~p1P;*se8UB{xj;Hx~J}`d+MILr|$XhxH9T~V4kUa?o@SeA6j*9zaytz-Seud zd;7Mkd;7ap_qK-rFQWGd+MILr|zkH>Yo3I|Bbq*?x}m~p1P;*se7Ii{}**n-Bb6}J#|mr zQ}_H^TorXcFwY!TN7h~I*m`(9Df;f7R%g`NbzZ%rE~*dLC*qL21Km^i)ID`i-Bb6} zJ!j=s%jurFr|zkH>Ylo%?zvGu|J9@Jse9_4x~J}`d+MIo#xYlo%?x}m~p1P;*c}-j^>Ylo%?x}m~p1P;*xqjaJ+Ht*noE%fYAyx>5Jsq3YfqS9Nbssk*mk=GM#Up0`%r+l#C2?PXQ> z_S$(5>qp&F_tZUgPu)}Z)IG0=FN(US?x}m~p1P;*se7)O_pm|KJ#|mrQ}@(8bx+;% zvbbT?J#|mrQ}@(8bx+-M^}PR$V*WQ!_x8xDdwXovy?s)BcicF8M%BGNuj<}jRCRAZ zQU4N$X77@t?m52d-kw@@Z_lc8_tZUgPu)}Z z{7igt)ID`i-Bb6}J#|mrbE~*%)ID`i-Bb6}J#|mr^HcF9QTNn6bx++>_tZUgKQN#F zX4$%@?x}m~p1P;*se4`>wX1vTp1P;*se9_4x~GOW&*`48ue!I7sk*l>sJgf3_tZUg&rRc2QTNn6 zbx++>_tZUg&yU2HMcq^P)ID`i-Bb6}{lL73t+RE{kyZEh*s6Q`q^f)SlDJLOJ?B;3 z+l#91?I)`4ZU5pKk=w88-kw->Z%?nfx98Rcab)hEs(bsfs(X7?)xE8IzB-PIx~J}` zd+MILr|zkHekpDnbx++>_tZUgPu)}Zd}Z7&>Ylo%?x}m~p1P;*`GxrMsC(+3x~J}` zd+L5*K6-8uw~xA~?x}m~p1P;*se67RYFGEvJ#|mrQ}@(8bx#fNnA1JSRo&ZDs_yNX zRrmJ%+)g>&b8*$Zy{ziq);)F4?c>f-_tZUgPu)}Z)ID|2TjDFC?x}m~p1P;*se9_4 z+r?K#-Bb6}J#|mrQ}@(8Z;G#qx~J}`d+MILr|t*lJ#drw>Nv9M-X2?ZZ=Y0kZ(mYB z6nDvYlo%?x}m~p1P;*xli0J>Ylo%?x}m~p1P;*`K`En z)ID`i-Bb6}{lI+m)IE2OuZy~;?x}m~p1P;*se67dYFGEvJ#|mrQ}_uMV+6LnABQ}@(8bx++>_xx(yH|n0cr|zkH>V9C}19i_)@eNV;99wm7 zpHy{kUs82%-w^kUy62**d;5v1dt3J$T78#~$xW=fx2IR#+jFb#?S*w|9GiQr>fT;e zb#LpQx~J}WSlmDAp1P;*se9_4x~J~>qj*5nJ#|mrQ}@(8bx+;%&GEpfd+MILr|zkH z>Ylpi58^>l_tZUgPu&m9M^D{T_uMNU9Cc6KQ}@(8bx++>_xwiGuI{OO>Ylo%?zwr@ zy{+Lxa=Pb~s(X88)xABx>fT_tZTPj&F>*r|zkH>Ylo%?x}k| z7~d3iPu)}Z)ID`i-Bb5GFupnJp1P;*se9_4x~J}We|$^SJ#|mrQ}+Y&9;kbcs;`NM zWsj}8w@<3Nw=b!>w{NIlj^nczRo&Z9RNdP{a@0LXRNv(ja#O4B?O9d#_JXQ=due?< z9-doXb#LpQx~J}`d+MGi#_tZUq8;^*(r|zkH>Ylo%?x}koACHW> zr|zkH>Ylo%?x}nJIvy2uPu)}Z1M|^S_tZUg&o{=SqwcAD>Ylo%?x}m~p5Kew)jf4j z-E-X>bFZQWD%)ID|2qvPA6?x}m~ zp1P;*se9_4KaX#Zx~J}`d+MILr|zkH9vP31x~J}`d+MILr|zkH{xqHtbx++>_XG1D zsC$m8y0`a_Cq~`#q^f)SlB#?AhN^q}-Z(Mpo=;TW+q&n_s(X8M9TO+zrdQqDbF1#{ zg;n?V<8?(mDYvTX-qt;JPu)}Z)IHx7Pma2$?x}m~p1P;*seAr8o)UFW-Bb6}J#|mr zQ};Y8PL8^#?x}m~p1P;*seAr0o*H#e-4D!1Pu)}Z)ID|2x5d+)d+MILr|zkH>Yh)>cShY) z_XG1DsC$m8y0`bQZ;kKDKB?;7zNG5jzM<;gzPJ83PR)Lz>fRobqwYDP>fRnxC&X#F zSylJ;f~tFaY1O^GqP`H{ozp#aPu)}Z)ID`i-ShqN?5KO{p1P;*se9_4y66AmIZ^l2 zJ#|mrQ}@(8bYo3J=SAHQ%tue%Q}@(8bx+;%jCg+3J#|mr zQ}@(8bx+;%_ffmL=ejxSo;z0E+lN%$+Zw(wr+dz+y0`DDy0;&#y0@RtU6j*3bx++> z_tZUgPu=tUcyZJ{bx++>_tZUgPu=sM@jX%Z)ID`i-Bb6}J$27><9nm-se9_4x~J}` zd+MHlkME1RADH(*-E&mcy}f_cy?t_gf7CrMsk*mssJgfBt-80LjMJm;se2Bsy0=GH z-P;rDq&Ootx9Z+rSaokdUUhH3P&dx!e@UKoPu)}Z)ID`i-Bb6xE?ydSPu)}Z)ID`i z-Bb76FrWWrQTNn6bx++>_tZUg&kx4SqwcAD>Ylo%?x}m~o-fLKzap-mkDj`x?x}m~ zp1P;*d2zfl>Ylo%?x}m~p1P;*`L8%L>Ykfd-P?Oq-P^}j-P;_tZUgPu)}ZyfS_u>Ylo%?x}m~p1P;*xmMo8HBtA}J#|mrQ}@(8 zbYi)l{a+jNw=+=p_Nc0Rd;h9?`{a6dye|8as(br}s(bt1 zs(bs%`qwxsdq|GD=ZLC%drZ~6J*iHMvvUio?(L;j_x6gadt3M1CVnXDp1P;*se9_4 zx~J~>+4$k8d+MILr|zkH>Ylpi%i>3(?x}m~p1P;*se9_4pN=1mx~J}`d+MILr|zly zf%)h^maTj0p1P;*se9_4y5|St$D{74d+MILr|zkH>YnT6w5xmWSaok7Qgv^iUUhG4 z`1+jgc~{lF{b<#_{e0EE{nGe}sC(+3x~J}`d+MIL=f~qGqwcAD>Ylo%?x}m~o-c`? zin^!nse9_4x~J}`dww*2I_jRfr|zkH>Ylo%?g!>Qd?s7>994C1?_YIqpImisUm8Cf zb_tZUg&o9QCqwcAD z>Ylo%?y38M`TV(Myd~bx++>_tZUgPu+8PPP@A29#!}D zu~ql>g;n>qhHuO1o=d9k?dPiQZQWD%+#!B0>Ylo%?x}m~p1P;*d29T9)ID`i-Bb6} zJ#|mr^X2giQTNn6bx++>_tZUg&zs{HqwcAD>Ylo%?y38Mc@KPX{8AiMb#L!qb#I?s zb#Gr_uMyrHR_(ar|zkH>Ylo% z?)mNbwWxdQp1P;*sr!NX{Hc4sB7QyUp1P;*se9_4x~J~>`FLm4J#|mrQ}@(8*UM4& z+$pDB-Sd#Dd;9dNd;6-Ydt1YI=XB3UtM2XRtL|;xQ}=v*{6^G0bx++>_tZUgPu=s* z_|2$$>Ylo%?x}m~p1SAm@mo>%)ID`i-Bb6}J$28o#cxO5Q}@(8bx++7%zL2jxo!MT z)IImFy0=fRy0qP_e9_Z(J7#)Y}jRrmIUs(X7{)xABt&Wnq3 zk5}E>FI3&zx~J}`dybFyM%`2Q)ID`i-Bb6}Js*nqMcq^P)ID`i-Bb6}J>L@VkGiMs zse9_4x~J}`d;Ty!5Oq)8Q}@*UzG+h)ID`i z-Bb4*UUhHpnbWTBd2H3aePPwTeSOuvt>N$HbkFCi?rq&u_tZUg&vEg4QTNn6bx++> z_tZUg&&BckQTNn6bx++>_tZUg&x7I*qVB1C>Ylo%?x}m~o)5$yM%`2Q)ID`SFz zRrmJ1dPjUXx1#Fa);)Dk-Bb6}JtxK=N8MBR)ID`i-Bb6}Js*odiMprmse9_4x~J}` zd!7(~8g)_xw%#S=2psPu&m9=TF^J_tZV#6n`FdPu)}Z)ID`i-Bb7c zetabAp1P;*xn7RC=T24k_MtiL>Yk@p-P>1H-P^ZS-P;=eMNap8zUtoAJ#|mrQ};Y3 z{xa&Gx~J}`d+MILr|$Vk{8iLFbx++>_tZUgPu=sV`0J>9>Ylo%?x}m~p1SAH;%}nv zse9^vVBQ0D&uy#j?E~U(qwaZf)xCXb)xCXV)xCXRd@SmoPgdRAy63Q}dwbV9Hu~Ylo%?m0C+8Ff$HQ}@(8bx++>_go&Iin^!n zse9_4x~J}`d%iP19d%FLQ}@(8bx++>_xw|QChDHLADGXdx~J}`d+MHVkIzQkQ}@(8 zbx++>_tZUq6_-WbQ}@(8hgaR(dsf}s$K|xEdtO*|Z(m<^Z{J;YZ)^B>Io(tD)ID`i z-Bb6}J*ULqN8MBR)ID`i-Bb6}J(tBlMBP*O)ID`i-Bb6}Jx`5)jJl`pse9_4x~J}` zdp;BY6m?JC56pX@?zwH%y?sDEBK|r1a=NGPse9_4x~J}`drpskjk>4qse9_4x~J}` zd#;Lqi@K-ose9_4x~J}`d%iFJJ?fsir|zkH>Ylo%?)l&NkEr{B`TVJS>Ylo%?x}m8 z8UGn|Pu)}Z)ID`i-Bb7cLtGhk&-HTDJ$I_Qw-2qlx8IS|uI_nN)xCXN)xG`Qs(V|* z|CiG}bx++>_tZUgPu=r^_^)_n)xCXd)xEvA>fT;fSH^#5>z=x&?x}m~p1P;*d0zZa z)ID`i-Bb6}J#|mr^B?iQQTGG$9;kb6TXkaD>U90Zx!|O@WclWfadwX`(y?sa3z5Q@~BKnV)x~J}`d+MILr|zkH&dRNp z(>--h-Bb6}J#|mrbEACzt4G~a_tZUgPu)}Z)IG0_Yed~s_tZUgPu)}Z)IB%IdtWoY zD8CMMPu)}Z)ID`i-Sa(ht*CqIp1P;*se9_4y63;++EMo$UUhHpS#@t8S9Nb+6xWHm z=k-+kv@2k4EpQ?Y08)fUBx~J}`d+MILr|$XgxN+1ybx++>_tZUgPu=q`acJBn zN8NLL)xABn>fWAJ=f+{V+pF&FhpO)FYlo%?)jPc;;4J- zp1P;*se9_4x*wR&f75K;Q}@(8bx++>_tZVFi7$z|r|zkH>Ylo%?x}mOpU;1@sC(+3 zx~J}`d+MIL=hacWx~J}`d+MILr|zkHYIyV94)yhQTs@{vsTb6lbxs_f(>?F3y0;&x zy0=$W-P@bREu!wJd+MILr|zkH>Yg8qTSnbe_tZUgPu)}Z)cwG`hpn=8&uy#j?E|Xr z?Nh4m?aSiJqV9QP)xCXR)xG^x)xEuX-ow^W_tZUgPu)}Z)ID|2OX4Ylo%?x}m~p1P;*`KkEwsC(+3x~J}`d+MIL z=N57MsC(+3x~J}`d+MIL=O^M0QTNn6bx++>_tZUgPwnrR(>=#k-P=>D?(LaX_xAkU zPC4Ckan-%Otm@v@J$29R_tZUgPu)}Z{7~E_ z&a1k&7ggQcPgLF8Lvr+8J~}tP>fWAOb#KqAy0;h9g>l#1Lsj?o@~V4V_tZUg&o{)^ zMBP*O)ID`i-Bb6}J--uQ8+A|JQ}@(8bw4n_I(5%i#@(Xsse9_4x~J}`d+MHFh`UGK zQ}@(8bx++>_tZUij<1Wlr|zkH>Ylo%?x}lzE^1fz)ID`i-Bb5mH%Hx5!+Ye$)nn?E zdO@98=hXT2uDEAT_k5)4-d_tZUg&#%Y5qwcAD>Ylo% z?g!>QQ1{#}?h|#-1FG)rQ>yOm%c}0}o8rDv_q?y_-hQg;-qt;J&r$IWQTNn6bx++> z_tZUg&l}=?QTNn6bx++>_tZUyR^R1gauciW?detb_S~v_dtqG~$L1cZy0=$V-P^jS z?x}ko7Wa?3r|zkH>Ylo%?x}nJC>{`XPu)}Z)cwHx>eM}T&wb*7QTNn6bx++>_tZUg z&u_(pqVB1C>Ylo%?x}m~o_ocEqwcAD>Ylo%?x}m~p5KV$qVB1C>Ylo%?x}m~p4vYo zr+ZGRy0>Rm-P`l4?(HSHLvy<4vZ{Mq_tZUgPu=t2_{OMv>Ylo%?x}m~p1S9Q@l8?p z)ID`i-4D!rpzf)A?iSx152(7gPpP`MFRQw@Z>nF7Z^^!|>fU~;>fY8pbx+;%HSw^h zd+MILr|zkH>Ylpim*e=Td+MILr|vlg^kFQ~e=m)6JQ;ko5i z_qOh-d+MILr|x-Td~4J_bx++>_tZUgPu=sk@rbB<>YlnEm|vZ`r|zkHzBwKlbx++> z_tZUgPu)}Z{6Rb_>Ylo%?x}m~p1P;*`Nnv3)ID`i-Bb6}J#|mr^LtUdx~J}`d#;#Pu)}Z)IE=mZ;Pi^-P>1G-P^ZR z-P;e=pU1an>z=x&?g!>QQ1{e5bfRn*$HYmw=~egk+^TzfVb#6; zcwG@s%B`xpw{=h5Q}@(8bYjg&r$pUT_XG2*Q}@(8bx+;% z_&7Q0p1P;*se9_4x~J~>>v(F^J#|mrQ}@(8bx+;%ZSl0Id+MILr|zkH>YlpiFXNP` zd+MILr|zkH>Ylo%_D|30o-?cN?fF&r_L8c5`?=gZa=NGPse9_4x~J}`d!820h`Oil zse9_4x~J}`dp;Y_jJl`p2j)Fc_tZUgPu=s#cvd{6>fXMr>fXMo>fXM;{xrTb`>CpX zTldsGbx++>_k3%7SJXXqPu)}Z)ID`i-SfwBYScY-&mlSLo+GO6?J;#ioR*tab#E`I zy0@2B-P^P=vld+MILr|zkH>YiuB^P}#md+MILr|zkH z>Yl%k+SNVR%~AKPcHRSZ&+V)3?SrfC z?bGV{@#5?&s_yMus_yLvtM2V*>p$arvUNW&?}56f?x}m~p1SAc_}-{{URHH)-&A#P z-(PiaKONr}bx++>_tZUgPu)}ZJUPBU>Ylo%?x}m~p1P;*`DC0Pbx+-MXw|(vy6WDZ zP$$J1xw%#M_QI-r`|+xK`-Qsk8u@$9v+k*T>Ylo%?x}m~p4Y`oqwcAD>Ylo%?x}m~ zo*U-9UlupWuTI@l_tZUgPu)}Zd~duw>Ylo%?x}m~p1P;*`JZ@2)ID`i-Bb6}J#|mr z^Wu1A)ID`i-Bb6}J#|mr^IuWBx~J}`d+MILr|zkHYX7R7?m55e-d<95Z$DRcZ?BX0 zaCOu@bx++>_tZUgPu=s%_<^W<>Ylo%?x}m~p1S8+dH>hMFi`jQc2)QGfmQeRZQXO5_@StK>Ylo%?x}m~p1S8}z=x&?x}m~p1P;*`N8r zx~J}`d+MILr|$WI`0=QF>Ylo%?x}m~p1S9HIqmA6J67G>hg99$r&rzE8ooZKd)`%b zZ$DafZ$DpkZ@)BtB5q%GZy#KBZ=Y6mZ(mtI9zU6_d+MILr|zkH>YlnEnD_9hY~6Fa zs(bsus(X8K)xCXr{B+bkZ>qYt@2|SIpRT&M*T{SLOw>JfPu)}Z)ID`i-Sg7;*{FN! zp1P;*se9_4y60-Sxw)ZL_x9+jdwW9Fy*;hYi1Ts_tM2W`tM2U=s_t#wbC-BS)ID`i z-Bb6}J#|mr^Y(aS)ID`i-Bb6}J#{}YpFdv~Z;HC7?x}m~p1P;*se67p-W+vL-Bb6} zJ#|mrQ}^65-V$|B-Bb6}J#|mrQ}_I2yfx~cx~J}`d+MILr|vmCr(NB1kE(n7*s6Q` z!m4{)!?)#h&m~p&_H$MDw(hBW?hrp0bx++>_tZUgPu)}ZyfuD4>Ylo%?x}m~p1L2H z_rRCLFU0Mt?(GAs?(NA{_x9!Wqw$N`H&xx+_gCH9PgmXBy621Im!j^ed+MILr|zkH z>Yg8tw@2Mm_tZUgPu)}ZTrJ08xdpiqRrmIos(X7<)xABV&W?BFmR8-{E2{2o-Bb6} zJ@<=Wj=HDrse9_4x~J}`d)^bj5_M1AQ}@(8bw4nlKXuPn#ji%)Q}@(8bx++>_tZVV z7{3;EPu)}Z)ID`i-Bb5`Mf`fyJ#|mrQ}@(8bx+;%^YPB8d+MILr|zkHu9u_kxl>NN zy5}KP_x9;k_x4p)_qK-b&gq_yR^8jrSKZsXr|$Xs_>FjQ)xCXM)xCXX)xCXdy)%9@ zTldsGbx++>_XG1DsC&LVek_tZTfiuXm`Q}@(8bw4nlKXp&tbKiJ>)ID`i z-Bb6}J#|mr^V{)(sC(+3x~J}`d+MIL=ic$bsC(+3x~J}`d+MIL=QrcxsC(+3x~J}` zdk(LfXMv>fXM->fYAycXPVub5-}Y?x}m~p1S9__`Rrm>Ylo%?x}m~ zp1SAa`2DDR>Ylo%?g!>QQ1{e5caJ}a2UgwNldJCS%d76~o9ox&53}#Dy0@RMy0>*t z-Bb5`ZTwNxJ#|mrQ}@(8bx+;%EAgSId+MILr|!90j=JZ_x@+{^J*Mj3o>X;jb$ z=hZvn!?_hz_qOh-d+MILr|vm1{y6HMx~J}`d+MILr|$V!{7KY3bx++7%;!(tQ}@(8 z-x7Zsbx++>_tZUgPu)}Z{9*iA)ID`i-Bb6}J#|mr^G)&RQTNn6bx++>_tZUg&+o@a zqVB1C>YnT6sC({Ib#EV<)2{A$deyysRn@(HTh+a-;a}u*&*!V|ZQWD%)ID|2W8yF4 zX;t_3l~woltyTB-;`&JZRkrS_d+L5*-UD?{-Bb5GDE>O?o|CKY?aQm~?VGFa?FZs- zqVD;0)xE8I>Ylo%?s-7`ZPYz=Pu)}Z)ID`i-SfWqSkygrPu)}Z99DI2?^?%3-`x|c z?(Jz+_x9|nd;5;MC_a&Uq3Yh&J#|mrQ}@(8r^Y9v?x}m~p1P;*se9_4%i~i~_tgEs zeE!rubx++>_dFp!9d%FLQ}@(8bx++>_xw$KChDHLr|zkH>Ylo%?)mokY}7qrLp_T^Rg_RUrI z_5<~2@lV-LSKZsXr|zkH>Ylpi5%JGa_tZUgPu)}Z)ID|2pTy-+_tZUC%Tf0nS#@uZ zt%pb7-IJ>B?HN_~_PnZldr^Hj{w1e->Ylo%?x}m~p1SAs_}8d=>Ylo%?x}m~p1S9% z__wJ0f%*KYd+MILr|zkHzBB$k>Ylo%?x}m~p1P;*`KS1gsC(+3x~J}`d+MIL=b7=J zQTNn6bx++>_tZUg&p*VKQTJRgN8NL$s(bs;s(bq#IqmA6S5@8Hw^iNS->tg0HT-|M zb@Cpld+tzmZ;z|Gx2Mz#;=i(WPu)}Z)ID`i-Bb5m8UG!1KQQlsx~J}`d+MIL=c(~O zQTM#O>fXM&>fU~!>fU}P{x|BLx~J}`d+MILr|x-5{9n{Pbx++>_tZUgPu=sWxGL(N zy63Q}dwbWad;9QuQuN(Dt?J&MU3G8YQFU)WT%U+T@(y%Q-Bb6}J#|mrQ}>*eTP>%1 z>Ylo%?x}m~p1S8oYvw;)#trj-N8MBR)ID`i-Bb5`UtA;Vp1P;*se9_4x~J~>-?(Pf zJ#|mrQ}@(8bx+;%J#nq5d+MILr|zkH>YlpizvJ3b_Z(hzZ|_-kZy#56Z(kI(t9xEw zb#LEYb#MQo>fYA!x>5Jsq3YfqS9Nbssk*mk=GM#Up0`%r+l#C2?PXQ>_S$*>>&Lb7 z9;kcjp1P;*se9_4=fxMr%d76~o2%~a2deJvXX-!V2HCo&?x}m~p1P;*se7IiH;lTc z?x}m~p1P;*seAq{ZWMJ--Bb6}J#|mrQ}=v#+&JofWAOb#KqAbK|hw?N#^oLsj?o@~V4V_XG3!Z<4Kh>Ylo%?x}m~p1S9?@x@X1)ID`i z-Bb6}J$26w^7(HXbx++>_tZUgPu)}Zye7UR>Ylo%?x}m~p1P;*xqd$X&7$t9d+MIL zr|zkH>Yi6e?dqPor|zkH>Ylo%?y2F;bGqm2tM2V%s_yLzs_yMMx#2n8^UkV!`;n@9 zdu7$Vy>{Ni7E$-qJ#|mrQ}@(8b_tZUg&(-rDwvM`|?x}m~p1P;*se4`$w~4x^?x}m~p1P;* zseAekACcRy>fWAMb#G6vy0_=n1#x8Vo~nEMv8sD}Rn@({QT}Ld9Y;moQ}@(8bx++> z_tZT<6Ss}Jr|zkH>Ylo%?x}lj6}OAJr|zkH>Ylo%?x}lzD!x4Gp1P;*se9_4x~J~B zMch8>p1P;*se9_4x~J~>iKt!OQ}@(8bx++>_tZT#ykk!H99MO3PpP`MXI9=1 zbx++>_tZUgPu)}Z{78IN)ID`i-Bb6}J#|mrbCdY$sC(+3x~J}`d+MIL=ZE4hQTNn6 zbx++>_tZUyYlHRyG7ko_tZUgPu)}Z)IGltcaOTK?x}m~ zp1P;*seA4mUl(;x-Bb6}J#|mrQ}_H_)UNKSd+MILr|!9Kj=HCY_sHp<$5h?h7gXKb zbE@v`yK;Nxbk9es?(LOT_qOkQ>Ym%jy`t`^d+MILr|zkH>Ylg6y`%1_d+MILr|zkH z>Ym%heWLEEd+MILr|zkH>Yg{neWUKFd+MILr|zkH>Yk(G8=~&1d+MILr|zkH>Yg{m z{i5!vd+MILr|zkH4z0e+$K)nf-P_Zv?(Ml%_x8fNG>*+ZR&{T$s=Bv*-&6P0J>L-b zkGiMsse9_4x~J}`dwwS#5Oq)8Q}@(8bx++>_uMBQ7_xx5oDC(ZN zr|zkH>Ylo%?zvYyIO?9dr|zkH>Ylo%?)iwaL~19eZ`^L6o!QTNn6bx++>_tZUg&#%WfMcq^P)ID`i z-Bb6}J$H+5j=HDrse9_4x~J}`dww;(CF-8Kr|zkH>Ylo%?)jQ{SkygrPu)}Z)ID`i z-Sf+Fd|Xs@Z$D9WZx6{)_Z(4umruw|t-7~oRo&YQs_yNj_3?OkZh6(ct^0xbqp5r9 zp1S8@@vTw!)ID`i-Bb6}J$26?#UrBbse9_4x~J}`d+MHVjz>n_Q}@(8bx++>_tZUq z5RZzwr|zkH>Ylo%?x}meF&-UtPu)}Z)ID`i-Bb7cUevDcse9_4>*lC??pSqiYxvll z?s-Agy*;Pu-oC5q-hMQ9Tu%2~S#@vwzNhY~d+MGC$G1h@Q}@(8bx++>_tZTfjBk&+ zr|zkH>Ylo%?x}ko7>|#-r|zkH>Ylo%?x}m;A5Vz7r|zkH>Ylo%?x}n3A5VYlo%?x}m;8z)BHQ}@(8bxy_%ZdKL2?fag( zr|zkH>YgXYlcVmbd+MILr|zkH>Yl%ir$pUT_tZUgPu)}Z)IE=nlcVmbd+MILr|zkH z>Yl%jr$*gV_tZUgPu)}Z)IHx8Pm8*z?x}m~p1P;*seAr1YFGEvJ$28`tM2VRs_tzK zpPth_XI9_tZUgPu=sZcy81^bx++> z_tZUgPu=s6@w}*e>Ylo%?x}m~p1S85@%*TJ>Ylo%?x}m~p1SAnqjq)Ab#v4`cdWX% z52?DhHGE-C_ncF8Z{JmQZ$DafZ$F>AD5v{@c@NY*bx++>_tZU4ix)@TQ}@(8bx++> z_tZU~jqi!Nr|zkH>Ylo%?x}lDj_-}Sr|zkH>Ylo%?x}k|9p4vqPu)}Z)ID`i-Bb5G zIle#Yp1P;*se9_4x~J~>WSky#Pu+89)xABs>fWAEC&d}LxmEY}!m4}w@v3|Kg}QOx z`z3kaD1S6{Pu)}Z)ID`i-ShqN(x`jtp1P;*se9_4y66AmWl{IkJ#|mrQ}@(8bYo3JS47=Y_tZUgPu)}Z)IBebS4Q1a_tZUgPu)}Z)II+dwX1t> zUUhHpQFU)0TXk=1_^O=lIlt=OUQ%^$KUZ~cuaozGbzD2|fx4&ese9_4x~J}We*8ex zJ#|mrQ}@(8bx+;%pYfWgd+MILr|zkH>Ylpix$%Qh_tZUgPu)}Z)ID|2zsGB%?x}m~ zp1P;*se9_4XUFTJ?x}m~p1P;*se9_4e~q)^kQ{Z-5mop0n5uhwQk@oO=N44m+e@qN z?G;t`w(bY!kN!}$?x}m~p1P;*se9_4*ToM<-Bb6}J#|mrQ}@(8H_Ye%k*NFs*U}xv zcs;;z9Cy-kdXSr$-OOlaG&40bnwgrJnwgrJnwiW@X6DSynVFlJ-OPiVrJW=vX-RUD zq*5nIk|e1lm83{2Ns*M#>%Z@V?_T>n{r&dd@5z66)ID`i-Bb6}J$29LYlo% z?x}m~p1SA2JpUJ??x}m~p1P;*se9_4&&A16_tZUgPu)}Z)ID|2=A3qQ&r3?(+oMX| z+Y?LO+Zyi9>7E~yy0Ylo% z?x}nB&wF?|>Ylo%?x}m~p1P;*`AnP|bx++>_tZUgPu)}Z?3efOO4L1dPu)}Z)ID`i z-Sg=Ylo%?%9-kHFsvIdwXc9dwWc&d%Ladh&{PwrS9$RrS9#MrS9!P zdHy^(}r$^mW_tZUgPu)}Z)IHCOuSeZe_tZUgPu)}Z)IF!h z8BzDtJ#|mrQ}@(8bYlo% z?x}m~p1P;*d3Jm&>Ylo%?x}m~p1P;*IVsMGx~J}`d+MILr|zkHo)zDYx~J}`d+MIL zr|zkHz7Xd|-Bb6}J#|mrQ}=Aju_gCT?t)VH_J~sV_JmURc1PJ2dvjlvy0;IMy0>-T zFwdX5=jHL;sC(+3x~J}`d+MIL=iE3i>Ylo%?x}m~p1P;*d0Bif>Ylo%?x}m~p1P;* zIVaAKx~J}`d+MILr|zkHUK-zzx~J}`d+MILr|zkH&W;PB?x}m~p1P;**_@;9IV7iD z-E&l_dwXK3d%LsLy{+MeIo)$pseAiqse9Y^p1SA7@x!Ql>Ylo%?x}m~p1SAExG3tL zx~J}`d+MILr|x-CTpV>z-Bb6}J#|mrQ}>(^mqguD_tZUgPu)}Z)IBeZA4T0$_tZUg zPu)}Z)IF!irBV0PJ#|mrQ}@(8Tgt(4S#D^ldwWc&d%LaFz1>yz#J=42Qup@BQuns+ zJ#|mr^SbzP)ID`i-Bb6}J#|mrb7@>2bx++>_tZUgPu)}Zyf%Ikbx++>_tZUgPu)}Z zToPAA-Bb6}J#|mrQ}@(8uZf>V-Bb6}J#|mrQ}@(87sZuP_tZUgPu)}ZY%O(f56@{= z_Z(O1-fk~-Z+Dlvw>7*fr+e-xb#LpwVcr9EPu=sX_<7Vlbx++>_tZUgPu+7tTpe{! z-Bb6}J#|mrQ}?_wei3y~-Bb6}J#|mrQ}>)7*F@b@_tZUgPu)}Z)IG0=Uq;_q;iN6LnABQ}@(8bx++> z_goV_tZVFk3U4+Q}@(8 zbx++>_tZU?$8Ayf)ID`i-Ls|Cy*;cP8GXCQl)AUuO5NLCrS9$CvM=t)oh)^4``%Oc z)ID`i-Se*aW7Iu$Pu)}Z)ID`i-E&*q8Ff$HQ}@(8bx++>_q;Ry6m?JCQ}@(8bx++> z_uLwHMcq^P)ID`i-Bb6}J@1G=N8MBR)ID`i-Bb6}J-5W&QTNn6bYlpi?eW*Bd+MILr|zkH>Ylpi#<(}? zp1P;*se9_4x~J}WTl_8Rp1P;*se9_4x~J~BA?}O1r|zkH>Ylo%?x}m;8h?+vr|zkH z>Ylo%?x}mOkNczUse3l%sCy1Bb#IR>$41}o38n7sj#Bq_PpNymuUr=o=5*gM&!4)d z?x}m~p1S8l@sFr`>Ylo%?x}m~p1SA$cqrTf=|nbl))Vfx4&ese9_4y61iIpQwB4p1P;*se9_4y65hA zEb5-Rr|zkH>Ylo%?s;$gH|n0cr|zkH>Ylo%?zt--kGiMsse9_4x~J}`d)^cOi@K-o zse9_4x~J}`d+v-UqVB1Cwv@WJhn2dw$Ch_T-|n_j_jXsQd%L&Py}hp75l_jx9~7v2 z>Ylo%?x}m~o==6QsC(+3x~J}`d+MIL=ZSb~)ID`i-Bb6}J#|mr^U2sR>Ylo%?x}m~ zp1P;*c|4vLbx++>_tZUgPu)}Zd?NObx~J}`d+MILr|zkH9*cfEqkFcNy0?dyy0^!d zy0;&X+SNU~OWoTGOWoU>OWoTVK0OY|d!X*Ad+MILr|zkHJ{Fs!?x}m~p1P;*se9_4 zN8%Y#_tZUgPu)}Z)ID|2N8`Y#d+MILr|zkH>Ylpi;pl(I=$^W#?x}m~p1P;*`AGD) zYjjWDQ}@(8bx++>_dFE+c24)yJ#|mrQ}@(8bYfMV{};eb AVE_OC literal 0 HcmV?d00001 diff --git a/src/network-workloads/conf/dragonfly-dally/dfdally_8k_intra b/src/network-workloads/conf/dragonfly-dally/dfdally_8k_intra new file mode 100644 index 0000000000000000000000000000000000000000..71981eb705ea2f1ee717b96a6189160553165bfb GIT binary patch literal 2880 zcmYk$32wtM3;;p5>5-oK|8L6?x&R3TS&!Z{&a5-*%sR8qtTXG(I&!Z{&a5-*%sR8qtTXG(xn^~YneDx2bY`7dXV#f@W}R7Q)|qu?ompqrnX}C5xH8*k z*1j=1v(BtD>&!Z{&a5-*%sR8qtTT82td1qKeP*3m``+lxI&!ZH zch2hEpV>aM&a5+QXBnMYXV#f@W}R7Q)|qu?ow<8vb?(e;pIK+tnRRCET%$AV%sR8q ztTXG(I&!Z{&a5+c#;neHne8*{%sR8qtTXG(I5=Zwtu znRRBJS!dRnb!MGeXVx4>XV#f@W}UfrXLY{IY@b&!Z{<}*68&fLyBI&!Z{&a5-* l%$n8c%>CQz%sR8qtTXG(I&%+l`~h!I4iNwV literal 0 HcmV?d00001 diff --git a/src/network-workloads/model-net-synthetic-dally-dfly.c b/src/network-workloads/model-net-synthetic-dally-dfly.c new file mode 100644 index 0000000..1be3753 --- /dev/null +++ b/src/network-workloads/model-net-synthetic-dally-dfly.c @@ -0,0 +1,530 @@ +/* + * Copyright (C) 2015 University of Chicago. + * See COPYRIGHT notice in top-level directory. + * + */ + +/* +* The test program generates some synthetic traffic patterns for the model-net network models. +* currently it only support the dragonfly network model uniform random and nearest neighbor traffic patterns. +*/ + +#include "codes/model-net.h" +#include "codes/lp-io.h" +#include "codes/codes.h" +#include "codes/codes_mapping.h" +#include "codes/configuration.h" +#include "codes/lp-type-lookup.h" + + +static int net_id = 0; +static int traffic = 1; +static double arrival_time = 1000.0; +static int PAYLOAD_SZ = 2048; + +/* whether to pull instead of push */ +static int num_servers_per_rep = 0; +static int num_routers_per_grp = 0; +static int num_nodes_per_grp = 0; +static int num_nodes_per_cn = 0; +static int num_groups = 0; +static unsigned long long num_nodes = 0; + +static char lp_io_dir[256] = {'\0'}; +static lp_io_handle io_handle; +static unsigned int lp_io_use_suffix = 0; +static int do_lp_io = 0; +static int num_msgs = 20; +static tw_stime sampling_interval = 800000; +static tw_stime sampling_end_time = 1600000; + +typedef struct svr_msg svr_msg; +typedef struct svr_state svr_state; + +/* global variables for codes mapping */ +static char group_name[MAX_NAME_LENGTH]; +static char lp_type_name[MAX_NAME_LENGTH]; +static int group_index, lp_type_index, rep_id, offset; + +/* type of events */ +enum svr_event +{ + KICKOFF, /* kickoff event */ + REMOTE, /* remote event */ + LOCAL /* local event */ +}; + +/* type of synthetic traffic */ +enum TRAFFIC +{ + UNIFORM = 1, /* sends message to a randomly selected node */ + RAND_PERM = 2, + NEAREST_GROUP = 3, /* sends message to the node connected to the neighboring router */ + NEAREST_NEIGHBOR = 4, /* sends message to the next node (potentially connected to the same router) */ + RANDOM_OTHER_GROUP = 5 + +}; + +struct svr_state +{ + int msg_sent_count; /* requests sent */ + int msg_recvd_count; /* requests recvd */ + int local_recvd_count; /* number of local messages received */ + tw_stime start_ts; /* time that we started sending requests */ + tw_stime end_ts; /* time that we ended sending requests */ + int dest_id; +}; + +struct svr_msg +{ + enum svr_event svr_event_type; + tw_lpid src; /* source of this request or ack */ + int incremented_flag; /* helper for reverse computation */ + model_net_event_return event_rc; +}; + +static void svr_init( + svr_state * ns, + tw_lp * lp); +static void svr_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp); +static void svr_rev_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp); +static void svr_finalize( + svr_state * ns, + tw_lp * lp); + +tw_lptype svr_lp = { + (init_f) svr_init, + (pre_run_f) NULL, + (event_f) svr_event, + (revent_f) svr_rev_event, + (commit_f) NULL, + (final_f) svr_finalize, + (map_f) codes_mapping, + sizeof(svr_state), +}; + +// /* setup for the ROSS event tracing +// */ +// void dally_svr_event_collect(svr_msg *m, tw_lp *lp, char *buffer, int *collect_flag) +// { +// (void)lp; +// (void)collect_flag; +// int type = (int) m->svr_event_type; +// memcpy(buffer, &type, sizeof(type)); +// } + +// /* can add in any model level data to be collected along with simulation engine data +// * in the ROSS instrumentation. Will need to update the last field in +// * svr_model_types[0] for the size of the data to save in each function call +// */ +// void dally_svr_model_stat_collect(svr_state *s, tw_lp *lp, char *buffer) +// { +// (void)s; +// (void)lp; +// (void)buffer; +// return; +// } + +// st_model_types dally_svr_model_types[] = { +// {(ev_trace_f) dally_svr_event_collect, +// sizeof(int), +// (model_stat_f) dally_svr_model_stat_collect, +// 0, +// NULL, +// NULL, +// 0}, +// {NULL, 0, NULL, 0, NULL, NULL, 0} +// }; + +// static const st_model_types *dally_svr_get_model_stat_types(void) +// { +// return(&dally_svr_model_types[0]); +// } + +// void dally_svr_register_model_types() +// { +// st_model_type_register("nw-lp", dally_svr_get_model_stat_types()); +// } + +const tw_optdef app_opt [] = +{ + TWOPT_GROUP("Model net synthetic traffic " ), + TWOPT_UINT("traffic", traffic, "UNIFORM RANDOM=1, NEAREST NEIGHBOR=2 "), + TWOPT_UINT("num_messages", num_msgs, "Number of messages to be generated per terminal "), + TWOPT_UINT("payload_sz",PAYLOAD_SZ, "size of the message being sent "), + TWOPT_STIME("sampling-interval", sampling_interval, "the sampling interval "), + TWOPT_STIME("sampling-end-time", sampling_end_time, "sampling end time "), + TWOPT_STIME("arrival_time", arrival_time, "INTER-ARRIVAL TIME"), + TWOPT_CHAR("lp-io-dir", lp_io_dir, "Where to place io output (unspecified -> no output"), + TWOPT_UINT("lp-io-use-suffix", lp_io_use_suffix, "Whether to append uniq suffix to lp-io directory (default 0)"), + TWOPT_END() +}; + +const tw_lptype* svr_get_lp_type() +{ + return(&svr_lp); +} + +static void svr_add_lp_type() +{ + lp_type_register("nw-lp", svr_get_lp_type()); +} + +static void issue_event( + svr_state * ns, + tw_lp * lp) +{ + (void)ns; + tw_event *e; + svr_msg *m; + tw_stime kickoff_time; + + /* each server sends a dummy event to itself that will kick off the real + * simulation + */ + + /* skew each kickoff event slightly to help avoid event ties later on */ + kickoff_time = 1.1 * g_tw_lookahead + tw_rand_exponential(lp->rng, arrival_time); + + e = tw_event_new(lp->gid, kickoff_time, lp); + m = tw_event_data(e); + m->svr_event_type = KICKOFF; + tw_event_send(e); +} + +static void svr_init( + svr_state * ns, + tw_lp * lp) +{ + ns->start_ts = 0.0; + ns->dest_id = -1; + + issue_event(ns, lp); + return; +} + +static void handle_kickoff_rev_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp) +{ + if(m->incremented_flag) + return; + + if(b->c1) + tw_rand_reverse_unif(lp->rng); + + if(b->c8) + tw_rand_reverse_unif(lp->rng); + if(traffic == RANDOM_OTHER_GROUP) { + tw_rand_reverse_unif(lp->rng); + tw_rand_reverse_unif(lp->rng); + } + + model_net_event_rc2(lp, &m->event_rc); + ns->msg_sent_count--; + tw_rand_reverse_unif(lp->rng); +} +static void handle_kickoff_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp) +{ + if(ns->msg_sent_count >= num_msgs) + { + m->incremented_flag = 1; + return; + } + + m->incremented_flag = 0; + + char anno[MAX_NAME_LENGTH]; + tw_lpid local_dest = -1, global_dest = -1; + + svr_msg * m_local = malloc(sizeof(svr_msg)); + svr_msg * m_remote = malloc(sizeof(svr_msg)); + + m_local->svr_event_type = LOCAL; + m_local->src = lp->gid; + + memcpy(m_remote, m_local, sizeof(svr_msg)); + m_remote->svr_event_type = REMOTE; + + assert(net_id == DRAGONFLY || net_id == DRAGONFLY_DALLY); /* only supported for dragonfly model right now. */ + ns->start_ts = tw_now(lp); + codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, anno, &rep_id, &offset); + int local_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); + + /* in case of uniform random traffic, send to a random destination. */ + if(traffic == UNIFORM) + { + b->c1 = 1; + local_dest = tw_rand_integer(lp->rng, 0, num_nodes - 1); + } + else if(traffic == NEAREST_GROUP) + { + local_dest = (local_id + num_nodes_per_grp) % num_nodes; + //printf("\n LP %ld sending to %ld num nodes %d ", local_id, local_dest, num_nodes); + } + else if(traffic == NEAREST_NEIGHBOR) + { + local_dest = (local_id + 1) % num_nodes; +// printf("\n LP %ld sending to %ld num nodes %d ", rep_id * 2 + offset, local_dest, num_nodes); + } + else if(traffic == RAND_PERM) + { + if(ns->dest_id == -1) + { + b->c8 = 1; + ns->dest_id = tw_rand_integer(lp->rng, 0, num_nodes - 1); + local_dest = ns->dest_id; + } + else + { + local_dest = ns->dest_id; + } + } + else if(traffic == RANDOM_OTHER_GROUP) + { + int my_group_id = local_id / num_nodes_per_grp; + + int other_groups[num_groups-1]; + int added =0; + for(int i = 0; i < num_groups; i++) + { + if(i != my_group_id) { + other_groups[added] = i; + added++; + } + } + int rand_group = other_groups[tw_rand_integer(lp->rng,0,added -1)]; + int rand_node_intra_id = tw_rand_integer(lp->rng, 0, num_nodes_per_grp-1); + + local_dest = (rand_group * num_nodes_per_grp) + rand_node_intra_id; + printf("\n LP %ld sending to %ld num nodes %d ", local_id, local_dest, num_nodes); + + } + assert(local_dest < num_nodes); +// codes_mapping_get_lp_id(group_name, lp_type_name, anno, 1, local_dest / num_servers_per_rep, local_dest % num_servers_per_rep, &global_dest); + global_dest = codes_mapping_get_lpid_from_relative(local_dest, group_name, lp_type_name, NULL, 0); + ns->msg_sent_count++; + m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)m_remote, sizeof(svr_msg), (const void*)m_local, lp); + + issue_event(ns, lp); + return; +} + +static void handle_remote_rev_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp) +{ + (void)b; + (void)m; + (void)lp; + ns->msg_recvd_count--; +} + +static void handle_remote_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp) +{ + (void)b; + (void)m; + (void)lp; + ns->msg_recvd_count++; +} + +static void handle_local_rev_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp) +{ + (void)b; + (void)m; + (void)lp; + ns->local_recvd_count--; +} + +static void handle_local_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp) +{ + (void)b; + (void)m; + (void)lp; + ns->local_recvd_count++; +} +/* convert ns to seconds */ +static tw_stime ns_to_s(tw_stime ns) +{ + return(ns / (1000.0 * 1000.0 * 1000.0)); +} + +/* convert seconds to ns */ +static tw_stime s_to_ns(tw_stime ns) +{ + return(ns * (1000.0 * 1000.0 * 1000.0)); +} + +static void svr_finalize( + svr_state * ns, + tw_lp * lp) +{ + ns->end_ts = tw_now(lp); + + //printf("server %llu recvd %d bytes in %f seconds, %f MiB/s sent_count %d recvd_count %d local_count %d \n", (unsigned long long)lp->gid, PAYLOAD_SZ*ns->msg_recvd_count, ns_to_s(ns->end_ts-ns->start_ts), + // ((double)(PAYLOAD_SZ*ns->msg_sent_count)/(double)(1024*1024)/ns_to_s(ns->end_ts-ns->start_ts)), ns->msg_sent_count, ns->msg_recvd_count, ns->local_recvd_count); + return; +} + +static void svr_rev_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp) +{ + switch (m->svr_event_type) + { + case REMOTE: + handle_remote_rev_event(ns, b, m, lp); + break; + case LOCAL: + handle_local_rev_event(ns, b, m, lp); + break; + case KICKOFF: + handle_kickoff_rev_event(ns, b, m, lp); + break; + default: + assert(0); + break; + } +} + +static void svr_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp) +{ + switch (m->svr_event_type) + { + case REMOTE: + handle_remote_event(ns, b, m, lp); + break; + case LOCAL: + handle_local_event(ns, b, m, lp); + break; + case KICKOFF: + handle_kickoff_event(ns, b, m, lp); + break; + default: + printf("\n Invalid message type %d ", m->svr_event_type); + assert(0); + break; + } +} + +int main( + int argc, + char **argv) +{ + int nprocs; + int rank; + int num_nets; + int *net_ids; + int num_router_rows, num_router_cols; + + tw_opt_add(app_opt); + tw_init(&argc, &argv); + + if(argc < 2) + { + printf("\n Usage: mpirun --sync=2/3 mapping_file_name.conf (optional --nkp) "); + MPI_Finalize(); + return 0; + } + + MPI_Comm_rank(MPI_COMM_CODES, &rank); + MPI_Comm_size(MPI_COMM_CODES, &nprocs); + + configuration_load(argv[2], MPI_COMM_CODES, &config); + + model_net_register(); + svr_add_lp_type(); + + // if (g_st_ev_trace || g_st_model_stats || g_st_use_analysis_lps) + // dally_svr_register_model_types(); + + codes_mapping_setup(); + + net_ids = model_net_configure(&num_nets); + //assert(num_nets==1); + net_id = *net_ids; + free(net_ids); + + /* 5 days of simulation time */ + g_tw_ts_end = s_to_ns(5 * 24 * 60 * 60); + model_net_enable_sampling(sampling_interval, sampling_end_time); + + if(net_id != DRAGONFLY && net_id != DRAGONFLY_DALLY) + { + printf("\n The test works with dragonfly model configuration only! %d %d ", DRAGONFLY_DALLY, net_id); + MPI_Finalize(); + return 0; + } + num_servers_per_rep = codes_mapping_get_lp_count("MODELNET_GRP", 1, "nw-lp", + NULL, 1); + configuration_get_value_int(&config, "PARAMS", "num_router_rows", NULL, &num_router_rows); + configuration_get_value_int(&config, "PARAMS", "num_router_cols", NULL, &num_router_cols); + configuration_get_value_int(&config, "PARAMS", "num_groups", NULL, &num_groups); + configuration_get_value_int(&config, "PARAMS", "num_cns_per_router", NULL, &num_nodes_per_cn); + + num_routers_per_grp = num_router_rows * num_router_cols; + + num_nodes = num_groups * num_routers_per_grp * num_nodes_per_cn; + num_nodes_per_grp = num_routers_per_grp * num_nodes_per_cn; + + assert(num_nodes); + + if(lp_io_dir[0]) + { + do_lp_io = 1; + int flags = lp_io_use_suffix ? LP_IO_UNIQ_SUFFIX : 0; + int ret = lp_io_prepare(lp_io_dir, flags, &io_handle, MPI_COMM_CODES); + assert(ret == 0 || !"lp_io_prepare failure"); + } + tw_run(); + if (do_lp_io){ + int ret = lp_io_flush(io_handle, MPI_COMM_CODES); + assert(ret == 0 || !"lp_io_flush failure"); + } + model_net_report_stats(net_id); + tw_end(); + return 0; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C new file mode 100644 index 0000000..67979f6 --- /dev/null +++ b/src/networks/model-net/dragonfly-dally.C @@ -0,0 +1,5241 @@ +/* + * Copyright (C) 2013 University of Chicago. + * See COPYRIGHT notice in top-level directory. + * + * A 1D specific dragonfly custom model - diverged from dragonfly-custom.C + * Differs from dragonfly.C in that it allows for the custom features typically found in + * dragonfly-custom.C. + * + * This is not intended to be a long term solution, but enough changes had been made that merging + * into dragonfly-custom.C wasn't feasible at the time. And we needed to have this work introduced + * to the repo. + * + * DO NOT USE THIS MODEL AS A STARTING POINT FOR NEW MODELS + * It has lots of "fossils" leftover from dragonfly custom that depending on how its used, are bad. + * The orthogonally new changes will hopefully be merged into dragonfly custom. But I'm not familiar + * enough with either dragonfly custom or this model in order to confidently make the merge safe for both. + */ + +#include + +#include "codes/jenkins-hash.h" +#include "codes/codes_mapping.h" +#include "codes/codes.h" +#include "codes/model-net.h" +#include "codes/model-net-method.h" +#include "codes/model-net-lp.h" +#include "codes/net/dragonfly-dally.h" +#include "sys/file.h" +#include "codes/quickhash.h" +#include "codes/rc-stack.h" +#include +#include +#include + +#ifdef ENABLE_CORTEX +#include +#include +#endif + +#define DUMP_CONNECTIONS 0 +#define PRINT_CONFIG 1 +#define CREDIT_SIZE 8 +#define DFLY_HASH_TABLE_SIZE 4999 +// debugging parameters +#define BW_MONITOR 1 +#define DEBUG_LP 892 +#define T_ID -1 +#define TRACK -1 +#define TRACK_PKT -1 +#define TRACK_MSG -1 +#define DEBUG 0 +#define MAX_STATS 65536 +#define SHOW_ADAP_STATS 1 + +#define LP_CONFIG_NM_TERM (model_net_lp_config_names[DRAGONFLY_DALLY]) +#define LP_METHOD_NM_TERM (model_net_method_names[DRAGONFLY_DALLY]) +#define LP_CONFIG_NM_ROUT (model_net_lp_config_names[DRAGONFLY_DALLY_ROUTER]) +#define LP_METHOD_NM_ROUT (model_net_method_names[DRAGONFLY_DALLY_ROUTER]) + +static int debug_cnt = 0; +static int max_lvc_src_g = 1; +static int max_lvc_intm_g = 3; +static int min_gvc_src_g = 0; +static int min_gvc_intm_g = 1; + +static int BIAS_MIN = 1; +static int DF_DALLY = 0; +static int adaptive_threshold = 1024; + +static tw_stime max_qos_monitor = 5000000000; +static long num_local_packets_sr = 0; +static long num_local_packets_sg = 0; +static long num_remote_packets = 0; + +/* time in nanosecs */ +static int bw_reset_window = 5000000; + +#define indexer3d(_ptr, _x, _y, _z, _maxx, _maxy, _maxz) \ + ((_ptr) + _z * (_maxx * _maxz) + _y * (_maxx) + _x) + +#define indexer2d(_ptr, _x, _y, _maxx, _maxy) \ + ((_ptr) + _y * (_maxx) + _x) + +using namespace std; +struct Link { + int offset, type; +}; +struct bLink { + int offset, dest; +}; +/* Each entry in the vector is for a router id + * against each router id, there is a map of links (key of the map is the dest + * router id) + * link has information on type (green or black) and offset (number of links + * between that particular source and dest router ID)*/ +static vector< map< int, vector > > intraGroupLinks; +/* contains mapping between source router and destination group via link (link + * has dest ID)*/ +static vector< map< int, vector > > interGroupLinks; +/*MM: Maintains a list of routers connecting the source and destination groups */ +static vector< vector< vector > > connectionList; + +struct IntraGroupLink { + int src, dest, type; +}; + +struct InterGroupLink { + int src, dest; +}; + +#ifdef ENABLE_CORTEX +/* This structure is defined at the end of the file */ +extern "C" { +extern cortex_topology dragonfly_dally_cortex_topology; +} +#endif + +static int debug_slot_count = 0; +static long term_ecount, router_ecount, term_rev_ecount, router_rev_ecount; +static long packet_gen = 0, packet_fin = 0; + +static double maxd(double a, double b) { return a < b ? b : a; } + +/* minimal and non-minimal packet counts for adaptive routing*/ +static int minimal_count=0, nonmin_count=0; +static int num_routers_per_mgrp = 0; + +typedef struct dragonfly_param dragonfly_param; +/* annotation-specific parameters (unannotated entry occurs at the + * last index) */ +static uint64_t num_params = 0; +static dragonfly_param * all_params = NULL; +static const config_anno_map_t * anno_map = NULL; + +/* global variables for codes mapping */ +static char lp_group_name[MAX_NAME_LENGTH]; +static int mapping_grp_id, mapping_type_id, mapping_rep_id, mapping_offset; + +/* router magic number */ +static int router_magic_num = 0; + +/* terminal magic number */ +static int terminal_magic_num = 0; + +/* Hops within a group */ +static int num_intra_nonmin_hops = 4; +static int num_intra_min_hops = 2; + +static FILE * dragonfly_rtr_bw_log = NULL; +//static FILE * dragonfly_term_bw_log = NULL; + +static int sample_bytes_written = 0; +static int sample_rtr_bytes_written = 0; + +static char cn_sample_file[MAX_NAME_LENGTH]; +static char router_sample_file[MAX_NAME_LENGTH]; + +//don't do overhead here - job of MPI layer +static tw_stime mpi_soft_overhead = 0; + +typedef struct terminal_dally_message_list terminal_dally_message_list; +struct terminal_dally_message_list { + terminal_dally_message msg; + char* event_data; + terminal_dally_message_list *next; + terminal_dally_message_list *prev; +}; + +static void init_terminal_dally_message_list(terminal_dally_message_list *thisO, + terminal_dally_message *inmsg) { + thisO->msg = *inmsg; + thisO->event_data = NULL; + thisO->next = NULL; + thisO->prev = NULL; +} + +static void delete_terminal_dally_message_list(void *thisO) { + terminal_dally_message_list* toDel = (terminal_dally_message_list*)thisO; + if(toDel->event_data != NULL) free(toDel->event_data); + free(toDel); +} + +struct dragonfly_param +{ + // configuration parameters + int num_routers; /*Number of routers in a group*/ + double local_bandwidth;/* bandwidth of the router-router channels within a group */ + double global_bandwidth;/* bandwidth of the inter-group router connections */ + double cn_bandwidth;/* bandwidth of the compute node channels connected to routers */ + int num_vcs; /* number of virtual channels */ + int local_vc_size; /* buffer size of the router-router channels */ + int global_vc_size; /* buffer size of the global channels */ + int cn_vc_size; /* buffer size of the compute node channels */ + int chunk_size; /* full-sized packets are broken into smaller chunks.*/ + // derived parameters + int num_cn; + int intra_grp_radix; + int num_col_chans; + int num_row_chans; + int num_router_rows; + int num_router_cols; + int num_groups; + int radix; + int total_routers; + int total_terminals; + int num_global_channels; + int num_qos_levels; + int * qos_bandwidths; + double cn_delay; + double local_delay; + double global_delay; + double credit_delay; + double router_delay; + + int max_hops_notify; //maximum number of hops allowed before notifying via printout +}; + +static const dragonfly_param* stored_params; + + +struct dfly_hash_key +{ + uint64_t message_id; + tw_lpid sender_id; +}; + +struct dfly_router_sample +{ + tw_lpid router_id; + tw_stime* busy_time; + int64_t* link_traffic_sample; + tw_stime end_time; + long fwd_events; + long rev_events; +}; + +struct dfly_cn_sample +{ + tw_lpid terminal_id; + long fin_chunks_sample; + long data_size_sample; + double fin_hops_sample; + tw_stime fin_chunks_time; + tw_stime busy_time_sample; + tw_stime end_time; + long fwd_events; + long rev_events; +}; + +struct dfly_qhash_entry +{ + struct dfly_hash_key key; + char * remote_event_data; + int num_chunks; + int remote_event_size; + struct qhash_head hash_link; +}; + +/* handles terminal and router events like packet generate/send/receive/buffer */ +typedef struct terminal_state terminal_state; +typedef struct router_state router_state; + +/* dragonfly compute node data structure */ +struct terminal_state +{ + uint64_t packet_counter; + + int packet_gen; + int packet_fin; + + // Dragonfly specific parameters + unsigned int router_id; + unsigned int terminal_id; + + // Each terminal will have an input and output channel with the router + int* vc_occupancy; // NUM_VC + tw_stime terminal_available_time; + terminal_dally_message_list **terminal_msgs; + terminal_dally_message_list **terminal_msgs_tail; + int in_send_loop; + struct mn_stats dragonfly_stats_array[CATEGORY_MAX]; + + int * qos_status; + int * qos_data; + + int rc_index; + int* last_qos_status; + int* last_qos_data; + + int last_qos_lvl; + int is_monitoring_bw; + + struct rc_stack * st; + int issueIdle; + int* terminal_length; + + const char * anno; + const dragonfly_param *params; + + struct qhash_table *rank_tbl; + uint64_t rank_tbl_pop; + + tw_stime total_time; + uint64_t total_msg_size; + double total_hops; + long finished_msgs; + long finished_chunks; + long finished_packets; + + tw_stime last_buf_full; + tw_stime busy_time; + + tw_stime max_latency; + tw_stime min_latency; + + char output_buf[4096]; + char output_buf2[4096]; + /* For LP suspend functionality */ + int error_ct; + + /* For sampling */ + long fin_chunks_sample; + long data_size_sample; + double fin_hops_sample; + tw_stime fin_chunks_time; + tw_stime busy_time_sample; + int num_term_rc_windows; + + char sample_buf[4096]; + struct dfly_cn_sample * sample_stat; + int op_arr_size; + int max_arr_size; + + /* for logging forward and reverse events */ + long fwd_events; + long rev_events; + + /* following used for ROSS model-level stats collection */ + long fin_chunks_ross_sample; + long data_size_ross_sample; + long fin_hops_ross_sample; + tw_stime fin_chunks_time_ross_sample; + tw_stime busy_time_ross_sample; + struct dfly_cn_sample ross_sample; +}; + +typedef enum qos_priority +{ + Q_HIGH =0, + Q_MEDIUM, + Q_LOW, + Q_UNKNOWN, +} qos_priority; + +typedef enum qos_status +{ + Q_ACTIVE = 1, + Q_OVERBW, +} qos_status; +/* terminal event type (1-4) */ +typedef enum event_t +{ + T_GENERATE=1, + T_ARRIVE, + T_SEND, + T_BUFFER, + R_SEND, + R_ARRIVE, + R_BUFFER, + R_BANDWIDTH, + R_BW_HALT, + T_BANDWIDTH, +} event_t; + +/* whether the last hop of a packet was global, local or a terminal */ +enum last_hop +{ + GLOBAL=1, + LOCAL, + TERMINAL, + ROOT +}; + +/* three forms of routing algorithms available, adaptive routing is not + * accurate and fully functional in the current version as the formulas + * for detecting load on global channels are not very accurate */ +enum ROUTING_ALGO +{ + MINIMAL = 1, + NON_MINIMAL, + ADAPTIVE, + PROG_ADAPTIVE +}; + +enum LINK_TYPE +{ + GREEN, + BLACK, +}; +struct router_state +{ + unsigned int router_id; + int group_id; + int op_arr_size; + int max_arr_size; + int rc_index; + int num_rtr_rc_windows; + + int* global_channel; + + tw_stime* next_output_available_time; + tw_stime* cur_hist_start_time; + tw_stime* last_buf_full; + + tw_stime* busy_time; + tw_stime* busy_time_sample; + + terminal_dally_message_list ***pending_msgs; + terminal_dally_message_list ***pending_msgs_tail; + terminal_dally_message_list ***queued_msgs; + terminal_dally_message_list ***queued_msgs_tail; + int *in_send_loop; + int *queued_count; + struct rc_stack * st; + + int* last_sent_chan; + int** vc_occupancy; + int64_t* link_traffic; + int64_t * link_traffic_sample; + + int is_monitoring_bw; + int* last_qos_lvl; + int** qos_status; + int** qos_data; + /* for reverse handler */ + int* last_qos_status; + int* last_qos_data; + + const char * anno; + const dragonfly_param *params; + + int* prev_hist_num; + int* cur_hist_num; + + char output_buf[4096]; + + struct dfly_router_sample * rsamples; + + long fwd_events; + long rev_events; + + /* following used for ROSS model-level stats collection */ + tw_stime* busy_time_ross_sample; + int64_t * link_traffic_ross_sample; + struct dfly_router_sample ross_rsample; +}; + +/* had to pull some of the ROSS model stats collection stuff up here */ +void custom_dally_dragonfly_event_collect(terminal_dally_message *m, tw_lp *lp, char *buffer, int *collect_flag); +void custom_dally_dragonfly_model_stat_collect(terminal_state *s, tw_lp *lp, char *buffer); +void custom_dally_dfly_router_model_stat_collect(router_state *s, tw_lp *lp, char *buffer); +static void ross_dally_dragonfly_rsample_fn(router_state * s, tw_bf * bf, tw_lp * lp, struct dfly_router_sample *sample); +static void ross_dally_dragonfly_rsample_rc_fn(router_state * s, tw_bf * bf, tw_lp * lp, struct dfly_router_sample *sample); +static void ross_dally_dragonfly_sample_fn(terminal_state * s, tw_bf * bf, tw_lp * lp, struct dfly_cn_sample *sample); +static void ross_dally_dragonfly_sample_rc_fn(terminal_state * s, tw_bf * bf, tw_lp * lp, struct dfly_cn_sample *sample); + +st_model_types custom_dally_dragonfly_model_types[] = { + {(ev_trace_f) custom_dally_dragonfly_event_collect, + sizeof(int), + (model_stat_f) custom_dally_dragonfly_model_stat_collect, + sizeof(tw_lpid) + sizeof(long) * 2 + sizeof(double) + sizeof(tw_stime) *2, + (sample_event_f) ross_dally_dragonfly_sample_fn, + (sample_revent_f) ross_dally_dragonfly_sample_rc_fn, + sizeof(struct dfly_cn_sample) } , + {(ev_trace_f) custom_dally_dragonfly_event_collect, + sizeof(int), + (model_stat_f) custom_dally_dfly_router_model_stat_collect, + 0, //updated in router_dally_setup() since it's based on the radix + (sample_event_f) ross_dally_dragonfly_rsample_fn, + (sample_revent_f) ross_dally_dragonfly_rsample_rc_fn, + 0 } , //updated in router_dally_setup() since it's based on the radix + {NULL, 0, NULL, 0, NULL, NULL, 0} +}; +/* End of ROSS model stats collection */ + +static short routing = MINIMAL; + +static tw_stime dragonfly_total_time = 0; +static tw_stime dragonfly_max_latency = 0; + + +static long long total_hops = 0; +static long long N_finished_packets = 0; +static long long total_msg_sz = 0; +static long long N_finished_msgs = 0; +static long long N_finished_chunks = 0; + +/* convert ns to seconds */ +static tw_stime ns_to_s(tw_stime ns) +{ + return(ns / (1000.0 * 1000.0 * 1000.0)); +} + +static double bytes_to_gigabytes(double bytes) +{ + return bytes / (double) (1024 * 1024 * 1024); +} +static int dragonfly_rank_hash_compare( + void *key, struct qhash_head *link) +{ + struct dfly_hash_key *message_key = (struct dfly_hash_key *)key; + struct dfly_qhash_entry *tmp = NULL; + + tmp = qhash_entry(link, struct dfly_qhash_entry, hash_link); + + if (tmp->key.message_id == message_key->message_id + && tmp->key.sender_id == message_key->sender_id) + return 1; + + return 0; +} +static int dragonfly_hash_func(void *k, int table_size) +{ + struct dfly_hash_key *tmp = (struct dfly_hash_key *)k; + uint32_t pc = 0, pb = 0; + bj_hashlittle2(tmp, sizeof(*tmp), &pc, &pb); + return (int)(pc % (table_size - 1)); + /*uint64_t key = (~tmp->message_id) + (tmp->message_id << 18); + key = key * 21; + key = ~key ^ (tmp->sender_id >> 4); + key = key * tmp->sender_id; + return (int)(key & (table_size - 1));*/ +} + +/* convert GiB/s and bytes to ns */ +static tw_stime bytes_to_ns(uint64_t bytes, double GB_p_s) +{ + tw_stime time; + + /* bytes to GB */ + time = ((double)bytes)/(1024.0*1024.0*1024.0); + /* GiB to s */ + time = time / GB_p_s; + /* s to ns */ + time = time * 1000.0 * 1000.0 * 1000.0; + + return(time); +} + +/* returns the dragonfly message size */ +int dragonfly_dally_get_msg_sz(void) +{ + return sizeof(terminal_dally_message); +} + +static void free_tmp(void * ptr) +{ + struct dfly_qhash_entry * dfly = (dfly_qhash_entry *)ptr; + if(dfly->remote_event_data) + free(dfly->remote_event_data); + + if(dfly) + free(dfly); +} + +int get_vcg_from_category(terminal_dally_message * msg) +{ + if(strcmp(msg->category, "high") == 0) + return Q_HIGH; + else if(strcmp(msg->category, "medium") == 0) + return Q_MEDIUM; + else + tw_error(TW_LOC, "\n priority needs to be specified with qos_levels>1 %d", msg->category); +} +static void append_to_terminal_dally_message_list( + terminal_dally_message_list ** thisq, + terminal_dally_message_list ** thistail, + int index, + terminal_dally_message_list *msg) { +// printf("\n msg id %d ", msg->msg.packet_ID); + if(thisq[index] == NULL) { + thisq[index] = msg; + } else { + assert(thistail[index] != NULL); + thistail[index]->next = msg; + msg->prev = thistail[index]; + } + thistail[index] = msg; +// printf("\n done adding %d ", msg->msg.packet_ID); +} + +static void prepend_to_terminal_dally_message_list( + terminal_dally_message_list ** thisq, + terminal_dally_message_list ** thistail, + int index, + terminal_dally_message_list *msg) { + if(thisq[index] == NULL) { + thistail[index] = msg; + } else { + thisq[index]->prev = msg; + msg->next = thisq[index]; + } + thisq[index] = msg; +} + +static terminal_dally_message_list* return_head( + terminal_dally_message_list ** thisq, + terminal_dally_message_list ** thistail, + int index) { + terminal_dally_message_list *head = thisq[index]; + if(head != NULL) { + thisq[index] = head->next; + if(head->next != NULL) { + head->next->prev = NULL; + head->next = NULL; + } else { + thistail[index] = NULL; + } + } + return head; +} + +static terminal_dally_message_list* return_tail( + terminal_dally_message_list ** thisq, + terminal_dally_message_list ** thistail, + int index) { + terminal_dally_message_list *tail = thistail[index]; + assert(tail); + if(tail->prev != NULL) { + tail->prev->next = NULL; + thistail[index] = tail->prev; + tail->prev = NULL; + } else { + thistail[index] = NULL; + thisq[index] = NULL; + } + return tail; +} +/* TODO: Differentiate between local and global bandwidths. */ +static int get_rtr_bandwidth_consumption(router_state * s, int qos_lvl, int output_port) +{ + assert(qos_lvl >= Q_HIGH && qos_lvl <= Q_LOW); + assert(output_port < s->params->intra_grp_radix + s->params->num_global_channels + s->params->num_cn); + + int bandwidth = s->params->cn_bandwidth; + if(output_port < s->params->intra_grp_radix) + bandwidth = s->params->local_bandwidth; + else if(output_port < s->params->intra_grp_radix + s->params->num_global_channels) + bandwidth = s->params->global_bandwidth; + + /* conversion into bytes from GiB */ + double max_bw = bandwidth * 1024.0 * 1024.0 * 1024.0; + double max_bw_per_ns = max_bw / (1000.0 * 1000.0 * 1000.0); + double max_bytes_per_win = max_bw_per_ns * bw_reset_window; + + /* bw_consumed would be in Gigabytes per second. */ +// tw_stime reset_window_s = ns_to_s(bw_reset_window); +// double bw_gib = bytes_to_gigabytes(s->qos_data[output_port][qos_lvl]); +// double bw_consumed = ((double)bw_gib / (double)reset_window_s); + int percent_bw = (((double)s->qos_data[output_port][qos_lvl]) / max_bytes_per_win) * 100; +// printf("\n percent bw consumed by qos_lvl %d is %d bytes transferred %d max_bw %lf ", qos_lvl, percent_bw, s->qos_data[output_port][qos_lvl], max_bw_per_ns); + return percent_bw; + +} + +void dragonfly_print_params(const dragonfly_param *p) +{ + int myRank; + MPI_Comm_rank(MPI_COMM_CODES, &myRank); + if (!myRank) { + printf("\n------------------ Dragonfly Dally Parameters ---------\n"); + printf("\tnum_routers = %d\n",p->num_routers); + printf("\tlocal_bandwidth = %.2f\n",p->local_bandwidth); + printf("\tglobal_bandwidth = %.2f\n",p->global_bandwidth); + printf("\tcn_bandwidth = %.2f\n",p->cn_bandwidth); + printf("\tnum_vcs = %d\n",p->num_vcs); + printf("\tnum_qos_levels = %d\n",p->num_qos_levels); + printf("\tlocal_vc_size = %d\n",p->local_vc_size); + printf("\tglobal_vc_size = %d\n",p->global_vc_size); + printf("\tcn_vc_size = %d\n",p->cn_vc_size); + printf("\tchunk_size = %d\n",p->chunk_size); + printf("\tnum_cn = %d\n",p->num_cn); + printf("\tintra_grp_radix = %d\n",p->intra_grp_radix); + printf("\tnum_col_chans = %d\n",p->num_col_chans); + printf("\tnum_row_chans = %d\n",p->num_row_chans); + printf("\tnum_router_rows = %d\n",p->num_router_rows); + printf("\tnum_router_cols = %d\n",p->num_router_cols); + printf("\tnum_groups = %d\n",p->num_groups); + printf("\tvirtual radix = %d\n",p->radix); + printf("\ttotal_routers = %d\n",p->total_routers); + printf("\ttotal_terminals = %d\n",p->total_terminals); + printf("\tnum_global_channels = %d\n",p->num_global_channels); + printf("\tcn_delay = %.2f\n",p->cn_delay); + printf("\tlocal_delay = %.2f\n",p->local_delay); + printf("\tglobal_delay = %.2f\n",p->global_delay); + printf("\tcredit_delay = %.2f\n",p->credit_delay); + printf("\trouter_delay = %.2f\n",p->router_delay); + printf("\trouting = %d\n",routing); + printf("\tmax hops notification = %d\n",p->max_hops_notify); + printf("------------------------------------------------------\n\n"); + } +} + +static void dragonfly_read_config(const char * anno, dragonfly_param *params){ + /*Adding init for router magic number*/ + uint32_t h1 = 0, h2 = 0; + bj_hashlittle2(LP_METHOD_NM_ROUT, strlen(LP_METHOD_NM_ROUT), &h1, &h2); + router_magic_num = h1 + h2; + + bj_hashlittle2(LP_METHOD_NM_TERM, strlen(LP_METHOD_NM_TERM), &h1, &h2); + terminal_magic_num = h1 + h2; + + // shorthand + dragonfly_param *p = params; + int myRank; + MPI_Comm_rank(MPI_COMM_CODES, &myRank); + + int rc = configuration_get_value_int(&config, "PARAMS", "local_vc_size", anno, &p->local_vc_size); + if(rc) { + p->local_vc_size = 1024; + if(!myRank) + fprintf(stderr, "Buffer size of local channels not specified, setting to %d\n", p->local_vc_size); + } + + rc = configuration_get_value_int(&config, "PARAMS", "num_qos_levels", anno, &p->num_qos_levels); + if(rc) { + p->num_qos_levels = 1; + if(!myRank) + fprintf(stderr, "Number of QOS levels not specified, setting to %d\n", p->num_qos_levels); + } + + char qos_levels_str[MAX_NAME_LENGTH]; + rc = configuration_get_value(&config, "PARAMS", "qos_bandwidth", anno, qos_levels_str, MAX_NAME_LENGTH); + p->qos_bandwidths = (int*)calloc(p->num_qos_levels, sizeof(int)); + + if(p->num_qos_levels > 1) + { + int total_bw = 0; + char * token; + token = strtok(qos_levels_str, ","); + int i = 0; + while(token != NULL) + { + sscanf(token, "%d", &p->qos_bandwidths[i]); + total_bw += p->qos_bandwidths[i]; + if(p->qos_bandwidths[i] <= 0) + { + tw_error(TW_LOC, "\n Invalid bandwidth levels"); + } + i++; + token = strtok(NULL,","); + } + assert(total_bw <= 100); + } + else + p->qos_bandwidths[0] = 100; + rc = configuration_get_value_double(&config, "PARAMS", "max_qos_monitor", anno, &max_qos_monitor); + if(rc) { + if(!myRank) + fprintf(stderr, "Setting max_qos_monitor to %lf\n", max_qos_monitor); + } + rc = configuration_get_value_int(&config, "PARAMS", "adaptive_threshold", anno, &adaptive_threshold); + if (rc) { + if(!myRank) + fprintf(stderr, "Adaptive Minimal Routing Threshold not specified: setting to default = 0. (Will consider minimal and nonminimal routes based on scoring metric alone)\n"); + adaptive_threshold = 0; + } + + rc = configuration_get_value_int(&config, "PARAMS", "global_vc_size", anno, &p->global_vc_size); + if(rc) { + p->global_vc_size = 2048; + if(!myRank) + fprintf(stderr, "Buffer size of global channels not specified, setting to %d\n", p->global_vc_size); + } + + rc = configuration_get_value_int(&config, "PARAMS", "df-dally-vc", anno, &DF_DALLY); + if(rc) { + DF_DALLY = 0; + } + + if(DF_DALLY != 1) + tw_error(TW_LOC, "\ndf-dally-vc must be 1 to run this model\n"); + + rc = configuration_get_value_int(&config, "PARAMS", "minimal-bias", anno, &BIAS_MIN); + if(rc) { + BIAS_MIN = 0; + } + else { + if(!myRank) + fprintf(stderr,"Setting minimal bias\n"); + } + + rc = configuration_get_value_int(&config, "PARAMS", "cn_vc_size", anno, &p->cn_vc_size); + if(rc) { + p->cn_vc_size = 1024; + if(!myRank) + fprintf(stderr, "Buffer size of compute node channels not specified, setting to %d\n", p->cn_vc_size); + } + + rc = configuration_get_value_int(&config, "PARAMS", "chunk_size", anno, &p->chunk_size); + if(rc) { + p->chunk_size = 512; + if(!myRank) + fprintf(stderr, "Chunk size for packets is specified, setting to %d\n", p->chunk_size); + } + + rc = configuration_get_value_double(&config, "PARAMS", "local_bandwidth", anno, &p->local_bandwidth); + if(rc) { + p->local_bandwidth = 5.25; + if(!myRank) + fprintf(stderr, "Bandwidth of local channels not specified, setting to %lf\n", p->local_bandwidth); + } + + rc = configuration_get_value_double(&config, "PARAMS", "global_bandwidth", anno, &p->global_bandwidth); + if(rc) { + p->global_bandwidth = 4.7; + if(!myRank) + fprintf(stderr, "Bandwidth of global channels not specified, setting to %lf\n", p->global_bandwidth); + } + + rc = configuration_get_value_double(&config, "PARAMS", "cn_bandwidth", anno, &p->cn_bandwidth); + if(rc) { + p->cn_bandwidth = 5.25; + if(!myRank) + fprintf(stderr, "Bandwidth of compute node channels not specified, setting to %lf\n", p->cn_bandwidth); + } + + rc = configuration_get_value_double(&config, "PARAMS", "router_delay", anno, + &p->router_delay); + if(rc) { + p->router_delay = 100; + } + + configuration_get_value(&config, "PARAMS", "cn_sample_file", anno, cn_sample_file, + MAX_NAME_LENGTH); + configuration_get_value(&config, "PARAMS", "rt_sample_file", anno, router_sample_file, + MAX_NAME_LENGTH); + + char routing_str[MAX_NAME_LENGTH]; + configuration_get_value(&config, "PARAMS", "routing", anno, routing_str, + MAX_NAME_LENGTH); + if(strcmp(routing_str, "minimal") == 0) + routing = MINIMAL; + else if(strcmp(routing_str, "nonminimal")==0 || + strcmp(routing_str,"non-minimal")==0) + routing = NON_MINIMAL; + else if (strcmp(routing_str, "adaptive") == 0) + routing = ADAPTIVE; + else if (strcmp(routing_str, "prog-adaptive") == 0) + routing = PROG_ADAPTIVE; + else + { + if(!myRank) + fprintf(stderr, "No routing protocol specified, setting to minimal routing\n"); + routing = MINIMAL; + } + + rc = configuration_get_value_int(&config, "PARAMS", "notification_on_hops_greater_than", anno, &p->max_hops_notify); + if (rc) { + if(!myRank) + printf("Maximum hops for notifying not specified, setting to INT MAX\n"); + p->max_hops_notify = INT_MAX; + } + + // rc = configuration_get_value_int(&config, "PARAMS", "num_vcs_override", anno, &p->num_vcs); + // if(rc) { + // if(routing == PROG_ADAPTIVE) + // p->num_vcs = 10; + // else + // p->num_vcs = 8; + // } + // else { + // printf("Overriding num_vcs: p->num_vcs=%d\n",p->num_vcs); + // } + + if(DF_DALLY == 0) + { + //if(routing == PROG_ADAPTIVE) + // p->num_vcs = 10; + //else + p->num_vcs = 8; + } + else + { + p->num_vcs = 4; + } + + if(p->num_qos_levels > 1) + p->num_vcs = p->num_qos_levels * p->num_vcs; + + rc = configuration_get_value_int(&config, "PARAMS", "num_groups", anno, &p->num_groups); + if(rc) { + tw_error(TW_LOC, "\nnum_groups not specified, Aborting\n"); + } + rc = configuration_get_value_int(&config, "PARAMS", "num_col_chans", anno, &p->num_col_chans); + if(rc) { +// printf("\n Number of links connecting chassis not specified, setting to default value 3 "); + p->num_col_chans = 3; + } + rc = configuration_get_value_int(&config, "PARAMS", "num_row_chans", anno, &p->num_row_chans); + if(rc) { +// printf("\n Number of links connecting chassis not specified, setting to default value 3 "); + p->num_row_chans = 1; + } + rc = configuration_get_value_int(&config, "PARAMS", "num_router_rows", anno, &p->num_router_rows); + if(rc) { + if(!myRank) + fprintf(stderr, "Number of router rows not specified, setting to 6\n"); + p->num_router_rows = 6; + } + rc = configuration_get_value_int(&config, "PARAMS", "num_router_cols", anno, &p->num_router_cols); + if(rc) { + if(!myRank) + fprintf(stderr,"Number of router columns not specified, setting to 16\n"); + p->num_router_cols = 16; + } + p->intra_grp_radix = (p->num_router_cols * p->num_row_chans); + if(p->num_router_rows > 1) + p->intra_grp_radix += (p->num_router_rows * p->num_col_chans); + + p->num_routers = p->num_router_rows * p->num_router_cols; + + rc = configuration_get_value_int(&config, "PARAMS", "num_cns_per_router", anno, &p->num_cn); + if(rc) { + if(!myRank) + fprintf(stderr,"Number of cns per router not specified, setting to %d\n", p->num_routers/2); + p->num_cn = p->num_routers/2; + } + + rc = configuration_get_value_int(&config, "PARAMS", "num_global_channels", anno, &p->num_global_channels); + if(rc) { + if(!myRank) + fprintf(stderr,"Number of global channels per router not specified, setting to 10\n"); + p->num_global_channels = 10; + } + p->radix = p->intra_grp_radix + p->num_global_channels + p->num_cn; + p->total_routers = p->num_groups * p->num_routers; + p->total_terminals = p->total_routers * p->num_cn; + + // read intra group connections, store from a router's perspective + // all links to the same router form a vector + char intraFile[MAX_NAME_LENGTH]; + configuration_get_value(&config, "PARAMS", "intra-group-connections", + anno, intraFile, MAX_NAME_LENGTH); + if(strlen(intraFile) <= 0) { + tw_error(TW_LOC, "Intra group connections file not specified. Aborting"); + } + FILE *groupFile = fopen(intraFile, "rb"); + if(!groupFile) + tw_error(TW_LOC, "intra-group file not found "); + + if(!myRank) + printf("Reading intra-group connectivity file: %s\n", intraFile); + + { + vector< int > offsets; + offsets.resize(p->num_routers, 0); + intraGroupLinks.resize(p->num_routers); + IntraGroupLink newLink; + + while(fread(&newLink, sizeof(IntraGroupLink), 1, groupFile) != 0) { + Link tmpLink; + tmpLink.type = newLink.type; + tmpLink.offset = offsets[newLink.src]++; + intraGroupLinks[newLink.src][newLink.dest].push_back(tmpLink); + } + } + + fclose(groupFile); + + // read inter group connections, store from a router's perspective + // also create a group level table that tells all the connecting routers + char interFile[MAX_NAME_LENGTH]; + configuration_get_value(&config, "PARAMS", "inter-group-connections", + anno, interFile, MAX_NAME_LENGTH); + if(strlen(interFile) <= 0) { + tw_error(TW_LOC, "Inter group connections file not specified. Aborting"); + } + FILE *systemFile = fopen(interFile, "rb"); + if(!myRank) + { + printf("Reading inter-group connectivity file: %s\n", interFile); + printf("\n Total routers %d total groups %d ", p->total_routers, p->num_groups); + } + + { + vector< int > offsets; + offsets.resize(p->total_routers, 0); + interGroupLinks.resize(p->total_routers); + connectionList.resize(p->num_groups); + for(int g = 0; g < connectionList.size(); g++) { + connectionList[g].resize(p->num_groups); + } + + InterGroupLink newLink; + + while(fread(&newLink, sizeof(InterGroupLink), 1, systemFile) != 0) { + bLink tmpLink; + tmpLink.dest = newLink.dest; + int srcG = newLink.src / p->num_routers; + int destG = newLink.dest / p->num_routers; + tmpLink.offset = offsets[newLink.src]++; + interGroupLinks[newLink.src][destG].push_back(tmpLink); + int r; + for(r = 0; r < connectionList[srcG][destG].size(); r++) { + if(connectionList[srcG][destG][r] == newLink.src) break; + } + if(r == connectionList[srcG][destG].size()) { + connectionList[srcG][destG].push_back(newLink.src); + } + } + } + + fclose(systemFile); + +#if DUMP_CONNECTIONS == 1 +if(!myRank) { + printf("Dumping intra-group connections\n"); + for(int a = 0; a < intraGroupLinks.size(); a++) { + printf("Connections for router %d\n", a); + map< int, vector > &curMap = intraGroupLinks[a]; + map< int, vector >::iterator it = curMap.begin(); + for(; it != curMap.end(); it++) { + printf(" ( %d - ", it->first); + for(int l = 0; l < it->second.size(); l++) { + // offset is number of local connections + // type is black or green according to Cray architecture + printf("%d,%d ", it->second[l].offset, it->second[l].type); + } + printf(")"); + } + printf("\n"); + } +} +#endif +#if DUMP_CONNECTIONS == 1 +if(!myRank) { + printf("Dumping inter-group connections\n"); + for(int a = 0; a < interGroupLinks.size(); a++) { + printf("Connections for router %d\n", a); + map< int, vector > &curMap = interGroupLinks[a]; + map< int, vector >::iterator it = curMap.begin(); + for(; it != curMap.end(); it++) { + // dest group ID + printf(" ( %d - ", it->first); + for(int l = 0; l < it->second.size(); l++) { + // dest is dest router ID + // offset is number of global connections + printf("%d,%d ", it->second[l].offset, it->second[l].dest); + } + printf(")"); + } + printf("\n"); + } +} +#endif + +#if DUMP_CONNECTIONS == 1 +if(!myRank) { + printf("Dumping source aries for global connections\n"); + for(int g = 0; g < p->num_groups; g++) { + for(int g1 = 0; g1 < p->num_groups; g1++) { + printf(" ( "); + for(int l = 0; l < connectionList[g][g1].size(); l++) { + printf("%d ", connectionList[g][g1][l]); + } + printf(")"); + } + printf("\n"); + } +} +#endif + if(!myRank) { + printf("\n Total nodes %d routers %d groups %d routers per group %d radix %d\n", + p->num_cn * p->total_routers, p->total_routers, p->num_groups, + p->num_routers, p->radix); + } + + p->cn_delay = bytes_to_ns(p->chunk_size, p->cn_bandwidth); + p->local_delay = bytes_to_ns(p->chunk_size, p->local_bandwidth); + p->global_delay = bytes_to_ns(p->chunk_size, p->global_bandwidth); + p->credit_delay = bytes_to_ns(CREDIT_SIZE, p->local_bandwidth); //assume 8 bytes packet + + if (PRINT_CONFIG) + dragonfly_print_params(p); + + stored_params = p; +} + +void dragonfly_dally_configure(){ + anno_map = codes_mapping_get_lp_anno_map(LP_CONFIG_NM_TERM); + assert(anno_map); + num_params = anno_map->num_annos + (anno_map->has_unanno_lp > 0); + all_params = (dragonfly_param *)calloc(num_params, sizeof(*all_params)); + + for (int i = 0; i < anno_map->num_annos; i++){ + const char * anno = anno_map->annotations[i].ptr; + dragonfly_read_config(anno, &all_params[i]); + } + if (anno_map->has_unanno_lp > 0){ + dragonfly_read_config(NULL, &all_params[anno_map->num_annos]); + } +#ifdef ENABLE_CORTEX + model_net_topology = dragonfly_dally_cortex_topology; +#endif +} + +/* report dragonfly statistics like average and maximum packet latency, average number of hops traversed */ +void dragonfly_dally_report_stats() +{ + long long avg_hops, total_finished_packets, total_finished_chunks; + long long total_finished_msgs, final_msg_sz; + tw_stime avg_time, max_time; + int total_minimal_packets, total_nonmin_packets; + long total_gen, total_fin; + long total_local_packets_sr, total_local_packets_sg, total_remote_packets; + + MPI_Reduce( &total_hops, &avg_hops, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &N_finished_packets, &total_finished_packets, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &N_finished_msgs, &total_finished_msgs, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &N_finished_chunks, &total_finished_chunks, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &total_msg_sz, &final_msg_sz, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &dragonfly_total_time, &avg_time, 1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &dragonfly_max_latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_CODES); + + MPI_Reduce( &packet_gen, &total_gen, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&packet_fin, &total_fin, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &num_local_packets_sr, &total_local_packets_sr, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &num_local_packets_sg, &total_local_packets_sg, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &num_remote_packets, &total_remote_packets, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + if(routing == ADAPTIVE || routing == PROG_ADAPTIVE || SHOW_ADAP_STATS) + { + MPI_Reduce(&minimal_count, &total_minimal_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&nonmin_count, &total_nonmin_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_CODES); + } + + /* print statistics */ + if(!g_tw_mynode) + { + if (PRINT_CONFIG) + dragonfly_print_params(stored_params); + + printf("\nAverage number of hops traversed %f average chunk latency %lf us maximum chunk latency %lf us avg message size %lf bytes finished messages %lld finished chunks %lld\n", + (float)avg_hops/total_finished_chunks, avg_time/(total_finished_chunks*1000), max_time/1000, (float)final_msg_sz/total_finished_msgs, total_finished_msgs, total_finished_chunks); + if(routing == ADAPTIVE || routing == PROG_ADAPTIVE || SHOW_ADAP_STATS) + printf("\nADAPTIVE ROUTING STATS: %d chunks routed minimally %d chunks routed non-minimally completed packets %lld \n", + total_minimal_packets, total_nonmin_packets, total_finished_chunks); + + printf("\nTotal packets generated %ld finished %ld Locally routed- same router %ld different-router %ld Remote (inter-group) %ld \n", total_gen, total_fin, total_local_packets_sr, total_local_packets_sg, total_remote_packets); + } + return; +} + +void issue_bw_monitor_event_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ + for(int i = 0 ; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); + + int num_qos_levels = s->params->num_qos_levels; + int num_term_rc_wins = s->num_term_rc_windows; + int rc_index = msg->qos_index; + + for(int k = 0; k < num_qos_levels; k++) + { + s->qos_status[k] = *(indexer2d(s->last_qos_status, rc_index, k, num_term_rc_wins, num_qos_levels)); + s->qos_data[k] = *(indexer2d(s->last_qos_data, rc_index, k, num_term_rc_wins, num_qos_levels)); + *(indexer2d(s->last_qos_status, rc_index, k, num_term_rc_wins, num_qos_levels)) = 0; + *(indexer2d(s->last_qos_data, rc_index, k, num_term_rc_wins, num_qos_levels)) = 0; + } +} +/* resets the bandwidth numbers recorded so far */ +void issue_bw_monitor_event(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ + + msg->num_cll = 0; + msg->num_rngs = 0; + int num_qos_levels = s->params->num_qos_levels; + int rc_index = s->rc_index; + int num_term_rc_wins = s->num_term_rc_windows; + + /* dynamically reallocate array if index has reached max-size */ + if(s->rc_index >= s->num_term_rc_windows) + { + s->num_term_rc_windows *= 2; + int * tmp1 = (int*)calloc(s->num_term_rc_windows * num_qos_levels, sizeof(int)); + int * tmp2 = (int*)calloc(s->num_term_rc_windows * num_qos_levels, sizeof(int)); + + /* now copy elements one by one. can't use memcpy with 2d array. */ + for(int i = 0; i < s->num_term_rc_windows; i++) + { + for(int j = 0; j < num_qos_levels; j++) + { + *(indexer2d(tmp1, i, j, s->num_term_rc_windows, num_qos_levels)) = *(indexer2d(s->last_qos_status, i, j, num_term_rc_wins, num_qos_levels)); + *(indexer2d(tmp2, i, j, s->num_term_rc_windows, num_qos_levels)) = *(indexer2d(s->last_qos_data, i, j, num_term_rc_wins, num_qos_levels)); + } + } + free(s->last_qos_status); + free(s->last_qos_data); + + s->last_qos_status = tmp1; + s->last_qos_data = tmp2; + } + /* Reset the qos status and bandwidth consumption. */ + for(int k = 0; k < num_qos_levels; k++) + { + *(indexer2d(s->last_qos_status, rc_index, k, num_term_rc_wins, num_qos_levels)) = s->qos_status[k]; + *(indexer2d(s->last_qos_data, rc_index, k, num_term_rc_wins, num_qos_levels)) = s->qos_data[k]; + s->qos_status[k] = Q_ACTIVE; + s->qos_data[k] = 0; + } + msg->qos_index = s->rc_index; + s->rc_index++; + assert(s->rc_index < s->num_term_rc_windows); + +/* if(s->router_id == 0) + { + fprintf(dragonfly_term_bw_log, "\n %d %lf %lf ", s->terminal_id, tw_now(lp), s->busy_time_sample); + s->busy_time_sample = 0; + } + */ + if(tw_now(lp) > max_qos_monitor) + return; + + msg->num_cll++; + terminal_dally_message * m; + tw_stime bw_ts = bw_reset_window + codes_local_latency(lp); + tw_event * e = model_net_method_event_new(lp->gid, bw_ts, lp, DRAGONFLY_DALLY, + (void**)&m, NULL); + m->type = T_BANDWIDTH; + m->magic = terminal_magic_num; + tw_event_send(e); +} + +void issue_rtr_bw_monitor_event_rc(router_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ + int num_qos_levels = s->params->num_qos_levels; + int rc_index = msg->qos_index; + + for(int i = 0 ; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); + + for(int j = 0; j < s->params->radix; j++) + { + for(int k = 0; k < num_qos_levels; k++) + { + s->qos_status[j][k] = *(indexer3d(s->last_qos_status, rc_index, j, k, s->num_rtr_rc_windows, s->params->radix, num_qos_levels)); + s->qos_data[j][k] = *(indexer3d(s->last_qos_data, rc_index, j, k, s->num_rtr_rc_windows, s->params->radix, num_qos_levels)); + *(indexer3d(s->last_qos_status, rc_index, j, k, s->num_rtr_rc_windows, s->params->radix, num_qos_levels)) = 0; + *(indexer3d(s->last_qos_data, rc_index, j, k, s->num_rtr_rc_windows, s->params->radix, num_qos_levels)) = 0; + } + } +} +void issue_rtr_bw_monitor_event(router_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ + msg->num_cll = 0; + msg->num_rngs = 0; + + int num_qos_levels = s->params->num_qos_levels; + int rc_index = s->rc_index; + int num_rtr_rc_windows = s->num_rtr_rc_windows; + + /* dynamically reallocate the array.. */ + if(s->rc_index >= s->num_rtr_rc_windows) + { + s->num_rtr_rc_windows *= 2; + int * tmp1 = (int*)calloc(s->num_rtr_rc_windows * s->params->radix * num_qos_levels, sizeof(int)); + int * tmp2 = (int*)calloc(s->num_rtr_rc_windows * s->params->radix * num_qos_levels, sizeof(int)); + /* now copy elements one by one. can't use memcpy with 2d array. */ + for(int i = 0; i < num_rtr_rc_windows; i++) + { + for(int j = 0; j < s->params->radix; j++) + { + for(int k = 0; k < num_qos_levels; k++) + { + *(indexer3d(tmp1, i, j, k, s->num_rtr_rc_windows, s->params->radix, num_qos_levels)) = *(indexer3d(s->last_qos_status, i, j, k, num_rtr_rc_windows, s->params->radix, num_qos_levels)); + *(indexer3d(tmp2, i, j, k, s->num_rtr_rc_windows, s->params->radix, num_qos_levels)) = *(indexer3d(s->last_qos_data, i, j, k, num_rtr_rc_windows, s->params->radix, num_qos_levels)); + } + } + } + free(s->last_qos_status); + free(s->last_qos_data); + + s->last_qos_status = tmp1; + s->last_qos_data = tmp2; + } + assert(rc_index < s->num_rtr_rc_windows && rc_index >= 0); + + for(int j = 0; j < s->params->radix; j++) + { + for(int k = 0; k < num_qos_levels; k++) + { + *(indexer3d(s->last_qos_status, rc_index, j, k, s->num_rtr_rc_windows, s->params->radix, num_qos_levels)) = s->qos_status[j][k]; + *(indexer3d(s->last_qos_data, rc_index, j, k, s->num_rtr_rc_windows, s->params->radix, num_qos_levels)) = s->qos_data[j][k]; + } + } + + msg->qos_index = s->rc_index; + s->rc_index++; + + for(int j = 0; j < s->params->radix; j++) + { + for(int k = 0; k < num_qos_levels; k++) + { + int bw_consumed = get_rtr_bandwidth_consumption(s, k, j); + if(s->qos_data[j][k] > 0) + { + fprintf(dragonfly_rtr_bw_log, "\n %d %f %d %d %d %d %d %f", s->router_id, tw_now(lp), j, k, bw_consumed, s->qos_status[j][k], s->qos_data[j][k], s->busy_time_sample[j]); + + } + } + } + for(int j = 0; j < s->params->radix; j++) + { + /* Reset the qos status and bandwidth consumption. */ + for(int k = 0; k < num_qos_levels; k++) + { + s->qos_status[j][k] = Q_ACTIVE; + s->qos_data[j][k] = 0; + } + //s->busy_time_sample[j] = 0; + } + + if(tw_now(lp) > max_qos_monitor) + return; + + msg->num_cll++; + tw_stime bw_ts = bw_reset_window + codes_local_latency(lp); + terminal_dally_message *m; + tw_event * e = model_net_method_event_new(lp->gid, bw_ts, lp, + DRAGONFLY_DALLY_ROUTER, (void**)&m, NULL); + m->type = R_BANDWIDTH; + m->magic = router_magic_num; + tw_event_send(e); +} + +void reset_rtr_bw_counters(router_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) +{ + int num_qos_levels = s->params->num_qos_levels; + if(msg->type == R_BANDWIDTH) + { + for(int k = 0; k < s->num_rtr_rc_windows; k++) + { + for(int i = 0; i < s->params->radix; i++) + { + for(int j = 0; j < num_qos_levels; j++) + { + *(indexer3d(s->last_qos_status, k, i, j, s->num_rtr_rc_windows, s->params->radix, num_qos_levels)) = 0; + *(indexer3d(s->last_qos_data, k, i, j, s->num_rtr_rc_windows, s->params->radix, num_qos_levels)) = 0; + } + } + } + s->rc_index = 0; + } +} +void reset_bw_counters(terminal_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) +{ + int num_qos_levels = s->params->num_qos_levels; + if(msg->type == T_BANDWIDTH) + { + for(int i = 0; i < s->num_term_rc_windows; i++) + { + for(int j = 0; j < s->params->num_qos_levels; j++) + { + *(indexer2d(s->last_qos_status, i, j, s->num_term_rc_windows, num_qos_levels)) = 0; + *(indexer2d(s->last_qos_data, i, j, s->num_term_rc_windows, num_qos_levels)) = 0; + } + } + s->rc_index = 0; + } +} +/* initialize a dragonfly compute node terminal */ +void +terminal_dally_init( terminal_state * s, + tw_lp * lp ) +{ + s->packet_gen = 0; + s->packet_fin = 0; + s->is_monitoring_bw = 0; + s->num_term_rc_windows = 100; + s->rc_index = 0; + + int i; + char anno[MAX_NAME_LENGTH]; + + // Assign the global router ID + // TODO: be annotation-aware + codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, + &mapping_type_id, anno, &mapping_rep_id, &mapping_offset); + if (anno[0] == '\0'){ + s->anno = NULL; + s->params = &all_params[num_params-1]; + } + else{ + s->anno = strdup(anno); + int id = configuration_get_annotation_index(anno, anno_map); + s->params = &all_params[id]; + } + + int num_qos_levels = s->params->num_qos_levels; + int num_lps = codes_mapping_get_lp_count(lp_group_name, 1, LP_CONFIG_NM_TERM, + s->anno, 0); + + s->terminal_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); + s->router_id=(int)s->terminal_id / (s->params->num_cn); + s->terminal_available_time = 0.0; + s->packet_counter = 0; + s->min_latency = INT_MAX; + s->max_latency = 0; + + s->finished_msgs = 0; + s->finished_chunks = 0; + s->finished_packets = 0; + s->total_time = 0.0; + s->total_msg_size = 0; + + s->busy_time = 0.0; + + s->fwd_events = 0; + s->rev_events = 0; + + rc_stack_create(&s->st); + s->vc_occupancy = (int*)calloc(num_qos_levels, sizeof(int)); + s->last_buf_full = 0.0; + + s->terminal_length = (int*)calloc(num_qos_levels, sizeof(int)); + + /* Whether the virtual channel group is active or over-bw*/ + s->qos_status = (int*)calloc(num_qos_levels, sizeof(int)); + + /* How much data has been transmitted on the virtual channel group within + * the window */ + s->qos_data = (int*)calloc(num_qos_levels, sizeof(int)); + + /* for reverse handlers */ + s->last_qos_status = (int*)calloc(s->num_term_rc_windows * num_qos_levels, sizeof(int)); + s->last_qos_data = (int*)calloc(s->num_term_rc_windows * num_qos_levels, sizeof(int)); + + for(i = 0; i < num_qos_levels; i++) + { + s->qos_data[i] = 0; + s->qos_status[i] = Q_ACTIVE; + s->vc_occupancy[i]=0; + } + + s->last_qos_lvl = 0; + s->rank_tbl = NULL; + s->terminal_msgs = + (terminal_dally_message_list**)calloc(num_qos_levels, sizeof(terminal_dally_message_list*)); + s->terminal_msgs_tail = + (terminal_dally_message_list**)calloc(num_qos_levels, sizeof(terminal_dally_message_list*)); + + for(int i = 0; i < num_qos_levels; i++) + { + s->terminal_msgs[i] = NULL; + s->terminal_msgs_tail[i] = NULL; + } + s->in_send_loop = 0; + s->issueIdle = 0; + + /*if(s->terminal_id == 0) + { + char term_bw_log[64]; + sprintf(term_bw_log, "terminal-bw-tracker"); + dragonfly_term_bw_log = fopen(term_bw_log, "w"); + fprintf(dragonfly_term_bw_log, "\n term-id time-stamp port-id busy-time"); + }*/ + return; +} + +/* sets up the router virtual channels, global channels, + * local channels, compute node channels */ +void router_dally_setup(router_state * r, tw_lp * lp) +{ + + char anno[MAX_NAME_LENGTH]; + codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, + &mapping_type_id, anno, &mapping_rep_id, &mapping_offset); + + if (anno[0] == '\0'){ + r->anno = NULL; + r->params = &all_params[num_params-1]; + } else{ + r->anno = strdup(anno); + int id = configuration_get_annotation_index(anno, anno_map); + r->params = &all_params[id]; + } + + // shorthand + const dragonfly_param *p = r->params; + + num_routers_per_mgrp = codes_mapping_get_lp_count (lp_group_name, 1, "modelnet_dragonfly_dally_router", + NULL, 0); + int num_grp_reps = codes_mapping_get_group_reps(lp_group_name); + if(p->total_routers != num_grp_reps * num_routers_per_mgrp) + tw_error(TW_LOC, "\n Config error: num_routers specified %d total routers computed in the network %d " + "does not match with repetitions * dragonfly_router %d ", + p->num_routers, p->total_routers, num_grp_reps * num_routers_per_mgrp); + + r->router_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); + r->group_id=r->router_id/p->num_routers; + + char rtr_bw_log[128]; + sprintf(rtr_bw_log, "router-bw-tracker-%d", g_tw_mynode); + + if(dragonfly_rtr_bw_log == NULL) + { + dragonfly_rtr_bw_log = fopen(rtr_bw_log, "w+"); + + fprintf(dragonfly_rtr_bw_log, "\n router-id time-stamp port-id qos-level bw-consumed qos-status qos-data busy-time"); + } + //printf("\n Local router id %d global id %d ", r->router_id, lp->gid); + + r->num_rtr_rc_windows = 100; + r->rc_index = 0; + r->is_monitoring_bw = 0; + r->fwd_events = 0; + r->rev_events = 0; + r->ross_rsample.fwd_events = 0; + r->ross_rsample.rev_events = 0; + + + int num_qos_levels = p->num_qos_levels; + + /* history window for bandwidth reverse computation */ + r->last_qos_status = (int*)calloc(r->num_rtr_rc_windows * r->params->radix * num_qos_levels, sizeof(int)); + r->last_qos_data = (int*)calloc(r->num_rtr_rc_windows * r->params->radix * num_qos_levels, sizeof(int)); + + r->global_channel = (int*)calloc(p->num_global_channels, sizeof(int)); + r->next_output_available_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); + r->cur_hist_start_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); + r->link_traffic = (int64_t*)calloc(p->radix, sizeof(int64_t)); + r->link_traffic_sample = (int64_t*)calloc(p->radix, sizeof(int64_t)); + r->cur_hist_num = (int*)calloc(p->radix, sizeof(int)); + r->prev_hist_num = (int*)calloc(p->radix, sizeof(int)); + + r->last_sent_chan = (int*) calloc(p->num_router_rows, sizeof(int)); + r->vc_occupancy = (int**)calloc(p->radix , sizeof(int*)); + r->in_send_loop = (int*)calloc(p->radix, sizeof(int)); + r->qos_data = (int**)calloc(p->radix, sizeof(int*)); + r->last_qos_lvl = (int*)calloc(p->radix, sizeof(int)); + r->qos_status = (int**)calloc(p->radix, sizeof(int*)); + r->pending_msgs = + (terminal_dally_message_list***)calloc((p->radix), sizeof(terminal_dally_message_list**)); + r->pending_msgs_tail = + (terminal_dally_message_list***)calloc((p->radix), sizeof(terminal_dally_message_list**)); + r->queued_msgs = + (terminal_dally_message_list***)calloc(p->radix, sizeof(terminal_dally_message_list**)); + r->queued_msgs_tail = + (terminal_dally_message_list***)calloc(p->radix, sizeof(terminal_dally_message_list**)); + r->queued_count = (int*)calloc(p->radix, sizeof(int)); + r->last_buf_full = (tw_stime*)calloc(p->radix, sizeof(tw_stime*)); + r->busy_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); + r->busy_time_sample = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); + + /* set up for ROSS stats sampling */ + r->link_traffic_ross_sample = (int64_t*)calloc(p->radix, sizeof(int64_t)); + r->busy_time_ross_sample = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); + if (g_st_model_stats) + lp->model_types->mstat_sz = sizeof(tw_lpid) + (sizeof(int64_t) + sizeof(tw_stime)) * p->radix; + if (g_st_use_analysis_lps && g_st_model_stats) + lp->model_types->sample_struct_sz = sizeof(struct dfly_router_sample) + (sizeof(tw_stime) + sizeof(int64_t)) * p->radix; + r->ross_rsample.busy_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); + r->ross_rsample.link_traffic_sample = (int64_t*)calloc(p->radix, sizeof(int64_t)); + + rc_stack_create(&r->st); + + for(int i = 0; i < p->num_router_rows; i++) + r->last_sent_chan[i] = 0; + + for(int i=0; i < p->radix; i++) + { + // Set credit & router occupancy + r->last_buf_full[i] = 0.0; + r->busy_time[i] = 0.0; + r->busy_time_sample[i] = 0.0; + r->next_output_available_time[i]=0; + r->last_qos_lvl[i] = 0; + r->cur_hist_start_time[i] = 0; + r->link_traffic[i]=0; + r->link_traffic_sample[i] = 0; + r->cur_hist_num[i] = 0; + r->prev_hist_num[i] = 0; + r->queued_count[i] = 0; + r->in_send_loop[i] = 0; + r->vc_occupancy[i] = (int*)calloc(p->num_vcs, sizeof(int)); +// printf("\n Number of vcs %d for radix %d ", p->num_vcs, p->radix); + r->pending_msgs[i] = (terminal_dally_message_list**)calloc(p->num_vcs, + sizeof(terminal_dally_message_list*)); + r->pending_msgs_tail[i] = (terminal_dally_message_list**)calloc(p->num_vcs, + sizeof(terminal_dally_message_list*)); + r->queued_msgs[i] = (terminal_dally_message_list**)calloc(p->num_vcs, + sizeof(terminal_dally_message_list*)); + r->queued_msgs_tail[i] = (terminal_dally_message_list**)calloc(p->num_vcs, + sizeof(terminal_dally_message_list*)); + r->qos_status[i] = (int*)calloc(num_qos_levels, sizeof(int)); + r->qos_data[i] = (int*)calloc(num_qos_levels, sizeof(int)); + for(int j = 0; j < num_qos_levels; j++) + { + r->qos_status[i][j] = Q_ACTIVE; + r->qos_data[i][j] = 0; + } + for(int j = 0; j < p->num_vcs; j++) { + r->pending_msgs[i][j] = NULL; + r->pending_msgs_tail[i][j] = NULL; + r->queued_msgs[i][j] = NULL; + r->queued_msgs_tail[i][j] = NULL; + } + } + return; +} + + +/* dragonfly packet event , generates a dragonfly packet on the compute node */ +static tw_stime dragonfly_dally_packet_event( + model_net_request const * req, + uint64_t message_offset, + uint64_t packet_size, + tw_stime offset, + mn_sched_params const * sched_params, + void const * remote_event, + void const * self_event, + tw_lp *sender, + int is_last_pckt) +{ + (void)message_offset; + (void)sched_params; + tw_event * e_new; + tw_stime xfer_to_nic_time; + terminal_dally_message * msg; + char* tmp_ptr; + + xfer_to_nic_time = codes_local_latency(sender); + //e_new = tw_event_new(sender->gid, xfer_to_nic_time+offset, sender); + //msg = tw_event_data(e_new); + e_new = model_net_method_event_new(sender->gid, xfer_to_nic_time+offset, + sender, DRAGONFLY_DALLY, (void**)&msg, (void**)&tmp_ptr); + strcpy(msg->category, req->category); + msg->final_dest_gid = req->final_dest_lp; + msg->total_size = req->msg_size; + msg->sender_lp=req->src_lp; + msg->sender_mn_lp = sender->gid; + msg->packet_size = packet_size; + msg->travel_start_time = tw_now(sender); + msg->remote_event_size_bytes = 0; + msg->local_event_size_bytes = 0; + msg->type = T_GENERATE; + msg->dest_terminal_id = req->dest_mn_lp; + msg->message_id = req->msg_id; + msg->is_pull = req->is_pull; + msg->pull_size = req->pull_size; + msg->magic = terminal_magic_num; + msg->msg_start_time = req->msg_start_time; + + if(is_last_pckt) /* Its the last packet so pass in remote and local event information*/ + { + if(req->remote_event_size > 0) + { + msg->remote_event_size_bytes = req->remote_event_size; + memcpy(tmp_ptr, remote_event, req->remote_event_size); + tmp_ptr += req->remote_event_size; + } + if(req->self_event_size > 0) + { + msg->local_event_size_bytes = req->self_event_size; + memcpy(tmp_ptr, self_event, req->self_event_size); + tmp_ptr += req->self_event_size; + } + } + //printf("\n dragonfly remote event %d local event %d last packet %d %lf ", msg->remote_event_size_bytes, msg->local_event_size_bytes, is_last_pckt, xfer_to_nic_time); + tw_event_send(e_new); + return xfer_to_nic_time; +} + +/* dragonfly packet event reverse handler */ +static void dragonfly_dally_packet_event_rc(tw_lp *sender) +{ + codes_local_latency_reverse(sender); + return; +} + +/*When a packet is sent from the current router and a buffer slot becomes available, a credit is sent back to schedule another packet event*/ +static void router_credit_send(router_state * s, terminal_dally_message * msg, + tw_lp * lp, int sq, short* rng_counter) { + tw_event * buf_e; + tw_stime ts; + terminal_dally_message * buf_msg; + + int dest = 0, type = R_BUFFER; + int is_terminal = 0; + + const dragonfly_param *p = s->params; + + // Notify sender terminal about available buffer space + if(msg->last_hop == TERMINAL) { + dest = msg->src_terminal_id; + type = T_BUFFER; + is_terminal = 1; + } else if(msg->last_hop == GLOBAL + || msg->last_hop == LOCAL + || msg->last_hop == ROOT) + { + dest = msg->intm_lp_id; + } else + printf("\n Invalid message type"); + + (*rng_counter)++; + ts = g_tw_lookahead + p->credit_delay + tw_rand_unif(lp->rng); + + if (is_terminal) { + buf_e = model_net_method_event_new(dest, ts, lp, DRAGONFLY_DALLY, + (void**)&buf_msg, NULL); + buf_msg->magic = terminal_magic_num; + } else { + buf_e = model_net_method_event_new(dest, ts, lp, DRAGONFLY_DALLY_ROUTER, + (void**)&buf_msg, NULL); + buf_msg->magic = router_magic_num; + } + + buf_msg->origin_router_id = s->router_id; + if(sq == -1) { + buf_msg->vc_index = msg->vc_index; + buf_msg->output_chan = msg->output_chan; + } else { + buf_msg->vc_index = msg->saved_vc; + buf_msg->output_chan = msg->saved_channel; + } + strcpy(buf_msg->category, msg->category); + buf_msg->type = type; + + tw_event_send(buf_e); + return; +} + +static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ + int num_qos_levels = s->params->num_qos_levels; + if(bf->c1) + s->is_monitoring_bw = 0; + + s->packet_gen--; + packet_gen--; + s->packet_counter--; + + if(bf->c2) + num_local_packets_sr--; + if(bf->c3) + num_local_packets_sg--; + if(bf->c4) + num_remote_packets--; + + for(int i = 0; i < msg->num_rngs; i++) + tw_rand_reverse_unif(lp->rng); + + for(int i = 0; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); + + int num_chunks = msg->packet_size/s->params->chunk_size; + if(msg->packet_size < s->params->chunk_size) + num_chunks++; + + int i; + int vcg = 0; + if(num_qos_levels > 1) + { + vcg = get_vcg_from_category(msg); + assert(vcg == Q_HIGH || vcg == Q_MEDIUM); + } + assert(vcg < num_qos_levels); + + for(i = 0; i < num_chunks; i++) { + delete_terminal_dally_message_list(return_tail(s->terminal_msgs, + s->terminal_msgs_tail, vcg)); + s->terminal_length[vcg] -= s->params->chunk_size; + } + if(bf->c5) { + s->in_send_loop = 0; + } + if(bf->c11) { + s->issueIdle = 0; + } + struct mn_stats* stat; + stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); + stat->send_count--; + stat->send_bytes -= msg->packet_size; + stat->send_time -= (1/s->params->cn_bandwidth) * msg->packet_size; +} + +/* generates packet at the current dragonfly compute node */ +static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, + tw_lp * lp) { + + msg->num_rngs = 0; + msg->num_cll = 0; + + packet_gen++; + int num_qos_levels = s->params->num_qos_levels; + + if(num_qos_levels > 1) + { + tw_lpid router_id; + codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, + &mapping_type_id, NULL, &mapping_rep_id, &mapping_offset); + codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, NULL, 0, + s->router_id / num_routers_per_mgrp, s->router_id % num_routers_per_mgrp, &router_id); + if(s->is_monitoring_bw == 0) + { + bf->c1 = 1; + /* Issue an event on both terminal and router to monitor bandwidth */ + msg->num_cll++; + tw_stime bw_ts = bw_reset_window + codes_local_latency(lp); + terminal_dally_message * m; + tw_event * e = model_net_method_event_new(lp->gid, bw_ts, lp, DRAGONFLY_DALLY, + (void**)&m, NULL); + m->type = T_BANDWIDTH; + m->magic = terminal_magic_num; + s->is_monitoring_bw = 1; + tw_event_send(e); + } + } + s->packet_gen++; + + tw_stime ts, nic_ts; + + assert(lp->gid != msg->dest_terminal_id); + const dragonfly_param *p = s->params; + + int vcg = 0; + if(num_qos_levels > 1) + { + vcg = get_vcg_from_category(msg); + assert(vcg == Q_HIGH || vcg == Q_MEDIUM); + } + assert(vcg < num_qos_levels); + + int total_event_size; + uint64_t num_chunks = msg->packet_size / p->chunk_size; + double cn_delay = s->params->cn_delay; + + if (msg->packet_size < s->params->chunk_size) + num_chunks++; + + if(msg->packet_size < s->params->chunk_size) + cn_delay = bytes_to_ns(msg->packet_size % s->params->chunk_size, s->params->cn_bandwidth); + + int dest_router_id = codes_mapping_get_lp_relative_id(msg->dest_terminal_id, 0, 0) / s->params->num_cn; + int dest_grp_id = dest_router_id / s->params->num_routers; + int src_grp_id = s->router_id / s->params->num_routers; + + if(src_grp_id == dest_grp_id) + { + if(dest_router_id == s->router_id) + { + bf->c2 = 1; + num_local_packets_sr++; + } + else + { + bf->c3 = 1; + num_local_packets_sg++; + } + } + else + { + bf->c4 = 1; + num_remote_packets++; + } + msg->num_rngs++; + nic_ts = g_tw_lookahead + (num_chunks * cn_delay) + tw_rand_unif(lp->rng); + + msg->packet_ID = s->packet_counter; + s->packet_counter++; + msg->my_N_hop = 0; + msg->my_l_hop = 0; + msg->my_g_hop = 0; + + + for(int i = 0; i < num_chunks; i++) + { + terminal_dally_message_list *cur_chunk = (terminal_dally_message_list*)calloc(1, + sizeof(terminal_dally_message_list)); + msg->origin_router_id = s->router_id; + init_terminal_dally_message_list(cur_chunk, msg); + + if(msg->remote_event_size_bytes + msg->local_event_size_bytes > 0) { + cur_chunk->event_data = (char*)calloc(1, + msg->remote_event_size_bytes + msg->local_event_size_bytes); + } + + void * m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg); + if (msg->remote_event_size_bytes){ + memcpy(cur_chunk->event_data, m_data_src, msg->remote_event_size_bytes); + } + if (msg->local_event_size_bytes){ + m_data_src = (char*)m_data_src + msg->remote_event_size_bytes; + memcpy((char*)cur_chunk->event_data + msg->remote_event_size_bytes, + m_data_src, msg->local_event_size_bytes); + } + + cur_chunk->msg.output_chan = vcg; + cur_chunk->msg.chunk_id = i; + cur_chunk->msg.origin_router_id = s->router_id; + append_to_terminal_dally_message_list(s->terminal_msgs, s->terminal_msgs_tail, + vcg, cur_chunk); + s->terminal_length[vcg] += s->params->chunk_size; + } + + if(s->terminal_length[vcg] < s->params->cn_vc_size) { + model_net_method_idle_event(nic_ts, 0, lp); + } else { + bf->c11 = 1; + s->issueIdle = 1; + } + + if(s->in_send_loop == 0) { + bf->c5 = 1; + msg->num_cll++; + ts = codes_local_latency(lp); + terminal_dally_message *m; + tw_event* e = model_net_method_event_new(lp->gid, ts, lp, DRAGONFLY_DALLY, + (void**)&m, NULL); + m->type = T_SEND; + m->magic = terminal_magic_num; + s->in_send_loop = 1; + tw_event_send(e); + } + + total_event_size = model_net_get_msg_sz(DRAGONFLY_DALLY) + + msg->remote_event_size_bytes + msg->local_event_size_bytes; + mn_stats* stat; + stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); + stat->send_count++; + stat->send_bytes += msg->packet_size; + stat->send_time += (1/p->cn_bandwidth) * msg->packet_size; + if(stat->max_event_size < total_event_size) + stat->max_event_size = total_event_size; + + return; +} + +static int get_term_bandwidth_consumption(terminal_state * s, int qos_lvl) +{ + assert(qos_lvl >= Q_HIGH && qos_lvl <= Q_LOW); + + //tw_stime reset_window_s = ns_to_s(bw_reset_window); + //double bw_gib = bytes_to_gigabytes(s->qos_data[qos_lvl]); + + //double bw_consumed = ((double)bw_gib / (double)reset_window_s); + double max_bw = s->params->cn_bandwidth * 1024.0 * 1024.0 * 1024.0; + double max_bw_per_ns = max_bw / (1000.0 * 1000.0 * 1000.0); + double max_bytes_per_win = max_bw_per_ns * bw_reset_window; +// int percent_bw = (bw_consumed / s->params->cn_bandwidth) * 100; + int percent_bw = (((double)s->qos_data[qos_lvl]) / max_bytes_per_win) * 100; +// printf("\n At terminal %lf max bytes %d percent %d ", max_bytes_per_win, s->qos_data[qos_lvl], percent_bw); + return percent_bw; +} +static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ + int num_qos_levels = s->params->num_qos_levels; + + int vcs_per_qos = s->params->num_vcs / num_qos_levels; + int output_port = msg->vc_index; + int vcg = 0; + int base_limit = 0; + + int chunk_size = s->params->chunk_size; + int bw_consumption[num_qos_levels]; + /* First make sure the bandwidth consumptions are up to date. */ + if(BW_MONITOR == 1) + { + for(int k = 0; k < num_qos_levels; k++) + { + if(s->qos_status[output_port][k] != Q_OVERBW) + { + bw_consumption[k] = get_rtr_bandwidth_consumption(s, k, output_port); + if(bw_consumption[k] > s->params->qos_bandwidths[k]) + { +// printf("\n Router %d QoS %d exceeded allowed bandwidth %d ", s->router_id, k, bw_consumption[k]); + if(k == 0) + msg->qos_reset1 = 1; + else if(k == 1) + msg->qos_reset2 = 1; + + s->qos_status[output_port][k] = Q_OVERBW; + } + } + } + int vc_size = s->params->global_vc_size; + if(output_port < s->params->intra_grp_radix) + vc_size = s->params->local_vc_size; + + /* TODO: If none of the vcg is exceeding bandwidth limit then select high + * priority traffic first. */ + for(int i = 0; i < num_qos_levels; i++) + { + if(s->qos_status[output_port][i] == Q_ACTIVE) + { + int base_limit = i * vcs_per_qos; + for(int k = base_limit; k < base_limit + vcs_per_qos; k ++) + { + if(s->pending_msgs[output_port][k] != NULL) + return k; + } + } + } + } + + /* All vcgs are exceeding their bandwidth limits*/ + msg->last_saved_qos = s->last_qos_lvl[output_port]; + int next_rr_vcg = (s->last_qos_lvl[output_port] + 1) % num_qos_levels; + + for(int i = 0; i < num_qos_levels; i++) + { + base_limit = next_rr_vcg * vcs_per_qos; + for(int k = base_limit; k < base_limit + vcs_per_qos; k++) + { + if(s->pending_msgs[output_port][k] != NULL) + { + if(msg->last_saved_qos < 0) + msg->last_saved_qos = s->last_qos_lvl[output_port]; + + s->last_qos_lvl[output_port] = next_rr_vcg; + return k; + } + } + next_rr_vcg = (next_rr_vcg + 1) % num_qos_levels; + assert(next_rr_vcg < 2); + } + return -1; +} + +static int get_next_vcg(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ + int num_qos_levels = s->params->num_qos_levels; + + if(num_qos_levels == 1) + { + if(s->terminal_msgs[0] == NULL || s->vc_occupancy[0] + s->params->chunk_size > s->params->cn_vc_size) + return -1; + else + return 0; + } + + int bw_consumption[num_qos_levels]; + + /* First make sure the bandwidth consumptions are up to date. */ + for(int k = 0; k < num_qos_levels; k++) + { + if(s->qos_status[k] != Q_OVERBW) + { + bw_consumption[k] = get_term_bandwidth_consumption(s, k); + if(bw_consumption[k] > s->params->qos_bandwidths[k]) + { + if(k == 0) + msg->qos_reset1 = 1; + else if(k == 1) + msg->qos_reset2 = 1; + + s->qos_status[k] = Q_OVERBW; + } + } + } + /* TODO: If none of the vcg is exceeding bandwidth limit then select high + * priority traffic first. */ + if(BW_MONITOR == 1) + { + for(int i = 0; i < num_qos_levels; i++) + { + if(s->qos_status[i] == Q_ACTIVE) + { + if(s->terminal_msgs[i] != NULL && s->vc_occupancy[i] + s->params->chunk_size <= s->params->cn_vc_size) + return i; + } + } + } + + + int next_rr_vcg = (s->last_qos_lvl + 1) % num_qos_levels; + /* All vcgs are exceeding their bandwidth limits*/ + for(int i = 0; i < num_qos_levels; i++) + { + if(s->terminal_msgs[i] != NULL && s->vc_occupancy[i] + s->params->chunk_size <= s->params->cn_vc_size) + { + bf->c2 = 1; + + if(msg->last_saved_qos < 0) + msg->last_saved_qos = s->last_qos_lvl; + + s->last_qos_lvl = next_rr_vcg; + return i; + } + next_rr_vcg = (next_rr_vcg + 1) % num_qos_levels; + } + return -1; +} + +static void packet_send_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, + tw_lp * lp) +{ + int num_qos_levels = s->params->num_qos_levels; + + if(msg->qos_reset1) + s->qos_status[0] = Q_ACTIVE; + if(msg->qos_reset2) + s->qos_status[1] = Q_ACTIVE; + + if(msg->last_saved_qos) + s->last_qos_lvl = msg->last_saved_qos; + + if(bf->c1) { + s->in_send_loop = 1; + if(bf->c3) + s->last_buf_full = msg->saved_busy_time; + + return; + } + + int vcg = msg->saved_vc; + s->terminal_available_time = msg->saved_available_time; + + for(int i = 0; i < msg->num_cll; i++) { + codes_local_latency_reverse(lp); + } + + for(int i = 0; i < msg->num_rngs; i++) + { + tw_rand_reverse_unif(lp->rng); + } + s->terminal_length[vcg] += s->params->chunk_size; + /*TODO: MM change this to the vcg */ + s->vc_occupancy[vcg] -= s->params->chunk_size; + + terminal_dally_message_list* cur_entry = (terminal_dally_message_list *)rc_stack_pop(s->st); + + int data_size = s->params->chunk_size; + if(cur_entry->msg.packet_size < s->params->chunk_size) + data_size = cur_entry->msg.packet_size % s->params->chunk_size; + + s->qos_data[vcg] -= data_size; + + prepend_to_terminal_dally_message_list(s->terminal_msgs, + s->terminal_msgs_tail, vcg, cur_entry); + if(bf->c4) { + s->in_send_loop = 1; + } + if(bf->c5) + { + s->issueIdle = 1; + if(bf->c6) + { + s->busy_time = msg->saved_total_time; + s->last_buf_full = msg->saved_busy_time; + s->busy_time_sample = msg->saved_sample_time; + s->ross_sample.busy_time_sample = msg->saved_sample_time; + s->busy_time_ross_sample = msg->saved_busy_time_ross; + } + } + return; +} +/* sends the packet from the current dragonfly compute node to the attached router */ +static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, + tw_lp * lp) { + + tw_stime ts; + tw_event *e; + terminal_dally_message *m; + tw_lpid router_id; + int vcg = 0; + int num_qos_levels = s->params->num_qos_levels; + + msg->last_saved_qos = -1; + msg->qos_reset1 = -1; + msg->qos_reset2 = -1; + msg->num_rngs = 0; + msg->num_cll = 0; + + if(num_qos_levels > 1) + vcg = get_next_vcg(s, bf, msg, lp); + + /* For a terminal to router connection, there would be as many VCGs as number + * of VCs*/ + + if(vcg == -1) { + bf->c1 = 1; + s->in_send_loop = 0; + if(!s->last_buf_full) + { + bf->c3 = 1; + msg->saved_busy_time = s->last_buf_full; + s->last_buf_full = tw_now(lp); + } + return; + } + + msg->saved_vc = vcg; + terminal_dally_message_list* cur_entry = s->terminal_msgs[vcg]; + int data_size = s->params->chunk_size; + uint64_t num_chunks = cur_entry->msg.packet_size/s->params->chunk_size; + if(cur_entry->msg.packet_size < s->params->chunk_size) + num_chunks++; + + tw_stime delay = s->params->cn_delay; + if((cur_entry->msg.packet_size < s->params->chunk_size) && (cur_entry->msg.chunk_id == num_chunks - 1)) + { + data_size = cur_entry->msg.packet_size % s->params->chunk_size; + delay = bytes_to_ns(cur_entry->msg.packet_size % s->params->chunk_size, s->params->cn_bandwidth); + } + + s->qos_data[vcg] += data_size; + + msg->saved_available_time = s->terminal_available_time; + + msg->num_rngs++; + ts = g_tw_lookahead + delay + tw_rand_unif(lp->rng); + + s->terminal_available_time = maxd(s->terminal_available_time, tw_now(lp)); + s->terminal_available_time += ts; + + ts = s->terminal_available_time - tw_now(lp); + codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, + &mapping_type_id, NULL, &mapping_rep_id, &mapping_offset); + codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, NULL, 0, + s->router_id / num_routers_per_mgrp, s->router_id % num_routers_per_mgrp, &router_id); + +// if(s->router_id == 1) +// printf("\n Local router id %d global router id %d ", s->router_id, router_id); + // we are sending an event to the router, so no method_event here + void * remote_event; + e = model_net_method_event_new(router_id, ts, lp, + DRAGONFLY_DALLY_ROUTER, (void**)&m, &remote_event); + memcpy(m, &cur_entry->msg, sizeof(terminal_dally_message)); + if (m->remote_event_size_bytes){ + memcpy(remote_event, cur_entry->event_data, m->remote_event_size_bytes); + } + + m->type = R_ARRIVE; + m->src_terminal_id = lp->gid; + m->vc_index = vcg; + m->last_hop = TERMINAL; + m->magic = router_magic_num; + m->path_type = -1; + m->local_event_size_bytes = 0; + m->intm_rtr_id = -1; + tw_event_send(e); + + + if(cur_entry->msg.packet_ID == LLU(TRACK_PKT) && lp->gid == T_ID) + printf("\n Packet %llu generated at terminal %d dest %llu size %llu num chunks %llu router-id %d %llu", + cur_entry->msg.packet_ID, s->terminal_id, LLU(cur_entry->msg.dest_terminal_id), + LLU(cur_entry->msg.packet_size), LLU(num_chunks), s->router_id, router_id); + + if(cur_entry->msg.chunk_id == num_chunks - 1 && + (cur_entry->msg.local_event_size_bytes > 0)) { + msg->num_cll++; + tw_stime local_ts = codes_local_latency(lp); + tw_event *e_new = tw_event_new(cur_entry->msg.sender_lp, local_ts, lp); + void * m_new = tw_event_data(e_new); + void *local_event = (char*)cur_entry->event_data + + cur_entry->msg.remote_event_size_bytes; + memcpy(m_new, local_event, cur_entry->msg.local_event_size_bytes); + tw_event_send(e_new); + } + int next_vcg = 0; + + if(num_qos_levels > 1) + next_vcg = get_next_vcg(s, bf, msg, lp); + + s->vc_occupancy[vcg] += s->params->chunk_size; + cur_entry = return_head(s->terminal_msgs, s->terminal_msgs_tail, vcg); + rc_stack_push(lp, cur_entry, delete_terminal_dally_message_list, s->st); + s->terminal_length[vcg] -= s->params->chunk_size; + + cur_entry = NULL; + if(next_vcg >= 0) + cur_entry = s->terminal_msgs[next_vcg]; + + /* if there is another packet inline then schedule another send event */ + if(cur_entry != NULL && + s->vc_occupancy[next_vcg] + s->params->chunk_size <= s->params->cn_vc_size) { + terminal_dally_message *m_new; + msg->num_rngs++; + ts += tw_rand_unif(lp->rng); + e = model_net_method_event_new(lp->gid, ts, lp, DRAGONFLY_DALLY, + (void**)&m_new, NULL); + m_new->type = T_SEND; + m_new->magic = terminal_magic_num; + tw_event_send(e); + } else { + /* If not then the LP will wait for another credit or packet generation */ + bf->c4 = 1; + s->in_send_loop = 0; + } + if(s->issueIdle) { + bf->c5 = 1; + s->issueIdle = 0; + msg->num_rngs++; + ts += tw_rand_unif(lp->rng); + model_net_method_idle_event(ts, 0, lp); + + if(s->last_buf_full > 0.0) + { + bf->c6 = 1; + msg->saved_total_time = s->busy_time; + msg->saved_busy_time = s->last_buf_full; + msg->saved_sample_time = s->busy_time_sample; + + s->busy_time += (tw_now(lp) - s->last_buf_full); + s->busy_time_sample += (tw_now(lp) - s->last_buf_full); + s->ross_sample.busy_time_sample += (tw_now(lp) - s->last_buf_full); + s->busy_time_ross_sample += (tw_now(lp) - s->last_buf_full); + s->last_buf_full = 0.0; + } + } + return; +} + +static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ + + for(int i = 0; i < msg->num_rngs; i++) + tw_rand_reverse_unif(lp->rng); + + for(int i = 0; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); + + if(bf->c31) + { + s->packet_fin--; + packet_fin--; + } + + if(msg->path_type == MINIMAL) + minimal_count--; + if(msg->path_type == NON_MINIMAL) + nonmin_count--; + + N_finished_chunks--; + s->finished_chunks--; + s->fin_chunks_sample--; + s->ross_sample.fin_chunks_sample--; + s->fin_chunks_ross_sample--; + + total_hops -= msg->my_N_hop; + s->total_hops -= msg->my_N_hop; + s->fin_hops_sample -= msg->my_N_hop; + s->ross_sample.fin_hops_sample -= msg->my_N_hop; + s->fin_hops_ross_sample -= msg->my_N_hop; + dragonfly_total_time = msg->saved_total_time; + s->fin_chunks_time = msg->saved_sample_time; + s->ross_sample.fin_chunks_time = msg->saved_sample_time; + s->fin_chunks_time_ross_sample = msg->saved_fin_chunks_ross; + s->total_time = msg->saved_avg_time; + + struct qhash_head * hash_link = NULL; + struct dfly_qhash_entry * tmp = NULL; + + struct dfly_hash_key key; + key.message_id = msg->message_id; + key.sender_id = msg->sender_lp; + + hash_link = qhash_search(s->rank_tbl, &key); + tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link); + + mn_stats* stat; + stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); + stat->recv_time = msg->saved_rcv_time; + + if(bf->c1) + { + stat->recv_count--; + stat->recv_bytes -= msg->packet_size; + N_finished_packets--; + s->finished_packets--; + } + if(bf->c3) + { + dragonfly_max_latency = msg->saved_available_time; + } + + if(bf->c22) + { + s->max_latency = msg->saved_available_time; + } + if(bf->c7) + { + //assert(!hash_link); + if(bf->c4) + model_net_event_rc2(lp, &msg->event_rc); + + N_finished_msgs--; + s->finished_msgs--; + total_msg_sz -= msg->total_size; + s->total_msg_size -= msg->total_size; + s->data_size_sample -= msg->total_size; + s->ross_sample.data_size_sample -= msg->total_size; + s->data_size_ross_sample -= msg->total_size; + + struct dfly_qhash_entry * d_entry_pop = (dfly_qhash_entry *)rc_stack_pop(s->st); + qhash_add(s->rank_tbl, &key, &(d_entry_pop->hash_link)); + s->rank_tbl_pop++; + + if(s->rank_tbl_pop >= DFLY_HASH_TABLE_SIZE) + tw_error(TW_LOC, "\n Exceeded allocated qhash size, increase hash size in dragonfly model"); + + hash_link = &(d_entry_pop->hash_link); + tmp = d_entry_pop; + + } + + assert(tmp); + tmp->num_chunks--; + + if(bf->c5) + { + qhash_del(hash_link); + free_tmp(tmp); + s->rank_tbl_pop--; + } + return; +} +static void send_remote_event(terminal_state * s, terminal_dally_message * msg, tw_lp * lp, tw_bf * bf, char * event_data, int remote_event_size) +{ + void * tmp_ptr = model_net_method_get_edata(DRAGONFLY_DALLY, msg); + + msg->num_rngs++; + tw_stime ts = g_tw_lookahead + mpi_soft_overhead + tw_rand_unif(lp->rng); + + if (msg->is_pull){ + bf->c4 = 1; + struct codes_mctx mc_dst = + codes_mctx_set_global_direct(msg->sender_mn_lp); + struct codes_mctx mc_src = + codes_mctx_set_global_direct(lp->gid); + int net_id = model_net_get_id(LP_METHOD_NM_TERM); + + model_net_set_msg_param(MN_MSG_PARAM_START_TIME, MN_MSG_PARAM_START_TIME_VAL, &(msg->msg_start_time)); + + msg->event_rc = model_net_event_mctx(net_id, &mc_src, &mc_dst, msg->category, + msg->sender_lp, msg->pull_size, ts, + remote_event_size, tmp_ptr, 0, NULL, lp); + } + else{ + tw_event * e = tw_event_new(msg->final_dest_gid, ts, lp); + void * m_remote = tw_event_data(e); + memcpy(m_remote, event_data, remote_event_size); + tw_event_send(e); + } + return; +} +/* packet arrives at the destination terminal */ +static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, + tw_lp * lp) { + + if (msg->my_N_hop > s->params->max_hops_notify) + { + printf("Terminal received a packet with %d hops! (Notify on > than %d)\n",msg->my_N_hop, s->params->max_hops_notify); + } + + // NIC aggregation - should this be a separate function? + // Trigger an event on receiving server + + msg->num_rngs = 0; + msg->num_cll = 0; + + if(!s->rank_tbl) + s->rank_tbl = qhash_init(dragonfly_rank_hash_compare, dragonfly_hash_func, DFLY_HASH_TABLE_SIZE); + + struct dfly_hash_key key; + key.message_id = msg->message_id; + key.sender_id = msg->sender_lp; + + struct qhash_head *hash_link = NULL; + struct dfly_qhash_entry * tmp = NULL; + + hash_link = qhash_search(s->rank_tbl, &key); + + if(hash_link) + tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link); + + uint64_t total_chunks = msg->total_size / s->params->chunk_size; + + if(msg->total_size % s->params->chunk_size) + total_chunks++; + + if(!total_chunks) + total_chunks = 1; + + /*if(tmp) + { + if(tmp->num_chunks >= total_chunks || tmp->num_chunks < 0) + { + //tw_output(lp, "\n invalid number of chunks %d for LP %ld ", tmp->num_chunks, lp->gid); + tw_lp_suspend(lp, 0, 0); + return; + } + }*/ + assert(lp->gid == msg->dest_terminal_id); + + if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID) + printf("\n Packet %llu arrived at lp %llu hops %d ", msg->sender_lp, LLU(lp->gid), msg->my_N_hop); + + msg->num_rngs++; + tw_stime ts = g_tw_lookahead + s->params->credit_delay + tw_rand_unif(lp->rng); + + // no method_event here - message going to router + tw_event * buf_e; + terminal_dally_message * buf_msg; + buf_e = model_net_method_event_new(msg->intm_lp_id, ts, lp, + DRAGONFLY_DALLY_ROUTER, (void**)&buf_msg, NULL); + buf_msg->magic = router_magic_num; + buf_msg->vc_index = msg->vc_index; + buf_msg->output_chan = msg->output_chan; + buf_msg->type = R_BUFFER; + tw_event_send(buf_e); + + bf->c1 = 0; + bf->c3 = 0; + bf->c4 = 0; + bf->c7 = 0; + + /* Total overall finished chunks in simulation */ + N_finished_chunks++; + /* Finished chunks on a LP basis */ + s->finished_chunks++; + /* Finished chunks per sample */ + s->fin_chunks_sample++; + s->ross_sample.fin_chunks_sample++; + s->fin_chunks_ross_sample++; + + /* WE do not allow self messages through dragonfly */ + assert(lp->gid != msg->src_terminal_id); + + uint64_t num_chunks = msg->packet_size / s->params->chunk_size; + if (msg->packet_size < s->params->chunk_size) + num_chunks++; + + if(msg->path_type == MINIMAL) + minimal_count++; + + if(msg->path_type == NON_MINIMAL) + nonmin_count++; + + if(msg->chunk_id == num_chunks - 1) + { + bf->c31 = 1; + s->packet_fin++; + packet_fin++; + } + if(msg->path_type != MINIMAL && msg->path_type != NON_MINIMAL) + printf("\n Wrong message path type %d ", msg->path_type); + + /* save the sample time */ + msg->saved_sample_time = s->fin_chunks_time; + s->fin_chunks_time += (tw_now(lp) - msg->travel_start_time); + s->ross_sample.fin_chunks_time += (tw_now(lp) - msg->travel_start_time); + msg->saved_fin_chunks_ross = s->fin_chunks_time_ross_sample; + s->fin_chunks_time_ross_sample += (tw_now(lp) - msg->travel_start_time); + + /* save the total time per LP */ + msg->saved_avg_time = s->total_time; + s->total_time += (tw_now(lp) - msg->travel_start_time); + + msg->saved_total_time = dragonfly_total_time; + dragonfly_total_time += tw_now( lp ) - msg->travel_start_time; + total_hops += msg->my_N_hop; + s->total_hops += msg->my_N_hop; + s->fin_hops_sample += msg->my_N_hop; + s->ross_sample.fin_hops_sample += msg->my_N_hop; + s->fin_hops_ross_sample += msg->my_N_hop; + + mn_stats* stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); + msg->saved_rcv_time = stat->recv_time; + stat->recv_time += (tw_now(lp) - msg->travel_start_time); + +#if DEBUG == 1 + if( msg->packet_ID == TRACK + && msg->chunk_id == num_chunks-1 + && msg->message_id == TRACK_MSG) + { + printf( "(%lf) [Terminal %d] packet %lld has arrived \n", + tw_now(lp), (int)lp->gid, msg->packet_ID); + + printf("travel start time is %f\n", + msg->travel_start_time); + + printf("My hop now is %d\n",msg->my_N_hop); + } +#endif + + /* Now retreieve the number of chunks completed from the hash and update + * them */ + void *m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg); + + /* If an entry does not exist then create one */ + if(!tmp) + { + bf->c5 = 1; + struct dfly_qhash_entry * d_entry = (dfly_qhash_entry *)calloc(1, sizeof (struct dfly_qhash_entry)); + d_entry->num_chunks = 0; + d_entry->key = key; + d_entry->remote_event_data = NULL; + d_entry->remote_event_size = 0; + qhash_add(s->rank_tbl, &key, &(d_entry->hash_link)); + s->rank_tbl_pop++; + + if(s->rank_tbl_pop >= DFLY_HASH_TABLE_SIZE) + tw_error(TW_LOC, "\n Exceeded allocated qhash size, increase hash size in dragonfly model"); + + hash_link = &(d_entry->hash_link); + tmp = d_entry; + } + + assert(tmp); + tmp->num_chunks++; + + if(msg->chunk_id == num_chunks - 1) + { + bf->c1 = 1; + stat->recv_count++; + stat->recv_bytes += msg->packet_size; + + N_finished_packets++; + s->finished_packets++; + } + /* if its the last chunk of the packet then handle the remote event data */ + if(msg->remote_event_size_bytes > 0 && !tmp->remote_event_data) + { + /* Retreive the remote event entry */ + tmp->remote_event_data = (char*)calloc(1, msg->remote_event_size_bytes); + assert(tmp->remote_event_data); + tmp->remote_event_size = msg->remote_event_size_bytes; + memcpy(tmp->remote_event_data, m_data_src, msg->remote_event_size_bytes); + } + if(s->min_latency > tw_now(lp) - msg->travel_start_time) { + s->min_latency = tw_now(lp) - msg->travel_start_time; + } + if (dragonfly_max_latency < tw_now( lp ) - msg->travel_start_time) { + bf->c3 = 1; + msg->saved_available_time = dragonfly_max_latency; + dragonfly_max_latency = tw_now( lp ) - msg->travel_start_time; + s->max_latency = tw_now(lp) - msg->travel_start_time; + } + if(s->max_latency < tw_now( lp ) - msg->travel_start_time) { + bf->c22 = 1; + msg->saved_available_time = s->max_latency; + s->max_latency = tw_now(lp) - msg->travel_start_time; + } + /* If all chunks of a message have arrived then send a remote event to the + * callee*/ + //assert(tmp->num_chunks <= total_chunks); + + if(tmp->num_chunks >= total_chunks) + { + bf->c7 = 1; + + s->data_size_sample += msg->total_size; + s->ross_sample.data_size_sample += msg->total_size; + s->data_size_ross_sample += msg->total_size; + N_finished_msgs++; + total_msg_sz += msg->total_size; + s->total_msg_size += msg->total_size; + s->finished_msgs++; + + //assert(tmp->remote_event_data && tmp->remote_event_size > 0); + if(tmp->remote_event_data && tmp->remote_event_size > 0) { + send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size); + } + /* Remove the hash entry */ + qhash_del(hash_link); + rc_stack_push(lp, tmp, free_tmp, s->st); + s->rank_tbl_pop--; + } + return; +} + +static void ross_dally_dragonfly_rsample_fn(router_state * s, tw_bf * bf, tw_lp * lp, struct dfly_router_sample *sample) +{ + (void)lp; + (void)bf; + + const dragonfly_param * p = s->params; + int i = 0; + + sample->router_id = s->router_id; + sample->end_time = tw_now(lp); + sample->fwd_events = s->ross_rsample.fwd_events; + sample->rev_events = s->ross_rsample.rev_events; + sample->busy_time = (tw_stime*)((&sample->rev_events) + 1); + sample->link_traffic_sample = (int64_t*)((&sample->busy_time[0]) + p->radix); + + for(; i < p->radix; i++) + { + sample->busy_time[i] = s->ross_rsample.busy_time[i]; + sample->link_traffic_sample[i] = s->ross_rsample.link_traffic_sample[i]; + } + + /* clear up the current router stats */ + s->ross_rsample.fwd_events = 0; + s->ross_rsample.rev_events = 0; + + for( i = 0; i < p->radix; i++) + { + s->ross_rsample.busy_time[i] = 0; + s->ross_rsample.link_traffic_sample[i] = 0; + } +} + +static void ross_dally_dragonfly_rsample_rc_fn(router_state * s, tw_bf * bf, tw_lp * lp, struct dfly_router_sample *sample) +{ + (void)lp; + (void)bf; + + const dragonfly_param * p = s->params; + int i =0; + + for(; i < p->radix; i++) + { + s->ross_rsample.busy_time[i] = sample->busy_time[i]; + s->ross_rsample.link_traffic_sample[i] = sample->link_traffic_sample[i]; + } + + s->ross_rsample.fwd_events = sample->fwd_events; + s->ross_rsample.rev_events = sample->rev_events; +} + +static void ross_dally_dragonfly_sample_fn(terminal_state * s, tw_bf * bf, tw_lp * lp, struct dfly_cn_sample *sample) +{ + (void)lp; + (void)bf; + + sample->terminal_id = s->terminal_id; + sample->fin_chunks_sample = s->ross_sample.fin_chunks_sample; + sample->data_size_sample = s->ross_sample.data_size_sample; + sample->fin_hops_sample = s->ross_sample.fin_hops_sample; + sample->fin_chunks_time = s->ross_sample.fin_chunks_time; + sample->busy_time_sample = s->ross_sample.busy_time_sample; + sample->end_time = tw_now(lp); + sample->fwd_events = s->ross_sample.fwd_events; + sample->rev_events = s->ross_sample.rev_events; + + s->ross_sample.fin_chunks_sample = 0; + s->ross_sample.data_size_sample = 0; + s->ross_sample.fin_hops_sample = 0; + s->ross_sample.fwd_events = 0; + s->ross_sample.rev_events = 0; + s->ross_sample.fin_chunks_time = 0; + s->ross_sample.busy_time_sample = 0; +} + +static void ross_dally_dragonfly_sample_rc_fn(terminal_state * s, tw_bf * bf, tw_lp * lp, struct dfly_cn_sample *sample) +{ + (void)lp; + (void)bf; + + s->ross_sample.busy_time_sample = sample->busy_time_sample; + s->ross_sample.fin_chunks_time = sample->fin_chunks_time; + s->ross_sample.fin_hops_sample = sample->fin_hops_sample; + s->ross_sample.data_size_sample = sample->data_size_sample; + s->ross_sample.fin_chunks_sample = sample->fin_chunks_sample; + s->ross_sample.fwd_events = sample->fwd_events; + s->ross_sample.rev_events = sample->rev_events; +} + +void dragonfly_dally_rsample_init(router_state * s, + tw_lp * lp) +{ + (void)lp; + int i = 0; + const dragonfly_param * p = s->params; + + assert(p->radix); + + s->max_arr_size = MAX_STATS; + s->rsamples = (struct dfly_router_sample*)calloc(MAX_STATS, sizeof(struct dfly_router_sample)); + for(; i < s->max_arr_size; i++) + { + s->rsamples[i].busy_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); + s->rsamples[i].link_traffic_sample = (int64_t*)calloc(p->radix, sizeof(int64_t)); + } +} +void dragonfly_dally_rsample_rc_fn(router_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) +{ + (void)bf; + (void)lp; + (void)msg; + + s->op_arr_size--; + int cur_indx = s->op_arr_size; + struct dfly_router_sample stat = s->rsamples[cur_indx]; + + const dragonfly_param * p = s->params; + int i =0; + + for(; i < p->radix; i++) + { + s->busy_time_sample[i] = stat.busy_time[i]; + s->link_traffic_sample[i] = stat.link_traffic_sample[i]; + } + + for( i = 0; i < p->radix; i++) + { + stat.busy_time[i] = 0; + stat.link_traffic_sample[i] = 0; + } + s->fwd_events = stat.fwd_events; + s->rev_events = stat.rev_events; +} + +void dragonfly_dally_rsample_fn(router_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) +{ + (void)bf; + (void)lp; + (void)msg; + + const dragonfly_param * p = s->params; + + if(s->op_arr_size >= s->max_arr_size) + { + struct dfly_router_sample * tmp = (dfly_router_sample *)calloc((MAX_STATS + s->max_arr_size), sizeof(struct dfly_router_sample)); + memcpy(tmp, s->rsamples, s->op_arr_size * sizeof(struct dfly_router_sample)); + free(s->rsamples); + s->rsamples = tmp; + s->max_arr_size += MAX_STATS; + } + + int i = 0; + int cur_indx = s->op_arr_size; + + s->rsamples[cur_indx].router_id = s->router_id; + s->rsamples[cur_indx].end_time = tw_now(lp); + s->rsamples[cur_indx].fwd_events = s->fwd_events; + s->rsamples[cur_indx].rev_events = s->rev_events; + + for(; i < p->radix; i++) + { + s->rsamples[cur_indx].busy_time[i] = s->busy_time_sample[i]; + s->rsamples[cur_indx].link_traffic_sample[i] = s->link_traffic_sample[i]; + } + + s->op_arr_size++; + + /* clear up the current router stats */ + s->fwd_events = 0; + s->rev_events = 0; + + for( i = 0; i < p->radix; i++) + { + s->busy_time_sample[i] = 0; + s->link_traffic_sample[i] = 0; + } +} + +void dragonfly_dally_rsample_fin(router_state * s, + tw_lp * lp) +{ + (void)lp; + const dragonfly_param * p = s->params; + + if(s->router_id == 0) + { + + /* write metadata file */ + char meta_fname[64]; + sprintf(meta_fname, "dragonfly-router-sampling.meta"); + + FILE * fp = fopen(meta_fname, "w"); + fprintf(fp, "Router sample struct format: \nrouter_id (tw_lpid) \nbusy time for each of the %d links (double) \n" + "link traffic for each of the %d links (int64_t) \nsample end time (double) forward events per sample \nreverse events per sample ", + p->radix, p->radix); + fprintf(fp, "\n\nOrdering of links \n%d green (router-router same row) channels \n %d black (router-router same column) channels \n %d global (router-router remote group)" + " channels \n %d terminal channels", p->num_router_cols * p->num_row_chans, p->num_router_rows * p->num_col_chans, p->num_global_channels, p->num_cn); + fclose(fp); + } + char rt_fn[MAX_NAME_LENGTH]; + if(strcmp(router_sample_file, "") == 0) + sprintf(rt_fn, "dragonfly-router-sampling-%ld.bin", g_tw_mynode); + else + sprintf(rt_fn, "%s-%ld.bin", router_sample_file, g_tw_mynode); + + int i = 0; + + int size_sample = sizeof(tw_lpid) + p->radix * (sizeof(int64_t) + sizeof(tw_stime)) + sizeof(tw_stime) + 2 * sizeof(long); + FILE * fp = fopen(rt_fn, "a"); + fseek(fp, sample_rtr_bytes_written, SEEK_SET); + + for(; i < s->op_arr_size; i++) + { + fwrite((void*)&(s->rsamples[i].router_id), sizeof(tw_lpid), 1, fp); + fwrite(s->rsamples[i].busy_time, sizeof(tw_stime), p->radix, fp); + fwrite(s->rsamples[i].link_traffic_sample, sizeof(int64_t), p->radix, fp); + fwrite((void*)&(s->rsamples[i].end_time), sizeof(tw_stime), 1, fp); + fwrite((void*)&(s->rsamples[i].fwd_events), sizeof(long), 1, fp); + fwrite((void*)&(s->rsamples[i].rev_events), sizeof(long), 1, fp); + } + sample_rtr_bytes_written += (s->op_arr_size * size_sample); + fclose(fp); +} +void dragonfly_dally_sample_init(terminal_state * s, + tw_lp * lp) +{ + (void)lp; + s->fin_chunks_sample = 0; + s->data_size_sample = 0; + s->fin_hops_sample = 0; + s->fin_chunks_time = 0; + s->busy_time_sample = 0; + + s->op_arr_size = 0; + s->max_arr_size = MAX_STATS; + + s->sample_stat = (dfly_cn_sample *)calloc(MAX_STATS, sizeof(struct dfly_cn_sample)); + +} +void dragonfly_dally_sample_rc_fn(terminal_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) +{ + (void)lp; + (void)bf; + (void)msg; + + s->op_arr_size--; + int cur_indx = s->op_arr_size; + struct dfly_cn_sample stat = s->sample_stat[cur_indx]; + s->busy_time_sample = stat.busy_time_sample; + s->fin_chunks_time = stat.fin_chunks_time; + s->fin_hops_sample = stat.fin_hops_sample; + s->data_size_sample = stat.data_size_sample; + s->fin_chunks_sample = stat.fin_chunks_sample; + s->fwd_events = stat.fwd_events; + s->rev_events = stat.rev_events; + + stat.busy_time_sample = 0; + stat.fin_chunks_time = 0; + stat.fin_hops_sample = 0; + stat.data_size_sample = 0; + stat.fin_chunks_sample = 0; + stat.end_time = 0; + stat.terminal_id = 0; + stat.fwd_events = 0; + stat.rev_events = 0; +} + +void dragonfly_dally_sample_fn(terminal_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) +{ + (void)lp; + (void)msg; + (void)bf; + + if(s->op_arr_size >= s->max_arr_size) + { + /* In the worst case, copy array to a new memory location, its very + * expensive operation though */ + struct dfly_cn_sample * tmp = (dfly_cn_sample *)calloc((MAX_STATS + s->max_arr_size), sizeof(struct dfly_cn_sample)); + memcpy(tmp, s->sample_stat, s->op_arr_size * sizeof(struct dfly_cn_sample)); + free(s->sample_stat); + s->sample_stat = tmp; + s->max_arr_size += MAX_STATS; + } + + int cur_indx = s->op_arr_size; + + s->sample_stat[cur_indx].terminal_id = s->terminal_id; + s->sample_stat[cur_indx].fin_chunks_sample = s->fin_chunks_sample; + s->sample_stat[cur_indx].data_size_sample = s->data_size_sample; + s->sample_stat[cur_indx].fin_hops_sample = s->fin_hops_sample; + s->sample_stat[cur_indx].fin_chunks_time = s->fin_chunks_time; + s->sample_stat[cur_indx].busy_time_sample = s->busy_time_sample; + s->sample_stat[cur_indx].end_time = tw_now(lp); + s->sample_stat[cur_indx].fwd_events = s->fwd_events; + s->sample_stat[cur_indx].rev_events = s->rev_events; + + s->op_arr_size++; + s->fin_chunks_sample = 0; + s->data_size_sample = 0; + s->fin_hops_sample = 0; + s->fwd_events = 0; + s->rev_events = 0; + s->fin_chunks_time = 0; + s->busy_time_sample = 0; +} + +void dragonfly_dally_sample_fin(terminal_state * s, + tw_lp * lp) +{ + (void)lp; + + + if(!g_tw_mynode) + { + + /* write metadata file */ + char meta_fname[64]; + sprintf(meta_fname, "dragonfly-cn-sampling.meta"); + + FILE * fp = fopen(meta_fname, "w"); + fprintf(fp, "Compute node sample format\nterminal_id (tw_lpid) \nfinished chunks (long)" + "\ndata size per sample (long) \nfinished hops (double) \ntime to finish chunks (double)" + "\nbusy time (double)\nsample end time(double) \nforward events (long) \nreverse events (long)"); + fclose(fp); + } + + char rt_fn[MAX_NAME_LENGTH]; + if(strncmp(cn_sample_file, "", 10) == 0) + sprintf(rt_fn, "dragonfly-cn-sampling-%ld.bin", g_tw_mynode); + else + sprintf(rt_fn, "%s-%ld.bin", cn_sample_file, g_tw_mynode); + + FILE * fp = fopen(rt_fn, "a"); + fseek(fp, sample_bytes_written, SEEK_SET); + fwrite(s->sample_stat, sizeof(struct dfly_cn_sample), s->op_arr_size, fp); + fclose(fp); + + sample_bytes_written += (s->op_arr_size * sizeof(struct dfly_cn_sample)); +} + +static void terminal_buf_update_rc(terminal_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) +{ + int vcg = 0; + int num_qos_levels = s->params->num_qos_levels; + + for(int i = 0; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); + + if(num_qos_levels > 1) + vcg = get_vcg_from_category(msg); + + s->vc_occupancy[vcg] += s->params->chunk_size; + if(bf->c1) { + s->in_send_loop = 0; + } + + return; +} +/* update the compute node-router channel buffer */ +static void +terminal_buf_update(terminal_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) +{ + msg->num_cll = 0; + msg->num_rngs = 0; + + bf->c1 = 0; + bf->c2 = 0; + bf->c3 = 0; + int vcg = 0; + + int num_qos_levels = s->params->num_qos_levels; + + if(num_qos_levels > 1) + vcg = get_vcg_from_category(msg); + + msg->num_cll++; + tw_stime ts = codes_local_latency(lp); + s->vc_occupancy[vcg] -= s->params->chunk_size; + + if(s->in_send_loop == 0 && s->terminal_msgs[vcg] != NULL) { + terminal_dally_message *m; + bf->c1 = 1; + tw_event* e = model_net_method_event_new(lp->gid, ts, lp, DRAGONFLY_DALLY, + (void**)&m, NULL); + m->type = T_SEND; + m->magic = terminal_magic_num; + s->in_send_loop = 1; + tw_event_send(e); + } + return; +} + +void +terminal_dally_event( terminal_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp ) +{ + s->fwd_events++; + s->ross_sample.fwd_events++; + //*(int *)bf = (int)0; + assert(msg->magic == terminal_magic_num); + + rc_stack_gc(lp, s->st); + switch(msg->type) + { + case T_GENERATE: + packet_generate(s,bf,msg,lp); + break; + + case T_ARRIVE: + packet_arrive(s,bf,msg,lp); + break; + + case T_SEND: + packet_send(s,bf,msg,lp); + break; + + case T_BUFFER: + terminal_buf_update(s, bf, msg, lp); + break; + + case T_BANDWIDTH: + issue_bw_monitor_event(s, bf, msg, lp); + break; + + default: + printf("\n LP %d Terminal message type not supported %d ", (int)lp->gid, msg->type); + tw_error(TW_LOC, "Msg type not supported"); + } +} + +void +dragonfly_dally_terminal_final( terminal_state * s, + tw_lp * lp ) +{ + model_net_print_stats(lp->gid, s->dragonfly_stats_array); + int written = 0; + + if(s->terminal_id == 0) + { + written += sprintf(s->output_buf + written, "# Format < dest_type> "); +// fprintf(fp, "# Format <# Flits/Packets finished> \n"); + } + written += sprintf(s->output_buf + written, "\n%u %s %llu %s %s %llu %lf", + s->terminal_id, "T", s->router_id, "R", "CN", LLU(s->total_msg_size), s->busy_time); + + lp_io_write(lp->gid, (char*)"dragonfly-link-stats", written, s->output_buf); + + if(s->terminal_id == 0) + { + //fclose(dragonfly_term_bw_log); + char meta_filename[128]; + sprintf(meta_filename, "dragonfly-cn-stats.meta"); + + FILE * fp = NULL; + fp = fopen(meta_filename, "w+"); + if(fp) + fprintf(fp, "# Format <# Flits/Packets finished> \n"); + fclose(fp); + } + + written = 0; + written += sprintf(s->output_buf2 + written, "%llu %llu %lf %lf %lf %lf %llu %lf\n", + lp->gid, s->terminal_id, s->total_time/s->finished_chunks, + s->busy_time, s->max_latency, s->min_latency, + s->finished_packets, (double)s->total_hops/s->finished_chunks); + + if(s->terminal_msgs[0] != NULL) + printf("[%llu] leftover terminal messages \n", LLU(lp->gid)); + lp_io_write(lp->gid, (char*)"dragonfly-cn-stats", written, s->output_buf2); + + + //if(s->packet_gen != s->packet_fin) + // printf("\n generated %d finished %d ", s->packet_gen, s->packet_fin); + + if(s->rank_tbl) + qhash_finalize(s->rank_tbl); + + rc_stack_destroy(s->st); + free(s->vc_occupancy); + free(s->terminal_msgs); + free(s->terminal_msgs_tail); +} + +void dragonfly_dally_router_final(router_state * s, + tw_lp * lp) +{ + free(s->global_channel); + int i, j; + for(i = 0; i < s->params->radix; i++) { + for(j = 0; j < s->params->num_vcs; j++) { + if(s->queued_msgs[i][j] != NULL) { + printf("[%llu] leftover queued messages %d %d %d\n", LLU(lp->gid), i, j, + s->vc_occupancy[i][j]); + } + if(s->pending_msgs[i][j] != NULL) { + printf("[%llu] lefover pending messages %d %d\n", LLU(lp->gid), i, j); + } + } + } + + if(s->router_id == 0) + fclose(dragonfly_rtr_bw_log); + + rc_stack_destroy(s->st); + + const dragonfly_param *p = s->params; + int written = 0; + int src_rel_id = s->router_id % p->num_routers; + int local_grp_id = s->router_id / p->num_routers; + for(int d = 0; d < p->intra_grp_radix; d++) + { + if(d != src_rel_id) + { + int dest_ab_id = local_grp_id * p->num_routers + d; + written += sprintf(s->output_buf + written, "\n%d %s %d %s %s %llu %lf", + s->router_id, + "R", + dest_ab_id, + "R", + "L", + s->link_traffic[d], + s->busy_time[d]); + } + } + map< int, vector > &curMap = interGroupLinks[s->router_id]; + map< int, vector >::iterator it = curMap.begin(); + for(; it != curMap.end(); it++) + { + /* TODO: Works only for single global connections right now. Make it functional + * for a 2-D dragonfly. */ + for(int l = 0; l < it->second.size(); l++) { + int dest_rtr_id = it->second[l].dest; + int offset = it->second[l].offset; + assert(offset >= 0 && offset < p->num_global_channels); + written += sprintf(s->output_buf + written, "\n%d %s %d %s %s %llu %lf", + s->router_id, + "R", + dest_rtr_id, + "R", + "G", + s->link_traffic[offset], + s->busy_time[offset]); + } + } + sprintf(s->output_buf + written, "\n"); + lp_io_write(lp->gid, (char*)"dragonfly-link-stats", written, s->output_buf); + + /*if(!s->router_id) + { + written = sprintf(s->output_buf, "# Format "); + written += sprintf(s->output_buf + written, "# Router ports in the order: %d green links, %d black links %d global channels \n", + p->num_router_cols * p->num_row_chans, p->num_router_rows * p->num_col_chans, p->num_global_channels); + } + written += sprintf(s->output_buf2 + written, "\n %llu %d %d", + LLU(lp->gid), + s->router_id / p->num_routers, + s->router_id % p->num_routers); + + for(int d = 0; d < p->radix; d++) + written += sprintf(s->output_buf2 + written, " %lld", LLD(s->link_traffic[d])); + + lp_io_write(lp->gid, (char*)"dragonfly-router-traffic", written, s->output_buf2); + */ + if (!g_tw_mynode) { + if (s->router_id == 0) { + if (PRINT_CONFIG) + dragonfly_print_params(s->params); + } + } +} + +static vector get_intra_router(router_state * s, int src_router_id, int dest_router_id, int num_rtrs_per_grp) +{ + /* Check for intra-group connections */ + int src_rel_id = src_router_id % num_rtrs_per_grp; + int dest_rel_id = dest_router_id % num_rtrs_per_grp; + + int group_id = src_router_id / num_rtrs_per_grp; + + map< int, vector > &curMap = intraGroupLinks[src_rel_id]; + map< int, vector >::iterator it_src = curMap.begin(); + int offset = group_id * num_rtrs_per_grp; + vector intersection; + + /* If no direct connection exists then find an intermediate connection */ + if(curMap.find(dest_rel_id) == curMap.end()) + { + assert(0); + int src_col = src_rel_id % s->params->num_router_cols; + int src_row = src_rel_id / s->params->num_router_cols; + + int dest_col = dest_rel_id % s->params->num_router_cols; + int dest_row = dest_rel_id / s->params->num_router_cols; + + //row first, column second + /*int choice1 = src_row * s->params->num_router_cols + dest_col; + int choice2 = dest_row * s->params->num_router_cols + src_col; + intersection.push_back(offset + choice1); + intersection.push_back(offset + choice2);*/ + map > &destMap = intraGroupLinks[dest_rel_id]; + map< int, vector >::iterator it_dest = destMap.begin(); + + while(it_src != curMap.end() && it_dest != destMap.end()) + { + if(it_src->first < it_dest->first) + it_src++; + else + if(it_dest->first < it_src->first) + it_dest++; + else { + intersection.push_back(offset + it_src->first); + it_src++; + it_dest++; + } + } + + } + else + { + /* There is a direct connection */ + intersection.push_back(dest_router_id); + } + return intersection; +} +int find_chan(int router_id, + int dest_grp_id, + int num_routers) +{ + int my_grp_id = router_id / num_routers; + for(int i = 0; i < connectionList[my_grp_id][dest_grp_id].size(); i++) + { + if(connectionList[my_grp_id][dest_grp_id][i] == router_id) + return i; + } + return -1; +} +/* get the next stop for the current packet + * determines if it is a router within a group, a router in another group + * or the destination terminal */ +static tw_lpid +get_next_stop(router_state * s, + tw_lp * lp, + tw_bf * bf, + terminal_dally_message * msg, + int dest_router_id, + int adap_chan, + int do_chan_selection, + int get_direct_con, + short* rng_counter) +{ + int dest_lp; + tw_lpid router_dest_id; + int dest_group_id; + + int local_router_id = s->router_id; + int my_grp_id = s->router_id / s->params->num_routers; + dest_group_id = dest_router_id / s->params->num_routers; + int origin_grp_id = msg->origin_router_id / s->params->num_routers; + + int select_chan = -1; + /* If the packet has arrived at the destination router */ + if(dest_router_id == local_router_id) + { + dest_lp = msg->dest_terminal_id; + return dest_lp; + } + /* If the packet has arrived at the destination group */ + if(s->group_id == dest_group_id) + { + vector next_stop = get_intra_router(s, local_router_id, dest_router_id, s->params->num_routers); + assert(!next_stop.empty()); + assert(next_stop.size() == 1); + (*rng_counter)++; + select_chan = tw_rand_integer(lp->rng, 0, next_stop.size() - 1); + + codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, s->anno, 0, next_stop[select_chan] / num_routers_per_mgrp, + next_stop[select_chan] % num_routers_per_mgrp, &router_dest_id); + + if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID) + printf("\n Next stop is %ld ", next_stop[select_chan]); + + return router_dest_id; + } + + /* If the packet is at the source router then select a global channel among + * the many global channels available (unless one already specified by + * adaptive routing). do_chan_selection is turned on in case prog-adaptive + * routing has just decided to take a non-minimal route. */ + if(msg->last_hop == TERMINAL + || s->router_id == msg->intm_rtr_id + || (routing == PROG_ADAPTIVE && do_chan_selection)) + { + if(adap_chan >= 0) + select_chan = adap_chan; + else + { + /* Only for non-minimal routes, direct connections are preferred + * (global ports) */ + if(get_direct_con) + { + if(interGroupLinks[s->router_id][dest_group_id].size() > 1) + select_chan = find_chan(s->router_id, dest_group_id, s->params->num_routers); + assert(select_chan >= 0); + } + else + { + (*rng_counter)++; + select_chan = tw_rand_integer(lp->rng, 0, connectionList[my_grp_id][dest_group_id].size() - 1); + } + } + dest_lp = connectionList[my_grp_id][dest_group_id][select_chan]; + //printf("\n my grp %d dest router %d dest_lp %d rid %d chunk id %d", my_grp_id, dest_router_id, dest_lp, s->router_id, msg->chunk_id); + msg->saved_src_dest = dest_lp; + } + /* Get the number of global channels connecting the origin and destination + * groups */ + //assert(msg->saved_src_chan >= 0 && msg->saved_src_chan < connectionList[my_grp_id][dest_group_id].size()); + + if(s->router_id == msg->saved_src_dest) + { + (*rng_counter)++; + select_chan = tw_rand_integer(lp->rng, 0, interGroupLinks[s->router_id][dest_group_id].size() - 1); + bLink bl = interGroupLinks[s->router_id][dest_group_id][select_chan]; + dest_lp = bl.dest; + } + else + { + /* Connection within the group */ + bf->c21 = 1; + vector dests = get_intra_router(s, local_router_id, msg->saved_src_dest, s->params->num_routers); + assert(!dests.empty()); + (*rng_counter)++; + select_chan = tw_rand_integer(lp->rng, 0, dests.size() - 1); + + /* If there is a direct connection */ + dest_lp = dests[select_chan]; + } + if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID) + printf("\n Next stop is %ld ", dest_lp); + codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, s->anno, 0, dest_lp / num_routers_per_mgrp, + dest_lp % num_routers_per_mgrp, &router_dest_id); + + return router_dest_id; +} +/* gets the output port corresponding to the next stop of the message */ +static int +get_output_port( router_state * s, + terminal_dally_message * msg, + tw_lp * lp, + tw_bf * bf, + int next_stop, + short* rng_counter) +{ + int output_port = -1; + int rand_offset = -1; + int terminal_id = codes_mapping_get_lp_relative_id(msg->dest_terminal_id, 0, 0); + const dragonfly_param *p = s->params; + + int local_router_id = codes_mapping_get_lp_relative_id(next_stop, 0, 0); + int src_router = s->router_id; + int dest_router = local_router_id; + + if((tw_lpid)next_stop == msg->dest_terminal_id) + { + /* Make a random number selection (only for reverse computation) */ + (*rng_counter)++; + int rand_sel = tw_rand_integer(lp->rng, 0, terminal_id); + output_port = p->intra_grp_radix + p->num_global_channels + ( terminal_id % p->num_cn); + } + else + { + int intm_grp_id = local_router_id / p->num_routers; + int rand_offset = -1; + + if(intm_grp_id != s->group_id) + { + /* traversing a global channel */ + vector &curVec = interGroupLinks[src_router][intm_grp_id]; + + if(interGroupLinks[src_router][intm_grp_id].size() == 0) + printf("\n Source router %d intm_grp_id %d ", src_router, intm_grp_id); + + assert(interGroupLinks[src_router][intm_grp_id].size() > 0); + + (*rng_counter)++; + rand_offset = tw_rand_integer(lp->rng, 0, interGroupLinks[src_router][intm_grp_id].size()-1); + + assert(rand_offset >= 0 && rand_offset < s->params->num_global_channels); + + bLink bl = interGroupLinks[src_router][intm_grp_id][rand_offset]; + int channel_id = bl.offset; + + output_port = p->intra_grp_radix + channel_id; + } + else + { + int intra_rtr_id = (local_router_id % p->num_routers); + int intragrp_rtr_id = s->router_id % p->num_routers; + + int src_col = intragrp_rtr_id % p->num_router_cols; + int src_row = intragrp_rtr_id / p->num_router_cols; + + int dest_col = intra_rtr_id % p->num_router_cols; + int dest_row = intra_rtr_id / p->num_router_cols; + + if(src_row == dest_row) + { + (*rng_counter)++; + int offset = tw_rand_integer(lp->rng, 0, p->num_row_chans -1); + output_port = dest_col * p->num_row_chans + offset; + assert(output_port < (s->params->num_router_cols * p->num_row_chans)); + } + else if(src_col == dest_col) + { + assert(0); + (*rng_counter)++; + int offset = tw_rand_integer(lp->rng, 0, p->num_col_chans -1); + output_port = (p->num_router_cols * p->num_row_chans) + dest_row * p->num_col_chans + offset; + assert(output_port < p->intra_grp_radix); + } + else + { + tw_error(TW_LOC, "\n Invalid dragonfly connectivity src row %d dest row %d src col %d dest col %d src %d dest %d", + src_row, dest_row, src_col, dest_col, intragrp_rtr_id, intra_rtr_id); + } + + } + } + return output_port; +} + +static void do_local_adaptive_routing(router_state * s, + tw_lp * lp, + terminal_dally_message * msg, + tw_bf * bf, + int dest_router_id, + int intm_router_id, + short* rng_counter) +{ + tw_lpid min_rtr_id, nonmin_rtr_id; + int min_port, nonmin_port; + + int dest_grp_id = dest_router_id / s->params->num_routers; + int intm_grp_id = intm_router_id / s->params->num_routers; + int my_grp_id = s->router_id / s->params->num_routers; + + if(my_grp_id != dest_grp_id || my_grp_id != intm_grp_id) + tw_error(TW_LOC, "\n Invalid local routing my grp id %d dest_gid %d intm_gid %d intm rid %d", + my_grp_id, dest_grp_id, intm_grp_id, intm_router_id); + + int min_chan=-1, nonmin_chan=-1; + vector next_min_stops = get_intra_router(s, s->router_id, dest_router_id, s->params->num_routers); + vector next_nonmin_stops = get_intra_router(s, s->router_id, intm_router_id, s->params->num_routers); + + (*rng_counter)++; + min_chan = tw_rand_integer(lp->rng, 0, next_min_stops.size() - 1); + (*rng_counter)++; + nonmin_chan = tw_rand_integer(lp->rng, 0, next_nonmin_stops.size() - 1); + + codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, s->anno, 0, next_min_stops[min_chan] / num_routers_per_mgrp, + next_min_stops[min_chan] % num_routers_per_mgrp, &min_rtr_id); + codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, s->anno, 0, next_nonmin_stops[nonmin_chan] / num_routers_per_mgrp, + next_nonmin_stops[nonmin_chan] % num_routers_per_mgrp, &nonmin_rtr_id); + + min_port = get_output_port(s, msg, lp, bf, min_rtr_id, rng_counter); + nonmin_port = get_output_port(s, msg, lp, bf, nonmin_rtr_id, rng_counter); + + int min_port_count = 0, nonmin_port_count = 0; + + for(int k = 0; k < s->params->num_vcs; k++) + min_port_count += s->vc_occupancy[min_port][k]; + min_port_count += s->queued_count[min_port]; + + for(int k = 0; k < s->params->num_vcs; k++) + nonmin_port_count += s->vc_occupancy[nonmin_port][k]; + nonmin_port_count += s->queued_count[nonmin_port]; + + int local_stop = -1; + tw_lpid global_stop; + + if(BIAS_MIN == 1) + { + nonmin_port_count = nonmin_port_count * 2; + } + + msg->path_type = MINIMAL; + +// if(nonmin_port_count * num_intra_nonmin_hops > min_port_count * num_intra_min_hops) + if(min_port_count > adaptive_threshold && min_port_count > nonmin_port_count) + { + msg->path_type = NON_MINIMAL; + } +} +/* This function gets a randomly selected router from a group with which the + * current router has direct connections... */ +static vector get_indirect_conns(router_state * s, tw_lp * lp, int dest_grp_id, short* rng_counter) +{ + map< int, vector > &curMap = interGroupLinks[s->router_id]; + map< int, vector >::iterator it = curMap.begin(); + vector nonmin_ports; + int num_routers = s->params->num_routers; + (*rng_counter)++; + int dest_idx = tw_rand_integer(lp->rng, 0, num_routers - 1); + + for(; it != curMap.end(); it++) { + if(it->first != dest_grp_id) + { + int grp_id = it->first; + int begin = grp_id * s->params->num_routers; + for(int l = 0; l < it->second.size(); l++) + { + nonmin_ports.push_back(begin + dest_idx); + } + } + } + return nonmin_ports; +} +static int get_port_score(router_state * s, + int port, + int biase) +{ + int port_count = 0; + + if(port <= 0) + return INT_MAX; + + for(int k = 0; k < s->params->num_vcs; k++) + { + port_count += s->vc_occupancy[port][k]; + } + port_count += s->queued_count[port]; + + if(biase) + port_count = port_count * 2; + return port_count; +} +static int do_global_adaptive_routing( router_state * s, + tw_lp * lp, + terminal_dally_message * msg, + tw_bf * bf, + int dest_router_id, + int intm_id_a, + int intm_id_b, + short* rng_counter) { + int next_chan = -1; + // decide which routing to take + // get the queue occupancy of both the minimal and non-minimal output ports + + bool local_min = false; + int num_routers = s->params->num_routers; + int dest_grp_id = dest_router_id / num_routers; + int intm_grp_id_a = intm_id_a / num_routers; + int intm_grp_id_b = intm_id_b / num_routers; + + assert(intm_grp_id_a >= 0 && intm_grp_id_b >=0); + + int my_grp_id = s->router_id / num_routers; + + int num_min_chans; + vector direct_intra; + if(my_grp_id == dest_grp_id) + { + local_min = true; + direct_intra = get_intra_router(s, s->router_id, dest_router_id, num_routers); + num_min_chans = direct_intra.size(); + } + else + { + num_min_chans = connectionList[my_grp_id][dest_grp_id].size(); + } + int num_nonmin_chans_a = connectionList[my_grp_id][intm_grp_id_a].size(); + int num_nonmin_chans_b = connectionList[my_grp_id][intm_grp_id_b].size(); + int min_chan_a = -1, min_chan_b = -1, nonmin_chan_a = -1, nonmin_chan_b = -1; + int min_rtr_a, min_rtr_b, nonmin_rtr_a, nonmin_rtr_b; + vector dest_rtr_as, dest_rtr_bs; + int min_port_a, min_port_b, nonmin_port_a, nonmin_port_b; + tw_lpid min_rtr_a_id, min_rtr_b_id, nonmin_rtr_a_id, nonmin_rtr_b_id; + bool noIntraA, noIntraB; + + /* two possible routes to minimal destination */ + (*rng_counter) += 2; + min_chan_a = tw_rand_integer(lp->rng, 0, num_min_chans - 1); + min_chan_b = tw_rand_integer(lp->rng, 0, num_min_chans - 1); + + if(min_chan_a == min_chan_b && num_min_chans > 1) + min_chan_b = (min_chan_a + 1) % num_min_chans; + + int chana1 = 0; + + assert(min_chan_a >= 0); + if(!local_min) + { + min_rtr_a = connectionList[my_grp_id][dest_grp_id][min_chan_a]; + noIntraA = false; + if(min_rtr_a == s->router_id) { + noIntraA = true; + min_rtr_a = interGroupLinks[s->router_id][dest_grp_id][chana1].dest; + } + if(num_min_chans > 1) { + assert(min_chan_b >= 0); + noIntraB = false; + min_rtr_b = connectionList[my_grp_id][dest_grp_id][min_chan_b]; + + if(min_rtr_b == s->router_id) { + noIntraB = true; + min_rtr_b = interGroupLinks[s->router_id][dest_grp_id][chana1].dest; + } + } + + if(noIntraA) { + dest_rtr_as.push_back(min_rtr_a); + } else { + dest_rtr_as = get_intra_router(s, s->router_id, min_rtr_a, s->params->num_routers); + } + } + else + { + noIntraA = true; + noIntraB = true; + + assert(direct_intra.size() > 0); + min_rtr_a = direct_intra[min_chan_a]; + dest_rtr_as.push_back(min_rtr_a); + + if(num_min_chans > 1) + min_rtr_b = direct_intra[min_chan_b]; + } + int dest_rtr_b_sel; + (*rng_counter)++; + int dest_rtr_a_sel = tw_rand_integer(lp->rng, 0, dest_rtr_as.size() - 1); + + codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, s->anno, 0, dest_rtr_as[dest_rtr_a_sel] / num_routers_per_mgrp, + dest_rtr_as[dest_rtr_a_sel] % num_routers_per_mgrp, &min_rtr_a_id); + + min_port_a = get_output_port(s, msg, lp, bf, min_rtr_a_id, rng_counter); + + if(num_min_chans > 1) + { + if(noIntraB) { + dest_rtr_bs.push_back(min_rtr_b); + } else { + dest_rtr_bs = get_intra_router(s, s->router_id, min_rtr_b, s->params->num_routers); + } + (*rng_counter)++; + dest_rtr_b_sel = tw_rand_integer(lp->rng, 0, dest_rtr_bs.size() - 1); + codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, s->anno, 0, dest_rtr_bs[dest_rtr_b_sel] / num_routers_per_mgrp, + dest_rtr_bs[dest_rtr_b_sel] % num_routers_per_mgrp, &min_rtr_b_id); + min_port_b = get_output_port(s, msg, lp, bf, min_rtr_b_id, rng_counter); + } + + /* if a direct global channel exists for non-minimal route in the source group then give a priority to that. */ + if(msg->my_l_hop == max_lvc_src_g) + { + assert(routing == PROG_ADAPTIVE); + nonmin_chan_a = find_chan(s->router_id, intm_grp_id_a, num_routers); + nonmin_chan_b = find_chan(s->router_id, intm_grp_id_b, num_routers); + assert(nonmin_chan_a >= 0 && nonmin_chan_b >= 0); + } + /* two possible nonminimal routes */ + (*rng_counter) += 2; + int rand_a = tw_rand_integer(lp->rng, 0, num_nonmin_chans_a - 1); + int rand_b = tw_rand_integer(lp->rng, 0, num_nonmin_chans_b - 1); + + noIntraA = false; + if(nonmin_chan_a != -1) { + /* TODO: For a 2-D dragonfly, this can be more than one link. */ + bf->c25=1; + noIntraA = true; + nonmin_rtr_a = interGroupLinks[s->router_id][intm_grp_id_a][0].dest; + } + else + { + assert(rand_a >= 0); + nonmin_chan_a = rand_a; + nonmin_rtr_a = connectionList[my_grp_id][intm_grp_id_a][rand_a]; + if(nonmin_rtr_a == s->router_id) + noIntraA = true; + } + assert(nonmin_chan_a >= 0); + + if(num_nonmin_chans_b > 0) { + noIntraB = false; + if(nonmin_chan_b != -1) { + bf->c26=1; + noIntraB = true; + nonmin_rtr_b = interGroupLinks[s->router_id][intm_grp_id_b][0].dest; + } + else + { + assert(rand_b >= 0); + nonmin_chan_b = rand_b; + nonmin_rtr_b = connectionList[my_grp_id][intm_grp_id_b][rand_b]; + if(nonmin_rtr_b == s->router_id) + noIntraB = true; + } + assert(nonmin_chan_b >= 0); + } + + if(noIntraA) { + dest_rtr_as.clear(); + dest_rtr_as.push_back(nonmin_rtr_a); + } else { + dest_rtr_as = get_intra_router(s, s->router_id, nonmin_rtr_a, s->params->num_routers); + } + (*rng_counter)++; + dest_rtr_a_sel = tw_rand_integer(lp->rng, 0, dest_rtr_as.size() - 1); + + codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, s->anno, 0, dest_rtr_as[dest_rtr_a_sel] / num_routers_per_mgrp, + dest_rtr_as[dest_rtr_a_sel] % num_routers_per_mgrp, &nonmin_rtr_a_id); + nonmin_port_a = get_output_port(s, msg, lp, bf, nonmin_rtr_a_id, rng_counter); + + assert(nonmin_port_a >= 0); + + if(num_nonmin_chans_b > 0) + { + bf->c11 = 1; + if(noIntraB) { + dest_rtr_bs.clear(); + dest_rtr_bs.push_back(nonmin_rtr_b); + } else { + dest_rtr_bs = get_intra_router(s, s->router_id, nonmin_rtr_b, s->params->num_routers); + } + (*rng_counter)++; + dest_rtr_b_sel = tw_rand_integer(lp->rng, 0, dest_rtr_bs.size() - 1); + codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, s->anno, 0, dest_rtr_bs[dest_rtr_b_sel] / num_routers_per_mgrp, + dest_rtr_bs[dest_rtr_b_sel] % num_routers_per_mgrp, &nonmin_rtr_b_id); + nonmin_port_b = get_output_port(s, msg, lp, bf, nonmin_rtr_b_id, rng_counter); + assert(nonmin_port_b >= 0); + } + int min_port_a_count = 0, min_port_b_count = 0; + int nonmin_port_a_count = 0, nonmin_port_b_count = 0; + + min_port_a_count = get_port_score(s, min_port_a, 0); + + if(num_min_chans > 1) + { + min_port_b_count = get_port_score(s, min_port_b, 0); + } + + nonmin_port_a_count = get_port_score(s, nonmin_port_a, BIAS_MIN); + + if(num_nonmin_chans_b > 0) + { + assert(nonmin_port_b >= 0); + nonmin_port_b_count += get_port_score(s, nonmin_port_b, BIAS_MIN); + } + int next_min_stop = -1, next_nonmin_stop = -1; + int next_min_count = -1, next_nonmin_count = -1; + + /* First compare which of the nonminimal ports has less congestions */ + int sel_nonmin = 0; + if(num_nonmin_chans_b > 0 && nonmin_port_a_count > nonmin_port_b_count) + { + next_nonmin_count = nonmin_port_b_count; + next_nonmin_stop = nonmin_chan_b; + sel_nonmin = 1; + } + else + { + next_nonmin_count = nonmin_port_a_count; + next_nonmin_stop = nonmin_chan_a; + } + /* do the same for minimal ports */ + if(num_min_chans > 1 && min_port_a_count > min_port_b_count) + { + next_min_count = min_port_b_count; + next_min_stop = min_chan_b; + } + else + { + next_min_count = min_port_a_count; + next_min_stop = min_chan_a; + } + + /* Now compare the least congested minimal and non-minimal routes */ + if(next_min_count > adaptive_threshold && next_min_count > next_nonmin_count) + { +// printf("\n Minimal chan %d occupancy %d non-min %d occupancy %d ", next_min_stop, next_min_count, next_nonmin_stop, next_nonmin_count); + next_chan = next_nonmin_stop; + msg->path_type = NON_MINIMAL; + + if(sel_nonmin) + msg->intm_rtr_id = intm_id_b; + else + msg->intm_rtr_id = intm_id_a; + } + else + { + next_chan = next_min_stop; + msg->path_type = MINIMAL; + } + return next_chan; + + // VARIATION 1: + // if(num_min_hops * min_port_count <= num_nonmin_hops * nonmin_port_count) { + // VARIATION 2: + //if(num_min_hops * min_port_count <= (num_nonmin_hops * (q_avg + 1))) { + /*if(min_port_count <= nonmin_port_count) { + msg->path_type = MINIMAL; + next_stop = minimal_next_stop; + msg->intm_group_id = -1; + } + else + { + msg->path_type = NON_MINIMAL; + next_stop = nonmin_next_stop; + }*/ +} + + +static void router_verify_valid_receipt(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp) +{ + if (msg->my_N_hop > s->params->max_hops_notify) + { + printf("Router received a packet with %d hops so far! (Notify on > than %d)\n",msg->my_N_hop, s->params->max_hops_notify); + } + + + bool has_valid_connection; + if (msg->last_hop == TERMINAL) { + tw_lpid src_term_lpgid = msg->src_terminal_id; + int src_term_rel_id; + + try { + src_term_rel_id = codes_mapping_get_lp_relative_id(src_term_lpgid,0,0); + } + catch (...) { + tw_error(TW_LOC, "\nRouter Receipt Verify: Codes Mapping Get LP Rel ID Failure - Terminal"); + } + + has_valid_connection = (s->router_id == (src_term_rel_id / s->params->num_cn)); //a router can only receive a packet from a terminal if that terminal belongs to it + + if (!has_valid_connection) { + tw_error(TW_LOC, "\nRouter received packet from non-existent connection - Terminal\n"); + } + + } + else if (msg->last_hop == LOCAL) { + int rel_id; + + try { + rel_id = codes_mapping_get_lp_relative_id(msg->intm_lp_id,0,0); + } + catch (...) { + tw_error(TW_LOC, "\nRouter Receipt Verify: Codes Mapping Get LP Rel ID Failure - Local"); + } + + int my_loc_id = s->router_id % s->params->num_routers; + int intm_loc_id = rel_id % s->params->num_routers; + + if (intraGroupLinks[my_loc_id][intm_loc_id].size() > 0) + has_valid_connection = true; + else + has_valid_connection = false; + + if (!has_valid_connection) { + tw_error(TW_LOC, "\nRouter received packet from non-existent connection - Local\n"); + } + } + else if (msg->last_hop == GLOBAL) { + int rel_id; + + try { + rel_id = rel_id = codes_mapping_get_lp_relative_id(msg->intm_lp_id,0,0); + } + catch (...) { + tw_error(TW_LOC, "\nRouter Receipt Verify: Codes Mapping Get LP Rel ID Failure - Global"); + } + + int rel_id_grp_id = rel_id / s->params->num_routers; + + if (interGroupLinks[s->router_id][rel_id_grp_id].size() > 0) + has_valid_connection = true; + else + has_valid_connection = false; + + if (!has_valid_connection) { + tw_error(TW_LOC, "\nRouter received packet from non-existent connection - Global\n"); + } + } + else { + tw_error(TW_LOC, "\nUnspecified msg->last_hop when received by a router\n"); + } + +} + +static void router_packet_receive_rc(router_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) +{ + router_rev_ecount++; + router_ecount--; + + int output_port = msg->saved_vc; + int output_chan = msg->saved_channel; + + for(int i = 0 ; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); + + for(int i = 0; i < msg->num_rngs; i++) + tw_rand_reverse_unif(lp->rng); + + if(bf->c1) + s->is_monitoring_bw = 0; + + if(bf->c2) { + terminal_dally_message_list * tail = return_tail(s->pending_msgs[output_port], s->pending_msgs_tail[output_port], output_chan); + delete_terminal_dally_message_list(tail); + s->vc_occupancy[output_port][output_chan] -= s->params->chunk_size; + if(bf->c3) { + s->in_send_loop[output_port] = 0; + } + } + if(bf->c4) { + delete_terminal_dally_message_list(return_tail(s->queued_msgs[output_port], + s->queued_msgs_tail[output_port], output_chan)); + s->queued_count[output_port] -= s->params->chunk_size; + } +} + +/* Packet arrives at the router and a credit is sent back to the sending terminal/router */ +static void +router_packet_receive( router_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp ) +{ + msg->num_cll = 0; + msg->num_rngs = 0; + + router_verify_valid_receipt(s, bf, msg, lp); + + router_ecount++; + + tw_stime ts; + + int num_qos_levels = s->params->num_qos_levels; + int vcs_per_qos = s->params->num_vcs / num_qos_levels; + + if(num_qos_levels > 1) + { + if(s->is_monitoring_bw == 0) + { + bf->c1 = 1; + msg->num_cll++; + tw_stime bw_ts = bw_reset_window + codes_local_latency(lp); + terminal_dally_message * m; + tw_event * e = model_net_method_event_new(lp->gid, bw_ts, lp, + DRAGONFLY_DALLY_ROUTER, (void**)&m, NULL); + m->type = R_BANDWIDTH; + m->magic = router_magic_num; + tw_event_send(e); + s->is_monitoring_bw = 1; + } + } + int vcg = 0; + if(num_qos_levels > 1) + vcg = get_vcg_from_category(msg); + + int num_routers = s->params->num_routers; + int num_groups = s->params->num_groups; + int total_routers = s->params->total_routers; + + int next_stop = -1, output_port = -1, output_chan = -1, adap_chan = -1; + int dest_router_id = codes_mapping_get_lp_relative_id(msg->dest_terminal_id, 0, 0) / s->params->num_cn; + int local_grp_id = s->router_id / num_routers; + int src_grp_id = msg->origin_router_id / num_routers; + int dest_grp_id = dest_router_id / num_routers; + int intm_router_id, intm_router_id_b; + short prev_path_type = 0, next_path_type = 0; + + terminal_dally_message_list * cur_chunk = (terminal_dally_message_list*)calloc(1, sizeof(terminal_dally_message_list)); + init_terminal_dally_message_list(cur_chunk, msg); + + /* Set the default route as minimal for prog-adaptive */ + if(cur_chunk->msg.last_hop == TERMINAL) + cur_chunk->msg.path_type = MINIMAL; + + /* for prog-adaptive routing, record the current route of packet */ + int get_direct_con = 0; + prev_path_type = cur_chunk->msg.path_type; + + /* Here we check for local or global adaptive routing. If destination router + * is in the same group then we do a local adaptive routing by selecting an + * intermediate router ID which is in the same group. */ + if(src_grp_id != dest_grp_id) + { + if(cur_chunk->msg.my_l_hop == max_lvc_src_g) + { + vector direct_rtrs = get_indirect_conns(s, lp, dest_grp_id, &(msg->num_rngs)); + assert(direct_rtrs.size() > 0); + msg->num_rngs++; + int indxa = tw_rand_integer(lp->rng, 0, direct_rtrs.size() - 1); + intm_router_id = direct_rtrs[indxa]; + msg->num_rngs++; + int indxb = tw_rand_integer(lp->rng, 0, direct_rtrs.size() - 1); + intm_router_id_b = direct_rtrs[indxb]; + assert(intm_router_id / num_routers != local_grp_id); + assert(intm_router_id_b / num_routers != local_grp_id); + } + else + { + msg->num_rngs += 2; + intm_router_id = tw_rand_integer(lp->rng, 0, total_routers - 1); + intm_router_id_b = tw_rand_integer(lp->rng, 0, total_routers - 1); + if((intm_router_id/num_routers) == local_grp_id) + intm_router_id = (intm_router_id + num_routers) % total_routers; + + if((intm_router_id_b/num_routers) == local_grp_id) + { + intm_router_id_b = (intm_router_id_b + num_routers) % total_routers; + } + + assert(intm_router_id / num_routers != local_grp_id); + assert(intm_router_id_b / num_routers != local_grp_id); + } + } + else + { + msg->num_rngs++; + intm_router_id = (src_grp_id * num_routers) + + (((s->router_id % num_routers) + + tw_rand_integer(lp->rng, 1, num_routers - 1)) % num_routers); + } + + if(routing == NON_MINIMAL) + cur_chunk->msg.path_type = NON_MINIMAL; + + /* progressive adaptive routing is only triggered when packet has to traverse a + * global channel. It doesn't make sense to use it within a group */ + if(dest_grp_id != src_grp_id && + ((cur_chunk->msg.last_hop == TERMINAL + && routing == ADAPTIVE) + || (cur_chunk->msg.path_type == MINIMAL + && routing == PROG_ADAPTIVE +// && s->router_id != dest_router_id))) + && local_grp_id == src_grp_id))) + { + adap_chan = do_global_adaptive_routing(s, lp, &(cur_chunk->msg), bf, dest_router_id, intm_router_id, intm_router_id_b, &(msg->num_rngs)); + } + /* If destination router is in the same group then local adaptive routing is + * triggered */ + if(cur_chunk->msg.origin_router_id == dest_router_id) + cur_chunk->msg.path_type = MINIMAL; + + if(dest_grp_id == src_grp_id && + dest_router_id != s->router_id && + (routing == ADAPTIVE || routing == PROG_ADAPTIVE) + && cur_chunk->msg.last_hop == TERMINAL) + { + do_local_adaptive_routing(s, lp, &(cur_chunk->msg), bf, dest_router_id, intm_router_id, &(msg->num_rngs)); + } + + next_path_type = cur_chunk->msg.path_type; + + if(cur_chunk->msg.path_type != MINIMAL && cur_chunk->msg.path_type != NON_MINIMAL) + tw_error(TW_LOC, "\n packet src %d dest %d intm %d src grp %d dest grp %d", s->router_id, dest_router_id, intm_router_id, src_grp_id, dest_grp_id); + + assert(cur_chunk->msg.path_type == MINIMAL || cur_chunk->msg.path_type == NON_MINIMAL); + + /* If non-minimal, set the random destination */ + if(cur_chunk->msg.last_hop == TERMINAL + && cur_chunk->msg.path_type == NON_MINIMAL + && cur_chunk->msg.intm_rtr_id == -1) + { + cur_chunk->msg.nonmin_done = 0; + cur_chunk->msg.intm_rtr_id = intm_router_id; + } + + if(cur_chunk->msg.path_type == NON_MINIMAL) + { + /* If non-minimal route has completed, mark the packet. + * If not, set the non-minimal destination.*/ + if(s->router_id == cur_chunk->msg.intm_rtr_id) + { + cur_chunk->msg.nonmin_done = 1; + } + else if(cur_chunk->msg.nonmin_done == 0) + { + //printf("\n Setting intm router id to %d %d", dest_router_id, cur_chunk->msg.intm_rtr_id); + dest_router_id = cur_chunk->msg.intm_rtr_id; + } + } + +if(cur_chunk->msg.path_type == NON_MINIMAL) +{ + if((cur_chunk->msg.my_l_hop == max_lvc_src_g && cur_chunk->msg.my_g_hop == min_gvc_src_g) +|| (cur_chunk->msg.my_l_hop == max_lvc_intm_g && cur_chunk->msg.my_g_hop == min_gvc_intm_g)) + get_direct_con = 1; +} + /* If the packet route has just changed to non-minimal with prog-adaptive + * routing, we have to compute the next stop based on that */ + int do_chan_selection = 0; + if(routing == PROG_ADAPTIVE && prev_path_type != next_path_type && s->group_id == src_grp_id) + do_chan_selection = 1; + + next_stop = get_next_stop(s, lp, bf, &(cur_chunk->msg), dest_router_id, adap_chan, do_chan_selection, get_direct_con, &(msg->num_rngs)); + + if(cur_chunk->msg.packet_ID == LLU(TRACK_PKT) && cur_chunk->msg.src_terminal_id == T_ID) + printf("\n Packet %llu arrived at router %u next stop %d final stop %d local hops %d global hops %d", cur_chunk->msg.packet_ID, s->router_id, next_stop, dest_router_id, cur_chunk->msg.my_l_hop, cur_chunk->msg.my_g_hop); + + output_port = get_output_port(s, &(cur_chunk->msg), lp, bf, next_stop, &(msg->num_rngs)); + assert(output_port >= 0); + int max_vc_size = s->params->cn_vc_size; + + cur_chunk->msg.vc_index = output_port; + cur_chunk->msg.next_stop = next_stop; + + output_chan = 0; + if(output_port < s->params->intra_grp_radix) { + if(DF_DALLY == 1) + { + if(cur_chunk->msg.my_g_hop == 1 && cur_chunk->msg.last_hop == GLOBAL) { + output_chan = 1; + } else if(cur_chunk->msg.my_g_hop == 1 && cur_chunk->msg.last_hop == LOCAL){ + output_chan = 2; + } + else if (cur_chunk->msg.my_g_hop == 2) { + output_chan = 3; + } + } + /* TODO: Recheck VC count after things are in order for a 2-D dragonfly. */ + //else { + // if(cur_chunk->msg.my_g_hop == 1 && cur_chunk->msg.last_hop == GLOBAL) { + // output_chan = 2; + // } + // else if (cur_chunk->msg.my_g_hop == 2 && cur_chunk->msg.last_hop == GLOBAL) { + // output_chan = 6; + //} + //} + max_vc_size = s->params->local_vc_size; + cur_chunk->msg.my_l_hop++; + } else if(output_port < (s->params->intra_grp_radix + + s->params->num_global_channels)) { + output_chan = cur_chunk->msg.my_g_hop; + max_vc_size = s->params->global_vc_size; + cur_chunk->msg.my_g_hop++; + } + + assert(output_chan < vcs_per_qos); + output_chan = output_chan + (vcg * vcs_per_qos); + assert(output_chan < s->params->num_vcs); + cur_chunk->msg.output_chan = output_chan; + cur_chunk->msg.my_N_hop++; + + if(output_port >= s->params->radix) + tw_error(TW_LOC, "\n Output port greater than router radix %d ", output_port); + + if(output_chan >= s->params->num_vcs || output_chan < 0) + tw_error(TW_LOC, "\n Output channel %d great than available VCs %d", output_chan, s->params->num_vcs - 1); + //cur_chunk->msg.packet_ID, output_chan, output_port, s->router_id, dest_router_id, cur_chunk->msg.path_type, src_grp_id, dest_grp_id, msg->src_terminal_id); + + if(msg->remote_event_size_bytes > 0) { + void *m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY_ROUTER, msg); + cur_chunk->event_data = (char*)calloc(1, msg->remote_event_size_bytes); + memcpy(cur_chunk->event_data, m_data_src, msg->remote_event_size_bytes); + } + + if(s->vc_occupancy[output_port][output_chan] + s->params->chunk_size + <= max_vc_size) { + bf->c2 = 1; + assert(output_chan < s->params->num_vcs && output_port < s->params->radix); + router_credit_send(s, msg, lp, -1, &(msg->num_rngs)); + + append_to_terminal_dally_message_list( s->pending_msgs[output_port], + s->pending_msgs_tail[output_port], output_chan, cur_chunk); + s->vc_occupancy[output_port][output_chan] += s->params->chunk_size; + if(s->in_send_loop[output_port] == 0) { + bf->c3 = 1; + terminal_dally_message *m; + msg->num_cll++; + ts = codes_local_latency(lp); + tw_event *e = model_net_method_event_new(lp->gid, ts, lp, + DRAGONFLY_DALLY_ROUTER, (void**)&m, NULL); + m->type = R_SEND; + m->magic = router_magic_num; + m->vc_index = output_port; + + tw_event_send(e); + s->in_send_loop[output_port] = 1; + } + } else { + + bf->c4 = 1; + cur_chunk->msg.saved_vc = msg->vc_index; + cur_chunk->msg.saved_channel = msg->output_chan; + assert(output_chan < s->params->num_vcs && output_port < s->params->radix); + append_to_terminal_dally_message_list( s->queued_msgs[output_port], + s->queued_msgs_tail[output_port], output_chan, cur_chunk); + s->queued_count[output_port] += s->params->chunk_size; + } + + msg->saved_vc = output_port; + msg->saved_channel = output_chan; + return; +} + +static void router_packet_send_rc(router_state * s, + tw_bf * bf, + terminal_dally_message * msg, tw_lp * lp) +{ + router_ecount--; + router_rev_ecount++; + int num_qos_levels = s->params->num_qos_levels; + + int output_port = msg->saved_vc; + + if(msg->qos_reset1) + s->qos_status[output_port][0] = Q_ACTIVE; + if(msg->qos_reset2) + s->qos_status[output_port][1] = Q_ACTIVE; + + if(msg->last_saved_qos) + s->last_qos_lvl[output_port] = msg->last_saved_qos; + + if(bf->c1) { + s->in_send_loop[output_port] = 1; + if(bf->c2) { + s->last_buf_full[output_port] = msg->saved_busy_time; + } + return; + } + + for(int i = 0; i < msg->num_rngs; i++) + tw_rand_reverse_unif(lp->rng); + + for(int i = 0; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); + + int output_chan = msg->saved_channel; + if(bf->c8) + { + s->busy_time[output_port] = msg->saved_rcv_time; + s->busy_time_sample[output_port] = msg->saved_sample_time; + s->last_buf_full[output_port] = msg->saved_busy_time; + } + + terminal_dally_message_list * cur_entry = (terminal_dally_message_list *)rc_stack_pop(s->st); + assert(cur_entry); + + int vcg = get_vcg_from_category(&(cur_entry->msg)); + + int msg_size = s->params->chunk_size; + if(cur_entry->msg.packet_size < s->params->chunk_size) + msg_size = cur_entry->msg.packet_size; + + s->qos_data[output_port][vcg] -= msg_size; + s->next_output_available_time[output_port] = msg->saved_available_time; + + if(bf->c11) + { + s->link_traffic[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; + s->link_traffic_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; + s->ross_rsample.link_traffic_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; + s->link_traffic_ross_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; + } + if(bf->c12) + { + s->link_traffic[output_port] -= s->params->chunk_size; + s->link_traffic_sample[output_port] -= s->params->chunk_size; + s->ross_rsample.link_traffic_sample[output_port] -= s->params->chunk_size; + s->link_traffic_ross_sample[output_port] -= s->params->chunk_size; + } + + prepend_to_terminal_dally_message_list(s->pending_msgs[output_port], + s->pending_msgs_tail[output_port], output_chan, cur_entry); + + if(bf->c4) { + s->in_send_loop[output_port] = 1; + return; + } +} +/* routes the current packet to the next stop */ +static void +router_packet_send( router_state * s, + tw_bf * bf, + terminal_dally_message * msg, tw_lp * lp) +{ + router_ecount++; + + tw_stime ts; + tw_event *e; + terminal_dally_message *m; + int output_port = msg->vc_index; + int is_local = 0; + terminal_dally_message_list *cur_entry = NULL; + + /* reset qos rc handler before incrementing it */ + msg->last_saved_qos = -1; + msg->qos_reset1 = -1; + msg->qos_reset2 = -1; + msg->num_cll = 0; + msg->num_rngs = 0; + + int num_qos_levels = s->params->num_qos_levels; + int output_chan = get_next_router_vcg(s, bf, msg, lp); + + msg->saved_vc = output_port; + msg->saved_channel = output_chan; + + if(output_chan < 0) { + bf->c1 = 1; + s->in_send_loop[output_port] = 0; + if(s->queued_count[output_port] && !s->last_buf_full[output_port]) + { + bf->c2 = 1; + msg->saved_busy_time = s->last_buf_full[output_port]; + s->last_buf_full[output_port] = tw_now(lp); + } + return; + } + + cur_entry = s->pending_msgs[output_port][output_chan]; + + assert(cur_entry != NULL); + + if(s->last_buf_full[output_port]) + { + bf->c8 = 1; + msg->saved_rcv_time = s->busy_time[output_port]; + msg->saved_busy_time = s->last_buf_full[output_port]; + msg->saved_sample_time = s->busy_time_sample[output_port]; + s->busy_time[output_port] += (tw_now(lp) - s->last_buf_full[output_port]); + s->busy_time_sample[output_port] += (tw_now(lp) - s->last_buf_full[output_port]); + s->last_buf_full[output_port] = 0.0; + } + + int vcg = get_vcg_from_category(&(cur_entry->msg)); + int to_terminal = 1, global = 0; + double delay = s->params->cn_delay; + double bandwidth = s->params->cn_bandwidth; + + if(output_port < s->params->intra_grp_radix) { + to_terminal = 0; + delay = s->params->local_delay; + bandwidth = s->params->local_bandwidth; + } else if(output_port < s->params->intra_grp_radix + + s->params->num_global_channels) { + to_terminal = 0; + global = 1; + delay = s->params->global_delay; + bandwidth = s->params->global_bandwidth; + } + + uint64_t num_chunks = cur_entry->msg.packet_size / s->params->chunk_size; + if(cur_entry->msg.packet_size < s->params->chunk_size) + num_chunks++; + + double bytetime = delay; + + if(cur_entry->msg.packet_size == 0) + bytetime = bytes_to_ns(CREDIT_SIZE, bandwidth); + + if((cur_entry->msg.packet_size < s->params->chunk_size) && (cur_entry->msg.chunk_id == num_chunks - 1)) + bytetime = bytes_to_ns(cur_entry->msg.packet_size % s->params->chunk_size, bandwidth); + + msg->num_rngs++; + ts = g_tw_lookahead + tw_rand_unif( lp->rng) + bytetime + s->params->router_delay; + + msg->saved_available_time = s->next_output_available_time[output_port]; + s->next_output_available_time[output_port] = + maxd(s->next_output_available_time[output_port], tw_now(lp)); + s->next_output_available_time[output_port] += ts; + + ts = s->next_output_available_time[output_port] - tw_now(lp); + // dest can be a router or a terminal, so we must check + void * m_data; + if (to_terminal) { + // printf("\n next stop %d dest term id %d ", cur_entry->msg.next_stop, cur_entry->msg.dest_terminal_id); + if(cur_entry->msg.next_stop != cur_entry->msg.dest_terminal_id) + printf("\n intra-group radix %d output port %d next stop %d", s->params->intra_grp_radix, output_port, cur_entry->msg.next_stop); + assert(cur_entry->msg.next_stop == cur_entry->msg.dest_terminal_id); + e = model_net_method_event_new(cur_entry->msg.next_stop, + s->next_output_available_time[output_port] - tw_now(lp), lp, + DRAGONFLY_DALLY, (void**)&m, &m_data); + } else { + e = model_net_method_event_new(cur_entry->msg.next_stop, + s->next_output_available_time[output_port] - tw_now(lp), lp, + DRAGONFLY_DALLY_ROUTER, (void**)&m, &m_data); + } + memcpy(m, &cur_entry->msg, sizeof(terminal_dally_message)); + if (m->remote_event_size_bytes){ + memcpy(m_data, cur_entry->event_data, m->remote_event_size_bytes); + } + + if(global) + m->last_hop = GLOBAL; + else + m->last_hop = LOCAL; + + m->intm_lp_id = lp->gid; + m->magic = router_magic_num; + + int msg_size = s->params->chunk_size; + + if((cur_entry->msg.packet_size % s->params->chunk_size) && (cur_entry->msg.chunk_id == num_chunks - 1)) { + bf->c11 = 1; + s->link_traffic[output_port] += (cur_entry->msg.packet_size % + s->params->chunk_size); + s->link_traffic_sample[output_port] += (cur_entry->msg.packet_size % + s->params->chunk_size); + s->ross_rsample.link_traffic_sample[output_port] += (cur_entry->msg.packet_size % + s->params->chunk_size); + s->link_traffic_ross_sample[output_port] += (cur_entry->msg.packet_size % + s->params->chunk_size); + msg_size = cur_entry->msg.packet_size % s->params->chunk_size; + } else { + bf->c12 = 1; + s->link_traffic[output_port] += s->params->chunk_size; + s->link_traffic_sample[output_port] += s->params->chunk_size; + s->ross_rsample.link_traffic_sample[output_port] += s->params->chunk_size; + s->link_traffic_ross_sample[output_port] += s->params->chunk_size; + } + + if(cur_entry->msg.packet_ID == LLU(TRACK_PKT) && cur_entry->msg.src_terminal_id == T_ID) + printf("\n Queuing at the router %d ", s->router_id); + /* Determine the event type. If the packet has arrived at the final + * destination router then it should arrive at the destination terminal + * next.*/ + if(to_terminal) { + m->type = T_ARRIVE; + m->magic = terminal_magic_num; + } else { + /* The packet has to be sent to another router */ + m->magic = router_magic_num; + m->type = R_ARRIVE; + } + tw_event_send(e); + + cur_entry = return_head(s->pending_msgs[output_port], + s->pending_msgs_tail[output_port], output_chan); + rc_stack_push(lp, cur_entry, delete_terminal_dally_message_list, s->st); + + s->qos_data[output_port][vcg] += msg_size; + s->next_output_available_time[output_port] -= s->params->router_delay; + ts -= s->params->router_delay; + + int next_output_chan = get_next_router_vcg(s, bf, msg, lp); + + if(next_output_chan < 0) + { + bf->c4 = 1; + s->in_send_loop[output_port] = 0; + return; + } + cur_entry = s->pending_msgs[output_port][next_output_chan]; + assert(cur_entry != NULL); + + terminal_dally_message *m_new; + msg->num_rngs++; + ts += g_tw_lookahead + tw_rand_unif(lp->rng); + e = model_net_method_event_new(lp->gid, ts, lp, DRAGONFLY_DALLY_ROUTER, + (void**)&m_new, NULL); + m_new->type = R_SEND; + m_new->magic = router_magic_num; + m_new->vc_index = output_port; + tw_event_send(e); + return; +} + +static void router_buf_update_rc(router_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) +{ + int indx = msg->vc_index; + int output_chan = msg->output_chan; + s->vc_occupancy[indx][output_chan] += s->params->chunk_size; + + for(int i = 0; i < msg->num_rngs; i++) + tw_rand_reverse_unif(lp->rng); + + for(int i = 0; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); + + if(bf->c3) + { + s->busy_time[indx] = msg->saved_rcv_time; + s->busy_time_sample[indx] = msg->saved_sample_time; + s->ross_rsample.busy_time[indx] = msg->saved_sample_time; + s->busy_time_ross_sample[indx] = msg->saved_busy_time_ross; + s->last_buf_full[indx] = msg->saved_busy_time; + } + if(bf->c1) { + terminal_dally_message_list* head = return_tail(s->pending_msgs[indx], + s->pending_msgs_tail[indx], output_chan); + prepend_to_terminal_dally_message_list(s->queued_msgs[indx], + s->queued_msgs_tail[indx], output_chan, head); + s->vc_occupancy[indx][output_chan] -= s->params->chunk_size; + s->queued_count[indx] += s->params->chunk_size; + } + if(bf->c2) { + s->in_send_loop[indx] = 0; + } +} +/* Update the buffer space associated with this router LP */ +static void router_buf_update(router_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ + msg->num_cll = 0; + msg->num_rngs = 0; + + int indx = msg->vc_index; + int output_chan = msg->output_chan; + s->vc_occupancy[indx][output_chan] -= s->params->chunk_size; + + if(s->last_buf_full[indx] > 0.0) + { + bf->c3 = 1; + msg->saved_rcv_time = s->busy_time[indx]; + msg->saved_busy_time = s->last_buf_full[indx]; + msg->saved_sample_time = s->busy_time_sample[indx]; + msg->saved_busy_time_ross = s->busy_time_ross_sample[indx]; + s->busy_time[indx] += (tw_now(lp) - s->last_buf_full[indx]); + s->busy_time_sample[indx] += (tw_now(lp) - s->last_buf_full[indx]); + s->ross_rsample.busy_time[indx] += (tw_now(lp) - s->last_buf_full[indx]); + s->busy_time_ross_sample[indx] += (tw_now(lp) - s->last_buf_full[indx]); + s->last_buf_full[indx] = 0.0; + } + if(s->queued_msgs[indx][output_chan] != NULL) { + bf->c1 = 1; + assert(indx < s->params->radix); + assert(output_chan < s->params->num_vcs); + terminal_dally_message_list *head = return_head(s->queued_msgs[indx], + s->queued_msgs_tail[indx], output_chan); + /*if(strcmp(head->msg.category, "medium") == 0) + { + if(head->msg.saved_channel < 4 || head->msg.saved_channel >= 8) + { + tw_error(TW_LOC, "\n invalid output chan %d last-hop %d", head->msg.saved_channel, head->msg.last_hop); + } + }*/ + router_credit_send(s, &head->msg, lp, 1, &(msg->num_rngs)); + append_to_terminal_dally_message_list(s->pending_msgs[indx], + s->pending_msgs_tail[indx], output_chan, head); + s->vc_occupancy[indx][output_chan] += s->params->chunk_size; + s->queued_count[indx] -= s->params->chunk_size; + } + if(s->in_send_loop[indx] == 0 && s->pending_msgs[indx][output_chan] != NULL) { + bf->c2 = 1; + terminal_dally_message *m; + msg->num_cll++; + tw_stime ts = codes_local_latency(lp); + tw_event *e = model_net_method_event_new(lp->gid, ts, lp, DRAGONFLY_DALLY_ROUTER, + (void**)&m, NULL); + m->type = R_SEND; + m->vc_index = indx; + m->magic = router_magic_num; + s->in_send_loop[indx] = 1; + tw_event_send(e); + } + return; +} + +void router_dally_event(router_state * s, tw_bf * bf, terminal_dally_message * msg, + tw_lp * lp) { + s->fwd_events++; + s->ross_rsample.fwd_events++; + rc_stack_gc(lp, s->st); + + assert(msg->magic == router_magic_num); + switch(msg->type) + { + case R_SEND: // Router has sent a packet to an intra-group router (local channel) + router_packet_send(s, bf, msg, lp); + break; + + case R_ARRIVE: // Router has received a packet from an intra-group router (local channel) + router_packet_receive(s, bf, msg, lp); + break; + + case R_BUFFER: + router_buf_update(s, bf, msg, lp); + break; + + case R_BANDWIDTH: + issue_rtr_bw_monitor_event(s, bf, msg, lp); + break; + + default: + printf("\n (%lf) [Router %d] Router Message type not supported %d dest " + "terminal id %d packet ID %d ", tw_now(lp), (int)lp->gid, msg->type, + (int)msg->dest_terminal_id, (int)msg->packet_ID); + tw_error(TW_LOC, "Msg type not supported"); + break; + } +} + +/* Reverse computation handler for a terminal event */ +void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, + terminal_dally_message * msg, tw_lp * lp) { + + s->rev_events++; + s->ross_sample.rev_events++; + switch(msg->type) + { + case T_GENERATE: + packet_generate_rc(s, bf, msg, lp); + break; + + case T_SEND: + packet_send_rc(s, bf, msg, lp); + break; + + case T_ARRIVE: + packet_arrive_rc(s, bf, msg, lp); + break; + + case T_BUFFER: + terminal_buf_update_rc(s, bf, msg, lp); + break; + + case T_BANDWIDTH: + issue_bw_monitor_event_rc(s,bf, msg, lp); + break; + + default: + tw_error(TW_LOC, "\n Invalid terminal event type %d ", msg->type); + } +} + +/* Reverse computation handler for a router event */ +void router_dally_rc_event_handler(router_state * s, tw_bf * bf, + terminal_dally_message * msg, tw_lp * lp) { + s->rev_events++; + s->ross_rsample.rev_events++; + + switch(msg->type) { + case R_SEND: + router_packet_send_rc(s, bf, msg, lp); + break; + case R_ARRIVE: + router_packet_receive_rc(s, bf, msg, lp); + break; + + case R_BUFFER: + router_buf_update_rc(s, bf, msg, lp); + break; + + case R_BANDWIDTH: + issue_rtr_bw_monitor_event_rc(s, bf, msg, lp); + break; + } +} + +/* dragonfly compute node and router LP types */ +extern "C" { +tw_lptype dragonfly_dally_lps[] = +{ + // Terminal handling functions + { + (init_f)terminal_dally_init, + (pre_run_f) NULL, + (event_f) terminal_dally_event, + (revent_f) terminal_dally_rc_event_handler, + (commit_f) reset_bw_counters, + (final_f) dragonfly_dally_terminal_final, + (map_f) codes_mapping, + sizeof(terminal_state) + }, + { + (init_f) router_dally_setup, + (pre_run_f) NULL, + (event_f) router_dally_event, + (revent_f) router_dally_rc_event_handler, + (commit_f) reset_rtr_bw_counters, + (final_f) dragonfly_dally_router_final, + (map_f) codes_mapping, + sizeof(router_state), + }, + {NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0}, +}; +} + +/* For ROSS event tracing */ +void custom_dally_dragonfly_event_collect(terminal_dally_message *m, tw_lp *lp, char *buffer, int *collect_flag) +{ + (void)lp; + (void)collect_flag; + + int type = (int) m->type; + memcpy(buffer, &type, sizeof(type)); +} + +void custom_dally_dragonfly_model_stat_collect(terminal_state *s, tw_lp *lp, char *buffer) +{ + (void)lp; + + int index = 0; + tw_lpid id = 0; + long tmp = 0; + tw_stime tmp2 = 0; + + id = s->terminal_id; + memcpy(&buffer[index], &id, sizeof(id)); + index += sizeof(id); + + tmp = s->fin_chunks_ross_sample; + memcpy(&buffer[index], &tmp, sizeof(tmp)); + index += sizeof(tmp); + s->fin_chunks_ross_sample = 0; + + tmp = s->data_size_ross_sample; + memcpy(&buffer[index], &tmp, sizeof(tmp)); + index += sizeof(tmp); + s->data_size_ross_sample = 0; + + tmp = s->fin_hops_ross_sample; + memcpy(&buffer[index], &tmp, sizeof(tmp)); + index += sizeof(tmp); + s->fin_hops_ross_sample = 0; + + tmp2 = s->fin_chunks_time_ross_sample; + memcpy(&buffer[index], &tmp2, sizeof(tmp2)); + index += sizeof(tmp2); + s->fin_chunks_time_ross_sample = 0; + + tmp2 = s->busy_time_ross_sample; + memcpy(&buffer[index], &tmp2, sizeof(tmp2)); + index += sizeof(tmp2); + s->busy_time_ross_sample = 0; + + return; +} + +void custom_dally_dfly_router_model_stat_collect(router_state *s, tw_lp *lp, char *buffer) +{ + (void)lp; + + const dragonfly_param * p = s->params; + int i, index = 0; + + tw_lpid id = 0; + tw_stime tmp = 0; + int64_t tmp2 = 0; + + id = s->router_id; + memcpy(&buffer[index], &id, sizeof(id)); + index += sizeof(id); + + for(i = 0; i < p->radix; i++) + { + tmp = s->busy_time_ross_sample[i]; + memcpy(&buffer[index], &tmp, sizeof(tmp)); + index += sizeof(tmp); + s->busy_time_ross_sample[i] = 0; + + tmp2 = s->link_traffic_ross_sample[i]; + memcpy(&buffer[index], &tmp2, sizeof(tmp2)); + index += sizeof(tmp2); + s->link_traffic_ross_sample[i] = 0; + } + return; +} + +static const st_model_types *custom_dally_dragonfly_get_model_types(void) +{ + return(&custom_dally_dragonfly_model_types[0]); +} + +static const st_model_types *custom_dally_dfly_router_get_model_types(void) +{ + return(&custom_dally_dragonfly_model_types[1]); +} + +static void custom_dally_dragonfly_register_model_types(st_model_types *base_type) +{ + st_model_type_register(LP_CONFIG_NM_TERM, base_type); +} + +static void custom_dally_router_register_model_types(st_model_types *base_type) +{ + st_model_type_register(LP_CONFIG_NM_ROUT, base_type); +} +/*** END of ROSS event tracing additions */ + +/* returns the dragonfly lp type for lp registration */ +static const tw_lptype* dragonfly_dally_get_cn_lp_type(void) +{ + return(&dragonfly_dally_lps[0]); +} +static const tw_lptype* router_dally_get_lp_type(void) +{ + return (&dragonfly_dally_lps[1]); +} + +static void dragonfly_dally_register(tw_lptype *base_type) { + lp_type_register(LP_CONFIG_NM_TERM, base_type); +} + +static void router_dally_register(tw_lptype *base_type) { + lp_type_register(LP_CONFIG_NM_ROUT, base_type); +} + +extern "C" { +/* data structure for dragonfly statistics */ +struct model_net_method dragonfly_dally_method = +{ + 0, + dragonfly_dally_configure, + dragonfly_dally_register, + dragonfly_dally_packet_event, + dragonfly_dally_packet_event_rc, + NULL, + NULL, + dragonfly_dally_get_cn_lp_type, + dragonfly_dally_get_msg_sz, + dragonfly_dally_report_stats, + NULL, + NULL, + NULL,//(event_f)dragonfly_dally_sample_fn, + NULL,//(revent_f)dragonfly_dally_sample_rc_fn, + (init_f)dragonfly_dally_sample_init, + NULL,//(final_f)dragonfly_dally_sample_fin + custom_dally_dragonfly_register_model_types, + custom_dally_dragonfly_get_model_types, +}; + +struct model_net_method dragonfly_dally_router_method = +{ + 0, + NULL, // handled by dragonfly_configure + router_dally_register, + NULL, + NULL, + NULL, + NULL, + router_dally_get_lp_type, + dragonfly_dally_get_msg_sz, + NULL, // not yet supported + NULL, + NULL, + NULL,//(event_f)dragonfly_dally_rsample_fn, + NULL,//(revent_f)dragonfly_dally_rsample_rc_fn, + (init_f)dragonfly_dally_rsample_init, + NULL,//(final_f)dragonfly_dally_rsample_fin + custom_dally_router_register_model_types, + custom_dally_dfly_router_get_model_types, +}; + +#ifdef ENABLE_CORTEX + +static int dragonfly_dally_get_number_of_compute_nodes(void* topo) { + + const dragonfly_param * params = &all_params[num_params-1]; + if(!params) + return -1.0; + + return params->total_terminals; +} + +static int dragonfly_dally_get_number_of_routers(void* topo) { + // TODO + const dragonfly_param * params = &all_params[num_params-1]; + if(!params) + return -1.0; + + return params->total_routers; +} + +static double dragonfly_dally_get_router_link_bandwidth(void* topo, router_id_t r1, router_id_t r2) { + // TODO: handle this function for multiple cables between the routers. + // Right now it returns the bandwidth of a single cable only. + // Given two router ids r1 and r2, this function should return the bandwidth (double) + // of the link between the two routers, or 0 of such a link does not exist in the topology. + // The function should return -1 if one of the router id is invalid. + const dragonfly_param * params = &all_params[num_params-1]; + if(!params) + return -1.0; + + if(r1 > params->total_routers || r2 > params->total_routers) + return -1.0; + + if(r1 < 0 || r2 < 0) + return -1.0; + + int gid_r1 = r1 / params->num_routers; + int gid_r2 = r2 / params->num_routers; + + if(gid_r1 == gid_r2) + { + int lid_r1 = r1 % params->num_routers; + int lid_r2 = r2 % params->num_routers; + + /* The connection will be there if the router is in the same row or + * same column */ + int src_row_r1 = lid_r1 / params->num_router_cols; + int src_row_r2 = lid_r2 / params->num_router_cols; + + int src_col_r1 = lid_r1 % params->num_router_cols; + int src_col_r2 = lid_r2 % params->num_router_cols; + + if(src_row_r1 == src_row_r2 || src_col_r1 == src_col_r2) + return params->local_bandwidth; + else + return 0.0; + } + else + { + vector &curVec = interGroupLinks[r1][gid_r2]; + vector::iterator it = curVec.begin(); + + for(; it != curVec.end(); it++) + { + bLink bl = *it; + if(bl.dest == r2) + return params->global_bandwidth; + } + + return 0.0; + } + return 0.0; +} + +static double dragonfly_dally_get_compute_node_bandwidth(void* topo, cn_id_t node) { + // TODO + // Given the id of a compute node, this function should return the bandwidth of the + // link connecting this compute node to its router. + // The function should return -1 if the compute node id is invalid. + const dragonfly_param * params = &all_params[num_params-1]; + if(!params) + return -1.0; + + if(node < 0 || node >= params->total_terminals) + return -1.0; + + return params->cn_bandwidth; +} + +static int dragonfly_dally_get_router_neighbor_count(void* topo, router_id_t r) { + // TODO + // Given the id of a router, this function should return the number of routers + // (not compute nodes) connected to it. It should return -1 if the router id + // is not valid. + const dragonfly_param * params = &all_params[num_params-1]; + if(!params) + return -1.0; + + if(r < 0 || r >= params->total_routers) + return -1.0; + + /* Now count the global channels */ + set g_neighbors; + + map< int, vector > &curMap = interGroupLinks[r]; + map< int, vector >::iterator it = curMap.begin(); + for(; it != curMap.end(); it++) { + for(int l = 0; l < it->second.size(); l++) { + g_neighbors.insert(it->second[l].dest); + } + } + return (params->num_router_cols - 1) + (params->num_router_rows - 1) + g_neighbors.size(); +} + +static void dragonfly_dally_get_router_neighbor_list(void* topo, router_id_t r, router_id_t* neighbors) { + // Given a router id r, this function fills the "neighbors" array with the ids of routers + // directly connected to r. It is assumed that enough memory has been allocated to "neighbors" + // (using get_router_neighbor_count to know the required size). + const dragonfly_param * params = &all_params[num_params-1]; + + int gid = r / params->num_routers; + int local_rid = r - (gid * params->num_routers); + int src_row = local_rid / params->num_router_cols; + int src_col = local_rid % params->num_router_cols; + + /* First the routers in the same row */ + int i = 0; + int offset = 0; + while(i < params->num_router_cols) + { + int neighbor = gid * params->num_routers + (src_row * params->num_router_cols) + i; + if(neighbor != r) + { + neighbors[offset] = neighbor; + offset++; + } + i++; + } + + /* Now the routers in the same column. */ + offset = 0; + i = 0; + while(i < params->num_router_rows) + { + int neighbor = gid * params->num_routers + src_col + (i * params->num_router_cols); + + if(neighbor != r) + { + neighbors[offset+params->num_router_cols-1] = neighbor; + offset++; + } + i++; + } + int g_offset = params->num_router_cols + params->num_router_rows - 2; + + /* Now fill up global channels */ + set g_neighbors; + + map< int, vector > &curMap = interGroupLinks[r]; + map< int, vector >::iterator it = curMap.begin(); + for(; it != curMap.end(); it++) { + for(int l = 0; l < it->second.size(); l++) { + g_neighbors.insert(it->second[l].dest); + } + } + /* Now transfer the content of the sets to the array */ + set::iterator it_set; + int count = 0; + + for(it_set = g_neighbors.begin(); it_set != g_neighbors.end(); it_set++) + { + neighbors[g_offset+count] = *it_set; + ++count; + } +} + +static int dragonfly_dally_get_router_location(void* topo, router_id_t r, int32_t* location, int size) { + // TODO + // Given a router id r, this function should fill the "location" array (of maximum size "size") + // with information providing the location of the router in the topology. In a Dragonfly network, + // for instance, this can be the array [ group_id, router_id ] where group_id is the id of the + // group in which the router is, and router_id is the id of the router inside this group (as opposed + // to "r" which is its global id). For a torus network, this would be the dimensions. + // If the "size" is sufficient to hold the information, the function should return the size + // effectively used (e.g. 2 in the above example). If however the function did not manage to use + // the provided buffer, it should return -1. + const dragonfly_param * params = &all_params[num_params-1]; + if(!params) + return -1; + + if(r < 0 || r >= params->total_terminals) + return -1; + + if(size < 2) + return -1; + + int rid = r % params->num_routers; + int gid = r / params->num_routers; + location[0] = gid; + location[1] = rid; + return 2; +} + +static int dragonfly_dally_get_compute_node_location(void* topo, cn_id_t node, int32_t* location, int size) { + // TODO + // This function does the same as dragonfly_dally_get_router_location but for a compute node instead + // of a router. E.g., for a dragonfly network, the location could be expressed as the array + // [ group_id, router_id, terminal_id ] + const dragonfly_param * params = &all_params[num_params-1]; + if(!params) + return -1; + + if(node < 0 || node >= params->total_terminals) + return -1; + + if(size < 3) + return -1; + + int rid = (node / params->num_cn) % params->num_routers; + int rid_global = node / params->num_cn; + int gid = rid_global / params->num_routers; + int lid = node % params->num_cn; + + location[0] = gid; + location[1] = rid; + location[2] = lid; + + return 3; +} + +static router_id_t dragonfly_dally_get_router_from_compute_node(void* topo, cn_id_t node) { + // TODO + // Given a node id, this function returns the id of the router connected to the node, + // or -1 if the node id is not valid. + const dragonfly_param * params = &all_params[num_params-1]; + if(!params) + return -1; + + if(node < 0 || node >= params->total_terminals) + return -1; + + router_id_t rid = node / params->num_cn; + return rid; +} + +static int dragonfly_dally_get_router_compute_node_count(void* topo, router_id_t r) { + // Given the id of a router, returns the number of compute nodes connected to this + // router, or -1 if the router id is not valid. + const dragonfly_param * params = &all_params[num_params-1]; + if(!params) + return -1; + + if(r < 0 || r >= params->total_routers) + return -1; + + return params->num_cn; +} + +static void dragonfly_dally_get_router_compute_node_list(void* topo, router_id_t r, cn_id_t* nodes) { + // TODO: What if there is an invalid router ID? + // Given the id of a router, fills the "nodes" array with the list of ids of compute nodes + // connected to this router. It is assumed that enough memory has been allocated for the + // "nodes" variable to hold all the ids. + const dragonfly_param * params = &all_params[num_params-1]; + + for(int i = 0; i < params->num_cn; i++) + nodes[i] = r * params->num_cn + i; +} + +extern "C" { + +cortex_topology dragonfly_dally_cortex_topology = { +// .internal = + NULL, +// .get_number_of_routers = + dragonfly_dally_get_number_of_routers, +// .get_number_of_compute_nodes = + dragonfly_dally_get_number_of_compute_nodes, +// .get_router_link_bandwidth = + dragonfly_dally_get_router_link_bandwidth, +// .get_compute_node_bandwidth = + dragonfly_dally_get_compute_node_bandwidth, +// .get_router_neighbor_count = + dragonfly_dally_get_router_neighbor_count, +// .get_router_neighbor_list = + dragonfly_dally_get_router_neighbor_list, +// .get_router_location = + dragonfly_dally_get_router_location, +// .get_compute_node_location = + dragonfly_dally_get_compute_node_location, +// .get_router_from_compute_node = + dragonfly_dally_get_router_from_compute_node, +// .get_router_compute_node_count = + dragonfly_dally_get_router_compute_node_count, +// .get_router_compute_node_list = dragonfly_dally_get_router_compute_node_list, + dragonfly_dally_get_router_compute_node_list +}; + +} +#endif + +} diff --git a/src/networks/model-net/model-net-lp.c b/src/networks/model-net/model-net-lp.c index 525feb0..42c00a5 100644 --- a/src/networks/model-net/model-net-lp.c +++ b/src/networks/model-net/model-net-lp.c @@ -391,10 +391,14 @@ void model_net_base_configure(){ offsetof(model_net_wrap_msg, msg.m_dfly_plus); msg_offsets[DRAGONFLY_PLUS_ROUTER] = offsetof(model_net_wrap_msg, msg.m_dfly_plus); + msg_offsets[DRAGONFLY_DALLY] = + offsetof(model_net_wrap_msg, msg.m_dally_dfly); + msg_offsets[DRAGONFLY_DALLY_ROUTER] = + offsetof(model_net_wrap_msg, msg.m_dally_dfly); msg_offsets[SLIMFLY] = offsetof(model_net_wrap_msg, msg.m_slim); msg_offsets[FATTREE] = - offsetof(model_net_wrap_msg, msg.m_fat); + offsetof(model_net_wrap_msg, msg.m_fat); msg_offsets[LOGGP] = offsetof(model_net_wrap_msg, msg.m_loggp); msg_offsets[EXPRESS_MESH] = @@ -402,6 +406,7 @@ void model_net_base_configure(){ msg_offsets[EXPRESS_MESH_ROUTER] = offsetof(model_net_wrap_msg, msg.m_em); + // perform the configuration(s) // This part is tricky, as we basically have to look up all annotations that // have LP names of the form modelnet_*. For each of those, we need to read diff --git a/src/networks/model-net/model-net.c b/src/networks/model-net/model-net.c index 86159ee..16c2edb 100644 --- a/src/networks/model-net/model-net.c +++ b/src/networks/model-net/model-net.c @@ -25,6 +25,8 @@ extern struct model_net_method dragonfly_method; extern struct model_net_method dragonfly_custom_method; extern struct model_net_method dragonfly_plus_method; extern struct model_net_method dragonfly_plus_router_method; +extern struct model_net_method dragonfly_dally_method; +extern struct model_net_method dragonfly_dally_router_method; extern struct model_net_method slimfly_method; extern struct model_net_method fattree_method; extern struct model_net_method dragonfly_router_method; -- 2.26.2