Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Elsa Gonsiorowski
codes
Commits
f61a0c6c
Commit
f61a0c6c
authored
Nov 10, 2015
by
Misbah Mubarak
Browse files
First pass on the congestion control in torus model, fixing stat collection in dragonfly
parent
7a7092e1
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
codes/net/dragonfly.h
View file @
f61a0c6c
...
...
@@ -71,7 +71,8 @@ struct terminal_message
/* for reverse computation */
short
path_type
;
tw_stime
saved_available_time
;
tw_stime
saved_credit_time
;
tw_stime
saved_avg_time
;
tw_stime
saved_start_time
;
tw_stime
saved_collective_init_time
;
tw_stime
saved_hist_start_time
;
tw_stime
msg_start_time
;
...
...
codes/net/torus.h
View file @
f61a0c6c
...
...
@@ -40,8 +40,7 @@ struct nodes_message
nodes_event_t
type
;
/* for reverse computation */
int
saved_src_dim
;
int
saved_src_dir
;
int
saved_channel
;
/* coordinates of the destination torus nodes */
int
*
dest
;
...
...
@@ -71,6 +70,9 @@ struct nodes_message
/* for reverse computation of a node's fan in*/
int
saved_fan_nodes
;
int
source_channel
;
int
saved_queue
;
/* chunk id of the flit (distinguishes flits) */
int
chunk_id
;
...
...
src/models/network-workloads/conf/modelnet-mpi-test-dragonfly.conf
View file @
f61a0c6c
...
...
@@ -14,7 +14,7 @@ PARAMS
modelnet_order
=(
"dragonfly"
);
# scheduler options
modelnet_scheduler
=
"fcfs"
;
chunk_size
=
"
64
"
;
chunk_size
=
"
512
"
;
# modelnet_scheduler="round-robin";
num_vcs
=
"1"
;
num_routers
=
"6"
;
...
...
@@ -24,6 +24,6 @@ PARAMS
local_bandwidth
=
"5.25"
;
global_bandwidth
=
"4.7"
;
cn_bandwidth
=
"5.25"
;
message_size
=
"5
28
"
;
message_size
=
"5
44
"
;
routing
=
"adaptive"
;
}
src/models/network-workloads/model-net-mpi-wrklds.c
View file @
f61a0c6c
...
...
@@ -13,7 +13,7 @@
#include "codes/model-net.h"
#include "codes/rc-stack.h"
#define TRACE
-1
#define TRACE
0
#define TRACK 0
char
workload_type
[
128
];
...
...
@@ -344,7 +344,7 @@ static void mpi_queue_update(struct mpi_queue_ptrs* mpi_queue, struct codes_work
/* prints the elements of a queue (for debugging purposes). */
static
void
printCompletedQueue
(
nw_state
*
s
,
tw_lp
*
lp
)
{
if
(
TRACE
==
lp
->
g
id
)
if
(
TRACE
==
s
->
nw_
id
)
{
printf
(
"
\n
%lf contents of completed operations queue "
,
tw_now
(
lp
));
struct
completed_requests
*
current
=
s
->
completed_reqs
;
...
...
@@ -370,7 +370,7 @@ static void notify_waits_rc(nw_state* s, tw_bf* bf, tw_lp* lp, nw_message* m, du
s->saved_pending_wait = NULL;
}
*/
if
(
lp
->
g
id
==
TRACE
)
if
(
s
->
nw_
id
==
TRACE
)
printf
(
"
\n
%lf reverse -- notify waits req id %d "
,
tw_now
(
lp
),
completed_req
);
printCompletedQueue
(
s
,
lp
);
...
...
@@ -380,7 +380,7 @@ static void notify_waits_rc(nw_state* s, tw_bf* bf, tw_lp* lp, nw_message* m, du
/* if a wait-elem exists, it means the request ID has been matched*/
if
(
m
->
u
.
rc
.
matched_op
==
2
)
{
if
(
lp
->
g
id
==
TRACE
)
if
(
s
->
nw_
id
==
TRACE
)
{
printf
(
"
\n
%lf matched req id %d "
,
tw_now
(
lp
),
completed_req
);
printCompletedQueue
(
s
,
lp
);
...
...
@@ -409,7 +409,7 @@ static int notify_waits(nw_state* s, tw_bf* bf, tw_lp* lp, nw_message* m, dumpi_
struct
pending_waits
*
wait_elem
=
s
->
pending_waits
;
m
->
u
.
rc
.
matched_op
=
0
;
if
(
lp
->
g
id
==
TRACE
)
if
(
s
->
nw_
id
==
TRACE
)
printf
(
"
\n
%lf notify waits req id %d "
,
tw_now
(
lp
),
completed_req
);
if
(
!
wait_elem
)
...
...
@@ -438,7 +438,7 @@ static int notify_waits(nw_state* s, tw_bf* bf, tw_lp* lp, nw_message* m, dumpi_
{
if
(
wait_elem
->
mpi_op
->
u
.
waits
.
req_ids
[
i
]
==
completed_req
)
{
if
(
lp
->
g
id
==
TRACE
)
if
(
s
->
nw_
id
==
TRACE
)
printCompletedQueue
(
s
,
lp
);
m
->
u
.
rc
.
matched_op
=
1
;
wait_elem
->
num_completed
++
;
...
...
@@ -447,7 +447,7 @@ static int notify_waits(nw_state* s, tw_bf* bf, tw_lp* lp, nw_message* m, dumpi_
if
(
wait_elem
->
num_completed
==
required_count
)
{
if
(
lp
->
g
id
==
TRACE
)
if
(
s
->
nw_
id
==
TRACE
)
{
printf
(
"
\n
%lf req %d completed %d"
,
tw_now
(
lp
),
completed_req
,
wait_elem
->
num_completed
);
printCompletedQueue
(
s
,
lp
);
...
...
@@ -513,7 +513,7 @@ static void codes_exec_mpi_wait(nw_state* s, tw_lp* lp, nw_message * m, struct c
static
void
codes_exec_mpi_wait_all_rc
(
nw_state
*
s
,
nw_message
*
m
,
tw_lp
*
lp
,
struct
codes_workload_op
*
mpi_op
)
{
if
(
lp
->
g
id
==
TRACE
)
if
(
s
->
nw_
id
==
TRACE
)
{
printf
(
"
\n
%lf codes exec mpi waitall reverse %d "
,
tw_now
(
lp
),
m
->
u
.
rc
.
found_match
);
printCompletedQueue
(
s
,
lp
);
...
...
@@ -837,7 +837,6 @@ static void codes_exec_comp_delay(
msg
->
msg_type
=
MPI_OP_GET_NEXT
;
tw_event_send
(
e
);
}
/* reverse computation operation for MPI irecv */
...
...
@@ -1257,7 +1256,7 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
s
->
nw_id
,
s
->
num_completed
);
m
->
u
.
rc
.
saved_op
=
mpi_op
;
if
(
mpi_op
->
op_type
==
CODES_WK_END
)
if
(
mpi_op
->
op_type
==
CODES_WK_END
&&
s
->
num_completed
==
50000
)
{
//rc_stack_push(lp, mpi_op, free, s->st);
s
->
elapsed_time
=
tw_now
(
lp
)
-
s
->
start_time
;
...
...
src/models/networks/model-net/dragonfly.c
View file @
f61a0c6c
...
...
@@ -147,7 +147,6 @@ struct terminal_state
int
*
vc_occupancy
;
// NUM_VC
int
num_vcs
;
tw_stime
terminal_available_time
;
tw_stime
next_credit_available_time
;
terminal_message_list
**
terminal_msgs
;
terminal_message_list
**
terminal_msgs_tail
;
int
in_send_loop
;
...
...
@@ -248,7 +247,6 @@ struct router_state
int
*
global_channel
;
tw_stime
*
next_output_available_time
;
tw_stime
*
next_credit_available_time
;
tw_stime
*
cur_hist_start_time
;
terminal_message_list
***
pending_msgs
;
terminal_message_list
***
pending_msgs_tail
;
...
...
@@ -739,7 +737,6 @@ void router_setup(router_state * r, tw_lp * lp)
r
->
global_channel
=
(
int
*
)
malloc
(
p
->
num_global_channels
*
sizeof
(
int
));
r
->
next_output_available_time
=
(
tw_stime
*
)
malloc
(
p
->
radix
*
sizeof
(
tw_stime
));
r
->
next_credit_available_time
=
(
tw_stime
*
)
malloc
(
p
->
radix
*
sizeof
(
tw_stime
));
r
->
cur_hist_start_time
=
(
tw_stime
*
)
malloc
(
p
->
radix
*
sizeof
(
tw_stime
));
r
->
link_traffic
=
(
int
*
)
malloc
(
p
->
radix
*
sizeof
(
int
));
r
->
cur_hist_num
=
(
int
*
)
malloc
(
p
->
radix
*
sizeof
(
int
));
...
...
@@ -760,7 +757,6 @@ void router_setup(router_state * r, tw_lp * lp)
{
// Set credit & router occupancy
r
->
next_output_available_time
[
i
]
=
0
;
r
->
next_credit_available_time
[
i
]
=
0
;
r
->
cur_hist_start_time
[
i
]
=
0
;
r
->
link_traffic
[
i
]
=
0
;
r
->
cur_hist_num
[
i
]
=
0
;
...
...
@@ -1256,7 +1252,6 @@ void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_message * msg, tw
if
(
msg
->
path_type
==
NON_MINIMAL
)
nonmin_count
--
;
s
->
next_credit_available_time
=
msg
->
saved_credit_time
;
uint64_t
num_chunks
=
msg
->
packet_size
/
s
->
params
->
chunk_size
;
N_finished_chunks
--
;
...
...
@@ -1296,8 +1291,6 @@ void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_message * msg, tw
hash_link
=
qhash_search
(
s
->
rank_tbl
,
&
key
);
tmp
=
qhash_entry
(
hash_link
,
struct
dfly_qhash_entry
,
hash_link
);
total_hops
-=
msg
->
my_N_hop
;
mn_stats
*
stat
;
stat
=
model_net_find_stats
(
msg
->
category
,
s
->
dragonfly_stats_array
);
if
(
bf
->
c1
)
...
...
@@ -1312,16 +1305,16 @@ void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_message * msg, tw
dragonfly_max_latency
=
msg
->
saved_available_time
;
if
(
bf
->
c7
)
if
(
!
hash_link
)
{
s
->
finished_msgs
--
;
total_msg_sz
-=
msg
->
total_size
;
N_finished_msgs
--
;
stat
->
recv_time
-=
tw_now
(
lp
)
-
msg
->
tr
ave
l
_start_time
;
s
->
total_msg_time
-=
(
tw_now
(
lp
)
-
msg
->
msg
_start_time
)
;
stat
->
recv_time
-=
msg
->
s
ave
d
_start_time
;
s
->
total_msg_time
-=
msg
->
saved
_start_time
;
s
->
total_msg_size
-=
msg
->
total_size
;
dragonfly_total_time
-=
(
tw_now
(
lp
)
-
msg
->
tr
ave
l_start
_time
)
;
dragonfly_total_time
-=
msg
->
s
ave
d_avg
_time
;
struct
dfly_qhash_entry
*
d_entry_pop
=
(
struct
dfly_qhash_entry
*
)
rc_stack_pop
(
s
->
st
);
qhash_add
(
s
->
rank_tbl
,
&
key
,
&
(
d_entry_pop
->
hash_link
));
...
...
@@ -1354,7 +1347,7 @@ void send_remote_event(terminal_state * s, terminal_message * msg, tw_lp * lp, t
model_net_set_msg_param
(
MN_MSG_PARAM_START_TIME
,
MN_MSG_PARAM_START_TIME_VAL
,
&
(
msg
->
msg_start_time
));
model_net_event_mctx
(
net_id
,
&
mc_src
,
&
mc_dst
,
msg
->
category
,
msg
->
event_rc
=
model_net_event_mctx
(
net_id
,
&
mc_src
,
&
mc_dst
,
msg
->
category
,
msg
->
sender_lp
,
msg
->
pull_size
,
ts
,
remote_event_size
,
tmp_ptr
,
0
,
NULL
,
lp
);
}
...
...
@@ -1539,10 +1532,12 @@ void packet_arrive(terminal_state * s, tw_bf * bf, terminal_message * msg,
N_finished_msgs
++
;
total_msg_sz
+=
msg
->
total_size
;
dragonfly_total_time
+=
tw_now
(
lp
)
-
msg
->
travel_start_time
;
stat
->
recv_time
+=
tw_now
(
lp
)
-
msg
->
travel_start_time
;
msg
->
saved_avg_time
=
tw_now
(
lp
)
-
msg
->
travel_start_time
;
dragonfly_total_time
+=
msg
->
saved_avg_time
;
msg
->
saved_start_time
=
(
tw_now
(
lp
)
-
msg
->
msg_start_time
);
stat
->
recv_time
+=
msg
->
saved_start_time
;
s
->
finished_msgs
++
;
s
->
total_msg_time
+=
(
tw_now
(
lp
)
-
msg
->
msg
_start_time
)
;
s
->
total_msg_time
+=
msg
->
saved
_start_time
;
s
->
total_msg_size
+=
msg
->
total_size
;
if
(
dragonfly_max_latency
<
tw_now
(
lp
)
-
msg
->
travel_start_time
)
{
...
...
@@ -1923,7 +1918,7 @@ dragonfly_terminal_final( terminal_state * s,
int
written
=
0
;
if
(
!
s
->
terminal_id
)
written
=
sprintf
(
s
->
output_buf
,
"# Format <LP id> <Terminal ID> <
Avg
Msg Size> <
Avg
Msg Time> <# Msgs finished>"
);
written
=
sprintf
(
s
->
output_buf
,
"# Format <LP id> <Terminal ID> <
Total
Msg Size> <
Total
Msg Time> <# Msgs finished>
\n
"
);
written
+=
sprintf
(
s
->
output_buf
+
written
,
"%lu %lu %ld %lf %ld %ld
\n
"
,
lp
->
gid
,
s
->
terminal_id
,
s
->
total_msg_size
,
s
->
total_msg_time
,
s
->
finished_msgs
,
s
->
finished_packets
);
lp_io_write
(
lp
->
gid
,
"dragonfly-msg-stats"
,
written
,
s
->
output_buf
);
...
...
@@ -2042,9 +2037,7 @@ get_next_stop(router_state * s,
if
(
msg
->
last_hop
==
TERMINAL
&&
path
==
NON_MINIMAL
)
{
if
(
dest_group_id
!=
s
->
group_id
)
{
msg
->
intm_group_id
=
intm_id
;
}
}
/******************** DECIDE THE DESTINATION GROUP ***********************/
/* It means that the packet has arrived at the inter-mediate group for non-minimal routing. Reset the group now. */
...
...
@@ -2200,7 +2193,7 @@ static int do_adaptive_routing( router_state * s,
assert
(
num_nonmin_hops
<=
6
);
/* average the local queues of the router */
unsigned
int
q_avg
=
0
;
/*
unsigned int q_avg = 0;
int i;
for( i = 0; i < s->params->radix; i++)
{
...
...
@@ -2209,7 +2202,7 @@ static int do_adaptive_routing( router_state * s,
s->vc_occupancy[i][2];
}
q_avg = q_avg / (s->params->radix - 1);
*/
//int min_out_chan = minimal_out_port;
//int nonmin_out_chan = nonmin_out_port;
...
...
@@ -2311,15 +2304,13 @@ router_packet_receive( router_state * s,
}
else
if
(
msg
->
last_hop
==
TERMINAL
&&
routing
==
ADAPTIVE
)
{
next_stop
=
do_adaptive_routing
(
s
,
bf
,
msg
,
lp
,
dest_router_id
,
intm_id
);
}
else
{
if
(
routing
==
ADAPTIVE
||
routing
==
PROG_ADAPTIVE
)
assert
(
msg
->
path_type
==
MINIMAL
||
msg
->
path_type
==
NON_MINIMAL
);
if
(
routing
==
MINIMAL
||
routing
==
NON_MINIMAL
)
msg
->
path_type
=
routing
;
/*defaults to the routing algorithm if we
don't have adaptive routing here*/
next_stop
=
get_next_stop
(
s
,
bf
,
msg
,
lp
,
msg
->
path_type
,
dest_router_id
,
intm_id
);
}
assert
(
msg
->
path_type
==
MINIMAL
||
msg
->
path_type
==
NON_MINIMAL
);
terminal_message_list
*
cur_chunk
=
(
terminal_message_list
*
)
malloc
(
sizeof
(
terminal_message_list
));
init_terminal_message_list
(
cur_chunk
,
msg
);
...
...
src/models/networks/model-net/torus.c
View file @
f61a0c6c
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment