Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
C
codes-dev
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Xin Wang
codes-dev
Commits
69f49064
Commit
69f49064
authored
Jun 08, 2017
by
Misbah Mubarak
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Adding statistics recording for mpi_allreduce, turning off the debug option
parent
346be3da
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
63 additions
and
9 deletions
+63
-9
src/network-workloads/model-net-mpi-replay.c
src/network-workloads/model-net-mpi-replay.c
+63
-9
No files found.
src/network-workloads/model-net-mpi-replay.c
View file @
69f49064
...
...
@@ -36,8 +36,9 @@ static int msg_size_hash_compare(
void
*
key
,
struct
qhash_head
*
link
);
/* NOTE: Message tracking works in sequential mode only! */
int
debug_cols
=
0
;
int
enable_msg_tracking
=
0
;
tw_lpid
TRACK_LP
=
0
;
tw_lpid
TRACK_LP
=
-
1
;
int
unmatched
=
0
;
char
workload_type
[
128
];
...
...
@@ -215,6 +216,15 @@ struct nw_state
/* time spent by the LP in executing the app trace*/
double
start_time
;
double
col_time
;
double
reduce_time
;
int
num_reduce
;
double
all_reduce_time
;
int
num_all_reduce
;
double
elapsed_time
;
/* time spent in compute operations */
double
compute_time
;
...
...
@@ -232,7 +242,7 @@ struct nw_state
struct
qlist_head
completed_reqs
;
tw_stime
cur_interval_end
;
/* Pending wait operation */
struct
pending_waits
*
wait_op
;
...
...
@@ -1397,9 +1407,14 @@ static void codes_exec_mpi_send(nw_state* s,
}
/* convert seconds to ns */
static
tw_stime
s_to_ns
(
tw_stime
ns
)
static
tw_stime
s_to_ns
(
tw_stime
s
)
{
return
(
s
*
(
1000
.
0
*
1000
.
0
*
1000
.
0
));
}
/* convert seconds to ns */
static
tw_stime
ns_to_s
(
tw_stime
ns
)
{
return
(
ns
*
(
1000
.
0
*
1000
.
0
*
1000
.
0
));
return
(
ns
/
(
1000
.
0
*
1000
.
0
*
1000
.
0
));
}
static
void
update_completed_queue_rc
(
nw_state
*
s
,
tw_bf
*
bf
,
nw_message
*
m
,
tw_lp
*
lp
)
...
...
@@ -1658,6 +1673,10 @@ void nw_test_init(nw_state* s, tw_lp* lp)
s
->
sampling_indx
=
0
;
s
->
is_finished
=
0
;
s
->
cur_interval_end
=
0
;
s
->
col_time
=
0
;
s
->
num_reduce
=
0
;
s
->
reduce_time
=
0
;
s
->
all_reduce_time
=
0
;
if
(
!
num_net_traces
)
num_net_traces
=
num_mpi_lps
;
...
...
@@ -1925,13 +1944,27 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t
}
}
break
;
case
CODES_WK_ALLREDUCE
:
{
if
(
bf
->
c1
)
{
s
->
num_all_reduce
--
;
s
->
col_time
=
m
->
rc
.
saved_send_time
;
s
->
all_reduce_time
-=
s
->
col_time
;
}
else
{
s
->
col_time
=
0
;
}
codes_issue_next_event_rc
(
lp
);
}
break
;
case
CODES_WK_BCAST
:
case
CODES_WK_ALLGATHER
:
case
CODES_WK_ALLGATHERV
:
case
CODES_WK_ALLTOALL
:
case
CODES_WK_ALLTOALLV
:
case
CODES_WK_REDUCE
:
case
CODES_WK_ALLREDUCE
:
case
CODES_WK_COL
:
{
s
->
num_cols
--
;
...
...
@@ -2053,16 +2086,34 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l
codes_exec_mpi_wait
(
s
,
bf
,
lp
,
&
mpi_op
);
}
break
;
case
CODES_WK_ALLREDUCE
:
{
s
->
num_cols
++
;
if
(
s
->
col_time
>
0
)
{
bf
->
c1
=
1
;
m
->
rc
.
saved_delay
=
s
->
all_reduce_time
;
s
->
all_reduce_time
+=
(
tw_now
(
lp
)
-
s
->
col_time
);
m
->
rc
.
saved_send_time
=
s
->
col_time
;
s
->
col_time
=
0
;
s
->
num_all_reduce
++
;
}
else
{
s
->
col_time
=
tw_now
(
lp
);
}
codes_issue_next_event
(
lp
);
}
break
;
case
CODES_WK_REDUCE
:
case
CODES_WK_BCAST
:
case
CODES_WK_ALLGATHER
:
case
CODES_WK_ALLGATHERV
:
case
CODES_WK_ALLTOALL
:
case
CODES_WK_ALLTOALLV
:
case
CODES_WK_REDUCE
:
case
CODES_WK_ALLREDUCE
:
case
CODES_WK_COL
:
{
//printf("\n MPI COL ");
s
->
num_cols
++
;
codes_issue_next_event
(
lp
);
}
...
...
@@ -2159,7 +2210,10 @@ void nw_test_finalize(nw_state* s, tw_lp* lp)
if
(
s
->
recv_time
>
max_recv_time
)
max_recv_time
=
s
->
recv_time
;
avg_time
+=
s
->
elapsed_time
;
if
(
debug_cols
)
printf
(
"
\n
Rank %lld Avg all reduce time %lf "
,
s
->
nw_id
,
ns_to_s
(
s
->
all_reduce_time
/
s
->
num_all_reduce
));
avg_time
+=
s
->
elapsed_time
;
avg_comm_time
+=
(
s
->
elapsed_time
-
s
->
compute_time
);
avg_wait_time
+=
s
->
wait_time
;
avg_send_time
+=
s
->
send_time
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment