Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
AutoPerf
autoperf
Commits
cfd65ad0
Commit
cfd65ad0
authored
Mar 23, 2021
by
Sudheer Chunduri
Browse files
few minor changes to make the code consistent with the other Darshan modules
in preparation for Darshan 3.3.0 release
parent
ba69643f
Changes
4
Hide whitespace changes
Inline
Side-by-side
apmpi/darshan-apmpi-log-format.h
View file @
cfd65ad0
...
...
@@ -208,8 +208,8 @@ enum apmpi_f_mpiop_synctime_indices
/* aggregate (across all the ranks) per MPI op times */
#define APMPI_F_MPI_GLOBAL_COUNTERS \
Y(
RANK
_TOTAL_
MPI
TIME) \
Y(
RANK
_TOTAL_
MPI
SYNCTIME) \
Y(
MPI
_TOTAL_
COMM_
TIME) \
Y(
MPI
_TOTAL_
COMM_
SYNC
_
TIME) \
Z(APMPI_F_MPI_GLOBAL_NUM_INDICES)
enum
apmpi_f_mpi_global_indices
{
...
...
@@ -238,7 +238,6 @@ struct darshan_apmpi_header_record
{
struct
darshan_base_record
base_rec
;
int64_t
magic
;
uint32_t
version
;
uint32_t
sync_flag
;
double
apmpi_f_variance_total_mpitime
;
double
apmpi_f_variance_total_mpisynctime
;
...
...
apmpi/lib/darshan-apmpi.c
View file @
cfd65ad0
...
...
@@ -404,7 +404,6 @@ static void apmpi_runtime_initialize()
#else
apmpi_runtime
->
header_record
->
sync_flag
=
0
;
#endif
apmpi_runtime
->
header_record
->
version
=
APMPI_VER
;
}
apmpi_runtime
->
rec_id
=
darshan_core_gen_record_id
(
"APMPI"
);
//record name
...
...
@@ -478,7 +477,7 @@ static void apmpi_shared_record_variance(MPI_Comm mod_comm)
/* get total mpi time variances across the ranks */
var_send_buf
->
n
=
1
;
var_send_buf
->
S
=
0
;
var_send_buf
->
T
=
apmpi_runtime
->
perf_record
->
fglobalcounters
[
RANK
_TOTAL_
MPI
TIME
];
var_send_buf
->
T
=
apmpi_runtime
->
perf_record
->
fglobalcounters
[
MPI
_TOTAL_
COMM_
TIME
];
PMPI_Reduce
(
var_send_buf
,
var_recv_buf
,
1
,
var_dt
,
var_op
,
0
,
mod_comm
);
...
...
@@ -491,7 +490,7 @@ static void apmpi_shared_record_variance(MPI_Comm mod_comm)
/* get total mpi sync time variances across the ranks */
var_send_buf
->
n
=
1
;
var_send_buf
->
S
=
0
;
var_send_buf
->
T
=
apmpi_runtime
->
perf_record
->
fglobalcounters
[
RANK
_TOTAL_
MPI
SYNCTIME
];
var_send_buf
->
T
=
apmpi_runtime
->
perf_record
->
fglobalcounters
[
MPI
_TOTAL_
COMM_
SYNC
_
TIME
];
PMPI_Reduce
(
var_send_buf
,
var_recv_buf
,
1
,
var_dt
,
var_op
,
0
,
mod_comm
);
...
...
@@ -539,15 +538,15 @@ static void apmpi_mpi_redux(
return
;
}
double
mpisync_time
=
0
.
0
;
/* Compute Total MPI time per rank:
RANK
_TOTAL_
MPI
TIME */
/* Compute Total MPI time per rank:
MPI
_TOTAL_
COMM_
TIME */
for
(
i
=
MPI_SEND_TOTAL_TIME
;
i
<
APMPI_F_MPIOP_TOTALTIME_NUM_INDICES
;
i
+=
3
){
// times (total_time, max_time, min_time)
apmpi_runtime
->
perf_record
->
fglobalcounters
[
RANK
_TOTAL_
MPI
TIME
]
+=
apmpi_runtime
->
perf_record
->
fcounters
[
i
];
apmpi_runtime
->
perf_record
->
fglobalcounters
[
MPI
_TOTAL_
COMM_
TIME
]
+=
apmpi_runtime
->
perf_record
->
fcounters
[
i
];
}
for
(
i
=
MPI_BARRIER_TOTAL_SYNC_TIME
;
i
<
APMPI_F_MPIOP_SYNCTIME_NUM_INDICES
;
i
++
){
mpisync_time
+=
apmpi_runtime
->
perf_record
->
fsynccounters
[
i
];
}
apmpi_runtime
->
perf_record
->
fglobalcounters
[
RANK
_TOTAL_
MPI
TIME
]
+=
mpisync_time
;
apmpi_runtime
->
perf_record
->
fglobalcounters
[
RANK
_TOTAL_
MPI
SYNCTIME
]
=
mpisync_time
;
apmpi_runtime
->
perf_record
->
fglobalcounters
[
MPI
_TOTAL_
COMM_
TIME
]
+=
mpisync_time
;
apmpi_runtime
->
perf_record
->
fglobalcounters
[
MPI
_TOTAL_
COMM_
SYNC
_
TIME
]
=
mpisync_time
;
#if 0
red_send_buf = apmpi_runtime->perf_record;
...
...
apmpi/util/apmpi-backend.py
View file @
cfd65ad0
...
...
@@ -19,7 +19,6 @@ struct darshan_apmpi_header_record
{
struct darshan_base_record base_rec;
int64_t magic;
uint32_t version;
uint32_t sync_flag;
double apmpi_f_variance_total_mpitime;
double apmpi_f_variance_total_mpisynctime;
...
...
@@ -56,7 +55,6 @@ def log_get_apmpi_record(log, mod_type, dtype='dict'):
rec
[
'rank'
]
=
hdr
[
0
].
base_rec
.
rank
rec
[
'magic'
]
=
hdr
[
0
].
magic
rec
[
'sync_flag'
]
=
hdr
[
0
].
sync_flag
rec
[
'version'
]
=
hdr
[
0
].
version
rec
[
'variance_total_mpitime'
]
=
hdr
[
0
].
apmpi_f_variance_total_mpitime
rec
[
'variance_total_mpisynctime'
]
=
hdr
[
0
].
apmpi_f_variance_total_mpisynctime
else
:
...
...
apmpi/util/darshan-apmpi-logutils.c
View file @
cfd65ad0
...
...
@@ -212,12 +212,12 @@ static void darshan_log_print_apmpi_rec(void *rec, char *file_name,
hdr_rec
=
rec
;
DARSHAN_F_COUNTER_PRINT
(
darshan_module_names
[
DARSHAN_APMPI_MOD
],
hdr_rec
->
base_rec
.
rank
,
hdr_rec
->
base_rec
.
id
,
"
RANKS
_TOTAL_
MPI
TIME_VARIANCE"
,
hdr_rec
->
apmpi_f_variance_total_mpitime
,
"
MPI
_TOTAL_
COMM_
TIME_VARIANCE"
,
hdr_rec
->
apmpi_f_variance_total_mpitime
,
""
,
""
,
""
);
if
(
hdr_rec
->
sync_flag
)
DARSHAN_F_COUNTER_PRINT
(
darshan_module_names
[
DARSHAN_APMPI_MOD
],
hdr_rec
->
base_rec
.
rank
,
hdr_rec
->
base_rec
.
id
,
"
RANKS
_TOTAL_
MPI
SYNCTIME_VARIANCE"
,
hdr_rec
->
apmpi_f_variance_total_mpisynctime
,
"
MPI
_TOTAL_
COMM_
SYNC
_
TIME_VARIANCE"
,
hdr_rec
->
apmpi_f_variance_total_mpisynctime
,
""
,
""
,
""
);
first_rec
=
0
;
sync_flag
=
hdr_rec
->
sync_flag
;
...
...
@@ -228,7 +228,7 @@ static void darshan_log_print_apmpi_rec(void *rec, char *file_name,
DARSHAN_S_COUNTER_PRINT
(
darshan_module_names
[
DARSHAN_APMPI_MOD
],
prf_rec
->
base_rec
.
rank
,
prf_rec
->
base_rec
.
id
,
"
nodeid
"
,
prf_rec
->
node_name
,
"
MPI_PROCESSOR_NAME
"
,
prf_rec
->
node_name
,
""
,
""
,
""
);
for
(
i
=
0
;
i
<
APMPI_NUM_INDICES
;
i
++
)
...
...
@@ -272,10 +272,30 @@ static void darshan_log_print_apmpi_rec(void *rec, char *file_name,
/*
 * Print the human-readable legend for the APMPI counters to stdout.
 *
 * Every line is emitted as a '#'-prefixed comment line. The first line
 * echoes the module format version passed in by the caller.
 *
 * ver: APMPI log-format version number; only printed, not interpreted.
 */
static void darshan_log_print_apmpi_description(int ver)
{
    printf("\n# description of APMPI counters: %d\n", ver);
    //printf("# node: node connected to this router\n");
    //printf("# AR_RTR_x_y_INQ_PRF_INCOMING_FLIT_VC[0-7]: flits on VCz of x y tile\n");
    //printf("# AR_RTR_x_y_INQ_PRF_ROWBUS_STALL_CNT: stalls on x y tile\n");

    /* Header-record (global) statistics. */
    printf("# global summary stats showing the variance across all the MPI processes.\n");
    printf("# MPI_TOTAL_COMM_TIME_VARIANCE: variance in total communication time across all the processes.\n");
    printf("# MPI_TOTAL_COMM_SYNC_TIME_VARIANCE: variance in total sync time across all the processes.\n");

    /* Per-process summary statistics. */
    printf("# per-process summary stats based on the MPI op instrumented counters.\n");
    printf("# MPI_PROCESSOR_NAME: name of the processor used by the MPI process.\n");
    printf("# MPI_TOTAL_COMM_TIME: total communication (MPI) time of a process across all the MPI ops.\n");
    printf("# MPI_TOTAL_COMM_SYNC_TIME: total sync time of a process across all the MPI ops.\n");

    /* Overview of the instrumented operations. */
    printf("# APMPI_*: MPI operation counts.\n");
    printf("# Blocking Point-to-point, Nonblocking Point-to-point, Misc MPI operations.\n");
    printf("# Blocking Collective, Nonblocking Collective and RMA operations are instrumented.\n");
    printf("# Total MPI operations instrumented in this release: 74.\n");

    /* Per-operation counter suffixes. */
    printf("# The following counters (as applicable) are reported for each instrumented operation.\n");
    printf("# CALL_COUNT: total call count for an MPI operation.\n");
    printf("# TOTAL_BYTES: total bytes (cumulative across all calls of an op) used with an MPI op.\n");
    printf("# MSG_SIZE_AGG_0_256: total bytes for all the calls of an MPI op with message size range [0, 256B].\n");
    printf("# MSG_SIZE_AGG_256_1K: total bytes for all the calls of an MPI op with message size range (256B, 1KB].\n");
    printf("# MSG_SIZE_AGG_1K_8K: total bytes for all the calls of an MPI op with message size range (1KB, 8KB].\n");
    printf("# MSG_SIZE_AGG_8K_256K: total bytes for all the calls of an MPI op with message size range (8KB, 256KB].\n");
    printf("# MSG_SIZE_AGG_256K_1M: total bytes for all the calls of an MPI op with message size range (256KB, 1MB].\n");
    printf("# MSG_SIZE_AGG_1M_PLUS: total bytes for all the calls of an MPI op with message size greater than 1MB.\n");
    printf("# TOTAL_TIME: total time (cumulative across all calls of an op) of an MPI op.\n");
    /* MIN/MAX descriptions were previously swapped; each now matches its counter name. */
    printf("# MIN_TIME: minimum time across all calls of an MPI op.\n");
    printf("# MAX_TIME: maximum time across all calls of an MPI op.\n");
    printf("# TOTAL_SYNC_TIME: total sync time (cumulative across all calls of an op) of an MPI op.\n");
}
...
...
@@ -302,12 +322,12 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
printf
(
"- "
);
DARSHAN_F_COUNTER_PRINT
(
darshan_module_names
[
DARSHAN_APMPI_MOD
],
hdr_rec1
->
base_rec
.
rank
,
hdr_rec1
->
base_rec
.
id
,
"
RANKS
_TOTAL_
MPI
TIME_VARIANCE"
,
hdr_rec1
->
apmpi_f_variance_total_mpitime
,
"
MPI
_TOTAL_
COMM_
TIME_VARIANCE"
,
hdr_rec1
->
apmpi_f_variance_total_mpitime
,
""
,
""
,
""
);
if
(
sync_flag
)
DARSHAN_F_COUNTER_PRINT
(
darshan_module_names
[
DARSHAN_APMPI_MOD
],
hdr_rec1
->
base_rec
.
rank
,
hdr_rec1
->
base_rec
.
id
,
"
RANKS
_TOTAL_
MPI
SYNCTIME_VARIANCE"
,
hdr_rec1
->
apmpi_f_variance_total_mpisynctime
,
"
MPI
_TOTAL_
COMM_
SYNC
_
TIME_VARIANCE"
,
hdr_rec1
->
apmpi_f_variance_total_mpisynctime
,
""
,
""
,
""
);
}
else
if
(
!
hdr_rec1
)
...
...
@@ -315,12 +335,12 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
printf
(
"+ "
);
DARSHAN_F_COUNTER_PRINT
(
darshan_module_names
[
DARSHAN_APMPI_MOD
],
hdr_rec2
->
base_rec
.
rank
,
hdr_rec2
->
base_rec
.
id
,
"
RANKS
_TOTAL_
MPI
TIME_VARIANCE"
,
hdr_rec2
->
apmpi_f_variance_total_mpitime
,
"
MPI
_TOTAL_
COMM_
TIME_VARIANCE"
,
hdr_rec2
->
apmpi_f_variance_total_mpitime
,
""
,
""
,
""
);
if
(
sync_flag
)
DARSHAN_F_COUNTER_PRINT
(
darshan_module_names
[
DARSHAN_APMPI_MOD
],
hdr_rec2
->
base_rec
.
rank
,
hdr_rec2
->
base_rec
.
id
,
"
RANKS
_TOTAL_
MPI
SYNCTIME_VARIANCE"
,
hdr_rec2
->
apmpi_f_variance_total_mpisynctime
,
"
MPI
_TOTAL_
COMM_
SYNC
_
TIME_VARIANCE"
,
hdr_rec2
->
apmpi_f_variance_total_mpisynctime
,
""
,
""
,
""
);
}
else
...
...
@@ -330,12 +350,12 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
printf
(
"- "
);
DARSHAN_F_COUNTER_PRINT
(
darshan_module_names
[
DARSHAN_APMPI_MOD
],
hdr_rec1
->
base_rec
.
rank
,
hdr_rec1
->
base_rec
.
id
,
"
RANKS
_TOTAL_
MPI
TIME_VARIANCE"
,
hdr_rec1
->
apmpi_f_variance_total_mpitime
,
"
MPI
_TOTAL_
COMM_
TIME_VARIANCE"
,
hdr_rec1
->
apmpi_f_variance_total_mpitime
,
""
,
""
,
""
);
printf
(
"+ "
);
DARSHAN_F_COUNTER_PRINT
(
darshan_module_names
[
DARSHAN_APMPI_MOD
],
hdr_rec2
->
base_rec
.
rank
,
hdr_rec2
->
base_rec
.
id
,
"
RANKS
_TOTAL_
MPI
TIME_VARIANCE"
,
hdr_rec2
->
apmpi_f_variance_total_mpitime
,
"
MPI
_TOTAL_
COMM_
TIME_VARIANCE"
,
hdr_rec2
->
apmpi_f_variance_total_mpitime
,
""
,
""
,
""
);
}
if
(
sync_flag
)
...
...
@@ -345,12 +365,12 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
printf
(
"- "
);
DARSHAN_F_COUNTER_PRINT
(
darshan_module_names
[
DARSHAN_APMPI_MOD
],
hdr_rec1
->
base_rec
.
rank
,
hdr_rec1
->
base_rec
.
id
,
"
RANKS
_TOTAL_
MPI
SYNCTIME_VARIANCE"
,
hdr_rec1
->
apmpi_f_variance_total_mpisynctime
,
"
MPI
_TOTAL_
COMM_
SYNC
_
TIME_VARIANCE"
,
hdr_rec1
->
apmpi_f_variance_total_mpisynctime
,
""
,
""
,
""
);
printf
(
"+ "
);
DARSHAN_F_COUNTER_PRINT
(
darshan_module_names
[
DARSHAN_APMPI_MOD
],
hdr_rec2
->
base_rec
.
rank
,
hdr_rec2
->
base_rec
.
id
,
"
RANKS
_TOTAL_
MPI
SYNCTIME_VARIANCE"
,
hdr_rec2
->
apmpi_f_variance_total_mpisynctime
,
"
MPI
_TOTAL_
COMM_
SYNC
_
TIME_VARIANCE"
,
hdr_rec2
->
apmpi_f_variance_total_mpisynctime
,
""
,
""
,
""
);
}
}
...
...
@@ -363,7 +383,7 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
printf
(
"- "
);
DARSHAN_S_COUNTER_PRINT
(
darshan_module_names
[
DARSHAN_APMPI_MOD
],
prf_rec1
->
base_rec
.
rank
,
prf_rec1
->
base_rec
.
id
,
"
nodeid
"
,
prf_rec1
->
node_name
,
"
MPI_PROCESSOR_NAME
"
,
prf_rec1
->
node_name
,
""
,
""
,
""
);
}
else
if
(
!
prf_rec1
)
...
...
@@ -371,7 +391,7 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
printf
(
"+ "
);
DARSHAN_S_COUNTER_PRINT
(
darshan_module_names
[
DARSHAN_APMPI_MOD
],
prf_rec2
->
base_rec
.
rank
,
prf_rec2
->
base_rec
.
id
,
"
nodeid
"
,
prf_rec2
->
node_name
,
"
MPI_PROCESSOR_NAME
"
,
prf_rec2
->
node_name
,
""
,
""
,
""
);
}
else
if
(
prf_rec1
->
node_name
!=
prf_rec2
->
node_name
)
...
...
@@ -379,12 +399,12 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
printf
(
"- "
);
DARSHAN_S_COUNTER_PRINT
(
darshan_module_names
[
DARSHAN_APMPI_MOD
],
prf_rec1
->
base_rec
.
rank
,
prf_rec1
->
base_rec
.
id
,
"
nodeid
"
,
prf_rec1
->
node_name
,
"
MPI_PROCESSOR_NAME
"
,
prf_rec1
->
node_name
,
""
,
""
,
""
);
printf
(
"+ "
);
DARSHAN_S_COUNTER_PRINT
(
darshan_module_names
[
DARSHAN_APMPI_MOD
],
prf_rec2
->
base_rec
.
rank
,
prf_rec2
->
base_rec
.
id
,
"
nodeid
"
,
prf_rec2
->
node_name
,
"
MPI_PROCESSOR_NAME
"
,
prf_rec2
->
node_name
,
""
,
""
,
""
);
}
int
i
;
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment