Commit cfd65ad0 authored by Sudheer Chunduri
Browse files

few minor changes to make the code consistent with the other Darshan modules

in preparation for Darshan 3.3.0 release
parent ba69643f
...@@ -208,8 +208,8 @@ enum apmpi_f_mpiop_synctime_indices ...@@ -208,8 +208,8 @@ enum apmpi_f_mpiop_synctime_indices
/* aggregate (across all the ranks) per MPI op times */ /* aggregate (across all the ranks) per MPI op times */
#define APMPI_F_MPI_GLOBAL_COUNTERS \ #define APMPI_F_MPI_GLOBAL_COUNTERS \
Y(RANK_TOTAL_MPITIME) \ Y(MPI_TOTAL_COMM_TIME) \
Y(RANK_TOTAL_MPISYNCTIME) \ Y(MPI_TOTAL_COMM_SYNC_TIME) \
Z(APMPI_F_MPI_GLOBAL_NUM_INDICES) Z(APMPI_F_MPI_GLOBAL_NUM_INDICES)
enum apmpi_f_mpi_global_indices enum apmpi_f_mpi_global_indices
{ {
...@@ -238,7 +238,6 @@ struct darshan_apmpi_header_record ...@@ -238,7 +238,6 @@ struct darshan_apmpi_header_record
{ {
struct darshan_base_record base_rec; struct darshan_base_record base_rec;
int64_t magic; int64_t magic;
uint32_t version;
uint32_t sync_flag; uint32_t sync_flag;
double apmpi_f_variance_total_mpitime; double apmpi_f_variance_total_mpitime;
double apmpi_f_variance_total_mpisynctime; double apmpi_f_variance_total_mpisynctime;
......
...@@ -404,7 +404,6 @@ static void apmpi_runtime_initialize() ...@@ -404,7 +404,6 @@ static void apmpi_runtime_initialize()
#else #else
apmpi_runtime->header_record->sync_flag = 0; apmpi_runtime->header_record->sync_flag = 0;
#endif #endif
apmpi_runtime->header_record->version = APMPI_VER;
} }
apmpi_runtime->rec_id = darshan_core_gen_record_id("APMPI"); //record name apmpi_runtime->rec_id = darshan_core_gen_record_id("APMPI"); //record name
...@@ -478,7 +477,7 @@ static void apmpi_shared_record_variance(MPI_Comm mod_comm) ...@@ -478,7 +477,7 @@ static void apmpi_shared_record_variance(MPI_Comm mod_comm)
/* get total mpi time variances across the ranks */ /* get total mpi time variances across the ranks */
var_send_buf->n = 1; var_send_buf->n = 1;
var_send_buf->S = 0; var_send_buf->S = 0;
var_send_buf->T = apmpi_runtime->perf_record->fglobalcounters[RANK_TOTAL_MPITIME]; var_send_buf->T = apmpi_runtime->perf_record->fglobalcounters[MPI_TOTAL_COMM_TIME];
PMPI_Reduce(var_send_buf, var_recv_buf, 1, PMPI_Reduce(var_send_buf, var_recv_buf, 1,
var_dt, var_op, 0, mod_comm); var_dt, var_op, 0, mod_comm);
...@@ -491,7 +490,7 @@ static void apmpi_shared_record_variance(MPI_Comm mod_comm) ...@@ -491,7 +490,7 @@ static void apmpi_shared_record_variance(MPI_Comm mod_comm)
/* get total mpi sync time variances across the ranks */ /* get total mpi sync time variances across the ranks */
var_send_buf->n = 1; var_send_buf->n = 1;
var_send_buf->S = 0; var_send_buf->S = 0;
var_send_buf->T = apmpi_runtime->perf_record->fglobalcounters[RANK_TOTAL_MPISYNCTIME]; var_send_buf->T = apmpi_runtime->perf_record->fglobalcounters[MPI_TOTAL_COMM_SYNC_TIME];
PMPI_Reduce(var_send_buf, var_recv_buf, 1, PMPI_Reduce(var_send_buf, var_recv_buf, 1,
var_dt, var_op, 0, mod_comm); var_dt, var_op, 0, mod_comm);
...@@ -539,15 +538,15 @@ static void apmpi_mpi_redux( ...@@ -539,15 +538,15 @@ static void apmpi_mpi_redux(
return; return;
} }
double mpisync_time = 0.0; double mpisync_time = 0.0;
/* Compute Total MPI time per rank: RANK_TOTAL_MPITIME */ /* Compute Total MPI time per rank: MPI_TOTAL_COMM_TIME */
for (i=MPI_SEND_TOTAL_TIME; i<APMPI_F_MPIOP_TOTALTIME_NUM_INDICES; i+=3){ // times (total_time, max_time, min_time) for (i=MPI_SEND_TOTAL_TIME; i<APMPI_F_MPIOP_TOTALTIME_NUM_INDICES; i+=3){ // times (total_time, max_time, min_time)
apmpi_runtime->perf_record->fglobalcounters[RANK_TOTAL_MPITIME] += apmpi_runtime->perf_record->fcounters[i]; apmpi_runtime->perf_record->fglobalcounters[MPI_TOTAL_COMM_TIME] += apmpi_runtime->perf_record->fcounters[i];
} }
for (i=MPI_BARRIER_TOTAL_SYNC_TIME; i<APMPI_F_MPIOP_SYNCTIME_NUM_INDICES; i++){ for (i=MPI_BARRIER_TOTAL_SYNC_TIME; i<APMPI_F_MPIOP_SYNCTIME_NUM_INDICES; i++){
mpisync_time += apmpi_runtime->perf_record->fsynccounters[i]; mpisync_time += apmpi_runtime->perf_record->fsynccounters[i];
} }
apmpi_runtime->perf_record->fglobalcounters[RANK_TOTAL_MPITIME] += mpisync_time; apmpi_runtime->perf_record->fglobalcounters[MPI_TOTAL_COMM_TIME] += mpisync_time;
apmpi_runtime->perf_record->fglobalcounters[RANK_TOTAL_MPISYNCTIME] = mpisync_time; apmpi_runtime->perf_record->fglobalcounters[MPI_TOTAL_COMM_SYNC_TIME] = mpisync_time;
#if 0 #if 0
red_send_buf = apmpi_runtime->perf_record; red_send_buf = apmpi_runtime->perf_record;
......
...@@ -19,7 +19,6 @@ struct darshan_apmpi_header_record ...@@ -19,7 +19,6 @@ struct darshan_apmpi_header_record
{ {
struct darshan_base_record base_rec; struct darshan_base_record base_rec;
int64_t magic; int64_t magic;
uint32_t version;
uint32_t sync_flag; uint32_t sync_flag;
double apmpi_f_variance_total_mpitime; double apmpi_f_variance_total_mpitime;
double apmpi_f_variance_total_mpisynctime; double apmpi_f_variance_total_mpisynctime;
...@@ -56,7 +55,6 @@ def log_get_apmpi_record(log, mod_type, dtype='dict'): ...@@ -56,7 +55,6 @@ def log_get_apmpi_record(log, mod_type, dtype='dict'):
rec['rank'] = hdr[0].base_rec.rank rec['rank'] = hdr[0].base_rec.rank
rec['magic'] = hdr[0].magic rec['magic'] = hdr[0].magic
rec['sync_flag'] = hdr[0].sync_flag rec['sync_flag'] = hdr[0].sync_flag
rec['version'] = hdr[0].version
rec['variance_total_mpitime'] = hdr[0].apmpi_f_variance_total_mpitime rec['variance_total_mpitime'] = hdr[0].apmpi_f_variance_total_mpitime
rec['variance_total_mpisynctime'] = hdr[0].apmpi_f_variance_total_mpisynctime rec['variance_total_mpisynctime'] = hdr[0].apmpi_f_variance_total_mpisynctime
else: else:
......
...@@ -212,12 +212,12 @@ static void darshan_log_print_apmpi_rec(void *rec, char *file_name, ...@@ -212,12 +212,12 @@ static void darshan_log_print_apmpi_rec(void *rec, char *file_name,
hdr_rec = rec; hdr_rec = rec;
DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD],
hdr_rec->base_rec.rank, hdr_rec->base_rec.id, hdr_rec->base_rec.rank, hdr_rec->base_rec.id,
"RANKS_TOTAL_MPITIME_VARIANCE", hdr_rec->apmpi_f_variance_total_mpitime, "MPI_TOTAL_COMM_TIME_VARIANCE", hdr_rec->apmpi_f_variance_total_mpitime,
"", "", ""); "", "", "");
if(hdr_rec->sync_flag) if(hdr_rec->sync_flag)
DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD],
hdr_rec->base_rec.rank, hdr_rec->base_rec.id, hdr_rec->base_rec.rank, hdr_rec->base_rec.id,
"RANKS_TOTAL_MPISYNCTIME_VARIANCE", hdr_rec->apmpi_f_variance_total_mpisynctime, "MPI_TOTAL_COMM_SYNC_TIME_VARIANCE", hdr_rec->apmpi_f_variance_total_mpisynctime,
"", "", ""); "", "", "");
first_rec = 0; first_rec = 0;
sync_flag = hdr_rec->sync_flag; sync_flag = hdr_rec->sync_flag;
...@@ -228,7 +228,7 @@ static void darshan_log_print_apmpi_rec(void *rec, char *file_name, ...@@ -228,7 +228,7 @@ static void darshan_log_print_apmpi_rec(void *rec, char *file_name,
DARSHAN_S_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD], DARSHAN_S_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD],
prf_rec->base_rec.rank, prf_rec->base_rec.id, prf_rec->base_rec.rank, prf_rec->base_rec.id,
"nodeid", prf_rec->node_name, "MPI_PROCESSOR_NAME", prf_rec->node_name,
"", "", ""); "", "", "");
for(i = 0; i < APMPI_NUM_INDICES; i++) for(i = 0; i < APMPI_NUM_INDICES; i++)
...@@ -272,10 +272,30 @@ static void darshan_log_print_apmpi_rec(void *rec, char *file_name, ...@@ -272,10 +272,30 @@ static void darshan_log_print_apmpi_rec(void *rec, char *file_name,
static void darshan_log_print_apmpi_description(int ver) static void darshan_log_print_apmpi_description(int ver)
{ {
printf("\n# description of APMPI counters: %d\n", ver); printf("\n# description of APMPI counters: %d\n", ver);
//printf("# node: node connected to this router\n"); printf("# global summary stats showing the variance across all the MPI processes.\n");
//printf("# AR_RTR_x_y_INQ_PRF_INCOMING_FLIT_VC[0-7]: flits on VCz of x y tile\n"); printf("# MPI_TOTAL_COMM_TIME_VARIANCE: variance in total communication time across all the processes.\n");
//printf("# AR_RTR_x_y_INQ_PRF_ROWBUS_STALL_CNT: stalls on x y tile\n"); printf("# MPI_TOTAL_COMM_SYNC_TIME_VARIANCE: variance in total sync time across all the processes.\n");
printf("# per-process summary stats based on the MPI op instrumented counters.\n");
printf("# MPI_PROCESSOR_NAME: name of the processor used by the MPI process.\n");
printf("# MPI_TOTAL_COMM_TIME: total communication (MPI) time of a process across all the MPI ops.\n");
printf("# MPI_TOTAL_COMM_SYNC_TIME: total sync time of a process across all the MPI ops.\n");
printf("# APMPI_*: MPI operation counts.\n");
printf("# Blocking Point-to-point, Nonblocking Point-to-point, Misc MPI operations.\n");
printf("# Blocking Collective, Nonblocking Collective and RMA operations are instrumented.\n");
printf("# Total MPI operations instrumented in this release: 74.\n");
printf("# The following counters (as applicable) are reported for each instrumented operation.\n");
printf("# CALL_COUNT: total call count for an MPI operation.\n");
printf("# TOTAL_BYTES: total bytes (cumulative across all calls of an op) used with an MPI op.\n");
printf("# MSG_SIZE_AGG_0_256: total bytes for all the calls of an MPI op with message size range [0, 256B].\n");
printf("# MSG_SIZE_AGG_256_1K: total bytes for all the calls of an MPI op with message size range (256B, 1KB].\n");
printf("# MSG_SIZE_AGG_1K_8K: total bytes for all the calls of an MPI op with message size range (1KB, 8KB].\n");
printf("# MSG_SIZE_AGG_8K_256K: total bytes for all the calls of an MPI op with message size range (8KB, 256KB].\n");
printf("# MSG_SIZE_AGG_256K_1M: total bytes for all the calls of an MPI op with message size range (256KB, 1MB].\n");
printf("# MSG_SIZE_AGG_1M_PLUS: total bytes for all the calls of an MPI op with message size greater than 1MB.\n");
printf("# TOTAL_TIME: total time (cumulative across all calls of an op) of an MPI op.\n");
printf("# MIN_TIME: minimum time across all calls of an MPI op.\n");
printf("# MAX_TIME: maximum time across all calls of an MPI op.\n");
printf("# TOTAL_SYNC_TIME: total sync time (cumulative across all calls of an op) of an MPI op.\n");
return; return;
} }
...@@ -302,12 +322,12 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1, ...@@ -302,12 +322,12 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
printf("- "); printf("- ");
DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD],
hdr_rec1->base_rec.rank, hdr_rec1->base_rec.id, hdr_rec1->base_rec.rank, hdr_rec1->base_rec.id,
"RANKS_TOTAL_MPITIME_VARIANCE", hdr_rec1->apmpi_f_variance_total_mpitime, "MPI_TOTAL_COMM_TIME_VARIANCE", hdr_rec1->apmpi_f_variance_total_mpitime,
"", "", ""); "", "", "");
if(sync_flag) if(sync_flag)
DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD],
hdr_rec1->base_rec.rank, hdr_rec1->base_rec.id, hdr_rec1->base_rec.rank, hdr_rec1->base_rec.id,
"RANKS_TOTAL_MPISYNCTIME_VARIANCE", hdr_rec1->apmpi_f_variance_total_mpisynctime, "MPI_TOTAL_COMM_SYNC_TIME_VARIANCE", hdr_rec1->apmpi_f_variance_total_mpisynctime,
"", "", ""); "", "", "");
} }
else if (!hdr_rec1) else if (!hdr_rec1)
...@@ -315,12 +335,12 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1, ...@@ -315,12 +335,12 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
printf("+ "); printf("+ ");
DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD],
hdr_rec2->base_rec.rank, hdr_rec2->base_rec.id, hdr_rec2->base_rec.rank, hdr_rec2->base_rec.id,
"RANKS_TOTAL_MPITIME_VARIANCE", hdr_rec2->apmpi_f_variance_total_mpitime, "MPI_TOTAL_COMM_TIME_VARIANCE", hdr_rec2->apmpi_f_variance_total_mpitime,
"", "", ""); "", "", "");
if(sync_flag) if(sync_flag)
DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD],
hdr_rec2->base_rec.rank, hdr_rec2->base_rec.id, hdr_rec2->base_rec.rank, hdr_rec2->base_rec.id,
"RANKS_TOTAL_MPISYNCTIME_VARIANCE", hdr_rec2->apmpi_f_variance_total_mpisynctime, "MPI_TOTAL_COMM_SYNC_TIME_VARIANCE", hdr_rec2->apmpi_f_variance_total_mpisynctime,
"", "", ""); "", "", "");
} }
else else
...@@ -330,12 +350,12 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1, ...@@ -330,12 +350,12 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
printf("- "); printf("- ");
DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD],
hdr_rec1->base_rec.rank, hdr_rec1->base_rec.id, hdr_rec1->base_rec.rank, hdr_rec1->base_rec.id,
"RANKS_TOTAL_MPITIME_VARIANCE", hdr_rec1->apmpi_f_variance_total_mpitime, "MPI_TOTAL_COMM_TIME_VARIANCE", hdr_rec1->apmpi_f_variance_total_mpitime,
"", "", ""); "", "", "");
printf("+ "); printf("+ ");
DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD],
hdr_rec2->base_rec.rank, hdr_rec2->base_rec.id, hdr_rec2->base_rec.rank, hdr_rec2->base_rec.id,
"RANKS_TOTAL_MPITIME_VARIANCE", hdr_rec2->apmpi_f_variance_total_mpitime, "MPI_TOTAL_COMM_TIME_VARIANCE", hdr_rec2->apmpi_f_variance_total_mpitime,
"", "", ""); "", "", "");
} }
if(sync_flag) if(sync_flag)
...@@ -345,12 +365,12 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1, ...@@ -345,12 +365,12 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
printf("- "); printf("- ");
DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD],
hdr_rec1->base_rec.rank, hdr_rec1->base_rec.id, hdr_rec1->base_rec.rank, hdr_rec1->base_rec.id,
"RANKS_TOTAL_MPISYNCTIME_VARIANCE", hdr_rec1->apmpi_f_variance_total_mpisynctime, "MPI_TOTAL_COMM_SYNC_TIME_VARIANCE", hdr_rec1->apmpi_f_variance_total_mpisynctime,
"", "", ""); "", "", "");
printf("+ "); printf("+ ");
DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD],
hdr_rec2->base_rec.rank, hdr_rec2->base_rec.id, hdr_rec2->base_rec.rank, hdr_rec2->base_rec.id,
"RANKS_TOTAL_MPISYNCTIME_VARIANCE", hdr_rec2->apmpi_f_variance_total_mpisynctime, "MPI_TOTAL_COMM_SYNC_TIME_VARIANCE", hdr_rec2->apmpi_f_variance_total_mpisynctime,
"", "", ""); "", "", "");
} }
} }
...@@ -363,7 +383,7 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1, ...@@ -363,7 +383,7 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
printf("- "); printf("- ");
DARSHAN_S_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD], DARSHAN_S_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD],
prf_rec1->base_rec.rank, prf_rec1->base_rec.id, prf_rec1->base_rec.rank, prf_rec1->base_rec.id,
"nodeid", prf_rec1->node_name, "MPI_PROCESSOR_NAME", prf_rec1->node_name,
"", "", ""); "", "", "");
} }
else if (!prf_rec1) else if (!prf_rec1)
...@@ -371,7 +391,7 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1, ...@@ -371,7 +391,7 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
printf("+ "); printf("+ ");
DARSHAN_S_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD], DARSHAN_S_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD],
prf_rec2->base_rec.rank, prf_rec2->base_rec.id, prf_rec2->base_rec.rank, prf_rec2->base_rec.id,
"nodeid", prf_rec2->node_name, "MPI_PROCESSOR_NAME", prf_rec2->node_name,
"", "", ""); "", "", "");
} }
else if (prf_rec1->node_name != prf_rec2->node_name) else if (prf_rec1->node_name != prf_rec2->node_name)
...@@ -379,12 +399,12 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1, ...@@ -379,12 +399,12 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
printf("- "); printf("- ");
DARSHAN_S_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD], DARSHAN_S_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD],
prf_rec1->base_rec.rank, prf_rec1->base_rec.id, prf_rec1->base_rec.rank, prf_rec1->base_rec.id,
"nodeid", prf_rec1->node_name, "MPI_PROCESSOR_NAME", prf_rec1->node_name,
"", "", ""); "", "", "");
printf("+ "); printf("+ ");
DARSHAN_S_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD], DARSHAN_S_COUNTER_PRINT(darshan_module_names[DARSHAN_APMPI_MOD],
prf_rec2->base_rec.rank, prf_rec2->base_rec.id, prf_rec2->base_rec.rank, prf_rec2->base_rec.id,
"nodeid", prf_rec2->node_name, "MPI_PROCESSOR_NAME", prf_rec2->node_name,
"", "", ""); "", "", "");
} }
int i; int i;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment