Commit fe2e8ade authored by Sudheer Chunduri
Browse files

Update to include sync_flag (collective sync) in the header record

parent d83a8bec
...@@ -239,6 +239,7 @@ struct darshan_apmpi_header_record ...@@ -239,6 +239,7 @@ struct darshan_apmpi_header_record
struct darshan_base_record base_rec; struct darshan_base_record base_rec;
int64_t magic; int64_t magic;
uint32_t version; uint32_t version;
uint32_t sync_flag;
double apmpi_f_variance_total_mpitime; double apmpi_f_variance_total_mpitime;
double apmpi_f_variance_total_mpisynctime; double apmpi_f_variance_total_mpisynctime;
}; };
......
...@@ -47,7 +47,7 @@ typedef long long ap_bytes_t; ...@@ -47,7 +47,7 @@ typedef long long ap_bytes_t;
ret = FUNC; \ ret = FUNC; \
tm2 = darshan_core_wtime(); \ tm2 = darshan_core_wtime(); \
tdiff = tm2-tm1; \ tdiff = tm2-tm1; \
tsync = -1 tsync = 0
#endif #endif
#define TIME(FUNC) \ #define TIME(FUNC) \
...@@ -411,6 +411,11 @@ void apmpi_runtime_initialize() ...@@ -411,6 +411,11 @@ void apmpi_runtime_initialize()
apmpi_runtime->header_record->base_rec.id = apmpi_runtime->header_id; apmpi_runtime->header_record->base_rec.id = apmpi_runtime->header_id;
apmpi_runtime->header_record->base_rec.rank = my_rank; apmpi_runtime->header_record->base_rec.rank = my_rank;
apmpi_runtime->header_record->magic = APMPI_MAGIC; apmpi_runtime->header_record->magic = APMPI_MAGIC;
#ifdef __APMPI_COLL_SYNC
apmpi_runtime->header_record->sync_flag = 1;
#else
apmpi_runtime->header_record->sync_flag = 0;
#endif
apmpi_runtime->header_record->version = APMPI_VER; apmpi_runtime->header_record->version = APMPI_VER;
} }
...@@ -456,7 +461,6 @@ static void apmpi_record_reduction_op (void* inrec_v, void* inoutrec_v, ...@@ -456,7 +461,6 @@ static void apmpi_record_reduction_op (void* inrec_v, void* inoutrec_v,
} }
} }
#endif #endif
#if 1
static void apmpi_shared_record_variance(MPI_Comm mod_comm) static void apmpi_shared_record_variance(MPI_Comm mod_comm)
{ {
MPI_Datatype var_dt; MPI_Datatype var_dt;
...@@ -495,7 +499,7 @@ static void apmpi_shared_record_variance(MPI_Comm mod_comm) ...@@ -495,7 +499,7 @@ static void apmpi_shared_record_variance(MPI_Comm mod_comm)
{ {
apmpi_runtime->header_record->apmpi_f_variance_total_mpitime = apmpi_runtime->header_record->apmpi_f_variance_total_mpitime =
(var_recv_buf->S / var_recv_buf->n); (var_recv_buf->S / var_recv_buf->n);
} }
/* get total mpi sync time variances across the ranks */ /* get total mpi sync time variances across the ranks */
var_send_buf->n = 1; var_send_buf->n = 1;
var_send_buf->S = 0; var_send_buf->S = 0;
...@@ -509,7 +513,6 @@ static void apmpi_shared_record_variance(MPI_Comm mod_comm) ...@@ -509,7 +513,6 @@ static void apmpi_shared_record_variance(MPI_Comm mod_comm)
apmpi_runtime->header_record->apmpi_f_variance_total_mpisynctime = apmpi_runtime->header_record->apmpi_f_variance_total_mpisynctime =
(var_recv_buf->S / var_recv_buf->n); (var_recv_buf->S / var_recv_buf->n);
} }
PMPI_Type_free(&var_dt); PMPI_Type_free(&var_dt);
PMPI_Op_free(&var_op); PMPI_Op_free(&var_op);
free(var_send_buf); free(var_send_buf);
...@@ -517,7 +520,6 @@ static void apmpi_shared_record_variance(MPI_Comm mod_comm) ...@@ -517,7 +520,6 @@ static void apmpi_shared_record_variance(MPI_Comm mod_comm)
return; return;
} }
#endif
/******************************************************************************** /********************************************************************************
...@@ -558,7 +560,6 @@ static void apmpi_mpi_redux( ...@@ -558,7 +560,6 @@ static void apmpi_mpi_redux(
} }
apmpi_runtime->perf_record->fglobalcounters[RANK_TOTAL_MPITIME] += mpisync_time; apmpi_runtime->perf_record->fglobalcounters[RANK_TOTAL_MPITIME] += mpisync_time;
apmpi_runtime->perf_record->fglobalcounters[RANK_TOTAL_MPISYNCTIME] = mpisync_time; apmpi_runtime->perf_record->fglobalcounters[RANK_TOTAL_MPISYNCTIME] = mpisync_time;
#if 0 #if 0
red_send_buf = apmpi_runtime->perf_record; red_send_buf = apmpi_runtime->perf_record;
......
...@@ -20,6 +20,7 @@ struct darshan_apmpi_header_record ...@@ -20,6 +20,7 @@ struct darshan_apmpi_header_record
struct darshan_base_record base_rec; struct darshan_base_record base_rec;
int64_t magic; int64_t magic;
uint32_t version; uint32_t version;
uint32_t sync_flag;
double apmpi_f_variance_total_mpitime; double apmpi_f_variance_total_mpitime;
double apmpi_f_variance_total_mpisynctime; double apmpi_f_variance_total_mpisynctime;
}; };
...@@ -54,6 +55,7 @@ def log_get_apmpi_record(log, mod_type, dtype='dict'): ...@@ -54,6 +55,7 @@ def log_get_apmpi_record(log, mod_type, dtype='dict'):
rec['id'] = hdr[0].base_rec.id rec['id'] = hdr[0].base_rec.id
rec['rank'] = hdr[0].base_rec.rank rec['rank'] = hdr[0].base_rec.rank
rec['magic'] = hdr[0].magic rec['magic'] = hdr[0].magic
rec['sync_flag'] = hdr[0].sync_flag
rec['version'] = hdr[0].version rec['version'] = hdr[0].version
rec['variance_total_mpitime'] = hdr[0].apmpi_f_variance_total_mpitime rec['variance_total_mpitime'] = hdr[0].apmpi_f_variance_total_mpitime
rec['variance_total_mpisynctime'] = hdr[0].apmpi_f_variance_total_mpisynctime rec['variance_total_mpisynctime'] = hdr[0].apmpi_f_variance_total_mpisynctime
......
...@@ -205,6 +205,7 @@ static void darshan_log_print_apmpi_rec(void *rec, char *file_name, ...@@ -205,6 +205,7 @@ static void darshan_log_print_apmpi_rec(void *rec, char *file_name,
{ {
int i; int i;
static int first_rec = 1; static int first_rec = 1;
static int sync_flag;
struct darshan_apmpi_header_record *hdr_rec; struct darshan_apmpi_header_record *hdr_rec;
struct darshan_apmpi_perf_record *prf_rec; struct darshan_apmpi_perf_record *prf_rec;
...@@ -215,11 +216,13 @@ static void darshan_log_print_apmpi_rec(void *rec, char *file_name, ...@@ -215,11 +216,13 @@ static void darshan_log_print_apmpi_rec(void *rec, char *file_name,
hdr_rec->base_rec.rank, hdr_rec->base_rec.id, hdr_rec->base_rec.rank, hdr_rec->base_rec.id,
"RANKS_TOTAL_MPITIME_VARIANCE", hdr_rec->apmpi_f_variance_total_mpitime, "RANKS_TOTAL_MPITIME_VARIANCE", hdr_rec->apmpi_f_variance_total_mpitime,
"", "", ""); "", "", "");
if(hdr_rec->sync_flag)
DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD],
hdr_rec->base_rec.rank, hdr_rec->base_rec.id, hdr_rec->base_rec.rank, hdr_rec->base_rec.id,
"RANKS_TOTAL_MPISYNCTIME_VARIANCE", hdr_rec->apmpi_f_variance_total_mpisynctime, "RANKS_TOTAL_MPISYNCTIME_VARIANCE", hdr_rec->apmpi_f_variance_total_mpisynctime,
"", "", ""); "", "", "");
first_rec = 0; first_rec = 0;
sync_flag = hdr_rec->sync_flag;
} }
else else
{ {
...@@ -244,6 +247,7 @@ static void darshan_log_print_apmpi_rec(void *rec, char *file_name, ...@@ -244,6 +247,7 @@ static void darshan_log_print_apmpi_rec(void *rec, char *file_name,
apmpi_f_mpiop_totaltime_counter_names[i], prf_rec->fcounters[i], apmpi_f_mpiop_totaltime_counter_names[i], prf_rec->fcounters[i],
"", "", ""); "", "", "");
} }
if(sync_flag){
for(i = 0; i < APMPI_F_MPIOP_SYNCTIME_NUM_INDICES; i++) for(i = 0; i < APMPI_F_MPIOP_SYNCTIME_NUM_INDICES; i++)
{ {
DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD],
...@@ -251,13 +255,16 @@ static void darshan_log_print_apmpi_rec(void *rec, char *file_name, ...@@ -251,13 +255,16 @@ static void darshan_log_print_apmpi_rec(void *rec, char *file_name,
apmpi_f_mpiop_synctime_counter_names[i], prf_rec->fsynccounters[i], apmpi_f_mpiop_synctime_counter_names[i], prf_rec->fsynccounters[i],
"", "", ""); "", "", "");
} }
for(i = 0; i < APMPI_F_MPI_GLOBAL_NUM_INDICES; i++) }
{
DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD],
prf_rec->base_rec.rank, prf_rec->base_rec.id, prf_rec->base_rec.rank, prf_rec->base_rec.id,
apmpi_f_mpi_global_counter_names[i], prf_rec->fglobalcounters[i], apmpi_f_mpi_global_counter_names[0], prf_rec->fglobalcounters[0],
"", "", "");
if(sync_flag)
DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD],
prf_rec->base_rec.rank, prf_rec->base_rec.id,
apmpi_f_mpi_global_counter_names[1], prf_rec->fglobalcounters[1],
"", "", ""); "", "", "");
}
} }
return; return;
...@@ -285,7 +292,9 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1, ...@@ -285,7 +292,9 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
hdr_rec2 = (struct darshan_apmpi_header_record*) file_rec2; hdr_rec2 = (struct darshan_apmpi_header_record*) file_rec2;
prf_rec1 = (struct darshan_apmpi_perf_record*) file_rec1; prf_rec1 = (struct darshan_apmpi_perf_record*) file_rec1;
prf_rec2 = (struct darshan_apmpi_perf_record*) file_rec2; prf_rec2 = (struct darshan_apmpi_perf_record*) file_rec2;
static int sync_flag;
sync_flag = hdr_rec1->sync_flag && hdr_rec2->sync_flag;
if (hdr_rec1->magic == APMPI_MAGIC) if (hdr_rec1->magic == APMPI_MAGIC)
{ {
/* this is the header record */ /* this is the header record */
...@@ -296,6 +305,7 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1, ...@@ -296,6 +305,7 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
hdr_rec1->base_rec.rank, hdr_rec1->base_rec.id, hdr_rec1->base_rec.rank, hdr_rec1->base_rec.id,
"RANKS_TOTAL_MPITIME_VARIANCE", hdr_rec1->apmpi_f_variance_total_mpitime, "RANKS_TOTAL_MPITIME_VARIANCE", hdr_rec1->apmpi_f_variance_total_mpitime,
"", "", ""); "", "", "");
if(sync_flag)
DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD],
hdr_rec1->base_rec.rank, hdr_rec1->base_rec.id, hdr_rec1->base_rec.rank, hdr_rec1->base_rec.id,
"RANKS_TOTAL_MPISYNCTIME_VARIANCE", hdr_rec1->apmpi_f_variance_total_mpisynctime, "RANKS_TOTAL_MPISYNCTIME_VARIANCE", hdr_rec1->apmpi_f_variance_total_mpisynctime,
...@@ -308,6 +318,7 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1, ...@@ -308,6 +318,7 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
hdr_rec2->base_rec.rank, hdr_rec2->base_rec.id, hdr_rec2->base_rec.rank, hdr_rec2->base_rec.id,
"RANKS_TOTAL_MPITIME_VARIANCE", hdr_rec2->apmpi_f_variance_total_mpitime, "RANKS_TOTAL_MPITIME_VARIANCE", hdr_rec2->apmpi_f_variance_total_mpitime,
"", "", ""); "", "", "");
if(sync_flag)
DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD],
hdr_rec2->base_rec.rank, hdr_rec2->base_rec.id, hdr_rec2->base_rec.rank, hdr_rec2->base_rec.id,
"RANKS_TOTAL_MPISYNCTIME_VARIANCE", hdr_rec2->apmpi_f_variance_total_mpisynctime, "RANKS_TOTAL_MPISYNCTIME_VARIANCE", hdr_rec2->apmpi_f_variance_total_mpisynctime,
...@@ -328,6 +339,8 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1, ...@@ -328,6 +339,8 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
"RANKS_TOTAL_MPITIME_VARIANCE", hdr_rec2->apmpi_f_variance_total_mpitime, "RANKS_TOTAL_MPITIME_VARIANCE", hdr_rec2->apmpi_f_variance_total_mpitime,
"", "", ""); "", "", "");
} }
if(sync_flag)
{
if (hdr_rec1->apmpi_f_variance_total_mpisynctime != hdr_rec2->apmpi_f_variance_total_mpisynctime) if (hdr_rec1->apmpi_f_variance_total_mpisynctime != hdr_rec2->apmpi_f_variance_total_mpisynctime)
{ {
printf("- "); printf("- ");
...@@ -341,6 +354,7 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1, ...@@ -341,6 +354,7 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
"RANKS_TOTAL_MPISYNCTIME_VARIANCE", hdr_rec2->apmpi_f_variance_total_mpisynctime, "RANKS_TOTAL_MPISYNCTIME_VARIANCE", hdr_rec2->apmpi_f_variance_total_mpisynctime,
"", "", ""); "", "", "");
} }
}
} }
} }
else else
...@@ -440,6 +454,8 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1, ...@@ -440,6 +454,8 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
"", "", ""); "", "", "");
} }
} }
if(sync_flag)
{
for(i = 0; i < APMPI_F_MPIOP_SYNCTIME_NUM_INDICES; i++) for(i = 0; i < APMPI_F_MPIOP_SYNCTIME_NUM_INDICES; i++)
{ {
if (!prf_rec2) if (!prf_rec2)
...@@ -473,14 +489,15 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1, ...@@ -473,14 +489,15 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
"", "", ""); "", "", "");
} }
} }
for(i = 0; i < APMPI_F_MPI_GLOBAL_NUM_INDICES; i++) }
//for(i = 0; i < APMPI_F_MPI_GLOBAL_NUM_INDICES; i++)
{ {
if (!prf_rec2) if (!prf_rec2)
{ {
printf("- "); printf("- ");
DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD],
prf_rec1->base_rec.rank, prf_rec1->base_rec.id, prf_rec1->base_rec.rank, prf_rec1->base_rec.id,
apmpi_f_mpi_global_counter_names[i], prf_rec1->fglobalcounters[i], apmpi_f_mpi_global_counter_names[0], prf_rec1->fglobalcounters[0],
"", "", ""); "", "", "");
} }
else if (!prf_rec1) else if (!prf_rec1)
...@@ -488,23 +505,55 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1, ...@@ -488,23 +505,55 @@ static void darshan_log_print_apmpi_rec_diff(void *file_rec1, char *file_name1,
printf("+ "); printf("+ ");
DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD],
prf_rec2->base_rec.rank, prf_rec2->base_rec.id, prf_rec2->base_rec.rank, prf_rec2->base_rec.id,
apmpi_f_mpi_global_counter_names[i], prf_rec2->fglobalcounters[i], apmpi_f_mpi_global_counter_names[0], prf_rec2->fglobalcounters[0],
"", "", ""); "", "", "");
} }
else if (prf_rec1->fglobalcounters[i] != prf_rec2->fglobalcounters[i]) else if (prf_rec1->fglobalcounters[0] != prf_rec2->fglobalcounters[0])
{ {
printf("- "); printf("- ");
DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD],
prf_rec1->base_rec.rank, prf_rec1->base_rec.id, prf_rec1->base_rec.rank, prf_rec1->base_rec.id,
apmpi_f_mpi_global_counter_names[i], prf_rec1->fglobalcounters[i], apmpi_f_mpi_global_counter_names[0], prf_rec1->fglobalcounters[0],
"", "", ""); "", "", "");
printf("+ "); printf("+ ");
DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD], DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD],
prf_rec2->base_rec.rank, prf_rec2->base_rec.id, prf_rec2->base_rec.rank, prf_rec2->base_rec.id,
apmpi_f_mpi_global_counter_names[i], prf_rec2->fglobalcounters[i], apmpi_f_mpi_global_counter_names[0], prf_rec2->fglobalcounters[0],
"", "", ""); "", "", "");
} }
} if(sync_flag)
{
if (!prf_rec2)
{
printf("- ");
DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD],
prf_rec1->base_rec.rank, prf_rec1->base_rec.id,
apmpi_f_mpi_global_counter_names[1], prf_rec1->fglobalcounters[1],
"", "", "");
}
else if (!prf_rec1)
{
printf("+ ");
DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD],
prf_rec2->base_rec.rank, prf_rec2->base_rec.id,
apmpi_f_mpi_global_counter_names[1], prf_rec2->fglobalcounters[1],
"", "", "");
}
else if (prf_rec1->fglobalcounters[1] != prf_rec2->fglobalcounters[1])
{
printf("- ");
DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD],
prf_rec1->base_rec.rank, prf_rec1->base_rec.id,
apmpi_f_mpi_global_counter_names[1], prf_rec1->fglobalcounters[1],
"", "", "");
printf("+ ");
DARSHAN_F_COUNTER_PRINT(darshan_module_names[APMPI_MOD],
prf_rec2->base_rec.rank, prf_rec2->base_rec.id,
apmpi_f_mpi_global_counter_names[1], prf_rec2->fglobalcounters[1],
"", "", "");
}
}
}
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment