Commit e2b599de authored by Sudheer Chunduri
Browse files

Add ALPS ID, a few bug fixes, and reorganization of code

parent 797de8a1
......@@ -20,12 +20,7 @@
'F'*0x1)
#define APXC_PERF_COUNTERS \
/* non-PAPI counters first */\
X(AR_RTR_GROUP) \
X(AR_RTR_CHASSIS) \
X(AR_RTR_BLADE) \
X(AR_RTR_NODE) \
/* PAPI counters after this point */\
/* PAPI counters */\
X(AR_RTR_0_0_INQ_PRF_INCOMING_FLIT_VC0) \
X(AR_RTR_0_0_INQ_PRF_INCOMING_FLIT_VC1) \
X(AR_RTR_0_0_INQ_PRF_INCOMING_FLIT_VC2) \
......@@ -467,6 +462,11 @@ enum apxc_cluster_modes
/*
 * Performance-counter record written by the APXC module.
 *
 * The runtime fills one of these in capture() and later reduces the
 * counters over a per-router communicator; the log utilities read it
 * back and print the location fields followed by every counter.
 *
 * NOTE(review): the log reader in this change byte-swaps group, chassis,
 * blade and node with DARSHAN_BSWAP64 although they are declared int, and
 * it does not swap `marked` -- confirm the swap widths match this layout.
 */
struct darshan_apxc_perf_record
{
/* common record header; capture() stores the record id and the MPI rank
 * here, and the reduction root sets rank to -1 */
struct darshan_base_record base_rec;
/* location fields copied from the runtime state in capture(); printed by
 * the log utilities as GROUP, CHASSIS, BLADE and NODE */
int group;
int chassis;
int blade;
int node;
/* set to 0 in capture() and to -1 on router rank 0 after the reduction;
 * shutdown only counts the record in the output size when it is -1 */
int marked;
/* counter values, one slot per index up to APXC_NUM_INDICES; slot i is
 * printed under apxc_counter_names[i] (presumably one slot per entry of
 * APXC_PERF_COUNTERS -- confirm against the enum definition) */
uint64_t counters[APXC_NUM_INDICES];
};
......@@ -479,6 +479,7 @@ struct darshan_apxc_header_record
int ngroups;
int memory_mode;
int cluster_mode;
uint64_t appid;
};
#endif /* __DARSHAN_APXC_LOG_FORMAT_H */
......@@ -6,6 +6,7 @@
#define _XOPEN_SOURCE 500
#define _GNU_SOURCE
#define csJOBID_ENV_STR "ALPS_APP_ID"
#include "darshan-runtime-config.h"
#include <stdio.h>
......@@ -133,11 +134,11 @@ static void capture(struct darshan_apxc_perf_record *rec,
(long long*) &rec->counters[AR_RTR_0_0_INQ_PRF_INCOMING_FLIT_VC0]);
PAPI_reset(apxc_runtime->PAPI_event_set);
rec->counters[AR_RTR_GROUP] = apxc_runtime->group;
rec->counters[AR_RTR_CHASSIS] = apxc_runtime->chassis;
rec->counters[AR_RTR_BLADE] = apxc_runtime->blade;
rec->counters[AR_RTR_NODE] = apxc_runtime->node;
rec->group = apxc_runtime->group;
rec->chassis = apxc_runtime->chassis;
rec->blade = apxc_runtime->blade;
rec->node = apxc_runtime->node;
rec->marked = 0;
rec->base_rec.id = rec_id;
rec->base_rec.rank = my_rank;
......@@ -311,6 +312,7 @@ static void apxc_mpi_redux(
apxc_runtime->header_record->memory_mode |= (1 << 31);
if (cmode != rcmode)
apxc_runtime->header_record->cluster_mode |= (1 << 31);
apxc_runtime->header_record->appid = atoi((char*)getenv( csJOBID_ENV_STR ));
}
/* count network dimensions */
......@@ -396,13 +398,14 @@ static void apxc_mpi_redux(
MPI_SUM,
0,
router_comm);
if (router_rank == 0)
{
for (i = 0; i < APXC_NUM_INDICES; i++)
{
apxc_runtime->perf_record->counters[i] /= router_count;
}
apxc_runtime->perf_record->base_rec.rank = -1;
apxc_runtime->perf_record->marked = -1;
}
PMPI_Comm_free(&router_comm);
......@@ -423,23 +426,23 @@ static void apxc_shutdown(
assert(apxc_runtime);
*apxc_buf_sz = 0;
/*if (my_rank == 0) {
*apxc_buf_sz += sizeof(*apxc_runtime->header_record); }
*/
if (apxc_runtime->perf_record->base_rec.rank == -1)
if (my_rank == 0) {
*apxc_buf_sz += sizeof(*apxc_runtime->header_record);
}
if (apxc_runtime->perf_record->marked == -1)
{
*apxc_buf_sz += sizeof( *apxc_runtime->perf_record);
*apxc_buf_sz += sizeof( *apxc_runtime->perf_record);
}
finalize_counters();
free(apxc_runtime);
apxc_runtime = NULL;
finalize_counters();
APXC_UNLOCK();
return;
}
/*
* Local variables:
* c-indent-level: 4
......
......@@ -50,7 +50,7 @@ struct darshan_mod_logutil_funcs apxc_logutils =
static int darshan_log_get_apxc_rec(darshan_fd fd, void** buf_p)
{
struct darshan_apxc_header_record *hdr_rec;
struct darshan_apxc_perf_record *perf_rec;
struct darshan_apxc_perf_record *prf_rec;
int rec_len;
char *buffer;
int i;
......@@ -116,15 +116,20 @@ static int darshan_log_get_apxc_rec(darshan_fd fd, void** buf_p)
DARSHAN_BSWAP32(&(hdr_rec->ngroups));
DARSHAN_BSWAP32(&(hdr_rec->memory_mode));
DARSHAN_BSWAP32(&(hdr_rec->cluster_mode));
DARSHAN_BSWAP32(&(hdr_rec->appid));
}
else
{
perf_rec = (struct darshan_apxc_perf_record*)buffer;
DARSHAN_BSWAP64(&(perf_rec->base_rec.id));
DARSHAN_BSWAP64(&(perf_rec->base_rec.rank));
prf_rec = (struct darshan_apxc_perf_record*)buffer;
DARSHAN_BSWAP64(&(prf_rec->base_rec.id));
DARSHAN_BSWAP64(&(prf_rec->base_rec.rank));
DARSHAN_BSWAP64(&(prf_rec->group));
DARSHAN_BSWAP64(&(prf_rec->chassis));
DARSHAN_BSWAP64(&(prf_rec->blade));
DARSHAN_BSWAP64(&(prf_rec->node));
for (i = 0; i < APXC_NUM_INDICES; i++)
{
DARSHAN_BSWAP64(&perf_rec->counters[i]);
DARSHAN_BSWAP64(&prf_rec->counters[i]);
}
}
}
......@@ -173,7 +178,7 @@ static void darshan_log_print_apxc_rec(void *rec, char *file_name,
int i;
static int first_rec = 1;
struct darshan_apxc_header_record *hdr_rec;
struct darshan_apxc_perf_record *perf_rec;
struct darshan_apxc_perf_record *prf_rec;
if (first_rec)
{
......@@ -200,17 +205,33 @@ static void darshan_log_print_apxc_rec(void *rec, char *file_name,
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
hdr_rec->base_rec.rank, hdr_rec->base_rec.id,
"cluster_mode_consistent", ((hdr_rec->cluster_mode & (1<<31)) == 0), "", "", "");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
hdr_rec->base_rec.rank, hdr_rec->base_rec.id,
"application_id", hdr_rec->appid, "", "", "");
first_rec = 0;
}
else
{
perf_rec = rec;
prf_rec = rec;
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec->base_rec.rank, prf_rec->base_rec.id,
"GROUP", prf_rec->group, "", "", "");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec->base_rec.rank, prf_rec->base_rec.id,
"CHASSIS", prf_rec->chassis, "", "", "");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec->base_rec.rank, prf_rec->base_rec.id,
"BLADE", prf_rec->blade, "", "", "");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec->base_rec.rank, prf_rec->base_rec.id,
"NODE", prf_rec->node, "", "", "");
for(i = 0; i < APXC_NUM_INDICES; i++)
{
DARSHAN_U_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
perf_rec->base_rec.rank, perf_rec->base_rec.id,
apxc_counter_names[i], perf_rec->counters[i],
prf_rec->base_rec.rank, prf_rec->base_rec.id,
apxc_counter_names[i], prf_rec->counters[i],
"", "", "");
}
}
......@@ -284,7 +305,10 @@ static void darshan_log_print_apxc_rec_diff(void *file_rec1, char *file_name1,
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
hdr_rec1->base_rec.rank, hdr_rec1->base_rec.id,
"cluster_mode_consistent", ((hdr_rec1->cluster_mode & (1<<31)) == 0), "", "", "");
printf("- ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
hdr_rec1->base_rec.rank, hdr_rec1->base_rec.id,
"appid", hdr_rec1->appid, "", "", "");
}
else if (!hdr_rec1)
{
......@@ -315,6 +339,10 @@ static void darshan_log_print_apxc_rec_diff(void *file_rec1, char *file_name1,
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
hdr_rec2->base_rec.rank, hdr_rec2->base_rec.id,
"cluster_mode_consistent", ((hdr_rec2->cluster_mode & (1<<31)) == 0), "", "", "");
printf("+ ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
hdr_rec2->base_rec.rank, hdr_rec2->base_rec.id,
"appid", hdr_rec2->appid, "", "", "");
}
else
{
......@@ -400,10 +428,107 @@ static void darshan_log_print_apxc_rec_diff(void *file_rec1, char *file_name1,
hdr_rec2->base_rec.rank, hdr_rec2->base_rec.id,
"cluster_mode_consistent", ((hdr_rec2->cluster_mode & (1<<31)) == 0), "", "", "");
}
if (hdr_rec1->appid != hdr_rec2->appid)
{
printf("- ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
hdr_rec1->base_rec.rank, hdr_rec1->base_rec.id,
"application_id", hdr_rec1->appid, "", "", "");
printf("+ ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
hdr_rec2->base_rec.rank, hdr_rec2->base_rec.id,
"application_id", hdr_rec2->appid, "", "", "");
}
}
}
else
{
if (!prf_rec2)
{
printf("- ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec1->base_rec.rank, prf_rec1->base_rec.id,
"GROUP", prf_rec1->group, "", "", "");
printf("- ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec1->base_rec.rank, prf_rec1->base_rec.id,
"CHASSIS", prf_rec1->chassis, "", "", "");
printf("- ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec1->base_rec.rank, prf_rec1->base_rec.id,
"BLADE", prf_rec1->blade, "", "", "");
printf("- ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec1->base_rec.rank, prf_rec1->base_rec.id,
"NODE", prf_rec1->node, "", "", "");
}
else if (!prf_rec1)
{
printf("+ ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec2->base_rec.rank, prf_rec2->base_rec.id,
"GROUP", prf_rec2->group, "", "", "");
printf("+ ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec2->base_rec.rank, prf_rec2->base_rec.id,
"CHASSIS", prf_rec2->chassis, "", "", "");
printf("+ ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec2->base_rec.rank, prf_rec2->base_rec.id,
"BLADE", prf_rec2->blade, "", "", "");
printf("+ ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec2->base_rec.rank, prf_rec2->base_rec.id,
"NODE", prf_rec2->node, "", "", "");
}
else {
if (prf_rec1->group != prf_rec2->group)
{
printf("- ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec1->base_rec.rank, prf_rec1->base_rec.id,
"GROUP", prf_rec1->group, "", "", "");
printf("+ ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec2->base_rec.rank, prf_rec2->base_rec.id,
"GROUP", prf_rec2->group, "", "", "");
}
if (prf_rec1->chassis != prf_rec2->chassis)
{
printf("- ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec1->base_rec.rank, prf_rec1->base_rec.id,
"CHASSIS", prf_rec1->chassis, "", "", "");
printf("+ ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec2->base_rec.rank, prf_rec2->base_rec.id,
"CHASSIS", prf_rec2->chassis, "", "", "");
}
if (prf_rec1->blade != prf_rec2->blade)
{
printf("- ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec1->base_rec.rank, prf_rec1->base_rec.id,
"BLADE", prf_rec1->blade, "", "", "");
printf("+ ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec2->base_rec.rank, prf_rec2->base_rec.id,
"BLADE", prf_rec2->blade, "", "", "");
}
if (prf_rec1->node != prf_rec2->node)
{
printf("- ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec1->base_rec.rank, prf_rec1->base_rec.id,
"NODE", prf_rec1->node, "", "", "");
printf("+ ");
DARSHAN_I_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
prf_rec2->base_rec.rank, prf_rec2->base_rec.id,
"NODE", prf_rec2->node, "", "", "");
}
}
int i;
/* router tile record */
for(i = 0; i < APXC_NUM_INDICES; i++)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment