Commit 30b57bd4 authored by Kevin Harms's avatar Kevin Harms
Browse files

runtime and utils code

parent eee2d2c6
/*
* Copyright (C) 2017 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
*/
#define _XOPEN_SOURCE 500
#define _GNU_SOURCE
#include "darshan-runtime-config.h"
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>
#include <assert.h>
#include <papi.h>
#include "uthash.h"
#include "darshan.h"
#include "darshan-dynamic.h"
#include "darshan-apxc-log-format.h"
/*
* PAPI_events are defined by the Aries counters listed in the log header.
*
* APXC_RTR_COUNTERS is an X-macro list that is not defined in this file
* (presumably it comes from darshan-apxc-log-format.h -- confirm). Each
* X(a) entry in that list is turned into the string "a" here, so the array
* holds one name per list entry.
*
* initialize_counters() walks this array until it meets the string
* "APXC_RTR_NUM_INDICES", so the list has to contain that name after the
* real counter names.
*/
/* QUOTE stringifies its argument; X emits one array element per entry */
#define QUOTE(a) #a
#define X(a) QUOTE(a),
static char* PAPI_events[] =
{
APXC_RTR_COUNTERS
};
/* the helper macros are only needed to build the table above */
#undef X
#undef QUOTE
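/*
* APXC (AutoPerf Cray XC) module
*
* This module does not intercept any system calls. It reads the node
* coordinates from /proc/cray_xt/cname at initialization and samples the
* Aries router counters through PAPI between initialization and shutdown.
*/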
/*
* Global runtime struct for tracking data needed at runtime
*
* One instance is allocated by apxc_runtime_initialize() and released by
* apxc_shutdown().
*/
struct apxc_runtime
{
/* header record; only registered when my_rank == 0, otherwise left NULL */
struct darshan_apxc_header_record *header_record;
/* router record registered by every process */
struct darshan_apxc_router_record *rtr_record;
/* darshan record id generated for the header record (rank 0 only) */
darshan_record_id header_id;
/* darshan record id generated from the router record name */
darshan_record_id rtr_id;
/* PAPI event set handle used to start/stop the counters */
int PAPI_event_set;
/* number of PAPI_events entries walked by initialize_counters() */
int PAPI_event_count;
/* coordinates derived from /proc/cray_xt/cname by get_coords() */
int group;
int chassis;
int blade;
int node;
};
/* module state; NULL until apxc_runtime_initialize() succeeds and again
* after apxc_shutdown() */
static struct apxc_runtime *apxc_runtime = NULL;
/* recursive mutex guarding apxc_runtime (see APXC_LOCK/APXC_UNLOCK) */
static pthread_mutex_t apxc_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
/* my_rank indicates the MPI rank of this process; its address is handed to
* darshan_core_register_module() during initialization */
static int my_rank = -1;
/* internal helper functions for the APXC module */
void apxc_runtime_initialize(void);
/* forward declaration for shutdown function needed to interface with darshan-core */
static void apxc_shutdown(MPI_Comm mod_comm, darshan_record_id *shared_recs, int shared_rec_count, void **buffer, int *size);
/* macros for obtaining/releasing the APXC module lock */
#define APXC_LOCK() pthread_mutex_lock(&apxc_runtime_mutex)
#define APXC_UNLOCK() pthread_mutex_unlock(&apxc_runtime_mutex)
/*
* Initialize counters using PAPI
*/
static void initialize_counters (void)
{
int i = 0;
int code = 0;
PAPI_library_init(PAPI_VER_CURRENT);
PAPI_create_eventset(&apxc_runtime->PAPI_event_set);
while(strcmp(PAPI_events[i], "APXC_RTR_NUM_INDICES") != 0)
{
PAPI_event_name_to_code(PAPI_events[i], &code);
PAPI_add_event(apxc_runtime->PAPI_event_set, code);
i++;
}
apxc_runtime->PAPI_event_count = i;
PAPI_start(apxc_runtime->PAPI_event_set);
return;
}
/*
 * Tear down the PAPI state created by initialize_counters(): empty the
 * event set, destroy it, then shut the PAPI library down.
 */
static void finalize_counters (void)
{
    int *event_set = &apxc_runtime->PAPI_event_set;

    PAPI_cleanup_eventset(*event_set);
    PAPI_destroy_eventset(event_set);
    PAPI_shutdown();
}
/*
 * Read this node's Cray name from /proc/cray_xt/cname and derive the
 * group/chassis/blade/node coordinates stored in apxc_runtime.
 *
 * If the file cannot be opened or does not match the expected format the
 * coordinates in apxc_runtime are left untouched.
 */
static void get_coords (void)
{
    FILE *f = fopen("/proc/cray_xt/cname","r");
    char a, b, c, d;
    int racki, rackj, chassis, blade, nic;
    int nfields;

    if (f == NULL)
    {
        return;
    }

    /* format example: c1-0c1s2n1 c3-0c2s15n3 */
    nfields = fscanf(f, "%c%d-%d%c%d%c%d%c%d",
                     &a, &racki, &rackj, &b, &chassis, &c, &blade, &d, &nic);
    fclose(f);

    /* all nine conversions must succeed; otherwise some of the integers
     * below would be read while still uninitialized */
    if (nfields != 9)
    {
        return;
    }

    apxc_runtime->group   = racki/2 + rackj*6;
    apxc_runtime->chassis = (racki%2) * 3 + chassis;
    apxc_runtime->blade   = blade;
    apxc_runtime->node    = nic;

    return;
}
/*
 * Function which updates all the counter data
 *
 * Stops the PAPI event set, storing the counter values in rec->counters,
 * resets the event set, and fills in the coordinates and base record
 * fields of rec.
 *
 * rec:    router record to fill in
 * rec_id: darshan record id to store in the record
 */
static void capture(struct darshan_apxc_router_record *rec,
                    darshan_record_id rec_id)
{
    /* NOTE(review): the cast assumes rec->counters has long long sized
     * elements -- confirm against the log format header */
    PAPI_stop(apxc_runtime->PAPI_event_set, (long long*) rec->counters);
    PAPI_reset(apxc_runtime->PAPI_event_set);

    rec->coord[0] = apxc_runtime->group;
    rec->coord[1] = apxc_runtime->chassis;
    rec->coord[2] = apxc_runtime->blade;
    rec->coord[3] = apxc_runtime->node;

    rec->base_rec.id = rec_id;
    rec->base_rec.rank = my_rank;

    return;
}
/*
 * Register the APXC module and its records with darshan-core, read the
 * node coordinates and start the PAPI counters.
 *
 * Does nothing if the module is already initialized. On any failure the
 * module is unregistered again and apxc_runtime stays NULL.
 */
void apxc_runtime_initialize(void)
{
    /* space needed for one header record plus one router record */
    const int apxc_rec_size =
        (int)(sizeof(struct darshan_apxc_header_record) +
              sizeof(struct darshan_apxc_router_record));
    int apxc_buf_size = apxc_rec_size;
    /* large enough for the prefix plus three full-width ints */
    char rtr_rec_name[64];

    APXC_LOCK();

    /* don't do anything if already initialized */
    if(apxc_runtime)
    {
        APXC_UNLOCK();
        return;
    }

    /* register the APXC module with the darshan-core component */
    darshan_core_register_module(
        DARSHAN_APXC_MOD,
        &apxc_shutdown,
        &apxc_buf_size,
        &my_rank,
        NULL);

    /* not enough memory to fit crayxc module record; compare as ints so a
     * negative size is not converted to a huge unsigned value */
    if(apxc_buf_size < apxc_rec_size)
        goto fail;

    /* initialize module's global state (zero-filled) */
    apxc_runtime = calloc(1, sizeof(*apxc_runtime));
    if(!apxc_runtime)
        goto fail;

    if (my_rank == 0)
    {
        apxc_runtime->header_id = darshan_core_gen_record_id("darshan-crayxc-header");

        /* register the crayxc file record with darshan-core */
        apxc_runtime->header_record = darshan_core_register_record(
            apxc_runtime->header_id,
            NULL,
            DARSHAN_APXC_MOD,
            sizeof(struct darshan_apxc_header_record),
            NULL);
        if(!(apxc_runtime->header_record))
            goto fail;

        apxc_runtime->header_record->base_rec.id = apxc_runtime->header_id;
        apxc_runtime->header_record->base_rec.rank = my_rank;
    }

    get_coords();

    /* NOTE(review): the three numbers are concatenated without a
     * separator, so different coordinates can yield the same name */
    snprintf(rtr_rec_name, sizeof(rtr_rec_name), "darshan-crayxc-rtr-%d%d%d",
             apxc_runtime->group, apxc_runtime->chassis, apxc_runtime->blade);
    apxc_runtime->rtr_id = darshan_core_gen_record_id(rtr_rec_name);
    apxc_runtime->rtr_record = darshan_core_register_record(
        apxc_runtime->rtr_id,
        NULL,
        DARSHAN_APXC_MOD,
        sizeof(struct darshan_apxc_router_record),
        NULL);
    if(!(apxc_runtime->rtr_record))
        goto fail;

    initialize_counters();

    APXC_UNLOCK();
    return;

fail:
    /* common failure path: undo the registration and drop any state */
    darshan_core_unregister_module(DARSHAN_APXC_MOD);
    free(apxc_runtime);
    apxc_runtime = NULL;
    APXC_UNLOCK();
    return;
}
/********************************************************************************
 * shutdown function exported by this module for coordinating with darshan-core *
 ********************************************************************************/
/*
 * Pass data for the crayxc module back to darshan-core to log to file.
 *
 * Captures the counters, sums them over all processes that share a router
 * (same group/chassis/blade) and averages the sum on the first rank of
 * each router communicator. Every other rank drops its router record by
 * shrinking *size. Rank 0 additionally fills in the header record.
 *
 * mod_comm, shared_recs, shared_rec_count and buffer are not used.
 * size: in/out, number of bytes this rank contributes to the log.
 */
static void apxc_shutdown(
    MPI_Comm mod_comm,
    darshan_record_id *shared_recs,
    int shared_rec_count,
    void **buffer,
    int *size)
{
    /* bit widths used to pack the coordinates into a communicator color:
     * blade ids go up to 15 (see the cname example in get_coords) and the
     * chassis value computed by get_coords spans 0..5 */
    enum { APXC_BLADE_BITS = 4, APXC_CHASSIS_BITS = 3 };
    const int blade_mask = (1 << APXC_BLADE_BITS) - 1;
    const int chassis_mask = (1 << (APXC_BLADE_BITS + APXC_CHASSIS_BITS)) - 1;
    int i;
    int color;
    int router_rank;
    int router_count;
    int chassis_count;
    int group_count;
    MPI_Comm router_comm;
    MPI_Comm chassis_comm;
    MPI_Comm group_comm;

    APXC_LOCK();

    if (!apxc_runtime)
    {
        APXC_UNLOCK();
        return;
    }

    /* collect data */
    capture(apxc_runtime->rtr_record, apxc_runtime->rtr_id);

    /*
     * reduce data
     *
     *  aggregate data from processes which share the same blade and avg.
     *
     */
    color = (apxc_runtime->group << (APXC_CHASSIS_BITS + APXC_BLADE_BITS)) +
            (apxc_runtime->chassis << APXC_BLADE_BITS) +
            apxc_runtime->blade;

    /* router: full color; chassis: blade bits cleared; group: blade and
     * chassis bits cleared */
    PMPI_Comm_split(MPI_COMM_WORLD, color, my_rank, &router_comm);
    PMPI_Comm_split(MPI_COMM_WORLD, (color & ~blade_mask), my_rank, &chassis_comm);
    PMPI_Comm_split(MPI_COMM_WORLD, (color & ~chassis_mask), my_rank, &group_comm);

    PMPI_Comm_size(chassis_comm, &chassis_count);
    PMPI_Comm_size(group_comm, &group_count);
    PMPI_Comm_rank(router_comm, &router_rank);
    PMPI_Comm_size(router_comm, &router_count);

    /* MPI does not allow the send and receive buffers to alias; the root
     * reduces in place and the other ranks only send */
    PMPI_Reduce((router_rank == 0) ? MPI_IN_PLACE
                                   : (void *)apxc_runtime->rtr_record->counters,
                apxc_runtime->rtr_record->counters,
                apxc_runtime->PAPI_event_count,
                MPI_LONG_LONG_INT,
                MPI_SUM,
                0,
                router_comm);

    if (router_rank == 0)
    {
        for (i = 0; i < apxc_runtime->PAPI_event_count; i++)
        {
            apxc_runtime->rtr_record->counters[i] /= router_count;
        }
    }
    else
    {
        /* discard other ranks non-unique router blades */
        *size -= sizeof(*apxc_runtime->rtr_record);
    }

    if (my_rank == 0)
    {
        /* NOTE(review): these are the sizes of rank 0's router, chassis
         * and group communicators (process counts), not counts of distinct
         * blades/chassis/groups -- confirm this is the intended meaning */
        apxc_runtime->header_record->nblades  = router_count;
        apxc_runtime->header_record->nchassis = chassis_count;
        apxc_runtime->header_record->ngroups  = group_count;
    }

    PMPI_Comm_free(&router_comm);
    PMPI_Comm_free(&chassis_comm);
    PMPI_Comm_free(&group_comm);

    /* release the PAPI event set created by initialize_counters() */
    finalize_counters();

    free(apxc_runtime);
    apxc_runtime = NULL;

    APXC_UNLOCK();
    return;
}
/*
* Local variables:
* c-indent-level: 4
* c-basic-offset: 4
* End:
*
* vim: ts=8 sts=4 sw=4 expandtab
*/
#
# AutoPerf Make rules for Darshan
#
# Adds the APXC (Cray XC) log utility sources to the Darshan utility build:
# search path, log format header, installed header, static/dynamic objects
# and the compiler flags that enable the module.
#

# let make find darshan-apxc-logutils.c in the module's util directory
VPATH += :$(srcdir)/../modules/autoperf/crayxc/util

DARSHAN_MOD_LOG_FORMATS += $(srcdir)/../modules/autoperf/crayxc/darshan-apxc-log-format.h
DARSHAN_MOD_LOGUTIL_HEADERS += darshan-apxc-logutils.h
DARSHAN_STATIC_MOD_OBJS += darshan-apxc-logutils.o
DARSHAN_DYNAMIC_MOD_OBJS += darshan-apxc-logutils.po

CFLAGS += \
	-DDARSHAN_USE_APXC \
	-I$(srcdir)/../modules/autoperf/crayxc \
	-I$(srcdir)/../modules/autoperf/crayxc/util

CFLAGS_SHARED += \
	-DDARSHAN_USE_APXC \
	-I$(srcdir)/../modules/autoperf/crayxc \
	-I$(srcdir)/../modules/autoperf/crayxc/util

# recipe lines must start with a tab character
darshan-apxc-logutils.o: darshan-apxc-logutils.c darshan-logutils.h darshan-apxc-logutils.h $(DARSHAN_LOG_FORMAT) $(srcdir)/../modules/autoperf/crayxc/darshan-apxc-log-format.h | uthash-1.9.2
	$(CC) $(CFLAGS) -c $< -o $@

darshan-apxc-logutils.po: darshan-apxc-logutils.c darshan-logutils.h darshan-apxc-logutils.h $(DARSHAN_LOG_FORMAT) $(srcdir)/../modules/autoperf/crayxc/darshan-apxc-log-format.h | uthash-1.9.2
	$(CC) $(CFLAGS_SHARED) -c $< -o $@
/*
* Copyright (C) 2018 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
*/
#define _GNU_SOURCE
#include "darshan-util-config.h"
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include "darshan-logutils.h"
#include "darshan-apxc-log-format.h"
/* counter name strings for the APXC module: every X(a) entry of
* APXC_RTR_COUNTERS is stringified into one array element */
#define X(a) #a,
char *apxc_counter_names[] = {
APXC_RTR_COUNTERS
};
#undef X
/* forward declarations of the APXC log utility callbacks defined below */
static int darshan_log_get_apxc_rec(darshan_fd fd, void** buf_p);
static int darshan_log_put_apxc_rec(darshan_fd fd, void* buf);
static void darshan_log_print_apxc_rec(void *file_rec,
char *file_name, char *mnt_pt, char *fs_type);
static void darshan_log_print_apxc_description(int ver);
static void darshan_log_print_apxc_rec_diff(void *file_rec1, char *file_name1,
void *file_rec2, char *file_name2);
static void darshan_log_agg_apxc_recs(void *rec, void *agg_rec, int init_flag);
/* callback table exported for the APXC module; each member points at the
* matching static function in this file */
struct darshan_mod_logutil_funcs apxc_logutils =
{
.log_get_record = &darshan_log_get_apxc_rec,
.log_put_record = &darshan_log_put_apxc_rec,
.log_print_record = &darshan_log_print_apxc_rec,
.log_print_description = &darshan_log_print_apxc_description,
.log_print_diff = &darshan_log_print_apxc_rec_diff,
.log_agg_records = &darshan_log_agg_apxc_recs
};
/*
 * Read the next APXC record from the log.
 *
 * The first call in a process reads a header record, every later call
 * reads a router record (tracked by a function-local static flag, which is
 * never reset).
 *
 * fd:    open darshan log
 * buf_p: in/out; if *buf_p is NULL a buffer big enough for a router record
 *        is allocated and handed back through *buf_p on success, otherwise
 *        the caller's buffer is filled in and remains owned by the caller.
 *
 * Returns 1 if a record was read, 0 if there is nothing (more) to read or
 * the module version is 0, and -1 on error. A buffer allocated here is
 * released again whenever the return value is not 1.
 */
static int darshan_log_get_apxc_rec(darshan_fd fd, void** buf_p)
{
    struct darshan_apxc_header_record *hdr_rec;
    struct darshan_apxc_router_record *rtr_rec;
    int rec_len = 0;
    int is_header = 0;
    int allocated = 0;
    char *buffer;
    int i;
    int ret = -1;
    static int first_rec = 1;

    if(fd->mod_map[DARSHAN_APXC_MOD].len == 0)
        return(0);

    /* checked before allocating so this early return cannot leak */
    if (fd->mod_ver[DARSHAN_APXC_MOD] == 0)
    {
        printf("Either unknown or debug version: %d\n",
               fd->mod_ver[DARSHAN_APXC_MOD]);
        return(0);
    }

    if (!*buf_p)
    {
        /* assume this is the largest possible record size */
        buffer = malloc(sizeof(struct darshan_apxc_router_record));
        if (!buffer)
        {
            return(-1);
        }
        allocated = 1;
    }
    else
    {
        buffer = *buf_p;
    }

    /* versions older than DARSHAN_APXC_VER would need a conversion step;
     * none exists, so such logs are reported as an error (ret stays -1) */

    /* v1, current version */
    if (fd->mod_ver[DARSHAN_APXC_MOD] == DARSHAN_APXC_VER)
    {
        if (first_rec)
        {
            rec_len = sizeof(struct darshan_apxc_header_record);
            is_header = 1;
            first_rec = 0;
        }
        else
        {
            rec_len = sizeof(struct darshan_apxc_router_record);
        }

        ret = darshan_log_get_mod(fd, DARSHAN_APXC_MOD, buffer, rec_len);
    }

    if (ret != rec_len)
    {
        /* only release memory this function allocated itself */
        if (allocated)
            free(buffer);
        return((ret < 0) ? -1 : 0);
    }

    /* swap bytes if necessary */
    if(fd->swap_flag)
    {
        if (is_header)
        {
            hdr_rec = (struct darshan_apxc_header_record*)buffer;
            DARSHAN_BSWAP64(&(hdr_rec->base_rec.id));
            DARSHAN_BSWAP64(&(hdr_rec->base_rec.rank));
            DARSHAN_BSWAP64(&(hdr_rec->nblades));
            DARSHAN_BSWAP64(&(hdr_rec->nchassis));
            DARSHAN_BSWAP64(&(hdr_rec->ngroups));
        }
        else
        {
            rtr_rec = (struct darshan_apxc_router_record*)buffer;
            DARSHAN_BSWAP64(&(rtr_rec->base_rec.id));
            DARSHAN_BSWAP64(&(rtr_rec->base_rec.rank));
            for (i = 0; i < 4; i++)
            {
                DARSHAN_BSWAP64(&rtr_rec->coord[i]);
            }
            for (i = 0; i < APXC_RTR_NUM_INDICES; i++)
            {
                DARSHAN_BSWAP64(&rtr_rec->counters[i]);
            }
        }
    }

    *buf_p = buffer;
    return(1);
}
/*
 * Write one APXC record to the log.
 *
 * The first call in a process writes buf as a header record, every later
 * call writes it as a router record.
 *
 * Returns 0 on success and -1 if darshan_log_put_mod() reports an error.
 */
static int darshan_log_put_apxc_rec(darshan_fd fd, void* buf)
{
    static int first_rec = 1;
    int nbytes;

    nbytes = first_rec ? sizeof(struct darshan_apxc_header_record)
                       : sizeof(struct darshan_apxc_router_record);
    first_rec = 0;

    if (darshan_log_put_mod(fd, DARSHAN_APXC_MOD, buf,
                            nbytes, DARSHAN_APXC_VER) < 0)
    {
        return(-1);
    }

    return(0);
}
/*
 * Print one APXC record.
 *
 * The first record passed in is printed as a header record (groups,
 * chassis, blades); all later ones are printed as router records (the four
 * coordinates followed by every router counter). mnt_pt and fs_type are
 * not used; empty strings are printed in their place.
 */
static void darshan_log_print_apxc_rec(void *rec, char *file_name,
    char *mnt_pt, char *fs_type)
{
    static int first_rec = 1;
    int idx;

    if (!first_rec)
    {
        struct darshan_apxc_router_record *router = rec;
        char *coord_name[] = {"group", "chassis", "blade", "node"};

        for (idx = 0; idx < 4; idx++)
        {
            DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
                router->base_rec.rank, router->base_rec.id,
                coord_name[idx], router->coord[idx],
                file_name, "", "");
        }

        for (idx = 0; idx < APXC_RTR_NUM_INDICES; idx++)
        {
            DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
                router->base_rec.rank, router->base_rec.id,
                apxc_counter_names[idx], router->counters[idx],
                file_name, "", "");
        }

        return;
    }

    /* very first record: the header */
    {
        struct darshan_apxc_header_record *header = rec;

        DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
            header->base_rec.rank, header->base_rec.id,
            "groups", header->ngroups, file_name, "", "");
        DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
            header->base_rec.rank, header->base_rec.id,
            "chassis", header->nchassis, file_name, "", "");
        DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_APXC_MOD],
            header->base_rec.rank, header->base_rec.id,
            "blades", header->nblades, file_name, "", "");
    }

    first_rec = 0;
}
/*
 * Print the human-readable description of the APXC counters to stdout.
 *
 * ver: module version number, echoed in the first line of the output.
 */
static void darshan_log_print_apxc_description(int ver)
{
    printf("\n# description of APXC counters: %d\n", ver);
    printf("#   groups: total number of groups.\n");
    printf("#   chassis: total number of chassis.\n");
    printf("#   blades: total number of blades.\n");
    printf("#   router:\n");
    printf("#     group:   group this router is in.\n");
    /* spelling fixed: the text previously read "chassies" */
    printf("#     chassis: chassis this router is in.\n");
    printf("#     blade:   blade this router is in.\n");
    printf("#     node:    node connected to this router.\n");
    printf("#     AR_RTR_x_y_INQ_PRF_INCOMING_FLIT_VC[0-7]: flits on VCz\n");
    printf("#     AR_RTR_x_y_INQ_PRF_ROWBUS_STALL_CNT: stalls on x y tile\n");

    return;
}
/*
 * Record diff callback for the APXC module.
 *
 * Currently a stub: no comparison is performed and nothing is printed.
 */
static void darshan_log_print_apxc_rec_diff(void *file_rec1, char *file_name1,
    void *file_rec2, char *file_name2)
{
    /* arguments are intentionally ignored */
    (void)file_rec1;
    (void)file_name1;
    (void)file_rec2;
    (void)file_name2;
}
static void darshan_log_agg_apxc_recs(void *rec, void *agg_rec, int init_flag)
{
int i;