GitLab maintenance scheduled for Tomorrow, 2019-09-24, from 12:00 to 13:00 CT - Services will be unavailable during this time.

Commit 835e28d9 authored by Kevin Harms

Updates to align with latest modular code base

parent 760e827e
......@@ -9,30 +9,38 @@
#include "darshan-log-format.h"
/*
 * X-macro tables describing the BGQ module counters.  Every entry is
 * X(enum_name, "description"); the includer defines X() to pick the
 * expansion it needs.  The *_NUM_INDICES entry must remain last so that
 * it equals the number of counters that precede it.
 */
#define BGQ_COUNTERS \
    X(BGQ_CSJOBID, "control system jobid") \
    X(BGQ_NNODES, "number of BGQ compute nodes") \
    X(BGQ_RANKSPERNODE, "number of MPI ranks per node") \
    X(BGQ_DDRPERNODE, "size in MB of DDR3 per node") \
    X(BGQ_INODES, "number of i/o nodes") \
    X(BGQ_ANODES, "dimension of A torus") \
    X(BGQ_BNODES, "dimension of B torus") \
    X(BGQ_CNODES, "dimension of C torus") \
    X(BGQ_DNODES, "dimension of D torus") \
    X(BGQ_ENODES, "dimension of E torus") \
    X(BGQ_TORUSENABLED, "which dimensions are torus") \
    X(BGQ_NUM_INDICES, "end of counters")

#define BGQ_F_COUNTERS \
    X(BGQ_F_TIMESTAMP, "timestamp when data was collected") \
    X(BGQ_F_NUM_INDICES, "end of counters")

/* expand each table entry to just its enum name */
#define X(a, b) a,

/* integer counters for the "BGQ" example module
 *
 * The enumerators come solely from BGQ_COUNTERS; listing them by hand as
 * well would declare every constant twice.
 */
enum darshan_bgq_indices
{
    BGQ_COUNTERS
};

/* floating point counters for the "BGQ" example module */
enum darshan_bgq_f_indices
{
    BGQ_F_COUNTERS
};
#undef X
/* the darshan_bgq_record structure encompasses the high-level data/counters
* which would actually be logged to file by Darshan for the "BGQ" example
......
......@@ -49,7 +49,7 @@
X(DARSHAN_MPIIO_MOD, "MPI-IO", mpiio_logutils) \
X(DARSHAN_HDF5_MOD, "HDF5", hdf5_logutils) \
X(DARSHAN_PNETCDF_MOD, "PNETCDF", pnetcdf_logutils) \
X(DARSHAN_BGQ_MODE, "BG/Q", bgq_logutils)
X(DARSHAN_BGQ_MOD, "BG/Q", bgq_logutils)
/* unique identifiers to distinguish between available darshan modules */
/* NOTES: - valid ids range from [0...DARSHAN_MAX_MODS-1]
......
......@@ -49,13 +49,14 @@ static int instrumentation_disabled = 0;
/* my_rank indicates the MPI rank of this process */
static int my_rank = -1;
/* passed by address to darshan_core_register_module() at registration */
static int darshan_mem_alignment = 1;

/* internal helper functions for the "BGQ" module */
void bgq_runtime_initialize(void);

/* forward declaration for module functions needed to interface with darshan-core */
static void bgq_begin_shutdown(void);
/* single prototype, matching the five-argument definition further down */
static void bgq_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs, int shared_rec_count, void **buffer, int *size);
static void bgq_shutdown(void);
static void bgq_setup_reduction(darshan_record_id *shared_recs,int *shared_rec_count,void **send_buf,void **recv_buf,int *rec_size);
static void bgq_record_reduction_op(void* infile_v,void* inoutfile_v,int *len,MPI_Datatype *datatype);
......@@ -75,7 +76,6 @@ static void capture(struct darshan_bgq_record *rec)
rec->counters[BGQ_CSJOBID] = Kernel_GetJobID();
rec->counters[BGQ_RANKSPERNODE] = Kernel_ProcessCount();
rec->counters[BGQ_INODES] = MPIX_IO_node();
r = Kernel_GetPersonality(&person, sizeof(person));
......@@ -88,11 +88,11 @@ static void capture(struct darshan_bgq_record *rec)
rec->counters[BGQ_DNODES] = person.Network_Config.Dnodes;
rec->counters[BGQ_ENODES] = person.Network_Config.Enodes;
rec->counters[BGQ_TORUSENABLED] =
((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_A) << 0) |
((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_B) << 1) |
((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_C) << 2) |
((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_D) << 3) |
((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_E) << 4);
(((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_A) == ND_ENABLE_TORUS_DIM_A) << 0) |
(((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_B) == ND_ENABLE_TORUS_DIM_B) << 1) |
(((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_C) == ND_ENABLE_TORUS_DIM_C) << 2) |
(((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_D) == ND_ENABLE_TORUS_DIM_D) << 3) |
(((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_E) == ND_ENABLE_TORUS_DIM_E) << 4);
rec->counters[BGQ_DDRPERNODE] = person.DDR_Config.DDRSizeMB;
}
......@@ -114,8 +114,6 @@ void bgq_runtime_initialize()
struct darshan_module_funcs bgq_mod_fns =
{
.begin_shutdown = bgq_begin_shutdown,
.setup_reduction = bgq_setup_reduction,
.record_reduction_op = bgq_record_reduction_op,
.get_output_data = bgq_get_output_data,
.shutdown = bgq_shutdown
};
......@@ -132,8 +130,9 @@ void bgq_runtime_initialize()
darshan_core_register_module(
DARSHAN_BGQ_MOD,
&bgq_mod_fns,
&my_rank,
&mem_limit,
NULL);
&darshan_mem_alignment);
/* return if no memory assigned by darshan-core */
if(mem_limit == 0)
......@@ -162,7 +161,7 @@ void bgq_runtime_initialize()
recname,
strlen(recname),
1,
DARSHAN_POSIX_MOD,
DARSHAN_BGQ_MOD,
&bgq_runtime->record.f_id,
&bgq_runtime->record.alignment);
......@@ -192,8 +191,18 @@ static void bgq_begin_shutdown()
return;
}
/*
 * qsort() comparator: orders two uint64_t values ascending.
 *
 * The array sorted with this comparator holds uint64_t elements and
 * qsort() is given sizeof(uint64_t) as the element size, so the full
 * 64-bit value must be compared.  Reading the elements through an int
 * pointer would look at only half of each element.
 *
 * Returns -1, 0 or 1 when *p1 is less than, equal to or greater than *p2.
 */
static int cmpr(const void *p1, const void *p2)
{
    const uint64_t a = *(const uint64_t *)p1;
    const uint64_t b = *(const uint64_t *)p2;

    return (a > b) - (a < b);
}
/* Pass output data for the "BGQ" module back to darshan-core to log to file. */
static void bgq_get_output_data(
MPI_Comm mod_comm,
darshan_record_id *shared_recs,
int shared_rec_count,
void **buffer,
int *size)
{
......@@ -202,6 +211,46 @@ static void bgq_get_output_data(
* I/O records, and set the output size according to the number of records
* currently being tracked.
*/
int nprocs;
int result;
uint64_t *ion_ids;
if (my_rank == 0)
{
DARSHAN_MPI_CALL(MPI_Comm_size)(mod_comm, &nprocs);
ion_ids = malloc(sizeof(*ion_ids)*nprocs);
result = (ion_ids != NULL);
}
DARSHAN_MPI_CALL(MPI_Bcast)(&result, 1, MPI_INT, 0, mod_comm);
if (bgq_runtime && result)
{
int i, found;
uint64_t val;
DARSHAN_MPI_CALL(MPI_Gather)(&bgq_runtime->record.counters[BGQ_INODES],
1,
MPI_INT,
ion_ids,
1,
MPI_INT,
0,
mod_comm);
if (my_rank == 0)
{
qsort(ion_ids, nprocs, sizeof(*ion_ids), cmpr);
for (i = 1, val = ion_ids[0], found = 1; i < nprocs; i++)
{
if (val != ion_ids[i])
{
val = ion_ids[i];
found += 1;
}
}
bgq_runtime->record.counters[BGQ_INODES] = found;
}
}
if ((bgq_runtime) && (my_rank == 0))
{
*buffer = &bgq_runtime->record;
......@@ -228,37 +277,6 @@ static void bgq_shutdown()
return;
}
/*
 * Prepare the shared-record reduction for the BGQ module.
 *
 * Scans shared_recs[0 .. *shared_rec_count-1] for the id of this module's
 * record.  When it is present, the record size is reported through
 * rec_size, *shared_rec_count is set to 1, and both send_buf and recv_buf
 * are pointed at the module's record.  When it is absent, none of the
 * output parameters are modified.
 */
static void bgq_setup_reduction(
    darshan_record_id *shared_recs,
    int *shared_rec_count,
    void **send_buf,
    void **recv_buf,
    int *rec_size)
{
    int i;
    /* must start at 0: it is tested below even when the loop finds nothing */
    int found = 0;

    for (i = 0; i < *shared_rec_count; i++)
    {
        if (shared_recs[i] == bgq_runtime->record.f_id)
        {
            found = 1;
            break;
        }
    }

    if (found)
    {
        printf("found bgq shared record\n");
        *rec_size = sizeof(struct darshan_bgq_record);
        *shared_rec_count = 1;
        *send_buf = &bgq_runtime->record;
        *recv_buf = &bgq_runtime->record;
    }

    return;
}
static void bgq_record_reduction_op(
void* infile_v,
void* inoutfile_v,
......
......@@ -69,10 +69,16 @@ darshan-pnetcdf-logutils.o: darshan-pnetcdf-logutils.c darshan-logutils.h darsha
darshan-pnetcdf-logutils.po: darshan-pnetcdf-logutils.c darshan-logutils.h darshan-pnetcdf-logutils.h $(DARSHAN_LOG_FORMAT) $(srcdir)/../darshan-pnetcdf-log-format.h | uthash-1.9.2
$(CC) $(CFLAGS_SHARED) -c $< -o $@
libdarshan-util.so: darshan-logutils.po darshan-posix-logutils.po darshan-mpiio-logutils.po darshan-hdf5-logutils.po darshan-pnetcdf-logutils.po
darshan-bgq-logutils.o: darshan-bgq-logutils.c darshan-logutils.h darshan-bgq-logutils.h $(DARSHAN_LOG_FORMAT) $(srcdir)/../darshan-bgq-log-format.h | uthash-1.9.2
$(CC) $(CFLAGS) -c $< -o $@
darshan-bgq-logutils.po: darshan-bgq-logutils.c darshan-logutils.h darshan-bgq-logutils.h $(DARSHAN_LOG_FORMAT) $(srcdir)/../darshan-bgq-log-format.h | uthash-1.9.2
$(CC) $(CFLAGS_SHARED) -c $< -o $@
libdarshan-util.so: darshan-logutils.po darshan-posix-logutils.po darshan-mpiio-logutils.po darshan-hdf5-logutils.po darshan-pnetcdf-logutils.po darshan-bgq-logutils.po
$(CC) $(CFLAGS_SHARED) $(LDFLAGS) -o $@ $^ $(LIBS)
libdarshan-util.a: darshan-logutils.o darshan-posix-logutils.o darshan-mpiio-logutils.o darshan-hdf5-logutils.o darshan-pnetcdf-logutils.o
libdarshan-util.a: darshan-logutils.o darshan-posix-logutils.o darshan-mpiio-logutils.o darshan-hdf5-logutils.o darshan-pnetcdf-logutils.o darshan-bgq-logutils.o
ar rcs libdarshan-util.a $^
jenkins-hash-gen: jenkins-hash-gen.c lookup3.o
......
......@@ -19,33 +19,80 @@
#include "darshan-bgq-logutils.h"
int darshan_log_get_bgq_file(darshan_fd fd, struct darshan_bgq_record *file)
/* counter name strings for the POSIX module */
#define X(a, b) #a,
char *bgq_counter_names[] = {
BGQ_COUNTERS
};
char *bgq_f_counter_names[] = {
BGQ_F_COUNTERS
};
#undef X
static int darshan_log_get_bgq_file(void** psx_buf_p, int* bytes_left,
void** file_rec, darshan_record_id* rec_id, int byte_swap_flag);
static void darshan_log_print_bgq_file(void *file_rec,
char *file_name, char *mnt_pt, char *fs_type);
struct darshan_mod_logutil_funcs bgq_logutils =
{
.log_get_record = &darshan_log_get_bgq_file,
.log_print_record = &darshan_log_print_bgq_file,
};
/*
 * Interpret the bytes at *psx_buf_p as one BGQ record.
 *
 * psx_buf_p      in/out: cursor into the module data buffer; advanced past
 *                the record on success
 * bytes_left     in/out: bytes remaining at the cursor; reduced by the
 *                record size on success
 * file_rec       out: set to the record, which lives inside the input buffer
 * rec_id         out: set to the record's f_id
 * byte_swap_flag non-zero to byte-swap every 64-bit field in place first
 *
 * Returns 0 on success, or -1 (leaving all outputs untouched) when fewer
 * than sizeof(struct darshan_bgq_record) bytes remain.
 */
static int darshan_log_get_bgq_file(void** psx_buf_p, int* bytes_left,
    void** file_rec, darshan_record_id* rec_id, int byte_swap_flag)
{
    int i;
    struct darshan_bgq_record *file = (struct darshan_bgq_record *)
        (*psx_buf_p);

    /* compare as signed so a negative remainder is rejected, not wrapped */
    if(*bytes_left < 0 ||
       *bytes_left < (int)sizeof(struct darshan_bgq_record))
        return(-1);

    if(byte_swap_flag)
    {
        /* swap bytes if necessary; bounds are the BGQ counter counts */
        DARSHAN_BSWAP64(&file->f_id);
        DARSHAN_BSWAP64(&file->rank);
        for(i=0; i<BGQ_NUM_INDICES; i++)
            DARSHAN_BSWAP64(&file->counters[i]);
        for(i=0; i<BGQ_F_NUM_INDICES; i++)
            DARSHAN_BSWAP64(&file->fcounters[i]);
    }

    /* the record is returned in place, so it must not be zeroed here:
     * doing so would discard the data and always report f_id 0
     */

    /* update/set output variables */
    *file_rec = (void *)file;
    *rec_id = file->f_id;
    *psx_buf_p = (file + 1); /* increment input buf by size of file record */
    *bytes_left -= sizeof(struct darshan_bgq_record);

    return(0);
}
/*
 * Print every integer and floating point counter of one BGQ record.
 *
 * file_rec   pointer to a struct darshan_bgq_record
 * file_name, mnt_pt, fs_type
 *            forwarded unchanged to the counter print macros
 *
 * Loop bounds and the module name are the BGQ ones, so the indices stay
 * within the record's counter arrays and the BGQ name tables.
 */
static void darshan_log_print_bgq_file(void *file_rec, char *file_name,
    char *mnt_pt, char *fs_type)
{
    int i;
    struct darshan_bgq_record *bgq_file_rec =
        (struct darshan_bgq_record *)file_rec;

    for(i=0; i<BGQ_NUM_INDICES; i++)
    {
        DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
            bgq_file_rec->rank, bgq_file_rec->f_id, bgq_counter_names[i],
            bgq_file_rec->counters[i], file_name, mnt_pt, fs_type);
    }

    for(i=0; i<BGQ_F_NUM_INDICES; i++)
    {
        DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
            bgq_file_rec->rank, bgq_file_rec->f_id, bgq_f_counter_names[i],
            bgq_file_rec->fcounters[i], file_name, mnt_pt, fs_type);
    }

    return;
}
/*
......
......@@ -10,6 +10,9 @@
#include "darshan-logutils.h"
#include "darshan-bgq-log-format.h"
/* BGQ counter name tables, indexed by the counter enum values */
extern char *bgq_counter_names[];
extern char *bgq_f_counter_names[];
/* BGQ record callbacks; the record reader itself is file-local (static),
 * so it has no public prototype here
 */
extern struct darshan_mod_logutil_funcs bgq_logutils;
#endif
......@@ -48,7 +48,7 @@ struct darshan_mod_logutil_funcs *mod_logutils[DARSHAN_MAX_MODS] =
&mpiio_logutils, /* MPI-IO */
&hdf5_logutils, /* HDF5 */
&pnetcdf_logutils, /* PNETCDF */
NULL,
&bgq_logutils, /* BG/Q */
NULL,
NULL,
NULL,
......
......@@ -66,6 +66,7 @@ extern struct darshan_mod_logutil_funcs *mod_logutils[DARSHAN_MAX_MODS];
#include "darshan-mpiio-logutils.h"
#include "darshan-hdf5-logutils.h"
#include "darshan-pnetcdf-logutils.h"
#include "darshan-bgq-logutils.h"
darshan_fd darshan_log_open(const char *name);
darshan_fd darshan_log_create(const char *name, enum darshan_comp_type comp_type);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment