Commit b70be200 authored by Shane Snyder

port NULL module over to new API changes

parent ba0f880b
all: lib/libdarshan.a lib/libdarshan-stubs.a
#TODO: lib/darshan-null.o
all: lib/libdarshan.a lib/libdarshan-stubs.a lib/darshan-null.o
#TODO: each module provides own makefile with module-specific objects, build options, etc.
......@@ -24,9 +23,8 @@ DARSHAN_LOG_FORMAT = $(srcdir)/../darshan-log-format.h
DARSHAN_VERSION = @DARSHAN_VERSION@
ifndef DISABLE_LDPRELOAD
all: lib/libdarshan.so
all: lib/libdarshan.so lib/darshan-null.po
endif
#TODO: lib/darshan-null.po
VPATH = $(srcdir)
......
......@@ -12,10 +12,11 @@
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#include <assert.h>
#include "uthash.h"
#include "darshan.h"
#include "darshan-dynamic.h"
/* The "NULL" module is an example instrumentation module implementation provided
* with Darshan, primarily to indicate how arbitrary modules may be integrated
......@@ -28,88 +29,72 @@
*/
/* The DARSHAN_FORWARD_DECL macro (defined in darshan.h) is used to provide forward
* declarations for wrapped functions, regardless if Darshan is used with statically
* or dynamically linked executables.
* declarations for wrapped functions, regardless of whether Darshan is used with
* statically or dynamically linked executables.
*/
DARSHAN_FORWARD_DECL(foo, int, (const char *name, int arg1, int arg2));
/* The null_record_runtime structure maintains necessary runtime metadata
* for a "NULL" module data record (darshan_null_record structure, defined
* in darshan-null-log-format.h). This metadata assists with the instrumenting
* of specific statistics in the file record.
/* The null_record_ref structure maintains necessary runtime metadata
* for the NULL module record (darshan_null_record structure, defined in
* darshan-null-log-format.h) pointed to by 'record_p'. This metadata
* assists with the instrumenting of specific statistics in the record.
*
* RATIONALE: In general, a module may need to track some stateful, volatile
* information regarding specific I/O statistics to aid in the instrumentation
* process. However, this information should not be stored in the darshan_null_record
* struct because we don't want it to appear in the final darshan log file.
* We therefore associate a null_record_runtime structure with each darshan_null_record
* structure in order to track this information.
* RATIONALE: the NULL module needs to track some stateful, volatile
* information about each record it has registered (for instance, most
* recent access time, amount of bytes transferred) to aid in instrumentation,
* but this information can't be stored in the darshan_null_record struct
* because we don't want it to appear in the final darshan log file. We
* therefore associate a null_record_ref struct with each darshan_null_record
* struct in order to track this information (i.e., the mapping between
* null_record_ref structs to darshan_null_record structs is one-to-one).
*
* NOTE: The null_record_runtime struct contains a pointer to a darshan_null_record
* struct (see the *record_p member) rather than simply embedding an entire
* darshan_null_record struct. This is done so that all of the darshan_null_record
* structs can be kept contiguous in memory as a single array to simplify
* reduction, compression, and storage.
* NOTE: we use the 'darshan_record_ref' interface (in darshan-common) to
* associate different types of handles with this null_record_ref struct.
* This allows us to index this struct (and the underlying record) by using
* either the corresponding Darshan record identifier or by any other arbitrary
* handle. For the NULL module, the only handle we use to track record
* references are Darshan record identifiers.
*/
struct null_record_runtime
struct null_record_ref
{
/* Darshan record for the "NULL" example module */
struct darshan_null_record* record_p;
struct darshan_null_record *record_p;
/* ... other runtime data ... */
/* hash table link for this record */
/* NOTE: it is entirely up to the module developer how to persist module
* records in memory as the instrumented application runs. These records
* could just as easily be stored in an array or linked list. That said,
* the data structure selection should be mindful of the resulting memory
* footprint and search time complexity to attempt to minimize Darshan overheads.
* hash table and linked list implementations are available in uthash.h and
* utlist.h, respectively.
*/
UT_hash_handle hlink;
};
/* The null_runtime structure simply encapsulates global data structures needed
* by the module for instrumenting functions of interest and providing the output
* I/O data for this module to the darshan-core component at shutdown time.
/* The null_runtime structure maintains necessary state for storing
* NULL records and for coordinating with darshan-core at shutdown time.
*/
struct null_runtime
{
/* runtime_record_array is the array of runtime records for the "NULL" module. */
struct null_record_runtime* runtime_record_array;
/* record_array is the array of high-level Darshan records for the "NULL" module,
* each corresponding to the runtime record structure stored at the same array
* index in runtime_record_array.
*/
struct darshan_null_record* record_array;
/* rec_array_size is the maximum number of records that can be stored in
* record_array (and consequently, runtime_record_array).
*/
int rec_array_size;
/* rec_array_ndx is the current index into both runtime_record_array and
* record_array.
*/
int rec_array_ndx;
/* record_hash is a pointer to a hash table of null_record_runtime structures
* currently maintained by the "NULL" module.
*/
struct null_record_runtime* record_hash;
/* total_record is a pointer to a buffer for storing an aggregate record for
* all "NULL" module records. This buffer is pointed at the appropriate
* region of a mmapped partial log file managed by darshan-core -- this
* partial log file is used to get at least *some* i/o data out to a log
* file if Darshan does not shut down properly.
*/
struct darshan_null_record *total_record;
/* rec_id_hash is a pointer to a hash table of NULL module record
* references, indexed by Darshan record id
*/
void *rec_id_hash;
/* number of records currently tracked */
int rec_count;
};
/* internal helper functions for the NULL module */
static void null_runtime_initialize(
void);
static struct null_record_ref *null_track_new_record(
darshan_record_id rec_id, const char *name);
static void null_cleanup_runtime(
void);
/* forward declaration for NULL shutdown function needed to interface
* with darshan-core
*/
static void null_shutdown(MPI_Comm mod_comm, darshan_record_id *shared_recs,
int shared_rec_count, void **null_buf, int *null_buf_sz);
/* null_runtime is the global data structure encapsulating "NULL" module state */
static struct null_runtime *null_runtime = NULL;
/* The null_runtime_mutex is a lock used when updating the null_runtime global
* structure (or any other global data structures). This is necessary to avoid race
* conditions as multiple threads execute function wrappers and update module state.
* conditions as multiple threads may execute function wrappers and update module state.
* NOTE: Recursive mutexes are used in case functions wrapped by this module call
* other wrapped functions that would result in deadlock, otherwise. This mechanism
* may not be necessary for all instrumentation modules.
......@@ -120,40 +105,57 @@ static int instrumentation_disabled = 0;
/* my_rank indicates the MPI rank of this process */
static int my_rank = -1;
/* internal helper functions for the "NULL" module */
static void null_runtime_initialize(void);
static struct null_record_runtime* null_record_by_name(const char *name);
/* forward declaration for module functions needed to interface with darshan-core */
static void null_begin_shutdown(void);
static void null_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs,
int shared_rec_count, void **null_buf, int *null_buf_sz);
static void null_shutdown(void);
/* macros for obtaining/releasing the "NULL" module lock */
#define NULL_LOCK() pthread_mutex_lock(&null_runtime_mutex)
#define NULL_UNLOCK() pthread_mutex_unlock(&null_runtime_mutex)
/* NULL_PRE_RECORD runs ahead of any NULL module instrumentation for a
 * wrapped call. It takes the module lock and, when the module has no runtime
 * state yet and instrumentation is not disabled, initializes the module. If
 * there is still no runtime state afterwards, the lock is dropped and the
 * enclosing function returns 'ret' (which must be in scope at the call site).
 */
#define NULL_PRE_RECORD() do { \
    NULL_LOCK(); \
    if(!null_runtime) { \
        if(!instrumentation_disabled) \
            null_runtime_initialize(); \
        if(!null_runtime) { \
            NULL_UNLOCK(); \
            return(ret); \
        } \
    } \
} while(0)
/* NULL_POST_RECORD runs once NULL module instrumentation of a call is
 * finished; its only job is to drop the module lock.
 */
#define NULL_POST_RECORD() do { NULL_UNLOCK(); } while(0)
/* macro for instrumenting the "NULL" module's foo function
 *   __ret        - return value of foo; nothing is recorded when it is negative
 *   __name       - name passed to foo, used to look up/generate the record
 *   __dat        - value stored in the NULL_BAR_DAT counter
 *   __tm1, __tm2 - timestamps taken around the call; __tm1 is stored as the
 *                  timestamp and (__tm2 - __tm1) as the duration
 * This macro dereferences null_runtime, so it must only be expanded after
 * NULL_PRE_RECORD() has confirmed the module's runtime state exists.
 */
/* NOTE: this macro makes use of the DARSHAN_COUNTER_* macros defined
* and documented in darshan.h.
*/
#define NULL_RECORD_FOO(__ret, __name, __dat, __tm1, __tm2) do{ \
struct null_record_runtime* rec; \
double elapsed = __tm2 - __tm1; \
darshan_record_id rec_id; \
struct null_record_ref *rec_ref; \
double __elapsed = __tm2 - __tm1; \
/* if foo returns error (return code < 0), don't instrument anything */ \
if(__ret < 0) break; \
/* use '__name' to lookup a corresponding "NULL" record */ \
rec = null_record_by_name(__name); \
if(!rec) break; \
/* use '__name' to generate a unique Darshan record id */ \
rec_id = darshan_core_gen_record_id(__name); \
/* look up a record reference for this record id using darshan rec_ref interface */ \
rec_ref = darshan_lookup_record_ref(null_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \
/* if no reference was found, track a new one and keep the returned reference */ \
if(!rec_ref) rec_ref = null_track_new_record(rec_id, __name); \
/* if we still don't have a valid reference, back out */ \
if(!rec_ref) break; \
/* increment counter indicating number of calls to 'bar' */ \
rec->record_p->counters[NULL_BARS] += 1; \
rec_ref->record_p->counters[NULL_BARS] += 1; \
/* store data value for most recent call to 'bar' */ \
rec->record_p->counters[NULL_BAR_DAT] = __dat; \
rec_ref->record_p->counters[NULL_BAR_DAT] = __dat; \
/* store timestamp of most recent call to 'bar' */ \
rec->record_p->fcounters[NULL_F_BAR_TIMESTAMP] = __tm1; \
rec_ref->record_p->fcounters[NULL_F_BAR_TIMESTAMP] = __tm1; \
/* store duration of most recent call to 'bar' */ \
rec->record_p->fcounters[NULL_F_BAR_DURATION] = elapsed; \
rec_ref->record_p->fcounters[NULL_F_BAR_DURATION] = __elapsed; \
} while(0)
/**********************************************************
......@@ -182,182 +184,135 @@ int DARSHAN_DECL(foo)(const char* name, int arg1, int arg2)
ret = __real_foo(name, arg1, arg2);
tm2 = darshan_core_wtime();
NULL_LOCK();
/* Before attempting to instrument I/O statistics for function foo, make
* sure the "NULL" module runtime environment has been initialized.
* NOTE: this runtime environment is initialized only once -- if the
* appropriate structures have already been initialized, this function simply
* returns.
*/
null_runtime_initialize();
NULL_PRE_RECORD();
/* Call macro for instrumenting data for foo function calls. */
NULL_RECORD_FOO(ret, name, arg1+arg2, tm1, tm2);
NULL_UNLOCK();
NULL_POST_RECORD();
return(ret);
}
/**********************************************************
* Internal functions for manipulating POSIX module state *
* Internal functions for manipulating NULL module state *
**********************************************************/
/* Initialize internal POSIX module data structures and register with darshan-core. */
/* Initialize internal NULL module data structures and register with darshan-core. */
static void null_runtime_initialize()
{
int mmap_buf_size;
/* struct of function pointers for interfacing with darshan-core */
struct darshan_module_funcs null_mod_fns =
{
.begin_shutdown = &null_begin_shutdown,
.get_output_data = &null_get_output_data,
.shutdown = &null_shutdown
};
int mem_limit; /* max. memory this module can consume, dictated by darshan-core */
/* don't do anything if already initialized or instrumentation is disabled */
if(null_runtime || instrumentation_disabled)
return;
int null_buf_size;
/* register the "NULL" module with the darshan-core component */
/* try and store a default number of records for this module */
null_buf_size = DARSHAN_DEF_MOD_REC_COUNT * sizeof(struct darshan_null_record);
/* register the NULL module with the darshan-core component */
darshan_core_register_module(
DARSHAN_NULL_MOD, /* Darshan module identifier, defined in darshan-log-format.h */
&null_mod_fns,
&null_shutdown,
&null_buf_size,
&my_rank,
&mem_limit,
(void **)&null_runtime->total_record,
&mmap_buf_size,
NULL);
/* return if no memory assigned by darshan-core */
if(mem_limit == 0)
/* return if darshan-core does not provide enough module memory for at
* least one NULL record
*/
if(null_buf_size < sizeof(struct darshan_null_record))
{
darshan_core_unregister_module(DARSHAN_NULL_MOD);
return;
}
/* initialize module's global state */
null_runtime = malloc(sizeof(*null_runtime));
if(!null_runtime)
return;
memset(null_runtime, 0, sizeof(*null_runtime));
/* Set the maximum number of data records this module may track, as indicated
* by mem_limit (set by darshan-core).
* NOTE: We interpret the maximum memory limit to be related to the maximum
* amount of data which may be written to log by a single process for a given
* module. We therefore use this maximum memory limit to determine how many
* darshan_null_record structures we can track per process.
*/
null_runtime->rec_array_size = mem_limit / sizeof(struct darshan_null_record);
null_runtime->rec_array_ndx = 0;
/* allocate both record arrays (runtime and high-level records) */
null_runtime->runtime_record_array = malloc(null_runtime->rec_array_size *
sizeof(struct null_record_runtime));
null_runtime->record_array = malloc(null_runtime->rec_array_size *
sizeof(struct darshan_null_record));
if(!null_runtime->runtime_record_array || !null_runtime->record_array)
{
null_runtime->rec_array_size = 0;
darshan_core_unregister_module(DARSHAN_NULL_MOD);
return;
}
memset(null_runtime->runtime_record_array, 0, null_runtime->rec_array_size *
sizeof(struct null_record_runtime));
memset(null_runtime->record_array, 0, null_runtime->rec_array_size *
sizeof(struct darshan_null_record));
memset(null_runtime, 0, sizeof(*null_runtime));
return;
}
/* Search for and return a "NULL" module record corresponding to name parameter. */
static struct null_record_runtime* null_record_by_name(const char *name)
/* allocate and track a new NULL module record */
static struct null_record_ref *null_track_new_record(
darshan_record_id rec_id, const char *name)
{
struct null_record_runtime *rec = NULL;
darshan_record_id rec_id;
int limit_flag;
struct darshan_null_record *record_p = NULL;
struct null_record_ref *rec_ref = NULL;
int ret;
/* Don't search for a record if the "NULL" module is not initialized or
* if instrumentation has been toggled off.
*/
if(!null_runtime || instrumentation_disabled)
rec_ref = malloc(sizeof(*rec_ref));
if(!rec_ref)
return(NULL);
memset(rec_ref, 0, sizeof(*rec_ref));
/* stop tracking new records if we are tracking our maximum count */
limit_flag = (null_runtime->rec_array_ndx >= null_runtime->rec_array_size);
/* get a unique record identifier for this record from darshan-core */
darshan_core_register_record(
(void*)name,
strlen(name),
DARSHAN_NULL_MOD,
1,
limit_flag,
&rec_id,
NULL);
/* the file record id is set to 0 if no memory is available for tracking
* new records -- just fall through and ignore this record
/* allocate a new NULL record reference and add it to the hash
* table, using the Darshan record identifier as the handle
*/
if(rec_id == 0)
ret = darshan_add_record_ref(&(null_runtime->rec_id_hash), &rec_id,
sizeof(darshan_record_id), rec_ref);
if(ret == 0)
{
free(rec_ref);
return(NULL);
}
/* search the hash table for this file record, and return if found */
HASH_FIND(hlink, null_runtime->record_hash, &rec_id, sizeof(darshan_record_id), rec);
if(rec)
/* register the actual file record with darshan-core so it is persisted
* in the log file
*/
record_p = darshan_core_register_record(
rec_id,
name,
DARSHAN_NULL_MOD,
sizeof(struct darshan_null_record),
NULL);
if(!record_p)
{
return(rec);
/* if registration fails, delete record reference and return */
darshan_delete_record_ref(&(null_runtime->rec_id_hash),
&rec_id, sizeof(darshan_record_id));
free(rec_ref);
return(NULL);
}
/* no existing record, assign a new one from the global array */
rec = &(null_runtime->runtime_record_array[null_runtime->rec_array_ndx]);
rec->record_p = &(null_runtime->record_array[null_runtime->rec_array_ndx]);
/* set the darshan record id and corresponding process rank for this record */
rec->record_p->f_id = rec_id;
rec->record_p->rank = my_rank;
/* add new record to file hash table */
HASH_ADD(hlink, null_runtime->record_hash, record_p->f_id, sizeof(darshan_record_id), rec);
null_runtime->rec_array_ndx++;
/* registering this file record was successful, so initialize some fields */
record_p->base_rec.id = rec_id;
record_p->base_rec.rank = my_rank;
rec_ref->record_p = record_p;
null_runtime->rec_count++;
return(rec);
/* return pointer to the record reference */
return(rec_ref);
}
/******************************************************************************
* Functions exported by the "NULL" module for coordinating with darshan-core *
******************************************************************************/
/* Perform any necessary steps prior to shutting down for the "NULL" module. */
static void null_begin_shutdown()
/* cleanup NULL module internal data structures */
static void null_cleanup_runtime()
{
assert(null_runtime);
NULL_LOCK();
/* In general, we want to disable all wrappers while Darshan shuts down.
* This is to avoid race conditions and ensure data consistency, as
* executing wrappers could potentially modify module state while Darshan
* is in the process of shutting down.
*/
instrumentation_disabled = 1;
/* ... any other code which needs to be executed before beginning shutdown process ... */
/* iterate the hash of record references and free them */
darshan_clear_record_refs(&(null_runtime->rec_id_hash), 1);
NULL_UNLOCK();
free(null_runtime);
null_runtime = NULL;
return;
}
/* Pass output data for the "NULL" module back to darshan-core to log to file. */
static void null_get_output_data(
/**************************************************************************************
* shutdown function exported by the "NULL" module for coordinating with darshan-core *
**************************************************************************************/
/* Pass output data for the "NULL" module back to darshan-core to log to file,
* and shutdown/free internal data structures.
*/
static void null_shutdown(
MPI_Comm mod_comm,
darshan_record_id *shared_recs,
int shared_rec_count,
void **null_buf,
int *null_buf_sz)
{
NULL_LOCK();
assert(null_runtime);
/* NOTE: this function can be used to run collective operations prior to
......@@ -370,27 +325,21 @@ static void null_get_output_data(
* functionality.
*/
/* Just set the output buffer to point at the array of the "NULL" module's
* I/O records, and set the output size according to the number of records
* currently being tracked.
/* Just set the output size according to the number of records currently
* being tracked. In general, the module can decide to throw out records
* that have been previously registered by shuffling around memory in
* 'null_buf' -- 'null_buf' and 'null_buf_sz' both are passed as pointers
* so they can be updated by the shutdown function potentially.
*/
*null_buf = (void *)(null_runtime->record_array);
*null_buf_sz = null_runtime->rec_array_ndx * sizeof(struct darshan_null_record);
return;
}
/* Shutdown the "NULL" module by freeing up all data structures. */
static void null_shutdown()
{
assert(null_runtime);
*null_buf_sz = null_runtime->rec_count * sizeof(struct darshan_null_record);
HASH_CLEAR(hlink, null_runtime->record_hash); /* these hash entries are freed all at once below */
/* shutdown internal structures used for instrumenting */
null_cleanup_runtime();
free(null_runtime->runtime_record_array);
free(null_runtime);
null_runtime = NULL;
/* disable further instrumentation */
instrumentation_disabled = 1;
NULL_UNLOCK();
return;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment