From b70be20011c789832b8bd314a457a17bbdc37e07 Mon Sep 17 00:00:00 2001 From: Shane Snyder Date: Fri, 17 Jun 2016 18:29:14 -0500 Subject: [PATCH] port NULL module over to new API changes --- darshan-runtime/Makefile.in | 6 +- darshan-runtime/lib/darshan-null.c | 383 +++++++++++++---------------- 2 files changed, 168 insertions(+), 221 deletions(-) diff --git a/darshan-runtime/Makefile.in b/darshan-runtime/Makefile.in index d5e5975..17b34f2 100644 --- a/darshan-runtime/Makefile.in +++ b/darshan-runtime/Makefile.in @@ -1,5 +1,4 @@ -all: lib/libdarshan.a lib/libdarshan-stubs.a -#TODO: lib/darshan-null.o +all: lib/libdarshan.a lib/libdarshan-stubs.a lib/darshan-null.o #TODO: each module provides own makefile with module-specific objects, build options, etc. @@ -24,9 +23,8 @@ DARSHAN_LOG_FORMAT = $(srcdir)/../darshan-log-format.h DARSHAN_VERSION = @DARSHAN_VERSION@ ifndef DISABLE_LDPRELOAD -all: lib/libdarshan.so +all: lib/libdarshan.so lib/darshan-null.po endif -#TODO: lib/darshan-null.po VPATH = $(srcdir) diff --git a/darshan-runtime/lib/darshan-null.c b/darshan-runtime/lib/darshan-null.c index 72abb15..566677e 100644 --- a/darshan-runtime/lib/darshan-null.c +++ b/darshan-runtime/lib/darshan-null.c @@ -12,10 +12,11 @@ #include #include #include +#include #include -#include "uthash.h" #include "darshan.h" +#include "darshan-dynamic.h" /* The "NULL" module is an example instrumentation module implementation provided * with Darshan, primarily to indicate how arbitrary modules may be integrated @@ -28,88 +29,72 @@ */ /* The DARSHAN_FORWARD_DECL macro (defined in darshan.h) is used to provide forward - * declarations for wrapped funcions, regardless if Darshan is used with statically - * or dynamically linked executables. + * declarations for wrapped functions, regardless of whether Darshan is used with + * statically or dynamically linked executables. 
*/ DARSHAN_FORWARD_DECL(foo, int, (const char *name, int arg1, int arg2)); -/* The null_record_runtime structure maintains necessary runtime metadata - * for a "NULL" module data record (darshan_null_record structure, defined - * in darshan-null-log-format.h). This metadata assists with the instrumenting - * of specific statistics in the file record. +/* The null_record_ref structure maintains necessary runtime metadata + * for the NULL module record (darshan_null_record structure, defined in + * darshan-null-log-format.h) pointed to by 'record_p'. This metadata + * assists with the instrumenting of specific statistics in the record. * - * RATIONALE: In general, a module may need to track some stateful, volatile - * information regarding specific I/O statistics to aid in the instrumentation - * process. However, this information should not be stored in the darshan_null_record - * struct because we don't want it to appear in the final darshan log file. - * We therefore associate a null_record_runtime structure with each darshan_null_record - * structure in order to track this information. + * RATIONALE: the NULL module needs to track some stateful, volatile + * information about each record it has registered (for instance, most + * recent access time, amount of bytes transferred) to aid in instrumentation, + * but this information can't be stored in the darshan_null_record struct + * because we don't want it to appear in the final darshan log file. We + * therefore associate a null_record_ref struct with each darshan_null_record + * struct in order to track this information (i.e., the mapping between + * null_record_ref structs to darshan_null_record structs is one-to-one). * - * NOTE: The null_record_runtime struct contains a pointer to a darshan_null_record - * struct (see the *record_p member) rather than simply embedding an entire - * darshan_null_record struct. 
This is done so that all of the darshan_null_record - * structs can be kept contiguous in memory as a single array to simplify - * reduction, compression, and storage. + * NOTE: we use the 'darshan_record_ref' interface (in darshan-common) to + * associate different types of handles with this null_record_ref struct. + * This allows us to index this struct (and the underlying record) by using + * either the corresponding Darshan record identifier or by any other arbitrary + * handle. For the NULL module, the only handle we use to track record + * references are Darshan record identifiers. */ -struct null_record_runtime +struct null_record_ref { /* Darshan record for the "NULL" example module */ - struct darshan_null_record* record_p; + struct darshan_null_record *record_p; /* ... other runtime data ... */ - - /* hash table link for this record */ - /* NOTE: it is entirely up to the module developer how to persist module - * records in memory as the instrumented application runs. These records - * could just as easily be stored in an array or linked list. That said, - * the data structure selection should be mindful of the resulting memory - * footprint and search time complexity to attempt minimize Darshan overheads. - * hash table and linked list implementations are available in uthash.h and - * utlist.h, respectively. - */ - UT_hash_handle hlink; }; -/* The null_runtime structure simply encapsulates global data structures needed - * by the module for instrumenting functions of interest and providing the output - * I/O data for this module to the darshan-core component at shutdown time. +/* The null_runtime structure maintains necessary state for storing + * NULL records and for coordinating with darshan-core at shutdown time. */ struct null_runtime { - /* runtime_record_array is the array of runtime records for the "NULL" module. 
*/ - struct null_record_runtime* runtime_record_array; - /* record_array is the array of high-level Darshan records for the "NULL" module, - * each corresponding to the the runtime record structure stored at the same array - * index in runtime_record_array. - */ - struct darshan_null_record* record_array; - /* file_array_size is the maximum amount of records that can be stored in - * record_array (and consequentially, runtime_record_array). - */ - int rec_array_size; - /* file_array_ndx is the current index into both runtime_record_array and - * record_array. - */ - int rec_array_ndx; - /* record_hash is a pointer to a hash table of null_record_runtime structures - * currently maintained by the "NULL" module. + /* rec_id_hash is a pointer to a hash table of NULL module record + * references, indexed by Darshan record id */ - struct null_record_runtime* record_hash; - - /* total_record is a pointer to a buffer for storing an aggregate record for - * all "NULL" module records. This buffer is pointed at the appropriate - * region of a mmapped partial log file managed by darshan-core -- this - * partial log file is used to get at least *some* i/o data out to a log - * file if Darshan does not shut down properly. 
- */ - struct darshan_null_record *total_record; + void *rec_id_hash; + /* number of records currently tracked */ + int rec_count; }; +/* internal helper functions for the NULL module */ +static void null_runtime_initialize( + void); +static struct null_record_ref *null_track_new_record( + darshan_record_id rec_id, const char *name); +static void null_cleanup_runtime( + void); + +/* forward declaration for NULL shutdown function needed to interface + * with darshan-core + */ +static void null_shutdown(MPI_Comm mod_comm, darshan_record_id *shared_recs, + int shared_rec_count, void **null_buf, int *null_buf_sz); + /* null_runtime is the global data structure encapsulating "NULL" module state */ static struct null_runtime *null_runtime = NULL; /* The null_runtime_mutex is a lock used when updating the null_runtime global * structure (or any other global data structures). This is necessary to avoid race - * conditions as multiple threads execute function wrappers and update module state. + * conditions as multiple threads may execute function wrappers and update module state. * NOTE: Recursive mutexes are used in case functions wrapped by this module call * other wrapped functions that would result in deadlock, otherwise. This mechanism * may not be necessary for all instrumentation modules. 
@@ -120,40 +105,57 @@ static int instrumentation_disabled = 0; /* my_rank indicates the MPI rank of this process */ static int my_rank = -1; -/* internal helper functions for the "NULL" module */ -static void null_runtime_initialize(void); -static struct null_record_runtime* null_record_by_name(const char *name); - -/* forward declaration for module functions needed to interface with darshan-core */ -static void null_begin_shutdown(void); -static void null_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs, - int shared_rec_count, void **null_buf, int *null_buf_sz); -static void null_shutdown(void); - /* macros for obtaining/releasing the "NULL" module lock */ #define NULL_LOCK() pthread_mutex_lock(&null_runtime_mutex) #define NULL_UNLOCK() pthread_mutex_unlock(&null_runtime_mutex) +/* the NULL_PRE_RECORD macro is executed before performing NULL + * module instrumentation of a call. It obtains a lock for updating + * module data structures, and ensures the NULL module has been properly + * initialized before instrumenting; otherwise it unlocks and returns 'ret'. + */ +#define NULL_PRE_RECORD() do { \ + NULL_LOCK(); \ + if(!null_runtime && !instrumentation_disabled) null_runtime_initialize(); \ + if(!null_runtime) { \ + NULL_UNLOCK(); \ + return(ret); \ + } \ +} while(0) + +/* the NULL_POST_RECORD macro is executed after performing NULL + * module instrumentation. It simply releases the module lock. + */ +#define NULL_POST_RECORD() do { \ + NULL_UNLOCK(); \ +} while(0) + /* macro for instrumenting the "NULL" module's foo function */ /* NOTE: this macro makes use of the DARSHAN_COUNTER_* macros defined * and documented in darshan.h. 
*/ #define NULL_RECORD_FOO(__ret, __name, __dat, __tm1, __tm2) do{ \ - struct null_record_runtime* rec; \ - double elapsed = __tm2 - __tm1; \ + darshan_record_id rec_id; \ + struct null_record_ref *rec_ref; \ + double __elapsed = __tm2 - __tm1; \ /* if foo returns error (return code < 0), don't instrument anything */ \ if(__ret < 0) break; \ - /* use '__name' to lookup a corresponding "NULL" record */ \ - rec = null_record_by_name(__name); \ - if(!rec) break; \ + /* use '__name' to generate a unique Darshan record id */ \ + rec_id = darshan_core_gen_record_id(__name); \ + /* look up a record reference for this record id using darshan rec_ref interface */ \ + rec_ref = darshan_lookup_record_ref(null_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \ + /* if no reference was found, track a new one and use the reference it returns */ \ + if(!rec_ref) rec_ref = null_track_new_record(rec_id, __name); \ + /* if we still don't have a valid reference, back out */ \ + if(!rec_ref) break; \ /* increment counter indicating number of calls to 'bar' */ \ - rec->record_p->counters[NULL_BARS] += 1; \ + rec_ref->record_p->counters[NULL_BARS] += 1; \ /* store data value for most recent call to 'bar' */ \ - rec->record_p->counters[NULL_BAR_DAT] = __dat; \ + rec_ref->record_p->counters[NULL_BAR_DAT] = __dat; \ /* store timestamp of most recent call to 'bar' */ \ - rec->record_p->fcounters[NULL_F_BAR_TIMESTAMP] = __tm1; \ + rec_ref->record_p->fcounters[NULL_F_BAR_TIMESTAMP] = __tm1; \ /* store duration of most recent call to 'bar' */ \ - rec->record_p->fcounters[NULL_F_BAR_DURATION] = elapsed; \ + rec_ref->record_p->fcounters[NULL_F_BAR_DURATION] = __elapsed; \ } while(0) /********************************************************** @@ -182,182 +184,135 @@ int DARSHAN_DECL(foo)(const char* name, int arg1, int arg2) ret = __real_foo(name, arg1, arg2); tm2 = darshan_core_wtime(); - NULL_LOCK(); - - /* Before attempting to instrument I/O statistics for function foo, make - * sure the "NULL" module runtime 
environment has been initialized. - * NOTE: this runtime environment is initialized only once -- if the - * appropriate structures have already been initialized, this function simply - * returns. - */ - null_runtime_initialize(); - + NULL_PRE_RECORD(); /* Call macro for instrumenting data for foo function calls. */ NULL_RECORD_FOO(ret, name, arg1+arg2, tm1, tm2); - - NULL_UNLOCK(); + NULL_POST_RECORD(); return(ret); } /********************************************************** - * Internal functions for manipulating POSIX module state * + * Internal functions for manipulating NULL module state * **********************************************************/ -/* Initialize internal POSIX module data structures and register with darshan-core. */ +/* Initialize internal NULL module data structures and register with darshan-core. */ static void null_runtime_initialize() { - int mmap_buf_size; - /* struct of function pointers for interfacing with darshan-core */ - struct darshan_module_funcs null_mod_fns = - { - .begin_shutdown = &null_begin_shutdown, - .get_output_data = &null_get_output_data, - .shutdown = &null_shutdown - }; - int mem_limit; /* max. 
memory this module can consume, dictated by darshan-core */ - - /* don't do anything if already initialized or instrumenation is disabled */ - if(null_runtime || instrumentation_disabled) - return; + int null_buf_size; - /* register the "NULL" module with the darshan-core component */ + /* try and store a default number of records for this module */ + null_buf_size = DARSHAN_DEF_MOD_REC_COUNT * sizeof(struct darshan_null_record); + + /* register the NULL module with the darshan-core component */ darshan_core_register_module( DARSHAN_NULL_MOD, /* Darshan module identifier, defined in darshan-log-format.h */ - &null_mod_fns, + &null_shutdown, + &null_buf_size, &my_rank, - &mem_limit, - (void **)&null_runtime->total_record, - &mmap_buf_size, NULL); - /* return if no memory assigned by darshan-core */ - if(mem_limit == 0) + /* return if darshan-core does not provide enough module memory for at + * least one NULL record + */ + if(null_buf_size < sizeof(struct darshan_null_record)) + { + darshan_core_unregister_module(DARSHAN_NULL_MOD); return; + } /* initialize module's global state */ null_runtime = malloc(sizeof(*null_runtime)); if(!null_runtime) - return; - memset(null_runtime, 0, sizeof(*null_runtime)); - - /* Set the maximum number of data records this module may track, as indicated - * by mem_limit (set by darshan-core). - * NOTE: We interpret the maximum memory limit to be related to the maximum - * amount of data which may be written to log by a single process for a given - * module. We therefore use this maximum memory limit to determine how many - * darshan_null_record structures we can track per process. 
- */ - null_runtime->rec_array_size = mem_limit / sizeof(struct darshan_null_record); - null_runtime->rec_array_ndx = 0; - - /* allocate both record arrays (runtime and high-level records) */ - null_runtime->runtime_record_array = malloc(null_runtime->rec_array_size * - sizeof(struct null_record_runtime)); - null_runtime->record_array = malloc(null_runtime->rec_array_size * - sizeof(struct darshan_null_record)); - if(!null_runtime->runtime_record_array || !null_runtime->record_array) { - null_runtime->rec_array_size = 0; + darshan_core_unregister_module(DARSHAN_NULL_MOD); return; } - memset(null_runtime->runtime_record_array, 0, null_runtime->rec_array_size * - sizeof(struct null_record_runtime)); - memset(null_runtime->record_array, 0, null_runtime->rec_array_size * - sizeof(struct darshan_null_record)); + memset(null_runtime, 0, sizeof(*null_runtime)); return; } -/* Search for and return a "NULL" module record corresponding to name parameter. */ -static struct null_record_runtime* null_record_by_name(const char *name) +/* allocate and track a new NULL module record */ +static struct null_record_ref *null_track_new_record( + darshan_record_id rec_id, const char *name) { - struct null_record_runtime *rec = NULL; - darshan_record_id rec_id; - int limit_flag; + struct darshan_null_record *record_p = NULL; + struct null_record_ref *rec_ref = NULL; + int ret; - /* Don't search for a record if the "NULL" module is not initialized or - * if instrumentation has been toggled off. 
- */ - if(!null_runtime || instrumentation_disabled) + rec_ref = malloc(sizeof(*rec_ref)); + if(!rec_ref) return(NULL); + memset(rec_ref, 0, sizeof(*rec_ref)); - /* stop tracking new records if we are tracking our maximum count */ - limit_flag = (null_runtime->rec_array_ndx >= null_runtime->rec_array_size); - - /* get a unique record identifier for this record from darshan-core */ - darshan_core_register_record( - (void*)name, - strlen(name), - DARSHAN_NULL_MOD, - 1, - limit_flag, - &rec_id, - NULL); - - /* the file record id is set to 0 if no memory is available for tracking - * new records -- just fall through and ignore this record + /* allocate a new NULL record reference and add it to the hash + * table, using the Darshan record identifier as the handle */ - if(rec_id == 0) + ret = darshan_add_record_ref(&(null_runtime->rec_id_hash), &rec_id, + sizeof(darshan_record_id), rec_ref); + if(ret == 0) { + free(rec_ref); return(NULL); } - /* search the hash table for this file record, and return if found */ - HASH_FIND(hlink, null_runtime->record_hash, &rec_id, sizeof(darshan_record_id), rec); - if(rec) + /* register the actual file record with darshan-core so it is persisted + * in the log file + */ + record_p = darshan_core_register_record( + rec_id, + name, + DARSHAN_NULL_MOD, + sizeof(struct darshan_null_record), + NULL); + + if(!record_p) { - return(rec); + /* if registration fails, delete record reference and return */ + darshan_delete_record_ref(&(null_runtime->rec_id_hash), + &rec_id, sizeof(darshan_record_id)); + free(rec_ref); + return(NULL); } - /* no existing record, assign a new one from the global array */ - rec = &(null_runtime->runtime_record_array[null_runtime->rec_array_ndx]); - rec->record_p = &(null_runtime->record_array[null_runtime->rec_array_ndx]); - - /* set the darshan record id and corresponding process rank for this record */ - rec->record_p->f_id = rec_id; - rec->record_p->rank = my_rank; - - /* add new record to file hash table */ - 
HASH_ADD(hlink, null_runtime->record_hash, record_p->f_id, sizeof(darshan_record_id), rec); - null_runtime->rec_array_ndx++; + /* registering this file record was successful, so initialize some fields */ + record_p->base_rec.id = rec_id; + record_p->base_rec.rank = my_rank; + rec_ref->record_p = record_p; + null_runtime->rec_count++; - return(rec); + /* return pointer to the record reference */ + return(rec_ref); } -/****************************************************************************** - * Functions exported by the "NULL" module for coordinating with darshan-core * - ******************************************************************************/ - -/* Perform any necessary steps prior to shutting down for the "NULL" module. */ -static void null_begin_shutdown() +/* cleanup NULL module internal data structures */ +static void null_cleanup_runtime() { - assert(null_runtime); - - NULL_LOCK(); - - /* In general, we want to disable all wrappers while Darshan shuts down. - * This is to avoid race conditions and ensure data consistency, as - * executing wrappers could potentially modify module state while Darshan - * is in the process of shutting down. - */ - instrumentation_disabled = 1; - - /* ... any other code which needs to be executed before beginning shutdown process ... */ + /* iterate the hash of record references and free them */ + darshan_clear_record_refs(&(null_runtime->rec_id_hash), 1); - NULL_UNLOCK(); + free(null_runtime); + null_runtime = NULL; return; } -/* Pass output data for the "NULL" module back to darshan-core to log to file. 
*/ -static void null_get_output_data( +/************************************************************************************** + * shutdown function exported by the "NULL" module for coordinating with darshan-core * + **************************************************************************************/ + +/* Pass output data for the "NULL" module back to darshan-core to log to file, + * and shutdown/free internal data structures. + */ +static void null_shutdown( MPI_Comm mod_comm, darshan_record_id *shared_recs, int shared_rec_count, void **null_buf, int *null_buf_sz) { + NULL_LOCK(); assert(null_runtime); /* NOTE: this function can be used to run collective operations prior to @@ -370,27 +325,21 @@ static void null_get_output_data( * functionality. */ - /* Just set the output buffer to point at the array of the "NULL" module's - * I/O records, and set the output size according to the number of records - * currently being tracked. + /* Just set the output size according to the number of records currently + * being tracked. In general, the module can decide to throw out records + * that have been previously registered by shuffling around memory in + * 'null_buf' -- 'null_buf' and 'null_buf_sz' both are passed as pointers + * so they can be updated by the shutdown function potentially. */ - *null_buf = (void *)(null_runtime->record_array); - *null_buf_sz = null_runtime->rec_array_ndx * sizeof(struct darshan_null_record); - - return; -} - -/* Shutdown the "NULL" module by freeing up all data structures. 
*/ -static void null_shutdown() -{ - assert(null_runtime); + *null_buf_sz = null_runtime->rec_count * sizeof(struct darshan_null_record); - HASH_CLEAR(hlink, null_runtime->record_hash); /* these hash entries are freed all at once below */ + /* shutdown internal structures used for instrumenting */ + null_cleanup_runtime(); - free(null_runtime->runtime_record_array); - free(null_runtime); - null_runtime = NULL; + /* disable further instrumentation */ + instrumentation_disabled = 1; + NULL_UNLOCK(); return; } -- 2.26.2