Commit 72380aad authored by Shane Snyder's avatar Shane Snyder
Browse files

more docs and added unregister functions

parent acc05e4a
...@@ -63,12 +63,15 @@ void darshan_core_register_module( ...@@ -63,12 +63,15 @@ void darshan_core_register_module(
struct darshan_module_funcs *funcs, struct darshan_module_funcs *funcs,
int *runtime_mem_limit); int *runtime_mem_limit);
void darshan_core_unregister_module(
darshan_module_id mod_id);
void darshan_core_register_record( void darshan_core_register_record(
void *name, void *name,
int len, int len,
int printable_flag, int printable_flag,
darshan_module_id mod_id, darshan_module_id mod_id,
darshan_record_id *id); darshan_record_id *rec_id);
void darshan_core_unregister_record( void darshan_core_unregister_record(
darshan_record_id rec_id, darshan_record_id rec_id,
......
...@@ -32,7 +32,7 @@ extern char* __progname; ...@@ -32,7 +32,7 @@ extern char* __progname;
/* internal variable delcarations */ /* internal variable delcarations */
static struct darshan_core_runtime *darshan_core = NULL; static struct darshan_core_runtime *darshan_core = NULL;
static pthread_mutex_t darshan_core_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t darshan_core_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
static int my_rank = -1; static int my_rank = -1;
static int nprocs = -1; static int nprocs = -1;
...@@ -1484,19 +1484,19 @@ static int darshan_log_coll_write(MPI_File log_fh, void *buf, int count, ...@@ -1484,19 +1484,19 @@ static int darshan_log_coll_write(MPI_File log_fh, void *buf, int count,
/* ********************************************************* */ /* ********************************************************* */
void darshan_core_register_module( void darshan_core_register_module(
darshan_module_id id, darshan_module_id mod_id,
struct darshan_module_funcs *funcs, struct darshan_module_funcs *funcs,
int *runtime_mem_limit) int *runtime_mem_limit)
{ {
struct darshan_core_module* mod; struct darshan_core_module* mod;
*runtime_mem_limit = 0; *runtime_mem_limit = 0;
if(!darshan_core || (id >= DARSHAN_MAX_MODS)) if(!darshan_core || (mod_id >= DARSHAN_MAX_MODS))
return; return;
/* see if this module is already registered */ /* see if this module is already registered */
DARSHAN_CORE_LOCK(); DARSHAN_CORE_LOCK();
if(darshan_core->mod_array[id]) if(darshan_core->mod_array[mod_id])
{ {
/* if module is already registered just return */ /* if module is already registered just return */
/* NOTE: we do not recalculate memory limit here, just set to 0 */ /* NOTE: we do not recalculate memory limit here, just set to 0 */
...@@ -1513,11 +1513,11 @@ void darshan_core_register_module( ...@@ -1513,11 +1513,11 @@ void darshan_core_register_module(
} }
memset(mod, 0, sizeof(*mod)); memset(mod, 0, sizeof(*mod));
mod->id = id; mod->id = mod_id;
mod->mod_funcs = *funcs; mod->mod_funcs = *funcs;
/* register module with darshan */ /* register module with darshan */
darshan_core->mod_array[id] = mod; darshan_core->mod_array[mod_id] = mod;
/* TODO: something smarter than just 2 MiB per module */ /* TODO: something smarter than just 2 MiB per module */
*runtime_mem_limit = 2 * 1024 * 1024; *runtime_mem_limit = 2 * 1024 * 1024;
...@@ -1527,6 +1527,31 @@ void darshan_core_register_module( ...@@ -1527,6 +1527,31 @@ void darshan_core_register_module(
return; return;
} }
/* darshan_core_unregister_module()
 *
 * Disassociate the module identified by mod_id from darshan-core: every
 * record currently tracked in the core record hash is unregistered for this
 * module, then the module's slot in mod_array is freed and cleared.
 *
 * mod_id: identifier of the module to unregister; ignored if it is not a
 *         valid index into mod_array (mod_id >= DARSHAN_MAX_MODS).
 *
 * Does nothing if darshan-core has not been initialized.
 *
 * TODO: test
 */
void darshan_core_unregister_module(
    darshan_module_id mod_id)
{
    struct darshan_core_record_ref *ref, *tmp;

    /* apply the same validation as darshan_core_register_module() so that an
     * out-of-range id can never index (or free) past the end of mod_array
     */
    if(!darshan_core || (mod_id >= DARSHAN_MAX_MODS))
        return;

    DARSHAN_CORE_LOCK();

    /* iterate all records and disassociate this module from them */
    /* NOTE: HASH_ITER keeps the next element in tmp, so the iteration
     * tolerates darshan_core_unregister_record() removing ref from the hash
     */
    HASH_ITER(hlink, darshan_core->rec_hash, ref, tmp)
    {
        darshan_core_unregister_record(ref->rec.id, mod_id);
    }

    /* free(NULL) is a no-op, so a module that was never registered is fine */
    free(darshan_core->mod_array[mod_id]);
    darshan_core->mod_array[mod_id] = NULL;

    DARSHAN_CORE_UNLOCK();

    return;
}
void darshan_core_register_record( void darshan_core_register_record(
void *name, void *name,
int len, int len,
...@@ -1579,6 +1604,7 @@ void darshan_core_register_record( ...@@ -1579,6 +1604,7 @@ void darshan_core_register_record(
return; return;
} }
/* TODO: test */
void darshan_core_unregister_record( void darshan_core_unregister_record(
darshan_record_id rec_id, darshan_record_id rec_id,
darshan_module_id mod_id) darshan_module_id mod_id)
......
...@@ -103,6 +103,7 @@ static pthread_mutex_t posix_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER ...@@ -103,6 +103,7 @@ static pthread_mutex_t posix_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER
static int instrumentation_disabled = 0; static int instrumentation_disabled = 0;
static int my_rank = -1; static int my_rank = -1;
/* TODO: I'm sure these should be applied on all modules */
/* these are paths that we will not trace */ /* these are paths that we will not trace */
static char* exclusions[] = { static char* exclusions[] = {
"/etc/", "/etc/",
......
...@@ -362,7 +362,6 @@ int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash) ...@@ -362,7 +362,6 @@ int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash)
} }
free(comp_buf); free(comp_buf);
/* TODO: check for duplicate entries? */
buf_ptr = hash_buf; buf_ptr = hash_buf;
while(buf_ptr < (hash_buf + hash_buf_sz)) while(buf_ptr < (hash_buf + hash_buf_sz))
{ {
...@@ -377,18 +376,6 @@ int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash) ...@@ -377,18 +376,6 @@ int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash)
path_ptr = (char *)buf_ptr; path_ptr = (char *)buf_ptr;
buf_ptr += *path_len_ptr; buf_ptr += *path_len_ptr;
ref = malloc(sizeof(*ref));
if(!ref)
{
return(-1);
}
ref->rec.name = malloc(*path_len_ptr + 1);
if(!ref->rec.name)
{
free(ref);
return(-1);
}
if(fd->swap_flag) if(fd->swap_flag)
{ {
/* we need to sort out endianness issues before deserializing */ /* we need to sort out endianness issues before deserializing */
...@@ -396,13 +383,29 @@ int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash) ...@@ -396,13 +383,29 @@ int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash)
DARSHAN_BSWAP32(path_len_ptr); DARSHAN_BSWAP32(path_len_ptr);
} }
/* set the fields for this record */ HASH_FIND(hlink, *hash, rec_id_ptr, sizeof(darshan_record_id), ref);
ref->rec.id = *rec_id_ptr; if(!ref)
memcpy(ref->rec.name, path_ptr, *path_len_ptr); {
ref->rec.name[*path_len_ptr] = '\0'; ref = malloc(sizeof(*ref));
if(!ref)
{
return(-1);
}
ref->rec.name = malloc(*path_len_ptr + 1);
if(!ref->rec.name)
{
free(ref);
return(-1);
}
/* set the fields for this record */
ref->rec.id = *rec_id_ptr;
memcpy(ref->rec.name, path_ptr, *path_len_ptr);
ref->rec.name[*path_len_ptr] = '\0';
/* add this record to the hash */ /* add this record to the hash */
HASH_ADD(hlink, *hash, rec.id, sizeof(darshan_record_id), ref); HASH_ADD(hlink, *hash, rec.id, sizeof(darshan_record_id), ref);
}
} }
return(0); return(0);
...@@ -476,15 +479,13 @@ int darshan_log_getmod(darshan_fd fd, darshan_module_id mod_id, ...@@ -476,15 +479,13 @@ int darshan_log_getmod(darshan_fd fd, darshan_module_id mod_id,
*mod_buf = tmp_buf; *mod_buf = tmp_buf;
*mod_buf_sz = tmp_buf_sz; *mod_buf_sz = tmp_buf_sz;
/* TODO: bswaps */
return(0); return(0);
} }
#if 0
/* TODO: hardcoded for posix -- what can we do generally? /* TODO: hardcoded for posix -- what can we do generally?
* different function for each module and a way to map to this function? * different function for each module and a way to map to this function?
*/ */
/* TODO: we need bswaps here, too */
int darshan_log_getfile(darshan_fd fd, struct darshan_posix_file *file) int darshan_log_getfile(darshan_fd fd, struct darshan_posix_file *file)
{ {
char *comp_buf; char *comp_buf;
...@@ -544,7 +545,6 @@ int darshan_log_getfile(darshan_fd fd, struct darshan_posix_file *file) ...@@ -544,7 +545,6 @@ int darshan_log_getfile(darshan_fd fd, struct darshan_posix_file *file)
fprintf(stderr, "Error: %s\n", err_string); fprintf(stderr, "Error: %s\n", err_string);
return(-1); return(-1);
} }
#endif
/* darshan_log_close() /* darshan_log_close()
* *
...@@ -563,7 +563,7 @@ void darshan_log_close(darshan_fd fd) ...@@ -563,7 +563,7 @@ void darshan_log_close(darshan_fd fd)
free(fd); free(fd);
} }
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ /* ******************************************* */
/* return 0 on successful seek to offset, -1 on failure. /* return 0 on successful seek to offset, -1 on failure.
*/ */
......
...@@ -168,11 +168,18 @@ int main(int argc, char **argv) ...@@ -168,11 +168,18 @@ int main(int argc, char **argv)
return(0); return(0);
} }
/* TODO: move this functionality somewhere else so it can be reused */
/* loop over the POSIX file records and print out counters */ /* loop over the POSIX file records and print out counters */
for(i = 0; i < (posix_mod_buf_sz / sizeof(struct darshan_posix_file)); i++) for(i = 0; i < (posix_mod_buf_sz / sizeof(struct darshan_posix_file)); i++)
{ {
struct darshan_posix_file next_rec = posix_mod_buf[i]; struct darshan_posix_file next_rec = posix_mod_buf[i];
DARSHAN_BSWAP64(&(next_rec.f_id));
DARSHAN_BSWAP64(&(next_rec.rank));
DARSHAN_BSWAP64(&(next_rec.counters[CP_POSIX_OPENS]));
DARSHAN_BSWAP64(&(next_rec.fcounters[CP_F_OPEN_TIMESTAMP]));
DARSHAN_BSWAP64(&(next_rec.fcounters[CP_F_CLOSE_TIMESTAMP]));
/* get the pathname for this record */ /* get the pathname for this record */
HASH_FIND(hlink, rec_hash, &next_rec.f_id, sizeof(darshan_record_id), ref); HASH_FIND(hlink, rec_hash, &next_rec.f_id, sizeof(darshan_record_id), ref);
assert(ref); assert(ref);
......
...@@ -42,9 +42,9 @@ http://www.mcs.anl.gov/research/projects/darshan/docs/darshan-util.html[darshan- ...@@ -42,9 +42,9 @@ http://www.mcs.anl.gov/research/projects/darshan/docs/darshan-util.html[darshan-
necessary steps for building these repositories should not have changed in the new version of necessary steps for building these repositories should not have changed in the new version of
Darshan. Darshan.
== Architectural overview == Darshan dev-modular overview
The Darshan source tree is composed of two primary components: The Darshan source tree is organized into two primary components:
* *darshan-runtime*: Darshan runtime environment necessary for instrumenting MPI * *darshan-runtime*: Darshan runtime environment necessary for instrumenting MPI
applications and generating I/O characterization logs. applications and generating I/O characterization logs.
...@@ -55,22 +55,303 @@ I/O characterization log. ...@@ -55,22 +55,303 @@ I/O characterization log.
The following subsections provide an overview of each of these components with specific The following subsections provide an overview of each of these components with specific
attention to how new instrumentation modules may be integrated into Darshan. attention to how new instrumentation modules may be integrated into Darshan.
=== darshan-runtime === Darshan-runtime
At a high level, the darshan-runtime library is responsible for instrumenting MPI applications The primary responsibilities of the darshan-runtime component are:
and generating a log file containing the resulting I/O characterization.
The I/O behavior an application is primarily instrumented by intercepting function calls of * intercepting I/O functions of interest from a target application;
interest and recording relevant data.
// TODO: how does dynamic vs static executable affect a module developer? * extracting statistics, timing information, and other data characterizing the application's I/O workload;
* compressing I/O characterization data and corresponding metadata;
* logging the compressed I/O characterization to file for future evaluation
=== darshan-util The first two responsibilities are the burden of the instrumentation module developer, while the last
two are handled automatically by Darshan.
Text. ==== Instrumentation modules
The wrapper functions used to intercept I/O function calls of interest are central to the design of
any Darshan instrumentation module. These wrappers are used to extract pertinent I/O data from
the function call and persist this data in some state structure maintained by the module. The wrappers
are inserted at compile time for statically linked executables (e.g., using the linker's `--wrap`
mechanism) and at runtime for dynamically linked executables (using LD_PRELOAD).
*NOTE*: Modules should not perform any I/O or communication within wrapper functions. Darshan records
I/O data independently on each application process, then merges the data from all processes when the
job is shutting down. This defers expensive I/O and communication operations to the shutdown process,
limiting Darshan's impact on application I/O performance.
When the instrumented application terminates and Darshan begins its shutdown procedure, it requires
a way to interface with any active modules that have data to contribute to the output I/O characterization.
Darshan requires that module developers implement the following functions to allow the Darshan runtime
environment to coordinate with modules while shutting down:
[source,c]
struct darshan_module_funcs
{
void (*disable_instrumentation)(void);
void (*prepare_for_reduction)(
darshan_record_id *shared_recs,
int *shared_rec_count,
void **send_buf,
void **recv_buf,
int *rec_size
);
void (*reduce_records)(
void* a,
void* b,
int *len,
MPI_Datatype *datatype
);
void (*get_output_data)(
void** buf,
int* size
);
void (*shutdown)(void);
};
`disable_instrumentation()`
This function informs the module that Darshan is about to begin shutting down. It should disable
all wrappers and stop updating internal data structures to ensure data consistency and avoid
other race conditions.
`prepare_for_reduction()`
Since Darshan aggregates shared data records (i.e., records which all application processes
accessed) into a single record, module developers must provide mechanisms for performing a reduction
on these records.
This function is used to prepare a module for performing a reduction operation. In general, this
just involves providing the input buffers to the reduction, and (on rank 0 only) providing output
buffer space to store the result of the reduction.
* _shared_recs_ is a set of Darshan record identifiers which are associated with this module.
These are the records which need to be reduced into single shared data records.
* _shared_rec_count_ is a pointer to an integer storing the number of shared records that will
be reduced by this module. When the function is called this variable points to the number
of shared records detected by Darshan, but the module can decide not to reduce any number
of these records. Upon completion of the function, this variable should point to the number
of shared records to perform reductions on (i.e., the size of the input and output buffers).
* _send_buf_ is a pointer to the address of the send buffer used for performing the reduction
operation. Upon completion, this variable should point to a buffer containing *_shared_rec_count_
records that will be reduced.
* _recv_buf_ is a pointer to the address of the receive buffer used for performing the reduction
operation. Upon completion, this variable should point to a buffer with space to store the
*_shared_rec_count_ records resulting from the reduction. This variable is only valid on the root process (rank 0). This
buffer address needs to be stored with module state, as it will be needed when retrieving
the final output buffers from this module.
* _rec_size_ is just the size of the record structure being reduced for this module.
`reduce_records()`
This is the function which performs the actual shared record reduction operation. The prototype
of this function matches that of the user function provided to the MPI_Op_create function. Refer
to the http://www.mpich.org/static/docs/v3.1/www3/MPI_Op_create.html[documentation] for further
details.
`get_output_data()`
This function is responsible for passing back a single buffer storing all data this module is
contributing to the output I/O characterization. On rank 0, this may involve copying the results
of the shared record reduction into the output buffer.
* _buf_ is a pointer to the address of the buffer this module is contributing to the I/O
characterization.
* _size_ is the size of this module's output buffer.
`shutdown()`
This function is a signal from Darshan that it is safe to shut down. It should clean up and free
all internal data structures.
==== darshan-core
Within darshan-runtime, the darshan-core component manages the initialization and shutdown of the
Darshan environment, provides instrumentation module developers an interface for registering modules
with Darshan, and manages the compressing and the writing of the resultant I/O characterization.
Each of the functions defined by this interface is explained in detail below.
[source,c]
void darshan_core_register_module(
darshan_module_id mod_id,
struct darshan_module_funcs *funcs,
int *runtime_mem_limit);
The `darshan_core_register_module` function registers Darshan instrumentation modules with the
darshan-core runtime environment. This function needs to be called at least once for any module
that will contribute data to Darshan's final I/O characterization.
* _mod_id_ is a unique identifier for the given module, which is defined in the Darshan log
format header file (darshan-log-format.h).
* _funcs_ is the structure of function pointers (as described above) that a module developer must
provide to interface with the darshan-core runtime.
* _runtime_mem_limit_ is a pointer to an integer which will store the amount of memory Darshan
allows this module to use at runtime. Currently, darshan-core will hardcode this value to 2 MiB,
but in the future this may be changed to optimize Darshan's memory footprint. Note that Darshan
does not allocate any memory for modules, it just informs a module how much memory it can use.
[source,c]
void darshan_core_unregister_module(
darshan_module_id mod_id);
The `darshan_core_unregister_module` function disassociates the given module from the
darshan-core runtime. Consequently, Darshan does not interface with the given module at
shutdown time and will not log any I/O data from the module. This function should only be used
if a module registers itself with darshan-core but later decides it does not want to contribute
any I/O data.
* _mod_id_ is the unique identifier for the module being unregistered.
[source,c]
void darshan_core_register_record(
void *name,
int len,
int printable_flag,
darshan_module_id mod_id,
darshan_record_id *rec_id);
The `darshan_core_register_record` function registers some data record with the darshan-core
runtime. This record could reference a POSIX file or perhaps an object identifier for an
object storage system, for instance. A unique identifier for the given record name is
generated by Darshan, which should then be used by the module for referencing the corresponding
record. This allows multiple modules to refer to a specific data record in a consistent manner
and also provides a mechanism for mapping these records back to important metadata stored by
darshan-core. It is safe (and likely necessary) to call this function many times for the same
record -- darshan-core will just set the corresponding record identifier if the record has
been previously registered.
* _name_ is just the name of the data record, which could be a file path, object ID, etc.
* _len_ is the size of the input record name. For string record names, this would just be the
string length, but for nonprintable record names (e.g., an integer object identifier), this
is the size of the record name type.
== Adding new modules * _printable_flag_ indicates whether the input record name is a printable ASCII string.
* _mod_id_ is the identifier for the module attempting to register this record.
* _rec_id_ is a pointer to a variable which will store the unique record identifier generated
by Darshan.
[source,c]
void darshan_core_unregister_record(
darshan_record_id rec_id,
darshan_module_id mod_id);
The `darshan_core_unregister_record` function disassociates the given module identifier from the
given record identifier. If no other modules are associated with the given record identifier, then
Darshan removes all internal references to the record. This function should only be used if a
module registers a record with darshan-core, but later decides not to store the record internally.
* _rec_id_ is the record identifier we want to unregister.
* _mod_id_ is the module identifier that is unregistering _rec_id_.
[source,c]
double darshan_core_wtime(void);
The `darshan_core_wtime` function simply returns a floating point number of seconds since
Darshan was initialized. This functionality can be used to time the duration of application
I/O calls or to store timestamps of when functions of interest were called.
==== darshan-common
darshan-common is a utility component of darshan-runtime, providing module developers with
general functions that are likely to be reused across multiple modules. These functions are
distinct from darshan-core functions since they do not require access to internal Darshan
state.
[source,c]
char* darshan_clean_file_path(
const char* path);
The `darshan_clean_file_path` function just cleans up the input path string, converting
relative paths to absolute paths and suppressing any potential noise within the string.
* _path_ is the input path string to be cleaned up.
As more modules are contributed, it is likely that more functionality can be refactored out
of module implementations and maintained in darshan-common, facilitating code reuse and
simplifying maintenance.
=== Darshan-util
Text. Text.
== Adding new instrumentation modules
In this section we outline each step necessary for adding a module to Darshan.
=== Log format headers
The following modifications to Darshan log format headers are required for defining
the module's record structure:
* Add module identifier to darshan_module_id enum and add module string name to the
darshan_module_name array in `darshan-log-format.h`.
* Add a top-level header that defines a data record structure for the module. An exemplar
log header for the POSIX instrumentation module is given in `darshan-posix-log-format.h`.
=== Darshan-runtime
==== Build modifications
The following modifications to the darshan-runtime build system are necessary to integrate
new instrumentation modules:
* Necessary linker flags for wrapping this module's functions need to be added to the definition
of `CP_WRAPPERS` in `darshan-config.in`.
* Targets must be added to `Makefile.in` to build static and shared objects for the module's
source files, which will be stored in the `lib/` directory. The prerequisites to building
static and dynamic versions of `lib-darshan` must be updated to include these objects, as well.
It is necessary to rerun the `prepare` script and reconfigure darshan-runtime for these changes
to take effect.
==== Instrumentation module implementation
An exemplar instrumentation module for POSIX I/O functions is given in `lib/darshan-posix.c` as
reference. In addition to the development notes from above and the reference POSIX module, we
provide the following notes to assist module developers:
* Modules only need to include the `darshan.h` header to interface with darshan-core.
* Lacking a way to bootstrap themselves, modules will have to include some logic in their
wrappers to initialize necessary module state if initialization has not already occurred.
- Part of this initialization process should be registering the module with darshan-core,
since this informs the module how much memory it may allocate.
* The file record identifier given when registering a record with darshan-core can be used
to store the record structure in a hash table or some other structure.
- The `darshan_core_register_record` function is really more like a lookup function. It
may be called multiple times for the same record -- if the record already exists, the function
simply returns its record ID