Commit dafe3790 authored by Shane Snyder's avatar Shane Snyder
Browse files

fill in the hdf5 module

parent f1468f49
......@@ -33,19 +33,32 @@ DARSHAN_FORWARD_DECL(H5Fcreate, hid_t, (const char *filename, unsigned flags, hi
DARSHAN_FORWARD_DECL(H5Fopen, hid_t, (const char *filename, unsigned flags, hid_t access_plist));
DARSHAN_FORWARD_DECL(H5Fclose, herr_t, (hid_t file_id));
/* structure to track i/o stats for a given hdf5 file at runtime */
struct hdf5_file_runtime
{
/* points at this file's slot in hdf5_runtime->file_record_array */
struct darshan_hdf5_file* file_record;
/* uthash handle used by hdf5_runtime->file_hash (keyed on file_record->f_id) */
UT_hash_handle hlink;
};
/* structure to associate a HDF5 hid with an existing file runtime structure */
struct hdf5_file_runtime_ref
{
/* runtime structure of the file this hid currently refers to */
struct hdf5_file_runtime* file;
/* HDF5 identifier; used as the key in hdf5_runtime->hid_hash */
hid_t hid;
/* uthash handle used by hdf5_runtime->hid_hash */
UT_hash_handle hlink;
};
/* necessary state for storing HDF5 file records and coordinating with
 * darshan-core at shutdown time
 */
struct hdf5_runtime
{
    /* runtime tracking structures, one per file record slot */
    struct hdf5_file_runtime* file_runtime_array;
    /* file records; this array is what gets handed to darshan-core as output */
    struct darshan_hdf5_file* file_record_array;
    /* capacity of both arrays, in records */
    int file_array_size;
    /* index of the next unused slot, i.e. the number of records in use */
    int file_array_ndx;
    /* hash table of runtime structures, keyed by darshan record id */
    struct hdf5_file_runtime *file_hash;
    /* hash table of hid -> file references, keyed by HDF5 hid
     * (the stale posix_file_runtime-typed duplicate of this member is dropped)
     */
    struct hdf5_file_runtime_ref* hid_hash;
};
static struct hdf5_runtime *hdf5_runtime = NULL;
......@@ -54,9 +67,16 @@ static int instrumentation_disabled = 0;
/* passed by address to darshan_core_register_module() during initialization
 * and stamped into every new file record
 */
static int my_rank = -1;

/* module initialization and file record / hid bookkeeping helpers */
static void hdf5_runtime_initialize(void);
static struct hdf5_file_runtime* hdf5_file_by_name(const char *name);
static struct hdf5_file_runtime* hdf5_file_by_name_sethid(const char* name, hid_t hid);
static struct hdf5_file_runtime* hdf5_file_by_hid(hid_t hid);
static void hdf5_file_close_hid(hid_t hid);

/* helpers for the shared file record reduction */
static int hdf5_record_compare(const void* a, const void* b);
static void hdf5_record_reduction_op(void* infile_v, void* inoutfile_v,
    int *len, MPI_Datatype *datatype);

/* functions registered with darshan-core through struct darshan_module_funcs */
static void hdf5_begin_shutdown(void);
static void hdf5_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs,
    int shared_rec_count, void **hdf5_buf, int *hdf5_buf_sz);
static void hdf5_shutdown(void);
......@@ -71,18 +91,16 @@ hid_t DARSHAN_DECL(H5Fcreate)(const char *filename, unsigned flags,
hid_t create_plist, hid_t access_plist)
{
int ret;
struct darshan_file_runtime* file;
struct hdf5_file_runtime* file;
char* tmp;
double tm1;
#if 0
MAP_OR_FAIL(H5Fcreate);
tm1 = darshan_wtime();
tm1 = darshan_core_wtime();
ret = __real_H5Fcreate(filename, flags, create_plist, access_plist);
if(ret >= 0)
{
CP_LOCK();
/* use ROMIO approach to strip prefix if present */
/* strip off prefix if there is one, but only skip prefixes
* if they are greater than length one to allow for windows
......@@ -93,17 +111,17 @@ hid_t DARSHAN_DECL(H5Fcreate)(const char *filename, unsigned flags,
filename = tmp + 1;
}
file = darshan_file_by_name_sethid(filename, ret);
HDF5_LOCK();
hdf5_runtime_initialize();
file = hdf5_file_by_name_sethid(filename, ret);
if(file)
{
if(CP_F_VALUE(file, CP_F_OPEN_TIMESTAMP) == 0)
CP_F_SET(file, CP_F_OPEN_TIMESTAMP,
tm1);
CP_INC(file, CP_HDF5_OPENS, 1);
if(file->file_record->fcounters[HDF5_F_OPEN_TIMESTAMP] == 0)
file->file_record->fcounters[HDF5_F_OPEN_TIMESTAMP] = tm1;
file->file_record->counters[HDF5_OPENS] += 1;
}
CP_UNLOCK();
HDF5_UNLOCK();
}
#endif
return(ret);
}
......@@ -112,18 +130,16 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags,
hid_t access_plist)
{
int ret;
struct darshan_file_runtime* file;
struct hdf5_file_runtime* file;
char* tmp;
double tm1;
#if 0
MAP_OR_FAIL(H5Fopen);
tm1 = darshan_wtime();
tm1 = darshan_core_wtime();
ret = __real_H5Fopen(filename, flags, access_plist);
if(ret >= 0)
{
CP_LOCK();
/* use ROMIO approach to strip prefix if present */
/* strip off prefix if there is one, but only skip prefixes
* if they are greater than length one to allow for windows
......@@ -134,18 +150,17 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags,
filename = tmp + 1;
}
file = darshan_file_by_name_sethid(filename, ret);
HDF5_LOCK();
hdf5_runtime_initialize();
file = hdf5_file_by_name_sethid(filename, ret);
if(file)
{
if(CP_F_VALUE(file, CP_F_OPEN_TIMESTAMP) == 0)
CP_F_SET(file, CP_F_OPEN_TIMESTAMP,
tm1);
CP_INC(file, CP_HDF5_OPENS, 1);
if(file->file_record->fcounters[HDF5_F_OPEN_TIMESTAMP] == 0)
file->file_record->fcounters[HDF5_F_OPEN_TIMESTAMP] = tm1;
file->file_record->counters[HDF5_OPENS] += 1;
}
CP_UNLOCK();
HDF5_UNLOCK();
}
#endif
return(ret);
......@@ -153,24 +168,24 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags,
/* Wrapper for H5Fclose: forwards to the real routine, then records the close
 * timestamp on the file record associated with file_id (if one is being
 * tracked) and drops the hid reference.
 *
 * Returns whatever __real_H5Fclose returned.
 */
herr_t DARSHAN_DECL(H5Fclose)(hid_t file_id)
{
    struct hdf5_file_runtime* file;
    int ret;

    /* NOTE(review): MAP_OR_FAIL is defined elsewhere; presumably it resolves
     * __real_H5Fclose before it is called below -- confirm
     */
    MAP_OR_FAIL(H5Fclose);

    ret = __real_H5Fclose(file_id);

    HDF5_LOCK();
    hdf5_runtime_initialize();
    file = hdf5_file_by_hid(file_id);
    if(file)
    {
        file->file_record->fcounters[HDF5_F_CLOSE_TIMESTAMP] =
            darshan_core_wtime();
        /* the hid is no longer valid, so forget its reference */
        hdf5_file_close_hid(file_id);
    }
    HDF5_UNLOCK();

    return(ret);
}
......@@ -179,6 +194,377 @@ herr_t DARSHAN_DECL(H5Fclose)(hid_t file_id)
* Internal functions for manipulating HDF5 module state *
*********************************************************/
/* initialize internal HDF5 module data structures and register with darshan-core
 *
 * Safe to call repeatedly: it returns immediately once hdf5_runtime exists or
 * after instrumentation has been disabled.
 */
static void hdf5_runtime_initialize(void)
{
    struct darshan_module_funcs hdf5_mod_fns =
    {
        .begin_shutdown = &hdf5_begin_shutdown,
        .get_output_data = &hdf5_get_output_data,
        .shutdown = &hdf5_shutdown
    };
    int mem_limit;
    size_t runtime_bytes;
    size_t record_bytes;

    /* nothing to do if already initialized or instrumentation is disabled */
    if(instrumentation_disabled || hdf5_runtime)
        return;

    /* register hdf5 module with darshan-core; it reports our rank and memory budget */
    darshan_core_register_module(
        DARSHAN_HDF5_MOD,
        &hdf5_mod_fns,
        &my_rank,
        &mem_limit,
        NULL);

    /* darshan-core assigned us no memory, so the module stays uninitialized */
    if(mem_limit == 0)
        return;

    hdf5_runtime = malloc(sizeof(*hdf5_runtime));
    if(!hdf5_runtime)
        return;
    memset(hdf5_runtime, 0, sizeof(*hdf5_runtime));

    /* the record capacity is derived from the size of a hdf5 file record only */
    /* TODO: should we base memory usage off file record or total runtime structure sizes? */
    hdf5_runtime->file_array_size = mem_limit / sizeof(struct darshan_hdf5_file);
    hdf5_runtime->file_array_ndx = 0;

    runtime_bytes = hdf5_runtime->file_array_size * sizeof(struct hdf5_file_runtime);
    record_bytes = hdf5_runtime->file_array_size * sizeof(struct darshan_hdf5_file);

    /* allocate the parallel arrays of runtime structures and file records */
    hdf5_runtime->file_runtime_array = malloc(runtime_bytes);
    hdf5_runtime->file_record_array = malloc(record_bytes);
    if(!hdf5_runtime->file_runtime_array || !hdf5_runtime->file_record_array)
    {
        /* zero capacity keeps hdf5_file_by_name from handing out any slot */
        hdf5_runtime->file_array_size = 0;
        return;
    }

    memset(hdf5_runtime->file_runtime_array, 0, runtime_bytes);
    memset(hdf5_runtime->file_record_array, 0, record_bytes);
}
/* get a HDF5 file record for the given file path
 *
 * Looks up the runtime structure for `name` by the record id darshan-core
 * assigns to the path, creating a new one from the preallocated arrays on
 * first use.
 *
 * Returns NULL if the module is not initialized, instrumentation is
 * disabled, or the file is untracked and no free record slot remains.
 */
static struct hdf5_file_runtime* hdf5_file_by_name(const char *name)
{
    struct hdf5_file_runtime *file = NULL;
    char *newname = NULL;
    darshan_record_id file_id;

    if(!hdf5_runtime || instrumentation_disabled)
        return(NULL);

    /* fall back to the caller's string if no cleaned-up path is returned */
    newname = darshan_clean_file_path(name);
    if(!newname)
        newname = (char*)name;

    /* get a unique id for this file from darshan core */
    darshan_core_register_record(
        (void*)newname,
        strlen(newname),
        1,
        DARSHAN_HDF5_MOD,
        &file_id,
        NULL);

    /* search the hash table for this file record */
    HASH_FIND(hlink, hdf5_runtime->file_hash, &file_id, sizeof(darshan_record_id), file);

    /* Only claim a new slot while the arrays still have room. A stray ';'
     * after this condition used to make the block unconditional, writing past
     * the end of both arrays once they were full.
     */
    if(!file && hdf5_runtime->file_array_ndx < hdf5_runtime->file_array_size)
    {
        /* no existing record, assign a new file record from the global array */
        file = &(hdf5_runtime->file_runtime_array[hdf5_runtime->file_array_ndx]);
        file->file_record = &(hdf5_runtime->file_record_array[hdf5_runtime->file_array_ndx]);
        file->file_record->f_id = file_id;
        file->file_record->rank = my_rank;

        /* add new record to file hash table */
        HASH_ADD(hlink, hdf5_runtime->file_hash, file_record->f_id, sizeof(darshan_record_id), file);

        hdf5_runtime->file_array_ndx++;
    }

    /* release the cleaned path only if it was a separate allocation */
    if(newname != name)
        free(newname);
    return(file);
}
/* look up (or create) the HDF5 file record for a path and bind the given
 * hid to it through a reference entry in the hid hash table
 *
 * Returns the file's runtime structure, or NULL when the module is inactive,
 * no record could be obtained for the path, or a new reference could not be
 * allocated.
 */
static struct hdf5_file_runtime* hdf5_file_by_name_sethid(const char* name, hid_t hid)
{
    struct hdf5_file_runtime_ref *hid_ref = NULL;
    struct hdf5_file_runtime *rec;

    if(instrumentation_disabled || !hdf5_runtime)
        return(NULL);

    /* the file record itself is resolved by name */
    rec = hdf5_file_by_name(name);
    if(rec == NULL)
        return(NULL);

    HASH_FIND(hlink, hdf5_runtime->hid_hash, &hid, sizeof(hid_t), hid_ref);
    if(hid_ref == NULL)
    {
        /* first time this hid is seen: create a reference and insert it */
        hid_ref = malloc(sizeof(*hid_ref));
        if(hid_ref == NULL)
            return(NULL);
        memset(hid_ref, 0, sizeof(*hid_ref));

        hid_ref->hid = hid;
        hid_ref->file = rec;
        HASH_ADD(hlink, hdf5_runtime->hid_hash, hid, sizeof(hid_t), hid_ref);
        return(rec);
    }

    /* the hid is already known: repoint its reference at this file */
    hid_ref->file = rec;
    return(rec);
}
/* resolve a HDF5 hid to the file runtime structure it references
 *
 * Returns NULL when the module is inactive or the hid has no reference.
 */
static struct hdf5_file_runtime* hdf5_file_by_hid(hid_t hid)
{
    struct hdf5_file_runtime_ref *match = NULL;

    if(instrumentation_disabled || !hdf5_runtime)
        return(NULL);

    /* the hid hash table maps each tracked hid to its reference entry */
    HASH_FIND(hlink, hdf5_runtime->hid_hash, &hid, sizeof(hid_t), match);

    return(match ? match->file : NULL);
}
/* drop and free the reference entry for the given hid, if there is one */
static void hdf5_file_close_hid(hid_t hid)
{
    struct hdf5_file_runtime_ref *stale = NULL;

    if(instrumentation_disabled || !hdf5_runtime)
        return;

    HASH_FIND(hlink, hdf5_runtime->hid_hash, &hid, sizeof(hid_t), stale);
    if(stale == NULL)
        return;

    /* unlink from the hid hash table before releasing the memory */
    HASH_DELETE(hlink, hdf5_runtime->hid_hash, stale);
    free(stale);
}
/* qsort-style comparator ordering file records by descending rank:
 * returns 1 when a's rank is lower than b's, -1 when it is higher, 0 on a tie
 */
static int hdf5_record_compare(const void* a_p, const void* b_p)
{
    const struct darshan_hdf5_file *lhs = a_p;
    const struct darshan_hdf5_file *rhs = b_p;

    /* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
    return (lhs->rank < rhs->rank) - (lhs->rank > rhs->rank);
}
/* MPI user-defined reduction operator combining HDF5 file records
 *
 * For each of the *len record pairs, the combined record is written back
 * into inoutfile_v:
 *   - f_id is taken from the incoming record and rank is set to -1
 *   - HDF5_OPENS is summed
 *   - HDF5_F_OPEN_TIMESTAMP is the smallest non-zero value of the pair
 *     (0 only if both are 0)
 *   - HDF5_F_CLOSE_TIMESTAMP is the larger value of the pair
 *
 * datatype is unused; it is part of the signature MPI requires.
 */
static void hdf5_record_reduction_op(void* infile_v, void* inoutfile_v,
    int *len, MPI_Datatype *datatype)
{
    struct darshan_hdf5_file tmp_file;
    struct darshan_hdf5_file *infile = infile_v;
    struct darshan_hdf5_file *inoutfile = inoutfile_v;
    int i, j;

    assert(hdf5_runtime);

    for(i=0; i<*len; i++)
    {
        memset(&tmp_file, 0, sizeof(struct darshan_hdf5_file));
        tmp_file.f_id = infile->f_id;
        tmp_file.rank = -1;

        /* sum */
        for(j=HDF5_OPENS; j<=HDF5_OPENS; j++)
        {
            tmp_file.counters[j] = infile->counters[j] + inoutfile->counters[j];
        }

        /* min non-zero (if available) value: a zero timestamp means "never
         * set", so it must not win over a real timestamp from the other record
         */
        for(j=HDF5_F_OPEN_TIMESTAMP; j<=HDF5_F_OPEN_TIMESTAMP; j++)
        {
            if((infile->fcounters[j] < inoutfile->fcounters[j] &&
                infile->fcounters[j] > 0) || inoutfile->fcounters[j] == 0)
                tmp_file.fcounters[j] = infile->fcounters[j];
            else
                tmp_file.fcounters[j] = inoutfile->fcounters[j];
        }

        /* max */
        for(j=HDF5_F_CLOSE_TIMESTAMP; j<=HDF5_F_CLOSE_TIMESTAMP; j++)
        {
            if(infile->fcounters[j] > inoutfile->fcounters[j])
                tmp_file.fcounters[j] = infile->fcounters[j];
            else
                tmp_file.fcounters[j] = inoutfile->fcounters[j];
        }

        /* store the combined record and advance to the next pair */
        *inoutfile = tmp_file;
        inoutfile++;
        infile++;
    }

    return;
}
/************************************************************************
* Functions exported by HDF5 module for coordinating with darshan-core *
************************************************************************/
/* first shutdown step: stop the module from recording anything further
 *
 * Every helper in this file checks instrumentation_disabled and backs out
 * once it is set.
 */
static void hdf5_begin_shutdown(void)
{
    assert(hdf5_runtime);

    /* flip the flag between HDF5_LOCK/HDF5_UNLOCK, as the wrappers do for their updates */
    HDF5_LOCK();
    instrumentation_disabled = 1;
    HDF5_UNLOCK();
}
/* Hand this module's file records to the caller, reducing shared records first.
 *
 * mod_comm         - communicator used for the shared record reduction
 * shared_recs      - ids of the records flagged as shared
 * shared_rec_count - number of entries in shared_recs (0 skips the reduction)
 * hdf5_buf         - out: set to the module's file record array
 * hdf5_buf_sz      - out: set to the size in bytes of the records to output
 *
 * When shared records exist they are marked with rank -1, sorted to the end
 * of the record array and reduced onto rank 0. Rank 0 overwrites its shared
 * records with the reduced values; every other rank drops them from its
 * output by shrinking file_array_ndx.
 *
 * NOTE(review): qsort moves records between slots of file_record_array, so
 * the file_record pointers held in file_runtime_array may no longer refer to
 * the same file afterwards -- confirm nothing dereferences them after this.
 */
static void hdf5_get_output_data(
MPI_Comm mod_comm,
darshan_record_id *shared_recs,
int shared_rec_count,
void **hdf5_buf,
int *hdf5_buf_sz)
{
struct hdf5_file_runtime *file;
int i;
struct darshan_hdf5_file *red_send_buf = NULL;
struct darshan_hdf5_file *red_recv_buf = NULL;
MPI_Datatype red_type;
MPI_Op red_op;
assert(hdf5_runtime);
/* if there are globally shared files, do a shared file reduction */
if(shared_rec_count)
{
/* necessary initialization of shared records */
for(i = 0; i < shared_rec_count; i++)
{
HASH_FIND(hlink, hdf5_runtime->file_hash, &shared_recs[i],
sizeof(darshan_record_id), file);
/* every shared record id is expected to be tracked locally */
assert(file);
file->file_record->rank = -1;
}
/* sort the array of files descending by rank so that we get all of the
* shared files (marked by rank -1) in a contiguous portion at end
* of the array
*/
qsort(hdf5_runtime->file_record_array, hdf5_runtime->file_array_ndx,
sizeof(struct darshan_hdf5_file), hdf5_record_compare);
/* make *send_buf point to the shared files at the end of sorted array */
red_send_buf =
&(hdf5_runtime->file_record_array[hdf5_runtime->file_array_ndx-(shared_rec_count)]);
/* allocate memory for the reduction output on rank 0 */
if(my_rank == 0)
{
red_recv_buf = malloc(shared_rec_count * sizeof(struct darshan_hdf5_file));
/* NOTE(review): this early return leaves *hdf5_buf and *hdf5_buf_sz unset
* and skips the PMPI_Reduce below on rank 0 only; the other ranks would
* presumably still enter the reduction -- confirm this cannot hang
*/
if(!red_recv_buf)
return;
}
/* construct a datatype for a HDF5 file record. This is serving no purpose
* except to make sure we can do a reduction on proper boundaries
*/
DARSHAN_MPI_CALL(PMPI_Type_contiguous)(sizeof(struct darshan_hdf5_file),
MPI_BYTE, &red_type);
DARSHAN_MPI_CALL(PMPI_Type_commit)(&red_type);
/* register a HDF5 file record reduction operator */
DARSHAN_MPI_CALL(PMPI_Op_create)(hdf5_record_reduction_op, 1, &red_op);
/* reduce shared HDF5 file records */
DARSHAN_MPI_CALL(PMPI_Reduce)(red_send_buf, red_recv_buf,
shared_rec_count, red_type, red_op, 0, mod_comm);
/* clean up reduction state */
if(my_rank == 0)
{
/* rank 0 keeps the shared records: replace its local copies with the reduced ones */
int tmp_ndx = hdf5_runtime->file_array_ndx - shared_rec_count;
memcpy(&(hdf5_runtime->file_record_array[tmp_ndx]), red_recv_buf,
shared_rec_count * sizeof(struct darshan_hdf5_file));
free(red_recv_buf);
}
else
{
/* other ranks exclude the shared records (at the array's tail) from their output */
hdf5_runtime->file_array_ndx -= shared_rec_count;
}
DARSHAN_MPI_CALL(PMPI_Type_free)(&red_type);
DARSHAN_MPI_CALL(PMPI_Op_free)(&red_op);
}
/* output is the record array itself, limited to the first file_array_ndx records */
*hdf5_buf = (void *)(hdf5_runtime->file_record_array);
*hdf5_buf_sz = hdf5_runtime->file_array_ndx * sizeof(struct darshan_hdf5_file);
return;
}
/* final shutdown step: release every allocation owned by the module and
 * reset hdf5_runtime to NULL
 */
static void hdf5_shutdown(void)
{
    struct hdf5_file_runtime_ref *cur, *next;

    assert(hdf5_runtime);

    /* hid references are allocated one at a time, so free them one at a time */
    HASH_ITER(hlink, hdf5_runtime->hid_hash, cur, next)
    {
        HASH_DELETE(hlink, hdf5_runtime->hid_hash, cur);
        free(cur);
    }

    /* file_hash entries live inside file_runtime_array and are released with
     * it below; only the hash table itself is cleared here
     */
    HASH_CLEAR(hlink, hdf5_runtime->file_hash);

    free(hdf5_runtime->file_record_array);
    free(hdf5_runtime->file_runtime_array);
    free(hdf5_runtime);
    hdf5_runtime = NULL;
}
/*
* Local variables:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment