Commit 98c93e0f authored by Shane Snyder's avatar Shane Snyder

darshan-runtime commits for configurable mem

Users can now pass a configure option (--with-mod-mem) or set a runtime
environment variable (DARSHAN_MODMEM) to control Darshan's per-module memory limit.
parent 48352d36
......@@ -69,6 +69,11 @@ static char * const darshan_module_names[] =
};
#undef X
/*
 * Simple macros for accessing module flag bitfields.
 *
 * 'flags' must be a modifiable lvalue (SET/UNSET assign back to it) and
 * 'id' is the zero-based bit position to operate on.
 *
 * Every argument is parenthesized so that compound expressions expand with
 * the intended precedence, and the mask is built from an unsigned constant
 * so that selecting bit 31 does not shift into the sign bit of an int
 * (which is undefined behavior in C).
 *
 * NOTE: the mask is computed as an unsigned int, so 'id' must be smaller
 * than the bit width of unsigned int; a flag word wider than unsigned int
 * would have its upper bits cleared by DARSHAN_MOD_FLAG_UNSET.
 */
#define DARSHAN_MOD_FLAG_SET(flags, id) ((flags) = ((flags) | (1U << (id))))
#define DARSHAN_MOD_FLAG_UNSET(flags, id) ((flags) = ((flags) & ~(1U << (id))))
/* evaluates to non-zero if bit 'id' is set in 'flags', zero otherwise */
#define DARSHAN_MOD_FLAG_ISSET(flags, id) ((flags) & (1U << (id)))
/* compression method used on darshan log file */
enum darshan_comp_type
{
......@@ -97,7 +102,7 @@ struct darshan_header
char version_string[8];
int64_t magic_nr;
unsigned char comp_type;
unsigned char partial_flag;
uint32_t partial_flag;
struct darshan_log_map rec_map;
struct darshan_log_map mod_map[DARSHAN_MAX_MODS];
};
......
......@@ -693,7 +693,7 @@ with_log_hints
with_log_path
with_jobid_env
enable_bgq_mod
with_max_records
with_mod_mem
'
ac_precious_vars='build_alias
host_alias
......@@ -1331,7 +1331,7 @@ Optional Packages:
--with-jobid-env=<name> Name of environment variable that stores the jobid
(specify "NONE" if no appropriate environment variable is available:
Darshan will use rank 0's pid instead)
--with-max-records=<num> Maximum records for Darshan to track at runtime
--with-mod-mem=<num> Maximum amount of memory (in MiB) for each Darshan module
Some influential environment variables:
CC C compiler command
......@@ -4281,14 +4281,14 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
# Check whether --with-max-records was given.
if test "${with_max_records+set}" = set; then :
withval=$with_max_records; if test x$withval = xyes; then
as_fn_error $? "--with-max-records must be given a number" "$LINENO" 5
# Check whether --with-mod-mem was given.
if test "${with_mod_mem+set}" = set; then :
withval=$with_mod_mem; if test x$withval = xyes; then
as_fn_error $? "--with-mod-mem must be given a number" "$LINENO" 5
else
cat >>confdefs.h <<_ACEOF
#define __DARSHAN_MAX_RECORDS ${withval}
#define __DARSHAN_MOD_MEM_MAX ${withval}
_ACEOF
fi
......
......@@ -300,12 +300,12 @@ if test x$enable_bgq_mod != xno; then
[]))
fi
AC_ARG_WITH(max-records,
[ --with-max-records=<num> Maximum records for Darshan to track at runtime],
AC_ARG_WITH(mod-mem,
[ --with-mod-mem=<num> Maximum amount of memory (in MiB) for each Darshan module],
if test x$withval = xyes; then
AC_MSG_ERROR(--with-max-records must be given a number)
AC_MSG_ERROR(--with-mod-mem must be given a number)
else
AC_DEFINE_UNQUOTED(__DARSHAN_MAX_RECORDS, ${withval}, Maximum Darshan records to track at runtime)
AC_DEFINE_UNQUOTED(__DARSHAN_MOD_MEM_MAX, ${withval}, Maximum memory (in MiB) for each Darshan module)
fi
)
......
......@@ -26,18 +26,18 @@
/* Environment variable to override __DARSHAN_MEM_ALIGNMENT */
#define DARSHAN_MEM_ALIGNMENT_OVERRIDE "DARSHAN_MEMALIGN"
#ifdef __DARSHAN_MAX_RECORDS
#define DARSHAN_CORE_MAX_RECORDS __DARSHAN_MAX_RECORDS
/* Environment variable to override memory per module */
#define DARSHAN_MOD_MEM_OVERRIDE "DARSHAN_MODMEM"
/* Maximum amount of memory per instrumentation module in MiB */
#ifdef __DARSHAN_MOD_MEM_MAX
#define DARSHAN_MOD_MEM_MAX (__DARSHAN_MOD_MEM_MAX * 1024 * 1024)
#else
#define DARSHAN_CORE_MAX_RECORDS 2048
#define DARSHAN_MOD_MEM_MAX (2 * 1024 * 1024) /* 2 MiB default */
#endif
/* TODO: revisit this default size if we change memory per module */
#define DARSHAN_CORE_COMP_BUF_SIZE (2 * 1024 * 1024)
#define DARSHAN_CORE_MOD_SET(flags, id) (flags | (1 << id))
#define DARSHAN_CORE_MOD_UNSET(flags, id) (flags & ~(1 << id))
#define DARSHAN_CORE_MOD_ISSET(flags, id) (flags & (1 << id))
/* Default runtime compression buffer size */
#define DARSHAN_COMP_BUF_SIZE DARSHAN_MOD_MEM_MAX
/* in memory structure to keep up with job level data */
struct darshan_core_runtime
......@@ -48,7 +48,7 @@ struct darshan_core_runtime
struct darshan_core_record_ref *rec_hash;
int rec_count;
struct darshan_core_module* mod_array[DARSHAN_MAX_MODS];
char comp_buf[DARSHAN_CORE_COMP_BUF_SIZE];
char comp_buf[DARSHAN_COMP_BUF_SIZE];
double wtime_offset;
char *trailing_data;
};
......
......@@ -102,11 +102,11 @@
/* Location to store log files at run time */
#undef __DARSHAN_LOG_PATH
/* Maximum Darshan records to track at runtime */
#undef __DARSHAN_MAX_RECORDS
/* Memory alignment in bytes */
#undef __DARSHAN_MEM_ALIGNMENT
/* Maximum memory (in MiB) for each Darshan module */
#undef __DARSHAN_MOD_MEM_MAX
/* Generalized request type for MPI-IO */
#undef __D_MPI_REQUEST
......@@ -118,18 +118,21 @@ void darshan_core_unregister_module(
* Register the Darshan record given by 'name' with the darshan-core
* runtime, allowing it to be properly tracked and (potentially)
* correlated with records from other modules. 'len' is the size of
* the name pointer (string length for string names), 'printable_flag'
* indicates whether the name is a string, and 'mod_id' is the identifier
* of the calling module. 'rec_id' is an output pointer storing the
* corresponding Darshan record identifier and 'file_alignment' is an
* output pointer storing the file system alignment value for the given
* record.
* the name pointer (string length for string names), and 'printable_flag'
* indicates whether the name is a string. 'mod_limit_flag' is set if
* the calling module is out of memory (to prevent darshan-core from
* creating new records and to just search existing records) and 'mod_id'
* is the identifier of the calling module. 'rec_id' is an output pointer
* storing the corresponding Darshan record identifier and 'file_alignment'
* is an output pointer storing the file system alignment value for the
* given record.
*/
void darshan_core_register_record(
void *name,
int len,
int printable_flag,
darshan_module_id mod_id,
int printable_flag,
int mod_limit_flag,
darshan_record_id *rec_id,
int *file_alignment);
......
......@@ -156,8 +156,9 @@ void bgq_runtime_initialize()
darshan_core_register_record(
recname,
strlen(recname),
1,
DARSHAN_BGQ_MOD,
1,
0,
&bgq_runtime->record.f_id,
&bgq_runtime->record.alignment);
......
This diff is collapsed.
......@@ -256,6 +256,7 @@ static struct hdf5_file_runtime* hdf5_file_by_name(const char *name)
struct hdf5_file_runtime *file = NULL;
char *newname = NULL;
darshan_record_id file_id;
int limit_flag;
if(!hdf5_runtime || instrumentation_disabled)
return(NULL);
......@@ -264,12 +265,15 @@ static struct hdf5_file_runtime* hdf5_file_by_name(const char *name)
if(!newname)
newname = (char*)name;
limit_flag = (hdf5_runtime->file_array_ndx >= hdf5_runtime->file_array_size);
/* get a unique id for this file from darshan core */
darshan_core_register_record(
(void*)newname,
strlen(newname),
1,
DARSHAN_HDF5_MOD,
1,
limit_flag,
&file_id,
NULL);
......@@ -292,19 +296,15 @@ static struct hdf5_file_runtime* hdf5_file_by_name(const char *name)
return(file);
}
if(hdf5_runtime->file_array_ndx < hdf5_runtime->file_array_size);
{
/* no existing record, assign a new file record from the global array */
file = &(hdf5_runtime->file_runtime_array[hdf5_runtime->file_array_ndx]);
file->file_record = &(hdf5_runtime->file_record_array[hdf5_runtime->file_array_ndx]);
file->file_record->f_id = file_id;
file->file_record->rank = my_rank;
/* add new record to file hash table */
HASH_ADD(hlink, hdf5_runtime->file_hash, file_record->f_id, sizeof(darshan_record_id), file);
/* no existing record, assign a new file record from the global array */
file = &(hdf5_runtime->file_runtime_array[hdf5_runtime->file_array_ndx]);
file->file_record = &(hdf5_runtime->file_record_array[hdf5_runtime->file_array_ndx]);
file->file_record->f_id = file_id;
file->file_record->rank = my_rank;
hdf5_runtime->file_array_ndx++;
}
/* add new record to file hash table */
HASH_ADD(hlink, hdf5_runtime->file_hash, file_record->f_id, sizeof(darshan_record_id), file);
hdf5_runtime->file_array_ndx++;
if(newname != name)
free(newname);
......
......@@ -878,6 +878,7 @@ static struct mpiio_file_runtime* mpiio_file_by_name(const char *name)
struct mpiio_file_runtime *file = NULL;
char *newname = NULL;
darshan_record_id file_id;
int limit_flag;
if(!mpiio_runtime || instrumentation_disabled)
return(NULL);
......@@ -886,17 +887,20 @@ static struct mpiio_file_runtime* mpiio_file_by_name(const char *name)
if(!newname)
newname = (char*)name;
limit_flag = (mpiio_runtime->file_array_ndx >= mpiio_runtime->file_array_size);
/* get a unique id for this file from darshan core */
darshan_core_register_record(
(void*)newname,
strlen(newname),
1,
DARSHAN_MPIIO_MOD,
1,
limit_flag,
&file_id,
NULL);
/* if record is set to 0, darshan-core is out of space and will not
* track this record, so we should avoid tracking it, too
/* the file record id is set to 0 if no memory is available for tracking
* new records -- just fall through and ignore this record
*/
if(file_id == 0)
{
......@@ -914,19 +918,15 @@ static struct mpiio_file_runtime* mpiio_file_by_name(const char *name)
return(file);
}
if(mpiio_runtime->file_array_ndx < mpiio_runtime->file_array_size);
{
/* no existing record, assign a new file record from the global array */
file = &(mpiio_runtime->file_runtime_array[mpiio_runtime->file_array_ndx]);
file->file_record = &(mpiio_runtime->file_record_array[mpiio_runtime->file_array_ndx]);
file->file_record->f_id = file_id;
file->file_record->rank = my_rank;
/* add new record to file hash table */
HASH_ADD(hlink, mpiio_runtime->file_hash, file_record->f_id, sizeof(darshan_record_id), file);
/* no existing record, assign a new file record from the global array */
file = &(mpiio_runtime->file_runtime_array[mpiio_runtime->file_array_ndx]);
file->file_record = &(mpiio_runtime->file_record_array[mpiio_runtime->file_array_ndx]);
file->file_record->f_id = file_id;
file->file_record->rank = my_rank;
mpiio_runtime->file_array_ndx++;
}
/* add new record to file hash table */
HASH_ADD(hlink, mpiio_runtime->file_hash, file_record->f_id, sizeof(darshan_record_id), file);
mpiio_runtime->file_array_ndx++;
if(newname != name)
free(newname);
......
......@@ -264,6 +264,7 @@ static struct null_record_runtime* null_record_by_name(const char *name)
{
struct null_record_runtime *rec = NULL;
darshan_record_id rec_id;
int limit_flag;
/* Don't search for a record if the "NULL" module is not initialized or
* if instrumentation has been toggled off.
......@@ -271,15 +272,27 @@ static struct null_record_runtime* null_record_by_name(const char *name)
if(!null_runtime || instrumentation_disabled)
return(NULL);
/* stop tracking new records if we are tracking our maximum count */
limit_flag = (null_runtime->rec_array_ndx >= null_runtime->rec_array_size);
/* get a unique record identifier for this record from darshan-core */
darshan_core_register_record(
(void*)name,
strlen(name),
1,
DARSHAN_NULL_MOD,
1,
limit_flag,
&rec_id,
NULL);
/* the file record id is set to 0 if no memory is available for tracking
* new records -- just fall through and ignore this record
*/
if(rec_id == 0)
{
return(NULL);
}
/* search the hash table for this file record, and return if found */
HASH_FIND(hlink, null_runtime->record_hash, &rec_id, sizeof(darshan_record_id), rec);
if(rec)
......@@ -287,21 +300,17 @@ static struct null_record_runtime* null_record_by_name(const char *name)
return(rec);
}
if(null_runtime->rec_array_ndx < null_runtime->rec_array_size);
{
/* no existing record, assign a new one from the global array */
rec = &(null_runtime->runtime_record_array[null_runtime->rec_array_ndx]);
rec->record_p = &(null_runtime->record_array[null_runtime->rec_array_ndx]);
/* no existing record, assign a new one from the global array */
rec = &(null_runtime->runtime_record_array[null_runtime->rec_array_ndx]);
rec->record_p = &(null_runtime->record_array[null_runtime->rec_array_ndx]);
/* set the darshan record id and corresponding process rank for this record */
rec->record_p->f_id = rec_id;
rec->record_p->rank = my_rank;
/* set the darshan record id and corresponding process rank for this record */
rec->record_p->f_id = rec_id;
rec->record_p->rank = my_rank;
/* add new record to file hash table */
HASH_ADD(hlink, null_runtime->record_hash, record_p->f_id, sizeof(darshan_record_id), rec);
null_runtime->rec_array_ndx++;
}
/* add new record to file hash table */
HASH_ADD(hlink, null_runtime->record_hash, record_p->f_id, sizeof(darshan_record_id), rec);
null_runtime->rec_array_ndx++;
return(rec);
}
......
......@@ -268,6 +268,7 @@ static struct pnetcdf_file_runtime* pnetcdf_file_by_name(const char *name)
struct pnetcdf_file_runtime *file = NULL;
char *newname = NULL;
darshan_record_id file_id;
int limit_flag;
if(!pnetcdf_runtime || instrumentation_disabled)
return(NULL);
......@@ -276,17 +277,20 @@ static struct pnetcdf_file_runtime* pnetcdf_file_by_name(const char *name)
if(!newname)
newname = (char*)name;
limit_flag = (pnetcdf_runtime->file_array_ndx >= pnetcdf_runtime->file_array_size);
/* get a unique id for this file from darshan core */
darshan_core_register_record(
(void*)newname,
strlen(newname),
1,
DARSHAN_PNETCDF_MOD,
1,
limit_flag,
&file_id,
NULL);
/* if record is set to 0, darshan-core is out of space and will not
* track this record, so we should avoid tracking it, too
/* the file record id is set to 0 if no memory is available for tracking
* new records -- just fall through and ignore this record
*/
if(file_id == 0)
{
......@@ -304,19 +308,15 @@ static struct pnetcdf_file_runtime* pnetcdf_file_by_name(const char *name)
return(file);
}
if(pnetcdf_runtime->file_array_ndx < pnetcdf_runtime->file_array_size);
{
/* no existing record, assign a new file record from the global array */
file = &(pnetcdf_runtime->file_runtime_array[pnetcdf_runtime->file_array_ndx]);
file->file_record = &(pnetcdf_runtime->file_record_array[pnetcdf_runtime->file_array_ndx]);
file->file_record->f_id = file_id;
file->file_record->rank = my_rank;
/* add new record to file hash table */
HASH_ADD(hlink, pnetcdf_runtime->file_hash, file_record->f_id, sizeof(darshan_record_id), file);
/* no existing record, assign a new file record from the global array */
file = &(pnetcdf_runtime->file_runtime_array[pnetcdf_runtime->file_array_ndx]);
file->file_record = &(pnetcdf_runtime->file_record_array[pnetcdf_runtime->file_array_ndx]);
file->file_record->f_id = file_id;
file->file_record->rank = my_rank;
pnetcdf_runtime->file_array_ndx++;
}
/* add new record to file hash table */
HASH_ADD(hlink, pnetcdf_runtime->file_hash, file_record->f_id, sizeof(darshan_record_id), file);
pnetcdf_runtime->file_array_ndx++;
if(newname != name)
free(newname);
......
......@@ -1509,6 +1509,7 @@ static struct posix_file_runtime* posix_file_by_name(const char *name)
char *newname = NULL;
darshan_record_id file_id;
int file_alignment;
int limit_flag;
if(!posix_runtime || instrumentation_disabled)
return(NULL);
......@@ -1517,17 +1518,20 @@ static struct posix_file_runtime* posix_file_by_name(const char *name)
if(!newname)
newname = (char*)name;
limit_flag = (posix_runtime->file_array_ndx >= posix_runtime->file_array_size);
/* get a unique id for this file from darshan core */
darshan_core_register_record(
(void*)newname,
strlen(newname),
1,
DARSHAN_POSIX_MOD,
1,
limit_flag,
&file_id,
&file_alignment);
/* if record is set to 0, darshan-core is out of space and will not
* track this record, so we should avoid tracking it, too
/* the file record id is set to 0 if no memory is available for tracking
* new records -- just fall through and ignore this record
*/
if(file_id == 0)
{
......@@ -1545,21 +1549,17 @@ static struct posix_file_runtime* posix_file_by_name(const char *name)
return(file);
}
if(posix_runtime->file_array_ndx < posix_runtime->file_array_size);
{
/* no existing record, assign a new file record from the global array */
file = &(posix_runtime->file_runtime_array[posix_runtime->file_array_ndx]);
file->file_record = &(posix_runtime->file_record_array[posix_runtime->file_array_ndx]);
file->file_record->f_id = file_id;
file->file_record->rank = my_rank;
file->file_record->counters[POSIX_MEM_ALIGNMENT] = darshan_mem_alignment;
file->file_record->counters[POSIX_FILE_ALIGNMENT] = file_alignment;
/* add new record to file hash table */
HASH_ADD(hlink, posix_runtime->file_hash, file_record->f_id, sizeof(darshan_record_id), file);
posix_runtime->file_array_ndx++;
}
/* no existing record, assign a new file record from the global array */
file = &(posix_runtime->file_runtime_array[posix_runtime->file_array_ndx]);
file->file_record = &(posix_runtime->file_record_array[posix_runtime->file_array_ndx]);
file->file_record->f_id = file_id;
file->file_record->rank = my_rank;
file->file_record->counters[POSIX_MEM_ALIGNMENT] = darshan_mem_alignment;
file->file_record->counters[POSIX_FILE_ALIGNMENT] = file_alignment;
/* add new record to file hash table */
HASH_ADD(hlink, posix_runtime->file_hash, file_record->f_id, sizeof(darshan_record_id), file);
posix_runtime->file_array_ndx++;
if(newname != name)
free(newname);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment