Commit 98c93e0f authored by Shane Snyder's avatar Shane Snyder
Browse files

darshan-runtime commits for configurable mem

Users can now pass a configure option (--with-mod-mem) or set a runtime
environment variable (DARSHAN_MODMEM) to control Darshan's per-module memory limit.
parent 48352d36
...@@ -69,6 +69,11 @@ static char * const darshan_module_names[] = ...@@ -69,6 +69,11 @@ static char * const darshan_module_names[] =
}; };
#undef X #undef X
/* Simple macros for accessing module flag bitfields.
 *
 * 'flags' is the integer flag word (a modifiable lvalue for SET/UNSET) and
 * 'id' is the bit position of the module of interest.
 *
 * Every argument and every expansion is parenthesized so that compound
 * expressions (e.g. 'a | b') bind as the caller expects, and the mask is
 * built from an unsigned long long constant so that bit positions beyond
 * the width of 'int' are well defined for 64-bit flag words.
 *
 * SET and UNSET update 'flags' in place; ISSET evaluates to a nonzero
 * value when the bit is set and to zero otherwise.
 */
#define DARSHAN_MOD_FLAG_SET(flags, id) ((flags) |= (1ULL << (id)))
#define DARSHAN_MOD_FLAG_UNSET(flags, id) ((flags) &= ~(1ULL << (id)))
#define DARSHAN_MOD_FLAG_ISSET(flags, id) ((flags) & (1ULL << (id)))
/* compression method used on darshan log file */ /* compression method used on darshan log file */
enum darshan_comp_type enum darshan_comp_type
{ {
...@@ -97,7 +102,7 @@ struct darshan_header ...@@ -97,7 +102,7 @@ struct darshan_header
char version_string[8]; char version_string[8];
int64_t magic_nr; int64_t magic_nr;
unsigned char comp_type; unsigned char comp_type;
unsigned char partial_flag; uint32_t partial_flag;
struct darshan_log_map rec_map; struct darshan_log_map rec_map;
struct darshan_log_map mod_map[DARSHAN_MAX_MODS]; struct darshan_log_map mod_map[DARSHAN_MAX_MODS];
}; };
......
...@@ -693,7 +693,7 @@ with_log_hints ...@@ -693,7 +693,7 @@ with_log_hints
with_log_path with_log_path
with_jobid_env with_jobid_env
enable_bgq_mod enable_bgq_mod
with_max_records with_mod_mem
' '
ac_precious_vars='build_alias ac_precious_vars='build_alias
host_alias host_alias
...@@ -1331,7 +1331,7 @@ Optional Packages: ...@@ -1331,7 +1331,7 @@ Optional Packages:
--with-jobid-env=<name> Name of environment variable that stores the jobid --with-jobid-env=<name> Name of environment variable that stores the jobid
(specify "NONE" if no appropriate environment variable is available: (specify "NONE" if no appropriate environment variable is available:
Darshan will use rank 0's pid instead) Darshan will use rank 0's pid instead)
--with-max-records=<num> Maximum records for Darshan to track at runtime --with-mod-mem=<num> Maximum amount of memory (in MiB) for each Darshan module
Some influential environment variables: Some influential environment variables:
CC C compiler command CC C compiler command
...@@ -4281,14 +4281,14 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ...@@ -4281,14 +4281,14 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi fi
# Check whether --with-max-records was given. # Check whether --with-mod-mem was given.
if test "${with_max_records+set}" = set; then : if test "${with_mod_mem+set}" = set; then :
withval=$with_max_records; if test x$withval = xyes; then withval=$with_mod_mem; if test x$withval = xyes; then
as_fn_error $? "--with-max-records must be given a number" "$LINENO" 5 as_fn_error $? "--with-mod-mem must be given a number" "$LINENO" 5
else else
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF
#define __DARSHAN_MAX_RECORDS ${withval} #define __DARSHAN_MOD_MEM_MAX ${withval}
_ACEOF _ACEOF
fi fi
......
...@@ -300,12 +300,12 @@ if test x$enable_bgq_mod != xno; then ...@@ -300,12 +300,12 @@ if test x$enable_bgq_mod != xno; then
[])) []))
fi fi
AC_ARG_WITH(max-records, AC_ARG_WITH(mod-mem,
[ --with-max-records=<num> Maximum records for Darshan to track at runtime], [ --with-mod-mem=<num> Maximum amount of memory (in MiB) for each Darshan module],
if test x$withval = xyes; then if test x$withval = xyes; then
AC_MSG_ERROR(--with-max-records must be given a number) AC_MSG_ERROR(--with-mod-mem must be given a number)
else else
AC_DEFINE_UNQUOTED(__DARSHAN_MAX_RECORDS, ${withval}, Maximum Darshan records to track at runtime) AC_DEFINE_UNQUOTED(__DARSHAN_MOD_MEM_MAX, ${withval}, Maximum memory (in MiB) for each Darshan module)
fi fi
) )
......
...@@ -26,18 +26,18 @@ ...@@ -26,18 +26,18 @@
/* Environment variable to override __DARSHAN_MEM_ALIGNMENT */ /* Environment variable to override __DARSHAN_MEM_ALIGNMENT */
#define DARSHAN_MEM_ALIGNMENT_OVERRIDE "DARSHAN_MEMALIGN" #define DARSHAN_MEM_ALIGNMENT_OVERRIDE "DARSHAN_MEMALIGN"
#ifdef __DARSHAN_MAX_RECORDS /* Environment variable to override memory per module */
#define DARSHAN_CORE_MAX_RECORDS __DARSHAN_MAX_RECORDS #define DARSHAN_MOD_MEM_OVERRIDE "DARSHAN_MODMEM"
/* Maximum amount of memory per instrumentation module in MiB */
#ifdef __DARSHAN_MOD_MEM_MAX
#define DARSHAN_MOD_MEM_MAX (__DARSHAN_MOD_MEM_MAX * 1024 * 1024)
#else #else
#define DARSHAN_CORE_MAX_RECORDS 2048 #define DARSHAN_MOD_MEM_MAX (2 * 1024 * 1024) /* 2 MiB default */
#endif #endif
/* TODO: revisit this default size if we change memory per module */ /* Default runtime compression buffer size */
#define DARSHAN_CORE_COMP_BUF_SIZE (2 * 1024 * 1024) #define DARSHAN_COMP_BUF_SIZE DARSHAN_MOD_MEM_MAX
#define DARSHAN_CORE_MOD_SET(flags, id) (flags | (1 << id))
#define DARSHAN_CORE_MOD_UNSET(flags, id) (flags & ~(1 << id))
#define DARSHAN_CORE_MOD_ISSET(flags, id) (flags & (1 << id))
/* in memory structure to keep up with job level data */ /* in memory structure to keep up with job level data */
struct darshan_core_runtime struct darshan_core_runtime
...@@ -48,7 +48,7 @@ struct darshan_core_runtime ...@@ -48,7 +48,7 @@ struct darshan_core_runtime
struct darshan_core_record_ref *rec_hash; struct darshan_core_record_ref *rec_hash;
int rec_count; int rec_count;
struct darshan_core_module* mod_array[DARSHAN_MAX_MODS]; struct darshan_core_module* mod_array[DARSHAN_MAX_MODS];
char comp_buf[DARSHAN_CORE_COMP_BUF_SIZE]; char comp_buf[DARSHAN_COMP_BUF_SIZE];
double wtime_offset; double wtime_offset;
char *trailing_data; char *trailing_data;
}; };
......
...@@ -102,11 +102,11 @@ ...@@ -102,11 +102,11 @@
/* Location to store log files at run time */ /* Location to store log files at run time */
#undef __DARSHAN_LOG_PATH #undef __DARSHAN_LOG_PATH
/* Maximum Darshan records to track at runtime */
#undef __DARSHAN_MAX_RECORDS
/* Memory alignment in bytes */ /* Memory alignment in bytes */
#undef __DARSHAN_MEM_ALIGNMENT #undef __DARSHAN_MEM_ALIGNMENT
/* Maximum memory (in MiB) for each Darshan module */
#undef __DARSHAN_MOD_MEM_MAX
/* Generalized request type for MPI-IO */ /* Generalized request type for MPI-IO */
#undef __D_MPI_REQUEST #undef __D_MPI_REQUEST
...@@ -118,18 +118,21 @@ void darshan_core_unregister_module( ...@@ -118,18 +118,21 @@ void darshan_core_unregister_module(
* Register the Darshan record given by 'name' with the darshan-core * Register the Darshan record given by 'name' with the darshan-core
* runtime, allowing it to be properly tracked and (potentially) * runtime, allowing it to be properly tracked and (potentially)
* correlated with records from other modules. 'len' is the size of * correlated with records from other modules. 'len' is the size of
* the name pointer (string length for string names), 'printable_flag' * the name pointer (string length for string names), and 'printable_flag'
* indicates whether the name is a string, and 'mod_id' is the identifier * indicates whether the name is a string. 'mod_limit_flag' is set if
* of the calling module. 'rec_id' is an output pointer storing the * the calling module is out of memory (to prevent darshan-core from
* corresponding Darshan record identifier and 'file_alignment' is an * creating new records and to just search existing records) and 'mod_id'
* output pointer storing the file system alignment value for the given * is the identifier of the calling module. 'rec_id' is an output pointer
* record. * storing the corresponding Darshan record identifier and 'file_alignment'
* is an output pointer storing the file system alignment value for the
* given record.
*/ */
void darshan_core_register_record( void darshan_core_register_record(
void *name, void *name,
int len, int len,
int printable_flag,
darshan_module_id mod_id, darshan_module_id mod_id,
int printable_flag,
int mod_limit_flag,
darshan_record_id *rec_id, darshan_record_id *rec_id,
int *file_alignment); int *file_alignment);
......
...@@ -156,8 +156,9 @@ void bgq_runtime_initialize() ...@@ -156,8 +156,9 @@ void bgq_runtime_initialize()
darshan_core_register_record( darshan_core_register_record(
recname, recname,
strlen(recname), strlen(recname),
1,
DARSHAN_BGQ_MOD, DARSHAN_BGQ_MOD,
1,
0,
&bgq_runtime->record.f_id, &bgq_runtime->record.f_id,
&bgq_runtime->record.alignment); &bgq_runtime->record.alignment);
......
...@@ -99,7 +99,8 @@ static char* darshan_get_exe_and_mounts( ...@@ -99,7 +99,8 @@ static char* darshan_get_exe_and_mounts(
static void darshan_block_size_from_path( static void darshan_block_size_from_path(
const char *path, int *block_size); const char *path, int *block_size);
static void darshan_get_shared_records( static void darshan_get_shared_records(
struct darshan_core_runtime *core, darshan_record_id *shared_recs); struct darshan_core_runtime *core, darshan_record_id **shared_recs,
int *shared_rec_cnt);
static int darshan_log_open_all( static int darshan_log_open_all(
char *logfile_name, MPI_File *log_fh); char *logfile_name, MPI_File *log_fh);
static int darshan_deflate_buffer( static int darshan_deflate_buffer(
...@@ -251,7 +252,9 @@ void darshan_core_shutdown() ...@@ -251,7 +252,9 @@ void darshan_core_shutdown()
int64_t last_end_time; int64_t last_end_time;
int local_mod_use[DARSHAN_MAX_MODS] = {0}; int local_mod_use[DARSHAN_MAX_MODS] = {0};
int global_mod_use_count[DARSHAN_MAX_MODS] = {0}; int global_mod_use_count[DARSHAN_MAX_MODS] = {0};
darshan_record_id shared_recs[DARSHAN_CORE_MAX_RECORDS] = {0}; darshan_record_id *shared_recs;
darshan_record_id *mod_shared_recs;
int shared_rec_cnt = 0;
double start_log_time; double start_log_time;
double open1, open2; double open1, open2;
double job1, job2; double job1, job2;
...@@ -261,7 +264,7 @@ void darshan_core_shutdown() ...@@ -261,7 +264,7 @@ void darshan_core_shutdown()
double header1, header2; double header1, header2;
double tm_end; double tm_end;
uint64_t gz_fp = 0; uint64_t gz_fp = 0;
unsigned char tmp_partial_flag; uint32_t tmp_partial_flag;
MPI_File log_fh; MPI_File log_fh;
MPI_Status status; MPI_Status status;
...@@ -368,7 +371,7 @@ void darshan_core_shutdown() ...@@ -368,7 +371,7 @@ void darshan_core_shutdown()
DARSHAN_MPI_CALL(PMPI_Allreduce)(local_mod_use, global_mod_use_count, DARSHAN_MAX_MODS, MPI_INT, MPI_SUM, MPI_COMM_WORLD); DARSHAN_MPI_CALL(PMPI_Allreduce)(local_mod_use, global_mod_use_count, DARSHAN_MAX_MODS, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
/* get a list of records which are shared across all processes */ /* get a list of records which are shared across all processes */
darshan_get_shared_records(final_core, shared_recs); darshan_get_shared_records(final_core, &shared_recs, &shared_rec_cnt);
if(internal_timing_flag) if(internal_timing_flag)
open1 = DARSHAN_MPI_CALL(PMPI_Wtime)(); open1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
...@@ -463,6 +466,9 @@ void darshan_core_shutdown() ...@@ -463,6 +466,9 @@ void darshan_core_shutdown()
if(internal_timing_flag) if(internal_timing_flag)
rec2 = DARSHAN_MPI_CALL(PMPI_Wtime)(); rec2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
mod_shared_recs = malloc(shared_rec_cnt * sizeof(darshan_record_id));
assert(mod_shared_recs);
/* loop over globally used darshan modules and: /* loop over globally used darshan modules and:
* - perform shared file reductions, if possible * - perform shared file reductions, if possible
* - get final output buffer * - get final output buffer
...@@ -475,7 +481,6 @@ void darshan_core_shutdown() ...@@ -475,7 +481,6 @@ void darshan_core_shutdown()
{ {
struct darshan_core_module* this_mod = final_core->mod_array[i]; struct darshan_core_module* this_mod = final_core->mod_array[i];
struct darshan_core_record_ref *ref = NULL; struct darshan_core_record_ref *ref = NULL;
darshan_record_id mod_shared_recs[DARSHAN_CORE_MAX_RECORDS];
int mod_shared_rec_cnt = 0; int mod_shared_rec_cnt = 0;
void* mod_buf = NULL; void* mod_buf = NULL;
int mod_buf_sz = 0; int mod_buf_sz = 0;
...@@ -495,13 +500,13 @@ void darshan_core_shutdown() ...@@ -495,13 +500,13 @@ void darshan_core_shutdown()
mod1[i] = DARSHAN_MPI_CALL(PMPI_Wtime)(); mod1[i] = DARSHAN_MPI_CALL(PMPI_Wtime)();
/* set the shared file list for this module */ /* set the shared file list for this module */
memset(mod_shared_recs, 0, DARSHAN_CORE_MAX_RECORDS * sizeof(darshan_record_id)); memset(mod_shared_recs, 0, shared_rec_cnt * sizeof(darshan_record_id));
for(j = 0; j < DARSHAN_CORE_MAX_RECORDS && shared_recs[j] != 0; j++) for(j = 0; j < shared_rec_cnt; j++)
{ {
HASH_FIND(hlink, final_core->rec_hash, &shared_recs[j], HASH_FIND(hlink, final_core->rec_hash, &shared_recs[j],
sizeof(darshan_record_id), ref); sizeof(darshan_record_id), ref);
assert(ref); assert(ref);
if(DARSHAN_CORE_MOD_ISSET(ref->global_mod_flags, i)) if(DARSHAN_MOD_FLAG_ISSET(ref->global_mod_flags, i))
{ {
mod_shared_recs[mod_shared_rec_cnt++] = shared_recs[j]; mod_shared_recs[mod_shared_rec_cnt++] = shared_recs[j];
} }
...@@ -551,11 +556,11 @@ void darshan_core_shutdown() ...@@ -551,11 +556,11 @@ void darshan_core_shutdown()
} }
/* run a reduction to determine if any application processes had to set the /* run a reduction to determine if any application processes had to set the
* partial flag. this happens when a process has tracked too many records * partial flag for any modules. this happens when a module exhausts its memory
* at once and cannot track new records * and does not track every possible record
*/ */
DARSHAN_MPI_CALL(PMPI_Reduce)(&(final_core->log_header.partial_flag), DARSHAN_MPI_CALL(PMPI_Reduce)(&(final_core->log_header.partial_flag),
&tmp_partial_flag, 1, MPI_UNSIGNED_CHAR, MPI_MAX, 0, MPI_COMM_WORLD); &tmp_partial_flag, 1, MPI_UINT32_T, MPI_BOR, 0, MPI_COMM_WORLD);
if(internal_timing_flag) if(internal_timing_flag)
header1 = DARSHAN_MPI_CALL(PMPI_Wtime)(); header1 = DARSHAN_MPI_CALL(PMPI_Wtime)();
...@@ -1085,32 +1090,45 @@ static void darshan_block_size_from_path(const char *path, int *block_size) ...@@ -1085,32 +1090,45 @@ static void darshan_block_size_from_path(const char *path, int *block_size)
} }
static void darshan_get_shared_records(struct darshan_core_runtime *core, static void darshan_get_shared_records(struct darshan_core_runtime *core,
darshan_record_id *shared_recs) darshan_record_id **shared_recs, int *shared_rec_cnt)
{ {
int i; int i, j;
int ndx; int tmp_cnt = core->rec_count;
struct darshan_core_record_ref *tmp, *ref; struct darshan_core_record_ref *tmp, *ref;
darshan_record_id id_array[DARSHAN_CORE_MAX_RECORDS] = {0}; darshan_record_id *id_array;
uint64_t mod_flags[DARSHAN_CORE_MAX_RECORDS] = {0}; uint64_t *mod_flags;
uint64_t global_mod_flags[DARSHAN_CORE_MAX_RECORDS] = {0}; uint64_t *global_mod_flags;
/* broadcast root's number of records to all other processes */
DARSHAN_MPI_CALL(PMPI_Bcast)(&tmp_cnt, 1, MPI_INT, 0, MPI_COMM_WORLD);
/* use root record count to allocate data structures */
id_array = malloc(tmp_cnt * sizeof(darshan_record_id));
mod_flags = malloc(tmp_cnt * sizeof(uint64_t));
global_mod_flags = malloc(tmp_cnt * sizeof(uint64_t));
*shared_recs = malloc(tmp_cnt * sizeof(darshan_record_id));
assert(id_array && mod_flags && global_mod_flags && *shared_recs);
memset(mod_flags, 0, tmp_cnt * sizeof(uint64_t));
memset(global_mod_flags, 0, tmp_cnt * sizeof(uint64_t));
memset(*shared_recs, 0, tmp_cnt * sizeof(darshan_record_id));
/* first, determine list of records root process has opened */ /* first, determine list of records root process has opened */
if(my_rank == 0) if(my_rank == 0)
{ {
ndx = 0; i = 0;
HASH_ITER(hlink, core->rec_hash, ref, tmp) HASH_ITER(hlink, core->rec_hash, ref, tmp)
{ {
id_array[ndx++] = ref->rec.id; id_array[i++] = ref->rec.id;
} }
} }
/* broadcast root's list of records to all other processes */ /* broadcast root's list of records to all other processes */
DARSHAN_MPI_CALL(PMPI_Bcast)(id_array, DARSHAN_MPI_CALL(PMPI_Bcast)(id_array, (tmp_cnt * sizeof(darshan_record_id)),
(DARSHAN_CORE_MAX_RECORDS * sizeof(darshan_record_id)),
MPI_BYTE, 0, MPI_COMM_WORLD); MPI_BYTE, 0, MPI_COMM_WORLD);
/* everyone looks to see if they opened the same records as root */ /* everyone looks to see if they opened the same records as root */
for(i=0; (i<DARSHAN_CORE_MAX_RECORDS && id_array[i] != 0); i++) for(i=0; i<tmp_cnt; i++)
{ {
HASH_FIND(hlink, core->rec_hash, &id_array[i], sizeof(darshan_record_id), ref); HASH_FIND(hlink, core->rec_hash, &id_array[i], sizeof(darshan_record_id), ref);
if(ref) if(ref)
...@@ -1123,15 +1141,15 @@ static void darshan_get_shared_records(struct darshan_core_runtime *core, ...@@ -1123,15 +1141,15 @@ static void darshan_get_shared_records(struct darshan_core_runtime *core,
/* now allreduce so everyone agrees which files are shared and /* now allreduce so everyone agrees which files are shared and
* which modules accessed them collectively * which modules accessed them collectively
*/ */
DARSHAN_MPI_CALL(PMPI_Allreduce)(mod_flags, global_mod_flags, DARSHAN_MPI_CALL(PMPI_Allreduce)(mod_flags, global_mod_flags, tmp_cnt,
DARSHAN_CORE_MAX_RECORDS, MPI_UINT64_T, MPI_BAND, MPI_COMM_WORLD); MPI_UINT64_T, MPI_BAND, MPI_COMM_WORLD);
ndx = 0; j = 0;
for(i=0; (i<DARSHAN_CORE_MAX_RECORDS && id_array[i] != 0); i++) for(i=0; i<tmp_cnt; i++)
{ {
if(global_mod_flags[i] != 0) if(global_mod_flags[i] != 0)
{ {
shared_recs[ndx++] = id_array[i]; (*shared_recs)[j++] = id_array[i];
/* set global_mod_flags so we know which modules collectively /* set global_mod_flags so we know which modules collectively
* accessed this module. we need this info to support shared * accessed this module. we need this info to support shared
...@@ -1142,6 +1160,7 @@ static void darshan_get_shared_records(struct darshan_core_runtime *core, ...@@ -1142,6 +1160,7 @@ static void darshan_get_shared_records(struct darshan_core_runtime *core,
ref->global_mod_flags = global_mod_flags[i]; ref->global_mod_flags = global_mod_flags[i];
} }
} }
*shared_rec_cnt = j;
return; return;
} }
...@@ -1246,7 +1265,7 @@ static int darshan_deflate_buffer(void **pointers, int *lengths, int count, ...@@ -1246,7 +1265,7 @@ static int darshan_deflate_buffer(void **pointers, int *lengths, int count,
} }
tmp_stream.next_out = (unsigned char *)comp_buf; tmp_stream.next_out = (unsigned char *)comp_buf;
tmp_stream.avail_out = DARSHAN_CORE_COMP_BUF_SIZE; tmp_stream.avail_out = DARSHAN_COMP_BUF_SIZE;
/* loop over the input pointers */ /* loop over the input pointers */
for(i = 0; i < count; i++) for(i = 0; i < count; i++)
...@@ -1307,9 +1326,9 @@ static int darshan_log_write_record_hash(MPI_File log_fh, struct darshan_core_ru ...@@ -1307,9 +1326,9 @@ static int darshan_log_write_record_hash(MPI_File log_fh, struct darshan_core_ru
char *hash_buf; char *hash_buf;
char *hash_buf_off; char *hash_buf_off;
/* allocate a buffer to store at most 64 bytes for each of a max number of records */ /* allocate a buffer to store at most 64 bytes for each registered record */
/* NOTE: this buffer may be reallocated if estimate is too small */ /* NOTE: this buffer may be reallocated if estimate is too small */
hash_buf_sz = DARSHAN_CORE_MAX_RECORDS * 64; hash_buf_sz = core->rec_count * 64;
hash_buf = malloc(hash_buf_sz); hash_buf = malloc(hash_buf_sz);
if(!hash_buf) if(!hash_buf)
{ {
...@@ -1478,7 +1497,10 @@ void darshan_core_register_module( ...@@ -1478,7 +1497,10 @@ void darshan_core_register_module(
int *mod_mem_limit, int *mod_mem_limit,
int *sys_mem_alignment) int *sys_mem_alignment)
{ {
int ret;
int tmpval;
struct darshan_core_module* mod; struct darshan_core_module* mod;
char *mod_mem_str = NULL;
*mod_mem_limit = 0; *mod_mem_limit = 0;
if(!darshan_core || (mod_id >= DARSHAN_MAX_MODS)) if(!darshan_core || (mod_id >= DARSHAN_MAX_MODS))
...@@ -1514,9 +1536,21 @@ void darshan_core_register_module( ...@@ -1514,9 +1536,21 @@ void darshan_core_register_module(
/* get the calling process's rank */ /* get the calling process's rank */
DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, my_rank); DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, my_rank);
/* TODO: something smarter than just 2 MiB per module */ /* set the maximum amount of memory this module can use */
*mod_mem_limit = 2 * 1024 * 1024; mod_mem_str = getenv(DARSHAN_MOD_MEM_OVERRIDE);
if(mod_mem_str)
{
ret = sscanf(mod_mem_str, "%d", &tmpval);
/* silently ignore if the env variable is set poorly */
if(ret == 1 && tmpval > 0)
*mod_mem_limit = (tmpval * 1024 * 1024); /* convert MiB to bytes */
else
*mod_mem_limit = DARSHAN_MOD_MEM_MAX;
}
else
{
*mod_mem_limit = DARSHAN_MOD_MEM_MAX;
}
DARSHAN_CORE_UNLOCK(); DARSHAN_CORE_UNLOCK();
return; return;
...@@ -1550,8 +1584,9 @@ void darshan_core_unregister_module( ...@@ -1550,8 +1584,9 @@ void darshan_core_unregister_module(
void darshan_core_register_record( void darshan_core_register_record(
void *name, void *name,
int len, int len,
int printable_flag,
darshan_module_id mod_id, darshan_module_id mod_id,
int printable_flag,
int mod_limit_flag,
darshan_record_id *rec_id, darshan_record_id *rec_id,
int *file_alignment) int *file_alignment)
{ {
...@@ -1573,16 +1608,14 @@ void darshan_core_register_record( ...@@ -1573,16 +1608,14 @@ void darshan_core_register_record(
HASH_FIND(hlink, darshan_core->rec_hash, &tmp_rec_id, sizeof(darshan_record_id), ref); HASH_FIND(hlink, darshan_core->rec_hash, &tmp_rec_id, sizeof(darshan_record_id), ref);
if(!ref) if(!ref)
{ {
/* record not found -- add it to the hash if we aren't already tracking the /* record not found -- add it to the hash if this module has not already used
* maximum number of records * all of its memory
*/ */
if(darshan_core->rec_count >= DARSHAN_CORE_MAX_RECORDS) if(mod_limit_flag)
{ {
/* if we are already tracking the max records, set a flag to indicate /* if this module is OOM, set a flag in the header to indicate this */
* that this log file has partial results DARSHAN_MOD_FLAG_SET(darshan_core->log_header.partial_flag, mod_id);
*/
darshan_core->log_header.partial_flag = 1;
DARSHAN_CORE_UNLOCK(); DARSHAN_CORE_UNLOCK();
return; return;
} }
...@@ -1600,7 +1633,7 @@ void darshan_core_register_record( ...@@ -1600,7 +1633,7 @@ void darshan_core_register_record(
darshan_core->rec_count++; darshan_core->rec_count++;
} }
} }
ref->mod_flags = DARSHAN_CORE_MOD_SET(ref->mod_flags, mod_id); DARSHAN_MOD_FLAG_SET(ref->mod_flags, mod_id);
DARSHAN_CORE_UNLOCK(); DARSHAN_CORE_UNLOCK();
if(file_alignment) if(file_alignment)
...@@ -1625,7 +1658,7 @@ void darshan_core_unregister_record( ...@@ -1625,7 +1658,7 @@ void darshan_core_unregister_record(
assert(ref); assert(ref);
/* disassociate this module from the given record id */ /* disassociate this module from the given record id */
ref->mod_flags = DARSHAN_CORE_MOD_UNSET(ref->mod_flags, mod_id); DARSHAN_MOD_FLAG_UNSET(ref->mod_flags, mod_id);
if(!(ref->mod_flags)) if(!(ref->