Commit 0d84b5e5 authored by Shane Snyder
Browse files

first cut at mmap logic -- posix mod only for now

parent 41575785
...@@ -26,11 +26,15 @@ ...@@ -26,11 +26,15 @@
/* Environment variable to override __DARSHAN_MEM_ALIGNMENT */ /* Environment variable to override __DARSHAN_MEM_ALIGNMENT */
#define DARSHAN_MEM_ALIGNMENT_OVERRIDE "DARSHAN_MEMALIGN" #define DARSHAN_MEM_ALIGNMENT_OVERRIDE "DARSHAN_MEMALIGN"
/* maximum number of records that can be tracked on a single process */
#define DARSHAN_CORE_MAX_RECORDS 2048 #define DARSHAN_CORE_MAX_RECORDS 2048
/* TODO: revisit this default size if we change memory per module */ /* TODO: revisit this default size if we change memory per module */
#define DARSHAN_CORE_COMP_BUF_SIZE (2 * 1024 * 1024) #define DARSHAN_CORE_COMP_BUF_SIZE (2 * 1024 * 1024)
/* this controls the maximum mmapped memory each module can use */
#define DARSHAN_MMAP_CHUNK_SIZE (4 * 1024)
#define DARSHAN_CORE_MOD_SET(flags, id) (flags | (1 << id)) #define DARSHAN_CORE_MOD_SET(flags, id) (flags | (1 << id))
#define DARSHAN_CORE_MOD_UNSET(flags, id) (flags & ~(1 << id)) #define DARSHAN_CORE_MOD_UNSET(flags, id) (flags & ~(1 << id))
#define DARSHAN_CORE_MOD_ISSET(flags, id) (flags & (1 << id)) #define DARSHAN_CORE_MOD_ISSET(flags, id) (flags & (1 << id))
...@@ -38,15 +42,18 @@ ...@@ -38,15 +42,18 @@
/* in memory structure to keep up with job level data */ /* in memory structure to keep up with job level data */
struct darshan_core_runtime struct darshan_core_runtime
{ {
struct darshan_header log_header; /* XXX-MMAP */
struct darshan_job log_job; void *mmap_p;
char exe[DARSHAN_EXE_LEN+1]; struct darshan_job *mmap_job_p;
char *mmap_exe_mnt_p;
void *mmap_mod_p;
/* XXX-MMAP */
struct darshan_core_record_ref *rec_hash; struct darshan_core_record_ref *rec_hash;
int rec_count; int rec_count;
struct darshan_core_module* mod_array[DARSHAN_MAX_MODS]; struct darshan_core_module* mod_array[DARSHAN_MAX_MODS];
char comp_buf[DARSHAN_CORE_COMP_BUF_SIZE]; char comp_buf[DARSHAN_CORE_COMP_BUF_SIZE];
double wtime_offset; double wtime_offset;
char *trailing_data;
}; };
struct darshan_core_module struct darshan_core_module
......
...@@ -80,9 +80,6 @@ struct darshan_module_funcs ...@@ -80,9 +80,6 @@ struct darshan_module_funcs
void (*shutdown)(void); void (*shutdown)(void);
}; };
/* paths that darshan will not trace */
extern char* darshan_path_exclusions[]; /* defined in lib/darshan-core.c */
/***************************************************** /*****************************************************
* darshan-core functions exported to darshan modules * * darshan-core functions exported to darshan modules *
*****************************************************/ *****************************************************/
...@@ -103,6 +100,8 @@ void darshan_core_register_module( ...@@ -103,6 +100,8 @@ void darshan_core_register_module(
struct darshan_module_funcs *funcs, struct darshan_module_funcs *funcs,
int *my_rank, int *my_rank,
int *mod_mem_limit, int *mod_mem_limit,
void **mmap_buf,
int *mmap_buf_size,
int *sys_mem_alignment); int *sys_mem_alignment);
/* darshan_core_unregister_module() /* darshan_core_unregister_module()
...@@ -151,4 +150,12 @@ void darshan_core_unregister_record( ...@@ -151,4 +150,12 @@ void darshan_core_unregister_record(
*/ */
double darshan_core_wtime(void); double darshan_core_wtime(void);
/* darshan_core_excluded_path()
*
* Returns true (1) if the given file path is in Darshan's list of
* excluded file paths, false (0) otherwise.
*/
int darshan_core_excluded_path(
const char * path);
#endif /* __DARSHAN_H */ #endif /* __DARSHAN_H */
...@@ -39,6 +39,9 @@ ...@@ -39,6 +39,9 @@
struct bgq_runtime struct bgq_runtime
{ {
struct darshan_bgq_record record; struct darshan_bgq_record record;
/* TODO: we don't need both the mmap buffer and the regular buffer */
struct darshan_bgq_record *mmap_buf;
}; };
static struct bgq_runtime *bgq_runtime = NULL; static struct bgq_runtime *bgq_runtime = NULL;
...@@ -114,6 +117,8 @@ void bgq_runtime_initialize() ...@@ -114,6 +117,8 @@ void bgq_runtime_initialize()
.shutdown = bgq_shutdown .shutdown = bgq_shutdown
}; };
int mem_limit; int mem_limit;
void *mmap_buf;
int mmap_buf_size;
char *recname = "darshan-internal-bgq"; char *recname = "darshan-internal-bgq";
BGQ_LOCK(); BGQ_LOCK();
...@@ -128,6 +133,8 @@ void bgq_runtime_initialize() ...@@ -128,6 +133,8 @@ void bgq_runtime_initialize()
&bgq_mod_fns, &bgq_mod_fns,
&my_rank, &my_rank,
&mem_limit, &mem_limit,
&mmap_buf,
&mmap_buf_size,
&darshan_mem_alignment); &darshan_mem_alignment);
/* return if no memory assigned by darshan-core */ /* return if no memory assigned by darshan-core */
......
...@@ -18,8 +18,10 @@ ...@@ -18,8 +18,10 @@
#include <time.h> #include <time.h>
#include <limits.h> #include <limits.h>
#include <pthread.h> #include <pthread.h>
#include <fcntl.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/mman.h>
#include <sys/vfs.h> #include <sys/vfs.h>
#include <zlib.h> #include <zlib.h>
#include <mpi.h> #include <mpi.h>
...@@ -92,10 +94,9 @@ static void darshan_get_logfile_name( ...@@ -92,10 +94,9 @@ static void darshan_get_logfile_name(
static void darshan_log_record_hints_and_ver( static void darshan_log_record_hints_and_ver(
struct darshan_core_runtime* core); struct darshan_core_runtime* core);
static void darshan_get_exe_and_mounts_root( static void darshan_get_exe_and_mounts_root(
struct darshan_core_runtime *core, char* trailing_data, struct darshan_core_runtime *core, int argc, char **argv);
int space_left); static void darshan_get_exe_and_mounts(
static char* darshan_get_exe_and_mounts( struct darshan_core_runtime *core, int argc, char **argv);
struct darshan_core_runtime *core);
static void darshan_block_size_from_path( static void darshan_block_size_from_path(
const char *path, int *block_size); const char *path, int *block_size);
static void darshan_get_shared_records( static void darshan_get_shared_records(
...@@ -118,15 +119,19 @@ static void darshan_core_cleanup( ...@@ -118,15 +119,19 @@ static void darshan_core_cleanup(
void darshan_core_initialize(int argc, char **argv) void darshan_core_initialize(int argc, char **argv)
{ {
int i; struct darshan_core_runtime *init_core = NULL;
int internal_timing_flag = 0; int internal_timing_flag = 0;
double init_start, init_time, init_max; double init_start, init_time, init_max;
char *mmap_log_name = "darshan-log.out";
int mmap_fd;
int mmap_size;
int sys_page_size;
char *envstr; char *envstr;
char* truncate_string = "<TRUNCATED>"; char *jobid_str;
int truncate_offset; int jobid;
int chars_left = 0;
int ret; int ret;
int tmpval; int tmpval;
int i;
DARSHAN_MPI_CALL(PMPI_Comm_size)(MPI_COMM_WORLD, &nprocs); DARSHAN_MPI_CALL(PMPI_Comm_size)(MPI_COMM_WORLD, &nprocs);
DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &my_rank); DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &my_rank);
...@@ -164,58 +169,116 @@ void darshan_core_initialize(int argc, char **argv) ...@@ -164,58 +169,116 @@ void darshan_core_initialize(int argc, char **argv)
darshan_mem_alignment = 1; darshan_mem_alignment = 1;
} }
/* allocate structure to track darshan_core_runtime information */ /* allocate structure to track darshan core runtime information */
darshan_core = malloc(sizeof(*darshan_core)); init_core = malloc(sizeof(*init_core));
if(darshan_core) if(init_core)
{ {
memset(darshan_core, 0, sizeof(*darshan_core)); memset(init_core, 0, sizeof(*init_core));
init_core->wtime_offset = DARSHAN_MPI_CALL(PMPI_Wtime)();
sys_page_size = sysconf(_SC_PAGESIZE);
assert(sys_page_size > 0);
darshan_core->log_job.uid = getuid(); /* set the size of the mmap, making sure to round up to the
darshan_core->log_job.start_time = time(NULL); * nearest page size. One mmap chunk is used for the job-level
darshan_core->log_job.nprocs = nprocs; * metadata, and the rest are statically assigned to modules
darshan_core->wtime_offset = DARSHAN_MPI_CALL(PMPI_Wtime)(); */
mmap_size = (1 + DARSHAN_MAX_MODS) * DARSHAN_MMAP_CHUNK_SIZE;
if(mmap_size % sys_page_size)
mmap_size = ((mmap_size / sys_page_size) + 1) * sys_page_size;
/* record exe and arguments */ /* TODO: logfile name should have process rank in it for uniqueness */
for(i=0; i<argc; i++) mmap_fd = open(mmap_log_name, O_CREAT|O_RDWR|O_EXCL , 0644);
if(mmap_fd < 0)
{ {
chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe); fprintf(stderr, "darshan library warning: "
strncat(darshan_core->exe, argv[i], chars_left); "unable to create darshan log file %s\n", mmap_log_name);
if(i < (argc-1)) free(init_core);
{ return;
chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe);
strncat(darshan_core->exe, " ", chars_left);
}
} }
/* if we don't see any arguments, then use glibc symbol to get /* allocate the necessary space in the log file */
* program name at least (this happens in fortran) ret = ftruncate(mmap_fd, mmap_size);
if(ret < 0)
{
fprintf(stderr, "darshan library warning: "
"unable to allocate darshan log file %s\n", mmap_log_name);
free(init_core);
close(mmap_fd);
unlink(mmap_log_name);
return;
}
/* memory map buffers for getting at least some summary i/o data
* into a log file if darshan does not shut down properly
*/ */
if(argc == 0) init_core->mmap_p = mmap(NULL, mmap_size, PROT_WRITE, MAP_SHARED,
mmap_fd, 0);
if(init_core->mmap_p == MAP_FAILED)
{ {
chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe); fprintf(stderr, "darshan library warning: "
strncat(darshan_core->exe, __progname_full, chars_left); "unable to mmap darshan log file %s\n", mmap_log_name);
chars_left = DARSHAN_EXE_LEN-strlen(darshan_core->exe); free(init_core);
strncat(darshan_core->exe, " <unknown args>", chars_left); close(mmap_fd);
unlink(mmap_log_name);
return;
} }
if(chars_left == 0) /* close darshan log file (this does *not* unmap the log file) */
close(mmap_fd);
/* set the pointers for each log file region */
init_core->mmap_job_p = (struct darshan_job *)(init_core->mmap_p);
init_core->mmap_exe_mnt_p =
(char *)(((char *)init_core->mmap_p) + sizeof(struct darshan_job));
init_core->mmap_mod_p =
(void *)(((char *)init_core->mmap_p) + DARSHAN_MMAP_CHUNK_SIZE);
/* set known job-level metadata fields for the log file */
init_core->mmap_job_p->uid = getuid();
init_core->mmap_job_p->start_time = time(NULL);
init_core->mmap_job_p->nprocs = nprocs;
/* DARSHAN_JOBID_OVERRIDE, if set, names the environment variable to read the
 * job id from; otherwise fall back to the variable named by __DARSHAN_JOBID
 */
envstr = getenv(DARSHAN_JOBID_OVERRIDE);
if(!envstr)
{ {
/* we ran out of room; mark that string was truncated */ envstr = __DARSHAN_JOBID;
truncate_offset = DARSHAN_EXE_LEN - strlen(truncate_string);
sprintf(&darshan_core->exe[truncate_offset], "%s",
truncate_string);
} }
/* find a job id */
jobid_str = getenv(envstr);
if(jobid_str)
{
/* in cobalt we can find it in env var */
ret = sscanf(jobid_str, "%d", &jobid);
}
if(!jobid_str || ret != 1)
{
/* use pid as fall back */
jobid = getpid();
}
init_core->mmap_job_p->jobid = (int64_t)jobid;
/* if we are using any hints to write the log file, then record those
* hints with the darshan job information
*/
darshan_log_record_hints_and_ver(init_core);
/* collect information about command line and mounted file systems */ /* collect information about command line and mounted file systems */
darshan_core->trailing_data = darshan_get_exe_and_mounts(darshan_core); darshan_get_exe_and_mounts(init_core, argc, argv);
}
/* maybe bootstrap modules with static initializers */ /* TODO: what would be needed in a termination routine? set job end time? */
i = 0;
while(mod_static_init_fns[i]) /* maybe bootstrap modules with static initializers */
{ i = 0;
(*mod_static_init_fns[i])(); while(mod_static_init_fns[i])
i++; {
(*mod_static_init_fns[i])();
i++;
}
darshan_core = init_core;
} }
} }
...@@ -236,6 +299,9 @@ void darshan_core_initialize(int argc, char **argv) ...@@ -236,6 +299,9 @@ void darshan_core_initialize(int argc, char **argv)
void darshan_core_shutdown() void darshan_core_shutdown()
{ {
return;
#if 0
int i; int i;
char *logfile_name; char *logfile_name;
struct darshan_core_runtime *final_core; struct darshan_core_runtime *final_core;
...@@ -684,8 +750,9 @@ void darshan_core_shutdown() ...@@ -684,8 +750,9 @@ void darshan_core_shutdown()
fprintf(stderr, "darshan:core_shutdown\t%d\t%f\n", nprocs, all_slowest); fprintf(stderr, "darshan:core_shutdown\t%d\t%f\n", nprocs, all_slowest);
} }
} }
return; return;
#endif
} }
/* *********************************** */ /* *********************************** */
...@@ -870,15 +937,15 @@ static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* core) ...@@ -870,15 +937,15 @@ static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* core)
return; return;
meta_remain = DARSHAN_JOB_METADATA_LEN - meta_remain = DARSHAN_JOB_METADATA_LEN -
strlen(core->log_job.metadata) - 1; strlen(core->mmap_job_p->metadata) - 1;
if(meta_remain >= (strlen(PACKAGE_VERSION) + 9)) if(meta_remain >= (strlen(PACKAGE_VERSION) + 9))
{ {
sprintf(core->log_job.metadata, "lib_ver=%s\n", PACKAGE_VERSION); sprintf(core->mmap_job_p->metadata, "lib_ver=%s\n", PACKAGE_VERSION);
meta_remain -= (strlen(PACKAGE_VERSION) + 9); meta_remain -= (strlen(PACKAGE_VERSION) + 9);
} }
if(meta_remain >= (3 + strlen(header_hints))) if(meta_remain >= (3 + strlen(header_hints)))
{ {
m = core->log_job.metadata + strlen(core->log_job.metadata); m = core->mmap_job_p->metadata + strlen(core->mmap_job_p->metadata);
/* We have room to store the hints in the metadata portion of /* We have room to store the hints in the metadata portion of
* the job header. We just prepend an h= to the hints list. The * the job header. We just prepend an h= to the hints list. The
* metadata parser will ignore = characters that appear in the value * metadata parser will ignore = characters that appear in the value
...@@ -905,7 +972,7 @@ static int mnt_data_cmp(const void* a, const void* b) ...@@ -905,7 +972,7 @@ static int mnt_data_cmp(const void* a, const void* b)
} }
/* adds an entry to table of mounted file systems */ /* adds an entry to table of mounted file systems */
static void add_entry(char* trailing_data, int* space_left, struct mntent *entry) static void add_entry(char* buf, int* space_left, struct mntent *entry)
{ {
int ret; int ret;
char tmp_mnt[256]; char tmp_mnt[256];
...@@ -935,7 +1002,7 @@ static void add_entry(char* trailing_data, int* space_left, struct mntent *entry ...@@ -935,7 +1002,7 @@ static void add_entry(char* trailing_data, int* space_left, struct mntent *entry
entry->mnt_type, entry->mnt_dir); entry->mnt_type, entry->mnt_dir);
if(ret < 256 && strlen(tmp_mnt) <= (*space_left)) if(ret < 256 && strlen(tmp_mnt) <= (*space_left))
{ {
strcat(trailing_data, tmp_mnt); strcat(buf, tmp_mnt);
(*space_left) -= strlen(tmp_mnt); (*space_left) -= strlen(tmp_mnt);
} }
...@@ -949,11 +1016,15 @@ static void add_entry(char* trailing_data, int* space_left, struct mntent *entry ...@@ -949,11 +1016,15 @@ static void add_entry(char* trailing_data, int* space_left, struct mntent *entry
* will be stored with the job header * will be stored with the job header
*/ */
static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core, static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core,
char* trailing_data, int space_left) int argc, char **argv)
{ {
FILE* tab; FILE* tab;
struct mntent *entry; struct mntent *entry;
char* exclude; char* exclude;
char* truncate_string = "<TRUNCATED>";
int truncate_offset;
int space_left = DARSHAN_EXE_LEN;
int i;
int tmp_index = 0; int tmp_index = 0;
int skip = 0; int skip = 0;
...@@ -975,9 +1046,37 @@ static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core, ...@@ -975,9 +1046,37 @@ static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core,
NULL NULL
}; };
/* length of exe has already been safety checked in darshan-posix.c */ /* record exe and arguments */
strcat(trailing_data, core->exe); for(i=0; i<argc; i++)
space_left = DARSHAN_EXE_LEN - strlen(trailing_data); {
strncat(core->mmap_exe_mnt_p, argv[i], space_left);
space_left = DARSHAN_EXE_LEN-strlen(core->mmap_exe_mnt_p);
if(i < (argc-1))
{
strncat(core->mmap_exe_mnt_p, " ", space_left);
space_left = DARSHAN_EXE_LEN-strlen(core->mmap_exe_mnt_p);
}
}
/* if we don't see any arguments, then use glibc symbol to get
* program name at least (this happens in fortran)
*/
if(argc == 0)
{
strncat(core->mmap_exe_mnt_p, __progname_full, space_left);
space_left = DARSHAN_EXE_LEN-strlen(core->mmap_exe_mnt_p);
strncat(core->mmap_exe_mnt_p, " <unknown args>", space_left);
space_left = DARSHAN_EXE_LEN-strlen(core->mmap_exe_mnt_p);
}
if(space_left == 0)
{
/* we ran out of room; mark that string was truncated */
truncate_offset = DARSHAN_EXE_LEN - strlen(truncate_string);
sprintf(&core->mmap_exe_mnt_p[truncate_offset], "%s",
truncate_string);
}
/* we make two passes through mounted file systems; in the first pass we /* we make two passes through mounted file systems; in the first pass we
* grab any non-nfs mount points, then on the second pass we grab nfs * grab any non-nfs mount points, then on the second pass we grab nfs
...@@ -1006,7 +1105,7 @@ static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core, ...@@ -1006,7 +1105,7 @@ static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core,
if(skip || (strcmp(entry->mnt_type, "nfs") == 0)) if(skip || (strcmp(entry->mnt_type, "nfs") == 0))
continue; continue;
add_entry(trailing_data, &space_left, entry); add_entry(core->mmap_exe_mnt_p, &space_left, entry);
} }
endmntent(tab); endmntent(tab);
...@@ -1019,7 +1118,7 @@ static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core, ...@@ -1019,7 +1118,7 @@ static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core,
if(strcmp(entry->mnt_type, "nfs") != 0) if(strcmp(entry->mnt_type, "nfs") != 0)
continue; continue;
add_entry(trailing_data, &space_left, entry); add_entry(core->mmap_exe_mnt_p, &space_left, entry);
} }
endmntent(tab); endmntent(tab);
...@@ -1036,27 +1135,14 @@ static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core, ...@@ -1036,27 +1135,14 @@ static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core,
* collects command line and list of mounted file systems into a string that * collects command line and list of mounted file systems into a string that
* will be stored with the job header * will be stored with the job header
*/ */
static char* darshan_get_exe_and_mounts(struct darshan_core_runtime *core) static void darshan_get_exe_and_mounts(struct darshan_core_runtime *core,
int argc, char **argv)
{ {
char* trailing_data;
int space_left;
space_left = DARSHAN_EXE_LEN + 1;
trailing_data = malloc(space_left);
if(!trailing_data)
{
return(NULL);
}
memset(trailing_data, 0, space_left);
if(my_rank == 0) if(my_rank == 0)
{ {
darshan_get_exe_and_mounts_root(core, trailing_data, space_left); darshan_get_exe_and_mounts_root(core, argc, argv);
} }
/* broadcast trailing data to all nodes */
DARSHAN_MPI_CALL(PMPI_Bcast)(trailing_data, space_left, MPI_CHAR, 0,
MPI_COMM_WORLD);
/* broadcast mount count to all nodes */ /* broadcast mount count to all nodes */
DARSHAN_MPI_CALL(PMPI_Bcast)(&mnt_data_count, 1, MPI_INT, 0, DARSHAN_MPI_CALL(PMPI_Bcast)(&mnt_data_count, 1, MPI_INT, 0,
MPI_COMM_WORLD); MPI_COMM_WORLD);
...@@ -1064,7 +1150,7 @@ static char* darshan_get_exe_and_mounts(struct darshan_core_runtime *core) ...@@ -1064,7 +1150,7 @@ static char* darshan_get_exe_and_mounts(struct darshan_core_runtime *core)
DARSHAN_MPI_CALL(PMPI_Bcast)(mnt_data_array, DARSHAN_MPI_CALL(PMPI_Bcast)(mnt_data_array,
mnt_data_count*sizeof(mnt_data_array[0]), MPI_BYTE, 0, MPI_COMM_WORLD); mnt_data_count*sizeof(mnt_data_array[0]), MPI_BYTE, 0, MPI_COMM_WORLD);
return(trailing_data); return;
} }
static void darshan_block_size_from_path(const char *path, int *block_size) static void darshan_block_size_from_path(const char *path, int *block_size)
...@@ -1463,7 +1549,6 @@ static void darshan_core_cleanup(struct darshan_core_runtime* core) ...@@ -1463,7 +1549,6 @@ static void darshan_core_cleanup(struct darshan_core_runtime* core)
} }
} }
free(core->trailing_data);
free(core); free(core);
return; return;