Commit acc05e4a authored by Shane Snyder
Browse files

resolve minor TODO items

parent 75eee950
...@@ -28,14 +28,11 @@ CFLAGS = -DDARSHAN_CONFIG_H=\"darshan-runtime-config.h\" -I . -I ../ -I $(srcdir ...@@ -28,14 +28,11 @@ CFLAGS = -DDARSHAN_CONFIG_H=\"darshan-runtime-config.h\" -I . -I ../ -I $(srcdir
# #
CFLAGS_SHARED = -DDARSHAN_CONFIG_H=\"darshan-runtime-config.h\" -I . -I$(srcdir) -I$(srcdir)/../ @CFLAGS@ @CPPFLAGS@ -D_LARGEFILE64_SOURCE -shared -fpic -DPIC -DDARSHAN_PRELOAD CFLAGS_SHARED = -DDARSHAN_CONFIG_H=\"darshan-runtime-config.h\" -I . -I$(srcdir) -I$(srcdir)/../ @CFLAGS@ @CPPFLAGS@ -D_LARGEFILE64_SOURCE -shared -fpic -DPIC -DDARSHAN_PRELOAD
# TODO: BZ2?
LIBS = -lz @LIBBZ2@ LIBS = -lz @LIBBZ2@
lib:: lib::
@mkdir -p $@ @mkdir -p $@
# TODO make sure the headers are right in these prereqs
lib/darshan-core.o: lib/darshan-core.c darshan-core.h $(DARSHAN_LOG_FORMAT) | lib lib/darshan-core.o: lib/darshan-core.c darshan-core.h $(DARSHAN_LOG_FORMAT) | lib
$(CC) $(CFLAGS) -c $< -o $@ $(CC) $(CFLAGS) -c $< -o $@
...@@ -66,7 +63,6 @@ lib/lookup8.o: lib/lookup8.c ...@@ -66,7 +63,6 @@ lib/lookup8.o: lib/lookup8.c
lib/lookup8.po: lib/lookup8.c lib/lookup8.po: lib/lookup8.c
$(CC) $(CFLAGS_SHARED) -c $< -o $@ $(CC) $(CFLAGS_SHARED) -c $< -o $@
# TODO: huh?
#%.i: %.c #%.i: %.c
# $(CC) -E $(CFLAGS) -c $< -o $@ # $(CC) -E $(CFLAGS) -c $< -o $@
......
...@@ -12,18 +12,20 @@ ...@@ -12,18 +12,20 @@
#include "darshan.h" #include "darshan.h"
/* TODO: enforce this when handing out ids */ /* TODO: this goes where ? -- shared libs */
#define DARSHAN_MPI_CALL(func) func
#define DARSHAN_CORE_MAX_RECORDS 1024 #define DARSHAN_CORE_MAX_RECORDS 1024
/* default compression buffer size of 2 MiB */
/* TODO: revisit this default size if we change memory per module */ /* TODO: revisit this default size if we change memory per module */
#define DARSHAN_COMP_BUF_SIZE (2 * 1024 * 1024) #define DARSHAN_CORE_COMP_BUF_SIZE (2 * 1024 * 1024)
struct darshan_core_module #define DARSHAN_CORE_MOD_SET(flags, id) (flags | (1 << id))
{
darshan_module_id id; #define DARSHAN_CORE_MOD_UNSET(flags, id) (flags & ~(1 << id))
struct darshan_module_funcs mod_funcs;
}; #define DARSHAN_CORE_MOD_ISSET(flags, id) (flags & (1 << id))
/* in memory structure to keep up with job level data */ /* in memory structure to keep up with job level data */
struct darshan_core_runtime struct darshan_core_runtime
...@@ -31,12 +33,19 @@ struct darshan_core_runtime ...@@ -31,12 +33,19 @@ struct darshan_core_runtime
struct darshan_job log_job; struct darshan_job log_job;
char exe[DARSHAN_EXE_LEN+1]; char exe[DARSHAN_EXE_LEN+1];
struct darshan_core_record_ref *rec_hash; struct darshan_core_record_ref *rec_hash;
int rec_count;
struct darshan_core_module* mod_array[DARSHAN_MAX_MODS]; struct darshan_core_module* mod_array[DARSHAN_MAX_MODS];
char comp_buf[DARSHAN_COMP_BUF_SIZE]; char comp_buf[DARSHAN_CORE_COMP_BUF_SIZE];
double wtime_offset; double wtime_offset;
char *trailing_data; char *trailing_data;
}; };
struct darshan_core_module
{
darshan_module_id id;
struct darshan_module_funcs mod_funcs;
};
struct darshan_core_record_ref struct darshan_core_record_ref
{ {
struct darshan_record rec; struct darshan_record rec;
......
...@@ -25,20 +25,21 @@ ...@@ -25,20 +25,21 @@
/* Environment variable to override __CP_MEM_ALIGNMENT */ /* Environment variable to override __CP_MEM_ALIGNMENT */
#define CP_MEM_ALIGNMENT_OVERRIDE "DARSHAN_MEMALIGN" #define CP_MEM_ALIGNMENT_OVERRIDE "DARSHAN_MEMALIGN"
/* module developers provide the following functions to darshan-core */
struct darshan_module_funcs struct darshan_module_funcs
{ {
/* disable further instrumentation within a module */ /* disable further instrumentation within a module */
void (*disable_instrumentation)(void); void (*disable_instrumentation)(void);
/* TODO: */ /* perform any necessary steps prior to reducing */
void (*prepare_for_reduction)( void (*prepare_for_reduction)(
darshan_record_id *shared_recs, darshan_record_id *shared_recs, /* input list of shared records */
int *shared_rec_count, /* in/out shared record count */ int *shared_rec_count, /* in/out shared record count */
void **send_buf, void **send_buf, /* send buffer for shared file reduction */
void **recv_buf, void **recv_buf, /* recv buffer for shared file reduction (root only) */
int *rec_size int *rec_size /* size of records being stored for this module */
); );
/* TODO: */ /* reduce records which are shared globally across this module */
void (*reduce_record)( void (*reduce_records)(
void* infile_v, void* infile_v,
void* inoutfile_v, void* inoutfile_v,
int *len, int *len,
...@@ -62,13 +63,17 @@ void darshan_core_register_module( ...@@ -62,13 +63,17 @@ void darshan_core_register_module(
struct darshan_module_funcs *funcs, struct darshan_module_funcs *funcs,
int *runtime_mem_limit); int *runtime_mem_limit);
void darshan_core_lookup_record_id( void darshan_core_register_record(
void *name, void *name,
int len, int len,
int printable_flag, int printable_flag,
darshan_module_id mod_id, darshan_module_id mod_id,
darshan_record_id *id); darshan_record_id *id);
void darshan_core_unregister_record(
darshan_record_id rec_id,
darshan_module_id mod_id);
double darshan_core_wtime(void); double darshan_core_wtime(void);
/*********************************************** /***********************************************
......
...@@ -30,14 +30,15 @@ ...@@ -30,14 +30,15 @@
/* TODO is __progname_full needed here */ /* TODO is __progname_full needed here */
extern char* __progname; extern char* __progname;
#define DARSHAN_MPI_CALL(func) func
/* internal variable declarations */ /* internal variable declarations */
static struct darshan_core_runtime *darshan_core = NULL; static struct darshan_core_runtime *darshan_core = NULL;
static pthread_mutex_t darshan_core_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t darshan_core_mutex = PTHREAD_MUTEX_INITIALIZER;
static int my_rank = -1; static int my_rank = -1;
static int nprocs = -1; static int nprocs = -1;
#define DARSHAN_CORE_LOCK() pthread_mutex_lock(&darshan_core_mutex)
#define DARSHAN_CORE_UNLOCK() pthread_mutex_unlock(&darshan_core_mutex)
/* FS mount information */ /* FS mount information */
#define DARSHAN_MAX_MNTS 64 #define DARSHAN_MAX_MNTS 64
#define DARSHAN_MAX_MNT_PATH 256 #define DARSHAN_MAX_MNT_PATH 256
...@@ -79,9 +80,6 @@ static int darshan_log_write_record_hash( ...@@ -79,9 +80,6 @@ static int darshan_log_write_record_hash(
static int darshan_log_coll_write( static int darshan_log_coll_write(
MPI_File log_fh, void *buf, int count, struct darshan_log_map *map); MPI_File log_fh, void *buf, int count, struct darshan_log_map *map);
#define DARSHAN_CORE_LOCK() pthread_mutex_lock(&darshan_core_mutex)
#define DARSHAN_CORE_UNLOCK() pthread_mutex_unlock(&darshan_core_mutex)
/* intercept MPI initialize and finalize to manage darshan core runtime */ /* intercept MPI initialize and finalize to manage darshan core runtime */
int MPI_Init(int *argc, char ***argv) int MPI_Init(int *argc, char ***argv)
{ {
...@@ -397,6 +395,7 @@ static void darshan_core_shutdown() ...@@ -397,6 +395,7 @@ static void darshan_core_shutdown()
fprintf(stderr, "darshan library warning: unable to write job data to log file %s\n", fprintf(stderr, "darshan library warning: unable to write job data to log file %s\n",
logfile_name); logfile_name);
unlink(logfile_name); unlink(logfile_name);
} }
/* set the beginning offset of record hash, which precedes job info just written */ /* set the beginning offset of record hash, which precedes job info just written */
...@@ -438,7 +437,6 @@ static void darshan_core_shutdown() ...@@ -438,7 +437,6 @@ static void darshan_core_shutdown()
if(internal_timing_flag) if(internal_timing_flag)
rec2 = DARSHAN_MPI_CALL(PMPI_Wtime)(); rec2 = DARSHAN_MPI_CALL(PMPI_Wtime)();
/* TODO: would be nice to factor this out somehow ... a lot to look at */
/* loop over globally used darshan modules and: /* loop over globally used darshan modules and:
* - perform shared file reductions, if possible * - perform shared file reductions, if possible
* - get final output buffer * - get final output buffer
...@@ -484,7 +482,7 @@ static void darshan_core_shutdown() ...@@ -484,7 +482,7 @@ static void darshan_core_shutdown()
HASH_FIND(hlink, final_core->rec_hash, &shared_recs[j], HASH_FIND(hlink, final_core->rec_hash, &shared_recs[j],
sizeof(darshan_record_id), ref); sizeof(darshan_record_id), ref);
assert(ref); assert(ref);
if(ref->global_mod_flags & (1 << i)) /* TODO: MACRO? */ if(DARSHAN_CORE_MOD_ISSET(ref->global_mod_flags, i))
{ {
mod_shared_recs[shared_rec_count++] = shared_recs[j]; mod_shared_recs[shared_rec_count++] = shared_recs[j];
} }
...@@ -505,7 +503,7 @@ static void darshan_core_shutdown() ...@@ -505,7 +503,7 @@ static void darshan_core_shutdown()
DARSHAN_MPI_CALL(PMPI_Type_commit)(&red_type); DARSHAN_MPI_CALL(PMPI_Type_commit)(&red_type);
/* register a reduction operator for this module */ /* register a reduction operator for this module */
DARSHAN_MPI_CALL(PMPI_Op_create)(this_mod->mod_funcs.reduce_record, DARSHAN_MPI_CALL(PMPI_Op_create)(this_mod->mod_funcs.reduce_records,
1, &red_op); 1, &red_op);
/* reduce shared file records for this module */ /* reduce shared file records for this module */
...@@ -613,29 +611,39 @@ static void darshan_core_shutdown() ...@@ -613,29 +611,39 @@ static void darshan_core_shutdown()
* to *-<logwritetime>.darshan.gz, which indicates that this log file is * to *-<logwritetime>.darshan.gz, which indicates that this log file is
* complete and ready for analysis * complete and ready for analysis
*/ */
/* TODO: support user given logfile path/name */
if(my_rank == 0) if(my_rank == 0)
{ {
char* tmp_index; if(getenv("DARSHAN_LOGFILE"))
double end_log_time;
char* new_logfile_name;
new_logfile_name = malloc(PATH_MAX);
if(new_logfile_name)
{ {
new_logfile_name[0] = '\0';
end_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();
strcat(new_logfile_name, logfile_name);
tmp_index = strstr(new_logfile_name, ".darshan_partial");
sprintf(tmp_index, "_%d.darshan.gz", (int)(end_log_time-start_log_time+1));
rename(logfile_name, new_logfile_name);
/* set permissions on log file */
#ifdef __CP_GROUP_READABLE_LOGS #ifdef __CP_GROUP_READABLE_LOGS
chmod(new_logfile_name, (S_IRUSR|S_IRGRP)); chmod(logfile_name, (S_IRUSR|S_IRGRP));
#else #else
chmod(new_logfile_name, (S_IRUSR)); chmod(logfile_name, (S_IRUSR));
#endif #endif
free(new_logfile_name); }
else
{
char* tmp_index;
double end_log_time;
char* new_logfile_name;
new_logfile_name = malloc(PATH_MAX);
if(new_logfile_name)
{
new_logfile_name[0] = '\0';
end_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();
strcat(new_logfile_name, logfile_name);
tmp_index = strstr(new_logfile_name, ".darshan_partial");
sprintf(tmp_index, "_%d.darshan.gz", (int)(end_log_time-start_log_time+1));
rename(logfile_name, new_logfile_name);
/* set permissions on log file */
#ifdef __CP_GROUP_READABLE_LOGS
chmod(new_logfile_name, (S_IRUSR|S_IRGRP));
#else
chmod(new_logfile_name, (S_IRUSR));
#endif
free(new_logfile_name);
}
} }
} }
...@@ -727,6 +735,7 @@ static void darshan_core_cleanup(struct darshan_core_runtime* core) ...@@ -727,6 +735,7 @@ static void darshan_core_cleanup(struct darshan_core_runtime* core)
/* construct the darshan log file name */ /* construct the darshan log file name */
static void darshan_get_logfile_name(char* logfile_name, int jobid, struct tm* start_tm) static void darshan_get_logfile_name(char* logfile_name, int jobid, struct tm* start_tm)
{ {
char* user_logfile_name;
char* logpath; char* logpath;
char* logname_string; char* logname_string;
char* logpath_override = NULL; char* logpath_override = NULL;
...@@ -740,120 +749,139 @@ static void darshan_get_logfile_name(char* logfile_name, int jobid, struct tm* s ...@@ -740,120 +749,139 @@ static void darshan_get_logfile_name(char* logfile_name, int jobid, struct tm* s
char cuser[L_cuserid] = {0}; char cuser[L_cuserid] = {0};
int ret; int ret;
/* Use CP_LOG_PATH_OVERRIDE for the value or __CP_LOG_PATH */ /* first, check if user specifies a complete logpath to use */
logpath = getenv(CP_LOG_PATH_OVERRIDE); user_logfile_name = getenv("DARSHAN_LOGFILE");
if(!logpath) if(user_logfile_name)
{
if(strlen(user_logfile_name) >= (PATH_MAX-1))
{
fprintf(stderr, "darshan library warning: user log file name too long.\n");
logfile_name[0] = '\0';
}
else
{
strcpy(logfile_name, user_logfile_name);
}
}
else
{ {
/* otherwise, generate the log path automatically */
/* Use CP_LOG_PATH_OVERRIDE for the value or __CP_LOG_PATH */
logpath = getenv(CP_LOG_PATH_OVERRIDE);
if(!logpath)
{
#ifdef __CP_LOG_PATH #ifdef __CP_LOG_PATH
logpath = __CP_LOG_PATH; logpath = __CP_LOG_PATH;
#endif #endif
} }
/* get the username for this job. In order we will try each of the /* get the username for this job. In order we will try each of the
* following until one of them succeeds: * following until one of them succeeds:
* *
* - cuserid() * - cuserid()
* - getenv("LOGNAME") * - getenv("LOGNAME")
* - snprintf(..., geteuid()); * - snprintf(..., geteuid());
* *
* Note that we do not use getpwuid() because it generally will not * Note that we do not use getpwuid() because it generally will not
* work in statically compiled binaries. * work in statically compiled binaries.
*/ */
#ifndef DARSHAN_DISABLE_CUSERID #ifndef DARSHAN_DISABLE_CUSERID
cuserid(cuser); cuserid(cuser);
#endif #endif
/* if cuserid() didn't work, then check the environment */ /* if cuserid() didn't work, then check the environment */
if(strcmp(cuser, "") == 0) if(strcmp(cuser, "") == 0)
{
logname_string = getenv("LOGNAME");
if(logname_string)
{ {
strncpy(cuser, logname_string, (L_cuserid-1)); logname_string = getenv("LOGNAME");
if(logname_string)
{
strncpy(cuser, logname_string, (L_cuserid-1));
}
} }
}
/* if cuserid() and environment both fail, then fall back to uid */ /* if cuserid() and environment both fail, then fall back to uid */
if(strcmp(cuser, "") == 0) if(strcmp(cuser, "") == 0)
{ {
uid_t uid = geteuid(); uid_t uid = geteuid();
snprintf(cuser, sizeof(cuser), "%u", uid); snprintf(cuser, sizeof(cuser), "%u", uid);
} }
/* generate a random number to help differentiate the log */ /* generate a random number to help differentiate the log */
hlevel=DARSHAN_MPI_CALL(PMPI_Wtime)() * 1000000; hlevel=DARSHAN_MPI_CALL(PMPI_Wtime)() * 1000000;
(void)gethostname(hname, sizeof(hname)); (void)gethostname(hname, sizeof(hname));
logmod = darshan_hash((void*)hname,strlen(hname),hlevel); logmod = darshan_hash((void*)hname,strlen(hname),hlevel);
/* see if darshan was configured using the --with-logpath-by-env /* see if darshan was configured using the --with-logpath-by-env
* argument, which allows the user to specify an absolute path to * argument, which allows the user to specify an absolute path to
* place logs via an env variable. * place logs via an env variable.
*/ */
#ifdef __CP_LOG_ENV #ifdef __CP_LOG_ENV
/* just silently skip if the environment variable list is too big */ /* just silently skip if the environment variable list is too big */
if(strlen(__CP_LOG_ENV) < 256) if(strlen(__CP_LOG_ENV) < 256)
{
/* copy env variable list to a temporary buffer */
strcpy(env_check, __CP_LOG_ENV);
/* tokenize the comma-separated list */
env_tok = strtok(env_check, ",");
if(env_tok)
{ {
do /* copy env variable list to a temporary buffer */
strcpy(env_check, __CP_LOG_ENV);
/* tokenize the comma-separated list */
env_tok = strtok(env_check, ",");
if(env_tok)
{ {
/* check each env variable in order */ do
logpath_override = getenv(env_tok);
if(logpath_override)
{ {
/* stop as soon as we find a match */ /* check each env variable in order */
break; logpath_override = getenv(env_tok);
} if(logpath_override)
}while((env_tok = strtok(NULL, ","))); {
/* stop as soon as we find a match */
break;
}
}while((env_tok = strtok(NULL, ",")));
}
} }
}
#endif #endif
if(logpath_override) if(logpath_override)
{
ret = snprintf(logfile_name, PATH_MAX,
"%s/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
logpath_override,
cuser, __progname, jobid,
(start_tm->tm_mon+1),
start_tm->tm_mday,
(start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
logmod);
if(ret == (PATH_MAX-1))
{ {
/* file name was too big; squish it down */ ret = snprintf(logfile_name, PATH_MAX,
snprintf(logfile_name, PATH_MAX, "%s/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
"%s/id%d.darshan_partial", logpath_override,
logpath_override, jobid); cuser, __progname, jobid,
(start_tm->tm_mon+1),
start_tm->tm_mday,
(start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
logmod);
if(ret == (PATH_MAX-1))
{
/* file name was too big; squish it down */
snprintf(logfile_name, PATH_MAX,
"%s/id%d.darshan_partial",
logpath_override, jobid);
}
} }
} else if(logpath)
else if(logpath)
{
ret = snprintf(logfile_name, PATH_MAX,
"%s/%d/%d/%d/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
logpath, (start_tm->tm_year+1900),
(start_tm->tm_mon+1), start_tm->tm_mday,
cuser, __progname, jobid,
(start_tm->tm_mon+1),
start_tm->tm_mday,
(start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
logmod);
if(ret == (PATH_MAX-1))
{ {
/* file name was too big; squish it down */ ret = snprintf(logfile_name, PATH_MAX,
snprintf(logfile_name, PATH_MAX, "%s/%d/%d/%d/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial",
"%s/id%d.darshan_partial", logpath, (start_tm->tm_year+1900),
logpath, jobid); (start_tm->tm_mon+1), start_tm->tm_mday,
cuser, __progname, jobid,
(start_tm->tm_mon+1),
start_tm->tm_mday,
(start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec),
logmod);
if(ret == (PATH_MAX-1))
{
/* file name was too big; squish it down */
snprintf(logfile_name, PATH_MAX,
"%s/id%d.darshan_partial",
logpath, jobid);
}
}
else
{
logfile_name[0] = '\0';
} }
}
else
{
logfile_name[0] = '\0';
} }
return; return;
...@@ -1198,7 +1226,7 @@ static int darshan_log_coll_open(char *logfile_name, MPI_File *log_fh) ...@@ -1198,7 +1226,7 @@ static int darshan_log_coll_open(char *logfile_name, MPI_File *log_fh)
/* open the darshan log file for writing */ /* open the darshan log file for writing */
ret = DARSHAN_MPI_CALL(PMPI_File_open)(MPI_COMM_WORLD, logfile_name, ret = DARSHAN_MPI_CALL(PMPI_File_open)(MPI_COMM_WORLD, logfile_name,
MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_EXCL, info, log_fh); MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_EXCL, info, log_fh);
if(ret < 0) if(ret != MPI_SUCCESS)
return(-1); return(-1);
MPI_Info_free(&info); MPI_Info_free(&info);
...@@ -1244,7 +1272,7 @@ static int darshan_compress_buffer(void **pointers, int *lengths, int count, ...@@ -1244,7 +1272,7 @@ static int darshan_compress_buffer(void **pointers, int *lengths, int count,
} }
tmp_stream.next_out = comp_buf; tmp_stream.next_out = comp_buf;
tmp_stream.avail_out = DARSHAN_COMP_BUF_SIZE; tmp_stream.avail_out = DARSHAN_CORE_COMP_BUF_SIZE;
/* loop over the input pointers */ /* loop over the input pointers */
for(i = 0; i < count; i++) for(i = 0; i < count; i++)
...@@ -1425,7 +1453,7 @@ static int darshan_log_coll_write(MPI_File log_fh, void *buf, int count, ...@@ -1425,7 +1453,7 @@ static int darshan_log_coll_write(MPI_File log_fh, void *buf, int count,
/* perform the collective write */ /* perform the collective write */
ret = DARSHAN_MPI_CALL(PMPI_File_write_at_all)(log_fh, my_off, buf, ret = DARSHAN_MPI_CALL(PMPI_File_write_at_all)(log_fh, my_off, buf,
count, MPI_BYTE, &status); count, MPI_BYTE, &status);
if(ret < 0) if(ret != MPI_SUCCESS)
return(-1); return(-1);