Commit 5348600b authored by Shane Snyder
Browse files

update runtime/util side with new index map def

parent 721e5e5c
...@@ -30,26 +30,44 @@ ...@@ -30,26 +30,44 @@
/* max length of exe string within job record (not counting '\0') */ /* max length of exe string within job record (not counting '\0') */
#define CP_EXE_LEN (CP_JOB_RECORD_SIZE - sizeof(struct darshan_job) - 1) #define CP_EXE_LEN (CP_JOB_RECORD_SIZE - sizeof(struct darshan_job) - 1)
/* Maximum length of a module name string, not counting the terminating
 * '\0'; a buffer that holds a module name therefore needs
 * DARSHAN_MOD_NAME_LEN + 1 bytes.
 */
/* TODO */
#define DARSHAN_MOD_NAME_LEN 31
typedef uint64_t darshan_record_id; typedef uint64_t darshan_record_id;
/* Unique identifiers used to distinguish between the available darshan
 * modules.
 *
 * NOTES:
 *  - valid ids lie in the range [0, DARSHAN_MAX_MODS - 1]
 *  - the order of the ids controls the module shutdown order (and
 *    consequently, the order in the log file)
 */
#define DARSHAN_MAX_MODS 16

typedef enum
{
    DARSHAN_POSIX_MOD = 0,
    DARSHAN_MPIIO_MOD = 1,
    DARSHAN_HDF5_MOD = 2,
    DARSHAN_PNETCDF_MOD = 3,
} darshan_module_id;
enum darshan_comp_type enum darshan_comp_type
{ {
DARSHAN_GZ_COMP, DARSHAN_GZ_COMP,
DARSHAN_BZ2_COMP, DARSHAN_BZ2_COMP, /* TODO: no bz2 support util side, yet */
}; };
struct darshan_header struct darshan_log_map
{ {
char version_string[8]; uint64_t off;
int64_t magic_nr; uint64_t len;
uint8_t comp_type; /* TODO */
uint8_t mod_count; /* TODO: */
}; };
struct darshan_record struct darshan_header
{ {
char* name; char version_string[8];
darshan_record_id id; int64_t magic_nr;
unsigned char comp_type;
struct darshan_log_map rec_map;
struct darshan_log_map mod_map[DARSHAN_MAX_MODS];
}; };
/* statistics for the job as a whole */ /* statistics for the job as a whole */
...@@ -64,4 +82,10 @@ struct darshan_job ...@@ -64,4 +82,10 @@ struct darshan_job
char metadata[DARSHAN_JOB_METADATA_LEN]; /* TODO: what is this? */ char metadata[DARSHAN_JOB_METADATA_LEN]; /* TODO: what is this? */
}; };
/* A darshan record: associates a record's name with its record id. */
struct darshan_record
{
    char *name;            /* name string of the record */
    darshan_record_id id;  /* identifier of the record */
};
#endif /* __DARSHAN_LOG_FORMAT_H */ #endif /* __DARSHAN_LOG_FORMAT_H */
...@@ -25,25 +25,9 @@ ...@@ -25,25 +25,9 @@
/* Environment variable to override __CP_MEM_ALIGNMENT */ /* Environment variable to override __CP_MEM_ALIGNMENT */
#define CP_MEM_ALIGNMENT_OVERRIDE "DARSHAN_MEMALIGN" #define CP_MEM_ALIGNMENT_OVERRIDE "DARSHAN_MEMALIGN"
/* TODO where do each of the following macros make most sense ? */ /* TODO where does this go? */
#define DARSHAN_MPI_CALL(func) func #define DARSHAN_MPI_CALL(func) func
/* Maximum length of a module name string (not counting the terminating \0) */
#define DARSHAN_MOD_NAME_LEN 31

/* Unique identifiers used to distinguish between the available darshan
 * modules.
 *
 * NOTES:
 *  - valid ids lie in the range [0, DARSHAN_MAX_MODS - 1]
 *  - the order of the ids controls the module shutdown order (the first
 *    module shuts down first)
 */
#define DARSHAN_MAX_MODS 16

typedef enum
{
    DARSHAN_POSIX_MOD = 0,
    DARSHAN_MPIIO_MOD = 1,
    DARSHAN_HDF5_MOD = 2,
    DARSHAN_PNETCDF_MOD = 3,
} darshan_module_id;
struct darshan_module_funcs struct darshan_module_funcs
{ {
void (*get_output_data)( void (*get_output_data)(
......
...@@ -47,13 +47,11 @@ static void darshan_log_record_hints_and_ver( ...@@ -47,13 +47,11 @@ static void darshan_log_record_hints_and_ver(
struct darshan_core_runtime* job); struct darshan_core_runtime* job);
static int darshan_get_shared_record_ids( static int darshan_get_shared_record_ids(
struct darshan_core_runtime *job, darshan_record_id *shared_recs); struct darshan_core_runtime *job, darshan_record_id *shared_recs);
static int darshan_log_write_header(
MPI_File log_fh, int mod_count, int64_t rec_off, int64_t psx_off);
static int darshan_log_write_record_map( static int darshan_log_write_record_map(
MPI_File log_fh, struct darshan_core_runtime *job, MPI_File log_fh, struct darshan_core_record_ref *rec_hash,
darshan_record_id *shared_recs, MPI_Offset *off); darshan_record_id *shared_recs, struct darshan_log_map *map);
static int darshan_log_coll_append(MPI_File log_fh, MPI_Offset *off, static int darshan_log_coll_write(
void *buf, int count); MPI_File log_fh, void *buf, int count, struct darshan_log_map *map);
#define DARSHAN_CORE_LOCK() pthread_mutex_lock(&darshan_core_mutex) #define DARSHAN_CORE_LOCK() pthread_mutex_lock(&darshan_core_mutex)
#define DARSHAN_CORE_UNLOCK() pthread_mutex_unlock(&darshan_core_mutex) #define DARSHAN_CORE_UNLOCK() pthread_mutex_unlock(&darshan_core_mutex)
...@@ -186,7 +184,6 @@ static void darshan_core_shutdown() ...@@ -186,7 +184,6 @@ static void darshan_core_shutdown()
int i; int i;
char *logfile_name; char *logfile_name;
struct darshan_core_runtime *final_job; struct darshan_core_runtime *final_job;
struct darshan_core_module *mod, *tmp;
int internal_timing_flag = 0; int internal_timing_flag = 0;
char *envjobid; char *envjobid;
char *jobid_str; char *jobid_str;
...@@ -200,19 +197,12 @@ static void darshan_core_shutdown() ...@@ -200,19 +197,12 @@ static void darshan_core_shutdown()
int local_mod_use[DARSHAN_MAX_MODS] = {0}; int local_mod_use[DARSHAN_MAX_MODS] = {0};
int global_mod_use_count[DARSHAN_MAX_MODS] = {0}; int global_mod_use_count[DARSHAN_MAX_MODS] = {0};
darshan_record_id shared_recs[DARSHAN_CORE_MAX_RECORDS] = {0}; darshan_record_id shared_recs[DARSHAN_CORE_MAX_RECORDS] = {0};
char *key;
char *value;
char *hints; char *hints;
char *tok_str;
char *orig_tok_str;
char *saveptr = NULL;
char *mod_index;
char *new_logfile_name;
double start_log_time; double start_log_time;
double end_log_time;
long offset; long offset;
struct darshan_header log_header;
MPI_File log_fh; MPI_File log_fh;
MPI_Offset my_off = 0; MPI_Offset tmp_off;
MPI_Info info; MPI_Info info;
MPI_Status status; MPI_Status status;
...@@ -251,6 +241,7 @@ static void darshan_core_shutdown() ...@@ -251,6 +241,7 @@ static void darshan_core_shutdown()
envjobid = CP_JOBID; envjobid = CP_JOBID;
} }
/* find a job id */
jobid_str = getenv(envjobid); jobid_str = getenv(envjobid);
if(jobid_str) if(jobid_str)
{ {
...@@ -265,10 +256,13 @@ static void darshan_core_shutdown() ...@@ -265,10 +256,13 @@ static void darshan_core_shutdown()
final_job->log_job.jobid = (int64_t)jobid; final_job->log_job.jobid = (int64_t)jobid;
/* TODO */
#if 0
/* if we are using any hints to write the log file, then record those /* if we are using any hints to write the log file, then record those
* hints in the log file header * hints with the darshan job information
*/ */
darshan_log_record_hints_and_ver(final_job); darshan_log_record_hints_and_ver(final_job);
#endif
/* use human readable start time format in log filename */ /* use human readable start time format in log filename */
start_time_tmp = final_job->log_job.start_time; start_time_tmp = final_job->log_job.start_time;
...@@ -303,6 +297,10 @@ static void darshan_core_shutdown() ...@@ -303,6 +297,10 @@ static void darshan_core_shutdown()
final_job->log_job.end_time = last_end_time; final_job->log_job.end_time = last_end_time;
} }
/* XXX */
/* TODO: ensuing error checking...does MPI ensure collective I/O functions return the same error
* globally, or do I always need to allreduce????? */
/* set which local modules were actually used */ /* set which local modules were actually used */
for(i = 0; i < DARSHAN_MAX_MODS; i++) for(i = 0; i < DARSHAN_MAX_MODS; i++)
{ {
...@@ -313,6 +311,23 @@ static void darshan_core_shutdown() ...@@ -313,6 +311,23 @@ static void darshan_core_shutdown()
/* reduce the number of times a module was opened globally and bcast to everyone */ /* reduce the number of times a module was opened globally and bcast to everyone */
DARSHAN_MPI_CALL(PMPI_Allreduce)(local_mod_use, global_mod_use_count, DARSHAN_MAX_MODS, MPI_INT, MPI_SUM, MPI_COMM_WORLD); DARSHAN_MPI_CALL(PMPI_Allreduce)(local_mod_use, global_mod_use_count, DARSHAN_MAX_MODS, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
/* get a list of records which are shared across all processes */
ret = darshan_get_shared_record_ids(final_job, shared_recs);
/* error out if unable to determine shared file records */
DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
MPI_LOR, MPI_COMM_WORLD);
if(all_ret != 0)
{
if(my_rank == 0)
{
fprintf(stderr, "darshan library warning: unable to determine shared file records\n");
}
free(logfile_name);
darshan_core_cleanup(final_job);
return;
}
/* check environment variable to see if the default MPI file hints have /* check environment variable to see if the default MPI file hints have
* been overridden * been overridden
*/ */
...@@ -326,6 +341,12 @@ static void darshan_core_shutdown() ...@@ -326,6 +341,12 @@ static void darshan_core_shutdown()
if(hints && strlen(hints) > 0) if(hints && strlen(hints) > 0)
{ {
char *tok_str;
char *orig_tok_str;
char *key;
char *value;
char *saveptr = NULL;
tok_str = strdup(hints); tok_str = strdup(hints);
if(tok_str) if(tok_str)
{ {
...@@ -353,27 +374,6 @@ static void darshan_core_shutdown() ...@@ -353,27 +374,6 @@ static void darshan_core_shutdown()
} }
} }
/* TODO: ensuing error checking...does MPI ensure collective I/O functions return the same error
* globally, or do I always need to allreduce????? */
/* get a list of records which are shared across all processes */
ret = darshan_get_shared_record_ids(final_job, shared_recs);
/* error out if unable to determine shared file records */
DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
MPI_LOR, MPI_COMM_WORLD);
if(all_ret != 0)
{
if(my_rank == 0)
{
fprintf(stderr, "darshan library warning: unable to determine shared file records\n");
}
free(logfile_name);
darshan_core_cleanup(final_job);
return;
}
/* open the darshan log file for writing */ /* open the darshan log file for writing */
ret = DARSHAN_MPI_CALL(PMPI_File_open)(MPI_COMM_WORLD, logfile_name, ret = DARSHAN_MPI_CALL(PMPI_File_open)(MPI_COMM_WORLD, logfile_name,
MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_EXCL, info, &log_fh); MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_EXCL, info, &log_fh);
...@@ -399,14 +399,12 @@ static void darshan_core_shutdown() ...@@ -399,14 +399,12 @@ static void darshan_core_shutdown()
return; return;
} }
/* rank 0 is responsible for writing the darshan job information */
if(my_rank == 0) if(my_rank == 0)
{ {
my_off = sizeof(struct darshan_header); /* write the job information, making sure to prealloc space for the log header */
my_off += 2 * sizeof(int64_t); /* FIXME account for changes to index map */ ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, sizeof(struct darshan_header),
&final_job->log_job, sizeof(struct darshan_job), MPI_BYTE, &status);
ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, my_off, &final_job->log_job,
sizeof(struct darshan_job), MPI_BYTE, &status);
if(ret != MPI_SUCCESS) if(ret != MPI_SUCCESS)
{ {
int msg_len; int msg_len;
...@@ -417,12 +415,13 @@ static void darshan_core_shutdown() ...@@ -417,12 +415,13 @@ static void darshan_core_shutdown()
logfile_name, msg); logfile_name, msg);
} }
my_off += sizeof(struct darshan_job); /* TODO */
log_header.rec_map.off = sizeof(struct darshan_header) + sizeof(struct darshan_job);
} }
int64_t rec_off = my_off; /* TODO: get rid of this hack */
/* write the record name->id map to the log file */ /* write the record name->id map to the log file */
ret = darshan_log_write_record_map(log_fh, final_job, shared_recs, &my_off); ret = darshan_log_write_record_map(log_fh, final_job->rec_hash,
shared_recs, &log_header.rec_map);
DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT, DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
MPI_LOR, MPI_COMM_WORLD); MPI_LOR, MPI_COMM_WORLD);
...@@ -437,13 +436,12 @@ static void darshan_core_shutdown() ...@@ -437,13 +436,12 @@ static void darshan_core_shutdown()
darshan_core_cleanup(final_job); darshan_core_cleanup(final_job);
return; return;
} }
int64_t psx_off = my_off; /* TODO: get rid of this hack */
/* loop over globally used darshan modules and: /* loop over globally used darshan modules and:
* - get final output buffer * - get final output buffer
* - compress (zlib) provided output buffer * - compress (zlib) provided output buffer
* - append compressed buffer to log file * - append compressed buffer to log file
* - shutdown the module * - shutdown the module TODO
*/ */
for(i = 0; i < DARSHAN_MAX_MODS; i++) for(i = 0; i < DARSHAN_MAX_MODS; i++)
{ {
...@@ -453,7 +451,12 @@ static void darshan_core_shutdown() ...@@ -453,7 +451,12 @@ static void darshan_core_shutdown()
int mod_buf_size = 0; int mod_buf_size = 0;
if(!global_mod_use_count[i]) if(!global_mod_use_count[i])
{
if(my_rank == 0)
log_header.mod_map[i].off = log_header.mod_map[i].len = 0;
continue; continue;
}
/* create a communicator to use for shutting down the module */ /* create a communicator to use for shutting down the module */
if(global_mod_use_count[i] == nprocs) if(global_mod_use_count[i] == nprocs)
...@@ -472,13 +475,21 @@ static void darshan_core_shutdown() ...@@ -472,13 +475,21 @@ static void darshan_core_shutdown()
this_mod->mod_funcs.get_output_data(mod_comm, &mod_buf, &mod_buf_size); this_mod->mod_funcs.get_output_data(mod_comm, &mod_buf, &mod_buf_size);
} }
/* append module data buffer to the darshan log file */ /* set the starting offset of this module */
ret = darshan_log_coll_append(log_fh, &my_off, mod_buf, mod_buf_size); if(tmp_off == 0)
tmp_off = log_header.rec_map.off + log_header.rec_map.len;
log_header.mod_map[i].off = tmp_off;
/* write module data buffer to the darshan log file */
ret = darshan_log_coll_write(log_fh, mod_buf, mod_buf_size, &log_header.mod_map[i]);
if(ret < 0) if(ret < 0)
{ {
/* TODO: */ /* TODO: */
} }
tmp_off += log_header.mod_map[i].len;
/* shutdown module if registered locally */ /* shutdown module if registered locally */
if(local_mod_use[i]) if(local_mod_use[i])
{ {
...@@ -489,10 +500,20 @@ static void darshan_core_shutdown() ...@@ -489,10 +500,20 @@ static void darshan_core_shutdown()
MPI_Comm_free(&mod_comm); MPI_Comm_free(&mod_comm);
} }
/* rank 0 is responsible for writing the log header and index map */ /* rank 0 is responsible for writing the log header */
if(my_rank == 0) if(my_rank == 0)
{ {
ret = darshan_log_write_header(log_fh, 1, rec_off, psx_off); /* initialize the remaining header fields */
strcpy(log_header.version_string, CP_VERSION);
log_header.magic_nr = CP_MAGIC_NR;
log_header.comp_type = DARSHAN_GZ_COMP;
ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, 0, &log_header,
sizeof(struct darshan_header), MPI_BYTE, &status);
if(ret != MPI_SUCCESS)
{
/* TODO */
}
} }
DARSHAN_MPI_CALL(PMPI_File_close)(&log_fh); DARSHAN_MPI_CALL(PMPI_File_close)(&log_fh);
...@@ -501,26 +522,33 @@ static void darshan_core_shutdown() ...@@ -501,26 +522,33 @@ static void darshan_core_shutdown()
* to *-<logwritetime>.darshan.gz, which indicates that this log file is * to *-<logwritetime>.darshan.gz, which indicates that this log file is
* complete and ready for analysis * complete and ready for analysis
*/ */
new_logfile_name = malloc(PATH_MAX); if(my_rank == 0)
if(new_logfile_name)
{ {
new_logfile_name[0] = '\0'; char* tmp_index;
end_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)(); double end_log_time;
strcat(new_logfile_name, logfile_name); char* new_logfile_name;
mod_index = strstr(new_logfile_name, ".darshan_partial");
sprintf(mod_index, "_%d.darshan.gz", (int)(end_log_time-start_log_time+1)); new_logfile_name = malloc(PATH_MAX);
rename(logfile_name, new_logfile_name); if(new_logfile_name)
/* set permissions on log file */ {
new_logfile_name[0] = '\0';
end_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();
strcat(new_logfile_name, logfile_name);
tmp_index = strstr(new_logfile_name, ".darshan_partial");
sprintf(tmp_index, "_%d.darshan.gz", (int)(end_log_time-start_log_time+1));
rename(logfile_name, new_logfile_name);
/* set permissions on log file */
#ifdef __CP_GROUP_READABLE_LOGS #ifdef __CP_GROUP_READABLE_LOGS
chmod(new_logfile_name, (S_IRUSR|S_IRGRP)); chmod(new_logfile_name, (S_IRUSR|S_IRGRP));
#else #else
chmod(new_logfile_name, (S_IRUSR)); chmod(new_logfile_name, (S_IRUSR));
#endif #endif
free(new_logfile_name); free(new_logfile_name);
}
} }
darshan_core_cleanup(final_job);
free(logfile_name); free(logfile_name);
darshan_core_cleanup(final_job);
if(internal_timing_flag) if(internal_timing_flag)
{ {
...@@ -597,7 +625,6 @@ static void darshan_get_logfile_name(char* logfile_name, int jobid, struct tm* s ...@@ -597,7 +625,6 @@ static void darshan_get_logfile_name(char* logfile_name, int jobid, struct tm* s
{ {
strncpy(cuser, logname_string, (L_cuserid-1)); strncpy(cuser, logname_string, (L_cuserid-1));
} }
} }
/* if cuserid() and environment both fail, then fall back to uid */ /* if cuserid() and environment both fail, then fall back to uid */
...@@ -795,76 +822,32 @@ static int darshan_get_shared_record_ids(struct darshan_core_runtime *job, ...@@ -795,76 +822,32 @@ static int darshan_get_shared_record_ids(struct darshan_core_runtime *job,
return(0); return(0);
} }
/* Write the darshan log header, immediately followed by a two-entry index
 * map, at offset 0 of the log file.
 *
 * log_fh    - MPI file handle of the log being written
 * mod_count - value stored in the header's mod_count field
 * rec_off   - first index map entry (record map offset)
 * psx_off   - second index map entry (the caller passes the offset that
 *             follows the record map)
 *
 * Returns 0 on success, or -1 if the staging buffer cannot be allocated or
 * the MPI write reports an error.
 */
static int darshan_log_write_header(MPI_File log_fh, int mod_count,
    int64_t rec_off, int64_t psx_off)
{
    struct darshan_header base_hdr;
    unsigned char *hdr_buf;
    unsigned char *tmp_p;
    int hdr_size;
    MPI_Status status;
    int ret;

    /* zero the header first so that struct padding and the unused tail of
     * version_string are written to the log with deterministic contents
     */
    memset(&base_hdr, 0, sizeof(base_hdr));

    /* set the fields of the darshan header; the copy is bounded (and the
     * final byte left as '\0') so CP_VERSION can never overflow the field
     */
    strncpy(base_hdr.version_string, CP_VERSION,
        sizeof(base_hdr.version_string) - 1);
    base_hdr.magic_nr = CP_MAGIC_NR;
    base_hdr.comp_type = DARSHAN_GZ_COMP;
    base_hdr.mod_count = mod_count;

    hdr_size = sizeof(struct darshan_header) + (2 * sizeof(int64_t)); /* TODO: */
    hdr_buf = malloc(hdr_size);
    if(!hdr_buf)
    {
        return(-1);
    }

    /* pack the header in buffer for writing; memcpy avoids storing through
     * casted pointers into the byte buffer
     */
    tmp_p = hdr_buf;
    memcpy(tmp_p, &base_hdr, sizeof(base_hdr));
    tmp_p += sizeof(base_hdr);
    /* TODO: we need to have a way of identifying different modules in index map */
    memcpy(tmp_p, &rec_off, sizeof(rec_off));
    tmp_p += sizeof(rec_off);
    memcpy(tmp_p, &psx_off, sizeof(psx_off));

    ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, 0, hdr_buf, hdr_size,
        MPI_BYTE, &status);

    /* the staging buffer is released whether or not the write succeeded */
    free(hdr_buf);

    /* MPI signals failure with any code other than MPI_SUCCESS; error codes
     * are not negative, so a "< 0" test would never fire
     */
    if(ret != MPI_SUCCESS)
    {
        return(-1);
    }

    return(0);
}
/* NOTE: the map written to file may contain duplicate id->name entries if a /* NOTE: the map written to file may contain duplicate id->name entries if a
* record is opened by multiple ranks, but not all ranks * record is opened by multiple ranks, but not all ranks
*/ */
static int darshan_log_write_record_map(MPI_File log_fh, struct darshan_core_runtime *job, static int darshan_log_write_record_map(MPI_File log_fh, struct darshan_core_record_ref *rec_hash,
darshan_record_id *shared_recs, MPI_Offset *off) darshan_record_id *shared_recs, struct darshan_log_map *map)
{ {
int i; int i;
int ret; int ret;
struct darshan_core_record_ref *ref, *tmp; struct darshan_core_record_ref *ref, *tmp;
uint32_t name_len; uint32_t name_len;
size_t record_sz; size_t record_sz;
size_t map_buf_sz = 0; size_t hash_buf_sz = 0;
unsigned char *map_buf; unsigned char *hash_buf;
unsigned char *map_buf_off; unsigned char *hash_buf_off;
MPI_Status status; MPI_Status status;
/* non-root ranks (rank 0) remove shared records from their map -- /* non-root ranks (rank > 0) remove shared records from their map --
* these records will be written by rank 0 * these records will be written by rank 0
*/ */
if(my_rank > 0) if(my_rank > 0)
{ {
for(i=0; (i<DARSHAN_CORE_MAX_RECORDS && shared_recs[i]); i++) for(i=0; (i<DARSHAN_CORE_MAX_RECORDS && shared_recs[i]); i++)
{ {
HASH_FIND(hlink, job->rec_hash, &shared_recs[i], sizeof(darshan_record_id), ref); HASH_FIND(hlink, rec_hash, &shared_recs[i], sizeof(darshan_record_id), ref);
assert(ref); /* this id had better be in the hash ... */ assert(ref); /* this id had better be in the hash ... */
HASH_DELETE(hlink, job->rec_hash, ref); HASH_DELETE(hlink, rec_hash, ref);
if(ref->rec.name) free(ref->rec.name); if(ref->rec.name) free(ref->rec.name);