Commit bb25083b authored by Shane Snyder

rename merge log utility

parent d5abad0f
all: libdarshan-util.a darshan-null-logutils.o darshan-analyzer darshan-convert darshan-diff darshan-parser darshan-stitch-logs jenkins-hash-gen
all: libdarshan-util.a darshan-null-logutils.o darshan-analyzer darshan-convert darshan-diff darshan-parser darshan-merge jenkins-hash-gen
DESTDIR =
srcdir = @srcdir@
......@@ -107,7 +107,7 @@ darshan-diff: darshan-diff.c darshan-logutils.h $(DARSHAN_LOG_FORMAT) $(DARSHAN_
darshan-parser: darshan-parser.c darshan-logutils.h $(DARSHAN_LOG_FORMAT) $(DARSHAN_MOD_LOGUTIL_HEADERS) $(DARSHAN_MOD_LOG_FORMATS) libdarshan-util.a | uthash-1.9.2
$(CC) $(CFLAGS) $(LDFLAGS) $< libdarshan-util.a -o $@ $(LIBS)
darshan-stitch-logs: darshan-stitch-logs.c darshan-logutils.h $(DARSHAN_LOG_FORMAT) $(DARSHAN_MOD_LOGUTIL_HEADERS) $(DARSHAN_MOD_LOG_FORMATS) libdarshan-util.a | uthash-1.9.2
darshan-merge: darshan-merge.c darshan-logutils.h $(DARSHAN_LOG_FORMAT) $(DARSHAN_MOD_LOGUTIL_HEADERS) $(DARSHAN_MOD_LOG_FORMATS) libdarshan-util.a | uthash-1.9.2
$(CC) $(CFLAGS) $(LDFLAGS) $< libdarshan-util.a -o $@ $(LIBS)
#test/gztest: test/gztest.c mktestdir
......@@ -125,7 +125,7 @@ install:: all
install -m 755 darshan-convert $(bindir)
install -m 755 darshan-diff $(bindir)
install -m 755 darshan-parser $(bindir)
install -m 755 darshan-stitch-logs $(bindir)
install -m 755 darshan-merge $(bindir)
install -m 755 $(srcdir)/darshan-summary-per-file.sh $(bindir)
install -m 755 libdarshan-util.a $(libdir)
ifeq ($(DARSHAN_ENABLE_SHARED),1)
......@@ -159,7 +159,7 @@ endif
clean::
rm -f *.o *.po *.a *.so darshan-analyzer darshan-convert darshan-parser darshan-stitch-logs jenkins-hash-gen
rm -f *.o *.po *.a *.so darshan-analyzer darshan-convert darshan-parser darshan-merge jenkins-hash-gen
distclean:: clean
rm -f darshan-runtime-config.h aclocal.m4 autom4te.cache/* config.status config.log Makefile util/bin/darshan-job-summary.pl
......
......@@ -11,11 +11,6 @@
#define DEF_MOD_BUF_SIZE 1024 /* 1 KiB is enough for all current mod records ... */
/* TODO: are there any checks we should do to ensure tmp logs belong to the same job */
/* we can't specifically check the job id, since the pid is used if no job scheduler */
/* TODO: how do we set the output logfile name to be unique, and have necessary semantic info contained */
/* TODO: set job end timestamp? */
struct darshan_shared_record_ref
......@@ -28,20 +23,25 @@ struct darshan_shared_record_ref
/*
 * Print the command-line help text to stderr and terminate the program
 * with exit status 1; this function does not return to its caller.
 *
 * exename: program name substituted into the "Usage:" line(s).
 *
 * NOTE(review): this span comes from a rendered diff, so both the former
 * positional form (<tmp_dir> <job_id>) and the --output-dir form of the
 * help text appear below -- confirm which lines belong to the current file.
 */
void usage(char *exename)
{
fprintf(stderr, "Usage: %s [options] <tmp_dir> <job_id>\n", exename);
fprintf(stderr, " TODO: description.\n");
fprintf(stderr, " --shared-redux Reduce globally shared records into a single record.\n");
fprintf(stderr, "Usage: %s --output-dir <output_dir> [options] <input-logs>\n", exename);
fprintf(stderr, "This utility merges multiple Darshan log files into a single output log file.\n");
fprintf(stderr, "Options:\n");
fprintf(stderr, "\t--output-dir\t(REQUIRED) Output directory to store output log file in.\n");
fprintf(stderr, "\t--output-name\tOutput log file name. If unspecified, name generated automatically.\n");
fprintf(stderr, "\t--shared-redux\tReduce globally shared records into a single record.\n");
/* usage errors are reported as a failing exit status */
exit(1);
}
void parse_args(int argc, char **argv, char **tmplog_dir, int *tmplog_jobid,
int *shared_redux)
void parse_args(int argc, char **argv, char ***infile_list, int *n_files,
char **outlog_dir, char **outlog_name, int *shared_redux)
{
int index;
static struct option long_opts[] =
{
{"shared-redux", no_argument, NULL, 's'},
{"output-dir", required_argument, NULL, 'd'},
{"output-name", required_argument, NULL, 'n'},
{0, 0, 0, 0}
};
......@@ -58,6 +58,12 @@ void parse_args(int argc, char **argv, char **tmplog_dir, int *tmplog_jobid,
case 's':
*shared_redux = 1;
break;
case 'd':
*outlog_dir = optarg;
break;
case 'n':
*outlog_name = optarg;
break;
case '?':
default:
usage(argv[0]);
......@@ -65,42 +71,20 @@ void parse_args(int argc, char **argv, char **tmplog_dir, int *tmplog_jobid,
}
}
if(optind + 2 == argc)
{
*tmplog_dir = argv[optind];
*tmplog_jobid = atoi(argv[optind+1]);
}
else
if(*outlog_dir == NULL)
{
usage(argv[0]);
}
return;
}
*infile_list = &argv[optind];
*n_files = argc - optind;
int logfile_path_comp(const void *a, const void *b)
{
char *pathA = *(char **)a;
char *pathB = *(char **)b;
char *pathA_rank_s, *pathB_rank_s;
int pathA_rank, pathB_rank;
/* extract the process rank number from end of each log file path */
pathA_rank_s = strrchr(pathA, '.') + 1;
pathA_rank = atoi(pathA_rank_s);
pathB_rank_s = strrchr(pathB, '.') + 1;
pathB_rank = atoi(pathB_rank_s);
if(pathA_rank < pathB_rank)
return(-1);
else if(pathA_rank > pathB_rank)
return(1);
else
return(0);
return;
}
int build_mod_shared_rec_hash(glob_t *globbuf, darshan_module_id mod_id,
int nprocs, char *mod_buf, struct darshan_shared_record_ref **shared_rec_hash)
int build_mod_shared_rec_hash(char **infile_list, int n_infiles,
darshan_module_id mod_id, int nprocs, char *mod_buf,
struct darshan_shared_record_ref **shared_rec_hash)
{
darshan_fd in_fd;
struct darshan_base_record *base_rec;
......@@ -110,14 +94,14 @@ int build_mod_shared_rec_hash(glob_t *globbuf, darshan_module_id mod_id,
int i;
/* loop over each input log file */
for(i = 0; i < globbuf->gl_pathc; i++)
for(i = 0; i < n_infiles; i++)
{
in_fd = darshan_log_open(globbuf->gl_pathv[i]);
in_fd = darshan_log_open(infile_list[i]);
if(in_fd == NULL)
{
fprintf(stderr,
"Error: unable to open input Darshan log file %s.\n",
globbuf->gl_pathv[i]);
infile_list[i]);
return(-1);
}
......@@ -167,7 +151,7 @@ int build_mod_shared_rec_hash(glob_t *globbuf, darshan_module_id mod_id,
{
fprintf(stderr,
"Error: unable to read %s module record from input log file %s.\n",
darshan_module_names[mod_id], globbuf->gl_pathv[i]);
darshan_module_names[mod_id], infile_list[i]);
darshan_log_close(in_fd);
return(-1);
}
......@@ -190,20 +174,20 @@ int build_mod_shared_rec_hash(glob_t *globbuf, darshan_module_id mod_id,
int main(int argc, char *argv[])
{
char **infile_list;
int n_infiles;
int shared_redux;
char *tmplog_dir;
int job_id;
glob_t globbuf;
char glob_pstr[512];
char *stitch_logname = "/tmp/test123.darshan"; /* XXX default + configurable? */
darshan_fd in_fd, stitch_fd;
struct darshan_job in_job, stitch_job;
char stitch_exe[DARSHAN_EXE_LEN+1];
char **stitch_mnt_pts;
char **stitch_fs_types;
int stitch_mnt_count = 0;
char *outlog_dir = NULL;
char *outlog_name = NULL;
char outlog_path[512];
darshan_fd in_fd, merge_fd;
struct darshan_job in_job, merge_job;
char merge_exe[DARSHAN_EXE_LEN+1];
char **merge_mnt_pts;
char **merge_fs_types;
int merge_mnt_count = 0;
struct darshan_record_ref *in_hash = NULL;
struct darshan_record_ref *stitch_hash = NULL;
struct darshan_record_ref *merge_hash = NULL;
struct darshan_record_ref *ref, *tmp, *found;
struct darshan_shared_record_ref *shared_rec_hash = NULL;
struct darshan_shared_record_ref *sref, *stmp;
......@@ -213,42 +197,25 @@ int main(int argc, char *argv[])
int ret;
/* grab command line arguments */
parse_args(argc, argv, &tmplog_dir, &job_id, &shared_redux);
/* construct the list of input log files to stitch together */
snprintf(glob_pstr, 512, "%s/darshan_job%d*", tmplog_dir, job_id);
parse_args(argc, argv, &infile_list, &n_infiles, &outlog_dir,
&outlog_name, &shared_redux);
ret = glob(glob_pstr, GLOB_NOSORT, NULL, &globbuf);
if(ret != 0)
{
fprintf(stderr,
"Error: unable to construct list of input Darshan log files.\n");
return(-1);
}
/* sort the file list according to the rank id appended to each logfile name */
/* NOTE: we don't rely on glob's default alphabetic sorting, because it won't
* sort by ascending ranks if pid's are used for job ids, for instance
*/
qsort(globbuf.gl_pathv, globbuf.gl_pathc, sizeof(char *), logfile_path_comp);
memset(&stitch_job, 0, sizeof(struct darshan_job));
memset(&merge_job, 0, sizeof(struct darshan_job));
/* first pass at stitching together logs:
/* first pass at merging together logs:
* - compose output job-level metadata structure (including exe & mount data)
* - compose output record_id->file_name mapping
*/
for(i = 0; i < globbuf.gl_pathc; i++)
for(i = 0; i < n_infiles; i++)
{
memset(&in_job, 0, sizeof(struct darshan_job));
in_fd = darshan_log_open(globbuf.gl_pathv[i]);
in_fd = darshan_log_open(infile_list[i]);
if(in_fd == NULL)
{
fprintf(stderr,
"Error: unable to open input Darshan log file %s.\n",
globbuf.gl_pathv[i]);
globfree(&globbuf);
infile_list[i]);
return(-1);
}
......@@ -258,9 +225,8 @@ int main(int argc, char *argv[])
{
fprintf(stderr,
"Error: unable to read job data from input Darshan log file %s.\n",
globbuf.gl_pathv[i]);
infile_list[i]);
darshan_log_close(in_fd);
globfree(&globbuf);
return(-1);
}
......@@ -273,47 +239,44 @@ int main(int argc, char *argv[])
{
fprintf(stderr,
"Error: potentially corrupt data found in input log file %s.\n",
globbuf.gl_pathv[i]);
infile_list[i]);
darshan_log_close(in_fd);
globfree(&globbuf);
return(-1);
}
if(i == 0)
{
/* get job data, exe, & mounts directly from the first input log */
memcpy(&stitch_job, &in_job, sizeof(struct darshan_job));
memcpy(&merge_job, &in_job, sizeof(struct darshan_job));
ret = darshan_log_getexe(in_fd, stitch_exe);
ret = darshan_log_getexe(in_fd, merge_exe);
if(ret < 0)
{
fprintf(stderr,
"Error: unable to read exe string from Darshan log file %s.\n",
globbuf.gl_pathv[i]);
"Error: unable to read exe string from input Darshan log file %s.\n",
infile_list[i]);
darshan_log_close(in_fd);
globfree(&globbuf);
return(-1);
}
ret = darshan_log_getmounts(in_fd, &stitch_mnt_pts,
&stitch_fs_types, &stitch_mnt_count);
ret = darshan_log_getmounts(in_fd, &merge_mnt_pts,
&merge_fs_types, &merge_mnt_count);
if(ret < 0)
{
fprintf(stderr,
"Error: unable to read mount info from Darshan log file %s.\n",
globbuf.gl_pathv[i]);
"Error: unable to read mount info from input Darshan log file %s.\n",
infile_list[i]);
darshan_log_close(in_fd);
globfree(&globbuf);
return(-1);
}
}
else
{
/* potentially update job timestamps using remaining logs */
if(in_job.start_time < stitch_job.start_time)
stitch_job.start_time = in_job.start_time;
if(in_job.end_time > stitch_job.end_time)
stitch_job.end_time = in_job.end_time;
if(in_job.start_time < merge_job.start_time)
merge_job.start_time = in_job.start_time;
if(in_job.end_time > merge_job.end_time)
merge_job.end_time = in_job.end_time;
}
/* read the hash of ids->names for the input log */
......@@ -322,9 +285,8 @@ int main(int argc, char *argv[])
{
fprintf(stderr,
"Error: unable to read job data from input Darshan log file %s.\n",
globbuf.gl_pathv[i]);
infile_list[i]);
darshan_log_close(in_fd);
globfree(&globbuf);
return(-1);
}
......@@ -333,17 +295,16 @@ int main(int argc, char *argv[])
*/
HASH_ITER(hlink, in_hash, ref, tmp)
{
HASH_FIND(hlink, stitch_hash, &(ref->id), sizeof(darshan_record_id), found);
HASH_FIND(hlink, merge_hash, &(ref->id), sizeof(darshan_record_id), found);
if(!found)
{
HASH_ADD(hlink, stitch_hash, id, sizeof(darshan_record_id), ref);
HASH_ADD(hlink, merge_hash, id, sizeof(darshan_record_id), ref);
}
else if(strcmp(ref->name, found->name))
{
fprintf(stderr,
"Error: invalid Darshan record table entry.\n");
darshan_log_close(in_fd);
globfree(&globbuf);
return(-1);
}
}
......@@ -351,59 +312,61 @@ int main(int argc, char *argv[])
darshan_log_close(in_fd);
}
/* create the output "stitched together" log */
stitch_fd = darshan_log_create(stitch_logname, DARSHAN_ZLIB_COMP, 1);
if(stitch_fd == NULL)
if(!outlog_name)
{
outlog_name = "test123.darshan";
}
sprintf(outlog_path, "%s/%s", outlog_dir, outlog_name);
/* create the output "merged" log */
merge_fd = darshan_log_create(outlog_path, DARSHAN_ZLIB_COMP, 1);
if(merge_fd == NULL)
{
fprintf(stderr, "Error: unable to create output darshan log.\n");
globfree(&globbuf);
return(-1);
}
/* write the darshan job info, exe string, and mount data to output file */
ret = darshan_log_putjob(stitch_fd, &stitch_job);
ret = darshan_log_putjob(merge_fd, &merge_job);
if(ret < 0)
{
fprintf(stderr, "Error: unable to write job data to output darshan log.\n");
globfree(&globbuf);
darshan_log_close(stitch_fd);
unlink(stitch_logname);
darshan_log_close(merge_fd);
unlink(outlog_path);
return(-1);
}
ret = darshan_log_putexe(stitch_fd, stitch_exe);
ret = darshan_log_putexe(merge_fd, merge_exe);
if(ret < 0)
{
fprintf(stderr, "Error: unable to write exe string to output darshan log.\n");
globfree(&globbuf);
darshan_log_close(stitch_fd);
unlink(stitch_logname);
darshan_log_close(merge_fd);
unlink(outlog_path);
return(-1);
}
ret = darshan_log_putmounts(stitch_fd, stitch_mnt_pts, stitch_fs_types, stitch_mnt_count);
ret = darshan_log_putmounts(merge_fd, merge_mnt_pts, merge_fs_types, merge_mnt_count);
if(ret < 0)
{
fprintf(stderr, "Error: unable to write mount data to output darshan log.\n");
globfree(&globbuf);
darshan_log_close(stitch_fd);
unlink(stitch_logname);
darshan_log_close(merge_fd);
unlink(outlog_path);
return(-1);
}
/* write the stitched together table of records to output file */
ret = darshan_log_puthash(stitch_fd, stitch_hash);
/* write the merged table of records to output file */
ret = darshan_log_puthash(merge_fd, merge_hash);
if(ret < 0)
{
fprintf(stderr, "Error: unable to write record table to output darshan log.\n");
globfree(&globbuf);
darshan_log_close(stitch_fd);
unlink(stitch_logname);
darshan_log_close(merge_fd);
unlink(outlog_path);
return(-1);
}
/* iterate over active darshan modules and gather module data to write
* to the stitched together output log
* to the merged output log
*/
for(i = 0; i < DARSHAN_MAX_MODS; i++)
{
......@@ -412,33 +375,30 @@ int main(int argc, char *argv[])
if(shared_redux)
{
/* build the hash of records shared globally by this module */
ret = build_mod_shared_rec_hash(&globbuf, i, stitch_job.nprocs,
mod_buf, &shared_rec_hash);
ret = build_mod_shared_rec_hash(infile_list, n_infiles, i,
merge_job.nprocs, mod_buf, &shared_rec_hash);
if(ret < 0)
{
fprintf(stderr,
"Error: unable to build list of %s module's shared records.\n",
darshan_module_names[i]);
globfree(&globbuf);
darshan_log_close(stitch_fd);
unlink(stitch_logname);
darshan_log_close(merge_fd);
unlink(outlog_path);
return(-1);
}
}
for(j = 0; j < globbuf.gl_pathc; j++)
for(j = 0; j < n_infiles; j++)
{
in_fd = darshan_log_open(globbuf.gl_pathv[j]);
in_fd = darshan_log_open(infile_list[j]);
if(in_fd == NULL)
{
fprintf(stderr,
"Error: unable to open input Darshan log file %s.\n",
globbuf.gl_pathv[j]);
globfree(&globbuf);
darshan_log_close(in_fd);
darshan_log_close(stitch_fd);
unlink(stitch_logname);
infile_list[j]);
darshan_log_close(merge_fd);
unlink(outlog_path);
return(-1);
}
......@@ -447,15 +407,15 @@ int main(int argc, char *argv[])
/* write out the shared records first */
HASH_ITER(hlink, shared_rec_hash, sref, stmp)
{
ret = mod_logutils[i]->log_put_record(stitch_fd, sref->agg_rec, in_fd->mod_ver[i]);
ret = mod_logutils[i]->log_put_record(merge_fd, sref->agg_rec, in_fd->mod_ver[i]);
if(ret < 0)
{
fprintf(stderr,
"Error: unable to write %s module record to output darshan log.\n",
darshan_module_names[i]);
globfree(&globbuf);
darshan_log_close(stitch_fd);
unlink(stitch_logname);
darshan_log_close(in_fd);
darshan_log_close(merge_fd);
unlink(outlog_path);
return(-1);
}
}
......@@ -470,16 +430,15 @@ int main(int argc, char *argv[])
if(sref)
continue; /* skip shared records */
ret = mod_logutils[i]->log_put_record(stitch_fd, mod_buf, in_fd->mod_ver[i]);
ret = mod_logutils[i]->log_put_record(merge_fd, mod_buf, in_fd->mod_ver[i]);
if(ret < 0)
{
fprintf(stderr,
"Error: unable to write %s module record to output log file %s.\n",
darshan_module_names[i], globbuf.gl_pathv[j]);
globfree(&globbuf);
darshan_module_names[i], infile_list[j]);
darshan_log_close(in_fd);
darshan_log_close(stitch_fd);
unlink(stitch_logname);
darshan_log_close(merge_fd);
unlink(outlog_path);
return(-1);
}
}
......@@ -487,11 +446,10 @@ int main(int argc, char *argv[])
{
fprintf(stderr,
"Error: unable to read %s module record from input log file %s.\n",
darshan_module_names[i], globbuf.gl_pathv[j]);
globfree(&globbuf);
darshan_module_names[i], infile_list[j]);
darshan_log_close(in_fd);
darshan_log_close(stitch_fd);
unlink(stitch_logname);
darshan_log_close(merge_fd);
unlink(outlog_path);
return(-1);
}
......@@ -509,8 +467,16 @@ int main(int argc, char *argv[])
}
}
darshan_log_close(stitch_fd);
globfree(&globbuf);
darshan_log_close(merge_fd);
return(0);
}
/*
* Local variables:
* c-indent-level: 4
* c-basic-offset: 4
* End:
*
* vim: ts=8 sts=4 sw=4 expandtab
*/
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment