Commit dcc9c7fe authored by Shane Snyder

initial hooks for bzip2 compression

parent eb0e5f5b
...@@ -54,7 +54,7 @@ int process_log(const char *fname, double *io_ratio, int *used_mpio, int *used_p ...@@ -54,7 +54,7 @@ int process_log(const char *fname, double *io_ratio, int *used_mpio, int *used_p
assert(hdf5_mod); assert(hdf5_mod);
assert(pnetcdf_mod); assert(pnetcdf_mod);
zfile = darshan_log_open(fname, "r"); zfile = darshan_log_open(fname);
if (zfile == NULL) if (zfile == NULL)
{ {
fprintf(stderr, "darshan_log_open() failed to open %s.\n", fname); fprintf(stderr, "darshan_log_open() failed to open %s.\n", fname);
......
...@@ -26,6 +26,7 @@ int usage (char *exename) ...@@ -26,6 +26,7 @@ int usage (char *exename)
fprintf(stderr, "Usage: %s [options] <infile> <outfile>\n", exename); fprintf(stderr, "Usage: %s [options] <infile> <outfile>\n", exename);
fprintf(stderr, " Converts darshan log from infile to outfile.\n"); fprintf(stderr, " Converts darshan log from infile to outfile.\n");
fprintf(stderr, " rewrites the log file into the newest format.\n"); fprintf(stderr, " rewrites the log file into the newest format.\n");
fprintf(stderr, " --bzip2 Use bzip2 compression instead of zlib.\n");
fprintf(stderr, " --obfuscate Obfuscate items in the log.\n"); fprintf(stderr, " --obfuscate Obfuscate items in the log.\n");
fprintf(stderr, " --key <key> Key to use when obfuscating.\n"); fprintf(stderr, " --key <key> Key to use when obfuscating.\n");
fprintf(stderr, " --annotate <string> Additional metadata to add.\n"); fprintf(stderr, " --annotate <string> Additional metadata to add.\n");
...@@ -36,13 +37,15 @@ int usage (char *exename) ...@@ -36,13 +37,15 @@ int usage (char *exename)
} }
void parse_args (int argc, char **argv, char **infile, char **outfile, void parse_args (int argc, char **argv, char **infile, char **outfile,
int *obfuscate, int *reset_md, int *key, char **annotate, uint64_t* hash) int *bzip2, int *obfuscate, int *reset_md, int *key,
char **annotate, uint64_t* hash)
{ {
int index; int index;
int ret; int ret;
static struct option long_opts[] = static struct option long_opts[] =
{ {
{"bzip2", 0, NULL, 'b'},
{"annotate", 1, NULL, 'a'}, {"annotate", 1, NULL, 'a'},
{"obfuscate", 0, NULL, 'o'}, {"obfuscate", 0, NULL, 'o'},
{"reset-md", 0, NULL, 'r'}, {"reset-md", 0, NULL, 'r'},
...@@ -52,6 +55,7 @@ void parse_args (int argc, char **argv, char **infile, char **outfile, ...@@ -52,6 +55,7 @@ void parse_args (int argc, char **argv, char **infile, char **outfile,
{ 0, 0, 0, 0 } { 0, 0, 0, 0 }
}; };
*bzip2 = 0;
*reset_md = 0; *reset_md = 0;
*hash = 0; *hash = 0;
...@@ -63,6 +67,9 @@ void parse_args (int argc, char **argv, char **infile, char **outfile, ...@@ -63,6 +67,9 @@ void parse_args (int argc, char **argv, char **infile, char **outfile,
switch(c) switch(c)
{ {
case 'b':
*bzip2 = 1;
break;
case 'a': case 'a':
*annotate = optarg; *annotate = optarg;
break; break;
...@@ -202,25 +209,30 @@ int main(int argc, char **argv) ...@@ -202,25 +209,30 @@ int main(int argc, char **argv)
char *mod_buf; char *mod_buf;
int mod_buf_sz; int mod_buf_sz;
char** fs_types; char** fs_types;
int bzip2;
enum darshan_comp_type comp_type;
int obfuscate = 0; int obfuscate = 0;
int key = 0; int key = 0;
char *annotation = NULL; char *annotation = NULL;
uint64_t hash; uint64_t hash;
int reset_md = 0; int reset_md = 0;
parse_args(argc, argv, &infile_name, &outfile_name, &obfuscate, &reset_md, &key, &annotation, &hash); parse_args(argc, argv, &infile_name, &outfile_name, &bzip2, &obfuscate,
&reset_md, &key, &annotation, &hash);
infile = darshan_log_open(infile_name, "r"); infile = darshan_log_open(infile_name);
if(!infile) if(!infile)
{ {
fprintf(stderr, "darshan_log_open() failed to open %s\n.", infile_name); fprintf(stderr, "darshan_log_open() failed to open %s\n.", infile_name);
return(-1); return(-1);
} }
outfile = darshan_log_open(outfile_name, "w"); comp_type = bzip2 ? comp_type = DARSHAN_BZIP2_COMP : DARSHAN_ZLIB_COMP;
outfile = darshan_log_create(outfile_name, comp_type);
if(!outfile) if(!outfile)
{ {
fprintf(stderr, "darshan_log_open() failed to open %s\n.", outfile_name); fprintf(stderr, "darshan_log_create() failed to create %s\n.", outfile_name);
darshan_log_close(infile); darshan_log_close(infile);
return(-1); return(-1);
} }
......
...@@ -19,15 +19,26 @@ ...@@ -19,15 +19,26 @@
#include "darshan-logutils.h" #include "darshan-logutils.h"
/* TODO: for log reads, we need to make sure the header has been read prior */
static int darshan_log_seek(darshan_fd fd, off_t offset); static int darshan_log_seek(darshan_fd fd, off_t offset);
static int darshan_log_read(darshan_fd fd, void *buf, int len); static int darshan_log_read(darshan_fd fd, void *buf, int len);
static int darshan_log_write(darshan_fd fd, void *buf, int len); static int darshan_log_write(darshan_fd fd, void *buf, int len);
static int darshan_decompress_buf(char* comp_buf, int comp_buf_sz, static int darshan_decompress_buf(char* comp_buf, int comp_buf_sz,
char* decomp_buf, int* inout_decomp_buf_sz); char* decomp_buf, int* inout_decomp_buf_sz,
enum darshan_comp_type comp_type);
static int darshan_compress_buf(char* decomp_buf, int decomp_buf_sz, static int darshan_compress_buf(char* decomp_buf, int decomp_buf_sz,
char* comp_buf, int* inout_comp_buf_sz,
enum darshan_comp_type comp_type);
static int darshan_zlib_decomp(char* comp_buf, int comp_buf_sz,
char* decomp_buf, int* inout_decomp_buf_sz);
static int darshan_zlib_comp(char* decomp_buf, int decomp_buf_sz,
char* comp_buf, int* inout_comp_buf_sz);
#ifdef HAVE_LIBBZ2
static int darshan_bzip2_decomp(char* comp_buf, int comp_buf_sz,
char* decomp_buf, int* inout_decomp_buf_sz);
static int darshan_bzip2_comp(char* decomp_buf, int decomp_buf_sz,
char* comp_buf, int* inout_comp_buf_sz); char* comp_buf, int* inout_comp_buf_sz);
#endif
/* TODO: can we make this s.t. we don't care about ordering (i.e., X macro it ) */ /* TODO: can we make this s.t. we don't care about ordering (i.e., X macro it ) */
struct darshan_mod_logutil_funcs *mod_logutils[DARSHAN_MAX_MODS] = struct darshan_mod_logutil_funcs *mod_logutils[DARSHAN_MAX_MODS] =
...@@ -52,39 +63,54 @@ struct darshan_mod_logutil_funcs *mod_logutils[DARSHAN_MAX_MODS] = ...@@ -52,39 +63,54 @@ struct darshan_mod_logutil_funcs *mod_logutils[DARSHAN_MAX_MODS] =
/* darshan_log_open() /* darshan_log_open()
* *
* open a darshan log file for reading/writing * open an existing darshan log file for reading only
* *
* returns a darshan_fd handle on success, NULL on failure * returns a darshan_fd handle on success, NULL on failure
*/ */
darshan_fd darshan_log_open(const char *name, const char *mode) darshan_fd darshan_log_open(const char *name)
{ {
darshan_fd tmp_fd; darshan_fd tmp_fd;
/* we only allow "w" or "r" modes, nothing fancy */
assert(strlen(mode) == 1);
assert(mode[0] == 'r' || mode[0] == 'w');
tmp_fd = malloc(sizeof(*tmp_fd)); tmp_fd = malloc(sizeof(*tmp_fd));
if(!tmp_fd) if(!tmp_fd)
return(NULL); return(NULL);
memset(tmp_fd, 0, sizeof(*tmp_fd)); memset(tmp_fd, 0, sizeof(*tmp_fd));
if(mode[0] == 'r') tmp_fd->fildes = open(name, O_RDONLY);
{ if(tmp_fd->fildes < 0)
tmp_fd->fildes = open(name, O_RDONLY);
}
else if (mode[0] == 'w')
{ {
/* TODO: permissions when creating? umask */ perror("darshan_log_open: ");
/* when writing, we create the log file making sure not to overwrite free(tmp_fd);
* an existing log tmp_fd = NULL;
*/
tmp_fd->fildes = open(name, O_WRONLY | O_CREAT | O_EXCL, 0400);
} }
return(tmp_fd);
}
/* darshan_log_create()
*
* create a darshan log file for writing with the given compression method
*
* returns a darshan_fd handle on success, NULL on failure
*/
darshan_fd darshan_log_create(const char *name, enum darshan_comp_type comp_type)
{
darshan_fd tmp_fd;
tmp_fd = malloc(sizeof(*tmp_fd));
if(!tmp_fd)
return(NULL);
memset(tmp_fd, 0, sizeof(*tmp_fd));
/* TODO: permissions when creating? umask */
/* when writing, we create the log file making sure not to overwrite
* an existing log
*/
tmp_fd->comp_type = comp_type;
tmp_fd->fildes = open(name, O_WRONLY | O_CREAT | O_EXCL, 0400);
if(tmp_fd->fildes < 0) if(tmp_fd->fildes < 0)
{ {
perror("darshan_log_open: "); perror("darshan_log_create: ");
free(tmp_fd); free(tmp_fd);
tmp_fd = NULL; tmp_fd = NULL;
} }
...@@ -153,6 +179,8 @@ int darshan_log_getheader(darshan_fd fd, struct darshan_header *header) ...@@ -153,6 +179,8 @@ int darshan_log_getheader(darshan_fd fd, struct darshan_header *header)
} }
} }
fd->comp_type = header->comp_type;
/* save the mapping of data within log file to this file descriptor */ /* save the mapping of data within log file to this file descriptor */
fd->job_map.off = sizeof(struct darshan_header); fd->job_map.off = sizeof(struct darshan_header);
fd->job_map.len = header->rec_map.off - fd->job_map.off; fd->job_map.len = header->rec_map.off - fd->job_map.off;
...@@ -214,6 +242,8 @@ int darshan_log_getjob(darshan_fd fd, struct darshan_job *job) ...@@ -214,6 +242,8 @@ int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
int job_buf_sz = DARSHAN_JOB_RECORD_SIZE; int job_buf_sz = DARSHAN_JOB_RECORD_SIZE;
int ret; int ret;
assert(fd->job_map.len > 0 && fd->job_map.off > 0);
/* allocate buffer to store compressed job info */ /* allocate buffer to store compressed job info */
comp_buf = malloc(fd->job_map.len); comp_buf = malloc(fd->job_map.len);
if(!comp_buf) if(!comp_buf)
...@@ -237,7 +267,8 @@ int darshan_log_getjob(darshan_fd fd, struct darshan_job *job) ...@@ -237,7 +267,8 @@ int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
} }
/* decompress the job data */ /* decompress the job data */
ret = darshan_decompress_buf(comp_buf, fd->job_map.len, job_buf, &job_buf_sz); ret = darshan_decompress_buf(comp_buf, fd->job_map.len,
job_buf, &job_buf_sz, fd->comp_type);
if(ret < 0) if(ret < 0)
{ {
fprintf(stderr, "Error: failed to decompress darshan job data.\n"); fprintf(stderr, "Error: failed to decompress darshan job data.\n");
...@@ -309,7 +340,8 @@ int darshan_log_putjob(darshan_fd fd, struct darshan_job *job) ...@@ -309,7 +340,8 @@ int darshan_log_putjob(darshan_fd fd, struct darshan_job *job)
comp_buf_sz = sizeof(*job); comp_buf_sz = sizeof(*job);
/* compress the job data */ /* compress the job data */
ret = darshan_compress_buf((char*)&job_copy, sizeof(*job), comp_buf, &comp_buf_sz); ret = darshan_compress_buf((char*)&job_copy, sizeof(*job),
comp_buf, &comp_buf_sz, fd->comp_type);
if(ret < 0) if(ret < 0)
{ {
fprintf(stderr, "Error: failed to decompress darshan job data.\n"); fprintf(stderr, "Error: failed to decompress darshan job data.\n");
...@@ -378,7 +410,7 @@ int darshan_log_putexe(darshan_fd fd, char *buf) ...@@ -378,7 +410,7 @@ int darshan_log_putexe(darshan_fd fd, char *buf)
len = strlen(buf); len = strlen(buf);
/* compress the input exe string */ /* compress the input exe string */
ret = darshan_compress_buf(buf, len, comp_buf, &comp_buf_sz); ret = darshan_compress_buf(buf, len, comp_buf, &comp_buf_sz, fd->comp_type);
if(ret < 0) if(ret < 0)
{ {
fprintf(stderr, "Error: unable to compress exe string.\n"); fprintf(stderr, "Error: unable to compress exe string.\n");
...@@ -498,7 +530,7 @@ int darshan_log_putmounts(darshan_fd fd, char** mnt_pts, char** fs_types, int co ...@@ -498,7 +530,7 @@ int darshan_log_putmounts(darshan_fd fd, char** mnt_pts, char** fs_types, int co
mnt_dat_sz += strlen(line); mnt_dat_sz += strlen(line);
} }
ret = darshan_compress_buf(mnt_dat, mnt_dat_sz, comp_buf, &comp_buf_sz); ret = darshan_compress_buf(mnt_dat, mnt_dat_sz, comp_buf, &comp_buf_sz, fd->comp_type);
if(ret < 0) if(ret < 0)
{ {
fprintf(stderr, "Error: unable to compress mount data.\n"); fprintf(stderr, "Error: unable to compress mount data.\n");
...@@ -564,7 +596,7 @@ int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash) ...@@ -564,7 +596,7 @@ int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash)
/* decompress the record hash buffer */ /* decompress the record hash buffer */
ret = darshan_decompress_buf(comp_buf, fd->rec_map.len, ret = darshan_decompress_buf(comp_buf, fd->rec_map.len,
hash_buf, &hash_buf_sz); hash_buf, &hash_buf_sz, fd->comp_type);
if(ret < 0) if(ret < 0)
{ {
fprintf(stderr, "Error: unable to decompress darshan job data.\n"); fprintf(stderr, "Error: unable to decompress darshan job data.\n");
...@@ -703,7 +735,8 @@ int darshan_log_puthash(darshan_fd fd, struct darshan_record_ref *hash) ...@@ -703,7 +735,8 @@ int darshan_log_puthash(darshan_fd fd, struct darshan_record_ref *hash)
comp_buf_sz = DARSHAN_DEF_COMP_BUF_SZ; comp_buf_sz = DARSHAN_DEF_COMP_BUF_SZ;
/* compress the record hash */ /* compress the record hash */
ret = darshan_compress_buf(hash_buf, hash_buf_sz, comp_buf, &comp_buf_sz); ret = darshan_compress_buf(hash_buf, hash_buf_sz,
comp_buf, &comp_buf_sz, fd->comp_type);
if(ret < 0) if(ret < 0)
{ {
fprintf(stderr, "Error: unable to compress darshan record hash.\n"); fprintf(stderr, "Error: unable to compress darshan record hash.\n");
...@@ -776,7 +809,7 @@ int darshan_log_getmod(darshan_fd fd, darshan_module_id mod_id, ...@@ -776,7 +809,7 @@ int darshan_log_getmod(darshan_fd fd, darshan_module_id mod_id,
/* decompress this module's data */ /* decompress this module's data */
ret = darshan_decompress_buf(comp_buf, fd->mod_map[mod_id].len, ret = darshan_decompress_buf(comp_buf, fd->mod_map[mod_id].len,
mod_buf, mod_buf_sz); mod_buf, mod_buf_sz, fd->comp_type);
if(ret < 0) if(ret < 0)
{ {
fprintf(stderr, "Error: unable to decompress module %s data.\n", fprintf(stderr, "Error: unable to decompress module %s data.\n",
...@@ -820,7 +853,8 @@ int darshan_log_putmod(darshan_fd fd, darshan_module_id mod_id, ...@@ -820,7 +853,8 @@ int darshan_log_putmod(darshan_fd fd, darshan_module_id mod_id,
comp_buf_sz = mod_buf_sz; comp_buf_sz = mod_buf_sz;
/* compress the module's data */ /* compress the module's data */
ret = darshan_compress_buf(mod_buf, mod_buf_sz, comp_buf, &comp_buf_sz); ret = darshan_compress_buf(mod_buf, mod_buf_sz,
comp_buf, &comp_buf_sz, fd->comp_type);
if(ret < 0) if(ret < 0)
{ {
fprintf(stderr, "Error: unable to compress module %s data.\n", fprintf(stderr, "Error: unable to compress module %s data.\n",
...@@ -916,6 +950,32 @@ static int darshan_log_write(darshan_fd fd, void* buf, int len) ...@@ -916,6 +950,32 @@ static int darshan_log_write(darshan_fd fd, void* buf, int len)
/* return 0 on successful decompression, -1 on failure /* return 0 on successful decompression, -1 on failure
*/ */
static int darshan_decompress_buf(char* comp_buf, int comp_buf_sz, static int darshan_decompress_buf(char* comp_buf, int comp_buf_sz,
char* decomp_buf, int* inout_decomp_buf_sz,
enum darshan_comp_type comp_type)
{
int ret;
/* dispatch to the decompression routine that matches comp_type; the
 * buffer arguments are forwarded to the selected routine unchanged
 */
switch(comp_type)
{
case DARSHAN_ZLIB_COMP:
ret = darshan_zlib_decomp(comp_buf, comp_buf_sz,
decomp_buf, inout_decomp_buf_sz);
break;
#ifdef HAVE_LIBBZ2
/* the bzip2 case only exists when HAVE_LIBBZ2 is defined; without it a
 * DARSHAN_BZIP2_COMP request lands in the default (error) case below
 */
case DARSHAN_BZIP2_COMP:
ret = darshan_bzip2_decomp(comp_buf, comp_buf_sz,
decomp_buf, inout_decomp_buf_sz);
break;
#endif
default:
/* unrecognized (or not compiled-in) method: report and fail */
fprintf(stderr, "Error: invalid decompression method.\n");
return(-1);
}
/* hand back whatever the selected routine returned */
return(ret);
}
static int darshan_zlib_decomp(char* comp_buf, int comp_buf_sz,
char* decomp_buf, int* inout_decomp_buf_sz) char* decomp_buf, int* inout_decomp_buf_sz)
{ {
int ret; int ret;
...@@ -972,6 +1032,33 @@ static int darshan_decompress_buf(char* comp_buf, int comp_buf_sz, ...@@ -972,6 +1032,33 @@ static int darshan_decompress_buf(char* comp_buf, int comp_buf_sz,
} }
static int darshan_compress_buf(char* decomp_buf, int decomp_buf_sz, static int darshan_compress_buf(char* decomp_buf, int decomp_buf_sz,
char* comp_buf, int* inout_comp_buf_sz,
enum darshan_comp_type comp_type)
{
int ret;
/* dispatch to the compression routine that matches comp_type; the
 * buffer arguments are forwarded to the selected routine unchanged
 */
switch(comp_type)
{
case DARSHAN_ZLIB_COMP:
ret = darshan_zlib_comp(decomp_buf, decomp_buf_sz,
comp_buf, inout_comp_buf_sz);
break;
#ifdef HAVE_LIBBZ2
/* the bzip2 case only exists when HAVE_LIBBZ2 is defined; without it a
 * DARSHAN_BZIP2_COMP request lands in the default (error) case below
 */
case DARSHAN_BZIP2_COMP:
ret = darshan_bzip2_comp(decomp_buf, decomp_buf_sz,
comp_buf, inout_comp_buf_sz);
break;
#endif
default:
/* unrecognized (or not compiled-in) method: report and fail */
fprintf(stderr, "Error: invalid compression method.\n");
return(-1);
}
/* hand back whatever the selected routine returned */
return(ret);
}
static int darshan_zlib_comp(char* decomp_buf, int decomp_buf_sz,
char* comp_buf, int* inout_comp_buf_sz) char* comp_buf, int* inout_comp_buf_sz)
{ {
int ret; int ret;
...@@ -1024,6 +1111,20 @@ static int darshan_compress_buf(char* decomp_buf, int decomp_buf_sz, ...@@ -1024,6 +1111,20 @@ static int darshan_compress_buf(char* decomp_buf, int decomp_buf_sz,
return(0); return(0);
} }
#ifdef HAVE_LIBBZ2
/* bzip2 decompression hook -- placeholder only: no input is examined and
 * no output is produced; every call reports failure (-1)
 */
static int darshan_bzip2_decomp(char* comp_buf, int comp_buf_sz,
    char* decomp_buf, int* inout_decomp_buf_sz)
{
    const int not_supported = -1;

    /* arguments are intentionally unused by this stub */
    (void)comp_buf;
    (void)comp_buf_sz;
    (void)decomp_buf;
    (void)inout_decomp_buf_sz;

    return(not_supported);
}
/* bzip2 compression hook -- placeholder only: no input is examined and
 * no output is produced; every call reports failure (-1)
 */
static int darshan_bzip2_comp(char* decomp_buf, int decomp_buf_sz,
    char* comp_buf, int* inout_comp_buf_sz)
{
    const int not_supported = -1;

    /* arguments are intentionally unused by this stub */
    (void)decomp_buf;
    (void)decomp_buf_sz;
    (void)comp_buf;
    (void)inout_comp_buf_sz;

    return(not_supported);
}
#endif
/* /*
* Local variables: * Local variables:
* c-indent-level: 4 * c-indent-level: 4
......
...@@ -27,6 +27,7 @@ struct darshan_fd_s ...@@ -27,6 +27,7 @@ struct darshan_fd_s
{ {
int fildes; int fildes;
int64_t pos; int64_t pos;
enum darshan_comp_type comp_type;
char version[8]; char version[8];
int swap_flag; int swap_flag;
char *exe_mnt_data; char *exe_mnt_data;
...@@ -66,7 +67,8 @@ extern struct darshan_mod_logutil_funcs *mod_logutils[DARSHAN_MAX_MODS]; ...@@ -66,7 +67,8 @@ extern struct darshan_mod_logutil_funcs *mod_logutils[DARSHAN_MAX_MODS];
#include "darshan-hdf5-logutils.h" #include "darshan-hdf5-logutils.h"
#include "darshan-pnetcdf-logutils.h" #include "darshan-pnetcdf-logutils.h"
darshan_fd darshan_log_open(const char *name, const char* mode); darshan_fd darshan_log_open(const char *name);
darshan_fd darshan_log_create(const char *name, enum darshan_comp_type comp_type);
int darshan_log_getheader(darshan_fd fd, struct darshan_header *header); int darshan_log_getheader(darshan_fd fd, struct darshan_header *header);
int darshan_log_putheader(darshan_fd fd); int darshan_log_putheader(darshan_fd fd);
int darshan_log_getjob(darshan_fd fd, struct darshan_job *job); int darshan_log_getjob(darshan_fd fd, struct darshan_job *job);
......
...@@ -46,7 +46,7 @@ int main(int argc, char **argv) ...@@ -46,7 +46,7 @@ int main(int argc, char **argv)
assert(argc == 2); assert(argc == 2);
filename = argv[1]; filename = argv[1];
fd = darshan_log_open(filename, "r"); fd = darshan_log_open(filename);
if(!fd) if(!fd)
{ {
fprintf(stderr, "darshan_log_open() failed to open %s\n.", filename); fprintf(stderr, "darshan_log_open() failed to open %s\n.", filename);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment