Commit 56d9416e authored by Philip Carns

ability to detect, open, and close bz2 files

- No reading or writing of bz2 files yet; we probably need to add function
  pointers or wrappers so that the logutils routines that perform I/O
  aren't littered with logic to switch between compression formats.


git-svn-id: https://svn.mcs.anl.gov/repos/darshan/trunk@434 3b7491f3-a168-0410-bf4b-c445ed680a29
parent 74c16584
...@@ -33,6 +33,8 @@ CFLAGS_MPI_SHARED = -I . -I $(srcdir) @CFLAGS@ @CPPFLAGS@ -D_LARGEFILE64_SOURCE ...@@ -33,6 +33,8 @@ CFLAGS_MPI_SHARED = -I . -I $(srcdir) @CFLAGS@ @CPPFLAGS@ -D_LARGEFILE64_SOURCE
CC=@MPICC@ CC=@MPICC@
LD=@MPICC@ LD=@MPICC@
LIBS = -lz @LIBBZ2@
lib:: lib::
@mkdir -p $@ @mkdir -p $@
...@@ -43,19 +45,19 @@ uthash-1.9.2: ...@@ -43,19 +45,19 @@ uthash-1.9.2:
tar xjvf $(srcdir)/extern/uthash-1.9.2.tar.bz2 tar xjvf $(srcdir)/extern/uthash-1.9.2.tar.bz2
darshan-parser: darshan-parser.c darshan.h darshan-log-format.h darshan-logutils.h darshan-logutils.o | uthash-1.9.2 darshan-parser: darshan-parser.c darshan.h darshan-log-format.h darshan-logutils.h darshan-logutils.o | uthash-1.9.2
gcc $(CFLAGS) -lz $< darshan-logutils.o -o $@ gcc $(CFLAGS) $(LIBS) $< darshan-logutils.o -o $@
darshan-convert: darshan-convert.c darshan.h darshan-log-format.h darshan-logutils.h darshan-logutils.o | uthash-1.9.2 darshan-convert: darshan-convert.c darshan.h darshan-log-format.h darshan-logutils.h darshan-logutils.o | uthash-1.9.2
gcc $(CFLAGS) -lz $< darshan-logutils.o -o $@ gcc $(CFLAGS) $(LIBS) $< darshan-logutils.o -o $@
darshan-analyzer: darshan-analyzer.c darshan.h darshan-log-format.h darshan-logutils.h darshan-logutils.o darshan-analyzer: darshan-analyzer.c darshan.h darshan-log-format.h darshan-logutils.h darshan-logutils.o
gcc $(CFLAGS) -lz $< darshan-logutils.o -o $@ gcc $(CFLAGS) $(LIBS) $< darshan-logutils.o -o $@
darshan-log-params: darshan-log-params.c darshan-log-format.h darshan-log-params: darshan-log-params.c darshan-log-format.h
gcc $(CFLAGS) -lz $< -o $@ gcc $(CFLAGS) $(LIBS) $< -o $@
darshan-diff: darshan-diff.o darshan.h darshan-log-format.h darshan-logutils.o darshan-logutils.h darshan-diff: darshan-diff.o darshan.h darshan-log-format.h darshan-logutils.o darshan-logutils.h
gcc $(CFLAGS) -lz $< darshan-logutils.o -o $@ gcc $(CFLAGS) $(LIBS) $< darshan-logutils.o -o $@
darshan-diff.o: darshan-diff.c darshan-diff.o: darshan-diff.c
gcc $(CFLAGS) -c $< -o $@ gcc $(CFLAGS) -c $< -o $@
darshan-logutils.o: darshan-logutils.c darshan-logutils.o: darshan-logutils.c
......
...@@ -605,6 +605,7 @@ PRI_MACROS_BROKEN ...@@ -605,6 +605,7 @@ PRI_MACROS_BROKEN
HAVE_PDFLATEX HAVE_PDFLATEX
MPICC MPICC
HAVE_MPICC HAVE_MPICC
LIBBZ2
EGREP EGREP
GREP GREP
CPP CPP
...@@ -3726,6 +3727,8 @@ fi ...@@ -3726,6 +3727,8 @@ fi
$as_echo_n "checking bzlib in ${BZLIB_HOME}... " >&6; } $as_echo_n "checking bzlib in ${BZLIB_HOME}... " >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5 { $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5
$as_echo "ok" >&6; } $as_echo "ok" >&6; }
LIBBZ2=-lbz2
else else
# #
# If either header or library was not found, revert and bomb # If either header or library was not found, revert and bomb
...@@ -3738,6 +3741,8 @@ $as_echo_n "checking bzlib in ${BZLIB_HOME}... " >&6; } ...@@ -3738,6 +3741,8 @@ $as_echo_n "checking bzlib in ${BZLIB_HOME}... " >&6; }
$as_echo "failed" >&6; } $as_echo "failed" >&6; }
# Don't fail; this is optional in Darshan # Don't fail; this is optional in Darshan
# AC_MSG_ERROR(either specify a valid bzlib installation with --with-bzlib=DIR or disable bzlib usage with --without-bzlib) # AC_MSG_ERROR(either specify a valid bzlib installation with --with-bzlib=DIR or disable bzlib usage with --without-bzlib)
# TODO: it would be nice if this showed up at the _end_ of
# configure...
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: libbz2 not found; Darshan utilities will use gzip only." >&5 { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: libbz2 not found; Darshan utilities will use gzip only." >&5
$as_echo "$as_me: WARNING: libbz2 not found; Darshan utilities will use gzip only." >&2;} $as_echo "$as_me: WARNING: libbz2 not found; Darshan utilities will use gzip only." >&2;}
fi fi
......
...@@ -8,7 +8,12 @@ ...@@ -8,7 +8,12 @@
#include <string.h> #include <string.h>
#include <assert.h> #include <assert.h>
#include <stdlib.h> #include <stdlib.h>
#include <unistd.h>
#include <inttypes.h> #include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "darshan-logutils.h" #include "darshan-logutils.h"
/* isn't there a clever c way to avoid this? */ /* isn't there a clever c way to avoid this? */
...@@ -214,12 +219,71 @@ static void shift_missing_1_21(struct darshan_file* file); ...@@ -214,12 +219,71 @@ static void shift_missing_1_21(struct darshan_file* file);
/* a rather crude API for accessing raw binary darshan files */ /* a rather crude API for accessing raw binary darshan files */
darshan_fd darshan_log_open(const char *name, const char* mode) darshan_fd darshan_log_open(const char *name, const char* mode)
{ {
int test_fd;
uint8_t magic[2];
int ret;
int try_bz2 = 1;
int len = strlen(name);
darshan_fd tmp_fd = malloc(sizeof(*tmp_fd)); darshan_fd tmp_fd = malloc(sizeof(*tmp_fd));
if(!tmp_fd) if(!tmp_fd)
return(NULL); return(NULL);
memset(tmp_fd, 0, sizeof(*tmp_fd)); memset(tmp_fd, 0, sizeof(*tmp_fd));
#ifdef HAVE_LIBBZ2
    /* Decide whether to attempt a bzip2 open before falling back to gzopen().
     * try_bz2 starts out as 1; the "r" and "w" cases below may clear it.
     * Any other mode string leaves it untouched.
     */
    if(strcmp(mode, "r") == 0)
    {
        /* Try to detect if existing file is a bzip2 file or not.  Both
         * libbz2 and libz will fall back to normal I/O (without compression)
         * automatically, so we need to do some detection manually up front
         * in order to get a chance to try both compression formats.
         */
        test_fd = open(name, O_RDONLY);
        if(test_fd < 0)
        {
            /* open() reports failure with -1; 0 is a valid descriptor */
            perror("open");
            free(tmp_fd);
            return(NULL);
        }

        ret = read(test_fd, magic, sizeof(magic));
        close(test_fd);
        if(ret != (int)sizeof(magic))
        {
            fprintf(stderr, "Error: failed to read any data from %s.\n",
                name);
            free(tmp_fd);
            return(NULL);
        }

        /* header magic for bz2 is the byte pair 'B' (0x42), 'Z' (0x5A);
         * the file is only a bz2 candidate if BOTH bytes match
         */
        if(magic[0] != 0x42 || magic[1] != 0x5A)
        {
            try_bz2 = 0;
        }
    }
    else if(strcmp(mode, "w") == 0)
    {
        /* TODO: is this the behavior that we want? */
        /* if we are writing a new file, go by the file extension to tell
         * whether to use bz2 or not: the last three characters of the name
         * (indices len-3 .. len-1) must be "bz2"
         */
        if(len >= 3 && strcmp(name + len - 3, "bz2") == 0)
            try_bz2 = 1;
        else
            try_bz2 = 0;
    }

    if(try_bz2)
    {
        tmp_fd->bzf = BZ2_bzopen(name, mode);
        if(tmp_fd->bzf)
        {
            return(tmp_fd);
        }
        /* bz2 open failed; fall through and let gzopen() try */
    }
#endif
tmp_fd->gzf = gzopen(name, mode); tmp_fd->gzf = gzopen(name, mode);
if(!tmp_fd->gzf) if(!tmp_fd->gzf)
{ {
...@@ -236,6 +300,10 @@ darshan_fd darshan_log_open(const char *name, const char* mode) ...@@ -236,6 +300,10 @@ darshan_fd darshan_log_open(const char *name, const char* mode)
int darshan_log_getjob(darshan_fd file, struct darshan_job *job) int darshan_log_getjob(darshan_fd file, struct darshan_job *job)
{ {
int ret; int ret;
#ifdef HAVE_LIBBZ2
assert(file->bzf == NULL);
#endif
gzseek(file->gzf, 0, SEEK_SET); gzseek(file->gzf, 0, SEEK_SET);
...@@ -303,6 +371,10 @@ int darshan_log_putjob(darshan_fd file, struct darshan_job *job) ...@@ -303,6 +371,10 @@ int darshan_log_putjob(darshan_fd file, struct darshan_job *job)
z_off_t off; z_off_t off;
int ret; int ret;
#ifdef HAVE_LIBBZ2
assert(file->bzf == NULL);
#endif
off = gzseek(file->gzf, 0, SEEK_SET); off = gzseek(file->gzf, 0, SEEK_SET);
if (off != 0) if (off != 0)
{ {
...@@ -348,6 +420,10 @@ int darshan_log_putfile(darshan_fd fd, struct darshan_job *job, struct darshan_f ...@@ -348,6 +420,10 @@ int darshan_log_putfile(darshan_fd fd, struct darshan_job *job, struct darshan_f
z_off_t off; z_off_t off;
int ret; int ret;
#ifdef HAVE_LIBBZ2
assert(fd->bzf == NULL);
#endif
if(gztell(fd->gzf) < CP_JOB_RECORD_SIZE) if(gztell(fd->gzf) < CP_JOB_RECORD_SIZE)
{ {
off = gzseek(fd->gzf, CP_JOB_RECORD_SIZE, SEEK_SET); off = gzseek(fd->gzf, CP_JOB_RECORD_SIZE, SEEK_SET);
...@@ -382,6 +458,10 @@ int darshan_log_getmounts(darshan_fd fd, int64_t** devs, char*** mnt_pts, char** ...@@ -382,6 +458,10 @@ int darshan_log_getmounts(darshan_fd fd, int64_t** devs, char*** mnt_pts, char**
int array_index = 0; int array_index = 0;
char buf[CP_EXE_LEN+1]; char buf[CP_EXE_LEN+1];
#ifdef HAVE_LIBBZ2
assert(fd->bzf == NULL);
#endif
gzseek(fd->gzf, fd->job_struct_size, SEEK_SET); gzseek(fd->gzf, fd->job_struct_size, SEEK_SET);
ret = gzread(fd->gzf, buf, (CP_EXE_LEN + 1)); ret = gzread(fd->gzf, buf, (CP_EXE_LEN + 1));
...@@ -458,6 +538,10 @@ int darshan_log_putmounts(darshan_fd fd, int64_t* devs, char** mnt_pts, char** f ...@@ -458,6 +538,10 @@ int darshan_log_putmounts(darshan_fd fd, int64_t* devs, char** mnt_pts, char** f
char line[1024]; char line[1024];
int i; int i;
#ifdef HAVE_LIBBZ2
assert(fd->bzf == NULL);
#endif
for(i=count-1; i>=0; i--) for(i=count-1; i>=0; i--)
{ {
sprintf(line, "\n%" PRId64 "\t%s\t%s", sprintf(line, "\n%" PRId64 "\t%s\t%s",
...@@ -478,6 +562,10 @@ int darshan_log_getexe(darshan_fd fd, char *buf, int *flag) ...@@ -478,6 +562,10 @@ int darshan_log_getexe(darshan_fd fd, char *buf, int *flag)
int ret; int ret;
char* newline; char* newline;
#ifdef HAVE_LIBBZ2
assert(fd->bzf == NULL);
#endif
gzseek(fd->gzf, fd->job_struct_size, SEEK_SET); gzseek(fd->gzf, fd->job_struct_size, SEEK_SET);
ret = gzread(fd->gzf, buf, (CP_EXE_LEN + 1)); ret = gzread(fd->gzf, buf, (CP_EXE_LEN + 1));
...@@ -514,6 +602,10 @@ int darshan_log_putexe(darshan_fd fd, char *buf) ...@@ -514,6 +602,10 @@ int darshan_log_putexe(darshan_fd fd, char *buf)
int ret; int ret;
int len; int len;
#ifdef HAVE_LIBBZ2
assert(fd->bzf == NULL);
#endif
off = gzseek(fd->gzf, sizeof(struct darshan_job), SEEK_SET); off = gzseek(fd->gzf, sizeof(struct darshan_job), SEEK_SET);
if (off != sizeof(struct darshan_job)) if (off != sizeof(struct darshan_job))
{ {
...@@ -536,7 +628,14 @@ int darshan_log_putexe(darshan_fd fd, char *buf) ...@@ -536,7 +628,14 @@ int darshan_log_putexe(darshan_fd fd, char *buf)
/* darshan_log_close()
 *
 * Closes whichever compressed stream(s) the handle has open (the bzip2
 * stream when built with HAVE_LIBBZ2, and/or the gzip stream) and then
 * frees the handle itself.
 *
 * file: handle to close.  A NULL handle is ignored, so the result of a
 *       failed darshan_log_open() can be passed here safely.
 */
void darshan_log_close(darshan_fd file)
{
    if(!file)
        return;

#ifdef HAVE_LIBBZ2
    if(file->bzf)
        BZ2_bzclose(file->bzf);
#endif

    if(file->gzf)
        gzclose(file->gzf);

    free(file);
}
...@@ -879,6 +978,10 @@ static int getjob_internal_200(darshan_fd file, struct darshan_job *job) ...@@ -879,6 +978,10 @@ static int getjob_internal_200(darshan_fd file, struct darshan_job *job)
{ {
int ret; int ret;
#ifdef HAVE_LIBBZ2
assert(file->bzf == NULL);
#endif
gzseek(file->gzf, 0, SEEK_SET); gzseek(file->gzf, 0, SEEK_SET);
ret = gzread(file->gzf, job, sizeof(*job)); ret = gzread(file->gzf, job, sizeof(*job));
...@@ -924,6 +1027,10 @@ static int getfile_internal_200(darshan_fd fd, struct darshan_job *job, ...@@ -924,6 +1027,10 @@ static int getfile_internal_200(darshan_fd fd, struct darshan_job *job,
const char* err_string; const char* err_string;
int i; int i;
#ifdef HAVE_LIBBZ2
assert(fd->bzf == NULL);
#endif
if(gztell(fd->gzf) < CP_JOB_RECORD_SIZE) if(gztell(fd->gzf) < CP_JOB_RECORD_SIZE)
gzseek(fd->gzf, CP_JOB_RECORD_SIZE, SEEK_SET); gzseek(fd->gzf, CP_JOB_RECORD_SIZE, SEEK_SET);
...@@ -981,6 +1088,10 @@ static int getjob_internal_124(darshan_fd fd, struct darshan_job *job) ...@@ -981,6 +1088,10 @@ static int getjob_internal_124(darshan_fd fd, struct darshan_job *job)
int32_t end_time; int32_t end_time;
int32_t nprocs; int32_t nprocs;
#ifdef HAVE_LIBBZ2
assert(fd->bzf == NULL);
#endif
#ifdef WORDS_BIGENDIAN #ifdef WORDS_BIGENDIAN
fd->swap_flag = 0; fd->swap_flag = 0;
#else #else
...@@ -1093,6 +1204,10 @@ static int getfile_internal_1x(darshan_fd fd, struct darshan_job *job, ...@@ -1093,6 +1204,10 @@ static int getfile_internal_1x(darshan_fd fd, struct darshan_job *job,
char* name_suffix; char* name_suffix;
int FILE_SIZE_1x = (32 + n_counters*8 + n_fcounters*8); int FILE_SIZE_1x = (32 + n_counters*8 + n_fcounters*8);
#ifdef HAVE_LIBBZ2
assert(fd->bzf == NULL);
#endif
memset(file, 0, sizeof(*file)); memset(file, 0, sizeof(*file));
/* set file pointer if this is the first file record; otherwise pick up /* set file pointer if this is the first file record; otherwise pick up
......
...@@ -7,10 +7,16 @@ ...@@ -7,10 +7,16 @@
#define __DARSHAN_LOG_UTILS_H #define __DARSHAN_LOG_UTILS_H
#include <darshan-log-format.h> #include <darshan-log-format.h>
#include <zlib.h> #include <zlib.h>
#ifdef HAVE_LIBBZ2
#include <bzlib.h>
#endif
struct darshan_fd_s struct darshan_fd_s
{ {
gzFile gzf; gzFile gzf;
#ifdef HAVE_LIBBZ2
BZFILE* bzf;
#endif
int swap_flag; int swap_flag;
char version[10]; char version[10];
int job_struct_size; int job_struct_size;
......
...@@ -72,6 +72,8 @@ then ...@@ -72,6 +72,8 @@ then
AC_CHECK_LIB(bz2, BZ2_bzCompressInit) AC_CHECK_LIB(bz2, BZ2_bzCompressInit)
AC_MSG_CHECKING(bzlib in ${BZLIB_HOME}) AC_MSG_CHECKING(bzlib in ${BZLIB_HOME})
AC_MSG_RESULT(ok) AC_MSG_RESULT(ok)
LIBBZ2=-lbz2
AC_SUBST(LIBBZ2)
else else
# #
# If either header or library was not found, revert and bomb # If either header or library was not found, revert and bomb
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment