Commit 56d9416e authored by Philip Carns

Add the ability to detect, open, and close bz2 files

- Reading and writing bz2 files is not supported yet; we probably need to add
  function pointers or wrappers so that the logutils routines that perform I/O
  are not littered with logic to switch between compression formats.


git-svn-id: https://svn.mcs.anl.gov/repos/darshan/trunk@434 3b7491f3-a168-0410-bf4b-c445ed680a29
parent 74c16584
......@@ -33,6 +33,8 @@ CFLAGS_MPI_SHARED = -I . -I $(srcdir) @CFLAGS@ @CPPFLAGS@ -D_LARGEFILE64_SOURCE
CC=@MPICC@
LD=@MPICC@
LIBS = -lz @LIBBZ2@
lib::
@mkdir -p $@
......@@ -43,19 +45,19 @@ uthash-1.9.2:
tar xjvf $(srcdir)/extern/uthash-1.9.2.tar.bz2
darshan-parser: darshan-parser.c darshan.h darshan-log-format.h darshan-logutils.h darshan-logutils.o | uthash-1.9.2
gcc $(CFLAGS) -lz $< darshan-logutils.o -o $@
gcc $(CFLAGS) $(LIBS) $< darshan-logutils.o -o $@
darshan-convert: darshan-convert.c darshan.h darshan-log-format.h darshan-logutils.h darshan-logutils.o | uthash-1.9.2
gcc $(CFLAGS) -lz $< darshan-logutils.o -o $@
gcc $(CFLAGS) $(LIBS) $< darshan-logutils.o -o $@
darshan-analyzer: darshan-analyzer.c darshan.h darshan-log-format.h darshan-logutils.h darshan-logutils.o
gcc $(CFLAGS) -lz $< darshan-logutils.o -o $@
gcc $(CFLAGS) $(LIBS) $< darshan-logutils.o -o $@
darshan-log-params: darshan-log-params.c darshan-log-format.h
gcc $(CFLAGS) -lz $< -o $@
gcc $(CFLAGS) $(LIBS) $< -o $@
darshan-diff: darshan-diff.o darshan.h darshan-log-format.h darshan-logutils.o darshan-logutils.h
gcc $(CFLAGS) -lz $< darshan-logutils.o -o $@
gcc $(CFLAGS) $(LIBS) $< darshan-logutils.o -o $@
darshan-diff.o: darshan-diff.c
gcc $(CFLAGS) -c $< -o $@
darshan-logutils.o: darshan-logutils.c
......
......@@ -605,6 +605,7 @@ PRI_MACROS_BROKEN
HAVE_PDFLATEX
MPICC
HAVE_MPICC
LIBBZ2
EGREP
GREP
CPP
......@@ -3726,6 +3727,8 @@ fi
$as_echo_n "checking bzlib in ${BZLIB_HOME}... " >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5
$as_echo "ok" >&6; }
LIBBZ2=-lbz2
else
#
# If either header or library was not found, revert and bomb
......@@ -3738,6 +3741,8 @@ $as_echo_n "checking bzlib in ${BZLIB_HOME}... " >&6; }
$as_echo "failed" >&6; }
# Don't fail; this is optional in Darshan
# AC_MSG_ERROR(either specify a valid bzlib installation with --with-bzlib=DIR or disable bzlib usage with --without-bzlib)
# TODO: it would be nice if this showed up at the _end_ of
# configure...
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: libbz2 not found; Darshan utilities will use gzip only." >&5
$as_echo "$as_me: WARNING: libbz2 not found; Darshan utilities will use gzip only." >&2;}
fi
......
......@@ -8,7 +8,12 @@
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "darshan-logutils.h"
/* isn't there a clever c way to avoid this? */
......@@ -214,12 +219,71 @@ static void shift_missing_1_21(struct darshan_file* file);
/* a rather crude API for accessing raw binary darshan files */
darshan_fd darshan_log_open(const char *name, const char* mode)
{
int test_fd;
uint8_t magic[2];
int ret;
int try_bz2 = 1;
int len = strlen(name);
darshan_fd tmp_fd = malloc(sizeof(*tmp_fd));
if(!tmp_fd)
return(NULL);
memset(tmp_fd, 0, sizeof(*tmp_fd));
#ifdef HAVE_LIBBZ2
    /* Decide whether this log should be handled by libbz2.  try_bz2 starts
     * out enabled and is cleared below when the file contents (read mode) or
     * the file name (write mode) indicate that the log is not bzip2.
     */
    if(strcmp(mode, "r") == 0)
    {
        /* Try to detect if existing file is a bzip2 file or not.  Both
         * libbz2 and libz will fall back to normal I/O (without compression)
         * automatically, so we need to do some detection manually up front
         * in order to get a chance to try both compression formats.
         */
        test_fd = open(name, O_RDONLY);
        /* open() reports failure with -1; 0 is a valid descriptor */
        if(test_fd < 0)
        {
            perror("open");
            free(tmp_fd);
            return(NULL);
        }

        ret = read(test_fd, magic, sizeof(magic));
        /* the descriptor is only needed for the probe; release it on every
         * path before inspecting the result
         */
        close(test_fd);
        if(ret != (int)sizeof(magic))
        {
            fprintf(stderr, "Error: failed to read any data from %s.\n",
                name);
            free(tmp_fd);
            return(NULL);
        }

        /* header magic for bz2 is the two bytes 'B' 'Z'; the file is only a
         * bz2 candidate if BOTH bytes match
         */
        if(magic[0] != 0x42 || magic[1] != 0x5A)
        {
            try_bz2 = 0;
        }
    }

    if(strcmp(mode, "w") == 0)
    {
        /* TODO: is this the behavior that we want? */
        /* if we are writing a new file, go by the file extension to tell
         * whether to use bz2 or not?
         */
        /* compare the last three characters of the name (not including the
         * terminating NUL) against "bz2"
         */
        if(len >= 3 && strcmp(name + len - 3, "bz2") == 0)
            try_bz2 = 1;
        else
            try_bz2 = 0;
    }

    if(try_bz2)
    {
        tmp_fd->bzf = BZ2_bzopen(name, mode);
        if(tmp_fd->bzf)
        {
            return(tmp_fd);
        }
        /* if libbz2 could not open the file, fall through to the gzip path */
    }
#endif
tmp_fd->gzf = gzopen(name, mode);
if(!tmp_fd->gzf)
{
......@@ -236,6 +300,10 @@ darshan_fd darshan_log_open(const char *name, const char* mode)
int darshan_log_getjob(darshan_fd file, struct darshan_job *job)
{
int ret;
#ifdef HAVE_LIBBZ2
assert(file->bzf == NULL);
#endif
gzseek(file->gzf, 0, SEEK_SET);
......@@ -303,6 +371,10 @@ int darshan_log_putjob(darshan_fd file, struct darshan_job *job)
z_off_t off;
int ret;
#ifdef HAVE_LIBBZ2
assert(file->bzf == NULL);
#endif
off = gzseek(file->gzf, 0, SEEK_SET);
if (off != 0)
{
......@@ -348,6 +420,10 @@ int darshan_log_putfile(darshan_fd fd, struct darshan_job *job, struct darshan_f
z_off_t off;
int ret;
#ifdef HAVE_LIBBZ2
assert(fd->bzf == NULL);
#endif
if(gztell(fd->gzf) < CP_JOB_RECORD_SIZE)
{
off = gzseek(fd->gzf, CP_JOB_RECORD_SIZE, SEEK_SET);
......@@ -382,6 +458,10 @@ int darshan_log_getmounts(darshan_fd fd, int64_t** devs, char*** mnt_pts, char**
int array_index = 0;
char buf[CP_EXE_LEN+1];
#ifdef HAVE_LIBBZ2
assert(fd->bzf == NULL);
#endif
gzseek(fd->gzf, fd->job_struct_size, SEEK_SET);
ret = gzread(fd->gzf, buf, (CP_EXE_LEN + 1));
......@@ -458,6 +538,10 @@ int darshan_log_putmounts(darshan_fd fd, int64_t* devs, char** mnt_pts, char** f
char line[1024];
int i;
#ifdef HAVE_LIBBZ2
assert(fd->bzf == NULL);
#endif
for(i=count-1; i>=0; i--)
{
sprintf(line, "\n%" PRId64 "\t%s\t%s",
......@@ -478,6 +562,10 @@ int darshan_log_getexe(darshan_fd fd, char *buf, int *flag)
int ret;
char* newline;
#ifdef HAVE_LIBBZ2
assert(fd->bzf == NULL);
#endif
gzseek(fd->gzf, fd->job_struct_size, SEEK_SET);
ret = gzread(fd->gzf, buf, (CP_EXE_LEN + 1));
......@@ -514,6 +602,10 @@ int darshan_log_putexe(darshan_fd fd, char *buf)
int ret;
int len;
#ifdef HAVE_LIBBZ2
assert(fd->bzf == NULL);
#endif
off = gzseek(fd->gzf, sizeof(struct darshan_job), SEEK_SET);
if (off != sizeof(struct darshan_job))
{
......@@ -536,7 +628,14 @@ int darshan_log_putexe(darshan_fd fd, char *buf)
/* darshan_log_close()
 *
 * Closes whichever compression stream is attached to the handle (bzip2
 * when built with HAVE_LIBBZ2 and the log was opened through libbz2,
 * otherwise gzip) and then frees the handle itself.  The handle must not
 * be used after this call.
 */
void darshan_log_close(darshan_fd file)
{
#ifdef HAVE_LIBBZ2
    if(file->bzf)
        BZ2_bzclose(file->bzf);
#endif

    /* close the gzip stream exactly once, and only if one was opened;
     * handles opened via libbz2 leave gzf unset
     */
    if(file->gzf)
        gzclose(file->gzf);

    free(file);
}
......@@ -879,6 +978,10 @@ static int getjob_internal_200(darshan_fd file, struct darshan_job *job)
{
int ret;
#ifdef HAVE_LIBBZ2
assert(file->bzf == NULL);
#endif
gzseek(file->gzf, 0, SEEK_SET);
ret = gzread(file->gzf, job, sizeof(*job));
......@@ -924,6 +1027,10 @@ static int getfile_internal_200(darshan_fd fd, struct darshan_job *job,
const char* err_string;
int i;
#ifdef HAVE_LIBBZ2
assert(fd->bzf == NULL);
#endif
if(gztell(fd->gzf) < CP_JOB_RECORD_SIZE)
gzseek(fd->gzf, CP_JOB_RECORD_SIZE, SEEK_SET);
......@@ -981,6 +1088,10 @@ static int getjob_internal_124(darshan_fd fd, struct darshan_job *job)
int32_t end_time;
int32_t nprocs;
#ifdef HAVE_LIBBZ2
assert(fd->bzf == NULL);
#endif
#ifdef WORDS_BIGENDIAN
fd->swap_flag = 0;
#else
......@@ -1093,6 +1204,10 @@ static int getfile_internal_1x(darshan_fd fd, struct darshan_job *job,
char* name_suffix;
int FILE_SIZE_1x = (32 + n_counters*8 + n_fcounters*8);
#ifdef HAVE_LIBBZ2
assert(fd->bzf == NULL);
#endif
memset(file, 0, sizeof(*file));
/* set file pointer if this is the first file record; otherwise pick up
......
......@@ -7,10 +7,16 @@
#define __DARSHAN_LOG_UTILS_H
#include <darshan-log-format.h>
#include <zlib.h>
#ifdef HAVE_LIBBZ2
#include <bzlib.h>
#endif
struct darshan_fd_s
{
gzFile gzf;
#ifdef HAVE_LIBBZ2
BZFILE* bzf;
#endif
int swap_flag;
char version[10];
int job_struct_size;
......
......@@ -72,6 +72,8 @@ then
AC_CHECK_LIB(bz2, BZ2_bzCompressInit)
AC_MSG_CHECKING(bzlib in ${BZLIB_HOME})
AC_MSG_RESULT(ok)
LIBBZ2=-lbz2
AC_SUBST(LIBBZ2)
else
#
# If either header or library was not found, revert and bomb
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment