Commit 265bdaba authored by Shane Snyder's avatar Shane Snyder

Merge branch 'dev-modular' into mmap-dev

Conflicts:
	darshan-runtime/Makefile.in
	darshan-runtime/darshan-core.h
	darshan-runtime/lib/darshan-core.c
	darshan-util/Makefile.in
	darshan-util/darshan-bgq-logutils.c
	darshan-util/darshan-diff.c
	darshan-util/darshan-hdf5-logutils.c
	darshan-util/darshan-logutils.c
	darshan-util/darshan-mpiio-logutils.c
	darshan-util/darshan-null-logutils.c
	darshan-util/darshan-parser.c
	darshan-util/darshan-pnetcdf-logutils.c
	darshan-util/darshan-posix-logutils.c
parents f78c456f b770409e
......@@ -2,6 +2,20 @@
Darshan Release Change Log
--------------------------
Darshan-3.0.0-pre3
=============
* add module-specific version fields to header to allow utilities
to handle different versions of a module's I/O data for backwards
compatibility -- NOTE: this breaks the log file parsing for logs
obtained using Darshan-3.0.0-pre2 & Darshan-3.0.0-pre1
* bug fix in regression test scripts for setting proper environment
variables to use MPI profiling configuration for Fortran apps
* bug fix in bzip2 log writing implementation in darshan-logutils
* possible race conditions resolved in each module's shutdown code
* general code, comment, and documentation cleanup
* addition of module-specific counter descriptions printed prior
to parsing a module's I/O data in darshan-parser
Darshan-3.0.0-pre2
=============
* add fix to install appropriate headers for linking external
......
......@@ -7,28 +7,42 @@
#ifndef __DARSHAN_BGQ_LOG_FORMAT_H
#define __DARSHAN_BGQ_LOG_FORMAT_H
#include "darshan-log-format.h"
/* current BGQ log format version */
#define DARSHAN_BGQ_VER 1
#define BGQ_COUNTERS \
X(BGQ_CSJOBID, "control system jobid") \
X(BGQ_NNODES, "number of BGQ compute nodes") \
X(BGQ_RANKSPERNODE, "number of MPI ranks per node") \
X(BGQ_DDRPERNODE, "size in MB of DDR3 per node") \
X(BGQ_INODES, "number of i/o nodes") \
X(BGQ_ANODES, "dimension of A torus") \
X(BGQ_BNODES, "dimension of B torus") \
X(BGQ_CNODES, "dimension of C torus") \
X(BGQ_DNODES, "dimension of D torus") \
X(BGQ_ENODES, "dimension of E torus") \
X(BGQ_TORUSENABLED, "which dimensions are torus") \
X(BGQ_NUM_INDICES, "end of counters")
/* control system jobid*/\
X(BGQ_CSJOBID) \
/* number of BGQ compute nodes */\
X(BGQ_NNODES) \
/* number of MPI ranks per node */\
X(BGQ_RANKSPERNODE) \
/* size in MB of DDR3 per node */\
X(BGQ_DDRPERNODE) \
/* number of i/o nodes */\
X(BGQ_INODES) \
/* dimension of A torus */\
X(BGQ_ANODES) \
/* dimension of B torus */\
X(BGQ_BNODES) \
/* dimension of C torus */\
X(BGQ_CNODES) \
/* dimension of D torus */\
X(BGQ_DNODES) \
/* dimension of E torus */\
X(BGQ_ENODES) \
/* which dimensions are torus */\
X(BGQ_TORUSENABLED) \
/* end of counters */\
X(BGQ_NUM_INDICES)
#define BGQ_F_COUNTERS \
X(BGQ_F_TIMESTAMP, "timestamp when data was collected") \
X(BGQ_F_NUM_INDICES, "end of counters")
/* timestamp when data was collected */\
X(BGQ_F_TIMESTAMP) \
/* end of counters */\
X(BGQ_F_NUM_INDICES)
#define X(a, b) a,
#define X(a) a,
/* integer counters for the "BGQ" example module */
enum darshan_bgq_indices
{
......
......@@ -7,7 +7,8 @@
#ifndef __DARSHAN_HDF5_LOG_FORMAT_H
#define __DARSHAN_HDF5_LOG_FORMAT_H
#include "darshan-log-format.h"
/* current HDF5 log format version */
#define DARSHAN_HDF5_VER 1
#define HDF5_COUNTERS \
/* count of HDF5 opens */\
......
......@@ -31,44 +31,9 @@
/* max length of exe string within job record (not counting '\0') */
#define DARSHAN_EXE_LEN (DARSHAN_JOB_RECORD_SIZE - sizeof(struct darshan_job) - 1)
/* max number of modules that can be used in a darshan log */
#define DARSHAN_MAX_MODS 16
/* X-macro for keeping module ordering consistent */
/* NOTE: first val used to define module enum values,
* second val used to define module name strings, and
* third val is used to provide the name of a
* corresponding logutils structure for parsing module
* data out of the log file (only used in darshan-util
* component -- NULL can be passed if there are no
* logutil definitions)
*/
#define DARSHAN_MODULE_IDS \
X(DARSHAN_NULL_MOD, "NULL", NULL) \
X(DARSHAN_POSIX_MOD, "POSIX", &posix_logutils) \
X(DARSHAN_MPIIO_MOD, "MPI-IO", &mpiio_logutils) \
X(DARSHAN_HDF5_MOD, "HDF5", &hdf5_logutils) \
X(DARSHAN_PNETCDF_MOD, "PNETCDF", &pnetcdf_logutils) \
X(DARSHAN_BGQ_MOD, "BG/Q", &bgq_logutils)
/* unique identifiers to distinguish between available darshan modules */
/* NOTES: - valid ids range from [0...DARSHAN_MAX_MODS-1]
* - order of ids control module shutdown order (and consequently, order in log file)
*/
#define X(a, b, c) a,
typedef enum
{
DARSHAN_MODULE_IDS
} darshan_module_id;
#undef X
/* module name strings */
#define X(a, b, c) b,
static char * const darshan_module_names[] =
{
DARSHAN_MODULE_IDS
};
#undef X
/* simple macros for accessing module flag bitfields
 *
 * 'flags' must be a modifiable lvalue; it is evaluated twice, so do not pass
 * an expression with side effects. 'id' is the bit position (a module id).
 * Every argument is parenthesized so that compound expressions (e.g. a
 * conditional or bitwise-or expression passed as 'id') are shifted as a
 * whole instead of being split apart by operator precedence.
 */
/* set bit 'id' in 'flags' */
#define DARSHAN_MOD_FLAG_SET(flags, id) ((flags) = ((flags) | (1 << (id))))
/* clear bit 'id' in 'flags' */
#define DARSHAN_MOD_FLAG_UNSET(flags, id) ((flags) = ((flags) & ~(1 << (id))))
......@@ -106,6 +71,7 @@ struct darshan_header
uint32_t partial_flag;
struct darshan_log_map rec_map;
struct darshan_log_map mod_map[DARSHAN_MAX_MODS];
uint32_t mod_ver[DARSHAN_MAX_MODS];
};
/* job-level metadata stored for this application */
......@@ -126,4 +92,61 @@ struct darshan_base_record
int64_t rank;
};
/************************************************
*** module-specific includes and definitions ***
************************************************/
#include "darshan-null-log-format.h"
#include "darshan-posix-log-format.h"
#include "darshan-mpiio-log-format.h"
#include "darshan-hdf5-log-format.h"
#include "darshan-pnetcdf-log-format.h"
#include "darshan-bgq-log-format.h"
/* X-macro for keeping module ordering consistent */
/* NOTE: first val used to define module enum values,
* second val used to define module name strings,
* third val is the log format version for the module,
* and fourth val is used to provide the name of a
* corresponding logutils structure for parsing module
* data out of the log file (only used in darshan-util
* component -- NULL can be passed if there are no
* logutil definitions)
*/
#define DARSHAN_MODULE_IDS \
X(DARSHAN_NULL_MOD, "NULL", DARSHAN_NULL_VER, NULL) \
X(DARSHAN_POSIX_MOD, "POSIX", DARSHAN_POSIX_VER, &posix_logutils) \
X(DARSHAN_MPIIO_MOD, "MPI-IO", DARSHAN_MPIIO_VER, &mpiio_logutils) \
X(DARSHAN_HDF5_MOD, "HDF5", DARSHAN_HDF5_VER, &hdf5_logutils) \
X(DARSHAN_PNETCDF_MOD, "PNETCDF", DARSHAN_PNETCDF_VER, &pnetcdf_logutils) \
X(DARSHAN_BGQ_MOD, "BG/Q", DARSHAN_BGQ_VER, &bgq_logutils)
/* unique identifiers to distinguish between available darshan modules */
/* NOTES: - valid ids range from [0...DARSHAN_MAX_MODS-1]
* - order of ids controls module shutdown order (and consequently, order in log file)
*/
/* build the module id enum from the first field of each DARSHAN_MODULE_IDS entry */
#define X(mod_id, mod_name, mod_ver, mod_logutils) mod_id,
typedef enum
{
    DARSHAN_MODULE_IDS
} darshan_module_id;
#undef X
/* module name strings: the second field of each DARSHAN_MODULE_IDS entry,
 * in the same order as the darshan_module_id enum values
 */
#define X(mod_id, mod_name, mod_ver, mod_logutils) mod_name,
static char * const darshan_module_names[] =
{
    DARSHAN_MODULE_IDS
};
#undef X
/* module version numbers: the third field of each DARSHAN_MODULE_IDS entry,
 * in the same order as the darshan_module_id enum values
 */
#define X(mod_id, mod_name, mod_ver, mod_logutils) mod_ver,
static const int darshan_module_versions[] =
{
    DARSHAN_MODULE_IDS
};
#undef X
#endif /* __DARSHAN_LOG_FORMAT_H */
......@@ -7,7 +7,8 @@
#ifndef __DARSHAN_MPIIO_LOG_FORMAT_H
#define __DARSHAN_MPIIO_LOG_FORMAT_H
#include "darshan-log-format.h"
/* current MPI-IO log format version */
#define DARSHAN_MPIIO_VER 1
/* TODO: maybe use a counter to track cases in which a derived datatype is used? */
......
......@@ -7,7 +7,8 @@
#ifndef __DARSHAN_NULL_LOG_FORMAT_H
#define __DARSHAN_NULL_LOG_FORMAT_H
#include "darshan-log-format.h"
/* current log format version, to support backwards compatibility */
#define DARSHAN_NULL_VER 1
#define NULL_COUNTERS \
/* count of number of 'bar' function calls */\
......
......@@ -7,7 +7,8 @@
#ifndef __DARSHAN_PNETCDF_LOG_FORMAT_H
#define __DARSHAN_PNETCDF_LOG_FORMAT_H
#include "darshan-log-format.h"
/* current PNETCDF log format version */
#define DARSHAN_PNETCDF_VER 1
#define PNETCDF_COUNTERS \
/* count of PNETCDF independent opens */\
......
......@@ -6,7 +6,8 @@
#ifndef __DARSHAN_POSIX_LOG_FORMAT_H
#define __DARSHAN_POSIX_LOG_FORMAT_H
#include "darshan-log-format.h"
/* current POSIX log format version */
#define DARSHAN_POSIX_VER 1
#define POSIX_COUNTERS \
/* count of posix opens */\
......@@ -175,11 +176,4 @@ struct darshan_posix_file
double fcounters[POSIX_F_NUM_INDICES];
};
/* This macro can be used to identify files that have been opened using
* pnetcdf, hdf5, or mpi-io, but were never opened at the posix level. As a
* result the record will not necessarily have all of the expected fields
* populated.
*/
#define POSIX_FILE_PARTIAL(__file)((((__file)->counters[POSIX_OPENS] || (__file)->counters[POSIX_FOPENS] || (__file)->counters[POSIX_STATS]) ? 0 : 1))
#endif /* __DARSHAN_POSIX_LOG_FORMAT_H */
......@@ -35,8 +35,15 @@ CFLAGS_SHARED = -DDARSHAN_CONFIG_H=\"darshan-runtime-config.h\" -I . -I$(srcdir)
LIBS = -lz @LIBBZ2@
static-mod-objs = lib/darshan-posix.o lib/darshan-mpiio.o lib/darshan-hdf5.o lib/darshan-pnetcdf.o
dynamic-mod-objs = lib/darshan-posix.po lib/darshan-mpiio.po lib/darshan-hdf5.po lib/darshan-pnetcdf.po
DARSHAN_STATIC_MOD_OBJS = lib/darshan-posix.o lib/darshan-mpiio.o lib/darshan-hdf5.o lib/darshan-pnetcdf.o
DARSHAN_DYNAMIC_MOD_OBJS = lib/darshan-posix.po lib/darshan-mpiio.po lib/darshan-hdf5.po lib/darshan-pnetcdf.po
# When DARSHAN_USE_BGQ is defined, add the BGQ module objects to both the
# static and dynamic module object lists and pass -DDARSHAN_BGQ to both the
# static (CFLAGS) and shared (CFLAGS_SHARED) compiles.
ifdef DARSHAN_USE_BGQ
DARSHAN_STATIC_MOD_OBJS += lib/darshan-bgq.o
DARSHAN_DYNAMIC_MOD_OBJS += lib/darshan-bgq.po
CFLAGS += -DDARSHAN_BGQ
CFLAGS_SHARED += -DDARSHAN_BGQ
endif
lib::
@mkdir -p $@
......@@ -113,10 +120,10 @@ lib/lookup8.o: lib/lookup8.c
lib/lookup8.po: lib/lookup8.c
$(CC) $(CFLAGS_SHARED) -c $< -o $@
lib/libdarshan.a: lib/darshan-core-init-finalize.o lib/darshan-core.o lib/darshan-common.o $(static-mod-objs) lib/lookup3.o lib/lookup8.o
lib/libdarshan.a: lib/darshan-core-init-finalize.o lib/darshan-core.o lib/darshan-common.o $(DARSHAN_STATIC_MOD_OBJS) lib/lookup3.o lib/lookup8.o
ar rcs $@ $^
lib/libdarshan.so: lib/darshan-core-init-finalize.po lib/darshan-core.po lib/darshan-common.po $(dynamic-mod-objs) lib/lookup3.po lib/lookup8.po
lib/libdarshan.so: lib/darshan-core-init-finalize.po lib/darshan-core.po lib/darshan-common.po $(DARSHAN_DYNAMIC_MOD_OBJS) lib/lookup3.po lib/lookup8.po
$(CC) $(CFLAGS_SHARED) $(LDFLAGS) -o $@ $^ -lpthread -lrt -lz -ldl
lib/libdarshan-stubs.a: lib/darshan-hdf5-stubs.o lib/darshan-pnetcdf-stubs.o
......
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for darshan-runtime 3.0.0-pre2.
# Generated by GNU Autoconf 2.69 for darshan-runtime 3.0.0-pre3.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
......@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='darshan-runtime'
PACKAGE_TARNAME='darshan-runtime'
PACKAGE_VERSION='3.0.0-pre2'
PACKAGE_STRING='darshan-runtime 3.0.0-pre2'
PACKAGE_VERSION='3.0.0-pre3'
PACKAGE_STRING='darshan-runtime 3.0.0-pre3'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
......@@ -1244,7 +1244,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures darshan-runtime 3.0.0-pre2 to adapt to many kinds of systems.
\`configure' configures darshan-runtime 3.0.0-pre3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
......@@ -1305,7 +1305,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of darshan-runtime 3.0.0-pre2:";;
short | recursive ) echo "Configuration of darshan-runtime 3.0.0-pre3:";;
esac
cat <<\_ACEOF
......@@ -1409,7 +1409,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
darshan-runtime configure 3.0.0-pre2
darshan-runtime configure 3.0.0-pre3
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
......@@ -1761,7 +1761,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by darshan-runtime $as_me 3.0.0-pre2, which was
It was created by darshan-runtime $as_me 3.0.0-pre3, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
......@@ -4296,7 +4296,7 @@ _ACEOF
fi
DARSHAN_VERSION="3.0.0-pre2"
DARSHAN_VERSION="3.0.0-pre3"
......@@ -4815,7 +4815,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by darshan-runtime $as_me 3.0.0-pre2, which was
This file was extended by darshan-runtime $as_me 3.0.0-pre3, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
......@@ -4877,7 +4877,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
darshan-runtime config.status 3.0.0-pre2
darshan-runtime config.status 3.0.0-pre3
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
......
......@@ -5,7 +5,7 @@ dnl Process this file with autoconf to produce a configure script.
dnl You may need to use autoheader as well if changing any DEFINEs
dnl sanity checks, output header, location of scripts used here
AC_INIT([darshan-runtime], [3.0.0-pre2])
AC_INIT([darshan-runtime], [3.0.0-pre3])
AC_CONFIG_SRCDIR([darshan.h])
AC_CONFIG_AUX_DIR(../maint/config)
AC_CONFIG_HEADER(darshan-runtime-config.h)
......
......@@ -56,7 +56,7 @@ struct darshan_core_runtime
int rec_hash_cnt;
struct darshan_core_module* mod_array[DARSHAN_MAX_MODS];
int mod_mem_used;
char comp_buf[DARSHAN_COMP_BUF_SIZE];
char *comp_buf;
double wtime_offset;
};
......
......@@ -114,10 +114,22 @@ DARSHAN_EXTERN_DECL(PMPI_Type_get_envelope, int, (MPI_Datatype datatype, int *nu
DARSHAN_EXTERN_DECL(PMPI_Type_size, int, (MPI_Datatype datatype, int *size));
DARSHAN_EXTERN_DECL(PMPI_Op_create, int, (MPI_User_function *function, int commute, MPI_Op *op));
DARSHAN_EXTERN_DECL(PMPI_Op_free, int, (MPI_Op *op));
/* The send-side buffer of PMPI_Reduce, PMPI_Send, and PMPI_Gather is declared
 * 'const void *' when HAVE_MPIIO_CONST is defined and plain 'void *' otherwise.
 * NOTE(review): presumably this mirrors the prototypes in the MPI headers
 * being compiled against -- confirm against the configure check that defines
 * HAVE_MPIIO_CONST.
 */
#ifdef HAVE_MPIIO_CONST
DARSHAN_EXTERN_DECL(PMPI_Reduce, int, (const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm));
#else
DARSHAN_EXTERN_DECL(PMPI_Reduce, int, (void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm));
#endif
#ifdef HAVE_MPIIO_CONST
DARSHAN_EXTERN_DECL(PMPI_Send, int, (const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm));
#else
DARSHAN_EXTERN_DECL(PMPI_Send, int, (void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm));
#endif
/* PMPI_Recv is declared once, with a non-const buffer, in both configurations */
DARSHAN_EXTERN_DECL(PMPI_Recv, int, (void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status));
#ifdef HAVE_MPIIO_CONST
DARSHAN_EXTERN_DECL(PMPI_Gather, int, (const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm));
#else
DARSHAN_EXTERN_DECL(PMPI_Gather, int, (void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm));
#endif
#endif
......
......@@ -261,8 +261,8 @@ print OUTPUT<<"EOF";
# is MPI in there?
grep MPI \$tmpfile >& /dev/null
rc_mpi=\$?
# is PMPI being used for any init, finalize, or mpi-io calls?
grep -E \\(PMPI_File_\\)\\|\\(PMPI_Init\\)\\|\\(PMPI_Finalize\\) \$tmpfile | grep -v -E \\(mpich.*\\.a\\) |grep \\(PMPI >& /dev/null
# is PMPI being used for PMPI_File_open?
grep -E PMPI_File_open \$tmpfile | grep -v -E \\(mpich.*\\.a\\) |grep \\(PMPI >& /dev/null
rc_pmpi=\$?
rm \$tmpfile >& /dev/null
......
......@@ -263,8 +263,8 @@ print OUTPUT<<"EOF";
grep MPI \$tmpfile >& /dev/null
rc_mpi=\$?
# is PMPI being used for any init, finalize, or mpi-io calls?
grep -E \\(PMPI_File_\\)\\|\\(PMPI_Init\\)\\|\\(PMPI_Finalize\\) \$tmpfile | grep -v -E \\(mpich.*\\.a\\) |grep \\(PMPI >& /dev/null
# is PMPI being used for PMPI_File_open?
grep -E PMPI_File_open \$tmpfile | grep -v -E \\(mpich.*\\.a\\) |grep \\(PMPI >& /dev/null
rc_pmpi=\$?
CXXMPICH=-lmpichcxx
......
......@@ -271,8 +271,8 @@ print OUTPUT<<"EOF";
grep -i MPI \$tmpfile >& /dev/null
rc_mpi=\$?
# is PMPI being used for any init, finalize, or mpi-io calls?
grep -E \\(PMPI_File_\\)\\|\\(PMPI_Init\\)\\|\\(PMPI_Finalize\\) \$tmpfile | grep -v -E \\(mpich.*\\.a\\) |grep \\(PMPI >& /dev/null
# is PMPI being used for PMPI_File_open?
grep -E PMPI_File_open \$tmpfile | grep -v -E \\(mpich.*\\.a\\) |grep \\(PMPI >& /dev/null
rc_pmpi=\$?
......
......@@ -125,17 +125,17 @@ void darshan_core_lookup_record(
/* darshan_core_register_record()
*
* Register the Darshan record given by 'name' with the darshan-core
* runtime, allowing it to be properly tracked and (potentially)
* correlated with records from other modules. 'len' is the size of
* the name pointer (string length for string names), and 'printable_flag'
* indicates whether the name is a string. 'mod_limit_flag' is set if
* the calling module is out of memory (to prevent darshan-core from
* creating new records and to just search existing records) and 'mod_id'
* is the identifier of the calling module. 'rec_id' is an output pointer
* storing the correspoing Darshan record identifier and 'file_alignment'
* is an output pointer storing the file system alignment value for the
* given record.
* Register a record with the darshan-core runtime, allowing it to be
* properly tracked and (potentially) correlated with records from other
* modules. 'name' is the name of the Darshan record (e.g., the full
* file path) and 'len' is the size of the name pointer (string length
* for string names). 'mod_id' is the identifier of the calling module,
* 'printable_flag' indicates whether the name is a string, and
* 'mod_limit_flag' is set if the calling module is out of memory (to
* prevent darshan-core from creating new records and to just search
* through existing records). 'rec_id' is an output pointer storing the
* corresponding Darshan record identifier and 'file_alignment' is an output
* pointer storing the file system alignment value for the given record.
*/
int darshan_core_register_record(
darshan_record_id rec_id,
......
......@@ -16,7 +16,10 @@ used by the application.
The darshan-runtime instrumentation only instruments MPI applications (the
application must at least call `MPI_Init()` and `MPI_Finalize()`). However,
it captures both MPI-IO and POSIX file access. It also captures limited
information about HDF5 and PnetCDF access.
information about HDF5 and PnetCDF access. Darshan also exposes an API that
can be used to develop and add new instrumentation modules (for other I/O library
interfaces or to gather system-specific data, for instance), as detailed in
http://www.mcs.anl.gov/research/projects/darshan/docs/darshan-modularization.html[this document].
This document provides generic installation instructions, but "recipes" for
several common HPC systems are provided at the end of the document as well.
......@@ -311,7 +314,7 @@ Please set your environment to use the GNU programming environment before
configuring or compiling Darshan. Although Darshan can be built with a
variety of compilers, the GNU compilers are recommended because it will
produce a Darshan library that is interoperable with the widest range
of compmilers and linkers. On most Cray systems you can enable the GNU
of compilers and linkers. On most Cray systems you can enable the GNU
programming environment with a command similar to "module swap PrgEnv-pgi
PrgEnv-gnu". Please see your site documentation for information about
how to switch programming environments.
......@@ -463,6 +466,40 @@ older versions of Open MPI, please refer to the following mailing list discussio
http://www.open-mpi.org/community/lists/devel/2013/01/11907.php
== Upgrading to Darshan 3.x from 2.x
Beginning with Darshan 3.0.0, Darshan has been rewritten to modularize its runtime environment
and log file format to simplify the addition of new I/O characterization data. The process of
compiling and installing the Darshan 3.x source code should essentially be identical to this
process on Darshan 2.x. Therefore, the installation recipes given in the previous section
should work irrespective of the Darshan version being used. Similarly, the manner in which
Darshan is used should be the same across versions -- the sections in this document regarding
Darshan link:darshan-runtime.html#_environment_preparation[environment preparation],
instrumenting link:darshan-runtime.html#_instrumenting_statically_linked_applications[statically
linked applications] and link:darshan-runtime.html#_instrumenting_dynamically_linked_applications[
dynamically linked applications], and using link:darshan-runtime.html#_runtime_environment_variables[
runtime environment variables] are equally applicable to both versions.
However, we do provide some suggestions and expectations for system administrators to keep in
mind when upgrading to Darshan 3.x:
* Log file compatibility was broken in the upgrade, and thus 3.x log utilities do not
work directly with logs generated by 2.x versions of Darshan (and vice versa).
- There is currently no tool for converting 2.x logs into the 3.x log format.
- The `darshan-logutils` library will provide error messages to indicate whether a given
log file is incompatible with the corresponding library version.
* The darshan log file extension has been changed from `.darshan.gz` (or `.darshan.bz2` for
log files converted to use bzip2 compression) to `.darshan`.
- A field in the Darshan log header is used to indicate whether a log is compressed using
libz or bzip2 compression.
* We encourage administrators to use the same log file directory for version 3.x as had been
used for version 2.x.
- Within this directory, the determination of which set of log utilities (version 2.x
or version 3.x) to use can be based on the file extension for a given log (as explained
above).
== Runtime environment variables
The Darshan library honors the following environment variables to modify
......
......@@ -16,7 +16,6 @@
#include "uthash.h"
#include "darshan.h"
#include "darshan-bgq-log-format.h"
#include "darshan-dynamic.h"
#include <mpix.h>
......@@ -54,7 +53,7 @@ static int instrumentation_disabled = 0;
static int my_rank = -1;
static int darshan_mem_alignment = 1;
/* internal helper functions for the "NULL" module */
/* internal helper functions for the BGQ module */
void bgq_runtime_initialize(void);
/* forward declaration for module functions needed to interface with darshan-core */
......@@ -62,7 +61,7 @@ static void bgq_begin_shutdown(void);
static void bgq_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs, int shared_rec_count, void **buffer, int *size);
static void bgq_shutdown(void);
/* macros for obtaining/releasing the "NULL" module lock */
/* macros for obtaining/releasing the BGQ module lock */
#define BGQ_LOCK() pthread_mutex_lock(&bgq_runtime_mutex)
#define BGQ_UNLOCK() pthread_mutex_unlock(&bgq_runtime_mutex)
......@@ -141,13 +140,15 @@ void bgq_runtime_initialize()
if(mem_limit == 0)
{
instrumentation_disabled = 1;
BGQ_UNLOCK();
return;
}
/* no enough memory to fit bgq module */
/* not enough memory to fit bgq module */
if (mem_limit < sizeof(*bgq_runtime))
{
instrumentation_disabled = 1;
BGQ_UNLOCK();
return;
}
......@@ -156,6 +157,7 @@ void bgq_runtime_initialize()
if(!bgq_runtime)
{
instrumentation_disabled = 1;
BGQ_UNLOCK();
return;
}
memset(bgq_runtime, 0, sizeof(*bgq_runtime));
......@@ -188,7 +190,7 @@ void bgq_runtime_initialize()
return;
}
/* Perform any necessary steps prior to shutting down for the "NULL" module. */
/* Perform any necessary steps prior to shutting down for the BGQ module. */
static void bgq_begin_shutdown()
{
BGQ_LOCK();
......@@ -212,7 +214,7 @@ static int cmpr(const void *p1, const void *p2)
return ((*a == *b) ? 0 : ((*a < *b) ? -1 : 1));
}
/* Pass output data for the "BGQ" module back to darshan-core to log to file. */
/* Pass output data for the BGQ module back to darshan-core to log to file. */
static void bgq_get_output_data(
MPI_Comm mod_comm,
darshan_record_id *shared_recs,
......@@ -220,8 +222,7 @@ static void bgq_get_output_data(
void **buffer,
int *size)
{
/* Just set the output buffer to point at the array of the "BGQ" module's
/* Just set the output buffer to point at the array of the BGQ module's
* I/O records, and set the output size according to the number of records
* currently being tracked.
*/
......@@ -279,7 +280,7 @@ static void bgq_get_output_data(
return;
}
/* Shutdown the "BGQ" module by freeing up all data structures. */
/* Shutdown the BGQ module by freeing up all data structures. */
static void bgq_shutdown()
{
if (bgq_runtime)
......
......@@ -120,9 +120,22 @@ DARSHAN_FORWARD_DECL(PMPI_Type_get_envelope, int, (MPI_Datatype datatype, int *n
DARSHAN_FORWARD_DECL(PMPI_Type_size, int, (MPI_Datatype datatype, int *size));
DARSHAN_FORWARD_DECL(PMPI_Op_create, int, (MPI_User_function *function, int commute, MPI_Op *op));
DARSHAN_FORWARD_DECL(PMPI_Op_free, int, (MPI_Op *op));
/* Forward declarations for the PMPI collectives/point-to-point calls below.
 * The send-side buffer of PMPI_Reduce, PMPI_Send, and PMPI_Gather is declared
 * 'const void *' when HAVE_MPIIO_CONST is defined and plain 'void *' otherwise.
 * NOTE(review): presumably this mirrors the prototypes in the MPI headers
 * being compiled against -- confirm against the configure check that defines
 * HAVE_MPIIO_CONST.
 */
#ifdef HAVE_MPIIO_CONST
DARSHAN_FORWARD_DECL(PMPI_Reduce, int, (const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm));
#else
DARSHAN_FORWARD_DECL(PMPI_Reduce, int, (void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm));
#endif
#ifdef HAVE_MPIIO_CONST
DARSHAN_FORWARD_DECL(PMPI_Send, int, (const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm));
#else
DARSHAN_FORWARD_DECL(PMPI_Send, int, (void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm));
#endif
/* PMPI_Recv is declared once, with a non-const buffer, in both configurations */
DARSHAN_FORWARD_DECL(PMPI_Recv, int, (void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status));
#ifdef HAVE_MPIIO_CONST
DARSHAN_FORWARD_DECL(PMPI_Gather, int, (const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm));
#else
DARSHAN_FORWARD_DECL(PMPI_Gather, int, (void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm));
#endif
void resolve_mpi_symbols (void)
{
......@@ -182,6 +195,7 @@ void resolve_mpi_symbols (void)
MAP_OR_FAIL(PMPI_Reduce);
MAP_OR_FAIL(PMPI_Send);
MAP_OR_FAIL(PMPI_Recv);
MAP_OR_FAIL(PMPI_Gather);
return;
}
......
......@@ -313,6 +313,8 @@ void darshan_core_initialize(int argc, char **argv)
void darshan_core_shutdown()
{
struct darshan_core_runtime *final_core;
struct darshan_header out_header;
struct darshan_job out_job;
int internal_timing_flag = 0;
struct tm *start_tm;
time_t start_time_tmp;
......@@ -336,8 +338,6 @@ void darshan_core_shutdown()
int all_ret = 0;
int i;
uint64_t gz_fp = 0;
struct darshan_header out_header;
struct darshan_job out_job;
MPI_File log_fh;
MPI_Status status;
......@@ -358,14 +358,14 @@ void darshan_core_shutdown()
darshan_core = NULL;
DARSHAN_CORE_UNLOCK();
memcpy(&out_job, final_core->log_job_p, sizeof(struct darshan_job));
/* XXX just copy mmap files somewhere else to avoid corruption */
DARSHAN_MPI_CALL(PMPI_Barrier)(MPI_COMM_WORLD);
if(my_rank == 0)
system("cp /tmp/darshan* ~/Desktop");
DARSHAN_MPI_CALL(PMPI_Barrier)(MPI_COMM_WORLD);
memcpy(&out_job, final_core->log_job_p, sizeof(struct darshan_job));
/* indicate in the metadata field of the temporary darshan log file that
* the darshan shutdown process was invoked on the data in the log. since
* we have no way of knowing how far the shutdown process got, the data
......@@ -376,6 +376,10 @@ void darshan_core_shutdown()
int meta_remain = DARSHAN_JOB_METADATA_LEN - strlen(final_core->log_job_p->metadata) - 1;
snprintf(m, meta_remain, "darshan_shutdown=yes\n");
/* we also need to set which modules were registered on this process and
* call into those modules and give them a chance to perform any necessary
* pre-shutdown steps.
*/
for(i = 0; i < DARSHAN_MAX_MODS; i++)
{
if(final_core->mod_array[i])
......@@ -385,6 +389,13 @@ void darshan_core_shutdown()
}
}
final_core->comp_buf = malloc(DARSHAN_COMP_BUF_SIZE);
if(!(final_core->comp_buf))
{
darshan_core_cleanup(final_core);
return;
}
logfile_name = malloc(PATH_MAX);
if(!logfile_name)
{
......@@ -639,6 +650,9 @@ void darshan_core_shutdown()
*/
DARSHAN_MPI_CALL(PMPI_Reduce)(&(final_core->log_hdr_p->partial_flag),
&(out_header.partial_flag), 1, MPI_UINT32_T, MPI_BOR, 0, MPI_COMM_WORLD);
DARSHAN_MPI_CALL(PMPI_Reduce)(&(final_core->log_hdr_p->mod_ver),
&(out_header.mod_ver), DARSHAN_MAX_MODS, MPI_UINT32_T, MPI_MAX,
0, MPI_COMM_WORLD);
if(my_rank == 0)
{
/* rank 0 is responsible for writing the log header */
......@@ -1550,6 +1564,8 @@ static void darshan_core_cleanup(struct darshan_core_runtime* core)
}
}
if(core->comp_buf)
free(core->comp_buf);
free(core);
return;
......@@ -1609,6 +1625,7 @@ void darshan_core_register_module(
/* register module with darshan */