diff --git a/darshan-bgq-log-format.h b/darshan-bgq-log-format.h index ab19b6fe76bbd6c079adcf72068054626d6215e5..2c690e66f9db1bae4fb6fed3baf8527456807516 100644 --- a/darshan-bgq-log-format.h +++ b/darshan-bgq-log-format.h @@ -11,7 +11,7 @@ #define DARSHAN_BGQ_VER 1 #define BGQ_COUNTERS \ - /* control system jobid*/\ + /* control system jobid */\ X(BGQ_CSJOBID) \ /* number of BGQ compute nodes */\ X(BGQ_NNODES) \ @@ -60,16 +60,13 @@ enum darshan_bgq_f_indices * which would actually be logged to file by Darshan for the "BGQ" example * module. This example implementation logs the following data for each * record: - * - a corresponding Darshan record identifier - * - the rank of the process responsible for the record + * - a darshan_base_record structure, which contains the record id & rank * - integer I/O counters (operation counts, I/O sizes, etc.) * - floating point I/O counters (timestamps, cumulative timers, etc.) */ struct darshan_bgq_record { - darshan_record_id f_id; - int64_t rank; - int alignment; + struct darshan_base_record base_rec; int64_t counters[BGQ_NUM_INDICES]; double fcounters[BGQ_F_NUM_INDICES]; }; diff --git a/darshan-hdf5-log-format.h b/darshan-hdf5-log-format.h index d351a8e1383219cd09ae81048a4cd6b07b235ed8..0f4dc72fa718704fe5ca99ea2d33312ba35a7508 100644 --- a/darshan-hdf5-log-format.h +++ b/darshan-hdf5-log-format.h @@ -41,15 +41,13 @@ enum darshan_hdf5_f_indices /* file record structure for HDF5 files. a record is created and stored for * every HDF5 file opened by the original application. For the HDF5 module, * the record includes: - * - a corresponding record identifier (created by hashing the file path) - * - the rank of the process which opened the file (-1 for shared files) + * - a darshan_base_record structure, which contains the record id & rank * - integer file I/O statistics (open, read/write counts, etc) * - floating point I/O statistics (timestamps, cumulative timers, etc.) 
*/ struct darshan_hdf5_file { - darshan_record_id f_id; - int64_t rank; + struct darshan_base_record base_rec; int64_t counters[HDF5_NUM_INDICES]; double fcounters[HDF5_F_NUM_INDICES]; }; diff --git a/darshan-log-format.h b/darshan-log-format.h index b534ee851439ac4817e924f5de56aedd11aecfd7..c556b4e7eeee9b05b5eacdda29273add97ff24e8 100644 --- a/darshan-log-format.h +++ b/darshan-log-format.h @@ -20,7 +20,7 @@ #endif /* update this on file format changes */ -#define DARSHAN_LOG_VERSION "3.00" +#define DARSHAN_LOG_VERSION "3.01" /* magic number for validating output files and checking byte order */ #define DARSHAN_MAGIC_NR 6567223 @@ -44,6 +44,7 @@ enum darshan_comp_type { DARSHAN_ZLIB_COMP, DARSHAN_BZIP2_COMP, + DARSHAN_NO_COMP, }; typedef uint64_t darshan_record_id; @@ -68,7 +69,7 @@ struct darshan_header int64_t magic_nr; unsigned char comp_type; uint32_t partial_flag; - struct darshan_log_map rec_map; + struct darshan_log_map name_map; struct darshan_log_map mod_map[DARSHAN_MAX_MODS]; uint32_t mod_ver[DARSHAN_MAX_MODS]; }; @@ -85,11 +86,18 @@ struct darshan_job char metadata[DARSHAN_JOB_METADATA_LEN]; }; -/* minimal record stored for each file/object accessed by Darshan */ -struct darshan_record +/* record to store name->darshan_id mapping for each registered record */ +struct darshan_name_record { - char* name; darshan_record_id id; + char name[1]; +}; + +/* base record definition that can be used by modules */ +struct darshan_base_record +{ + darshan_record_id id; + int64_t rank; }; diff --git a/darshan-mpiio-log-format.h b/darshan-mpiio-log-format.h index 35592f8893a6f54d5683e8f584c8858679db3fca..cdd68d8e26016eeb5c952d4aa21e512fa86199bf 100644 --- a/darshan-mpiio-log-format.h +++ b/darshan-mpiio-log-format.h @@ -140,15 +140,13 @@ enum darshan_mpiio_f_indices /* file record structure for MPI-IO files. a record is created and stored for * every MPI-IO file opened by the original application. 
For the MPI-IO module, * the record includes: - * - a corresponding record identifier (created by hashing the file path) - * - the rank of the process which opened the file (-1 for shared files) + * - a darshan_base_record structure, which contains the record id & rank * - integer file I/O statistics (open, read/write counts, etc) * - floating point I/O statistics (timestamps, cumulative timers, etc.) */ struct darshan_mpiio_file { - darshan_record_id f_id; - int64_t rank; + struct darshan_base_record base_rec; int64_t counters[MPIIO_NUM_INDICES]; double fcounters[MPIIO_F_NUM_INDICES]; }; diff --git a/darshan-null-log-format.h b/darshan-null-log-format.h index 1fff0e3feb6ed76c0f85edc10d79085c5fa179c7..287d8a0f61dd94626f6d4dcfba5e709a43bacd0f 100644 --- a/darshan-null-log-format.h +++ b/darshan-null-log-format.h @@ -44,15 +44,13 @@ enum darshan_null_f_indices * which would actually be logged to file by Darshan for the "NULL" example * module. This example implementation logs the following data for each * record: - * - a corresponding Darshan record identifier - * - the rank of the process responsible for the record + * - a darshan_base_record structure, which contains the record id & rank * - integer I/O counters (operation counts, I/O sizes, etc.) * - floating point I/O counters (timestamps, cumulative timers, etc.) */ struct darshan_null_record { - darshan_record_id f_id; - int64_t rank; + struct darshan_base_record base_rec; int64_t counters[NULL_NUM_INDICES]; double fcounters[NULL_F_NUM_INDICES]; }; diff --git a/darshan-pnetcdf-log-format.h b/darshan-pnetcdf-log-format.h index bd78907bc5be4b1637dd8be20dfdeadf0009a090..476bcd1191ecb0c3f726ec52f64216ecde48e413 100644 --- a/darshan-pnetcdf-log-format.h +++ b/darshan-pnetcdf-log-format.h @@ -43,15 +43,13 @@ enum darshan_pnetcdf_f_indices /* file record structure for PNETCDF files. a record is created and stored for * every PNETCDF file opened by the original application. 
For the PNETCDF module, * the record includes: - * - a corresponding record identifier (created by hashing the file path) - * - the rank of the process which opened the file (-1 for shared files) + * - a darshan_base_record structure, which contains the record id & rank * - integer file I/O statistics (open, read/write counts, etc) * - floating point I/O statistics (timestamps, cumulative timers, etc.) */ struct darshan_pnetcdf_file { - darshan_record_id f_id; - int64_t rank; + struct darshan_base_record base_rec; int64_t counters[PNETCDF_NUM_INDICES]; double fcounters[PNETCDF_F_NUM_INDICES]; }; diff --git a/darshan-posix-log-format.h b/darshan-posix-log-format.h index 63af0d071d033d737020559e05dcb93a42cf63b4..a667a22d8fa28d2841a8e75871e449ec2a75d935 100644 --- a/darshan-posix-log-format.h +++ b/darshan-posix-log-format.h @@ -164,15 +164,13 @@ enum darshan_posix_f_indices /* file record structure for POSIX files. a record is created and stored for * every POSIX file opened by the original application. For the POSIX module, * the record includes: - * - a corresponding record identifier (created by hashing the file path) - * - the rank of the process which opened the file (-1 for shared files) + * - a darshan_base_record structure, which contains the record id & rank * - integer file I/O statistics (open, read/write counts, etc) * - floating point I/O statistics (timestamps, cumulative timers, etc.) 
*/ struct darshan_posix_file { - darshan_record_id f_id; - int64_t rank; + struct darshan_base_record base_rec; int64_t counters[POSIX_NUM_INDICES]; double fcounters[POSIX_F_NUM_INDICES]; }; diff --git a/darshan-runtime/Makefile.in b/darshan-runtime/Makefile.in index e33fc685658947190da46910e8f79e17999fb73c..17b34f2377183b6a288ef57c127823276f6b4e88 100644 --- a/darshan-runtime/Makefile.in +++ b/darshan-runtime/Makefile.in @@ -17,6 +17,7 @@ CC = @CC@ LD = @LD@ DISABLE_LDPRELOAD = @DISABLE_LDPRELOAD@ +ENABLE_MMAP_LOGS = @ENABLE_MMAP_LOGS@ DARSHAN_USE_BGQ = @DARSHAN_USE_BGQ@ DARSHAN_LOG_FORMAT = $(srcdir)/../darshan-log-format.h DARSHAN_VERSION = @DARSHAN_VERSION@ @@ -157,6 +158,9 @@ endif install -m 644 $(srcdir)/share/ld-opts/darshan-posix-ld-opts $(datarootdir)/ld-opts/darshan-posix-ld-opts install -m 644 $(srcdir)/share/ld-opts/darshan-hdf5-ld-opts $(datarootdir)/ld-opts/darshan-hdf5-ld-opts install -m 644 $(srcdir)/share/ld-opts/darshan-pnetcdf-ld-opts $(datarootdir)/ld-opts/darshan-pnetcdf-ld-opts +ifdef ENABLE_MMAP_LOGS + install -m 755 share/darshan-mmap-epilog.sh $(datarootdir)/darshan-mmap-epilog.sh +endif install -d $(libdir)/pkgconfig install -m 644 lib/pkgconfig/darshan-runtime.pc $(libdir)/pkgconfig/darshan-runtime.pc @@ -164,5 +168,5 @@ clean:: rm -f *.o *.a lib/*.o lib/*.po lib/*.a lib/*.so distclean:: clean - rm -f darshan-runtime-config.h darshan-gen-cxx.pl darshan-gen-fortran.pl darshan-gen-cc.pl darshan-mk-log-dirs.pl darshan-config darshan-base-ld-opts aclocal.m4 autom4te.cache/* config.status config.log Makefile + rm -f darshan-runtime-config.h darshan-gen-cxx.pl darshan-gen-fortran.pl darshan-gen-cc.pl darshan-mk-log-dirs.pl darshan-config lib/pkgconfig/darshan-runtime.pc share/craype-1.x/darshan-module share/craype-2.x/darshan-module share/darshan-mmap-epilog.sh share/ld-opts/darshan-base-ld-opts share/mpi-profile/darshan-bg-cc.conf share/mpi-profile/darshan-bg-cxx.conf share/mpi-profile/darshan-bg-f.conf share/mpi-profile/darshan-cc.conf 
share/mpi-profile/darshan-cxx.conf share/mpi-profile/darshan-f.conf aclocal.m4 autom4te.cache/* config.status config.log Makefile rm -rf autom4te.cache diff --git a/darshan-runtime/configure b/darshan-runtime/configure index f49b49ce75122acbc5d4e021c27640eacc28346a..2e0c723fdc550073afa9fc6a27bf4a8bcb720050 100755 --- a/darshan-runtime/configure +++ b/darshan-runtime/configure @@ -624,6 +624,7 @@ LIBOBJS DARSHAN_USE_BGQ MPICH_LIB_OLD DARSHAN_VERSION +ENABLE_MMAP_LOGS DISABLE_LDPRELOAD __DARSHAN_LOG_PATH darshan_share_path @@ -684,15 +685,16 @@ ac_subst_files='' ac_user_opts=' enable_option_checking with_zlib -enable_cuserid enable_ld_preload +enable_cuserid enable_group_readable_logs +enable_bgq_mod +enable_mmap_logs with_mem_align with_log_path_by_env with_log_hints with_log_path with_jobid_env -enable_bgq_mod with_mod_mem ' ac_precious_vars='build_alias @@ -1313,10 +1315,12 @@ Optional Features: --disable-option-checking ignore unrecognized --enable/--with options --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] - --disable-cuserid Disables attempted use of cuserid() at run time --disable-ld-preload Disables support for LD_PRELOAD library - --enable-group-readable-logs Set log files to be group readable + --disable-cuserid Disables attempted use of cuserid() at run time + --enable-group-readable-logs + Set log files to be group readable --disable-bgq-mod Disables compilation and use of BG/Q module (for BG/Q systems) + --enable-mmap-logs Enables ability to mmap I/O data to log file Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -1325,13 +1329,16 @@ Optional Packages: /usr/local or /usr if not found in /usr/local --without-zlib to disable zlib usage completely --with-mem-align= Memory alignment in bytes - --with-log-path-by-env= Comma separated list of environment variables to check for log path location before --with-log-path path - --with-log-hints= Semicolon-separated list 
of MPI-IO hints for log file write + --with-log-path-by-env= + Comma separated list of environment variables to check for + log path location before --with-log-path path + --with-log-hints= + Semicolon-separated list of MPI-IO hints for log file write --with-log-path= Location to store log files at run time --with-jobid-env= Name of environment variable that stores the jobid - (specify "NONE" if no appropriate environment variable is available: - Darshan will use rank 0's pid instead) - --with-mod-mem= Maximum amount of memory (in MiB) for each Darshan module + (specify "NONE" if no appropriate environment variable + is available: Darshan will use rank 0's pid instead) + --with-mod-mem= Maximum amount of memory (in MiB) for each Darshan module Some influential environment variables: CC C compiler command @@ -3528,31 +3535,54 @@ fi +# Check whether --enable-ld-preload was given. +if test "${enable_ld_preload+set}" = set; then : + enableval=$enable_ld_preload; if test "x$enableval" = "xno" ; then + DISABLE_LDPRELOAD="1" +fi + +fi + + # Check whether --enable-cuserid was given. if test "${enable_cuserid+set}" = set; then : enableval=$enable_cuserid; if test "x$enableval" = "xno" ; then -$as_echo "#define DARSHAN_DISABLE_CUSERID 1" >>confdefs.h +$as_echo "#define __DARSHAN_DISABLE_CUSERID 1" >>confdefs.h fi fi -# Check whether --enable-ld-preload was given. -if test "${enable_ld_preload+set}" = set; then : - enableval=$enable_ld_preload; if test "x$enableval" = "xno" ; then - DISABLE_LDPRELOAD="1" +# Check whether --enable-group-readable-logs was given. +if test "${enable_group_readable_logs+set}" = set; then : + enableval=$enable_group_readable_logs; if test "x$enableval" = "xyes" ; then + +$as_echo "#define __DARSHAN_GROUP_READABLE_LOGS 1" >>confdefs.h + fi fi -# Check whether --enable-group-readable-logs was given. 
-if test "${enable_group_readable_logs+set}" = set; then : - enableval=$enable_group_readable_logs; if test "x$enableval" = "xyes" ; then +# Check whether --enable-bgq_mod was given. +if test "${enable_bgq_mod+set}" = set; then : + enableval=$enable_bgq_mod; +else + enable_bgq_mod=check + +fi -$as_echo "#define __DARSHAN_GROUP_READABLE_LOGS 1" >>confdefs.h + +# Check whether --enable-mmap-logs was given. +if test "${enable_mmap_logs+set}" = set; then : + enableval=$enable_mmap_logs; if test "x$enableval" = "xyes" ; then + +$as_echo "#define __DARSHAN_ENABLE_MMAP_LOGS 1" >>confdefs.h + + ENABLE_MMAP_LOGS=1 + ac_config_files="$ac_config_files share/darshan-mmap-epilog.sh" fi @@ -3648,6 +3678,54 @@ _ACEOF fi + +# Check whether --with-mod-mem was given. +if test "${with_mod_mem+set}" = set; then : + withval=$with_mod_mem; if test x$withval = xyes; then + as_fn_error $? "--with-mod-mem must be given a number" "$LINENO" 5 + else + +cat >>confdefs.h <<_ACEOF +#define __DARSHAN_MOD_MEM_MAX ${withval} +_ACEOF + + fi + +fi + + +if test x$enable_bgq_mod != xno; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for BG/Q environment" >&5 +$as_echo_n "checking for BG/Q environment... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #ifndef __bgq__ + #error __bgq__ not set + #endif + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + DARSHAN_USE_BGQ=1 +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + if test "x$enable_bgq_mod" = xyes; then : + as_fn_error $? "BG/Q module enabled in non-BG/Q environment" "$LINENO" 5 +fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + if test x$GOT_ALIGNMENT != x1; then as_fn_error $? "must provide --with-mem-align= argument to configure." 
"$LINENO" 5 fi @@ -4243,63 +4321,6 @@ else MPICH_LIB_OLD=0 fi -# check to see whether the bgq instrumentation module should be built -# Check whether --enable-bgq_mod was given. -if test "${enable_bgq_mod+set}" = set; then : - enableval=$enable_bgq_mod; -else - enable_bgq_mod=check - -fi - -if test x$enable_bgq_mod != xno; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for BG/Q environment" >&5 -$as_echo_n "checking for BG/Q environment... " >&6; } - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - #ifndef __bgq__ - #error __bgq__ not set - #endif - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } - DARSHAN_USE_BGQ=1 -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - if test "x$enable_bgq_mod" = xyes; then : - as_fn_error $? "BG/Q module enabled in non-BG/Q environment" "$LINENO" 5 -fi -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi - - -# Check whether --with-mod-mem was given. -if test "${with_mod_mem+set}" = set; then : - withval=$with_mod_mem; if test x$withval = xyes; then - as_fn_error $? 
"--with-mod-mem must be given a number" "$LINENO" 5 - else - -cat >>confdefs.h <<_ACEOF -#define __DARSHAN_MOD_MEM_MAX ${withval} -_ACEOF - - fi - -fi - - DARSHAN_VERSION="3.0.1" @@ -4309,6 +4330,7 @@ DARSHAN_VERSION="3.0.1" + ac_config_files="$ac_config_files Makefile darshan-mk-log-dirs.pl darshan-gen-cc.pl darshan-gen-cxx.pl darshan-gen-fortran.pl darshan-config share/craype-1.x/darshan-module share/craype-2.x/darshan-module lib/pkgconfig/darshan-runtime.pc share/mpi-profile/darshan-cc.conf share/mpi-profile/darshan-cxx.conf share/mpi-profile/darshan-f.conf share/mpi-profile/darshan-bg-cc.conf share/mpi-profile/darshan-bg-cxx.conf share/mpi-profile/darshan-bg-f.conf share/ld-opts/darshan-base-ld-opts" cat >confcache <<\_ACEOF @@ -5003,6 +5025,7 @@ for ac_config_target in $ac_config_targets do case $ac_config_target in "darshan-runtime-config.h") CONFIG_HEADERS="$CONFIG_HEADERS darshan-runtime-config.h" ;; + "share/darshan-mmap-epilog.sh") CONFIG_FILES="$CONFIG_FILES share/darshan-mmap-epilog.sh" ;; "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; "darshan-mk-log-dirs.pl") CONFIG_FILES="$CONFIG_FILES darshan-mk-log-dirs.pl" ;; "darshan-gen-cc.pl") CONFIG_FILES="$CONFIG_FILES darshan-gen-cc.pl" ;; diff --git a/darshan-runtime/configure.in b/darshan-runtime/configure.in index d0d2c188da20240ea00d24d3f62ecd353e5b6016..8554dedc6bff7f9e3cbf36a6f98e8a97c64bba72 100644 --- a/darshan-runtime/configure.in +++ b/darshan-runtime/configure.in @@ -24,27 +24,44 @@ AC_TRY_COMPILE([#include ], [int ret = MPI_Init(0, (void*)0)], dnl runtime libraries require zlib CHECK_ZLIB -AC_ARG_ENABLE(cuserid, -[ --disable-cuserid Disables attempted use of cuserid() at run time], +AC_ARG_ENABLE(ld-preload, +[ --disable-ld-preload Disables support for LD_PRELOAD library], [if test "x$enableval" = "xno" ; then - AC_DEFINE(DARSHAN_DISABLE_CUSERID, 1, Define if cuserid() should be disabled) + DISABLE_LDPRELOAD="1" fi] ,) -AC_ARG_ENABLE(ld-preload, -[ --disable-ld-preload Disables support for 
LD_PRELOAD library], +AC_ARG_ENABLE(cuserid, +[ --disable-cuserid Disables attempted use of cuserid() at run time], [if test "x$enableval" = "xno" ; then - DISABLE_LDPRELOAD="1" + AC_DEFINE(__DARSHAN_DISABLE_CUSERID, 1, Define if cuserid() should be disabled) fi] ,) AC_ARG_ENABLE(group-readable-logs, -[ --enable-group-readable-logs Set log files to be group readable], +[ --enable-group-readable-logs + Set log files to be group readable], [if test "x$enableval" = "xyes" ; then AC_DEFINE(__DARSHAN_GROUP_READABLE_LOGS, 1, Define if Darshan should set log files to be group readable) fi] ,) +AC_ARG_ENABLE( + [bgq_mod], + [ --disable-bgq-mod Disables compilation and use of BG/Q module (for BG/Q systems)], + [], + [enable_bgq_mod=check] +) + +AC_ARG_ENABLE(mmap-logs, +[ --enable-mmap-logs Enables ability to mmap I/O data to log file], +[if test "x$enableval" = "xyes" ; then + AC_DEFINE(__DARSHAN_ENABLE_MMAP_LOGS, 1, Define if Darshan should mmap data structures to log file) + ENABLE_MMAP_LOGS=1 + AC_CONFIG_FILES(share/darshan-mmap-epilog.sh) +fi] +,) + AC_ARG_WITH(mem-align, [ --with-mem-align= Memory alignment in bytes], if test x$withval = xyes; then @@ -56,7 +73,9 @@ AC_ARG_WITH(mem-align, ) AC_ARG_WITH(log-path-by-env, -[ --with-log-path-by-env= Comma separated list of environment variables to check for log path location before --with-log-path path], +[ --with-log-path-by-env= + Comma separated list of environment variables to check for + log path location before --with-log-path path], if test x$withval = xyes; then AC_MSG_ERROR(--with-log-path-by-env must be given at least one variable name) else @@ -67,7 +86,8 @@ AC_ARG_WITH(log-path-by-env, ) AC_ARG_WITH(log-hints, -[ --with-log-hints= Semicolon-separated list of MPI-IO hints for log file write], +[ --with-log-hints= + Semicolon-separated list of MPI-IO hints for log file write], if test x$withval = xyes; then AC_MSG_ERROR(--with-log-hints must be given an argument) else @@ -90,8 +110,8 @@ AC_ARG_WITH(log-path, 
AC_ARG_WITH(jobid-env, [ --with-jobid-env= Name of environment variable that stores the jobid - (specify "NONE" if no appropriate environment variable is available: - Darshan will use rank 0's pid instead)], + (specify "NONE" if no appropriate environment variable + is available: Darshan will use rank 0's pid instead)], if test x$withval = xyes; then AC_MSG_ERROR(--with-jobid-env must be given a name) else @@ -101,6 +121,30 @@ AC_ARG_WITH(jobid-env, fi ) +AC_ARG_WITH(mod-mem, +[ --with-mod-mem= Maximum amount of memory (in MiB) for each Darshan module], + if test x$withval = xyes; then + AC_MSG_ERROR(--with-mod-mem must be given a number) + else + AC_DEFINE_UNQUOTED(__DARSHAN_MOD_MEM_MAX, ${withval}, Maximum memory (in MiB) for each Darshan module) + fi +) + +if test x$enable_bgq_mod != xno; then + AC_MSG_CHECKING(for BG/Q environment) + AC_TRY_COMPILE([ + #ifndef __bgq__ + #error __bgq__ not set + #endif + ], [], + AC_MSG_RESULT(yes) + DARSHAN_USE_BGQ=1, + AC_MSG_RESULT(no) + AS_IF([test "x$enable_bgq_mod" = xyes], + AC_MSG_ERROR(BG/Q module enabled in non-BG/Q environment), + [])) +fi + if test x$GOT_ALIGNMENT != x1; then AC_MSG_ERROR(must provide --with-mem-align= argument to configure.) 
fi @@ -280,43 +324,13 @@ AS_IF([$CC -show foo.c -o foo >& /dev/null], MPICH_LIB_OLD=0), MPICH_LIB_OLD=0) -# check to see whether the bgq instrumentation module should be built -AC_ARG_ENABLE( - [bgq_mod], - [ --disable-bgq-mod Disables compilation and use of BG/Q module (for BG/Q systems)], - [], - [enable_bgq_mod=check] -) -if test x$enable_bgq_mod != xno; then - AC_MSG_CHECKING(for BG/Q environment) - AC_TRY_COMPILE([ - #ifndef __bgq__ - #error __bgq__ not set - #endif - ], [], - AC_MSG_RESULT(yes) - DARSHAN_USE_BGQ=1, - AC_MSG_RESULT(no) - AS_IF([test "x$enable_bgq_mod" = xyes], - AC_MSG_ERROR(BG/Q module enabled in non-BG/Q environment), - [])) -fi - -AC_ARG_WITH(mod-mem, -[ --with-mod-mem= Maximum amount of memory (in MiB) for each Darshan module], - if test x$withval = xyes; then - AC_MSG_ERROR(--with-mod-mem must be given a number) - else - AC_DEFINE_UNQUOTED(__DARSHAN_MOD_MEM_MAX, ${withval}, Maximum memory (in MiB) for each Darshan module) - fi -) - DARSHAN_VERSION="AC_PACKAGE_VERSION" AC_SUBST(darshan_lib_path) AC_SUBST(darshan_share_path) AC_SUBST(LDFLAGS) AC_SUBST(__DARSHAN_LOG_PATH) AC_SUBST(DISABLE_LDPRELOAD) +AC_SUBST(ENABLE_MMAP_LOGS) AC_SUBST(DARSHAN_VERSION) AC_SUBST(MPICH_LIB_OLD) AC_SUBST(DARSHAN_USE_BGQ) diff --git a/darshan-runtime/darshan-common.h b/darshan-runtime/darshan-common.h index 4e42188510945ce1f941a786df45535dfe399b09..6cf62a11b53da1b3f87ffaae8f2e2ba1ab124731 100644 --- a/darshan-runtime/darshan-common.h +++ b/darshan-runtime/darshan-common.h @@ -66,44 +66,68 @@ } while(0) /* potentially set or increment a common value counter, depending on the __count - * for the given __value + * for the given __value. This macro ensures common values are stored first in + * decreasing order of their total count, and second by decreasing order of + * their value. + * * NOTE: This macro is hardcoded to expect that Darshan will only track the 4 * most common (i.e., frequently occuring) values. 
__val_p is a pointer to the * base of the value counters (i.e., the first of 4 contiguous common value * counters) and __cnt_p is a pointer to the base of the count counters (i.e. * the first of 4 contiguous common count counters). It is assumed your counters - * are stored as int64_t types. + * are stored as int64_t types. If __add_flag is set, an existing counter for + * __value is incremented by one; otherwise the given count is added to it. */ -#define DARSHAN_COMMON_VAL_COUNTER_INC(__val_p, __cnt_p, __value, __count) do {\ +#define DARSHAN_COMMON_VAL_COUNTER_INC(__val_p, __cnt_p, __value, __count, __add_flag) do {\ int i; \ - int set = 0; \ - int64_t min = *(__cnt_p); \ - int min_index = 0; \ + int inc_count, total_count; \ + int64_t tmp_val[4] = {0}; \ + int64_t tmp_cnt[4] = {0}; \ + int tmp_ndx = 0; \ if(__value == 0) break; \ + if(__add_flag) \ + inc_count = 1; \ + else \ + inc_count = __count; \ for(i=0; i<4; i++) { \ - /* increment bucket if already exists */ \ if(*(__val_p + i) == __value) { \ - *(__cnt_p + i) += __count; \ - set = 1; \ + total_count = *(__cnt_p + i) + inc_count; \ break; \ } \ - /* otherwise find the least frequently used bucket */ \ - else if(*(__cnt_p + i) < min) { \ - min = *(__cnt_p + i); \ - min_index = i; \ + } \ + if(i == 4) total_count = __count; \ + /* first, copy over any counters that should be sorted above this one \ + * (counters with higher counts or equal counts and larger values) \ + */ \ + for(i=0;i < 4; i++) { \ + if((*(__cnt_p + i) > total_count) || \ + ((*(__cnt_p + i) == total_count) && (*(__val_p + i) > __value))) { \ + tmp_val[tmp_ndx] = *(__val_p + i); \ + tmp_cnt[tmp_ndx] = *(__cnt_p + i); \ + tmp_ndx++; \ } \ + else break; \ } \ - if(!set && (__count > min)) { \ - *(__cnt_p + min_index) = __count; \ - *(__val_p + min_index) = __value; \ + if(tmp_ndx == 4) break; /* all done, updated counter is not added */ \ + /* next, add the updated counter */ \ + tmp_val[tmp_ndx] = __value; \ + tmp_cnt[tmp_ndx] = total_count; 
\ + tmp_ndx++; \ + /* last, copy over any remaining counters to make sure we have 4 sets total */ \ + while(tmp_ndx != 4) { \ + if(*(__val_p + i) != __value) { \ + tmp_val[tmp_ndx] = *(__val_p + i); \ + tmp_cnt[tmp_ndx] = *(__cnt_p + i); \ + tmp_ndx++; \ + } \ + i++; \ } \ + memcpy(__val_p, tmp_val, 4*sizeof(int64_t)); \ + memcpy(__cnt_p, tmp_cnt, 4*sizeof(int64_t)); \ } while(0) -/* maximum number of common values that darshan will track per file at - * runtime; at shutdown time these will be reduced to the 4 most - * frequently occuring ones - */ +/* maximum number of common values that darshan will track per file at runtime */ #define DARSHAN_COMMON_VAL_MAX_RUNTIME_COUNT 32 struct darshan_common_val_counter { @@ -130,6 +154,67 @@ struct darshan_variance_dt * darshan-common functions for darshan modules * ***********************************************/ +/* darshan_lookup_record_ref() + * + * Lookup a record reference pointer using the given 'handle'. + * 'handle_sz' is the size of the handle structure, and 'hash_head' + * is the pointer to the hash table to search. + * If the handle is found, the corresponding record reference pointer + * is returned, otherwise NULL is returned. + */ +void *darshan_lookup_record_ref( + void *hash_head, + void *handle, + size_t handle_sz); + +/* darshan_add_record_ref() + * + * Add the given record reference pointer, 'rec_ref_p' to the hash + * table whose address is stored in the 'hash_head_p' pointer. The + * hash is generated from the given 'handle', with size 'handle_sz'. + * If the record reference is successfully added, 1 is returned, + * otherwise, 0 is returned. + */ +int darshan_add_record_ref( + void **hash_head_p, + void *handle, + size_t handle_sz, + void *rec_ref_p); + +/* darshan_delete_record_ref() + * + * Delete the record reference for the given 'handle', with size + * 'handle_sz', from the hash table whose address is stored in + * the 'hash_head_p' pointer. 
+ * On successful deletion, the corresponding record reference pointer + * is returned, otherwise NULL is returned. + */ +void *darshan_delete_record_ref( + void **hash_head_p, + void *handle, + size_t handle_sz); + +/* darshan_clear_record_refs() + * + * Clear all record references from the hash table stored in the + * 'hash_head_p' pointer. If 'free_flag' is set, the corresponding + * record reference pointer is also freed. + */ +void darshan_clear_record_refs( + void **hash_head_p, + int free_flag); + +/* darshan_iter_record_refs() + * + * Iterate over each record reference stored in the hash table pointed + * to by 'hash_head' and perform the given action 'iter_action'. + * The action function takes a single pointer which points to the + * corresponding record reference pointer. + */ +void darshan_iter_record_refs( + void *hash_head, + void (*iter_action)(void *)); + /* darshan_clean_file_path() * * Allocate a new string that contains a new cleaned-up version of @@ -138,7 +223,21 @@ struct darshan_variance_dt * path string. */ char* darshan_clean_file_path( - const char* path); + const char *path); + +/* darshan_record_sort() + * + * Sort the records in 'rec_buf' by descending rank to get all + * shared records in a contiguous region at the end of the buffer. + * Records are secondarily sorted by ascending record identifiers. + * 'rec_count' is the number of records in the buffer, and 'rec_size' + * is the size of the record structure. + * NOTE: this function only works on fixed-length records. + */ +void darshan_record_sort( + void *rec_buf, + int rec_count, + int rec_size); /* darshan_common_val_counter() * @@ -148,29 +247,19 @@ char* darshan_clean_file_path( * used by a specific module, for instance. 'common_val_root' is the * root pointer for the tree which stores common value info, * 'common_val_count' is a pointer to the number of nodes in the - * tree (i.e., the number of allocated common value counters), and - * 'val' is the new value to attempt to add. 
+ * tree (i.e., the number of allocated common value counters), 'val' + * is the new value to attempt to add, 'val_p' is a pointer to the + * base counter (i.e., the first) of the common values (which are + * assumed to be 4 total and contiguous in memory), and 'cnt_p' is + * a pointer to the base counter of the common counts (which are + * again expected to be contiguous in memory). */ void darshan_common_val_counter( - void** common_val_root, - int* common_val_count, - int64_t val); - -/* darshan_walk_common_vals() - * - * Walks the tree of common value counters and determines the 4 most - * frequently occuring values, storing the common values in the - * appropriate counter fields of the given record. 'common_val_root' - * is the root of the tree which stores the common value info, 'val_p' - * is a pointer to the base counter (i.e., the first) of the common - * values (which are assumed to be 4 total and contiguous in memory), - * and 'cnt_p' is a pointer to the base counter of the common counts - * (which are again expected to be contiguous in memory). - */ -void darshan_walk_common_vals( - void* common_val_root, - int64_t* val_p, - int64_t* cnt_p); + void **common_val_root, + int *common_val_count, + int64_t val, + int64_t *val_p, + int64_t *cnt_p); /* darshan_variance_reduce() * diff --git a/darshan-runtime/darshan-config.in b/darshan-runtime/darshan-config.in index 0ba49a27e3cdb5ac3341863b9b0d66dd610db15e..8162db9614ebc2fa01ac85fec94a50030167d350 100755 --- a/darshan-runtime/darshan-config.in +++ b/darshan-runtime/darshan-config.in @@ -5,7 +5,7 @@ DARSHAN_SHARE_PATH="@darshan_share_path@" DARSHAN_LD_FLAGS="@LDFLAGS@" # NOTE: -# - we deliberately list libdarshan-mpi-io twice in the link command. The +# - we deliberately list libdarshan twice in the link command. The # first is necessary to correctly use the MPI profiling interface. 
The # final one is necessary to give the linker a change to resolve indirect # dependencies on PnetCDF and HDF5 symbols (if the app used a library which diff --git a/darshan-runtime/darshan-core.h b/darshan-runtime/darshan-core.h index 2ebf8b9b353d74dd825cb75c6a31b35017763304..300501343d5fa152b4b152f3225dc5630cac9ff3 100644 --- a/darshan-runtime/darshan-core.h +++ b/darshan-runtime/darshan-core.h @@ -10,6 +10,7 @@ #include #include #include +#include #include "uthash.h" #include "darshan-log-format.h" @@ -29,6 +30,14 @@ /* Environment variable to override memory per module */ #define DARSHAN_MOD_MEM_OVERRIDE "DARSHAN_MODMEM" +#ifdef __DARSHAN_ENABLE_MMAP_LOGS +/* Environment variable to override default mmap log path */ +#define DARSHAN_MMAP_LOG_PATH_OVERRIDE "DARSHAN_MMAP_LOGPATH" + +/* default path for storing mmap log files is '/tmp' */ +#define DARSHAN_DEF_MMAP_LOG_PATH "/tmp" +#endif + /* Maximum amount of memory per instrumentation module in MiB */ #ifdef __DARSHAN_MOD_MEM_MAX #define DARSHAN_MOD_MEM_MAX (__DARSHAN_MOD_MEM_MAX * 1024 * 1024) @@ -36,37 +45,52 @@ #define DARSHAN_MOD_MEM_MAX (2 * 1024 * 1024) /* 2 MiB default */ #endif +/* default name record buf can store 2048 records of size 100 bytes */ +#define DARSHAN_NAME_RECORD_BUF_SIZE (2048 * 100) + /* Default runtime compression buffer size */ #define DARSHAN_COMP_BUF_SIZE DARSHAN_MOD_MEM_MAX -/* in memory structure to keep up with job level data */ -struct darshan_core_runtime -{ - struct darshan_header log_header; - struct darshan_job log_job; - char exe[DARSHAN_EXE_LEN+1]; - struct darshan_core_record_ref *rec_hash; - int rec_count; - struct darshan_core_module* mod_array[DARSHAN_MAX_MODS]; - char *comp_buf; - double wtime_offset; - char *trailing_data; -}; - +/* structure to track registered modules */ struct darshan_core_module { - darshan_module_id id; - struct darshan_module_funcs mod_funcs; + void *rec_buf_start; + void *rec_buf_p; + int rec_mem_avail; + darshan_module_shutdown 
mod_shutdown_func; }; -struct darshan_core_record_ref +/* structure for keeping a reference to registered name records */ +struct darshan_core_name_record_ref { - struct darshan_record rec; + struct darshan_name_record *name_record; uint64_t mod_flags; uint64_t global_mod_flags; UT_hash_handle hlink; }; +/* in memory structure to keep up with job level data */ +struct darshan_core_runtime +{ + /* pointers to each log file component */ + struct darshan_header *log_hdr_p; + struct darshan_job *log_job_p; + char *log_exemnt_p; + void *log_name_p; + void *log_mod_p; + + /* darshan-core internal data structures */ + struct darshan_core_module* mod_array[DARSHAN_MAX_MODS]; + int mod_mem_used; + struct darshan_core_name_record_ref *name_hash; + int name_mem_used; + double wtime_offset; + char *comp_buf; +#ifdef __DARSHAN_ENABLE_MMAP_LOGS + char mmap_log_name[PATH_MAX]; +#endif +}; + void darshan_core_initialize(int argc, char **argv); void darshan_core_shutdown(void); diff --git a/darshan-runtime/darshan-runtime-config.h.in b/darshan-runtime/darshan-runtime-config.h.in index a9b831fbd6f668a077f0229b8504dc9bd73de646..fe93e563f120ecdedd72c20dfb5523cca8df6413 100644 --- a/darshan-runtime/darshan-runtime-config.h.in +++ b/darshan-runtime/darshan-runtime-config.h.in @@ -3,9 +3,6 @@ /* Define if building universal (internal helper macro) */ #undef AC_APPLE_UNIVERSAL_BUILD -/* Define if cuserid() should be disabled */ -#undef DARSHAN_DISABLE_CUSERID - /* Define if struct aiocb64 type is defined */ #undef HAVE_AIOCB64 @@ -87,6 +84,12 @@ # endif #endif +/* Define if cuserid() should be disabled */ +#undef __DARSHAN_DISABLE_CUSERID + +/* Define if Darshan should mmap data structures to log file */ +#undef __DARSHAN_ENABLE_MMAP_LOGS + /* Define if Darshan should set log files to be group readable */ #undef __DARSHAN_GROUP_READABLE_LOGS diff --git a/darshan-runtime/darshan.h b/darshan-runtime/darshan.h index
1019e50251b926bef50dbb2dcbe95dcf3dd33dc5..b58c17d868af950dd6e3a03c64f10395ae992869 100644 --- a/darshan-runtime/darshan.h +++ b/darshan-runtime/darshan.h @@ -53,35 +53,25 @@ #endif -/* module developers provide the following functions to darshan-core */ -struct darshan_module_funcs -{ - /* perform any necessary pre-shutdown steps - * - * NOTE: this typically includes disabling wrapper functions so - * darshan-core can shutdown in a consistent state. - */ - void (*begin_shutdown)(void); - /* retrieve module data to write to log file - * - * NOTE: module developers can use this function to run collective - * MPI operations at shutdown time. Typically this functionality - * has been used to reduce records shared globablly (given in the - * 'shared_recs' array) into a single data record. - */ - void (*get_output_data)( - MPI_Comm mod_comm, /* MPI communicator to run collectives with */ - darshan_record_id *shared_recs, /* list of shared data record ids */ - int shared_rec_count, /* count of shared data records */ - void** mod_buf, /* output parameter to save module buffer address */ - int* mod_buf_sz /* output parameter to save module buffer size */ - ); - /* shutdown module data structures */ - void (*shutdown)(void); -}; - -/* paths that darshan will not trace */ -extern char* darshan_path_exclusions[]; /* defined in lib/darshan-core.c */ +/* default number of records to attempt to store for each module */ +#define DARSHAN_DEF_MOD_REC_COUNT 1024 + +/* module developers must define a 'darshan_module_shutdown' function + * for allowing darshan-core to call into a module and retrieve final + * output data to be saved in the log. + * + * NOTE: module developers can use this function to run collective + * MPI operations at shutdown time. Typically this functionality + * has been used to reduce records shared globally (given in the + * 'shared_recs' array) into a single data record.
+ */ +typedef void (*darshan_module_shutdown)( + MPI_Comm mod_comm, /* MPI communicator to run collectives with */ + darshan_record_id *shared_recs, /* list of shared data record ids */ + int shared_rec_count, /* count of shared data records */ + void **mod_buf, /* output parameter to save module buffer address */ + int *mod_buf_sz /* output parameter to save module buffer size */ +); /***************************************************** * darshan-core functions exported to darshan modules * @@ -91,18 +81,21 @@ extern char* darshan_path_exclusions[]; /* defined in lib/darshan-core.c */ * * Register module identifier 'mod_id' with the darshan-core runtime * environment, allowing the module to store I/O characterization data. - * 'funcs' is a pointer to a structure containing each of the function - * pointers required by darshan-core to shut down the module. The function - * returns the following integers passed in as pointers: 'rank' is the - * MPI rank of the calling process, 'mod_mem_limit' is the maximum amount - * of memory the module may use, and 'sys_mem_alignment' is the configured - * memory alignment value Darshan was configured with. + * 'mod_shutdown_func' is a pointer to a function responsible for + * shutting down the module and returning final output data to darshan-core. + * 'inout_mod_buf_size' is an input/output argument, with it being + * set to the requested amount of module memory on input, and set to + * the amount allocated by darshan-core on output. If given, 'rank' is + * a pointer to an integer which will contain the calling process's + * MPI rank on return. If given, 'sys_mem_alignment' is a pointer to + * an integer which will contain the memory alignment value Darshan + * was configured with on return.
+ */ void darshan_core_register_module( darshan_module_id mod_id, - struct darshan_module_funcs *funcs, + darshan_module_shutdown mod_shutdown_func, + int *inout_mod_buf_size, int *rank, - int *mod_mem_limit, int *sys_mem_alignment); /* darshan_core_unregister_module() @@ -113,40 +106,34 @@ void darshan_core_register_module( void darshan_core_unregister_module( darshan_module_id mod_id); +/* darshan_core_gen_record_id() + * + * Returns the Darshan record ID corresponding to input string 'name'. + */ +darshan_record_id darshan_core_gen_record_id( + const char *name); + /* darshan_core_register_record() * * Register a record with the darshan-core runtime, allowing it to be * properly tracked and (potentially) correlated with records from other - * modules. 'name' is the the name of the Darshan record (e.g., the full - * file path) and 'len' is the size of the name pointer (string length - * for string names). 'mod_id' is the identifier of the calling module, - * 'printable_flag' indicates whether the name is a string, and - * 'mod_limit_flag' is set if the calling module is out of memory (to - * prevent darshan-core from creating new records and to just search - * through existing records). 'rec_id' is an output pointer storing the - * correspoing Darshan record identifier and 'file_alignment' is an output - * pointer storing the file system alignment value for the given record. + * modules. 'rec_id' is the Darshan record id as given by the + * `darshan_core_gen_record_id` function. 'name' is the name of the + * Darshan record (e.g., the full file path), which for now is just a + * string. 'mod_id' is the identifier of the calling module. 'rec_len' + * is the size of the record being registered with Darshan. If given, + * 'file_alignment' is a pointer to an integer which on return will + * contain the corresponding file system alignment of the file system + * path 'name' resides on.
Returns a pointer to the address the record + * should be written to on success, NULL on failure. */ -void darshan_core_register_record( - void *name, - int len, +void *darshan_core_register_record( + darshan_record_id rec_id, + const char *name, darshan_module_id mod_id, - int printable_flag, - int mod_limit_flag, - darshan_record_id *rec_id, + int rec_len, int *file_alignment); -/* darshan_core_unregister_record() - * - * Unregister record identifier 'rec_id' in the darshan-core runtime. - * This unregister is only in the context of module identifier 'mod_id', - * meaning that if the file record has other module's associated with - * it, then the record won't be completely removed. - */ -void darshan_core_unregister_record( - darshan_record_id rec_id, - darshan_module_id mod_id); - /* darshan_core_wtime() * * Returns the elapsed time relative to (roughly) the start of @@ -154,4 +141,12 @@ void darshan_core_unregister_record( */ double darshan_core_wtime(void); +/* darshan_core_excluded_path() + * + * Returns true (1) if the given file path 'path' is in Darshan's + * list of excluded file paths, false (0) otherwise. + */ +int darshan_core_excluded_path( + const char * path); + #endif /* __DARSHAN_H */ diff --git a/darshan-runtime/doc/darshan-runtime.txt b/darshan-runtime/doc/darshan-runtime.txt index 3ee748a41bfd2a76d17dbb921b8df8406c3cc19a..2853dd1e6342488d7c26f17f31d892baaf9b889c 100644 --- a/darshan-runtime/doc/darshan-runtime.txt +++ b/darshan-runtime/doc/darshan-runtime.txt @@ -65,6 +65,7 @@ each Darshan module can consume. * `--with-zlib=`: specifies an alternate location for the zlib development header and library. * `CC=`: specifies the MPI C compiler to use for compilation. +* `--enable-mmap-logs`: enables the use of Darshan's mmap log file mechanism. * `--disable-cuserid`: disables use of cuserid() at runtime. 
* `--disable-ld-preload`: disables building of the Darshan LD_PRELOAD library * `--disable-bgq-mod`: disables building of the BG/Q module (default checks @@ -152,7 +153,7 @@ the darshan-enabled MPI compiler scripts. [[static-prof]] The MPICH MPI implementation supports the specification of a profiling library -configuration, then it can be used to insert Darshan instrumentation without +configuration that can be used to insert Darshan instrumentation without modifying the existing MPI compiler script. Example profiling configuration files are installed with Darshan 2.3.1 and later. You can enable a profiling configuration using environment variables or command line arguments to the @@ -339,7 +340,7 @@ PrgEnv-gnu". Please see your site documentation for information about how to switch programming environments. The following example shows how to configure and build Darshan on a Cray -system using either the GNU programming environment. Adjust the +system using the GNU programming environment. Adjust the --with-log-path and --prefix arguments to point to the desired log file path and installation path, respectively. @@ -529,7 +530,8 @@ behavior at runtime: * DARSHAN_DISABLE_SHARED_REDUCTION: disables the step in Darshan aggregation in which files that were accessed by all ranks are collapsed into a single cumulative file record at rank 0. This option retains more per-process information at the expense of creating larger log files. Note that it is up to individual instrumentation module implementations whether this environment variable is actually honored. * DARSHAN_LOGPATH: specifies the path to write Darshan log files to. Note that this directory needs to be formatted using the darshan-mk-log-dirs script. * DARSHAN_LOGFILE: specifies the path (directory + Darshan log file name) to write the output Darshan log to. 
This overrides the default Darshan behavior of automatically generating a log file name and adding it to a log file directory formatted using darshan-mk-log-dirs script. -* DARSHAN_MODMEM: specifies the maximum amount of memory (in MiB) a Darshan instrumentation module can consume at runtime. +* DARSHAN_MODMEM: specifies the maximum amount of memory (in MiB) Darshan instrumentation modules can collectively consume at runtime (if not specified, Darshan uses a default quota of 2 MiB). +* DARSHAN_MMAP_LOGPATH: if Darshan's mmap log file mechanism is enabled, this variable specifies what path the mmap log files should be stored in (if not specified, log files will be stored in `/tmp`). == Debugging diff --git a/darshan-runtime/lib/darshan-bgq.c b/darshan-runtime/lib/darshan-bgq.c index fe8632052987caccff9956971d23930121630bac..2c723ccbaf6f25428d7d823d880a1e6331c8721b 100644 --- a/darshan-runtime/lib/darshan-bgq.c +++ b/darshan-runtime/lib/darshan-bgq.c @@ -37,26 +37,20 @@ */ struct bgq_runtime { - struct darshan_bgq_record record; + struct darshan_bgq_record *record; }; static struct bgq_runtime *bgq_runtime = NULL; static pthread_mutex_t bgq_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP; -/* the instrumentation_disabled flag is used to toggle functions on/off */ -static int instrumentation_disabled = 0; - /* my_rank indicates the MPI rank of this process */ static int my_rank = -1; -static int darshan_mem_alignment = 1; /* internal helper functions for the BGQ module */ void bgq_runtime_initialize(void); -/* forward declaration for module functions needed to interface with darshan-core */ -static void bgq_begin_shutdown(void); -static void bgq_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs, int shared_rec_count, void **buffer, int *size); -static void bgq_shutdown(void); +/* forward declaration for shutdown function needed to interface with darshan-core */ +static void bgq_shutdown(MPI_Comm mod_comm, darshan_record_id *shared_recs, int
shared_rec_count, void **buffer, int *size); /* macros for obtaining/releasing the BGQ module lock */ #define BGQ_LOCK() pthread_mutex_lock(&bgq_runtime_mutex) @@ -65,7 +59,7 @@ static void bgq_shutdown(void); /* * Function which updates all the counter data */ -static void capture(struct darshan_bgq_record *rec) +static void capture(struct darshan_bgq_record *rec, darshan_record_id rec_id) { Personality_t person; int r; @@ -93,7 +87,8 @@ static void capture(struct darshan_bgq_record *rec) rec->counters[BGQ_DDRPERNODE] = person.DDR_Config.DDRSizeMB; } - rec->rank = my_rank; + rec->base_rec.id = rec_id; + rec->base_rec.rank = my_rank; rec->fcounters[BGQ_F_TIMESTAMP] = darshan_core_wtime(); return; @@ -105,42 +100,33 @@ static void capture(struct darshan_bgq_record *rec) void bgq_runtime_initialize() { - /* struct of function pointers for interfacing with darshan-core */ - struct darshan_module_funcs bgq_mod_fns = - { - .begin_shutdown = bgq_begin_shutdown, - .get_output_data = bgq_get_output_data, - .shutdown = bgq_shutdown - }; - int mem_limit; - char *recname = "darshan-internal-bgq"; + int bgq_buf_size; + darshan_record_id rec_id; BGQ_LOCK(); - /* don't do anything if already initialized or instrumenation is disabled */ - if(bgq_runtime || instrumentation_disabled) + /* don't do anything if already initialized */ + if(bgq_runtime) + { + BGQ_UNLOCK(); return; + } + + /* we just need to store one single record */ + bgq_buf_size = sizeof(struct darshan_bgq_record); /* register the BG/Q module with the darshan-core component */ darshan_core_register_module( DARSHAN_BGQ_MOD, - &bgq_mod_fns, + &bgq_shutdown, + &bgq_buf_size, &my_rank, - &mem_limit, - &darshan_mem_alignment); + NULL); - /* return if no memory assigned by darshan-core */ - if(mem_limit == 0) + /* not enough memory to fit bgq module record */ + if(bgq_buf_size < sizeof(struct darshan_bgq_record)) { - instrumentation_disabled = 1; - BGQ_UNLOCK(); - return; - } - - /* not enough memory to fit bgq module */ 
- if (mem_limit < sizeof(*bgq_runtime)) - { - instrumentation_disabled = 1; + darshan_core_unregister_module(DARSHAN_BGQ_MOD); BGQ_UNLOCK(); return; } @@ -149,51 +135,31 @@ void bgq_runtime_initialize() bgq_runtime = malloc(sizeof(*bgq_runtime)); if(!bgq_runtime) { - instrumentation_disabled = 1; + darshan_core_unregister_module(DARSHAN_BGQ_MOD); BGQ_UNLOCK(); return; } memset(bgq_runtime, 0, sizeof(*bgq_runtime)); - darshan_core_register_record( - recname, - strlen(recname), + rec_id = darshan_core_gen_record_id("darshan-bgq-record"); + + /* register the bgq file record with darshan-core */ + bgq_runtime->record = darshan_core_register_record( + rec_id, + NULL, DARSHAN_BGQ_MOD, - 1, - 0, - &bgq_runtime->record.f_id, - &bgq_runtime->record.alignment); - - /* if record is set to 0, darshan-core is out of space and will not - * track this record, so we should avoid tracking it, too - */ - if(bgq_runtime->record.f_id == 0) + sizeof(struct darshan_bgq_record), + NULL); + if(!(bgq_runtime->record)) { - instrumentation_disabled = 1; + darshan_core_unregister_module(DARSHAN_BGQ_MOD); free(bgq_runtime); bgq_runtime = NULL; BGQ_UNLOCK(); return; } - capture(&bgq_runtime->record); - - BGQ_UNLOCK(); - - return; -} - -/* Perform any necessary steps prior to shutting down for the BGQ module. */ -static void bgq_begin_shutdown() -{ - BGQ_LOCK(); - - /* In general, we want to disable all wrappers while Darshan shuts down. - * This is to avoid race conditions and ensure data consistency, as - * executing wrappers could potentially modify module state while Darshan - * is in the process of shutting down. - */ - instrumentation_disabled = 1; + capture(bgq_runtime->record, rec_id); BGQ_UNLOCK(); @@ -207,22 +173,25 @@ static int cmpr(const void *p1, const void *p2) return ((*a == *b) ? 0 : ((*a < *b) ? 
-1 : 1)); } +/******************************************************************************** + * shutdown function exported by this module for coordinating with darshan-core * + ********************************************************************************/ + /* Pass output data for the BGQ module back to darshan-core to log to file. */ -static void bgq_get_output_data( +static void bgq_shutdown( MPI_Comm mod_comm, darshan_record_id *shared_recs, int shared_rec_count, void **buffer, int *size) { - /* Just set the output buffer to point at the array of the BGQ module's - * I/O records, and set the output size according to the number of records - * currently being tracked. - */ int nprocs; int result; uint64_t *ion_ids; + BGQ_LOCK(); + assert(bgq_runtime); + if (my_rank == 0) { DARSHAN_MPI_CALL(PMPI_Comm_size)(mod_comm, &nprocs); @@ -231,12 +200,13 @@ static void bgq_get_output_data( } DARSHAN_MPI_CALL(PMPI_Bcast)(&result, 1, MPI_INT, 0, mod_comm); - if (bgq_runtime && result) + /* calculate the number of I/O nodes */ + if (result) { int i, found; uint64_t val; - DARSHAN_MPI_CALL(PMPI_Gather)(&bgq_runtime->record.counters[BGQ_INODES], + DARSHAN_MPI_CALL(PMPI_Gather)(&bgq_runtime->record->counters[BGQ_INODES], 1, MPI_LONG_LONG_INT, ion_ids, @@ -255,32 +225,21 @@ static void bgq_get_output_data( found += 1; } } - bgq_runtime->record.counters[BGQ_INODES] = found; + bgq_runtime->record->counters[BGQ_INODES] = found; } } - if ((bgq_runtime) && (my_rank == 0)) - { - *buffer = &bgq_runtime->record; - *size = sizeof(struct darshan_bgq_record); - } - else + /* non-zero ranks throw out their BGQ record */ + if (my_rank != 0) { *buffer = NULL; *size = 0; } - return; -} + free(bgq_runtime); + bgq_runtime = NULL; -/* Shutdown the BGQ module by freeing up all data structures.
*/ -static void bgq_shutdown() -{ - if (bgq_runtime) - { - free(bgq_runtime); - bgq_runtime = NULL; - } + BGQ_UNLOCK(); return; } diff --git a/darshan-runtime/lib/darshan-common.c b/darshan-runtime/lib/darshan-common.c index 6f0468d41164afb0d2e8f3bb2ab752576dd66c51..6b9e9da26ef127f036bc4413a3bed5ae3608446b 100644 --- a/darshan-runtime/lib/darshan-common.c +++ b/darshan-runtime/lib/darshan-common.c @@ -15,11 +15,110 @@ #include #include +#include "uthash.h" + #include "darshan.h" -static int darshan_common_val_compare(const void* a_p, const void* b_p); -static void darshan_common_val_walker(const void* nodep, const VISIT which, - const int depth); +/* track opaque record reference using a hash link */ +struct darshan_record_ref_tracker +{ + void *rec_ref_p; + UT_hash_handle hlink; +}; + +void *darshan_lookup_record_ref(void *hash_head, void *handle, size_t handle_sz) +{ + struct darshan_record_ref_tracker *ref_tracker; + struct darshan_record_ref_tracker *ref_tracker_head = + (struct darshan_record_ref_tracker *)hash_head; + + /* search the hash table for the given handle */ + HASH_FIND(hlink, ref_tracker_head, handle, handle_sz, ref_tracker); + if(ref_tracker) + return(ref_tracker->rec_ref_p); + else + return(NULL); +} + +int darshan_add_record_ref(void **hash_head_p, void *handle, size_t handle_sz, + void *rec_ref_p) +{ + struct darshan_record_ref_tracker *ref_tracker; + struct darshan_record_ref_tracker *ref_tracker_head = + *(struct darshan_record_ref_tracker **)hash_head_p; + void *handle_p; + + /* allocate a reference tracker, with room to store the handle at the end */ + ref_tracker = malloc(sizeof(*ref_tracker) + handle_sz); + if(!ref_tracker) + return(0); + memset(ref_tracker, 0, sizeof(*ref_tracker) + handle_sz); + + /* initialize the reference tracker and add it to the hash table */ + ref_tracker->rec_ref_p = rec_ref_p; + handle_p = (char *)ref_tracker + sizeof(*ref_tracker); + memcpy(handle_p, handle, handle_sz); + HASH_ADD_KEYPTR(hlink,
ref_tracker_head, handle_p, handle_sz, ref_tracker); + *hash_head_p = ref_tracker_head; + return(1); +} + +void *darshan_delete_record_ref(void **hash_head_p, void *handle, size_t handle_sz) +{ + struct darshan_record_ref_tracker *ref_tracker; + struct darshan_record_ref_tracker *ref_tracker_head = + *(struct darshan_record_ref_tracker **)hash_head_p; + void *rec_ref_p; + + /* find the reference tracker for this handle */ + HASH_FIND(hlink, ref_tracker_head, handle, handle_sz, ref_tracker); + if(!ref_tracker) + return(NULL); + + /* if found, delete from hash table and return the record reference pointer */ + HASH_DELETE(hlink, ref_tracker_head, ref_tracker); + *hash_head_p = ref_tracker_head; + rec_ref_p = ref_tracker->rec_ref_p; + free(ref_tracker); + + return(rec_ref_p); +} + +void darshan_clear_record_refs(void **hash_head_p, int free_flag) +{ + struct darshan_record_ref_tracker *ref_tracker, *tmp; + struct darshan_record_ref_tracker *ref_tracker_head = + *(struct darshan_record_ref_tracker **)hash_head_p; + + /* iterate the hash table and remove/free all reference trackers */ + HASH_ITER(hlink, ref_tracker_head, ref_tracker, tmp) + { + HASH_DELETE(hlink, ref_tracker_head, ref_tracker); + if(free_flag) + free(ref_tracker->rec_ref_p); + free(ref_tracker); + } + *hash_head_p = ref_tracker_head; + + return; +} + +void darshan_iter_record_refs(void *hash_head, void (*iter_action)(void *)) +{ + struct darshan_record_ref_tracker *ref_tracker, *tmp; + struct darshan_record_ref_tracker *ref_tracker_head = + (struct darshan_record_ref_tracker *)hash_head; + + /* iterate the hash table, performing the given action for each reference + * tracker's corresponding record reference pointer + */ + HASH_ITER(hlink, ref_tracker_head, ref_tracker, tmp) + { + iter_action(ref_tracker->rec_ref_p); + } + + return; +} char* darshan_clean_file_path(const char* path) { @@ -78,12 +177,51 @@ char* darshan_clean_file_path(const char* path) return(newpath); } -/* HACK: global variables for 
determining 4 most common values */ -static int64_t* walker_val_p = NULL; -static int64_t* walker_cnt_p = NULL; +/* compare function for sorting file records according to their + * darshan_base_record structure. Records are sorted first by + * descending rank (to get all shared records, with rank set to -1, in + * a contiguous region at the end of the record buffer) then + * by ascending record identifiers (which are just unsigned integers). + */ +static int darshan_base_record_compare(const void* a_p, const void* b_p) +{ + const struct darshan_base_record *a = a_p; + const struct darshan_base_record *b = b_p; + + if(a->rank < b->rank) + return(1); + if(a->rank > b->rank) + return(-1); + + /* same rank, sort by ascending record ids */ + if(a->id > b->id) + return(1); + if(a->id < b->id) + return(-1); + + return(0); +} + +void darshan_record_sort(void *rec_buf, int rec_count, int rec_size) +{ + qsort(rec_buf, rec_count, rec_size, darshan_base_record_compare); + return; +} + +static int darshan_common_val_compare(const void *a_p, const void *b_p) +{ + const struct darshan_common_val_counter* a = a_p; + const struct darshan_common_val_counter* b = b_p; + + if(a->val < b->val) + return(-1); + if(a->val > b->val) + return(1); + return(0); +} void darshan_common_val_counter(void **common_val_root, int *common_val_count, - int64_t val) + int64_t val, int64_t *common_val_p, int64_t *common_cnt_p) { struct darshan_common_val_counter* counter; struct darshan_common_val_counter* found; @@ -102,12 +240,10 @@ void darshan_common_val_counter(void **common_val_root, int *common_val_count, { found = *(struct darshan_common_val_counter**)tmp; found->freq++; - return; } - - /* we can add a new one as long as we haven't hit the limit */ - if(*common_val_count < DARSHAN_COMMON_VAL_MAX_RUNTIME_COUNT) + else if(*common_val_count < DARSHAN_COMMON_VAL_MAX_RUNTIME_COUNT) { + /* we can add a new one as long as we haven't hit the limit */ counter = malloc(sizeof(*counter)); if(!counter) { @@ 
-127,52 +263,13 @@ void darshan_common_val_counter(void **common_val_root, int *common_val_count, (*common_val_count)++; } - return; -} - -void darshan_walk_common_vals(void *common_val_root, int64_t* val_p, - int64_t* cnt_p) -{ - walker_val_p = val_p; - walker_cnt_p = cnt_p; - - twalk(common_val_root, darshan_common_val_walker); - tdestroy(common_val_root, free); + /* update common access counters as we go */ + DARSHAN_COMMON_VAL_COUNTER_INC(common_val_p, common_cnt_p, + found->val, found->freq, 1); return; } -static void darshan_common_val_walker(const void* nodep, const VISIT which, - const int depth) -{ - struct darshan_common_val_counter* counter; - - switch (which) - { - case postorder: - case leaf: - counter = *(struct darshan_common_val_counter**)nodep; - DARSHAN_COMMON_VAL_COUNTER_INC(walker_val_p, walker_cnt_p, - counter->val, counter->freq); - default: - break; - } - - return; -} - -static int darshan_common_val_compare(const void* a_p, const void* b_p) -{ - const struct darshan_common_val_counter* a = a_p; - const struct darshan_common_val_counter* b = b_p; - - if(a->val < b->val) - return(-1); - if(a->val > b->val) - return(1); - return(0); -} - void darshan_variance_reduce(void *invec, void *inoutvec, int *len, MPI_Datatype *dt) { diff --git a/darshan-runtime/lib/darshan-core.c b/darshan-runtime/lib/darshan-core.c index f97d5211951845188d634561863ea9a60ed1f4c5..69e1d164925ece2b4e60f1e605bf1cde5c2c4ed8 100644 --- a/darshan-runtime/lib/darshan-core.c +++ b/darshan-runtime/lib/darshan-core.c @@ -18,8 +18,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -39,6 +41,7 @@ static pthread_mutex_t darshan_core_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_ static int my_rank = -1; static int nprocs = -1; static int darshan_mem_alignment = 1; +static long darshan_mod_mem_quota = DARSHAN_MOD_MEM_MAX; /* paths prefixed with the following directories are not traced by darshan */ char* darshan_path_exclusions[] = { @@ 
-88,17 +91,23 @@ static struct mnt_data mnt_data_array[DARSHAN_MAX_MNTS]; static int mnt_data_count = 0; /* prototypes for internal helper functions */ -static void darshan_get_logfile_name( - char* logfile_name, int jobid, struct tm* start_tm); +#ifdef __DARSHAN_ENABLE_MMAP_LOGS +static void *darshan_init_mmap_log( + struct darshan_core_runtime* core, int jobid); +#endif static void darshan_log_record_hints_and_ver( struct darshan_core_runtime* core); -static void darshan_get_exe_and_mounts_root( - struct darshan_core_runtime *core, char* trailing_data, - int space_left); -static char* darshan_get_exe_and_mounts( - struct darshan_core_runtime *core); -static void darshan_block_size_from_path( - const char *path, int *block_size); +static void darshan_get_exe_and_mounts( + struct darshan_core_runtime *core, int argc, char **argv); +static int darshan_add_name_record_ref( + struct darshan_core_runtime *core, darshan_record_id rec_id, + const char *name, darshan_module_id mod_id); +static int darshan_block_size_from_path( + const char *path); +static void darshan_get_user_name( + char *user); +static void darshan_get_logfile_name( + char* logfile_name, int jobid, struct tm* start_tm); static void darshan_get_shared_records( struct darshan_core_runtime *core, darshan_record_id **shared_recs, int *shared_rec_cnt); @@ -107,7 +116,7 @@ static int darshan_log_open_all( static int darshan_deflate_buffer( void **pointers, int *lengths, int count, char *comp_buf, int *comp_buf_length); -static int darshan_log_write_record_hash( +static int darshan_log_write_name_record_hash( MPI_File log_fh, struct darshan_core_runtime *core, uint64_t *inout_off); static int darshan_log_append_all( @@ -121,15 +130,14 @@ static void darshan_core_cleanup( void darshan_core_initialize(int argc, char **argv) { struct darshan_core_runtime *init_core = NULL; - int i; int internal_timing_flag = 0; double init_start, init_time, init_max; char *envstr; - char* truncate_string = ""; - int 
truncate_offset; - int chars_left = 0; + char *jobid_str; + int jobid; int ret; int tmpval; + int i; DARSHAN_MPI_CALL(PMPI_Comm_size)(MPI_COMM_WORLD, &nprocs); DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &my_rank); @@ -162,63 +170,120 @@ void darshan_core_initialize(int argc, char **argv) } /* avoid floating point errors on faulty input */ - if (darshan_mem_alignment < 1) + if(darshan_mem_alignment < 1) { darshan_mem_alignment = 1; } - /* allocate structure to track darshan_core_runtime information */ + /* Use DARSHAN_JOBID_OVERRIDE for the env var for __DARSHAN_JOBID */ + envstr = getenv(DARSHAN_JOBID_OVERRIDE); + if(!envstr) + { + envstr = __DARSHAN_JOBID; + } + + /* find a job id */ + jobid_str = getenv(envstr); + if(jobid_str) + { + /* in cobalt we can find it in env var */ + ret = sscanf(jobid_str, "%d", &jobid); + } + if(!jobid_str || ret != 1) + { + /* use pid as fall back */ + jobid = getpid(); + } + + /* set the memory quota for darshan modules' records */ + envstr = getenv(DARSHAN_MOD_MEM_OVERRIDE); + if(envstr) + { + ret = sscanf(envstr, "%d", &tmpval); + /* silently ignore if the env variable is set poorly */ + if(ret == 1 && tmpval > 0) + { + darshan_mod_mem_quota = tmpval * 1024 * 1024; /* convert from MiB */ + } + } + + /* allocate structure to track darshan core runtime information */ init_core = malloc(sizeof(*init_core)); if(init_core) { memset(init_core, 0, sizeof(*init_core)); - - init_core->log_job.uid = getuid(); - init_core->log_job.start_time = time(NULL); - init_core->log_job.nprocs = nprocs; init_core->wtime_offset = DARSHAN_MPI_CALL(PMPI_Wtime)(); - /* record exe and arguments */ - for(i=0; ilog_hdr_p = malloc(sizeof(struct darshan_header)); + init_core->log_job_p = malloc(sizeof(struct darshan_job)); + init_core->log_exemnt_p = malloc(DARSHAN_EXE_LEN+1); + init_core->log_name_p = malloc(DARSHAN_NAME_RECORD_BUF_SIZE); + init_core->log_mod_p = malloc(darshan_mod_mem_quota); + + if(!(init_core->log_hdr_p) || !(init_core->log_job_p) || 
+ !(init_core->log_exemnt_p) || !(init_core->log_name_p) || + !(init_core->log_mod_p)) { - chars_left = DARSHAN_EXE_LEN-strlen(init_core->exe); - strncat(init_core->exe, argv[i], chars_left); - if(i < (argc-1)) - { - chars_left = DARSHAN_EXE_LEN-strlen(init_core->exe); - strncat(init_core->exe, " ", chars_left); - } + free(init_core); + return; } - - /* if we don't see any arguments, then use glibc symbol to get - * program name at least (this happens in fortran) + /* if allocation succeeds, zero fill memory regions */ + memset(init_core->log_hdr_p, 0, sizeof(struct darshan_header)); + memset(init_core->log_job_p, 0, sizeof(struct darshan_job)); + memset(init_core->log_exemnt_p, 0, DARSHAN_EXE_LEN+1); + memset(init_core->log_name_p, 0, DARSHAN_NAME_RECORD_BUF_SIZE); + memset(init_core->log_mod_p, 0, darshan_mod_mem_quota); +#else + /* if mmap logs are enabled, we need to initialize the mmap region + * before setting the corresponding log file region pointers */ - if(argc == 0) + void *mmap_p = darshan_init_mmap_log(init_core, jobid); + if(!mmap_p) { - chars_left = DARSHAN_EXE_LEN-strlen(init_core->exe); - strncat(init_core->exe, __progname_full, chars_left); - chars_left = DARSHAN_EXE_LEN-strlen(init_core->exe); - strncat(init_core->exe, " ", chars_left); + free(init_core); + return; } - if(chars_left == 0) - { - /* we ran out of room; mark that string was truncated */ - truncate_offset = DARSHAN_EXE_LEN - strlen(truncate_string); - sprintf(&init_core->exe[truncate_offset], "%s", - truncate_string); - } + /* set the memory pointers for each log file region */ + init_core->log_hdr_p = (struct darshan_header *)mmap_p; + init_core->log_job_p = (struct darshan_job *) + ((char *)init_core->log_hdr_p + sizeof(struct darshan_header)); + init_core->log_exemnt_p = (char *) + ((char *)init_core->log_job_p + sizeof(struct darshan_job)); + init_core->log_name_p = (void *) + ((char *)init_core->log_exemnt_p + DARSHAN_EXE_LEN + 1); + init_core->log_mod_p = (void *) + ((char 
*)init_core->log_name_p + DARSHAN_NAME_RECORD_BUF_SIZE); + + /* set header fields needed for the mmap log mechanism */ + init_core->log_hdr_p->comp_type = DARSHAN_NO_COMP; + init_core->log_hdr_p->name_map.off = + ((char *)init_core->log_name_p - (char *)init_core->log_hdr_p); +#endif + + /* set known header fields for the log file */ + strcpy(init_core->log_hdr_p->version_string, DARSHAN_LOG_VERSION); + init_core->log_hdr_p->magic_nr = DARSHAN_MAGIC_NR; + + /* set known job-level metadata fields for the log file */ + init_core->log_job_p->uid = getuid(); + init_core->log_job_p->start_time = time(NULL); + init_core->log_job_p->nprocs = nprocs; + init_core->log_job_p->jobid = (int64_t)jobid; + + /* if we are using any hints to write the log file, then record those + * hints with the darshan job information + */ + darshan_log_record_hints_and_ver(init_core); /* collect information about command line and mounted file systems */ - init_core->trailing_data = darshan_get_exe_and_mounts(init_core); + darshan_get_exe_and_mounts(init_core, argc, argv); - /* bootstrap any modules with static initialization routines */ - i = 0; - while(mod_static_init_fns[i]) - { - (*mod_static_init_fns[i])(); - i++; - } + darshan_core = init_core; } } @@ -234,41 +299,49 @@ void darshan_core_initialize(int argc, char **argv) } } - /* if darshan was successfully initialized, set the global pointer */ + /* if darshan was successfully initialized, set the global pointer and + * bootstrap any modules with static initialization routines + */ if(init_core) + { darshan_core = init_core; + i = 0; + while(mod_static_init_fns[i]) + { + (*mod_static_init_fns[i])(); + i++; + } + } + return; } void darshan_core_shutdown() { - int i; - char *logfile_name; struct darshan_core_runtime *final_core; int internal_timing_flag = 0; - char *envjobid; - char *jobid_str; - int jobid; struct tm *start_tm; time_t start_time_tmp; - int ret = 0; - int all_ret = 0; int64_t first_start_time; int64_t last_end_time; + double 
start_log_time; + double tm_end; + double open1, open2; + double job1, job2; + double rec1, rec2; + double mod1[DARSHAN_MAX_MODS] = {0}; + double mod2[DARSHAN_MAX_MODS] = {0}; + double header1, header2; + char *logfile_name; int local_mod_use[DARSHAN_MAX_MODS] = {0}; int global_mod_use_count[DARSHAN_MAX_MODS] = {0}; darshan_record_id *shared_recs; darshan_record_id *mod_shared_recs; int shared_rec_cnt = 0; - double start_log_time; - double open1 = 0, open2 = 0; - double job1 = 0, job2 = 0; - double rec1 = 0, rec2 = 0; - double mod1[DARSHAN_MAX_MODS] = {0}; - double mod2[DARSHAN_MAX_MODS] = {0}; - double header1 = 0, header2 = 0; - double tm_end; + int ret = 0; + int all_ret = 0; + int i; uint64_t gz_fp = 0; MPI_File log_fh; MPI_Status status; @@ -287,6 +360,30 @@ void darshan_core_shutdown() } final_core = darshan_core; darshan_core = NULL; + DARSHAN_CORE_UNLOCK(); + +#ifdef __DARSHAN_ENABLE_MMAP_LOGS + /* remove the temporary mmap log files */ + /* NOTE: this unlink is not immediate as it must wait for the mapping + * to no longer be referenced, which in our case happens when the + * executable exits. If the application terminates mid-shutdown, then + * there will be no mmap files and no final log file. 
+ */ + unlink(final_core->mmap_log_name); +#endif + + final_core->log_job_p->end_time = time(NULL); + + /* reduce to report first start and last end time across all ranks at rank 0 */ + DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job_p->start_time, &first_start_time, + 1, MPI_INT64_T, MPI_MIN, 0, MPI_COMM_WORLD); + DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job_p->end_time, &last_end_time, + 1, MPI_INT64_T, MPI_MAX, 0, MPI_COMM_WORLD); + if(my_rank == 0) + { + final_core->log_job_p->start_time = first_start_time; + final_core->log_job_p->end_time = last_end_time; + } final_core->comp_buf = malloc(DARSHAN_COMP_BUF_SIZE); if(!(final_core->comp_buf)) @@ -295,20 +392,6 @@ void darshan_core_shutdown() return; } - /* we also need to set which modules were registered on this process and - * call into those modules and give them a chance to perform any necessary - * pre-shutdown steps. - */ - for(i = 0; i < DARSHAN_MAX_MODS; i++) - { - if(final_core->mod_array[i]) - { - local_mod_use[i] = 1; - final_core->mod_array[i]->mod_funcs.begin_shutdown(); - } - } - DARSHAN_CORE_UNLOCK(); - logfile_name = malloc(PATH_MAX); if(!logfile_name) { @@ -316,42 +399,14 @@ void darshan_core_shutdown() return; } - /* set darshan job id/metadata and constuct log file name on rank 0 */ + /* set the log file name on rank 0 */ if(my_rank == 0) { - /* Use DARSHAN_JOBID_OVERRIDE for the env var for __DARSHAN_JOBID */ - envjobid = getenv(DARSHAN_JOBID_OVERRIDE); - if(!envjobid) - { - envjobid = __DARSHAN_JOBID; - } - - /* find a job id */ - jobid_str = getenv(envjobid); - if(jobid_str) - { - /* in cobalt we can find it in env var */ - ret = sscanf(jobid_str, "%d", &jobid); - } - if(!jobid_str || ret != 1) - { - /* use pid as fall back */ - jobid = getpid(); - } - - final_core->log_job.jobid = (int64_t)jobid; - - /* if we are using any hints to write the log file, then record those - * hints with the darshan job information - */ - darshan_log_record_hints_and_ver(final_core); - /* use human 
readable start time format in log filename */ - start_time_tmp = final_core->log_job.start_time; + start_time_tmp = final_core->log_job_p->start_time; start_tm = localtime(&start_time_tmp); - /* construct log file name */ - darshan_get_logfile_name(logfile_name, jobid, start_tm); + darshan_get_logfile_name(logfile_name, final_core->log_job_p->jobid, start_tm); } /* broadcast log file name */ @@ -366,21 +421,19 @@ void darshan_core_shutdown() return; } - final_core->log_job.end_time = time(NULL); - - /* reduce to report first start time and last end time across all ranks - * at rank 0 + /* set which modules were registered locally, and call into + * them to disable further instrumentation and to perform any + * other pre-shutdown steps */ - DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.start_time, &first_start_time, 1, MPI_LONG_LONG, MPI_MIN, 0, MPI_COMM_WORLD); - DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.end_time, &last_end_time, 1, MPI_LONG_LONG, MPI_MAX, 0, MPI_COMM_WORLD); - if(my_rank == 0) + for(i = 0; i < DARSHAN_MAX_MODS; i++) { - final_core->log_job.start_time = first_start_time; - final_core->log_job.end_time = last_end_time; + if(final_core->mod_array[i]) + local_mod_use[i] = 1; } - /* reduce the number of times a module was opened globally and bcast to everyone */ - DARSHAN_MPI_CALL(PMPI_Allreduce)(local_mod_use, global_mod_use_count, DARSHAN_MAX_MODS, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + /* reduce the number of times a module was opened globally and bcast to everyone */ + DARSHAN_MPI_CALL(PMPI_Allreduce)(local_mod_use, global_mod_use_count, + DARSHAN_MAX_MODS, MPI_INT, MPI_SUM, MPI_COMM_WORLD); /* get a list of records which are shared across all processes */ darshan_get_shared_records(final_core, &shared_recs, &shared_rec_cnt); @@ -412,8 +465,8 @@ void darshan_core_shutdown() /* rank 0 is responsible for writing the compressed darshan job information */ if(my_rank == 0) { - void *pointers[2] = {&final_core->log_job, 
final_core->trailing_data}; - int lengths[2] = {sizeof(struct darshan_job), strlen(final_core->trailing_data)}; + void *pointers[2] = {final_core->log_job_p, final_core->log_exemnt_p}; + int lengths[2] = {sizeof(struct darshan_job), strlen(final_core->log_exemnt_p)}; int comp_buf_sz = 0; /* compress the job info and the trailing mount/exe data */ @@ -432,10 +485,11 @@ void darshan_core_shutdown() final_core->comp_buf, comp_buf_sz, MPI_BYTE, &status); if(all_ret != MPI_SUCCESS) { - fprintf(stderr, "darshan library warning: unable to write job data to log file %s\n", + fprintf(stderr, + "darshan library warning: unable to write job data to log file %s\n", logfile_name); unlink(logfile_name); - + } gz_fp += comp_buf_sz; } @@ -455,18 +509,19 @@ void darshan_core_shutdown() if(internal_timing_flag) rec1 = DARSHAN_MPI_CALL(PMPI_Wtime)(); /* write the record name->id hash to the log file */ - final_core->log_header.rec_map.off = gz_fp; - ret = darshan_log_write_record_hash(log_fh, final_core, &gz_fp); - final_core->log_header.rec_map.len = gz_fp - final_core->log_header.rec_map.off; + final_core->log_hdr_p->name_map.off = gz_fp; + ret = darshan_log_write_name_record_hash(log_fh, final_core, &gz_fp); + final_core->log_hdr_p->name_map.len = gz_fp - final_core->log_hdr_p->name_map.off; - /* error out if unable to write record hash */ + /* error out if unable to write the name record hash */ DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT, MPI_LOR, MPI_COMM_WORLD); if(all_ret != 0) { if(my_rank == 0) { - fprintf(stderr, "darshan library warning: unable to write record hash to log file %s\n", + fprintf(stderr, + "darshan library warning: unable to write record hash to log file %s\n", logfile_name); unlink(logfile_name); } @@ -481,17 +536,16 @@ void darshan_core_shutdown() assert(mod_shared_recs); /* loop over globally used darshan modules and: - * - perform shared file reductions, if possible * - get final output buffer * - compress (zlib) provided output buffer * 
- append compressed buffer to log file - * - add module index info (file offset/length) to log header + * - add module map info (file offset/length) to log header * - shutdown the module */ for(i = 0; i < DARSHAN_MAX_MODS; i++) { struct darshan_core_module* this_mod = final_core->mod_array[i]; - struct darshan_core_record_ref *ref = NULL; + struct darshan_core_name_record_ref *ref = NULL; int mod_shared_rec_cnt = 0; void* mod_buf = NULL; int mod_buf_sz = 0; @@ -501,20 +555,19 @@ void darshan_core_shutdown() { if(my_rank == 0) { - final_core->log_header.mod_map[i].off = 0; - final_core->log_header.mod_map[i].len = 0; + final_core->log_hdr_p->mod_map[i].off = 0; + final_core->log_hdr_p->mod_map[i].len = 0; } continue; } - + if(internal_timing_flag) mod1[i] = DARSHAN_MPI_CALL(PMPI_Wtime)(); - /* set the shared file list for this module */ - memset(mod_shared_recs, 0, shared_rec_cnt * sizeof(darshan_record_id)); + /* set the shared record list for this module */ for(j = 0; j < shared_rec_cnt; j++) { - HASH_FIND(hlink, final_core->rec_hash, &shared_recs[j], + HASH_FIND(hlink, final_core->name_hash, &shared_recs[j], sizeof(darshan_record_id), ref); assert(ref); if(DARSHAN_MOD_FLAG_ISSET(ref->global_mod_flags, i)) @@ -526,19 +579,21 @@ void darshan_core_shutdown() /* if module is registered locally, get the corresponding output buffer * * NOTE: this function can be used to run collective operations across - * modules, if there are file records shared globally. + * modules, if there are records shared globally. 
*/ if(this_mod) { - this_mod->mod_funcs.get_output_data(MPI_COMM_WORLD, mod_shared_recs, + mod_buf = final_core->mod_array[i]->rec_buf_start; + mod_buf_sz = final_core->mod_array[i]->rec_buf_p - mod_buf; + this_mod->mod_shutdown_func(MPI_COMM_WORLD, mod_shared_recs, mod_shared_rec_cnt, &mod_buf, &mod_buf_sz); } /* append this module's data to the darshan log */ - final_core->log_header.mod_map[i].off = gz_fp; + final_core->log_hdr_p->mod_map[i].off = gz_fp; ret = darshan_log_append_all(log_fh, final_core, mod_buf, mod_buf_sz, &gz_fp); - final_core->log_header.mod_map[i].len = - gz_fp - final_core->log_header.mod_map[i].off; + final_core->log_hdr_p->mod_map[i].len = + gz_fp - final_core->log_hdr_p->mod_map[i].off; /* error out if the log append failed */ DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT, @@ -557,39 +612,31 @@ void darshan_core_shutdown() return; } - /* shutdown module if registered locally */ - if(this_mod) - { - this_mod->mod_funcs.shutdown(); - } if(internal_timing_flag) mod2[i] = DARSHAN_MPI_CALL(PMPI_Wtime)(); } if(internal_timing_flag) header1 = DARSHAN_MPI_CALL(PMPI_Wtime)(); - /* write out log header, after running 2 reduction on header variables: + /* write out log header, after running 2 reductions on header variables: * 1) reduce 'partial_flag' variable to determine which modules ran out - * of memory for storing I/O data + * of memory for storing data * 2) reduce 'mod_ver' array to determine which log format version each * module used for this output log */ if(my_rank == 0) { - DARSHAN_MPI_CALL(PMPI_Reduce)(MPI_IN_PLACE, - &(final_core->log_header.partial_flag), 1, MPI_UINT32_T, - MPI_BOR, 0, MPI_COMM_WORLD); - DARSHAN_MPI_CALL(PMPI_Reduce)(MPI_IN_PLACE, - final_core->log_header.mod_ver, DARSHAN_MAX_MODS, MPI_UINT32_T, - MPI_MAX, 0, MPI_COMM_WORLD); - /* rank 0 is responsible for writing the log header */ - /* initialize the remaining header fields */ - strcpy(final_core->log_header.version_string, DARSHAN_LOG_VERSION); - 
final_core->log_header.magic_nr = DARSHAN_MAGIC_NR; - final_core->log_header.comp_type = DARSHAN_ZLIB_COMP; + final_core->log_hdr_p->comp_type = DARSHAN_ZLIB_COMP; + + DARSHAN_MPI_CALL(PMPI_Reduce)( + MPI_IN_PLACE, &(final_core->log_hdr_p->partial_flag), + 1, MPI_UINT32_T, MPI_BOR, 0, MPI_COMM_WORLD); + DARSHAN_MPI_CALL(PMPI_Reduce)( + MPI_IN_PLACE, &(final_core->log_hdr_p->mod_ver), + DARSHAN_MAX_MODS, MPI_UINT32_T, MPI_MAX, 0, MPI_COMM_WORLD); - all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, 0, &(final_core->log_header), + all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, 0, final_core->log_hdr_p, sizeof(struct darshan_header), MPI_BYTE, &status); if(all_ret != MPI_SUCCESS) { @@ -600,12 +647,12 @@ void darshan_core_shutdown() } else { - DARSHAN_MPI_CALL(PMPI_Reduce)(&(final_core->log_header.partial_flag), - &(final_core->log_header.partial_flag), 1, MPI_UINT32_T, - MPI_BOR, 0, MPI_COMM_WORLD); - DARSHAN_MPI_CALL(PMPI_Reduce)(final_core->log_header.mod_ver, - final_core->log_header.mod_ver, DARSHAN_MAX_MODS, MPI_UINT32_T, - MPI_MAX, 0, MPI_COMM_WORLD); + DARSHAN_MPI_CALL(PMPI_Reduce)( + &(final_core->log_hdr_p->partial_flag), &(final_core->log_hdr_p->partial_flag), + 1, MPI_UINT32_T, MPI_BOR, 0, MPI_COMM_WORLD); + DARSHAN_MPI_CALL(PMPI_Reduce)( + &(final_core->log_hdr_p->mod_ver), &(final_core->log_hdr_p->mod_ver), + DARSHAN_MAX_MODS, MPI_UINT32_T, MPI_MAX, 0, MPI_COMM_WORLD); } /* error out if unable to write log header */ @@ -627,13 +674,14 @@ void darshan_core_shutdown() */ if(my_rank == 0) { - if(getenv("DARSHAN_LOGFILE")) - { + mode_t chmod_mode = S_IRUSR; #ifdef __DARSHAN_GROUP_READABLE_LOGS - chmod(logfile_name, (S_IRUSR|S_IRGRP)); -#else - chmod(logfile_name, (S_IRUSR)); + chmod_mode |= S_IRGRP; #endif + + if(getenv("DARSHAN_LOGFILE")) + { + chmod(logfile_name, chmod_mode); } else { @@ -651,17 +699,15 @@ void darshan_core_shutdown() sprintf(tmp_index, "_%d.darshan", (int)(end_log_time-start_log_time+1)); rename(logfile_name, new_logfile_name); 
/* set permissions on log file */ -#ifdef __DARSHAN_GROUP_READABLE_LOGS - chmod(new_logfile_name, (S_IRUSR|S_IRGRP)); -#else - chmod(new_logfile_name, (S_IRUSR)); -#endif + chmod(new_logfile_name, chmod_mode); free(new_logfile_name); } } } free(logfile_name); + free(shared_recs); + free(mod_shared_recs); darshan_core_cleanup(final_core); if(internal_timing_flag) @@ -714,172 +760,93 @@ void darshan_core_shutdown() fprintf(stderr, "darshan:core_shutdown\t%d\t%f\n", nprocs, all_slowest); } } - + return; } /* *********************************** */ -/* construct the darshan log file name */ -static void darshan_get_logfile_name(char* logfile_name, int jobid, struct tm* start_tm) +#ifdef __DARSHAN_ENABLE_MMAP_LOGS +static void *darshan_init_mmap_log(struct darshan_core_runtime* core, int jobid) { - char* user_logfile_name; - char* logpath; - char* logname_string; - char* logpath_override = NULL; -#ifdef __DARSHAN_LOG_ENV - char env_check[256]; - char* env_tok; -#endif - uint64_t hlevel; - char hname[HOST_NAME_MAX]; - uint64_t logmod; - char cuser[L_cuserid] = {0}; int ret; + int mmap_fd; + int mmap_size; + int sys_page_size; + char cuser[L_cuserid] = {0}; + char *envstr; + char *mmap_log_path; + void *mmap_p; - /* first, check if user specifies a complete logpath to use */ - user_logfile_name = getenv("DARSHAN_LOGFILE"); - if(user_logfile_name) - { - if(strlen(user_logfile_name) >= (PATH_MAX-1)) - { - fprintf(stderr, "darshan library warning: user log file name too long.\n"); - logfile_name[0] = '\0'; - } - else - { - strcpy(logfile_name, user_logfile_name); - } - } - else - { - /* otherwise, generate the log path automatically */ - - /* Use DARSHAN_LOG_PATH_OVERRIDE for the value or __DARSHAN_LOG_PATH */ - logpath = getenv(DARSHAN_LOG_PATH_OVERRIDE); - if(!logpath) - { -#ifdef __DARSHAN_LOG_PATH - logpath = __DARSHAN_LOG_PATH; -#endif - } + sys_page_size = sysconf(_SC_PAGESIZE); + assert(sys_page_size > 0); - /* get the username for this job. 
In order we will try each of the - * following until one of them succeeds: - * - * - cuserid() - * - getenv("LOGNAME") - * - snprintf(..., geteuid()); - * - * Note that we do not use getpwuid() because it generally will not - * work in statically compiled binaries. - */ + mmap_size = sizeof(struct darshan_header) + DARSHAN_JOB_RECORD_SIZE + + + DARSHAN_NAME_RECORD_BUF_SIZE + darshan_mod_mem_quota; + if(mmap_size % sys_page_size) + mmap_size = ((mmap_size / sys_page_size) + 1) * sys_page_size; -#ifndef DARSHAN_DISABLE_CUSERID - cuserid(cuser); -#endif + envstr = getenv(DARSHAN_MMAP_LOG_PATH_OVERRIDE); + if(envstr) + mmap_log_path = envstr; + else + mmap_log_path = DARSHAN_DEF_MMAP_LOG_PATH; - /* if cuserid() didn't work, then check the environment */ - if(strcmp(cuser, "") == 0) - { - logname_string = getenv("LOGNAME"); - if(logname_string) - { - strncpy(cuser, logname_string, (L_cuserid-1)); - } - } + darshan_get_user_name(cuser); - /* if cuserid() and environment both fail, then fall back to uid */ - if(strcmp(cuser, "") == 0) - { - uid_t uid = geteuid(); - snprintf(cuser, sizeof(cuser), "%u", uid); - } + /* construct a unique temporary log file name for this process + * to write mmap log data to + */ + snprintf(core->mmap_log_name, PATH_MAX, + "/%s/%s_%s_id%d_mmap-log-%d.darshan", + mmap_log_path, cuser, __progname, jobid, my_rank); - /* generate a random number to help differentiate the log */ - hlevel=DARSHAN_MPI_CALL(PMPI_Wtime)() * 1000000; - (void)gethostname(hname, sizeof(hname)); - logmod = darshan_hash((void*)hname,strlen(hname),hlevel); + /* create the temporary mmapped darshan log */ + mmap_fd = open(core->mmap_log_name, O_CREAT|O_RDWR|O_EXCL , 0644); + if(mmap_fd < 0) + { + fprintf(stderr, "darshan library warning: " + "unable to create darshan log file %s\n", core->mmap_log_name); + return(NULL); + } - /* see if darshan was configured using the --with-logpath-by-env - * argument, which allows the user to specify an absolute path to - * place logs via 
an env variable. - */ -#ifdef __DARSHAN_LOG_ENV - /* just silently skip if the environment variable list is too big */ - if(strlen(__DARSHAN_LOG_ENV) < 256) - { - /* copy env variable list to a temporary buffer */ - strcpy(env_check, __DARSHAN_LOG_ENV); - /* tokenize the comma-separated list */ - env_tok = strtok(env_check, ","); - if(env_tok) - { - do - { - /* check each env variable in order */ - logpath_override = getenv(env_tok); - if(logpath_override) - { - /* stop as soon as we find a match */ - break; - } - }while((env_tok = strtok(NULL, ","))); - } - } -#endif + /* TODO: ftruncate or just zero fill? */ + /* allocate the necessary space in the log file */ + ret = ftruncate(mmap_fd, mmap_size); + if(ret < 0) + { + fprintf(stderr, "darshan library warning: " + "unable to allocate darshan log file %s\n", core->mmap_log_name); + close(mmap_fd); + unlink(core->mmap_log_name); + return(NULL); + } - if(logpath_override) - { - ret = snprintf(logfile_name, PATH_MAX, - "%s/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial", - logpath_override, - cuser, __progname, jobid, - (start_tm->tm_mon+1), - start_tm->tm_mday, - (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec), - logmod); - if(ret == (PATH_MAX-1)) - { - /* file name was too big; squish it down */ - snprintf(logfile_name, PATH_MAX, - "%s/id%d.darshan_partial", - logpath_override, jobid); - } - } - else if(logpath) - { - ret = snprintf(logfile_name, PATH_MAX, - "%s/%d/%d/%d/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial", - logpath, (start_tm->tm_year+1900), - (start_tm->tm_mon+1), start_tm->tm_mday, - cuser, __progname, jobid, - (start_tm->tm_mon+1), - start_tm->tm_mday, - (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec), - logmod); - if(ret == (PATH_MAX-1)) - { - /* file name was too big; squish it down */ - snprintf(logfile_name, PATH_MAX, - "%s/id%d.darshan_partial", - logpath, jobid); - } - } - else - { - logfile_name[0] = '\0'; - } + /* create the memory map for darshan's 
data structures so they are + * persisted to file as the application executes + */ + mmap_p = mmap(NULL, mmap_size, PROT_WRITE, MAP_SHARED, mmap_fd, 0); + if(mmap_p == MAP_FAILED) + { + fprintf(stderr, "darshan library warning: " + "unable to mmap darshan log file %s\n", core->mmap_log_name); + close(mmap_fd); + unlink(core->mmap_log_name); + return(NULL); } - return; + /* close darshan log file (this does *not* unmap the log file) */ + close(mmap_fd); + + return(mmap_p); } +#endif -/* record any hints used to write the darshan log in the log header */ +/* record any hints used to write the darshan log in the job data */ static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* core) { char* hints; - char* header_hints; + char* job_hints; int meta_remain = 0; char* m; @@ -895,28 +862,28 @@ static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* core) if(!hints || strlen(hints) < 1) return; - header_hints = strdup(hints); - if(!header_hints) + job_hints = strdup(hints); + if(!job_hints) return; meta_remain = DARSHAN_JOB_METADATA_LEN - - strlen(core->log_job.metadata) - 1; + strlen(core->log_job_p->metadata) - 1; if(meta_remain >= (strlen(PACKAGE_VERSION) + 9)) { - sprintf(core->log_job.metadata, "lib_ver=%s\n", PACKAGE_VERSION); + sprintf(core->log_job_p->metadata, "lib_ver=%s\n", PACKAGE_VERSION); meta_remain -= (strlen(PACKAGE_VERSION) + 9); } - if(meta_remain >= (3 + strlen(header_hints))) + if(meta_remain >= (3 + strlen(job_hints))) { - m = core->log_job.metadata + strlen(core->log_job.metadata); + m = core->log_job_p->metadata + strlen(core->log_job_p->metadata); /* We have room to store the hints in the metadata portion of - * the job header. We just prepend an h= to the hints list. The + * the job structure. We just prepend an h= to the hints list. The * metadata parser will ignore = characters that appear in the value * portion of the metadata key/value pair. 
*/ - sprintf(m, "h=%s\n", header_hints); + sprintf(m, "h=%s\n", job_hints); } - free(header_hints); + free(job_hints); return; } @@ -935,7 +902,7 @@ static int mnt_data_cmp(const void* a, const void* b) } /* adds an entry to table of mounted file systems */ -static void add_entry(char* trailing_data, int* space_left, struct mntent *entry) +static void add_entry(char* buf, int* space_left, struct mntent* entry) { int ret; char tmp_mnt[256]; @@ -960,12 +927,12 @@ static void add_entry(char* trailing_data, int* space_left, struct mntent *entry else mnt_data_array[mnt_data_count].block_size = 4096; - /* store mount information for use in header of darshan log */ + /* store mount information with the job-level metadata in darshan log */ ret = snprintf(tmp_mnt, 256, "\n%s\t%s", entry->mnt_type, entry->mnt_dir); if(ret < 256 && strlen(tmp_mnt) <= (*space_left)) { - strcat(trailing_data, tmp_mnt); + strcat(buf, tmp_mnt); (*space_left) -= strlen(tmp_mnt); } @@ -973,17 +940,21 @@ static void add_entry(char* trailing_data, int* space_left, struct mntent *entry return; } -/* darshan_get_exe_and_mounts_root() +/* darshan_get_exe_and_mounts() * * collects command line and list of mounted file systems into a string that - * will be stored with the job header + * will be stored with the job-level metadata */ -static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core, - char* trailing_data, int space_left) +static void darshan_get_exe_and_mounts(struct darshan_core_runtime *core, + int argc, char **argv) { FILE* tab; struct mntent *entry; char* exclude; + char* truncate_string = ""; + int truncate_offset; + int space_left = DARSHAN_EXE_LEN; + int i; int tmp_index = 0; int skip = 0; @@ -1005,9 +976,36 @@ static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core, NULL }; - /* length of exe has already been safety checked in darshan initialization */ - strcat(trailing_data, core->exe); - space_left = DARSHAN_EXE_LEN - strlen(trailing_data); + /* 
record exe and arguments */ + for(i=0; ilog_exemnt_p, argv[i], space_left); + space_left = DARSHAN_EXE_LEN-strlen(core->log_exemnt_p); + if(i < (argc-1)) + { + strncat(core->log_exemnt_p, " ", space_left); + space_left = DARSHAN_EXE_LEN-strlen(core->log_exemnt_p); + } + } + + /* if we don't see any arguments, then use glibc symbol to get + * program name at least (this happens in fortran) + */ + if(argc == 0) + { + strncat(core->log_exemnt_p, __progname_full, space_left); + space_left = DARSHAN_EXE_LEN-strlen(core->log_exemnt_p); + strncat(core->log_exemnt_p, " ", space_left); + space_left = DARSHAN_EXE_LEN-strlen(core->log_exemnt_p); + } + + if(space_left == 0) + { + /* we ran out of room; mark that string was truncated */ + truncate_offset = DARSHAN_EXE_LEN - strlen(truncate_string); + sprintf(&(core->log_exemnt_p[truncate_offset]), "%s", + truncate_string); + } /* we make two passes through mounted file systems; in the first pass we * grab any non-nfs mount points, then on the second pass we grab nfs @@ -1037,7 +1035,7 @@ static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core, if(skip || (strcmp(entry->mnt_type, "nfs") == 0)) continue; - add_entry(trailing_data, &space_left, entry); + add_entry(core->log_exemnt_p, &space_left, entry); } endmntent(tab); @@ -1050,77 +1048,237 @@ static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core, if(strcmp(entry->mnt_type, "nfs") != 0) continue; - add_entry(trailing_data, &space_left, entry); + add_entry(core->log_exemnt_p, &space_left, entry); + } + endmntent(tab); + + /* sort mount points in order of longest path to shortest path. This is + * necessary so that if we try to match file paths to mount points later + * we don't match on "/" every time. 
+ */ + qsort(mnt_data_array, mnt_data_count, sizeof(mnt_data_array[0]), mnt_data_cmp); + return; +} + +static int darshan_block_size_from_path(const char *path) +{ + int i; + int block_size = -1; + + for(i=0; i= (PATH_MAX-1)) { - *block_size = mnt_data_array[i].block_size; - return; + fprintf(stderr, "darshan library warning: user log file name too long.\n"); + logfile_name[0] = '\0'; + } + else + { + strcpy(logfile_name, user_logfile_name); + } + } + else + { + /* otherwise, generate the log path automatically */ + + /* Use DARSHAN_LOG_PATH_OVERRIDE for the value or __DARSHAN_LOG_PATH */ + logpath = getenv(DARSHAN_LOG_PATH_OVERRIDE); + if(!logpath) + { +#ifdef __DARSHAN_LOG_PATH + logpath = __DARSHAN_LOG_PATH; +#endif + } + + darshan_get_user_name(cuser); + + /* generate a random number to help differentiate the log */ + hlevel=DARSHAN_MPI_CALL(PMPI_Wtime)() * 1000000; + (void)gethostname(hname, sizeof(hname)); + logmod = darshan_hash((void*)hname,strlen(hname),hlevel); + + /* see if darshan was configured using the --with-logpath-by-env + * argument, which allows the user to specify an absolute path to + * place logs via an env variable. 
+ */ +#ifdef __DARSHAN_LOG_ENV + /* just silently skip if the environment variable list is too big */ + if(strlen(__DARSHAN_LOG_ENV) < 256) + { + /* copy env variable list to a temporary buffer */ + strcpy(env_check, __DARSHAN_LOG_ENV); + /* tokenize the comma-separated list */ + env_tok = strtok(env_check, ","); + if(env_tok) + { + do + { + /* check each env variable in order */ + logpath_override = getenv(env_tok); + if(logpath_override) + { + /* stop as soon as we find a match */ + break; + } + }while((env_tok = strtok(NULL, ","))); + } + } +#endif + + if(logpath_override) + { + ret = snprintf(logfile_name, PATH_MAX, + "%s/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial", + logpath_override, + cuser, __progname, jobid, + (start_tm->tm_mon+1), + start_tm->tm_mday, + (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec), + logmod); + if(ret == (PATH_MAX-1)) + { + /* file name was too big; squish it down */ + snprintf(logfile_name, PATH_MAX, + "%s/id%d.darshan_partial", + logpath_override, jobid); + } + } + else if(logpath) + { + ret = snprintf(logfile_name, PATH_MAX, + "%s/%d/%d/%d/%s_%s_id%d_%d-%d-%d-%" PRIu64 ".darshan_partial", + logpath, (start_tm->tm_year+1900), + (start_tm->tm_mon+1), start_tm->tm_mday, + cuser, __progname, jobid, + (start_tm->tm_mon+1), + start_tm->tm_mday, + (start_tm->tm_hour*60*60 + start_tm->tm_min*60 + start_tm->tm_sec), + logmod); + if(ret == (PATH_MAX-1)) + { + /* file name was too big; squish it down */ + snprintf(logfile_name, PATH_MAX, + "%s/id%d.darshan_partial", + logpath, jobid); + } + } + else + { + logfile_name[0] = '\0'; } } return; } +static int darshan_add_name_record_ref(struct darshan_core_runtime *core, + darshan_record_id rec_id, const char *name, darshan_module_id mod_id) +{ + struct darshan_core_name_record_ref *ref; + int record_size = sizeof(darshan_record_id) + strlen(name) + 1; + + if((record_size + core->name_mem_used) > DARSHAN_NAME_RECORD_BUF_SIZE) + return(0); + + ref = malloc(sizeof(*ref)); + 
if(!ref) + return(0); + memset(ref, 0, sizeof(*ref)); + + /* initialize the name record */ + ref->name_record = (struct darshan_name_record *) + ((char *)core->log_name_p + core->name_mem_used); + memset(ref->name_record, 0, record_size); + ref->name_record->id = rec_id; + strcpy(ref->name_record->name, name); + DARSHAN_MOD_FLAG_SET(ref->mod_flags, mod_id); + + /* add the record to the hash table */ + HASH_ADD(hlink, core->name_hash, name_record->id, + sizeof(darshan_record_id), ref); + core->name_mem_used += record_size; +#ifdef __DARSHAN_ENABLE_MMAP_LOGS + core->log_hdr_p->name_map.len += record_size; +#endif + + return(1); +} + static void darshan_get_shared_records(struct darshan_core_runtime *core, darshan_record_id **shared_recs, int *shared_rec_cnt) { int i, j; - int tmp_cnt = core->rec_count; - struct darshan_core_record_ref *tmp, *ref; + int tmp_cnt = HASH_CNT(hlink, core->name_hash); + struct darshan_core_name_record_ref *tmp, *ref; darshan_record_id *id_array; uint64_t *mod_flags; uint64_t *global_mod_flags; @@ -1143,9 +1301,9 @@ static void darshan_get_shared_records(struct darshan_core_runtime *core, if(my_rank == 0) { i = 0; - HASH_ITER(hlink, core->rec_hash, ref, tmp) + HASH_ITER(hlink, core->name_hash, ref, tmp) { - id_array[i++] = ref->rec.id; + id_array[i++] = ref->name_record->id; } } @@ -1156,7 +1314,7 @@ static void darshan_get_shared_records(struct darshan_core_runtime *core, /* everyone looks to see if they opened the same records as root */ for(i=0; irec_hash, &id_array[i], sizeof(darshan_record_id), ref); + HASH_FIND(hlink, core->name_hash, &id_array[i], sizeof(darshan_record_id), ref); if(ref) { /* we opened that record too, save the mod_flags */ @@ -1164,7 +1322,7 @@ static void darshan_get_shared_records(struct darshan_core_runtime *core, } } - /* now allreduce so everyone agrees which files are shared and + /* now allreduce so everyone agrees which records are shared and * which modules accessed them collectively */ 
DARSHAN_MPI_CALL(PMPI_Allreduce)(mod_flags, global_mod_flags, tmp_cnt, @@ -1179,15 +1337,18 @@ static void darshan_get_shared_records(struct darshan_core_runtime *core, /* set global_mod_flags so we know which modules collectively * accessed this module. we need this info to support shared - * file reductions + * record reductions */ - HASH_FIND(hlink, core->rec_hash, &id_array[i], sizeof(darshan_record_id), ref); + HASH_FIND(hlink, core->name_hash, &id_array[i], sizeof(darshan_record_id), ref); assert(ref); ref->global_mod_flags = global_mod_flags[i]; } } *shared_rec_cnt = j; + free(id_array); + free(mod_flags); + free(global_mod_flags); return; } @@ -1341,75 +1502,97 @@ static int darshan_deflate_buffer(void **pointers, int *lengths, int count, /* NOTE: the map written to file may contain duplicate id->name entries if a * record is opened by multiple ranks, but not all ranks */ -static int darshan_log_write_record_hash(MPI_File log_fh, struct darshan_core_runtime *core, - uint64_t *inout_off) +static int darshan_log_write_name_record_hash(MPI_File log_fh, + struct darshan_core_runtime *core, uint64_t *inout_off) { + struct darshan_core_name_record_ref *ref; + struct darshan_name_record *name_rec; + char *my_buf, *shared_buf; + char *tmp_p; + int rec_len; + int shared_buf_len; + int name_rec_buf_len; int ret; - struct darshan_core_record_ref *ref, *tmp; - uint32_t name_len; - size_t record_sz; - size_t hash_buf_sz = 0; - char *hash_buf; - char *hash_buf_off; - - /* allocate a buffer to store at most 64 bytes for each registered record */ - /* NOTE: this buffer may be reallocated if estimate is too small */ - hash_buf_sz = core->rec_count * 64; - hash_buf = malloc(hash_buf_sz); - if(!hash_buf) - { - return(-1); - } - /* serialize the record hash into a buffer for writing */ - hash_buf_off = hash_buf; - HASH_ITER(hlink, core->rec_hash, ref, tmp) + /* remove globally shared name records from non-zero ranks */ + name_rec_buf_len = core->name_mem_used; + if(my_rank > 
0) { - /* to avoid duplicate records, only rank 0 will write shared records */ - if(my_rank > 0 && ref->global_mod_flags) - continue; - - name_len = strlen(ref->rec.name); - record_sz = sizeof(darshan_record_id) + sizeof(uint32_t) + name_len; - /* make sure there is room in the buffer for this record */ - if((hash_buf_off + record_sz) > (hash_buf + hash_buf_sz)) + name_rec = core->log_name_p; + my_buf = core->log_name_p; + shared_buf = core->comp_buf; + shared_buf_len = 0; + while(name_rec_buf_len > 0) { - char *tmp_buf; - size_t old_buf_sz; - - /* if no room, reallocate the hash buffer at twice the current size */ - old_buf_sz = hash_buf_off - hash_buf; - hash_buf_sz *= 2; - tmp_buf = malloc(hash_buf_sz); - if(!tmp_buf) + HASH_FIND(hlink, core->name_hash, &(name_rec->id), + sizeof(darshan_record_id), ref); + assert(ref); + rec_len = sizeof(darshan_record_id) + strlen(name_rec->name) + 1; + + if(ref->global_mod_flags) { - free(hash_buf); - return(-1); + /* this record is shared globally, move to the temporary + * shared record buffer and update hash references + */ + HASH_DELETE(hlink, core->name_hash, ref); + memcpy(shared_buf, name_rec, rec_len); + ref->name_record = (struct darshan_name_record *)shared_buf; + HASH_ADD(hlink, core->name_hash, name_record->id, + sizeof(darshan_record_id), ref); + + shared_buf += rec_len; + shared_buf_len += rec_len; + } + else + { + /* this record is not shared, but we still may need to + * move it forward in our buffer and update hash references + */ + if(my_buf != (char *)name_rec) + { + HASH_DELETE(hlink, core->name_hash, ref); + memcpy(my_buf, name_rec, rec_len); + ref->name_record =(struct darshan_name_record *)my_buf; + HASH_ADD(hlink, core->name_hash, name_record->id, + sizeof(darshan_record_id), ref); + } + my_buf += rec_len; } - memcpy(tmp_buf, hash_buf, old_buf_sz); - free(hash_buf); - hash_buf = tmp_buf; - hash_buf_off = hash_buf + old_buf_sz; + tmp_p = (char *)name_rec + rec_len; + name_rec = (struct 
darshan_name_record *)tmp_p; + name_rec_buf_len -= rec_len; } + name_rec_buf_len = core->name_mem_used - shared_buf_len; - /* now serialize the record into the hash buffer. - * NOTE: darshan record hash serialization method: - * ... darshan_record_id | (uint32_t) path_len | path ... + /* append the shared records back to the end of the name record + * buffer and update hash table references so we can still + * reference these records as modules shutdown */ - *((darshan_record_id *)hash_buf_off) = ref->rec.id; - hash_buf_off += sizeof(darshan_record_id); - *((uint32_t *)hash_buf_off) = name_len; - hash_buf_off += sizeof(uint32_t); - memcpy(hash_buf_off, ref->rec.name, name_len); - hash_buf_off += name_len; + name_rec = (struct darshan_name_record *)core->comp_buf; + while(shared_buf_len > 0) + { + HASH_FIND(hlink, core->name_hash, &(name_rec->id), + sizeof(darshan_record_id), ref); + assert(ref); + rec_len = sizeof(darshan_record_id) + strlen(name_rec->name) + 1; + + HASH_DELETE(hlink, core->name_hash, ref); + memcpy(my_buf, name_rec, rec_len); + ref->name_record = (struct darshan_name_record *)my_buf; + HASH_ADD(hlink, core->name_hash, name_record->id, + sizeof(darshan_record_id), ref); + + tmp_p = (char *)name_rec + rec_len; + name_rec = (struct darshan_name_record *)tmp_p; + my_buf += rec_len; + shared_buf_len -= rec_len; + } } - hash_buf_sz = hash_buf_off - hash_buf; /* collectively write out the record hash to the darshan log */ - ret = darshan_log_append_all(log_fh, core, hash_buf, hash_buf_sz, inout_off); - - free(hash_buf); + ret = darshan_log_append_all(log_fh, core, core->log_name_p, + name_rec_buf_len, inout_off); return(ret); } @@ -1441,7 +1624,7 @@ static int darshan_log_append_all(MPI_File log_fh, struct darshan_core_runtime * DARSHAN_MPI_CALL(PMPI_Scan)(&send_off, &my_off, 1, MPI_OFFSET, MPI_SUM, MPI_COMM_WORLD); - /* scan in inclusive; subtract local size back out */ + /* scan is inclusive; subtract local size back out */ my_off -= comp_buf_sz; 
if(ret == 0) @@ -1489,13 +1672,12 @@ static int darshan_log_append_all(MPI_File log_fh, struct darshan_core_runtime * /* free darshan core data structures to shutdown */ static void darshan_core_cleanup(struct darshan_core_runtime* core) { - struct darshan_core_record_ref *tmp, *ref; + struct darshan_core_name_record_ref *tmp, *ref; int i; - HASH_ITER(hlink, core->rec_hash, ref, tmp) + HASH_ITER(hlink, core->name_hash, ref, tmp) { - HASH_DELETE(hlink, core->rec_hash, ref); - free(ref->rec.name); + HASH_DELETE(hlink, core->name_hash, ref); free(ref); } @@ -1508,7 +1690,16 @@ static void darshan_core_cleanup(struct darshan_core_runtime* core) } } - free(core->trailing_data); +#ifndef __DARSHAN_ENABLE_MMAP_LOGS + free(core->log_hdr_p); + free(core->log_job_p); + free(core->log_exemnt_p); + free(core->log_name_p); + free(core->log_mod_p); +#endif + + if(core->comp_buf) + free(core->comp_buf); free(core); return; @@ -1597,34 +1788,28 @@ void darshan_shutdown_bench(int argc, char **argv) void darshan_core_register_module( darshan_module_id mod_id, - struct darshan_module_funcs *funcs, + darshan_module_shutdown mod_shutdown_func, + int *inout_mod_buf_size, int *rank, - int *mod_mem_limit, int *sys_mem_alignment) { - int ret; - int tmpval; struct darshan_core_module* mod; - char *mod_mem_str = NULL; - *mod_mem_limit = 0; + int mod_mem_req = *inout_mod_buf_size; + int mod_mem_avail; + + *inout_mod_buf_size = 0; if(!darshan_core || (mod_id >= DARSHAN_MAX_MODS)) return; - if(sys_mem_alignment) - *sys_mem_alignment = darshan_mem_alignment; - - /* see if this module is already registered */ DARSHAN_CORE_LOCK(); if(darshan_core->mod_array[mod_id]) { /* if module is already registered just return */ - /* NOTE: we do not recalculate memory limit here, just set to 0 */ DARSHAN_CORE_UNLOCK(); return; } - /* this module has not been registered yet, allocate and initialize it */ mod = malloc(sizeof(*mod)); if(!mod) { @@ -1632,159 +1817,155 @@ void darshan_core_register_module( return; 
} memset(mod, 0, sizeof(*mod)); - mod->id = mod_id; - mod->mod_funcs = *funcs; + + /* set module's record buffer and max memory usage */ + mod_mem_avail = darshan_mod_mem_quota - darshan_core->mod_mem_used; + if(mod_mem_avail >= mod_mem_req) + mod->rec_mem_avail = mod_mem_req; + else + mod->rec_mem_avail = mod_mem_avail; + mod->rec_buf_start = darshan_core->log_mod_p + darshan_core->mod_mem_used; + mod->rec_buf_p = mod->rec_buf_start; + mod->mod_shutdown_func = mod_shutdown_func; /* register module with darshan */ darshan_core->mod_array[mod_id] = mod; - darshan_core->log_header.mod_ver[mod_id] = darshan_module_versions[mod_id]; + darshan_core->mod_mem_used += mod->rec_mem_avail; + darshan_core->log_hdr_p->mod_ver[mod_id] = darshan_module_versions[mod_id]; +#ifdef __DARSHAN_ENABLE_MMAP_LOGS + darshan_core->log_hdr_p->mod_map[mod_id].off = + ((char *)mod->rec_buf_start - (char *)darshan_core->log_hdr_p); +#endif + + *inout_mod_buf_size = mod->rec_mem_avail; + DARSHAN_CORE_UNLOCK(); - /* get the calling process's rank */ + /* set the memory alignment and calling process's rank, if desired */ + if(sys_mem_alignment) + *sys_mem_alignment = darshan_mem_alignment; if(rank) *rank = my_rank; - /* set the maximum amount of memory this module can use */ - mod_mem_str = getenv(DARSHAN_MOD_MEM_OVERRIDE); - if(mod_mem_str) - { - ret = sscanf(mod_mem_str, "%d", &tmpval); - /* silently ignore if the env variable is set poorly */ - if(ret == 1 && tmpval > 0) - *mod_mem_limit = (tmpval * 1024 * 1024); /* convert to MiB */ - else - *mod_mem_limit = DARSHAN_MOD_MEM_MAX; - } - else - { - *mod_mem_limit = DARSHAN_MOD_MEM_MAX; - } - DARSHAN_CORE_UNLOCK(); - return; } +/* NOTE: we currently don't really have a simple way of returning the + * memory allocated to this module back to darshan to hand out to + * other modules, so all we do is disable the module so darshan does + * not attempt to call into it at shutdown time + */ void darshan_core_unregister_module( darshan_module_id mod_id) 
{ - struct darshan_core_record_ref *ref, *tmp; + DARSHAN_CORE_LOCK(); if(!darshan_core) return; - DARSHAN_CORE_LOCK(); - - if(darshan_core->mod_array[mod_id]) - { - /* iterate all records and disassociate this module from them */ - HASH_ITER(hlink, darshan_core->rec_hash, ref, tmp) - { - darshan_core_unregister_record(ref->rec.id, mod_id); - } + /* update darshan internal structures and header */ + free(darshan_core->mod_array[mod_id]); + darshan_core->mod_array[mod_id] = NULL; + darshan_core->log_hdr_p->mod_ver[mod_id] = 0; +#ifdef __DARSHAN_ENABLE_MMAP_LOGS + darshan_core->log_hdr_p->mod_map[mod_id].off = + darshan_core->log_hdr_p->mod_map[mod_id].len = 0; +#endif - free(darshan_core->mod_array[mod_id]); - darshan_core->mod_array[mod_id] = NULL; - } DARSHAN_CORE_UNLOCK(); - return; } -void darshan_core_register_record( - void *name, - int len, +darshan_record_id darshan_core_gen_record_id( + const char *name) +{ + /* hash the input name to get a unique id for this record */ + return darshan_hash((unsigned char *)name, strlen(name), 0); +} + +void *darshan_core_register_record( + darshan_record_id rec_id, + const char *name, darshan_module_id mod_id, - int printable_flag, - int mod_limit_flag, - darshan_record_id *rec_id, + int rec_len, int *file_alignment) { - darshan_record_id tmp_rec_id; - struct darshan_core_record_ref *ref; - - *rec_id = 0; + struct darshan_core_name_record_ref *ref; + void *rec_buf; + int ret; if(!darshan_core) - return; + return(NULL); - /* TODO: what do you do with printable flag? 
*/ + DARSHAN_CORE_LOCK(); - /* hash the input name to get a unique id for this record */ - tmp_rec_id = darshan_hash(name, len, 0); + /* check to see if this module has enough space to store a new record */ + if(darshan_core->mod_array[mod_id]->rec_mem_avail < rec_len) + { + DARSHAN_MOD_FLAG_SET(darshan_core->log_hdr_p->partial_flag, mod_id); + DARSHAN_CORE_UNLOCK(); + return(NULL); + } - /* check to see if we've already stored the id->name mapping for this record */ - DARSHAN_CORE_LOCK(); - HASH_FIND(hlink, darshan_core->rec_hash, &tmp_rec_id, sizeof(darshan_record_id), ref); - if(!ref) + /* register a name record if a name is given for this record */ + if(name) { - /* record not found -- add it to the hash if this module has not already used - * all of its memory + /* check to see if we've already stored the id->name mapping for + * this record, and add a new name record if not */ - - if(mod_limit_flag) + HASH_FIND(hlink, darshan_core->name_hash, &rec_id, + sizeof(darshan_record_id), ref); + if(!ref) { - /* if this module is OOM, set a flag in the header to indicate this */ - DARSHAN_MOD_FLAG_SET(darshan_core->log_header.partial_flag, mod_id); - DARSHAN_CORE_UNLOCK(); - return; + ret = darshan_add_name_record_ref(darshan_core, rec_id, name, mod_id); + if(ret == 0) + { + DARSHAN_MOD_FLAG_SET(darshan_core->log_hdr_p->partial_flag, mod_id); + DARSHAN_CORE_UNLOCK(); + return(NULL); + } } - - ref = malloc(sizeof(struct darshan_core_record_ref)); - if(ref) + else { - ref->mod_flags = ref->global_mod_flags = 0; - ref->rec.id = tmp_rec_id; - ref->rec.name = malloc(strlen(name) + 1); - if(ref->rec.name) - strcpy(ref->rec.name, name); - - HASH_ADD(hlink, darshan_core->rec_hash, rec.id, sizeof(darshan_record_id), ref); - darshan_core->rec_count++; + DARSHAN_MOD_FLAG_SET(ref->mod_flags, mod_id); } } - DARSHAN_MOD_FLAG_SET(ref->mod_flags, mod_id); + + rec_buf = darshan_core->mod_array[mod_id]->rec_buf_p; + darshan_core->mod_array[mod_id]->rec_buf_p += rec_len; + 
darshan_core->mod_array[mod_id]->rec_mem_avail -= rec_len; +#ifdef __DARSHAN_ENABLE_MMAP_LOGS + darshan_core->log_hdr_p->mod_map[mod_id].len += rec_len; +#endif DARSHAN_CORE_UNLOCK(); if(file_alignment) - darshan_block_size_from_path(name, file_alignment); + *file_alignment = darshan_block_size_from_path(name); - *rec_id = tmp_rec_id; - return; + return(rec_buf);; } -/* TODO: test */ -void darshan_core_unregister_record( - darshan_record_id rec_id, - darshan_module_id mod_id) +double darshan_core_wtime() { - struct darshan_core_record_ref *ref; - if(!darshan_core) - return; - - DARSHAN_CORE_LOCK(); - HASH_FIND(hlink, darshan_core->rec_hash, &rec_id, sizeof(darshan_record_id), ref); - assert(ref); - - /* disassociate this module from the given record id */ - DARSHAN_MOD_FLAG_UNSET(ref->mod_flags, mod_id); - if(!(ref->mod_flags)) { - /* if no other modules are associated with this rec, delete it */ - HASH_DELETE(hlink, darshan_core->rec_hash, ref); + return(0); } - DARSHAN_CORE_UNLOCK(); - return; + return(DARSHAN_MPI_CALL(PMPI_Wtime)() - darshan_core->wtime_offset); } -double darshan_core_wtime() +int darshan_core_excluded_path(const char *path) { - if(!darshan_core) - { - return(0); + char *exclude; + int tmp_index = 0; + + while((exclude = darshan_path_exclusions[tmp_index])) { + if(!(strncmp(exclude, path, strlen(exclude)))) + return(1); + tmp_index++; } - return(DARSHAN_MPI_CALL(PMPI_Wtime)() - darshan_core->wtime_offset); + return(0); } /* diff --git a/darshan-runtime/lib/darshan-hdf5.c b/darshan-runtime/lib/darshan-hdf5.c index 50a77bd5488343644e9224dc131d0672b0f2cafe..6946151d38843fec85e865e5b7f0cb81b778e0ea 100644 --- a/darshan-runtime/lib/darshan-hdf5.c +++ b/darshan-runtime/lib/darshan-hdf5.c @@ -19,8 +19,6 @@ #define __USE_GNU #include -#include "uthash.h" - #include "darshan.h" #include "darshan-dynamic.h" @@ -32,56 +30,78 @@ DARSHAN_FORWARD_DECL(H5Fcreate, hid_t, (const char *filename, unsigned flags, hi DARSHAN_FORWARD_DECL(H5Fopen, hid_t, (const char 
*filename, unsigned flags, hid_t access_plist)); DARSHAN_FORWARD_DECL(H5Fclose, herr_t, (hid_t file_id)); -/* structure to track i/o stats for a given hdf5 file at runtime */ -struct hdf5_file_runtime -{ - struct darshan_hdf5_file* file_record; - UT_hash_handle hlink; -}; - -/* structure to associate a HDF5 hid with an existing file runtime structure */ -struct hdf5_file_runtime_ref +/* structure that can track i/o stats for a given HDF5 file record at runtime */ +struct hdf5_file_record_ref { - struct hdf5_file_runtime* file; - hid_t hid; - UT_hash_handle hlink; + struct darshan_hdf5_file* file_rec; }; -/* necessary state for storing HDF5 file records and coordinating with - * darshan-core at shutdown time - */ +/* struct to encapsulate runtime state for the HDF5 module */ struct hdf5_runtime { - struct hdf5_file_runtime* file_runtime_array; - struct darshan_hdf5_file* file_record_array; - int file_array_size; - int file_array_ndx; - struct hdf5_file_runtime *file_hash; - struct hdf5_file_runtime_ref* hid_hash; + void *rec_id_hash; + void *hid_hash; + int file_rec_count; }; +static void hdf5_runtime_initialize( + void); +static struct hdf5_file_record_ref *hdf5_track_new_file_record( + darshan_record_id rec_id, const char *path); +static void hdf5_record_reduction_op( + void* infile_v, void* inoutfile_v, int *len, MPI_Datatype *datatype); +static void hdf5_cleanup_runtime( + void); + +static void hdf5_shutdown( + MPI_Comm mod_comm, darshan_record_id *shared_recs, + int shared_rec_count, void **hdf5_buf, int *hdf5_buf_sz); + static struct hdf5_runtime *hdf5_runtime = NULL; static pthread_mutex_t hdf5_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP; static int instrumentation_disabled = 0; static int my_rank = -1; -static void hdf5_runtime_initialize(void); -static struct hdf5_file_runtime* hdf5_file_by_name(const char *name); -static struct hdf5_file_runtime* hdf5_file_by_name_sethid(const char* name, hid_t hid); -static struct hdf5_file_runtime* 
hdf5_file_by_hid(hid_t hid); -static void hdf5_file_close_hid(hid_t hid); -static int hdf5_record_compare(const void* a, const void* b); -static void hdf5_record_reduction_op(void* infile_v, void* inoutfile_v, - int *len, MPI_Datatype *datatype); - -static void hdf5_begin_shutdown(void); -static void hdf5_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs, - int shared_rec_count, void **hdf5_buf, int *hdf5_buf_sz); -static void hdf5_shutdown(void); - #define HDF5_LOCK() pthread_mutex_lock(&hdf5_runtime_mutex) #define HDF5_UNLOCK() pthread_mutex_unlock(&hdf5_runtime_mutex) +#define HDF5_PRE_RECORD() do { \ + HDF5_LOCK(); \ + if(!hdf5_runtime && !instrumentation_disabled) hdf5_runtime_initialize(); \ + if(!hdf5_runtime) { \ + HDF5_UNLOCK(); \ + return(ret); \ + } \ +} while(0) + +#define HDF5_POST_RECORD() do { \ + HDF5_UNLOCK(); \ +} while(0) + +#define HDF5_RECORD_OPEN(__ret, __path, __tm1) do { \ + darshan_record_id rec_id; \ + struct hdf5_file_record_ref *rec_ref; \ + char *newpath; \ + newpath = darshan_clean_file_path(__path); \ + if(!newpath) newpath = (char *)__path; \ + if(darshan_core_excluded_path(newpath)) { \ + if(newpath != __path) free(newpath); \ + break; \ + } \ + rec_id = darshan_core_gen_record_id(newpath); \ + rec_ref = darshan_lookup_record_ref(hdf5_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \ + if(!rec_ref) rec_ref = hdf5_track_new_file_record(rec_id, newpath); \ + if(!rec_ref) { \ + if(newpath != __path) free(newpath); \ + break; \ + } \ + if(rec_ref->file_rec->fcounters[HDF5_F_OPEN_TIMESTAMP] == 0) \ + rec_ref->file_rec->fcounters[HDF5_F_OPEN_TIMESTAMP] = __tm1; \ + rec_ref->file_rec->counters[HDF5_OPENS] += 1; \ + darshan_add_record_ref(&(hdf5_runtime->hid_hash), &__ret, sizeof(hid_t), rec_ref); \ + if(newpath != __path) free(newpath); \ +} while(0) + /********************************************************* * Wrappers for HDF5 functions of interest * *********************************************************/ 
@@ -90,7 +110,6 @@ hid_t DARSHAN_DECL(H5Fcreate)(const char *filename, unsigned flags, hid_t create_plist, hid_t access_plist) { int ret; - struct hdf5_file_runtime* file; char* tmp; double tm1; @@ -110,17 +129,9 @@ hid_t DARSHAN_DECL(H5Fcreate)(const char *filename, unsigned flags, filename = tmp + 1; } - HDF5_LOCK(); - hdf5_runtime_initialize(); - file = hdf5_file_by_name_sethid(filename, ret); - if(file) - { - if(file->file_record->fcounters[HDF5_F_OPEN_TIMESTAMP] == 0 || - file->file_record->fcounters[HDF5_F_OPEN_TIMESTAMP] > tm1) - file->file_record->fcounters[HDF5_F_OPEN_TIMESTAMP] = tm1; - file->file_record->counters[HDF5_OPENS] += 1; - } - HDF5_UNLOCK(); + HDF5_PRE_RECORD(); + HDF5_RECORD_OPEN(ret, filename, tm1); + HDF5_POST_RECORD(); } return(ret); @@ -130,7 +141,6 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags, hid_t access_plist) { int ret; - struct hdf5_file_runtime* file; char* tmp; double tm1; @@ -150,17 +160,9 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags, filename = tmp + 1; } - HDF5_LOCK(); - hdf5_runtime_initialize(); - file = hdf5_file_by_name_sethid(filename, ret); - if(file) - { - if(file->file_record->fcounters[HDF5_F_OPEN_TIMESTAMP] == 0 || - file->file_record->fcounters[HDF5_F_OPEN_TIMESTAMP] > tm1) - file->file_record->fcounters[HDF5_F_OPEN_TIMESTAMP] = tm1; - file->file_record->counters[HDF5_OPENS] += 1; - } - HDF5_UNLOCK(); + HDF5_PRE_RECORD(); + HDF5_RECORD_OPEN(ret, filename, tm1); + HDF5_POST_RECORD(); } return(ret); @@ -169,23 +171,24 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags, herr_t DARSHAN_DECL(H5Fclose)(hid_t file_id) { - struct hdf5_file_runtime* file; + struct hdf5_file_record_ref *rec_ref; int ret; MAP_OR_FAIL(H5Fclose); ret = __real_H5Fclose(file_id); - HDF5_LOCK(); - hdf5_runtime_initialize(); - file = hdf5_file_by_hid(file_id); - if(file) + HDF5_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(hdf5_runtime->hid_hash, + &file_id, sizeof(hid_t)); + 
if(rec_ref) { - file->file_record->fcounters[HDF5_F_CLOSE_TIMESTAMP] = + rec_ref->file_rec->fcounters[HDF5_F_CLOSE_TIMESTAMP] = darshan_core_wtime(); - hdf5_file_close_hid(file_id); + darshan_delete_record_ref(&(hdf5_runtime->hid_hash), + &file_id, sizeof(hid_t)); } - HDF5_UNLOCK(); + HDF5_POST_RECORD(); return(ret); @@ -198,211 +201,83 @@ herr_t DARSHAN_DECL(H5Fclose)(hid_t file_id) /* initialize internal HDF5 module data strucutres and register with darshan-core */ static void hdf5_runtime_initialize() { - int mem_limit; - struct darshan_module_funcs hdf5_mod_fns = - { - .begin_shutdown = &hdf5_begin_shutdown, - .get_output_data = &hdf5_get_output_data, - .shutdown = &hdf5_shutdown - }; + int hdf5_buf_size; - /* don't do anything if already initialized or instrumenation is disabled */ - if(hdf5_runtime || instrumentation_disabled) - return; + /* try and store the default number of records for this module */ + hdf5_buf_size = DARSHAN_DEF_MOD_REC_COUNT * sizeof(struct darshan_hdf5_file); /* register hdf5 module with darshan-core */ darshan_core_register_module( DARSHAN_HDF5_MOD, - &hdf5_mod_fns, + &hdf5_shutdown, + &hdf5_buf_size, &my_rank, - &mem_limit, NULL); - /* return if no memory assigned by darshan-core */ - if(mem_limit == 0) + /* return if darshan-core does not provide enough module memory */ + if(hdf5_buf_size < sizeof(struct darshan_hdf5_file)) + { + darshan_core_unregister_module(DARSHAN_HDF5_MOD); return; + } hdf5_runtime = malloc(sizeof(*hdf5_runtime)); if(!hdf5_runtime) - return; - memset(hdf5_runtime, 0, sizeof(*hdf5_runtime)); - - /* set maximum number of file records according to max memory limit */ - /* NOTE: maximum number of records is based on the size of a hdf5 file record */ - /* TODO: should we base memory usage off file record or total runtime structure sizes? 
*/ - hdf5_runtime->file_array_size = mem_limit / sizeof(struct darshan_hdf5_file); - hdf5_runtime->file_array_ndx = 0; - - /* allocate array of runtime file records */ - hdf5_runtime->file_runtime_array = malloc(hdf5_runtime->file_array_size * - sizeof(struct hdf5_file_runtime)); - hdf5_runtime->file_record_array = malloc(hdf5_runtime->file_array_size * - sizeof(struct darshan_hdf5_file)); - if(!hdf5_runtime->file_runtime_array || !hdf5_runtime->file_record_array) { - hdf5_runtime->file_array_size = 0; + darshan_core_unregister_module(DARSHAN_HDF5_MOD); return; } - memset(hdf5_runtime->file_runtime_array, 0, hdf5_runtime->file_array_size * - sizeof(struct hdf5_file_runtime)); - memset(hdf5_runtime->file_record_array, 0, hdf5_runtime->file_array_size * - sizeof(struct darshan_hdf5_file)); + memset(hdf5_runtime, 0, sizeof(*hdf5_runtime)); return; } -/* get a HDF5 file record for the given file path */ -static struct hdf5_file_runtime* hdf5_file_by_name(const char *name) +static struct hdf5_file_record_ref *hdf5_track_new_file_record( + darshan_record_id rec_id, const char *path) { - struct hdf5_file_runtime *file = NULL; - char *newname = NULL; - darshan_record_id file_id; - int limit_flag; - - if(!hdf5_runtime || instrumentation_disabled) - return(NULL); - - newname = darshan_clean_file_path(name); - if(!newname) - newname = (char*)name; - - limit_flag = (hdf5_runtime->file_array_ndx >= hdf5_runtime->file_array_size); - - /* get a unique id for this file from darshan core */ - darshan_core_register_record( - (void*)newname, - strlen(newname), - DARSHAN_HDF5_MOD, - 1, - limit_flag, - &file_id, - NULL); + struct darshan_hdf5_file *file_rec = NULL; + struct hdf5_file_record_ref *rec_ref = NULL; + int ret; - /* if record is set to 0, darshan-core is out of space and will not - * track this record, so we should avoid tracking it, too - */ - if(file_id == 0) - { - if(newname != name) - free(newname); + rec_ref = malloc(sizeof(*rec_ref)); + if(!rec_ref) return(NULL); - } + 
memset(rec_ref, 0, sizeof(*rec_ref)); - /* search the hash table for this file record, and return if found */ - HASH_FIND(hlink, hdf5_runtime->file_hash, &file_id, sizeof(darshan_record_id), file); - if(file) + /* add a reference to this file record based on record id */ + ret = darshan_add_record_ref(&(hdf5_runtime->rec_id_hash), &rec_id, + sizeof(darshan_record_id), rec_ref); + if(ret == 0) { - if(newname != name) - free(newname); - return(file); - } - - /* no existing record, assign a new file record from the global array */ - file = &(hdf5_runtime->file_runtime_array[hdf5_runtime->file_array_ndx]); - file->file_record = &(hdf5_runtime->file_record_array[hdf5_runtime->file_array_ndx]); - file->file_record->f_id = file_id; - file->file_record->rank = my_rank; - - /* add new record to file hash table */ - HASH_ADD(hlink, hdf5_runtime->file_hash, file_record->f_id, sizeof(darshan_record_id), file); - hdf5_runtime->file_array_ndx++; - - if(newname != name) - free(newname); - return(file); -} - -/* get a HDF5 file record for the given file path, and also create a - * reference structure using the returned hid - */ -static struct hdf5_file_runtime* hdf5_file_by_name_sethid(const char* name, hid_t hid) -{ - struct hdf5_file_runtime* file; - struct hdf5_file_runtime_ref* ref; - - if(!hdf5_runtime || instrumentation_disabled) + free(rec_ref); return(NULL); - - /* find file record by name first */ - file = hdf5_file_by_name(name); - - if(!file) - return(NULL); - - /* search hash table for existing file ref for this fd */ - HASH_FIND(hlink, hdf5_runtime->hid_hash, &hid, sizeof(hid_t), ref); - if(ref) - { - /* we have a reference. Make sure it points to the correct file - * and return it - */ - ref->file = file; - return(file); } - /* if we hit this point, then we don't have a reference for this fd - * in the table yet. Add it. 
+ /* register the actual file record with darshan-core so it is persisted + * in the log file */ - ref = malloc(sizeof(*ref)); - if(!ref) - return(NULL); - memset(ref, 0, sizeof(*ref)); - - ref->file = file; - ref->hid = hid; - HASH_ADD(hlink, hdf5_runtime->hid_hash, hid, sizeof(hid_t), ref); - - return(file); -} - -/* get a HDF5 file record for the given hid */ -static struct hdf5_file_runtime* hdf5_file_by_hid(hid_t hid) -{ - struct hdf5_file_runtime_ref* ref; - - if(!hdf5_runtime || instrumentation_disabled) - return(NULL); - - /* search hash table for existing file ref for this hid */ - HASH_FIND(hlink, hdf5_runtime->hid_hash, &hid, sizeof(hid_t), ref); - if(ref) - return(ref->file); - - return(NULL); -} - -/* free up HDF5 reference data structures for the given hid */ -static void hdf5_file_close_hid(hid_t hid) -{ - struct hdf5_file_runtime_ref* ref; - - if(!hdf5_runtime || instrumentation_disabled) - return; + file_rec = darshan_core_register_record( + rec_id, + path, + DARSHAN_HDF5_MOD, + sizeof(struct darshan_hdf5_file), + NULL); - /* search hash table for this hid */ - HASH_FIND(hlink, hdf5_runtime->hid_hash, &hid, sizeof(hid_t), ref); - if(ref) + if(!file_rec) { - /* we have a reference, delete it */ - HASH_DELETE(hlink, hdf5_runtime->hid_hash, ref); - free(ref); + darshan_delete_record_ref(&(hdf5_runtime->rec_id_hash), + &rec_id, sizeof(darshan_record_id)); + free(rec_ref); + return(NULL); } - return; -} - -/* compare function for sorting file records by descending rank */ -static int hdf5_record_compare(const void* a_p, const void* b_p) -{ - const struct darshan_hdf5_file* a = a_p; - const struct darshan_hdf5_file* b = b_p; + /* registering this file record was successful, so initialize some fields */ + file_rec->base_rec.id = rec_id; + file_rec->base_rec.rank = my_rank; + rec_ref->file_rec = file_rec; + hdf5_runtime->file_rec_count++; - if(a->rank < b->rank) - return 1; - if(a->rank > b->rank) - return -1; - - return 0; + return(rec_ref); } static void 
hdf5_record_reduction_op(void* infile_v, void* inoutfile_v, @@ -418,8 +293,8 @@ static void hdf5_record_reduction_op(void* infile_v, void* inoutfile_v, for(i=0; i<*len; i++) { memset(&tmp_file, 0, sizeof(struct darshan_hdf5_file)); - tmp_file.f_id = infile->f_id; - tmp_file.rank = -1; + tmp_file.base_rec.id = infile->base_rec.id; + tmp_file.base_rec.rank = -1; /* sum */ for(j=HDF5_OPENS; j<=HDF5_OPENS; j++) @@ -455,37 +330,40 @@ static void hdf5_record_reduction_op(void* infile_v, void* inoutfile_v, return; } -/************************************************************************ - * Functions exported by HDF5 module for coordinating with darshan-core * - ************************************************************************/ - -static void hdf5_begin_shutdown() +static void hdf5_cleanup_runtime() { - assert(hdf5_runtime); + darshan_clear_record_refs(&(hdf5_runtime->hid_hash), 0); + darshan_clear_record_refs(&(hdf5_runtime->rec_id_hash), 1); - HDF5_LOCK(); - /* disable further instrumentation while Darshan shuts down */ - instrumentation_disabled = 1; - HDF5_UNLOCK(); + free(hdf5_runtime); + hdf5_runtime = NULL; return; } -static void hdf5_get_output_data( +/************************************************************************ + * Functions exported by HDF5 module for coordinating with darshan-core * + ************************************************************************/ + +static void hdf5_shutdown( MPI_Comm mod_comm, darshan_record_id *shared_recs, int shared_rec_count, void **hdf5_buf, int *hdf5_buf_sz) { - struct hdf5_file_runtime *file; - int i; + struct hdf5_file_record_ref *rec_ref; + struct darshan_hdf5_file *hdf5_rec_buf = *(struct darshan_hdf5_file **)hdf5_buf; + int hdf5_rec_count; struct darshan_hdf5_file *red_send_buf = NULL; struct darshan_hdf5_file *red_recv_buf = NULL; MPI_Datatype red_type; MPI_Op red_op; + int i; + HDF5_LOCK(); assert(hdf5_runtime); + hdf5_rec_count = hdf5_runtime->file_rec_count; /* if there are globally shared files, 
do a shared file reduction */ /* NOTE: the shared file reduction is also skipped if the @@ -496,23 +374,21 @@ static void hdf5_get_output_data( /* necessary initialization of shared records */ for(i = 0; i < shared_rec_count; i++) { - HASH_FIND(hlink, hdf5_runtime->file_hash, &shared_recs[i], - sizeof(darshan_record_id), file); - assert(file); + rec_ref = darshan_lookup_record_ref(hdf5_runtime->rec_id_hash, + &shared_recs[i], sizeof(darshan_record_id)); + assert(rec_ref); - file->file_record->rank = -1; + rec_ref->file_rec->base_rec.rank = -1; } - /* sort the array of files descending by rank so that we get all of the - * shared files (marked by rank -1) in a contiguous portion at end - * of the array + /* sort the array of records so we get all of the shared records + * (marked by rank -1) in a contiguous portion at end of the array */ - qsort(hdf5_runtime->file_record_array, hdf5_runtime->file_array_ndx, - sizeof(struct darshan_hdf5_file), hdf5_record_compare); + darshan_record_sort(hdf5_rec_buf, hdf5_rec_count, + sizeof(struct darshan_hdf5_file)); /* make *send_buf point to the shared files at the end of sorted array */ - red_send_buf = - &(hdf5_runtime->file_record_array[hdf5_runtime->file_array_ndx-shared_rec_count]); + red_send_buf = &(hdf5_rec_buf[hdf5_rec_count-shared_rec_count]); /* allocate memory for the reduction output on rank 0 */ if(my_rank == 0) @@ -520,6 +396,7 @@ static void hdf5_get_output_data( red_recv_buf = malloc(shared_rec_count * sizeof(struct darshan_hdf5_file)); if(!red_recv_buf) { + HDF5_UNLOCK(); return; } } @@ -541,45 +418,30 @@ static void hdf5_get_output_data( /* clean up reduction state */ if(my_rank == 0) { - int tmp_ndx = hdf5_runtime->file_array_ndx - shared_rec_count; - memcpy(&(hdf5_runtime->file_record_array[tmp_ndx]), red_recv_buf, + int tmp_ndx = hdf5_rec_count - shared_rec_count; + memcpy(&(hdf5_rec_buf[tmp_ndx]), red_recv_buf, shared_rec_count * sizeof(struct darshan_hdf5_file)); free(red_recv_buf); } else { - 
hdf5_runtime->file_array_ndx -= shared_rec_count; + hdf5_rec_count -= shared_rec_count; } DARSHAN_MPI_CALL(PMPI_Type_free)(&red_type); DARSHAN_MPI_CALL(PMPI_Op_free)(&red_op); } - *hdf5_buf = (void *)(hdf5_runtime->file_record_array); - *hdf5_buf_sz = hdf5_runtime->file_array_ndx * sizeof(struct darshan_hdf5_file); - - return; -} + /* update output buffer size to account for shared file reduction */ + *hdf5_buf_sz = hdf5_rec_count * sizeof(struct darshan_hdf5_file); -static void hdf5_shutdown() -{ - struct hdf5_file_runtime_ref *ref, *tmp; + /* shutdown internal structures used for instrumenting */ + hdf5_cleanup_runtime(); - assert(hdf5_runtime); - - HASH_ITER(hlink, hdf5_runtime->hid_hash, ref, tmp) - { - HASH_DELETE(hlink, hdf5_runtime->hid_hash, ref); - free(ref); - } - - HASH_CLEAR(hlink, hdf5_runtime->file_hash); /* these entries are freed all at once below */ - - free(hdf5_runtime->file_runtime_array); - free(hdf5_runtime->file_record_array); - free(hdf5_runtime); - hdf5_runtime = NULL; + /* disable further instrumentation */ + instrumentation_disabled = 1; + HDF5_UNLOCK(); return; } diff --git a/darshan-runtime/lib/darshan-mpiio.c b/darshan-runtime/lib/darshan-mpiio.c index 5365efaa7151f73362fe9135856d711096fd469c..9042467ff869a799753d44fcedf39fb6ba55a833 100644 --- a/darshan-runtime/lib/darshan-mpiio.c +++ b/darshan-runtime/lib/darshan-mpiio.c @@ -4,6 +4,9 @@ * */ +#define _XOPEN_SOURCE 500 +#define _GNU_SOURCE + #include "darshan-runtime-config.h" #include #include @@ -17,77 +20,42 @@ #include #include #include -#define __USE_GNU #include -#include "uthash.h" - #include "darshan.h" #include "darshan-dynamic.h" -/* The mpiio_file_runtime structure maintains necessary runtime metadata +/* The mpiio_file_record_ref structure maintains necessary runtime metadata * for the MPIIO file record (darshan_mpiio_file structure, defined in - * darshan-mpiio-log-format.h) pointed to by 'file_record'. 
This metadata + * darshan-mpiio-log-format.h) pointed to by 'file_rec'. This metadata * assists with the instrumenting of specific statistics in the file record. - * 'hlink' is a hash table link structure used to add/remove this record - * from the hash table of MPIIO file records for this process. * * RATIONALE: the MPIIO module needs to track some stateful, volatile * information about each open file (like the current file offset, most recent * access time, etc.) to aid in instrumentation, but this information can't be * stored in the darshan_mpiio_file struct because we don't want it to appear in - * the final darshan log file. We therefore associate a mpiio_file_runtime - * struct with each darshan_mpiio_file struct in order to track this information. - * - * NOTE: There is a one-to-one mapping of mpiio_file_runtime structs to - * darshan_mpiio_file structs. + * the final darshan log file. We therefore associate a mpiio_file_record_ref + * struct with each darshan_mpiio_file struct in order to track this information + * (i.e., the mapping between mpiio_file_record_ref structs to darshan_mpiio_file + * structs is one-to-one). * - * NOTE: The mpiio_file_runtime struct contains a pointer to a darshan_mpiio_file - * struct (see the *file_record member) rather than simply embedding an entire - * darshan_mpiio_file struct. This is done so that all of the darshan_mpiio_file - * structs can be kept contiguous in memory as a single array to simplify - * reduction, compression, and storage. + * NOTE: we use the 'darshan_record_ref' interface (in darshan-common) to + * associate different types of handles with this mpiio_file_record_ref struct. + * This allows us to index this struct (and the underlying file record) by using + * either the corresponding Darshan record identifier (derived from the filename) + * or by a generated MPI file handle, for instance. 
So, while there should only + * be a single Darshan record identifier that indexes a mpiio_file_record_ref, + * there could be multiple open file handles that index it. */ -struct mpiio_file_runtime +struct mpiio_file_record_ref { - struct darshan_mpiio_file* file_record; + struct darshan_mpiio_file *file_rec; enum darshan_io_type last_io_type; double last_meta_end; double last_read_end; double last_write_end; void *access_root; int access_count; - UT_hash_handle hlink; -}; - -/* The mpiio_file_runtime_ref structure is used to associate a MPIIO - * file handle with an already existing MPIIO file record. This is - * necessary as many MPIIO I/O functions take only a file handle as input, - * but MPIIO file records are indexed by their full file paths (i.e., darshan - * record identifiers for MPIIO files are created by hashing the file path). - * In other words, this structure is necessary as it allows us to look up a - * file record either by a pathname (mpiio_file_runtime) or by MPIIO file - * descriptor (mpiio_file_runtime_ref), depending on which parameters are - * available. This structure includes another hash table link, since separate - * hashes are maintained for mpiio_file_runtime structures and mpiio_file_runtime_ref - * structures. - * - * RATIONALE: In theory the file handle information could be included in the - * mpiio_file_runtime struct rather than in a separate structure here. The - * reason we don't do that is to handle the potential for an MPI implementation - * to produce a new file handle instance each time MPI_File_open() is called on a - * file. Thus there might be multiple file handles referring to the same - * underlying record. - * - * NOTE: there are potentially multiple mpiio_file_runtime_ref structures - * referring to a single mpiio_file_runtime structure. Most of the time there is - * only one, however. 
- */ -struct mpiio_file_runtime_ref -{ - struct mpiio_file_runtime* file; - MPI_File fh; - UT_hash_handle hlink; }; /* The mpiio_runtime structure maintains necessary state for storing @@ -96,117 +64,142 @@ struct mpiio_file_runtime_ref */ struct mpiio_runtime { - struct mpiio_file_runtime* file_runtime_array; - struct darshan_mpiio_file* file_record_array; - int file_array_size; - int file_array_ndx; - struct mpiio_file_runtime* file_hash; - struct mpiio_file_runtime_ref* fh_hash; + void *rec_id_hash; + void *fh_hash; + int file_rec_count; }; +static void mpiio_runtime_initialize( + void); +static struct mpiio_file_record_ref *mpiio_track_new_file_record( + darshan_record_id rec_id, const char *path); +static void mpiio_finalize_file_records( + void *rec_ref_p); +static void mpiio_record_reduction_op( + void* infile_v, void* inoutfile_v, int *len, MPI_Datatype *datatype); +static void mpiio_shared_record_variance( + MPI_Comm mod_comm, struct darshan_mpiio_file *inrec_array, + struct darshan_mpiio_file *outrec_array, int shared_rec_count); +static void mpiio_cleanup_runtime( + void); + +static void mpiio_shutdown( + MPI_Comm mod_comm, darshan_record_id *shared_recs, + int shared_rec_count, void **mpiio_buf, int *mpiio_buf_sz); + static struct mpiio_runtime *mpiio_runtime = NULL; static pthread_mutex_t mpiio_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP; static int instrumentation_disabled = 0; static int my_rank = -1; -static void mpiio_runtime_initialize(void); -static struct mpiio_file_runtime* mpiio_file_by_name(const char *name); -static struct mpiio_file_runtime* mpiio_file_by_name_setfh(const char* name, MPI_File fh); -static struct mpiio_file_runtime* mpiio_file_by_fh(MPI_File fh); -static void mpiio_file_close_fh(MPI_File fh); -static int mpiio_record_compare(const void* a, const void* b); -static void mpiio_record_reduction_op(void* infile_v, void* inoutfile_v, - int *len, MPI_Datatype *datatype); -static void mpiio_shared_record_variance(MPI_Comm 
mod_comm, - struct darshan_mpiio_file *inrec_array, struct darshan_mpiio_file *outrec_array, - int shared_rec_count); - -static void mpiio_begin_shutdown(void); -static void mpiio_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs, - int shared_rec_count, void **mpiio_buf, int *mpiio_buf_sz); -static void mpiio_shutdown(void); - #define MPIIO_LOCK() pthread_mutex_lock(&mpiio_runtime_mutex) #define MPIIO_UNLOCK() pthread_mutex_unlock(&mpiio_runtime_mutex) +#define MPIIO_PRE_RECORD() do { \ + MPIIO_LOCK(); \ + if(!mpiio_runtime && !instrumentation_disabled) mpiio_runtime_initialize(); \ + if(!mpiio_runtime) { \ + MPIIO_UNLOCK(); \ + return(ret); \ + } \ +} while(0) + +#define MPIIO_POST_RECORD() do { \ + MPIIO_UNLOCK(); \ +} while(0) + #define MPIIO_RECORD_OPEN(__ret, __path, __fh, __comm, __mode, __info, __tm1, __tm2) do { \ - struct mpiio_file_runtime* file; \ - char *exclude; \ - int tmp_index = 0; \ + darshan_record_id rec_id; \ + struct mpiio_file_record_ref *rec_ref; \ + char *newpath; \ int comm_size; \ if(__ret != MPI_SUCCESS) break; \ - while((exclude=darshan_path_exclusions[tmp_index])) { \ - if(!(strncmp(exclude, __path, strlen(exclude)))) \ - break; \ - tmp_index++; \ + newpath = darshan_clean_file_path(__path); \ + if(!newpath) newpath = (char *)__path; \ + if(darshan_core_excluded_path(newpath)) { \ + if(newpath != __path) free(newpath); \ + break; \ } \ - if(exclude) break; \ - file = mpiio_file_by_name_setfh(__path, __fh); \ - if(!file) break; \ - file->file_record->counters[MPIIO_MODE] = __mode; \ + rec_id = darshan_core_gen_record_id(newpath); \ + rec_ref = darshan_lookup_record_ref(mpiio_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \ + if(!rec_ref) rec_ref = mpiio_track_new_file_record(rec_id, newpath); \ + if(!rec_ref) { \ + if(newpath != __path) free(newpath); \ + break; \ + } \ + rec_ref->file_rec->counters[MPIIO_MODE] = __mode; \ DARSHAN_MPI_CALL(PMPI_Comm_size)(__comm, &comm_size); \ if(comm_size == 1) \ - 
file->file_record->counters[MPIIO_INDEP_OPENS] += 1; \ + rec_ref->file_rec->counters[MPIIO_INDEP_OPENS] += 1; \ else \ - file->file_record->counters[MPIIO_COLL_OPENS] += 1; \ + rec_ref->file_rec->counters[MPIIO_COLL_OPENS] += 1; \ if(__info != MPI_INFO_NULL) \ - file->file_record->counters[MPIIO_HINTS] += 1; \ - if(file->file_record->fcounters[MPIIO_F_OPEN_TIMESTAMP] == 0 || \ - file->file_record->fcounters[MPIIO_F_OPEN_TIMESTAMP] > __tm1) \ - file->file_record->fcounters[MPIIO_F_OPEN_TIMESTAMP] = __tm1; \ - DARSHAN_TIMER_INC_NO_OVERLAP(file->file_record->fcounters[MPIIO_F_META_TIME], __tm1, __tm2, file->last_meta_end); \ + rec_ref->file_rec->counters[MPIIO_HINTS] += 1; \ + if(rec_ref->file_rec->fcounters[MPIIO_F_OPEN_TIMESTAMP] == 0 || \ + rec_ref->file_rec->fcounters[MPIIO_F_OPEN_TIMESTAMP] > __tm1) \ + rec_ref->file_rec->fcounters[MPIIO_F_OPEN_TIMESTAMP] = __tm1; \ + DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[MPIIO_F_META_TIME], \ + __tm1, __tm2, rec_ref->last_meta_end); \ + darshan_add_record_ref(&(mpiio_runtime->fh_hash), &__fh, sizeof(MPI_File), rec_ref); \ + if(newpath != __path) free(newpath); \ } while(0) #define MPIIO_RECORD_READ(__ret, __fh, __count, __datatype, __counter, __tm1, __tm2) do { \ - struct mpiio_file_runtime* file; \ + struct mpiio_file_record_ref *rec_ref; \ int size = 0; \ double __elapsed = __tm2-__tm1; \ if(__ret != MPI_SUCCESS) break; \ - file = mpiio_file_by_fh(__fh); \ - if(!file) break; \ + rec_ref = darshan_lookup_record_ref(mpiio_runtime->fh_hash, &(__fh), sizeof(MPI_File)); \ + if(!rec_ref) break; \ DARSHAN_MPI_CALL(PMPI_Type_size)(__datatype, &size); \ size = size * __count; \ - DARSHAN_BUCKET_INC(&(file->file_record->counters[MPIIO_SIZE_READ_AGG_0_100]), size); \ - darshan_common_val_counter(&file->access_root, &file->access_count, size); \ - file->file_record->counters[MPIIO_BYTES_READ] += size; \ - file->file_record->counters[__counter] += 1; \ - if(file->last_io_type == DARSHAN_IO_WRITE) \ - 
file->file_record->counters[MPIIO_RW_SWITCHES] += 1; \ - file->last_io_type = DARSHAN_IO_READ; \ - if(file->file_record->fcounters[MPIIO_F_READ_START_TIMESTAMP] == 0 || \ - file->file_record->fcounters[MPIIO_F_READ_START_TIMESTAMP] > __tm1) \ - file->file_record->fcounters[MPIIO_F_READ_START_TIMESTAMP] = __tm1; \ - file->file_record->fcounters[MPIIO_F_READ_END_TIMESTAMP] = __tm2; \ - if(file->file_record->fcounters[MPIIO_F_MAX_READ_TIME] < __elapsed) { \ - file->file_record->fcounters[MPIIO_F_MAX_READ_TIME] = __elapsed; \ - file->file_record->counters[MPIIO_MAX_READ_TIME_SIZE] = size; } \ - DARSHAN_TIMER_INC_NO_OVERLAP(file->file_record->fcounters[MPIIO_F_READ_TIME], __tm1, __tm2, file->last_read_end); \ + DARSHAN_BUCKET_INC(&(rec_ref->file_rec->counters[MPIIO_SIZE_READ_AGG_0_100]), size); \ + darshan_common_val_counter(&rec_ref->access_root, &rec_ref->access_count, size, \ + &(rec_ref->file_rec->counters[MPIIO_ACCESS1_ACCESS]), \ + &(rec_ref->file_rec->counters[MPIIO_ACCESS1_COUNT])); \ + rec_ref->file_rec->counters[MPIIO_BYTES_READ] += size; \ + rec_ref->file_rec->counters[__counter] += 1; \ + if(rec_ref->last_io_type == DARSHAN_IO_WRITE) \ + rec_ref->file_rec->counters[MPIIO_RW_SWITCHES] += 1; \ + rec_ref->last_io_type = DARSHAN_IO_READ; \ + if(rec_ref->file_rec->fcounters[MPIIO_F_READ_START_TIMESTAMP] == 0 || \ + rec_ref->file_rec->fcounters[MPIIO_F_READ_START_TIMESTAMP] > __tm1) \ + rec_ref->file_rec->fcounters[MPIIO_F_READ_START_TIMESTAMP] = __tm1; \ + rec_ref->file_rec->fcounters[MPIIO_F_READ_END_TIMESTAMP] = __tm2; \ + if(rec_ref->file_rec->fcounters[MPIIO_F_MAX_READ_TIME] < __elapsed) { \ + rec_ref->file_rec->fcounters[MPIIO_F_MAX_READ_TIME] = __elapsed; \ + rec_ref->file_rec->counters[MPIIO_MAX_READ_TIME_SIZE] = size; } \ + DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[MPIIO_F_READ_TIME], \ + __tm1, __tm2, rec_ref->last_read_end); \ } while(0) #define MPIIO_RECORD_WRITE(__ret, __fh, __count, __datatype, __counter, __tm1, __tm2) do { \ - struct 
mpiio_file_runtime* file; \ + struct mpiio_file_record_ref *rec_ref; \ int size = 0; \ double __elapsed = __tm2-__tm1; \ if(__ret != MPI_SUCCESS) break; \ - file = mpiio_file_by_fh(__fh); \ - if(!file) break; \ + rec_ref = darshan_lookup_record_ref(mpiio_runtime->fh_hash, &(__fh), sizeof(MPI_File)); \ + if(!rec_ref) break; \ DARSHAN_MPI_CALL(PMPI_Type_size)(__datatype, &size); \ size = size * __count; \ - DARSHAN_BUCKET_INC(&(file->file_record->counters[MPIIO_SIZE_WRITE_AGG_0_100]), size); \ - darshan_common_val_counter(&file->access_root, &file->access_count, size); \ - file->file_record->counters[MPIIO_BYTES_WRITTEN] += size; \ - file->file_record->counters[__counter] += 1; \ - if(file->last_io_type == DARSHAN_IO_READ) \ - file->file_record->counters[MPIIO_RW_SWITCHES] += 1; \ - file->last_io_type = DARSHAN_IO_WRITE; \ - if(file->file_record->fcounters[MPIIO_F_READ_START_TIMESTAMP] == 0 || \ - file->file_record->fcounters[MPIIO_F_READ_START_TIMESTAMP] > __tm1) \ - file->file_record->fcounters[MPIIO_F_WRITE_START_TIMESTAMP] = __tm1; \ - file->file_record->fcounters[MPIIO_F_WRITE_END_TIMESTAMP] = __tm2; \ - if(file->file_record->fcounters[MPIIO_F_MAX_WRITE_TIME] < __elapsed) { \ - file->file_record->fcounters[MPIIO_F_MAX_WRITE_TIME] = __elapsed; \ - file->file_record->counters[MPIIO_MAX_WRITE_TIME_SIZE] = size; } \ - DARSHAN_TIMER_INC_NO_OVERLAP(file->file_record->fcounters[MPIIO_F_WRITE_TIME], __tm1, __tm2, file->last_write_end); \ + DARSHAN_BUCKET_INC(&(rec_ref->file_rec->counters[MPIIO_SIZE_WRITE_AGG_0_100]), size); \ + darshan_common_val_counter(&rec_ref->access_root, &rec_ref->access_count, size, \ + &(rec_ref->file_rec->counters[MPIIO_ACCESS1_ACCESS]), \ + &(rec_ref->file_rec->counters[MPIIO_ACCESS1_COUNT])); \ + rec_ref->file_rec->counters[MPIIO_BYTES_WRITTEN] += size; \ + rec_ref->file_rec->counters[__counter] += 1; \ + if(rec_ref->last_io_type == DARSHAN_IO_READ) \ + rec_ref->file_rec->counters[MPIIO_RW_SWITCHES] += 1; \ + rec_ref->last_io_type = 
DARSHAN_IO_WRITE; \ + if(rec_ref->file_rec->fcounters[MPIIO_F_WRITE_START_TIMESTAMP] == 0 || \ + rec_ref->file_rec->fcounters[MPIIO_F_WRITE_START_TIMESTAMP] > __tm1) \ + rec_ref->file_rec->fcounters[MPIIO_F_WRITE_START_TIMESTAMP] = __tm1; \ + rec_ref->file_rec->fcounters[MPIIO_F_WRITE_END_TIMESTAMP] = __tm2; \ + if(rec_ref->file_rec->fcounters[MPIIO_F_MAX_WRITE_TIME] < __elapsed) { \ + rec_ref->file_rec->fcounters[MPIIO_F_MAX_WRITE_TIME] = __elapsed; \ + rec_ref->file_rec->counters[MPIIO_MAX_WRITE_TIME_SIZE] = size; } \ + DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[MPIIO_F_WRITE_TIME], \ + __tm1, __tm2, rec_ref->last_write_end); \ } while(0) /********************************************************** @@ -220,6 +213,7 @@ int MPI_File_open(MPI_Comm comm, char *filename, int amode, MPI_Info info, MPI_F #endif { int ret; + MPI_File tmp_fh; char* tmp; double tm1, tm2; @@ -237,10 +231,11 @@ int MPI_File_open(MPI_Comm comm, char *filename, int amode, MPI_Info info, MPI_F filename = tmp + 1; } - MPIIO_LOCK(); - mpiio_runtime_initialize(); - MPIIO_RECORD_OPEN(ret, filename, (*fh), comm, amode, info, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_PRE_RECORD(); + tmp_fh = *fh; + MPIIO_RECORD_OPEN(ret, filename, tmp_fh, comm, amode, info, tm1, tm2); + MPIIO_POST_RECORD(); + return(ret); } @@ -254,10 +249,10 @@ int MPI_File_read(MPI_File fh, void *buf, int count, ret = DARSHAN_MPI_CALL(PMPI_File_read)(fh, buf, count, datatype, status); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_INDEP_READS, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -276,10 +271,10 @@ int MPI_File_write(MPI_File fh, void *buf, int count, ret = DARSHAN_MPI_CALL(PMPI_File_write)(fh, buf, count, datatype, status); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_INDEP_WRITES, tm1, 
tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -294,10 +289,10 @@ int MPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf, count, datatype, status); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_INDEP_READS, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -317,10 +312,10 @@ int MPI_File_write_at(MPI_File fh, MPI_Offset offset, void *buf, count, datatype, status); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_INDEP_WRITES, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -334,10 +329,10 @@ int MPI_File_read_all(MPI_File fh, void * buf, int count, MPI_Datatype datatype, datatype, status); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_COLL_READS, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -355,10 +350,10 @@ int MPI_File_write_all(MPI_File fh, void * buf, int count, MPI_Datatype datatype datatype, status); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_COLL_WRITES, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -373,10 +368,10 @@ int MPI_File_read_at_all(MPI_File fh, MPI_Offset offset, void * buf, count, datatype, status); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_COLL_READS, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -396,10 +391,10 @@ int MPI_File_write_at_all(MPI_File fh, MPI_Offset offset, void * buf, count, datatype, status); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + 
MPIIO_PRE_RECORD(); MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_COLL_WRITES, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -413,10 +408,10 @@ int MPI_File_read_shared(MPI_File fh, void * buf, int count, MPI_Datatype dataty datatype, status); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_INDEP_READS, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -434,10 +429,10 @@ int MPI_File_write_shared(MPI_File fh, void * buf, int count, MPI_Datatype datat datatype, status); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_INDEP_WRITES, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -452,10 +447,10 @@ int MPI_File_read_ordered(MPI_File fh, void * buf, int count, datatype, status); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_COLL_READS, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -475,10 +470,10 @@ int MPI_File_write_ordered(MPI_File fh, void * buf, int count, datatype, status); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_COLL_WRITES, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -491,10 +486,10 @@ int MPI_File_read_all_begin(MPI_File fh, void * buf, int count, MPI_Datatype dat ret = DARSHAN_MPI_CALL(PMPI_File_read_all_begin)(fh, buf, count, datatype); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_SPLIT_READS, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -511,10 +506,10 @@ int MPI_File_write_all_begin(MPI_File fh, void * 
buf, int count, MPI_Datatype da ret = DARSHAN_MPI_CALL(PMPI_File_write_all_begin)(fh, buf, count, datatype); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_SPLIT_WRITES, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -529,10 +524,10 @@ int MPI_File_read_at_all_begin(MPI_File fh, MPI_Offset offset, void * buf, count, datatype); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_SPLIT_READS, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -552,10 +547,10 @@ int MPI_File_write_at_all_begin(MPI_File fh, MPI_Offset offset, void * buf, buf, count, datatype); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_SPLIT_WRITES, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -569,10 +564,10 @@ int MPI_File_read_ordered_begin(MPI_File fh, void * buf, int count, MPI_Datatype datatype); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_SPLIT_READS, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -590,10 +585,10 @@ int MPI_File_write_ordered_begin(MPI_File fh, void * buf, int count, MPI_Datatyp datatype); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_SPLIT_WRITES, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -606,10 +601,10 @@ int MPI_File_iread(MPI_File fh, void * buf, int count, MPI_Datatype datatype, __ ret = DARSHAN_MPI_CALL(PMPI_File_iread)(fh, buf, count, datatype, request); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); 
MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_NB_READS, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -628,10 +623,10 @@ int MPI_File_iwrite(MPI_File fh, void * buf, int count, ret = DARSHAN_MPI_CALL(PMPI_File_iwrite)(fh, buf, count, datatype, request); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_NB_WRITES, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -646,10 +641,10 @@ int MPI_File_iread_at(MPI_File fh, MPI_Offset offset, void * buf, datatype, request); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_NB_READS, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -669,10 +664,10 @@ int MPI_File_iwrite_at(MPI_File fh, MPI_Offset offset, void * buf, count, datatype, request); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_NB_WRITES, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -687,10 +682,10 @@ int MPI_File_iread_shared(MPI_File fh, void * buf, int count, datatype, request); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_NB_READS, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } @@ -710,17 +705,17 @@ int MPI_File_iwrite_shared(MPI_File fh, void * buf, int count, datatype, request); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); + MPIIO_PRE_RECORD(); MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_NB_WRITES, tm1, tm2); - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); + return(ret); } int MPI_File_sync(MPI_File fh) { int ret; - struct mpiio_file_runtime* file; + struct mpiio_file_record_ref *rec_ref; double tm1, tm2; tm1 = 
darshan_core_wtime(); @@ -729,17 +724,17 @@ int MPI_File_sync(MPI_File fh) if(ret == MPI_SUCCESS) { - MPIIO_LOCK(); - mpiio_runtime_initialize(); - file = mpiio_file_by_fh(fh); - if(file) + MPIIO_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(mpiio_runtime->fh_hash, + &fh, sizeof(MPI_File)); + if(rec_ref) { - file->file_record->counters[MPIIO_SYNCS] += 1; + rec_ref->file_rec->counters[MPIIO_SYNCS] += 1; DARSHAN_TIMER_INC_NO_OVERLAP( - file->file_record->fcounters[MPIIO_F_WRITE_TIME], - tm1, tm2, file->last_write_end); + rec_ref->file_rec->fcounters[MPIIO_F_WRITE_TIME], + tm1, tm2, rec_ref->last_write_end); } - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); } return(ret); @@ -754,7 +749,7 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, #endif { int ret; - struct mpiio_file_runtime* file; + struct mpiio_file_record_ref *rec_ref; double tm1, tm2; tm1 = darshan_core_wtime(); @@ -764,21 +759,21 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, if(ret == MPI_SUCCESS) { - MPIIO_LOCK(); - mpiio_runtime_initialize(); - file = mpiio_file_by_fh(fh); - if(file) + MPIIO_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(mpiio_runtime->fh_hash, + &fh, sizeof(MPI_File)); + if(rec_ref) { - file->file_record->counters[MPIIO_VIEWS] += 1; + rec_ref->file_rec->counters[MPIIO_VIEWS] += 1; if(info != MPI_INFO_NULL) { - file->file_record->counters[MPIIO_HINTS] += 1; + rec_ref->file_rec->counters[MPIIO_HINTS] += 1; DARSHAN_TIMER_INC_NO_OVERLAP( - file->file_record->fcounters[MPIIO_F_META_TIME], - tm1, tm2, file->last_meta_end); + rec_ref->file_rec->fcounters[MPIIO_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); } } - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); } return(ret); @@ -787,7 +782,7 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, int MPI_File_close(MPI_File *fh) { int ret; - struct mpiio_file_runtime* file; + struct mpiio_file_record_ref *rec_ref; MPI_File tmp_fh = *fh; double tm1, tm2; @@ -795,19 +790,20 
@@ int MPI_File_close(MPI_File *fh) ret = DARSHAN_MPI_CALL(PMPI_File_close)(fh); tm2 = darshan_core_wtime(); - MPIIO_LOCK(); - mpiio_runtime_initialize(); - file = mpiio_file_by_fh(tmp_fh); - if(file) + MPIIO_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(mpiio_runtime->fh_hash, + &tmp_fh, sizeof(MPI_File)); + if(rec_ref) { - file->file_record->fcounters[MPIIO_F_CLOSE_TIMESTAMP] = + rec_ref->file_rec->fcounters[MPIIO_F_CLOSE_TIMESTAMP] = darshan_core_wtime(); DARSHAN_TIMER_INC_NO_OVERLAP( - file->file_record->fcounters[MPIIO_F_META_TIME], - tm1, tm2, file->last_meta_end); - mpiio_file_close_fh(tmp_fh); + rec_ref->file_rec->fcounters[MPIIO_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + darshan_delete_record_ref(&(mpiio_runtime->fh_hash), + &tmp_fh, sizeof(MPI_File)); } - MPIIO_UNLOCK(); + MPIIO_POST_RECORD(); return(ret); } @@ -819,212 +815,94 @@ int MPI_File_close(MPI_File *fh) /* initialize data structures and register with darshan-core component */ static void mpiio_runtime_initialize() { - int mem_limit; - struct darshan_module_funcs mpiio_mod_fns = - { - .begin_shutdown = &mpiio_begin_shutdown, - .get_output_data = &mpiio_get_output_data, - .shutdown = &mpiio_shutdown - }; + int mpiio_buf_size; - /* don't do anything if already initialized or instrumenation is disabled */ - if(mpiio_runtime || instrumentation_disabled) - return; + /* try and store the default number of records for this module */ + mpiio_buf_size = DARSHAN_DEF_MOD_REC_COUNT * sizeof(struct darshan_mpiio_file); /* register the mpiio module with darshan core */ darshan_core_register_module( DARSHAN_MPIIO_MOD, - &mpiio_mod_fns, + &mpiio_shutdown, + &mpiio_buf_size, &my_rank, - &mem_limit, NULL); - /* return if no memory assigned by darshan core */ - if(mem_limit == 0) + /* return if darshan-core does not provide enough module memory */ + if(mpiio_buf_size < sizeof(struct darshan_mpiio_file)) + { + darshan_core_unregister_module(DARSHAN_MPIIO_MOD); return; + } mpiio_runtime = 
malloc(sizeof(*mpiio_runtime)); if(!mpiio_runtime) - return; - memset(mpiio_runtime, 0, sizeof(*mpiio_runtime)); - - /* set maximum number of file records according to max memory limit */ - /* NOTE: maximum number of records is based on the size of a mpiio file record */ - mpiio_runtime->file_array_size = mem_limit / sizeof(struct darshan_mpiio_file); - mpiio_runtime->file_array_ndx = 0; - - /* allocate array of runtime file records */ - mpiio_runtime->file_runtime_array = malloc(mpiio_runtime->file_array_size * - sizeof(struct mpiio_file_runtime)); - mpiio_runtime->file_record_array = malloc(mpiio_runtime->file_array_size * - sizeof(struct darshan_mpiio_file)); - if(!mpiio_runtime->file_runtime_array || !mpiio_runtime->file_record_array) { - mpiio_runtime->file_array_size = 0; + darshan_core_unregister_module(DARSHAN_MPIIO_MOD); return; } - memset(mpiio_runtime->file_runtime_array, 0, mpiio_runtime->file_array_size * - sizeof(struct mpiio_file_runtime)); - memset(mpiio_runtime->file_record_array, 0, mpiio_runtime->file_array_size * - sizeof(struct darshan_mpiio_file)); + memset(mpiio_runtime, 0, sizeof(*mpiio_runtime)); return; } -/* get a MPIIO file record for the given file path */ -static struct mpiio_file_runtime* mpiio_file_by_name(const char *name) +static struct mpiio_file_record_ref *mpiio_track_new_file_record( + darshan_record_id rec_id, const char *path) { - struct mpiio_file_runtime *file = NULL; - char *newname = NULL; - darshan_record_id file_id; - int limit_flag; - - if(!mpiio_runtime || instrumentation_disabled) - return(NULL); - - newname = darshan_clean_file_path(name); - if(!newname) - newname = (char*)name; - - limit_flag = (mpiio_runtime->file_array_ndx >= mpiio_runtime->file_array_size); - - /* get a unique id for this file from darshan core */ - darshan_core_register_record( - (void*)newname, - strlen(newname), - DARSHAN_MPIIO_MOD, - 1, - limit_flag, - &file_id, - NULL); + struct darshan_mpiio_file *file_rec = NULL; + struct 
mpiio_file_record_ref *rec_ref = NULL; + int ret; - /* the file record id is set to 0 if no memory is available for tracking - * new records -- just fall through and ignore this record - */ - if(file_id == 0) - { - if(newname != name) - free(newname); + rec_ref = malloc(sizeof(*rec_ref)); + if(!rec_ref) return(NULL); - } + memset(rec_ref, 0, sizeof(*rec_ref)); - /* search the hash table for this file record, and return if found */ - HASH_FIND(hlink, mpiio_runtime->file_hash, &file_id, sizeof(darshan_record_id), file); - if(file) + /* add a reference to this file record based on record id */ + ret = darshan_add_record_ref(&(mpiio_runtime->rec_id_hash), &rec_id, + sizeof(darshan_record_id), rec_ref); + if(ret == 0) { - if(newname != name) - free(newname); - return(file); - } - - /* no existing record, assign a new file record from the global array */ - file = &(mpiio_runtime->file_runtime_array[mpiio_runtime->file_array_ndx]); - file->file_record = &(mpiio_runtime->file_record_array[mpiio_runtime->file_array_ndx]); - file->file_record->f_id = file_id; - file->file_record->rank = my_rank; - - /* add new record to file hash table */ - HASH_ADD(hlink, mpiio_runtime->file_hash, file_record->f_id, sizeof(darshan_record_id), file); - mpiio_runtime->file_array_ndx++; - - if(newname != name) - free(newname); - return(file); -} - -/* get an MPIIO file record for the given file path, and also create a - * reference structure using the corresponding file handle - */ -static struct mpiio_file_runtime* mpiio_file_by_name_setfh(const char* name, MPI_File fh) -{ - struct mpiio_file_runtime* file; - struct mpiio_file_runtime_ref* ref; - - if(!mpiio_runtime || instrumentation_disabled) + free(rec_ref); return(NULL); - - /* find file record by name first */ - file = mpiio_file_by_name(name); - - if(!file) - return(NULL); - - /* search hash table for existing file ref for this fh */ - HASH_FIND(hlink, mpiio_runtime->fh_hash, &fh, sizeof(fh), ref); - if(ref) - { - /* we have a 
reference. Make sure it points to the correct file - * and return it - */ - ref->file = file; - return(file); } - /* if we hit this point, then we don't have a reference for this fh - * in the table yet. Add it. + /* register the actual file record with darshan-core so it is persisted + * in the log file */ - ref = malloc(sizeof(*ref)); - if(!ref) - return(NULL); - memset(ref, 0, sizeof(*ref)); - - ref->file = file; - ref->fh = fh; - HASH_ADD(hlink, mpiio_runtime->fh_hash, fh, sizeof(fh), ref); - - return(file); -} - -/* get an MPIIO file record for the given file handle */ -static struct mpiio_file_runtime* mpiio_file_by_fh(MPI_File fh) -{ - struct mpiio_file_runtime_ref* ref; + file_rec = darshan_core_register_record( + rec_id, + path, + DARSHAN_MPIIO_MOD, + sizeof(struct darshan_mpiio_file), + NULL); - if(!mpiio_runtime || instrumentation_disabled) + if(!file_rec) + { + darshan_delete_record_ref(&(mpiio_runtime->rec_id_hash), + &rec_id, sizeof(darshan_record_id)); + free(rec_ref); return(NULL); + } - /* search hash table for existing file ref for this file handle */ - HASH_FIND(hlink, mpiio_runtime->fh_hash, &fh, sizeof(fh), ref); - if(ref) - return(ref->file); + /* registering this file record was successful, so initialize some fields */ + file_rec->base_rec.id = rec_id; + file_rec->base_rec.rank = my_rank; + rec_ref->file_rec = file_rec; + mpiio_runtime->file_rec_count++; - return(NULL); + return(rec_ref); } -/* free up reference data structures for the given file handle */ -static void mpiio_file_close_fh(MPI_File fh) +static void mpiio_finalize_file_records(void *rec_ref_p) { - struct mpiio_file_runtime_ref* ref; - - if(!mpiio_runtime || instrumentation_disabled) - return; - - /* search hash table for this fd */ - HASH_FIND(hlink, mpiio_runtime->fh_hash, &fh, sizeof(fh), ref); - if(ref) - { - /* we have a reference, delete it */ - HASH_DELETE(hlink, mpiio_runtime->fh_hash, ref); - free(ref); - } + struct mpiio_file_record_ref *rec_ref = + (struct 
mpiio_file_record_ref *)rec_ref_p; + tdestroy(rec_ref->access_root, free); return; } -/* compare function for sorting file records by descending rank */ -static int mpiio_record_compare(const void* a_p, const void* b_p) -{ - const struct darshan_mpiio_file* a = a_p; - const struct darshan_mpiio_file* b = b_p; - - if(a->rank < b->rank) - return 1; - if(a->rank > b->rank) - return -1; - - return 0; -} - static void mpiio_record_reduction_op( void* infile_v, void* inoutfile_v, @@ -1036,14 +914,11 @@ static void mpiio_record_reduction_op( struct darshan_mpiio_file *inoutfile = inoutfile_v; int i, j, k; - assert(mpiio_runtime); - for(i=0; i<*len; i++) { memset(&tmp_file, 0, sizeof(struct darshan_mpiio_file)); - - tmp_file.f_id = infile->f_id; - tmp_file.rank = -1; + tmp_file.base_rec.id = infile->base_rec.id; + tmp_file.base_rec.rank = -1; /* sum */ for(j=MPIIO_INDEP_OPENS; j<=MPIIO_VIEWS; j++) @@ -1085,7 +960,7 @@ static void mpiio_record_reduction_op( { DARSHAN_COMMON_VAL_COUNTER_INC(&(tmp_file.counters[MPIIO_ACCESS1_ACCESS]), &(tmp_file.counters[MPIIO_ACCESS1_COUNT]), infile->counters[j], - infile->counters[j+4]); + infile->counters[j+4], 0); } /* second set */ @@ -1093,7 +968,7 @@ static void mpiio_record_reduction_op( { DARSHAN_COMMON_VAL_COUNTER_INC(&(tmp_file.counters[MPIIO_ACCESS1_ACCESS]), &(tmp_file.counters[MPIIO_ACCESS1_COUNT]), inoutfile->counters[j], - inoutfile->counters[j+4]); + inoutfile->counters[j+4], 0); } /* min non-zero (if available) value */ @@ -1287,6 +1162,17 @@ static void mpiio_shared_record_variance(MPI_Comm mod_comm, return; } +static void mpiio_cleanup_runtime() +{ + darshan_clear_record_refs(&(mpiio_runtime->fh_hash), 0); + darshan_clear_record_refs(&(mpiio_runtime->rec_id_hash), 1); + + free(mpiio_runtime); + mpiio_runtime = NULL; + + return; +} + /* mpiio module shutdown benchmark routine */ void darshan_mpiio_shutdown_bench_setup(int test_case) { @@ -1297,7 +1183,7 @@ void darshan_mpiio_shutdown_bench_setup(int test_case) intptr_t j; 
if(mpiio_runtime) - mpiio_shutdown(); + mpiio_cleanup_runtime(); mpiio_runtime_initialize(); @@ -1364,50 +1250,35 @@ void darshan_mpiio_shutdown_bench_setup(int test_case) return; } -/************************************************************************** - * Functions exported by MPI-IO module for coordinating with darshan-core * - **************************************************************************/ +/******************************************************************************** + * shutdown function exported by this module for coordinating with darshan-core * + ********************************************************************************/ -static void mpiio_begin_shutdown() -{ - assert(mpiio_runtime); - - MPIIO_LOCK(); - /* disable further instrumentation while Darshan shuts down */ - instrumentation_disabled = 1; - MPIIO_UNLOCK(); - - return; -} - -static void mpiio_get_output_data( +static void mpiio_shutdown( MPI_Comm mod_comm, darshan_record_id *shared_recs, int shared_rec_count, void **mpiio_buf, int *mpiio_buf_sz) { - struct mpiio_file_runtime *file; - struct mpiio_file_runtime* tmp; - int i; + struct mpiio_file_record_ref *rec_ref; + struct darshan_mpiio_file *mpiio_rec_buf = *(struct darshan_mpiio_file **)mpiio_buf; + int mpiio_rec_count; double mpiio_time; - void *red_send_buf = NULL; - void *red_recv_buf = NULL; + struct darshan_mpiio_file *red_send_buf = NULL; + struct darshan_mpiio_file *red_recv_buf = NULL; MPI_Datatype red_type; MPI_Op red_op; + int i; + MPIIO_LOCK(); assert(mpiio_runtime); + mpiio_rec_count = mpiio_runtime->file_rec_count; - /* go through and set the 4 most common access sizes for MPI-IO */ - for(i = 0; i < mpiio_runtime->file_array_ndx; i++) - { - tmp = &(mpiio_runtime->file_runtime_array[i]); - - /* common access sizes */ - darshan_walk_common_vals(tmp->access_root, - &(tmp->file_record->counters[MPIIO_ACCESS1_ACCESS]), - &(tmp->file_record->counters[MPIIO_ACCESS1_COUNT])); - } + /* perform any final 
transformations on MPIIO file records before + * writing them out to log file + */ + darshan_iter_record_refs(mpiio_runtime->rec_id_hash, &mpiio_finalize_file_records); /* if there are globally shared files, do a shared file reduction */ /* NOTE: the shared file reduction is also skipped if the @@ -1418,48 +1289,46 @@ static void mpiio_get_output_data( /* necessary initialization of shared records */ for(i = 0; i < shared_rec_count; i++) { - HASH_FIND(hlink, mpiio_runtime->file_hash, &shared_recs[i], - sizeof(darshan_record_id), file); - assert(file); + rec_ref = darshan_lookup_record_ref(mpiio_runtime->rec_id_hash, + &shared_recs[i], sizeof(darshan_record_id)); + assert(rec_ref); mpiio_time = - file->file_record->fcounters[MPIIO_F_READ_TIME] + - file->file_record->fcounters[MPIIO_F_WRITE_TIME] + - file->file_record->fcounters[MPIIO_F_META_TIME]; + rec_ref->file_rec->fcounters[MPIIO_F_READ_TIME] + + rec_ref->file_rec->fcounters[MPIIO_F_WRITE_TIME] + + rec_ref->file_rec->fcounters[MPIIO_F_META_TIME]; /* initialize fastest/slowest info prior to the reduction */ - file->file_record->counters[MPIIO_FASTEST_RANK] = - file->file_record->rank; - file->file_record->counters[MPIIO_FASTEST_RANK_BYTES] = - file->file_record->counters[MPIIO_BYTES_READ] + - file->file_record->counters[MPIIO_BYTES_WRITTEN]; - file->file_record->fcounters[MPIIO_F_FASTEST_RANK_TIME] = + rec_ref->file_rec->counters[MPIIO_FASTEST_RANK] = + rec_ref->file_rec->base_rec.rank; + rec_ref->file_rec->counters[MPIIO_FASTEST_RANK_BYTES] = + rec_ref->file_rec->counters[MPIIO_BYTES_READ] + + rec_ref->file_rec->counters[MPIIO_BYTES_WRITTEN]; + rec_ref->file_rec->fcounters[MPIIO_F_FASTEST_RANK_TIME] = mpiio_time; /* until reduction occurs, we assume that this rank is both * the fastest and slowest. It is up to the reduction operator * to find the true min and max. 
*/ - file->file_record->counters[MPIIO_SLOWEST_RANK] = - file->file_record->counters[MPIIO_FASTEST_RANK]; - file->file_record->counters[MPIIO_SLOWEST_RANK_BYTES] = - file->file_record->counters[MPIIO_FASTEST_RANK_BYTES]; - file->file_record->fcounters[MPIIO_F_SLOWEST_RANK_TIME] = - file->file_record->fcounters[MPIIO_F_FASTEST_RANK_TIME]; - - file->file_record->rank = -1; + rec_ref->file_rec->counters[MPIIO_SLOWEST_RANK] = + rec_ref->file_rec->counters[MPIIO_FASTEST_RANK]; + rec_ref->file_rec->counters[MPIIO_SLOWEST_RANK_BYTES] = + rec_ref->file_rec->counters[MPIIO_FASTEST_RANK_BYTES]; + rec_ref->file_rec->fcounters[MPIIO_F_SLOWEST_RANK_TIME] = + rec_ref->file_rec->fcounters[MPIIO_F_FASTEST_RANK_TIME]; + + rec_ref->file_rec->base_rec.rank = -1; } - /* sort the array of files descending by rank so that we get all of the - * shared files (marked by rank -1) in a contiguous portion at end - * of the array + /* sort the array of records so we get all of the shared records + * (marked by rank -1) in a contiguous portion at end of the array */ - qsort(mpiio_runtime->file_record_array, mpiio_runtime->file_array_ndx, - sizeof(struct darshan_mpiio_file), mpiio_record_compare); + darshan_record_sort(mpiio_rec_buf, mpiio_rec_count, + sizeof(struct darshan_mpiio_file)); - /* make *send_buf point to the shared files at the end of sorted array */ - red_send_buf = - &(mpiio_runtime->file_record_array[mpiio_runtime->file_array_ndx-shared_rec_count]); + /* make send_buf point to the shared files at the end of sorted array */ + red_send_buf = &(mpiio_rec_buf[mpiio_rec_count-shared_rec_count]); /* allocate memory for the reduction output on rank 0 */ if(my_rank == 0) @@ -1467,6 +1336,7 @@ static void mpiio_get_output_data( red_recv_buf = malloc(shared_rec_count * sizeof(struct darshan_mpiio_file)); if(!red_recv_buf) { + MPIIO_UNLOCK(); return; } } @@ -1492,46 +1362,29 @@ static void mpiio_get_output_data( /* clean up reduction state */ if(my_rank == 0) { - int tmp_ndx = 
mpiio_runtime->file_array_ndx - shared_rec_count; - memcpy(&(mpiio_runtime->file_record_array[tmp_ndx]), red_recv_buf, + int tmp_ndx = mpiio_rec_count - shared_rec_count; + memcpy(&(mpiio_rec_buf[tmp_ndx]), red_recv_buf, shared_rec_count * sizeof(struct darshan_mpiio_file)); free(red_recv_buf); } else { - mpiio_runtime->file_array_ndx -= shared_rec_count; + mpiio_rec_count -= shared_rec_count; } DARSHAN_MPI_CALL(PMPI_Type_free)(&red_type); DARSHAN_MPI_CALL(PMPI_Op_free)(&red_op); } - *mpiio_buf = (void *)(mpiio_runtime->file_record_array); - *mpiio_buf_sz = mpiio_runtime->file_array_ndx * sizeof(struct darshan_mpiio_file); + *mpiio_buf_sz = mpiio_rec_count * sizeof(struct darshan_mpiio_file); - return; -} - -static void mpiio_shutdown() -{ - struct mpiio_file_runtime_ref *ref, *tmp; + /* shutdown internal structures used for instrumenting */ + mpiio_cleanup_runtime(); - assert(mpiio_runtime); - - HASH_ITER(hlink, mpiio_runtime->fh_hash, ref, tmp) - { - HASH_DELETE(hlink, mpiio_runtime->fh_hash, ref); - free(ref); - } - - HASH_CLEAR(hlink, mpiio_runtime->file_hash); /* these entries are freed all at once below */ - - free(mpiio_runtime->file_runtime_array); - free(mpiio_runtime->file_record_array); - free(mpiio_runtime); - mpiio_runtime = NULL; - instrumentation_disabled = 0; + /* disable further instrumentation */ + instrumentation_disabled = 1; + MPIIO_UNLOCK(); return; } diff --git a/darshan-runtime/lib/darshan-null.c b/darshan-runtime/lib/darshan-null.c index b7d5d5daac390025c959651487c67d48700254d8..566677e9088bea962b83c3f2f2a679258675d825 100644 --- a/darshan-runtime/lib/darshan-null.c +++ b/darshan-runtime/lib/darshan-null.c @@ -12,10 +12,11 @@ #include #include #include +#include #include -#include "uthash.h" #include "darshan.h" +#include "darshan-dynamic.h" /* The "NULL" module is an example instrumentation module implementation provided * with Darshan, primarily to indicate how arbitrary modules may be integrated @@ -28,80 +29,72 @@ */ /* The 
DARSHAN_FORWARD_DECL macro (defined in darshan.h) is used to provide forward - * declarations for wrapped funcions, regardless if Darshan is used with statically - * or dynamically linked executables. + * declarations for wrapped functions, regardless of whether Darshan is used with + * statically or dynamically linked executables. */ DARSHAN_FORWARD_DECL(foo, int, (const char *name, int arg1, int arg2)); -/* The null_record_runtime structure maintains necessary runtime metadata - * for a "NULL" module data record (darshan_null_record structure, defined - * in darshan-null-log-format.h). This metadata assists with the instrumenting - * of specific statistics in the file record. +/* The null_record_ref structure maintains necessary runtime metadata + * for the NULL module record (darshan_null_record structure, defined in + * darshan-null-log-format.h) pointed to by 'record_p'. This metadata + * assists with the instrumenting of specific statistics in the record. * - * RATIONALE: In general, a module may need to track some stateful, volatile - * information regarding specific I/O statistics to aid in the instrumentation - * process. However, this information should not be stored in the darshan_null_record - * struct because we don't want it to appear in the final darshan log file. - * We therefore associate a null_record_runtime structure with each darshan_null_record - * structure in order to track this information. + * RATIONALE: the NULL module needs to track some stateful, volatile + * information about each record it has registered (for instance, most + * recent access time, amount of bytes transferred) to aid in instrumentation, + * but this information can't be stored in the darshan_null_record struct + * because we don't want it to appear in the final darshan log file.
We + * therefore associate a null_record_ref struct with each darshan_null_record + * struct in order to track this information (i.e., the mapping between + * null_record_ref structs and darshan_null_record structs is one-to-one). * - * NOTE: The null_record_runtime struct contains a pointer to a darshan_null_record - * struct (see the *record_p member) rather than simply embedding an entire - * darshan_null_record struct. This is done so that all of the darshan_null_record - * structs can be kept contiguous in memory as a single array to simplify - * reduction, compression, and storage. + * NOTE: we use the 'darshan_record_ref' interface (in darshan-common) to + * associate different types of handles with this null_record_ref struct. + * This allows us to index this struct (and the underlying record) by using + * either the corresponding Darshan record identifier or by any other arbitrary + * handle. For the NULL module, the only handles we use to track record + * references are Darshan record identifiers. */ -struct null_record_runtime +struct null_record_ref { /* Darshan record for the "NULL" example module */ - struct darshan_null_record* record_p; + struct darshan_null_record *record_p; /* ... other runtime data ... */ - - /* hash table link for this record */ - /* NOTE: it is entirely up to the module developer how to persist module - * records in memory as the instrumented application runs. These records - * could just as easily be stored in an array or linked list. That said, - * the data structure selection should be mindful of the resulting memory - * footprint and search time complexity to attempt minimize Darshan overheads. - * hash table and linked list implementations are available in uthash.h and - * utlist.h, respectively.
- */ - UT_hash_handle hlink; }; -/* The null_runtime structure simply encapsulates global data structures needed - * by the module for instrumenting functions of interest and providing the output - * I/O data for this module to the darshan-core component at shutdown time. +/* The null_runtime structure maintains necessary state for storing + * NULL records and for coordinating with darshan-core at shutdown time. */ struct null_runtime { - /* runtime_record_array is the array of runtime records for the "NULL" module. */ - struct null_record_runtime* runtime_record_array; - /* record_array is the array of high-level Darshan records for the "NULL" module, - * each corresponding to the the runtime record structure stored at the same array - * index in runtime_record_array. - */ - struct darshan_null_record* record_array; - /* file_array_size is the maximum amount of records that can be stored in - * record_array (and consequentially, runtime_record_array). - */ - int rec_array_size; - /* file_array_ndx is the current index into both runtime_record_array and - * record_array. - */ - int rec_array_ndx; - /* record_hash is a pointer to a hash table of null_record_runtime structures - * currently maintained by the "NULL" module. 
+ /* rec_id_hash is a pointer to a hash table of NULL module record + * references, indexed by Darshan record id */ - struct null_record_runtime* record_hash; + void *rec_id_hash; + /* number of records currently tracked */ + int rec_count; }; +/* internal helper functions for the NULL module */ +static void null_runtime_initialize( + void); +static struct null_record_ref *null_track_new_record( + darshan_record_id rec_id, const char *name); +static void null_cleanup_runtime( + void); + +/* forward declaration for NULL shutdown function needed to interface + * with darshan-core + */ +static void null_shutdown(MPI_Comm mod_comm, darshan_record_id *shared_recs, + int shared_rec_count, void **null_buf, int *null_buf_sz); + /* null_runtime is the global data structure encapsulating "NULL" module state */ static struct null_runtime *null_runtime = NULL; /* The null_runtime_mutex is a lock used when updating the null_runtime global * structure (or any other global data structures). This is necessary to avoid race - * conditions as multiple threads execute function wrappers and update module state. + * conditions as multiple threads may execute function wrappers and update module state. * NOTE: Recursive mutexes are used in case functions wrapped by this module call * other wrapped functions that would result in deadlock, otherwise. This mechanism * may not be necessary for all instrumentation modules. 
@@ -112,40 +105,57 @@ static int instrumentation_disabled = 0; /* my_rank indicates the MPI rank of this process */ static int my_rank = -1; -/* internal helper functions for the "NULL" module */ -static void null_runtime_initialize(void); -static struct null_record_runtime* null_record_by_name(const char *name); - -/* forward declaration for module functions needed to interface with darshan-core */ -static void null_begin_shutdown(void); -static void null_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs, - int shared_rec_count, void **null_buf, int *null_buf_sz); -static void null_shutdown(void); - /* macros for obtaining/releasing the "NULL" module lock */ #define NULL_LOCK() pthread_mutex_lock(&null_runtime_mutex) #define NULL_UNLOCK() pthread_mutex_unlock(&null_runtime_mutex) +/* the NULL_PRE_RECORD macro is executed before performing NULL + * module instrumentation of a call. It obtains a lock for updating + * module data structures, and ensures the NULL module has been properly + * initialized before instrumenting. + */ +#define NULL_PRE_RECORD() do { \ + NULL_LOCK(); \ + if(!null_runtime && !instrumentation_disabled) null_runtime_initialize(); \ + if(!null_runtime) { \ + NULL_UNLOCK(); \ + return(ret); \ + } \ +} while(0) + +/* the NULL_POST_RECORD macro is executed after performing NULL + * module instrumentation. It simply releases the module lock. + */ +#define NULL_POST_RECORD() do { \ + NULL_UNLOCK(); \ +} while(0) + /* macro for instrumenting the "NULL" module's foo function */ /* NOTE: this macro makes use of the DARSHAN_COUNTER_* macros defined * and documented in darshan.h.
*/ #define NULL_RECORD_FOO(__ret, __name, __dat, __tm1, __tm2) do{ \ - struct null_record_runtime* rec; \ - double elapsed = __tm2 - __tm1; \ + darshan_record_id rec_id; \ + struct null_record_ref *rec_ref; \ + double __elapsed = __tm2 - __tm1; \ /* if foo returns error (return code < 0), don't instrument anything */ \ if(__ret < 0) break; \ - /* use '__name' to lookup a corresponding "NULL" record */ \ - rec = null_record_by_name(__name); \ - if(!rec) break; \ + /* use '__name' to generate a unique Darshan record id */ \ + rec_id = darshan_core_gen_record_id(__name); \ + /* look up a record reference for this record id using darshan rec_ref interface */ \ + rec_ref = darshan_lookup_record_ref(null_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \ + /* if no reference was found, track a new one for this record */ \ + if(!rec_ref) null_track_new_record(rec_id, __name); \ + /* if we still don't have a valid reference, back out */ \ + if(!rec_ref) break; \ /* increment counter indicating number of calls to 'bar' */ \ - rec->record_p->counters[NULL_BARS] += 1; \ + rec_ref->record_p->counters[NULL_BARS] += 1; \ /* store data value for most recent call to 'bar' */ \ - rec->record_p->counters[NULL_BAR_DAT] = __dat; \ + rec_ref->record_p->counters[NULL_BAR_DAT] = __dat; \ /* store timestamp of most recent call to 'bar' */ \ - rec->record_p->fcounters[NULL_F_BAR_TIMESTAMP] = __tm1; \ + rec_ref->record_p->fcounters[NULL_F_BAR_TIMESTAMP] = __tm1; \ /* store duration of most recent call to 'bar' */ \ - rec->record_p->fcounters[NULL_F_BAR_DURATION] = elapsed; \ + rec_ref->record_p->fcounters[NULL_F_BAR_DURATION] = __elapsed; \ } while(0) /********************************************************** @@ -174,179 +184,135 @@ int DARSHAN_DECL(foo)(const char* name, int arg1, int arg2) ret = __real_foo(name, arg1, arg2); tm2 = darshan_core_wtime(); - NULL_LOCK(); - - /* Before attempting to instrument I/O statistics for function foo, make - * sure the "NULL" module runtime 
environment has been initialized. - * NOTE: this runtime environment is initialized only once -- if the - * appropriate structures have already been initialized, this function simply - * returns. - */ - null_runtime_initialize(); - + NULL_PRE_RECORD(); /* Call macro for instrumenting data for foo function calls. */ NULL_RECORD_FOO(ret, name, arg1+arg2, tm1, tm2); - - NULL_UNLOCK(); + NULL_POST_RECORD(); return(ret); } /********************************************************** - * Internal functions for manipulating POSIX module state * + * Internal functions for manipulating NULL module state * **********************************************************/ -/* Initialize internal POSIX module data structures and register with darshan-core. */ +/* Initialize internal NULL module data structures and register with darshan-core. */ static void null_runtime_initialize() { - /* struct of function pointers for interfacing with darshan-core */ - struct darshan_module_funcs null_mod_fns = - { - .begin_shutdown = &null_begin_shutdown, - .get_output_data = &null_get_output_data, - .shutdown = &null_shutdown - }; - int mem_limit; /* max. 
memory this module can consume, dictated by darshan-core */ - - /* don't do anything if already initialized or instrumenation is disabled */ - if(null_runtime || instrumentation_disabled) - return; + int null_buf_size; + + /* try and store a default number of records for this module */ + null_buf_size = DARSHAN_DEF_MOD_REC_COUNT * sizeof(struct darshan_null_record); - /* register the "NULL" module with the darshan-core component */ + /* register the NULL module with the darshan-core component */ darshan_core_register_module( DARSHAN_NULL_MOD, /* Darshan module identifier, defined in darshan-log-format.h */ - &null_mod_fns, + &null_shutdown, + &null_buf_size, &my_rank, - &mem_limit, NULL); - /* return if no memory assigned by darshan-core */ - if(mem_limit == 0) + /* return if darshan-core does not provide enough module memory for at + * least one NULL record + */ + if(null_buf_size < sizeof(struct darshan_null_record)) + { + darshan_core_unregister_module(DARSHAN_NULL_MOD); return; + } /* initialize module's global state */ null_runtime = malloc(sizeof(*null_runtime)); if(!null_runtime) - return; - memset(null_runtime, 0, sizeof(*null_runtime)); - - /* Set the maximum number of data records this module may track, as indicated - * by mem_limit (set by darshan-core). - * NOTE: We interpret the maximum memory limit to be related to the maximum - * amount of data which may be written to log by a single process for a given - * module. We therefore use this maximum memory limit to determine how many - * darshan_null_record structures we can track per process. 
- */ - null_runtime->rec_array_size = mem_limit / sizeof(struct darshan_null_record); - null_runtime->rec_array_ndx = 0; - - /* allocate both record arrays (runtime and high-level records) */ - null_runtime->runtime_record_array = malloc(null_runtime->rec_array_size * - sizeof(struct null_record_runtime)); - null_runtime->record_array = malloc(null_runtime->rec_array_size * - sizeof(struct darshan_null_record)); - if(!null_runtime->runtime_record_array || !null_runtime->record_array) { - null_runtime->rec_array_size = 0; + darshan_core_unregister_module(DARSHAN_NULL_MOD); return; } - memset(null_runtime->runtime_record_array, 0, null_runtime->rec_array_size * - sizeof(struct null_record_runtime)); - memset(null_runtime->record_array, 0, null_runtime->rec_array_size * - sizeof(struct darshan_null_record)); + memset(null_runtime, 0, sizeof(*null_runtime)); return; } -/* Search for and return a "NULL" module record corresponding to name parameter. */ -static struct null_record_runtime* null_record_by_name(const char *name) +/* allocate and track a new NULL module record */ +static struct null_record_ref *null_track_new_record( + darshan_record_id rec_id, const char *name) { - struct null_record_runtime *rec = NULL; - darshan_record_id rec_id; - int limit_flag; + struct darshan_null_record *record_p = NULL; + struct null_record_ref *rec_ref = NULL; + int ret; - /* Don't search for a record if the "NULL" module is not initialized or - * if instrumentation has been toggled off. 
- */ - if(!null_runtime || instrumentation_disabled) + rec_ref = malloc(sizeof(*rec_ref)); + if(!rec_ref) return(NULL); + memset(rec_ref, 0, sizeof(*rec_ref)); - /* stop tracking new records if we are tracking our maximum count */ - limit_flag = (null_runtime->rec_array_ndx >= null_runtime->rec_array_size); - - /* get a unique record identifier for this record from darshan-core */ - darshan_core_register_record( - (void*)name, - strlen(name), - DARSHAN_NULL_MOD, - 1, - limit_flag, - &rec_id, - NULL); - - /* the file record id is set to 0 if no memory is available for tracking - * new records -- just fall through and ignore this record + /* allocate a new NULL record reference and add it to the hash + * table, using the Darshan record identifier as the handle */ - if(rec_id == 0) + ret = darshan_add_record_ref(&(null_runtime->rec_id_hash), &rec_id, + sizeof(darshan_record_id), rec_ref); + if(ret == 0) { + free(rec_ref); return(NULL); } - /* search the hash table for this file record, and return if found */ - HASH_FIND(hlink, null_runtime->record_hash, &rec_id, sizeof(darshan_record_id), rec); - if(rec) + /* register the actual file record with darshan-core so it is persisted + * in the log file + */ + record_p = darshan_core_register_record( + rec_id, + name, + DARSHAN_NULL_MOD, + sizeof(struct darshan_null_record), + NULL); + + if(!record_p) { - return(rec); + /* if registration fails, delete record reference and return */ + darshan_delete_record_ref(&(null_runtime->rec_id_hash), + &rec_id, sizeof(darshan_record_id)); + free(rec_ref); + return(NULL); } - /* no existing record, assign a new one from the global array */ - rec = &(null_runtime->runtime_record_array[null_runtime->rec_array_ndx]); - rec->record_p = &(null_runtime->record_array[null_runtime->rec_array_ndx]); + /* registering this file record was successful, so initialize some fields */ + record_p->base_rec.id = rec_id; + record_p->base_rec.rank = my_rank; + rec_ref->record_p = record_p; + 
null_runtime->rec_count++; - /* set the darshan record id and corresponding process rank for this record */ - rec->record_p->f_id = rec_id; - rec->record_p->rank = my_rank; - - /* add new record to file hash table */ - HASH_ADD(hlink, null_runtime->record_hash, record_p->f_id, sizeof(darshan_record_id), rec); - null_runtime->rec_array_ndx++; - - return(rec); + /* return pointer to the record reference */ + return(rec_ref); } -/****************************************************************************** - * Functions exported by the "NULL" module for coordinating with darshan-core * - ******************************************************************************/ - -/* Perform any necessary steps prior to shutting down for the "NULL" module. */ -static void null_begin_shutdown() +/* cleanup NULL module internal data structures */ +static void null_cleanup_runtime() { - assert(null_runtime); - - NULL_LOCK(); - - /* In general, we want to disable all wrappers while Darshan shuts down. - * This is to avoid race conditions and ensure data consistency, as - * executing wrappers could potentially modify module state while Darshan - * is in the process of shutting down. - */ - instrumentation_disabled = 1; + /* iterate the hash of record references and free them */ + darshan_clear_record_refs(&(null_runtime->rec_id_hash), 1); - /* ... any other code which needs to be executed before beginning shutdown process ... */ - - NULL_UNLOCK(); + free(null_runtime); + null_runtime = NULL; return; } -/* Pass output data for the "NULL" module back to darshan-core to log to file. 
*/ -static void null_get_output_data( +/************************************************************************************** + * shutdown function exported by the "NULL" module for coordinating with darshan-core * + **************************************************************************************/ + +/* Pass output data for the "NULL" module back to darshan-core to log to file, + * and shutdown/free internal data structures. + */ +static void null_shutdown( MPI_Comm mod_comm, darshan_record_id *shared_recs, int shared_rec_count, void **null_buf, int *null_buf_sz) { + NULL_LOCK(); assert(null_runtime); /* NOTE: this function can be used to run collective operations prior to @@ -359,28 +325,21 @@ static void null_get_output_data( * functionality. */ - /* Just set the output buffer to point at the array of the "NULL" module's - * I/O records, and set the output size according to the number of records - * currently being tracked. + /* Just set the output size according to the number of records currently + * being tracked. In general, the module can decide to throw out records + * that have been previously registered by shuffling around memory in + * 'null_buf' -- 'null_buf' and 'null_buf_sz' both are passed as pointers + * so they can be updated by the shutdown function potentially. */ - *null_buf = (void *)(null_runtime->record_array); - *null_buf_sz = null_runtime->rec_array_ndx * sizeof(struct darshan_null_record); + *null_buf_sz = null_runtime->rec_count * sizeof(struct darshan_null_record); - return; -} + /* shutdown internal structures used for instrumenting */ + null_cleanup_runtime(); -/* Shutdown the "NULL" module by freeing up all data structures. 
*/ -static void null_shutdown() -{ - assert(null_runtime); - - HASH_CLEAR(hlink, null_runtime->record_hash); /* these hash entries are freed all at once below */ - - free(null_runtime->runtime_record_array); - free(null_runtime->record_array); - free(null_runtime); - null_runtime = NULL; + /* disable further instrumentation */ + instrumentation_disabled = 1; + NULL_UNLOCK(); return; } diff --git a/darshan-runtime/lib/darshan-pnetcdf.c b/darshan-runtime/lib/darshan-pnetcdf.c index 4b52ec461253778feb12aa308fe10f09fd7fac00..1b71a89633a2d06030da109980f4d86d3b7f9d4e 100644 --- a/darshan-runtime/lib/darshan-pnetcdf.c +++ b/darshan-runtime/lib/darshan-pnetcdf.c @@ -19,8 +19,6 @@ #define __USE_GNU #include -#include "uthash.h" - #include "darshan.h" #include "darshan-dynamic.h" @@ -28,56 +26,81 @@ DARSHAN_FORWARD_DECL(ncmpi_create, int, (MPI_Comm comm, const char *path, int cm DARSHAN_FORWARD_DECL(ncmpi_open, int, (MPI_Comm comm, const char *path, int omode, MPI_Info info, int *ncidp)); DARSHAN_FORWARD_DECL(ncmpi_close, int, (int ncid)); -/* structure to track i/o stats for a given PNETCDF file at runtime */ -struct pnetcdf_file_runtime -{ - struct darshan_pnetcdf_file* file_record; - UT_hash_handle hlink; -}; - -/* structure to associate a PNETCDF ncid with an existing file runtime structure */ -struct pnetcdf_file_runtime_ref +/* structure that can track i/o stats for a given PNETCDF file record at runtime */ +struct pnetcdf_file_record_ref { - struct pnetcdf_file_runtime* file; - int ncid; - UT_hash_handle hlink; + struct darshan_pnetcdf_file* file_rec; }; -/* necessary state for storing PNETCDF file records and coordinating with - * darshan-core at shutdown time - */ +/* struct to encapsulate runtime state for the PNETCDF module */ struct pnetcdf_runtime { - struct pnetcdf_file_runtime* file_runtime_array; - struct darshan_pnetcdf_file* file_record_array; - int file_array_size; - int file_array_ndx; - struct pnetcdf_file_runtime *file_hash; - struct 
pnetcdf_file_runtime_ref* ncid_hash; + void *rec_id_hash; + void *ncid_hash; + int file_rec_count; }; +static void pnetcdf_runtime_initialize( + void); +static struct pnetcdf_file_record_ref *pnetcdf_track_new_file_record( + darshan_record_id rec_id, const char *path); +static void pnetcdf_record_reduction_op( + void* infile_v, void* inoutfile_v, int *len, MPI_Datatype *datatype); +static void pnetcdf_cleanup_runtime( + void); + +static void pnetcdf_shutdown( + MPI_Comm mod_comm, darshan_record_id *shared_recs, + int shared_rec_count, void **pnetcdf_buf, int *pnetcdf_buf_sz); + static struct pnetcdf_runtime *pnetcdf_runtime = NULL; static pthread_mutex_t pnetcdf_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP; static int instrumentation_disabled = 0; static int my_rank = -1; -static void pnetcdf_runtime_initialize(void); -static struct pnetcdf_file_runtime* pnetcdf_file_by_name(const char *name); -static struct pnetcdf_file_runtime* pnetcdf_file_by_name_setncid(const char* name, int ncid); -static struct pnetcdf_file_runtime* pnetcdf_file_by_ncid(int ncid); -static void pnetcdf_file_close_ncid(int ncid); -static int pnetcdf_record_compare(const void* a, const void* b); -static void pnetcdf_record_reduction_op(void* infile_v, void* inoutfile_v, - int *len, MPI_Datatype *datatype); - -static void pnetcdf_begin_shutdown(void); -static void pnetcdf_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs, - int shared_rec_count, void **pnetcdf_buf, int *pnetcdf_buf_sz); -static void pnetcdf_shutdown(void); - #define PNETCDF_LOCK() pthread_mutex_lock(&pnetcdf_runtime_mutex) #define PNETCDF_UNLOCK() pthread_mutex_unlock(&pnetcdf_runtime_mutex) +#define PNETCDF_PRE_RECORD() do { \ + PNETCDF_LOCK(); \ + if(!pnetcdf_runtime && !instrumentation_disabled) pnetcdf_runtime_initialize(); \ + if(!pnetcdf_runtime) { \ + PNETCDF_UNLOCK(); \ + return(ret); \ + } \ +} while(0) + +#define PNETCDF_POST_RECORD() do { \ + PNETCDF_UNLOCK(); \ +} while(0) + +#define 
PNETCDF_RECORD_OPEN(__ncidp, __path, __comm, __tm1) do { \ + darshan_record_id rec_id; \ + struct pnetcdf_file_record_ref *rec_ref; \ + char *newpath; \ + int comm_size; \ + newpath = darshan_clean_file_path(__path); \ + if(!newpath) newpath = (char *)__path; \ + if(darshan_core_excluded_path(newpath)) { \ + if(newpath != __path) free(newpath); \ + break; \ + } \ + rec_id = darshan_core_gen_record_id(newpath); \ + rec_ref = darshan_lookup_record_ref(pnetcdf_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \ + if(!rec_ref) rec_ref = pnetcdf_track_new_file_record(rec_id, newpath); \ + if(!rec_ref) { \ + if(newpath != __path) free(newpath); \ + break; \ + } \ + DARSHAN_MPI_CALL(PMPI_Comm_size)(__comm, &comm_size); \ + if(rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_TIMESTAMP] == 0) \ + rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_TIMESTAMP] = __tm1; \ + if(comm_size == 1) rec_ref->file_rec->counters[PNETCDF_INDEP_OPENS] += 1; \ + else rec_ref->file_rec->counters[PNETCDF_COLL_OPENS] += 1; \ + darshan_add_record_ref(&(pnetcdf_runtime->ncid_hash), __ncidp, sizeof(int), rec_ref); \ + if(newpath != __path) free(newpath); \ +} while(0) + /********************************************************* * Wrappers for PNETCDF functions of interest * *********************************************************/ @@ -86,9 +109,7 @@ int DARSHAN_DECL(ncmpi_create)(MPI_Comm comm, const char *path, int cmode, MPI_Info info, int *ncidp) { int ret; - struct pnetcdf_file_runtime* file; char* tmp; - int comm_size; double tm1; MAP_OR_FAIL(ncmpi_create); @@ -107,25 +128,9 @@ int DARSHAN_DECL(ncmpi_create)(MPI_Comm comm, const char *path, path = tmp + 1; } - PNETCDF_LOCK(); - pnetcdf_runtime_initialize(); - file = pnetcdf_file_by_name_setncid(path, (*ncidp)); - if(file) - { - if(file->file_record->fcounters[PNETCDF_F_OPEN_TIMESTAMP] == 0 || - file->file_record->fcounters[PNETCDF_F_OPEN_TIMESTAMP] > tm1) - file->file_record->fcounters[PNETCDF_F_OPEN_TIMESTAMP] = tm1; - 
DARSHAN_MPI_CALL(PMPI_Comm_size)(comm, &comm_size); - if(comm_size == 1) - { - file->file_record->counters[PNETCDF_INDEP_OPENS] += 1; - } - else - { - file->file_record->counters[PNETCDF_COLL_OPENS] += 1; - } - } - PNETCDF_UNLOCK(); + PNETCDF_PRE_RECORD(); + PNETCDF_RECORD_OPEN(ncidp, path, comm, tm1); + PNETCDF_POST_RECORD(); } return(ret); @@ -135,9 +140,7 @@ int DARSHAN_DECL(ncmpi_open)(MPI_Comm comm, const char *path, int omode, MPI_Info info, int *ncidp) { int ret; - struct pnetcdf_file_runtime* file; char* tmp; - int comm_size; double tm1; MAP_OR_FAIL(ncmpi_open); @@ -156,25 +159,9 @@ int DARSHAN_DECL(ncmpi_open)(MPI_Comm comm, const char *path, path = tmp + 1; } - PNETCDF_LOCK(); - pnetcdf_runtime_initialize(); - file = pnetcdf_file_by_name_setncid(path, (*ncidp)); - if(file) - { - if(file->file_record->fcounters[PNETCDF_F_OPEN_TIMESTAMP] == 0 || - file->file_record->fcounters[PNETCDF_F_OPEN_TIMESTAMP] > tm1) - file->file_record->fcounters[PNETCDF_F_OPEN_TIMESTAMP] = tm1; - DARSHAN_MPI_CALL(PMPI_Comm_size)(comm, &comm_size); - if(comm_size == 1) - { - file->file_record->counters[PNETCDF_INDEP_OPENS] += 1; - } - else - { - file->file_record->counters[PNETCDF_COLL_OPENS] += 1; - } - } - PNETCDF_UNLOCK(); + PNETCDF_PRE_RECORD(); + PNETCDF_RECORD_OPEN(ncidp, path, comm, tm1); + PNETCDF_POST_RECORD(); } return(ret); @@ -182,23 +169,24 @@ int DARSHAN_DECL(ncmpi_open)(MPI_Comm comm, const char *path, int DARSHAN_DECL(ncmpi_close)(int ncid) { - struct pnetcdf_file_runtime* file; + struct pnetcdf_file_record_ref *rec_ref; int ret; MAP_OR_FAIL(ncmpi_close); ret = __real_ncmpi_close(ncid); - PNETCDF_LOCK(); - pnetcdf_runtime_initialize(); - file = pnetcdf_file_by_ncid(ncid); - if(file) + PNETCDF_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(pnetcdf_runtime->ncid_hash, + &ncid, sizeof(int)); + if(rec_ref) { - file->file_record->fcounters[PNETCDF_F_CLOSE_TIMESTAMP] = + rec_ref->file_rec->fcounters[PNETCDF_F_CLOSE_TIMESTAMP] = darshan_core_wtime(); - 
pnetcdf_file_close_ncid(ncid); + darshan_delete_record_ref(&(pnetcdf_runtime->ncid_hash), + &ncid, sizeof(int)); } - PNETCDF_UNLOCK(); + PNETCDF_POST_RECORD(); return(ret); } @@ -210,211 +198,83 @@ int DARSHAN_DECL(ncmpi_close)(int ncid) /* initialize internal PNETCDF module data strucutres and register with darshan-core */ static void pnetcdf_runtime_initialize() { - int mem_limit; - struct darshan_module_funcs pnetcdf_mod_fns = - { - .begin_shutdown = &pnetcdf_begin_shutdown, - .get_output_data = &pnetcdf_get_output_data, - .shutdown = &pnetcdf_shutdown - }; + int pnetcdf_buf_size; - /* don't do anything if already initialized or instrumenation is disabled */ - if(pnetcdf_runtime || instrumentation_disabled) - return; + /* try and store the default number of records for this module */ + pnetcdf_buf_size = DARSHAN_DEF_MOD_REC_COUNT * sizeof(struct darshan_pnetcdf_file); /* register pnetcdf module with darshan-core */ darshan_core_register_module( DARSHAN_PNETCDF_MOD, - &pnetcdf_mod_fns, + &pnetcdf_shutdown, + &pnetcdf_buf_size, &my_rank, - &mem_limit, NULL); - /* return if no memory assigned by darshan-core */ - if(mem_limit == 0) + /* return if darshan-core does not provide enough module memory */ + if(pnetcdf_buf_size < sizeof(struct darshan_pnetcdf_file)) + { + darshan_core_unregister_module(DARSHAN_PNETCDF_MOD); return; + } pnetcdf_runtime = malloc(sizeof(*pnetcdf_runtime)); if(!pnetcdf_runtime) - return; - memset(pnetcdf_runtime, 0, sizeof(*pnetcdf_runtime)); - - /* set maximum number of file records according to max memory limit */ - /* NOTE: maximum number of records is based on the size of a pnetcdf file record */ - /* TODO: should we base memory usage off file record or total runtime structure sizes? 
*/ - pnetcdf_runtime->file_array_size = mem_limit / sizeof(struct darshan_pnetcdf_file); - pnetcdf_runtime->file_array_ndx = 0; - - /* allocate array of runtime file records */ - pnetcdf_runtime->file_runtime_array = malloc(pnetcdf_runtime->file_array_size * - sizeof(struct pnetcdf_file_runtime)); - pnetcdf_runtime->file_record_array = malloc(pnetcdf_runtime->file_array_size * - sizeof(struct darshan_pnetcdf_file)); - if(!pnetcdf_runtime->file_runtime_array || !pnetcdf_runtime->file_record_array) { - pnetcdf_runtime->file_array_size = 0; + darshan_core_unregister_module(DARSHAN_PNETCDF_MOD); return; } - memset(pnetcdf_runtime->file_runtime_array, 0, pnetcdf_runtime->file_array_size * - sizeof(struct pnetcdf_file_runtime)); - memset(pnetcdf_runtime->file_record_array, 0, pnetcdf_runtime->file_array_size * - sizeof(struct darshan_pnetcdf_file)); + memset(pnetcdf_runtime, 0, sizeof(*pnetcdf_runtime)); return; } -/* get a PNETCDF file record for the given file path */ -static struct pnetcdf_file_runtime* pnetcdf_file_by_name(const char *name) +static struct pnetcdf_file_record_ref *pnetcdf_track_new_file_record( + darshan_record_id rec_id, const char *path) { - struct pnetcdf_file_runtime *file = NULL; - char *newname = NULL; - darshan_record_id file_id; - int limit_flag; + struct darshan_pnetcdf_file *file_rec = NULL; + struct pnetcdf_file_record_ref *rec_ref = NULL; + int ret; - if(!pnetcdf_runtime || instrumentation_disabled) + rec_ref = malloc(sizeof(*rec_ref)); + if(!rec_ref) return(NULL); + memset(rec_ref, 0, sizeof(*rec_ref)); - newname = darshan_clean_file_path(name); - if(!newname) - newname = (char*)name; - - limit_flag = (pnetcdf_runtime->file_array_ndx >= pnetcdf_runtime->file_array_size); - - /* get a unique id for this file from darshan core */ - darshan_core_register_record( - (void*)newname, - strlen(newname), - DARSHAN_PNETCDF_MOD, - 1, - limit_flag, - &file_id, - NULL); - - /* the file record id is set to 0 if no memory is available for tracking - * 
new records -- just fall through and ignore this record - */ - if(file_id == 0) - { - if(newname != name) - free(newname); - return(NULL); - } - - /* search the hash table for this file record, and return if found */ - HASH_FIND(hlink, pnetcdf_runtime->file_hash, &file_id, sizeof(darshan_record_id), file); - if(file) + /* add a reference to this file record based on record id */ + ret = darshan_add_record_ref(&(pnetcdf_runtime->rec_id_hash), &rec_id, + sizeof(darshan_record_id), rec_ref); + if(ret == 0) { - if(newname != name) - free(newname); - return(file); - } - - /* no existing record, assign a new file record from the global array */ - file = &(pnetcdf_runtime->file_runtime_array[pnetcdf_runtime->file_array_ndx]); - file->file_record = &(pnetcdf_runtime->file_record_array[pnetcdf_runtime->file_array_ndx]); - file->file_record->f_id = file_id; - file->file_record->rank = my_rank; - - /* add new record to file hash table */ - HASH_ADD(hlink, pnetcdf_runtime->file_hash, file_record->f_id, sizeof(darshan_record_id), file); - pnetcdf_runtime->file_array_ndx++; - - if(newname != name) - free(newname); - return(file); -} - -/* get a PNETCDF file record for the given file path, and also create a - * reference structure using the returned ncid - */ -static struct pnetcdf_file_runtime* pnetcdf_file_by_name_setncid(const char* name, int ncid) -{ - struct pnetcdf_file_runtime* file; - struct pnetcdf_file_runtime_ref* ref; - - if(!pnetcdf_runtime || instrumentation_disabled) - return(NULL); - - /* find file record by name first */ - file = pnetcdf_file_by_name(name); - - if(!file) + free(rec_ref); return(NULL); - - /* search hash table for existing file ref for this ncid */ - HASH_FIND(hlink, pnetcdf_runtime->ncid_hash, &ncid, sizeof(int), ref); - if(ref) - { - /* we have a reference. 
Make sure it points to the correct file - * and return it - */ - ref->file = file; - return(file); } - /* if we hit this point, then we don't have a reference for this ncid - * in the table yet. Add it. + /* register the actual file record with darshan-core so it is persisted + * in the log file */ - ref = malloc(sizeof(*ref)); - if(!ref) - return(NULL); - memset(ref, 0, sizeof(*ref)); - - ref->file = file; - ref->ncid = ncid; - HASH_ADD(hlink, pnetcdf_runtime->ncid_hash, ncid, sizeof(int), ref); - - return(file); -} - -/* get a PNETCDF file record for the given ncid */ -static struct pnetcdf_file_runtime* pnetcdf_file_by_ncid(int ncid) -{ - struct pnetcdf_file_runtime_ref* ref; - - if(!pnetcdf_runtime || instrumentation_disabled) - return(NULL); - - /* search hash table for existing file ref for this ncid */ - HASH_FIND(hlink, pnetcdf_runtime->ncid_hash, &ncid, sizeof(int), ref); - if(ref) - return(ref->file); - - return(NULL); -} - -/* free up PNETCDF reference data structures for the given ncid */ -static void pnetcdf_file_close_ncid(int ncid) -{ - struct pnetcdf_file_runtime_ref* ref; - - if(!pnetcdf_runtime || instrumentation_disabled) - return; + file_rec = darshan_core_register_record( + rec_id, + path, + DARSHAN_PNETCDF_MOD, + sizeof(struct darshan_pnetcdf_file), + NULL); - /* search hash table for this ncid */ - HASH_FIND(hlink, pnetcdf_runtime->ncid_hash, &ncid, sizeof(int), ref); - if(ref) + if(!file_rec) { - /* we have a reference, delete it */ - HASH_DELETE(hlink, pnetcdf_runtime->ncid_hash, ref); - free(ref); + darshan_delete_record_ref(&(pnetcdf_runtime->rec_id_hash), + &rec_id, sizeof(darshan_record_id)); + free(rec_ref); + return(NULL); } - return; -} - -/* compare function for sorting file records by descending rank */ -static int pnetcdf_record_compare(const void* a_p, const void* b_p) -{ - const struct darshan_pnetcdf_file* a = a_p; - const struct darshan_pnetcdf_file* b = b_p; - - if(a->rank < b->rank) - return 1; - if(a->rank > b->rank) - 
return -1; + /* registering this file record was successful, so initialize some fields */ + file_rec->base_rec.id = rec_id; + file_rec->base_rec.rank = my_rank; + rec_ref->file_rec = file_rec; + pnetcdf_runtime->file_rec_count++; - return 0; + return(rec_ref); } static void pnetcdf_record_reduction_op(void* infile_v, void* inoutfile_v, @@ -430,8 +290,8 @@ static void pnetcdf_record_reduction_op(void* infile_v, void* inoutfile_v, for(i=0; i<*len; i++) { memset(&tmp_file, 0, sizeof(struct darshan_pnetcdf_file)); - tmp_file.f_id = infile->f_id; - tmp_file.rank = -1; + tmp_file.base_rec.id = infile->base_rec.id; + tmp_file.base_rec.rank = -1; /* sum */ for(j=PNETCDF_INDEP_OPENS; j<=PNETCDF_COLL_OPENS; j++) @@ -467,37 +327,41 @@ static void pnetcdf_record_reduction_op(void* infile_v, void* inoutfile_v, return; } -/*************************************************************************** - * Functions exported by PNETCDF module for coordinating with darshan-core * - ***************************************************************************/ - -static void pnetcdf_begin_shutdown() +static void pnetcdf_cleanup_runtime() { - assert(pnetcdf_runtime); + darshan_clear_record_refs(&(pnetcdf_runtime->ncid_hash), 0); + darshan_clear_record_refs(&(pnetcdf_runtime->rec_id_hash), 1); - PNETCDF_LOCK(); - /* disable further instrumentation while Darshan shuts down */ - instrumentation_disabled = 1; - PNETCDF_UNLOCK(); + free(pnetcdf_runtime); + pnetcdf_runtime = NULL; return; } -static void pnetcdf_get_output_data( +/*************************************************************************** + * Functions exported by PNETCDF module for coordinating with darshan-core * + ***************************************************************************/ + +static void pnetcdf_shutdown( MPI_Comm mod_comm, darshan_record_id *shared_recs, int shared_rec_count, void **pnetcdf_buf, int *pnetcdf_buf_sz) { - struct pnetcdf_file_runtime *file; - int i; + struct pnetcdf_file_record_ref *rec_ref; + 
struct darshan_pnetcdf_file *pnetcdf_rec_buf = + *(struct darshan_pnetcdf_file **)pnetcdf_buf; + int pnetcdf_rec_count; struct darshan_pnetcdf_file *red_send_buf = NULL; struct darshan_pnetcdf_file *red_recv_buf = NULL; MPI_Datatype red_type; MPI_Op red_op; + int i; + PNETCDF_LOCK(); assert(pnetcdf_runtime); + pnetcdf_rec_count = pnetcdf_runtime->file_rec_count; /* if there are globally shared files, do a shared file reduction */ /* NOTE: the shared file reduction is also skipped if the @@ -508,23 +372,22 @@ static void pnetcdf_get_output_data( /* necessary initialization of shared records */ for(i = 0; i < shared_rec_count; i++) { - HASH_FIND(hlink, pnetcdf_runtime->file_hash, &shared_recs[i], - sizeof(darshan_record_id), file); - assert(file); + rec_ref = darshan_lookup_record_ref(pnetcdf_runtime->rec_id_hash, + &shared_recs[i], sizeof(darshan_record_id)); + assert(rec_ref); - file->file_record->rank = -1; + rec_ref->file_rec->base_rec.rank = -1; } - /* sort the array of files descending by rank so that we get all of the - * shared files (marked by rank -1) in a contiguous portion at end - * of the array + + /* sort the array of records so we get all of the shared records + * (marked by rank -1) in a contiguous portion at end of the array */ - qsort(pnetcdf_runtime->file_record_array, pnetcdf_runtime->file_array_ndx, - sizeof(struct darshan_pnetcdf_file), pnetcdf_record_compare); + darshan_record_sort(pnetcdf_rec_buf, pnetcdf_rec_count, + sizeof(struct darshan_pnetcdf_file)); /* make *send_buf point to the shared files at the end of sorted array */ - red_send_buf = - &(pnetcdf_runtime->file_record_array[pnetcdf_runtime->file_array_ndx-shared_rec_count]); + red_send_buf = &(pnetcdf_rec_buf[pnetcdf_rec_count-shared_rec_count]); /* allocate memory for the reduction output on rank 0 */ if(my_rank == 0) @@ -532,6 +395,7 @@ static void pnetcdf_get_output_data( red_recv_buf = malloc(shared_rec_count * sizeof(struct darshan_pnetcdf_file)); if(!red_recv_buf) { + 
PNETCDF_UNLOCK(); return; } } @@ -553,45 +417,30 @@ static void pnetcdf_get_output_data( /* clean up reduction state */ if(my_rank == 0) { - int tmp_ndx = pnetcdf_runtime->file_array_ndx - shared_rec_count; - memcpy(&(pnetcdf_runtime->file_record_array[tmp_ndx]), red_recv_buf, + int tmp_ndx = pnetcdf_rec_count - shared_rec_count; + memcpy(&(pnetcdf_rec_buf[tmp_ndx]), red_recv_buf, shared_rec_count * sizeof(struct darshan_pnetcdf_file)); free(red_recv_buf); } else { - pnetcdf_runtime->file_array_ndx -= shared_rec_count; + pnetcdf_rec_count -= shared_rec_count; } DARSHAN_MPI_CALL(PMPI_Type_free)(&red_type); DARSHAN_MPI_CALL(PMPI_Op_free)(&red_op); } - *pnetcdf_buf = (void *)(pnetcdf_runtime->file_record_array); - *pnetcdf_buf_sz = pnetcdf_runtime->file_array_ndx * sizeof(struct darshan_pnetcdf_file); + /* update output buffer size to account for shared file reduction */ + *pnetcdf_buf_sz = pnetcdf_rec_count * sizeof(struct darshan_pnetcdf_file); - return; -} - -static void pnetcdf_shutdown() -{ - struct pnetcdf_file_runtime_ref *ref, *tmp; - - assert(pnetcdf_runtime); - - HASH_ITER(hlink, pnetcdf_runtime->ncid_hash, ref, tmp) - { - HASH_DELETE(hlink, pnetcdf_runtime->ncid_hash, ref); - free(ref); - } + /* shutdown internal structures used for instrumenting */ + pnetcdf_cleanup_runtime(); - HASH_CLEAR(hlink, pnetcdf_runtime->file_hash); /* these entries are freed all at once below */ - - free(pnetcdf_runtime->file_runtime_array); - free(pnetcdf_runtime->file_record_array); - free(pnetcdf_runtime); - pnetcdf_runtime = NULL; + /* disable further instrumentation */ + instrumentation_disabled = 1; + PNETCDF_UNLOCK(); return; } diff --git a/darshan-runtime/lib/darshan-posix.c b/darshan-runtime/lib/darshan-posix.c index f75603ceeb6a63b1479ee377c82e94950e4a75c0..f9bc22e342702e8441e9a47a87eb0def46fa033e 100644 --- a/darshan-runtime/lib/darshan-posix.c +++ b/darshan-runtime/lib/darshan-posix.c @@ -26,9 +26,7 @@ #include #include -#include "uthash.h" #include "utlist.h" - 
#include "darshan.h" #include "darshan-dynamic.h" @@ -83,40 +81,31 @@ DARSHAN_FORWARD_DECL(aio_return64, ssize_t, (struct aiocb64 *aiocbp)); DARSHAN_FORWARD_DECL(lio_listio, int, (int mode, struct aiocb *const aiocb_list[], int nitems, struct sigevent *sevp)); DARSHAN_FORWARD_DECL(lio_listio64, int, (int mode, struct aiocb64 *const aiocb_list[], int nitems, struct sigevent *sevp)); -/* struct to track information about aio operations in flight */ -struct posix_aio_tracker -{ - double tm1; - void *aiocbp; - struct posix_aio_tracker* next; -}; - -/* The posix_file_runtime structure maintains necessary runtime metadata +/* The posix_file_record_ref structure maintains necessary runtime metadata * for the POSIX file record (darshan_posix_file structure, defined in - * darshan-posix-log-format.h) pointed to by 'file_record'. This metadata + * darshan-posix-log-format.h) pointed to by 'file_rec'. This metadata * assists with the instrumenting of specific statistics in the file record. - * 'hlink' is a hash table link structure used to add/remove this record - * from the hash table of POSIX file records for this process. * * RATIONALE: the POSIX module needs to track some stateful, volatile * information about each open file (like the current file offset, most recent * access time, etc.) to aid in instrumentation, but this information can't be * stored in the darshan_posix_file struct because we don't want it to appear in - * the final darshan log file. We therefore associate a posix_file_runtime - * struct with each darshan_posix_file struct in order to track this information. - * - * NOTE: There is a one-to-one mapping of posix_file_runtime structs to - * darshan_posix_file structs. + * the final darshan log file. We therefore associate a posix_file_record_ref + * struct with each darshan_posix_file struct in order to track this information + * (i.e., the mapping between posix_file_record_ref structs to darshan_posix_file + * structs is one-to-one). 
* - * NOTE: The posix_file_runtime struct contains a pointer to a darshan_posix_file - * struct (see the *file_record member) rather than simply embedding an entire - * darshan_posix_file struct. This is done so that all of the darshan_posix_file - * structs can be kept contiguous in memory as a single array to simplify - * reduction, compression, and storage. + * NOTE: we use the 'darshan_record_ref' interface (in darshan-common) to + * associate different types of handles with this posix_file_record_ref struct. + * This allows us to index this struct (and the underlying file record) by using + * either the corresponding Darshan record identifier (derived from the filename) + * or by a generated file descriptor, for instance. Note that, while there should + * only be a single Darshan record identifier that indexes a posix_file_record_ref, + * there could be multiple open file descriptors that index it. */ -struct posix_file_runtime +struct posix_file_record_ref { - struct darshan_posix_file* file_record; + struct darshan_posix_file *file_rec; int64_t offset; int64_t last_byte_read; int64_t last_byte_written; @@ -124,42 +113,11 @@ struct posix_file_runtime double last_meta_end; double last_read_end; double last_write_end; - void* access_root; + void *access_root; int access_count; - void* stride_root; + void *stride_root; int stride_count; struct posix_aio_tracker* aio_list; - UT_hash_handle hlink; -}; - -/* The posix_file_runtime_ref structure is used to associate a POSIX - * file descriptor with an already existing POSIX file record. This is - * necessary as many POSIX I/O functions take only an input file descriptor, - * but POSIX file records are indexed by their full file paths (i.e., darshan - * record identifiers for POSIX files are created by hashing the file path). 
- * In other words, this structure is necessary as it allows us to look up a - * file record either by a pathname (posix_file_runtime) or by POSIX file - * descriptor (posix_file_runtime_ref), depending on which parameters are - * available. This structure includes another hash table link, since separate - * hashes are maintained for posix_file_runtime structures and posix_file_runtime_ref - * structures. - * - * RATIONALE: In theory the fd information could be included in the - * posix_file_runtime struct rather than in a separate structure here. The - * reason we don't do that is because the same file could be opened multiple - * times by a given process with different file descriptors and thus - * simulataneously referenced using different file descriptors. This practice is - * not common, but we must support it. - * - * NOTE: there are potentially multiple posix_file_runtime_ref structures - * referring to a single posix_file_runtime structure. Most of the time there is - * only one, however. 
- */ -struct posix_file_runtime_ref -{ - struct posix_file_runtime* file; - int fd; - UT_hash_handle hlink; }; /* The posix_runtime structure maintains necessary state for storing @@ -168,196 +126,238 @@ struct posix_file_runtime_ref */ struct posix_runtime { - struct posix_file_runtime* file_runtime_array; - struct darshan_posix_file* file_record_array; - int file_array_size; - int file_array_ndx; - struct posix_file_runtime* file_hash; - struct posix_file_runtime_ref* fd_hash; + void *rec_id_hash; + void *fd_hash; + int file_rec_count; }; +/* struct to track information about aio operations in flight */ +struct posix_aio_tracker +{ + double tm1; + void *aiocbp; + struct posix_aio_tracker *next; +}; + +static void posix_runtime_initialize( + void); +static struct posix_file_record_ref *posix_track_new_file_record( + darshan_record_id rec_id, const char *path); +static void posix_aio_tracker_add( + int fd, void *aiocbp); +static struct posix_aio_tracker* posix_aio_tracker_del( + int fd, void *aiocbp); +static void posix_finalize_file_records( + void *rec_ref_p); +static void posix_record_reduction_op( + void* infile_v, void* inoutfile_v, int *len, MPI_Datatype *datatype); +static void posix_shared_record_variance( + MPI_Comm mod_comm, struct darshan_posix_file *inrec_array, + struct darshan_posix_file *outrec_array, int shared_rec_count); +static void posix_cleanup_runtime( + void); + +static void posix_shutdown( + MPI_Comm mod_comm, darshan_record_id *shared_recs, + int shared_rec_count, void **posix_buf, int *posix_buf_sz); + static struct posix_runtime *posix_runtime = NULL; static pthread_mutex_t posix_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP; static int instrumentation_disabled = 0; static int my_rank = -1; static int darshan_mem_alignment = 1; -static void posix_runtime_initialize(void); -static struct posix_file_runtime* posix_file_by_name(const char *name); -static struct posix_file_runtime* posix_file_by_name_setfd(const char* name, int fd); 
-static struct posix_file_runtime* posix_file_by_fd(int fd); -static void posix_file_close_fd(int fd); -static void posix_aio_tracker_add(int fd, void *aiocbp); -static struct posix_aio_tracker* posix_aio_tracker_del(int fd, void *aiocbp); -static int posix_record_compare(const void* a, const void* b); -static void posix_record_reduction_op(void* infile_v, void* inoutfile_v, - int *len, MPI_Datatype *datatype); -static void posix_shared_record_variance(MPI_Comm mod_comm, - struct darshan_posix_file *inrec_array, struct darshan_posix_file *outrec_array, - int shared_rec_count); - -static void posix_begin_shutdown(void); -static void posix_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs, - int shared_rec_count, void **posix_buf, int *posix_buf_sz); -static void posix_shutdown(void); - #define POSIX_LOCK() pthread_mutex_lock(&posix_runtime_mutex) #define POSIX_UNLOCK() pthread_mutex_unlock(&posix_runtime_mutex) +#define POSIX_PRE_RECORD() do { \ + POSIX_LOCK(); \ + if(!posix_runtime && !instrumentation_disabled) posix_runtime_initialize(); \ + if(!posix_runtime) { \ + POSIX_UNLOCK(); \ + return(ret); \ + } \ +} while(0) + +#define POSIX_POST_RECORD() do { \ + POSIX_UNLOCK(); \ +} while(0) + #define POSIX_RECORD_OPEN(__ret, __path, __mode, __stream_flag, __tm1, __tm2) do { \ - struct posix_file_runtime* file; \ - char* exclude; \ - int tmp_index = 0; \ + darshan_record_id rec_id; \ + struct posix_file_record_ref *rec_ref; \ + char *newpath; \ if(__ret < 0) break; \ - while((exclude = darshan_path_exclusions[tmp_index])) { \ - if(!(strncmp(exclude, __path, strlen(exclude)))) \ - break; \ - tmp_index++; \ + newpath = darshan_clean_file_path(__path); \ + if(!newpath) newpath = (char *)__path; \ + if(darshan_core_excluded_path(newpath)) { \ + if(newpath != __path) free(newpath); \ + break; \ + } \ + rec_id = darshan_core_gen_record_id(newpath); \ + rec_ref = darshan_lookup_record_ref(posix_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \ + 
if(!rec_ref) rec_ref = posix_track_new_file_record(rec_id, newpath); \ + if(!rec_ref) { \ + if(newpath != __path) free(newpath); \ + break; \ } \ - if(exclude) break; \ - file = posix_file_by_name_setfd(__path, __ret); \ - if(!file) break; \ if(__mode) \ - file->file_record->counters[POSIX_MODE] = __mode; \ - file->offset = 0; \ - file->last_byte_written = 0; \ - file->last_byte_read = 0; \ + rec_ref->file_rec->counters[POSIX_MODE] = __mode; \ + rec_ref->offset = 0; \ + rec_ref->last_byte_written = 0; \ + rec_ref->last_byte_read = 0; \ if(__stream_flag)\ - file->file_record->counters[POSIX_FOPENS] += 1; \ + rec_ref->file_rec->counters[POSIX_FOPENS] += 1; \ else \ - file->file_record->counters[POSIX_OPENS] += 1; \ - if(file->file_record->fcounters[POSIX_F_OPEN_TIMESTAMP] == 0 || \ - file->file_record->fcounters[POSIX_F_OPEN_TIMESTAMP] > __tm1) \ - file->file_record->fcounters[POSIX_F_OPEN_TIMESTAMP] = __tm1; \ - DARSHAN_TIMER_INC_NO_OVERLAP(file->file_record->fcounters[POSIX_F_META_TIME], __tm1, __tm2, file->last_meta_end); \ + rec_ref->file_rec->counters[POSIX_OPENS] += 1; \ + if(rec_ref->file_rec->fcounters[POSIX_F_OPEN_TIMESTAMP] == 0 || \ + rec_ref->file_rec->fcounters[POSIX_F_OPEN_TIMESTAMP] > __tm1) \ + rec_ref->file_rec->fcounters[POSIX_F_OPEN_TIMESTAMP] = __tm1; \ + DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[POSIX_F_META_TIME], \ + __tm1, __tm2, rec_ref->last_meta_end); \ + darshan_add_record_ref(&(posix_runtime->fd_hash), &__ret, sizeof(int), rec_ref); \ + if(newpath != __path) free(newpath); \ } while(0) -#define POSIX_RECORD_READ(__ret, __fd, __pread_flag, __pread_offset, __aligned, __stream_flag, __tm1, __tm2) do{ \ +#define POSIX_RECORD_READ(__ret, __fd, __pread_flag, __pread_offset, __aligned, __stream_flag, __tm1, __tm2) do { \ + struct posix_file_record_ref* rec_ref; \ size_t stride; \ int64_t this_offset; \ - struct posix_file_runtime* file; \ int64_t file_alignment; \ double __elapsed = __tm2-__tm1; \ if(__ret < 0) break; \ - file = 
posix_file_by_fd(__fd); \ - if(!file) break; \ + rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &(__fd), sizeof(int)); \ + if(!rec_ref) break; \ if(__pread_flag) \ this_offset = __pread_offset; \ else \ - this_offset = file->offset; \ - if(this_offset > file->last_byte_read) \ - file->file_record->counters[POSIX_SEQ_READS] += 1; \ - if(this_offset == (file->last_byte_read + 1)) \ - file->file_record->counters[POSIX_CONSEC_READS] += 1; \ - if(this_offset > 0 && this_offset > file->last_byte_read \ - && file->last_byte_read != 0) \ - stride = this_offset - file->last_byte_read - 1; \ + this_offset = rec_ref->offset; \ + if(this_offset > rec_ref->last_byte_read) \ + rec_ref->file_rec->counters[POSIX_SEQ_READS] += 1; \ + if(this_offset == (rec_ref->last_byte_read + 1)) \ + rec_ref->file_rec->counters[POSIX_CONSEC_READS] += 1; \ + if(this_offset > 0 && this_offset > rec_ref->last_byte_read \ + && rec_ref->last_byte_read != 0) \ + stride = this_offset - rec_ref->last_byte_read - 1; \ else \ stride = 0; \ - file->last_byte_read = this_offset + __ret - 1; \ - file->offset = this_offset + __ret; \ - if(file->file_record->counters[POSIX_MAX_BYTE_READ] < (this_offset + __ret - 1)) \ - file->file_record->counters[POSIX_MAX_BYTE_READ] = (this_offset + __ret - 1); \ - file->file_record->counters[POSIX_BYTES_READ] += __ret; \ + rec_ref->last_byte_read = this_offset + __ret - 1; \ + rec_ref->offset = this_offset + __ret; \ + if(rec_ref->file_rec->counters[POSIX_MAX_BYTE_READ] < (this_offset + __ret - 1)) \ + rec_ref->file_rec->counters[POSIX_MAX_BYTE_READ] = (this_offset + __ret - 1); \ + rec_ref->file_rec->counters[POSIX_BYTES_READ] += __ret; \ if(__stream_flag) \ - file->file_record->counters[POSIX_FREADS] += 1; \ + rec_ref->file_rec->counters[POSIX_FREADS] += 1; \ else \ - file->file_record->counters[POSIX_READS] += 1; \ - DARSHAN_BUCKET_INC(&(file->file_record->counters[POSIX_SIZE_READ_0_100]), __ret); \ - darshan_common_val_counter(&file->access_root, 
&file->access_count, __ret); \ - darshan_common_val_counter(&file->stride_root, &file->stride_count, stride); \ + rec_ref->file_rec->counters[POSIX_READS] += 1; \ + DARSHAN_BUCKET_INC(&(rec_ref->file_rec->counters[POSIX_SIZE_READ_0_100]), __ret); \ + darshan_common_val_counter(&rec_ref->access_root, &rec_ref->access_count, __ret, \ + &(rec_ref->file_rec->counters[POSIX_ACCESS1_ACCESS]), \ + &(rec_ref->file_rec->counters[POSIX_ACCESS1_COUNT])); \ + darshan_common_val_counter(&rec_ref->stride_root, &rec_ref->stride_count, stride, \ + &(rec_ref->file_rec->counters[POSIX_STRIDE1_STRIDE]), \ + &(rec_ref->file_rec->counters[POSIX_STRIDE1_COUNT])); \ if(!__aligned) \ - file->file_record->counters[POSIX_MEM_NOT_ALIGNED] += 1; \ - file_alignment = file->file_record->counters[POSIX_FILE_ALIGNMENT]; \ + rec_ref->file_rec->counters[POSIX_MEM_NOT_ALIGNED] += 1; \ + file_alignment = rec_ref->file_rec->counters[POSIX_FILE_ALIGNMENT]; \ if(file_alignment > 0 && (this_offset % file_alignment) != 0) \ - file->file_record->counters[POSIX_FILE_NOT_ALIGNED] += 1; \ - if(file->last_io_type == DARSHAN_IO_WRITE) \ - file->file_record->counters[POSIX_RW_SWITCHES] += 1; \ - file->last_io_type = DARSHAN_IO_READ; \ - if(file->file_record->fcounters[POSIX_F_READ_START_TIMESTAMP] == 0 || \ - file->file_record->fcounters[POSIX_F_READ_START_TIMESTAMP] > __tm1) \ - file->file_record->fcounters[POSIX_F_READ_START_TIMESTAMP] = __tm1; \ - file->file_record->fcounters[POSIX_F_READ_END_TIMESTAMP] = __tm2; \ - if(file->file_record->fcounters[POSIX_F_MAX_READ_TIME] < __elapsed) { \ - file->file_record->fcounters[POSIX_F_MAX_READ_TIME] = __elapsed; \ - file->file_record->counters[POSIX_MAX_READ_TIME_SIZE] = __ret; } \ - DARSHAN_TIMER_INC_NO_OVERLAP(file->file_record->fcounters[POSIX_F_READ_TIME], __tm1, __tm2, file->last_read_end); \ + rec_ref->file_rec->counters[POSIX_FILE_NOT_ALIGNED] += 1; \ + if(rec_ref->last_io_type == DARSHAN_IO_WRITE) \ + rec_ref->file_rec->counters[POSIX_RW_SWITCHES] += 1; \ + 
rec_ref->last_io_type = DARSHAN_IO_READ; \ + if(rec_ref->file_rec->fcounters[POSIX_F_READ_START_TIMESTAMP] == 0 || \ + rec_ref->file_rec->fcounters[POSIX_F_READ_START_TIMESTAMP] > __tm1) \ + rec_ref->file_rec->fcounters[POSIX_F_READ_START_TIMESTAMP] = __tm1; \ + rec_ref->file_rec->fcounters[POSIX_F_READ_END_TIMESTAMP] = __tm2; \ + if(rec_ref->file_rec->fcounters[POSIX_F_MAX_READ_TIME] < __elapsed) { \ + rec_ref->file_rec->fcounters[POSIX_F_MAX_READ_TIME] = __elapsed; \ + rec_ref->file_rec->counters[POSIX_MAX_READ_TIME_SIZE] = __ret; } \ + DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[POSIX_F_READ_TIME], \ + __tm1, __tm2, rec_ref->last_read_end); \ } while(0) -#define POSIX_RECORD_WRITE(__ret, __fd, __pwrite_flag, __pwrite_offset, __aligned, __stream_flag, __tm1, __tm2) do{ \ +#define POSIX_RECORD_WRITE(__ret, __fd, __pwrite_flag, __pwrite_offset, __aligned, __stream_flag, __tm1, __tm2) do { \ + struct posix_file_record_ref* rec_ref; \ size_t stride; \ int64_t this_offset; \ - struct posix_file_runtime* file; \ int64_t file_alignment; \ double __elapsed = __tm2-__tm1; \ if(__ret < 0) break; \ - file = posix_file_by_fd(__fd); \ - if(!file) break; \ + rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &__fd, sizeof(int)); \ + if(!rec_ref) break; \ if(__pwrite_flag) \ this_offset = __pwrite_offset; \ else \ - this_offset = file->offset; \ - if(this_offset > file->last_byte_written) \ - file->file_record->counters[POSIX_SEQ_WRITES] += 1; \ - if(this_offset == (file->last_byte_written + 1)) \ - file->file_record->counters[POSIX_CONSEC_WRITES] += 1; \ - if(this_offset > 0 && this_offset > file->last_byte_written \ - && file->last_byte_written != 0) \ - stride = this_offset - file->last_byte_written - 1; \ + this_offset = rec_ref->offset; \ + if(this_offset > rec_ref->last_byte_written) \ + rec_ref->file_rec->counters[POSIX_SEQ_WRITES] += 1; \ + if(this_offset == (rec_ref->last_byte_written + 1)) \ + rec_ref->file_rec->counters[POSIX_CONSEC_WRITES] += 
1; \ + if(this_offset > 0 && this_offset > rec_ref->last_byte_written \ + && rec_ref->last_byte_written != 0) \ + stride = this_offset - rec_ref->last_byte_written - 1; \ else \ stride = 0; \ - file->last_byte_written = this_offset + __ret - 1; \ - file->offset = this_offset + __ret; \ - if(file->file_record->counters[POSIX_MAX_BYTE_WRITTEN] < (this_offset + __ret - 1)) \ - file->file_record->counters[POSIX_MAX_BYTE_WRITTEN] = (this_offset + __ret - 1); \ - file->file_record->counters[POSIX_BYTES_WRITTEN] += __ret; \ + rec_ref->last_byte_written = this_offset + __ret - 1; \ + rec_ref->offset = this_offset + __ret; \ + if(rec_ref->file_rec->counters[POSIX_MAX_BYTE_WRITTEN] < (this_offset + __ret - 1)) \ + rec_ref->file_rec->counters[POSIX_MAX_BYTE_WRITTEN] = (this_offset + __ret - 1); \ + rec_ref->file_rec->counters[POSIX_BYTES_WRITTEN] += __ret; \ if(__stream_flag) \ - file->file_record->counters[POSIX_FWRITES] += 1; \ + rec_ref->file_rec->counters[POSIX_FWRITES] += 1; \ else \ - file->file_record->counters[POSIX_WRITES] += 1; \ - DARSHAN_BUCKET_INC(&(file->file_record->counters[POSIX_SIZE_WRITE_0_100]), __ret); \ - darshan_common_val_counter(&file->access_root, &file->access_count, __ret); \ - darshan_common_val_counter(&file->stride_root, &file->stride_count, stride); \ + rec_ref->file_rec->counters[POSIX_WRITES] += 1; \ + DARSHAN_BUCKET_INC(&(rec_ref->file_rec->counters[POSIX_SIZE_WRITE_0_100]), __ret); \ + darshan_common_val_counter(&rec_ref->access_root, &rec_ref->access_count, __ret, \ + &(rec_ref->file_rec->counters[POSIX_ACCESS1_ACCESS]), \ + &(rec_ref->file_rec->counters[POSIX_ACCESS1_COUNT])); \ + darshan_common_val_counter(&rec_ref->stride_root, &rec_ref->stride_count, stride, \ + &(rec_ref->file_rec->counters[POSIX_STRIDE1_STRIDE]), \ + &(rec_ref->file_rec->counters[POSIX_STRIDE1_COUNT])); \ if(!__aligned) \ - file->file_record->counters[POSIX_MEM_NOT_ALIGNED] += 1; \ - file_alignment = file->file_record->counters[POSIX_FILE_ALIGNMENT]; \ + 
rec_ref->file_rec->counters[POSIX_MEM_NOT_ALIGNED] += 1; \ + file_alignment = rec_ref->file_rec->counters[POSIX_FILE_ALIGNMENT]; \ if(file_alignment > 0 && (this_offset % file_alignment) != 0) \ - file->file_record->counters[POSIX_FILE_NOT_ALIGNED] += 1; \ - if(file->last_io_type == DARSHAN_IO_READ) \ - file->file_record->counters[POSIX_RW_SWITCHES] += 1; \ - file->last_io_type = DARSHAN_IO_WRITE; \ - if(file->file_record->fcounters[POSIX_F_WRITE_START_TIMESTAMP] == 0 || \ - file->file_record->fcounters[POSIX_F_WRITE_START_TIMESTAMP] > __tm1) \ - file->file_record->fcounters[POSIX_F_WRITE_START_TIMESTAMP] = __tm1; \ - file->file_record->fcounters[POSIX_F_WRITE_END_TIMESTAMP] = __tm2; \ - if(file->file_record->fcounters[POSIX_F_MAX_WRITE_TIME] < __elapsed) { \ - file->file_record->fcounters[POSIX_F_MAX_WRITE_TIME] = __elapsed; \ - file->file_record->counters[POSIX_MAX_WRITE_TIME_SIZE] = __ret; } \ - DARSHAN_TIMER_INC_NO_OVERLAP(file->file_record->fcounters[POSIX_F_WRITE_TIME], __tm1, __tm2, file->last_write_end); \ + rec_ref->file_rec->counters[POSIX_FILE_NOT_ALIGNED] += 1; \ + if(rec_ref->last_io_type == DARSHAN_IO_READ) \ + rec_ref->file_rec->counters[POSIX_RW_SWITCHES] += 1; \ + rec_ref->last_io_type = DARSHAN_IO_WRITE; \ + if(rec_ref->file_rec->fcounters[POSIX_F_WRITE_START_TIMESTAMP] == 0 || \ + rec_ref->file_rec->fcounters[POSIX_F_WRITE_START_TIMESTAMP] > __tm1) \ + rec_ref->file_rec->fcounters[POSIX_F_WRITE_START_TIMESTAMP] = __tm1; \ + rec_ref->file_rec->fcounters[POSIX_F_WRITE_END_TIMESTAMP] = __tm2; \ + if(rec_ref->file_rec->fcounters[POSIX_F_MAX_WRITE_TIME] < __elapsed) { \ + rec_ref->file_rec->fcounters[POSIX_F_MAX_WRITE_TIME] = __elapsed; \ + rec_ref->file_rec->counters[POSIX_MAX_WRITE_TIME_SIZE] = __ret; } \ + DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[POSIX_F_WRITE_TIME], \ + __tm1, __tm2, rec_ref->last_write_end); \ } while(0) #define POSIX_LOOKUP_RECORD_STAT(__path, __statbuf, __tm1, __tm2) do { \ - char* exclude; \ - int tmp_index = 
0; \ - struct posix_file_runtime* file; \ - while((exclude = darshan_path_exclusions[tmp_index])) { \ - if(!(strncmp(exclude, __path, strlen(exclude)))) \ - break; \ - tmp_index++; \ + darshan_record_id rec_id; \ + struct posix_file_record_ref* rec_ref; \ + char *newpath = darshan_clean_file_path(__path); \ + if(!newpath) newpath = (char *)__path; \ + if(darshan_core_excluded_path(newpath)) { \ + if(newpath != __path) free(newpath); \ + break; \ } \ - if(exclude) break; \ - file = posix_file_by_name(__path); \ - if(file) \ - { \ - POSIX_RECORD_STAT(file, __statbuf, __tm1, __tm2); \ + rec_id = darshan_core_gen_record_id(newpath); \ + rec_ref = darshan_lookup_record_ref(posix_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \ + if(!rec_ref) rec_ref = posix_track_new_file_record(rec_id, newpath); \ + if(newpath != __path) free(newpath); \ + if(rec_ref) { \ + POSIX_RECORD_STAT(rec_ref, __statbuf, __tm1, __tm2); \ } \ } while(0) -#define POSIX_RECORD_STAT(__file, __statbuf, __tm1, __tm2) do { \ - DARSHAN_TIMER_INC_NO_OVERLAP((__file)->file_record->fcounters[POSIX_F_META_TIME], __tm1, __tm2, (__file)->last_meta_end); \ - (__file)->file_record->counters[POSIX_STATS] += 1; \ +#define POSIX_RECORD_STAT(__rec_ref, __statbuf, __tm1, __tm2) do { \ + (__rec_ref)->file_rec->counters[POSIX_STATS] += 1; \ + DARSHAN_TIMER_INC_NO_OVERLAP((__rec_ref)->file_rec->fcounters[POSIX_F_META_TIME], \ + __tm1, __tm2, (__rec_ref)->last_meta_end); \ } while(0) + /********************************************************** * Wrappers for POSIX I/O functions of interest * **********************************************************/ @@ -388,10 +388,9 @@ int DARSHAN_DECL(open)(const char *path, int flags, ...) tm2 = darshan_core_wtime(); } - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_OPEN(ret, path, mode, 0, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -422,10 +421,9 @@ int DARSHAN_DECL(open64)(const char *path, int flags, ...) 
tm2 = darshan_core_wtime(); } - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_OPEN(ret, path, mode, 0, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -441,10 +439,9 @@ int DARSHAN_DECL(creat)(const char* path, mode_t mode) ret = __real_creat(path, mode); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_OPEN(ret, path, mode, 0, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -460,10 +457,9 @@ int DARSHAN_DECL(creat64)(const char* path, mode_t mode) ret = __real_creat64(path, mode); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_OPEN(ret, path, mode, 0, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -485,10 +481,9 @@ FILE* DARSHAN_DECL(fopen)(const char *path, const char *mode) else fd = fileno(ret); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_OPEN(fd, path, 0, 1, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -510,10 +505,9 @@ FILE* DARSHAN_DECL(fopen64)(const char *path, const char *mode) else fd = fileno(ret); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_OPEN(fd, path, 0, 1, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -529,10 +523,9 @@ int DARSHAN_DECL(mkstemp)(char* template) ret = __real_mkstemp(template); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_OPEN(ret, template, 0, 0, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -548,10 +541,9 @@ int DARSHAN_DECL(mkostemp)(char* template, int flags) ret = __real_mkostemp(template, flags); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_OPEN(ret, template, 0, 0, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ 
-567,10 +559,9 @@ int DARSHAN_DECL(mkstemps)(char* template, int suffixlen) ret = __real_mkstemps(template, suffixlen); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_OPEN(ret, template, 0, 0, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -586,10 +577,9 @@ int DARSHAN_DECL(mkostemps)(char* template, int suffixlen, int flags) ret = __real_mkostemps(template, suffixlen, flags); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_OPEN(ret, template, 0, 0, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -608,10 +598,9 @@ ssize_t DARSHAN_DECL(read)(int fd, void *buf, size_t count) ret = __real_read(fd, buf, count); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_READ(ret, fd, 0, 0, aligned_flag, 0, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -630,10 +619,9 @@ ssize_t DARSHAN_DECL(write)(int fd, const void *buf, size_t count) ret = __real_write(fd, buf, count); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_WRITE(ret, fd, 0, 0, aligned_flag, 0, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -652,10 +640,9 @@ ssize_t DARSHAN_DECL(pread)(int fd, void *buf, size_t count, off_t offset) ret = __real_pread(fd, buf, count, offset); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_READ(ret, fd, 1, offset, aligned_flag, 0, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -674,10 +661,9 @@ ssize_t DARSHAN_DECL(pwrite)(int fd, const void *buf, size_t count, off_t offset ret = __real_pwrite(fd, buf, count, offset); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_WRITE(ret, fd, 1, offset, aligned_flag, 0, tm1, 
tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -696,10 +682,9 @@ ssize_t DARSHAN_DECL(pread64)(int fd, void *buf, size_t count, off64_t offset) ret = __real_pread64(fd, buf, count, offset); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_READ(ret, fd, 1, offset, aligned_flag, 0, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -718,10 +703,9 @@ ssize_t DARSHAN_DECL(pwrite64)(int fd, const void *buf, size_t count, off64_t of ret = __real_pwrite64(fd, buf, count, offset); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_WRITE(ret, fd, 1, offset, aligned_flag, 0, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -745,10 +729,9 @@ ssize_t DARSHAN_DECL(readv)(int fd, const struct iovec *iov, int iovcnt) ret = __real_readv(fd, iov, iovcnt); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_READ(ret, fd, 0, 0, aligned_flag, 0, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -772,10 +755,9 @@ ssize_t DARSHAN_DECL(writev)(int fd, const struct iovec *iov, int iovcnt) ret = __real_writev(fd, iov, iovcnt); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_RECORD_WRITE(ret, fd, 0, 0, aligned_flag, 0, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -784,6 +766,7 @@ size_t DARSHAN_DECL(fread)(void *ptr, size_t size, size_t nmemb, FILE *stream) { size_t ret; int aligned_flag = 0; + int fd; double tm1, tm2; MAP_OR_FAIL(fread); @@ -794,19 +777,17 @@ size_t DARSHAN_DECL(fread)(void *ptr, size_t size, size_t nmemb, FILE *stream) ret = __real_fread(ptr, size, nmemb, stream); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + fd = fileno(stream); + POSIX_PRE_RECORD(); if(ret > 0) { - POSIX_RECORD_READ(size*ret, fileno(stream), 0, 0, 
- aligned_flag, 1, tm1, tm2); + POSIX_RECORD_READ(size*ret, fd, 0, 0, aligned_flag, 1, tm1, tm2); } else { - POSIX_RECORD_READ(ret, fileno(stream), 0, 0, - aligned_flag, 1, tm1, tm2); + POSIX_RECORD_READ(ret, fd, 0, 0, aligned_flag, 1, tm1, tm2); } - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -815,6 +796,7 @@ size_t DARSHAN_DECL(fwrite)(const void *ptr, size_t size, size_t nmemb, FILE *st { size_t ret; int aligned_flag = 0; + int fd; double tm1, tm2; MAP_OR_FAIL(fwrite); @@ -825,19 +807,17 @@ size_t DARSHAN_DECL(fwrite)(const void *ptr, size_t size, size_t nmemb, FILE *st ret = __real_fwrite(ptr, size, nmemb, stream); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + fd = fileno(stream); + POSIX_PRE_RECORD(); if(ret > 0) { - POSIX_RECORD_WRITE(size*ret, fileno(stream), 0, 0, - aligned_flag, 1, tm1, tm2); + POSIX_RECORD_WRITE(size*ret, fd, 0, 0, aligned_flag, 1, tm1, tm2); } else { - POSIX_RECORD_WRITE(ret, fileno(stream), 0, 0, - aligned_flag, 1, tm1, tm2); + POSIX_RECORD_WRITE(ret, fd, 0, 0, aligned_flag, 1, tm1, tm2); } - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -845,7 +825,7 @@ size_t DARSHAN_DECL(fwrite)(const void *ptr, size_t size, size_t nmemb, FILE *st off_t DARSHAN_DECL(lseek)(int fd, off_t offset, int whence) { off_t ret; - struct posix_file_runtime* file; + struct posix_file_record_ref *rec_ref; double tm1, tm2; MAP_OR_FAIL(lseek); @@ -856,18 +836,17 @@ off_t DARSHAN_DECL(lseek)(int fd, off_t offset, int whence) if(ret >= 0) { - POSIX_LOCK(); - posix_runtime_initialize(); - file = posix_file_by_fd(fd); - if(file) + POSIX_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int)); + if(rec_ref) { - file->offset = ret; + rec_ref->offset = ret; DARSHAN_TIMER_INC_NO_OVERLAP( - file->file_record->fcounters[POSIX_F_META_TIME], - tm1, tm2, file->last_meta_end); - file->file_record->counters[POSIX_SEEKS] += 1; + rec_ref->file_rec->fcounters[POSIX_F_META_TIME], + tm1, tm2, 
rec_ref->last_meta_end); + rec_ref->file_rec->counters[POSIX_SEEKS] += 1; } - POSIX_UNLOCK(); + POSIX_POST_RECORD(); } return(ret); @@ -876,7 +855,7 @@ off_t DARSHAN_DECL(lseek)(int fd, off_t offset, int whence) off_t DARSHAN_DECL(lseek64)(int fd, off_t offset, int whence) { off_t ret; - struct posix_file_runtime* file; + struct posix_file_record_ref *rec_ref; double tm1, tm2; MAP_OR_FAIL(lseek64); @@ -887,18 +866,17 @@ off_t DARSHAN_DECL(lseek64)(int fd, off_t offset, int whence) if(ret >= 0) { - POSIX_LOCK(); - posix_runtime_initialize(); - file = posix_file_by_fd(fd); - if(file) + POSIX_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int)); + if(rec_ref) { - file->offset = ret; + rec_ref->offset = ret; DARSHAN_TIMER_INC_NO_OVERLAP( - file->file_record->fcounters[POSIX_F_META_TIME], - tm1, tm2, file->last_meta_end); - file->file_record->counters[POSIX_SEEKS] += 1; + rec_ref->file_rec->fcounters[POSIX_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + rec_ref->file_rec->counters[POSIX_SEEKS] += 1; } - POSIX_UNLOCK(); + POSIX_POST_RECORD(); } return(ret); @@ -907,7 +885,8 @@ off_t DARSHAN_DECL(lseek64)(int fd, off_t offset, int whence) int DARSHAN_DECL(fseek)(FILE *stream, long offset, int whence) { int ret; - struct posix_file_runtime* file; + struct posix_file_record_ref *rec_ref; + int fd; double tm1, tm2; MAP_OR_FAIL(fseek); @@ -918,18 +897,18 @@ int DARSHAN_DECL(fseek)(FILE *stream, long offset, int whence) if(ret >= 0) { - POSIX_LOCK(); - posix_runtime_initialize(); - file = posix_file_by_fd(fileno(stream)); - if(file) + POSIX_PRE_RECORD(); + fd = fileno(stream); + rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int)); + if(rec_ref) { - file->offset = ftell(stream); + rec_ref->offset = ftell(stream); DARSHAN_TIMER_INC_NO_OVERLAP( - file->file_record->fcounters[POSIX_F_META_TIME], - tm1, tm2, file->last_meta_end); - file->file_record->counters[POSIX_FSEEKS] += 1; + 
rec_ref->file_rec->fcounters[POSIX_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + rec_ref->file_rec->counters[POSIX_FSEEKS] += 1; } - POSIX_UNLOCK(); + POSIX_POST_RECORD(); } return(ret); @@ -949,10 +928,9 @@ int DARSHAN_DECL(__xstat)(int vers, const char *path, struct stat *buf) if(ret < 0 || !S_ISREG(buf->st_mode)) return(ret); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_LOOKUP_RECORD_STAT(path, buf, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -971,10 +949,9 @@ int DARSHAN_DECL(__xstat64)(int vers, const char *path, struct stat64 *buf) if(ret < 0 || !S_ISREG(buf->st_mode)) return(ret); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_LOOKUP_RECORD_STAT(path, buf, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -993,10 +970,9 @@ int DARSHAN_DECL(__lxstat)(int vers, const char *path, struct stat *buf) if(ret < 0 || !S_ISREG(buf->st_mode)) return(ret); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_LOOKUP_RECORD_STAT(path, buf, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -1015,10 +991,9 @@ int DARSHAN_DECL(__lxstat64)(int vers, const char *path, struct stat64 *buf) if(ret < 0 || !S_ISREG(buf->st_mode)) return(ret); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); POSIX_LOOKUP_RECORD_STAT(path, buf, tm1, tm2); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -1026,7 +1001,7 @@ int DARSHAN_DECL(__lxstat64)(int vers, const char *path, struct stat64 *buf) int DARSHAN_DECL(__fxstat)(int vers, int fd, struct stat *buf) { int ret; - struct posix_file_runtime* file; + struct posix_file_record_ref *rec_ref; double tm1, tm2; MAP_OR_FAIL(__fxstat); @@ -1038,14 +1013,13 @@ int DARSHAN_DECL(__fxstat)(int vers, int fd, struct stat *buf) if(ret < 0 || !S_ISREG(buf->st_mode)) return(ret); - POSIX_LOCK(); - posix_runtime_initialize(); - file = posix_file_by_fd(fd); - if(file) + 
POSIX_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int)); + if(rec_ref) { - POSIX_RECORD_STAT(file, buf, tm1, tm2); + POSIX_RECORD_STAT(rec_ref, buf, tm1, tm2); } - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -1053,7 +1027,7 @@ int DARSHAN_DECL(__fxstat)(int vers, int fd, struct stat *buf) int DARSHAN_DECL(__fxstat64)(int vers, int fd, struct stat64 *buf) { int ret; - struct posix_file_runtime* file; + struct posix_file_record_ref *rec_ref; double tm1, tm2; MAP_OR_FAIL(__fxstat64); @@ -1065,14 +1039,13 @@ int DARSHAN_DECL(__fxstat64)(int vers, int fd, struct stat64 *buf) if(ret < 0 || !S_ISREG(buf->st_mode)) return(ret); - POSIX_LOCK(); - posix_runtime_initialize(); - file = posix_file_by_fd(fd); - if(file) + POSIX_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int)); + if(rec_ref) { - POSIX_RECORD_STAT(file, buf, tm1, tm2); + POSIX_RECORD_STAT(rec_ref, buf, tm1, tm2); } - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -1081,7 +1054,7 @@ void* DARSHAN_DECL(mmap)(void *addr, size_t length, int prot, int flags, int fd, off_t offset) { void* ret; - struct posix_file_runtime* file; + struct posix_file_record_ref *rec_ref; MAP_OR_FAIL(mmap); @@ -1089,14 +1062,13 @@ void* DARSHAN_DECL(mmap)(void *addr, size_t length, int prot, int flags, if(ret == MAP_FAILED) return(ret); - POSIX_LOCK(); - posix_runtime_initialize(); - file = posix_file_by_fd(fd); - if(file) + POSIX_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int)); + if(rec_ref) { - file->file_record->counters[POSIX_MMAPS] += 1; + rec_ref->file_rec->counters[POSIX_MMAPS] += 1; } - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -1105,7 +1077,7 @@ void* DARSHAN_DECL(mmap64)(void *addr, size_t length, int prot, int flags, int fd, off64_t offset) { void* ret; - struct posix_file_runtime* file; + struct posix_file_record_ref *rec_ref; MAP_OR_FAIL(mmap64); @@ -1113,14 
+1085,13 @@ void* DARSHAN_DECL(mmap64)(void *addr, size_t length, int prot, int flags, if(ret == MAP_FAILED) return(ret); - POSIX_LOCK(); - posix_runtime_initialize(); - file = posix_file_by_fd(fd); - if(file) + POSIX_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int)); + if(rec_ref) { - file->file_record->counters[POSIX_MMAPS] += 1; + rec_ref->file_rec->counters[POSIX_MMAPS] += 1; } - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -1128,7 +1099,7 @@ void* DARSHAN_DECL(mmap64)(void *addr, size_t length, int prot, int flags, int DARSHAN_DECL(fsync)(int fd) { int ret; - struct posix_file_runtime* file; + struct posix_file_record_ref *rec_ref; double tm1, tm2; MAP_OR_FAIL(fsync); @@ -1140,17 +1111,16 @@ int DARSHAN_DECL(fsync)(int fd) if(ret < 0) return(ret); - POSIX_LOCK(); - posix_runtime_initialize(); - file = posix_file_by_fd(fd); - if(file) + POSIX_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int)); + if(rec_ref) { DARSHAN_TIMER_INC_NO_OVERLAP( - file->file_record->fcounters[POSIX_F_WRITE_TIME], - tm1, tm2, file->last_write_end); - file->file_record->counters[POSIX_FSYNCS] += 1; + rec_ref->file_rec->fcounters[POSIX_F_WRITE_TIME], + tm1, tm2, rec_ref->last_write_end); + rec_ref->file_rec->counters[POSIX_FSYNCS] += 1; } - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -1158,7 +1128,7 @@ int DARSHAN_DECL(fsync)(int fd) int DARSHAN_DECL(fdatasync)(int fd) { int ret; - struct posix_file_runtime* file; + struct posix_file_record_ref *rec_ref; double tm1, tm2; MAP_OR_FAIL(fdatasync); @@ -1170,26 +1140,25 @@ int DARSHAN_DECL(fdatasync)(int fd) if(ret < 0) return(ret); - POSIX_LOCK(); - posix_runtime_initialize(); - file = posix_file_by_fd(fd); - if(file) + POSIX_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int)); + if(rec_ref) { DARSHAN_TIMER_INC_NO_OVERLAP( - file->file_record->fcounters[POSIX_F_WRITE_TIME], - tm1, tm2, 
file->last_write_end); - file->file_record->counters[POSIX_FDSYNCS] += 1; + rec_ref->file_rec->fcounters[POSIX_F_WRITE_TIME], + tm1, tm2, rec_ref->last_write_end); + rec_ref->file_rec->counters[POSIX_FDSYNCS] += 1; } - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } int DARSHAN_DECL(close)(int fd) { - struct posix_file_runtime* file; - double tm1, tm2; int ret; + struct posix_file_record_ref *rec_ref; + double tm1, tm2; MAP_OR_FAIL(close); @@ -1197,31 +1166,30 @@ int DARSHAN_DECL(close)(int fd) ret = __real_close(fd); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); - file = posix_file_by_fd(fd); - if(file) + POSIX_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int)); + if(rec_ref) { - file->last_byte_written = 0; - file->last_byte_read = 0; - file->file_record->fcounters[POSIX_F_CLOSE_TIMESTAMP] = + rec_ref->last_byte_written = 0; + rec_ref->last_byte_read = 0; + rec_ref->file_rec->fcounters[POSIX_F_CLOSE_TIMESTAMP] = darshan_core_wtime(); DARSHAN_TIMER_INC_NO_OVERLAP( - file->file_record->fcounters[POSIX_F_META_TIME], - tm1, tm2, file->last_meta_end); - posix_file_close_fd(fd); + rec_ref->file_rec->fcounters[POSIX_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + darshan_delete_record_ref(&(posix_runtime->fd_hash), &fd, sizeof(int)); } - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } int DARSHAN_DECL(fclose)(FILE *fp) { - struct posix_file_runtime* file; + int ret; + struct posix_file_record_ref *rec_ref; int fd = fileno(fp); double tm1, tm2; - int ret; MAP_OR_FAIL(fclose); @@ -1229,21 +1197,20 @@ int DARSHAN_DECL(fclose)(FILE *fp) ret = __real_fclose(fp); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); - file = posix_file_by_fd(fd); - if(file) + POSIX_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int)); + if(rec_ref) { - file->last_byte_written = 0; - file->last_byte_read = 0; - 
file->file_record->fcounters[POSIX_F_CLOSE_TIMESTAMP] = + rec_ref->last_byte_written = 0; + rec_ref->last_byte_read = 0; + rec_ref->file_rec->fcounters[POSIX_F_CLOSE_TIMESTAMP] = darshan_core_wtime(); DARSHAN_TIMER_INC_NO_OVERLAP( - file->file_record->fcounters[POSIX_F_META_TIME], - tm1, tm2, file->last_meta_end); - posix_file_close_fd(fd); + rec_ref->file_rec->fcounters[POSIX_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + darshan_delete_record_ref(&(posix_runtime->fd_hash), &fd, sizeof(int)); } - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -1257,10 +1224,9 @@ int DARSHAN_DECL(aio_read)(struct aiocb *aiocbp) ret = __real_aio_read(aiocbp); if(ret == 0) { - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); posix_aio_tracker_add(aiocbp->aio_fildes, aiocbp); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); } return(ret); @@ -1275,10 +1241,9 @@ int DARSHAN_DECL(aio_write)(struct aiocb *aiocbp) ret = __real_aio_write(aiocbp); if(ret == 0) { - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); posix_aio_tracker_add(aiocbp->aio_fildes, aiocbp); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); } return(ret); @@ -1293,10 +1258,9 @@ int DARSHAN_DECL(aio_read64)(struct aiocb64 *aiocbp) ret = __real_aio_read64(aiocbp); if(ret == 0) { - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); posix_aio_tracker_add(aiocbp->aio_fildes, aiocbp); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); } return(ret); @@ -1311,10 +1275,9 @@ int DARSHAN_DECL(aio_write64)(struct aiocb64 *aiocbp) ret = __real_aio_write64(aiocbp); if(ret == 0) { - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); posix_aio_tracker_add(aiocbp->aio_fildes, aiocbp); - POSIX_UNLOCK(); + POSIX_POST_RECORD(); } return(ret); @@ -1332,10 +1295,9 @@ ssize_t DARSHAN_DECL(aio_return)(struct aiocb *aiocbp) ret = __real_aio_return(aiocbp); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); tmp = 
posix_aio_tracker_del(aiocbp->aio_fildes, aiocbp); - if (tmp) + if(tmp) { if((unsigned long)aiocbp->aio_buf % darshan_mem_alignment == 0) aligned_flag = 1; @@ -1353,7 +1315,7 @@ ssize_t DARSHAN_DECL(aio_return)(struct aiocb *aiocbp) } free(tmp); } - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -1370,10 +1332,9 @@ ssize_t DARSHAN_DECL(aio_return64)(struct aiocb64 *aiocbp) ret = __real_aio_return64(aiocbp); tm2 = darshan_core_wtime(); - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); tmp = posix_aio_tracker_del(aiocbp->aio_fildes, aiocbp); - if (tmp) + if(tmp) { if((unsigned long)aiocbp->aio_buf % darshan_mem_alignment == 0) aligned_flag = 1; @@ -1391,7 +1352,7 @@ ssize_t DARSHAN_DECL(aio_return64)(struct aiocb64 *aiocbp) } free(tmp); } - POSIX_UNLOCK(); + POSIX_POST_RECORD(); return(ret); } @@ -1407,13 +1368,12 @@ int DARSHAN_DECL(lio_listio)(int mode, struct aiocb *const aiocb_list[], ret = __real_lio_listio(mode, aiocb_list, nitems, sevp); if(ret == 0) { - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); for(i = 0; i < nitems; i++) { posix_aio_tracker_add(aiocb_list[i]->aio_fildes, aiocb_list[i]); } - POSIX_UNLOCK(); + POSIX_POST_RECORD(); } return(ret); @@ -1430,13 +1390,12 @@ int DARSHAN_DECL(lio_listio64)(int mode, struct aiocb64 *const aiocb_list[], ret = __real_lio_listio64(mode, aiocb_list, nitems, sevp); if(ret == 0) { - POSIX_LOCK(); - posix_runtime_initialize(); + POSIX_PRE_RECORD(); for(i = 0; i < nitems; i++) { posix_aio_tracker_add(aiocb_list[i]->aio_fildes, aiocb_list[i]); } - POSIX_UNLOCK(); + POSIX_POST_RECORD(); } return(ret); @@ -1449,234 +1408,106 @@ int DARSHAN_DECL(lio_listio64)(int mode, struct aiocb64 *const aiocb_list[], /* initialize internal POSIX module data structures and register with darshan-core */ static void posix_runtime_initialize() { - int mem_limit; - struct darshan_module_funcs posix_mod_fns = - { - .begin_shutdown = &posix_begin_shutdown, - .get_output_data = 
&posix_get_output_data, - .shutdown = &posix_shutdown - }; + int psx_buf_size; - /* don't do anything if already initialized or instrumenation is disabled */ - if(posix_runtime || instrumentation_disabled) - return; + /* try and store a default number of records for this module */ + psx_buf_size = DARSHAN_DEF_MOD_REC_COUNT * sizeof(struct darshan_posix_file); - /* register the posix module with darshan core */ + /* register the POSIX module with darshan core */ darshan_core_register_module( DARSHAN_POSIX_MOD, - &posix_mod_fns, + &posix_shutdown, + &psx_buf_size, &my_rank, - &mem_limit, &darshan_mem_alignment); - /* return if no memory assigned by darshan core */ - if(mem_limit == 0) + /* return if darshan-core does not provide enough module memory */ + if(psx_buf_size < sizeof(struct darshan_posix_file)) + { + darshan_core_unregister_module(DARSHAN_POSIX_MOD); return; + } posix_runtime = malloc(sizeof(*posix_runtime)); if(!posix_runtime) - return; - memset(posix_runtime, 0, sizeof(*posix_runtime)); - - /* set maximum number of file records according to max memory limit */ - /* NOTE: maximum number of records is based on the size of a posix file record */ - /* TODO: should we base memory usage off file record or total runtime structure sizes? 
*/ - posix_runtime->file_array_size = mem_limit / sizeof(struct darshan_posix_file); - posix_runtime->file_array_ndx = 0; - - /* allocate array of runtime file records */ - posix_runtime->file_runtime_array = malloc(posix_runtime->file_array_size * - sizeof(struct posix_file_runtime)); - posix_runtime->file_record_array = malloc(posix_runtime->file_array_size * - sizeof(struct darshan_posix_file)); - if(!posix_runtime->file_runtime_array || !posix_runtime->file_record_array) { - posix_runtime->file_array_size = 0; + darshan_core_unregister_module(DARSHAN_POSIX_MOD); return; } - memset(posix_runtime->file_runtime_array, 0, posix_runtime->file_array_size * - sizeof(struct posix_file_runtime)); - memset(posix_runtime->file_record_array, 0, posix_runtime->file_array_size * - sizeof(struct darshan_posix_file)); + memset(posix_runtime, 0, sizeof(*posix_runtime)); return; } -/* get a POSIX file record for the given file path */ -static struct posix_file_runtime* posix_file_by_name(const char *name) +static struct posix_file_record_ref *posix_track_new_file_record( + darshan_record_id rec_id, const char *path) { - struct posix_file_runtime *file = NULL; - char *newname = NULL; - darshan_record_id file_id; + struct darshan_posix_file *file_rec = NULL; + struct posix_file_record_ref *rec_ref = NULL; int file_alignment; - int limit_flag; - - if(!posix_runtime || instrumentation_disabled) - return(NULL); - - newname = darshan_clean_file_path(name); - if(!newname) - newname = (char*)name; - - limit_flag = (posix_runtime->file_array_ndx >= posix_runtime->file_array_size); - - /* get a unique id for this file from darshan core */ - darshan_core_register_record( - (void*)newname, - strlen(newname), - DARSHAN_POSIX_MOD, - 1, - limit_flag, - &file_id, - &file_alignment); + int ret; - /* the file record id is set to 0 if no memory is available for tracking - * new records -- just fall through and ignore this record - */ - if(file_id == 0) - { - if(newname != name) - free(newname); + 
rec_ref = malloc(sizeof(*rec_ref)); + if(!rec_ref) return(NULL); - } + memset(rec_ref, 0, sizeof(*rec_ref)); - /* search the hash table for this file record, and return if found */ - HASH_FIND(hlink, posix_runtime->file_hash, &file_id, sizeof(darshan_record_id), file); - if(file) + /* add a reference to this file record based on record id */ + ret = darshan_add_record_ref(&(posix_runtime->rec_id_hash), &rec_id, + sizeof(darshan_record_id), rec_ref); + if(ret == 0) { - if(newname != name) - free(newname); - return(file); - } - - /* no existing record, assign a new file record from the global array */ - file = &(posix_runtime->file_runtime_array[posix_runtime->file_array_ndx]); - file->file_record = &(posix_runtime->file_record_array[posix_runtime->file_array_ndx]); - file->file_record->f_id = file_id; - file->file_record->rank = my_rank; - file->file_record->counters[POSIX_MEM_ALIGNMENT] = darshan_mem_alignment; - file->file_record->counters[POSIX_FILE_ALIGNMENT] = file_alignment; - - /* add new record to file hash table */ - HASH_ADD(hlink, posix_runtime->file_hash, file_record->f_id, sizeof(darshan_record_id), file); - posix_runtime->file_array_ndx++; - - if(newname != name) - free(newname); - return(file); -} - -/* get a POSIX file record for the given file path, and also create a - * reference structure using the returned file descriptor - */ -static struct posix_file_runtime* posix_file_by_name_setfd(const char* name, int fd) -{ - struct posix_file_runtime* file; - struct posix_file_runtime_ref* ref; - - if(!posix_runtime || instrumentation_disabled) - return(NULL); - - /* find file record by name first */ - file = posix_file_by_name(name); - - if(!file) + free(rec_ref); return(NULL); - - /* search hash table for existing file ref for this fd */ - HASH_FIND(hlink, posix_runtime->fd_hash, &fd, sizeof(int), ref); - if(ref) - { - /* we have a reference. 
Make sure it points to the correct file - * and return it - */ - ref->file = file; - return(file); } - /* if we hit this point, then we don't have a reference for this fd - * in the table yet. Add it. + /* register the actual file record with darshan-core so it is persisted + * in the log file */ - ref = malloc(sizeof(*ref)); - if(!ref) - return(NULL); - memset(ref, 0, sizeof(*ref)); - - ref->file = file; - ref->fd = fd; - HASH_ADD(hlink, posix_runtime->fd_hash, fd, sizeof(int), ref); - - return(file); -} - -/* get a POSIX file record for the given file descriptor */ -static struct posix_file_runtime* posix_file_by_fd(int fd) -{ - struct posix_file_runtime_ref* ref; - - if(!posix_runtime || instrumentation_disabled) - return(NULL); - - /* search hash table for existing file ref for this fd */ - HASH_FIND(hlink, posix_runtime->fd_hash, &fd, sizeof(int), ref); - if(ref) - return(ref->file); - - return(NULL); -} - -/* free up reference data structures for the given file descriptor */ -static void posix_file_close_fd(int fd) -{ - struct posix_file_runtime_ref* ref; - - if(!posix_runtime || instrumentation_disabled) - return; + file_rec = darshan_core_register_record( + rec_id, + path, + DARSHAN_POSIX_MOD, + sizeof(struct darshan_posix_file), + &file_alignment); - /* search hash table for this fd */ - HASH_FIND(hlink, posix_runtime->fd_hash, &fd, sizeof(int), ref); - if(ref) + if(!file_rec) { - /* we have a reference, delete it */ - HASH_DELETE(hlink, posix_runtime->fd_hash, ref); - free(ref); + darshan_delete_record_ref(&(posix_runtime->rec_id_hash), + &rec_id, sizeof(darshan_record_id)); + free(rec_ref); + return(NULL); } - return; -} - -/* compare function for sorting file records by descending rank */ -static int posix_record_compare(const void* a_p, const void* b_p) -{ - const struct darshan_posix_file* a = a_p; - const struct darshan_posix_file* b = b_p; + /* registering this file record was successful, so initialize some fields */ + file_rec->base_rec.id = 
rec_id; + file_rec->base_rec.rank = my_rank; + file_rec->counters[POSIX_MEM_ALIGNMENT] = darshan_mem_alignment; + file_rec->counters[POSIX_FILE_ALIGNMENT] = file_alignment; + rec_ref->file_rec = file_rec; + posix_runtime->file_rec_count++; - if(a->rank < b->rank) - return 1; - if(a->rank > b->rank) - return -1; - - return 0; + return(rec_ref); } /* finds the tracker structure for a given aio operation, removes it from - * the linked list for the darshan_file structure, and returns a pointer. + * the associated linked list for this file record, and returns a pointer. * * returns NULL if aio operation not found */ static struct posix_aio_tracker* posix_aio_tracker_del(int fd, void *aiocbp) { struct posix_aio_tracker *tracker = NULL, *iter, *tmp; - struct posix_file_runtime* file; + struct posix_file_record_ref *rec_ref; - file = posix_file_by_fd(fd); - if (file) + rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int)); + if(rec_ref) { - LL_FOREACH_SAFE(file->aio_list, iter, tmp) + LL_FOREACH_SAFE(rec_ref->aio_list, iter, tmp) { - if (iter->aiocbp == aiocbp) + if(iter->aiocbp == aiocbp) { - LL_DELETE(file->aio_list, iter); + LL_DELETE(rec_ref->aio_list, iter); tracker = iter; break; } @@ -1690,23 +1521,33 @@ static struct posix_aio_tracker* posix_aio_tracker_del(int fd, void *aiocbp) static void posix_aio_tracker_add(int fd, void *aiocbp) { struct posix_aio_tracker* tracker; - struct posix_file_runtime* file; + struct posix_file_record_ref *rec_ref; - file = posix_file_by_fd(fd); - if (file) + rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int)); + if(rec_ref) { tracker = malloc(sizeof(*tracker)); - if (tracker) + if(tracker) { tracker->tm1 = darshan_core_wtime(); tracker->aiocbp = aiocbp; - LL_PREPEND(file->aio_list, tracker); + LL_PREPEND(rec_ref->aio_list, tracker); } } return; } +static void posix_finalize_file_records(void *rec_ref_p) +{ + struct posix_file_record_ref *rec_ref = + (struct posix_file_record_ref 
*)rec_ref_p; + + tdestroy(rec_ref->access_root, free); + tdestroy(rec_ref->stride_root, free); + return; +} + static void posix_record_reduction_op(void* infile_v, void* inoutfile_v, int *len, MPI_Datatype *datatype) { @@ -1715,13 +1556,11 @@ static void posix_record_reduction_op(void* infile_v, void* inoutfile_v, struct darshan_posix_file *inoutfile = inoutfile_v; int i, j, k; - assert(posix_runtime); - for(i=0; i<*len; i++) { memset(&tmp_file, 0, sizeof(struct darshan_posix_file)); - tmp_file.f_id = infile->f_id; - tmp_file.rank = -1; + tmp_file.base_rec.id = infile->base_rec.id; + tmp_file.base_rec.rank = -1; /* sum */ for(j=POSIX_OPENS; j<=POSIX_FDSYNCS; j++) @@ -1788,14 +1627,14 @@ static void posix_record_reduction_op(void* infile_v, void* inoutfile_v, { DARSHAN_COMMON_VAL_COUNTER_INC(&(tmp_file.counters[POSIX_STRIDE1_STRIDE]), &(tmp_file.counters[POSIX_STRIDE1_COUNT]), infile->counters[j], - infile->counters[j+4]); + infile->counters[j+4], 0); } /* second set */ for(j=POSIX_STRIDE1_STRIDE; j<=POSIX_STRIDE4_STRIDE; j++) { DARSHAN_COMMON_VAL_COUNTER_INC(&(tmp_file.counters[POSIX_STRIDE1_STRIDE]), &(tmp_file.counters[POSIX_STRIDE1_COUNT]), inoutfile->counters[j], - inoutfile->counters[j+4]); + inoutfile->counters[j+4], 0); } /* same for access counts */ @@ -1819,14 +1658,14 @@ static void posix_record_reduction_op(void* infile_v, void* inoutfile_v, { DARSHAN_COMMON_VAL_COUNTER_INC(&(tmp_file.counters[POSIX_ACCESS1_ACCESS]), &(tmp_file.counters[POSIX_ACCESS1_COUNT]), infile->counters[j], - infile->counters[j+4]); + infile->counters[j+4], 0); } /* second set */ for(j=POSIX_ACCESS1_ACCESS; j<=POSIX_ACCESS4_ACCESS; j++) { DARSHAN_COMMON_VAL_COUNTER_INC(&(tmp_file.counters[POSIX_ACCESS1_ACCESS]), &(tmp_file.counters[POSIX_ACCESS1_COUNT]), inoutfile->counters[j], - inoutfile->counters[j+4]); + inoutfile->counters[j+4], 0); } /* min non-zero (if available) value */ @@ -2020,6 +1859,17 @@ static void posix_shared_record_variance(MPI_Comm mod_comm, return; } +static 
void posix_cleanup_runtime() +{ + darshan_clear_record_refs(&(posix_runtime->fd_hash), 0); + darshan_clear_record_refs(&(posix_runtime->rec_id_hash), 1); + + free(posix_runtime); + posix_runtime = NULL; + + return; +} + /* posix module shutdown benchmark routine */ void darshan_posix_shutdown_bench_setup(int test_case) { @@ -2029,7 +1879,7 @@ void darshan_posix_shutdown_bench_setup(int test_case) int i; if(posix_runtime) - posix_shutdown(); + posix_cleanup_runtime(); posix_runtime_initialize(); @@ -2092,56 +1942,35 @@ void darshan_posix_shutdown_bench_setup(int test_case) return; } -/************************************************************************ - * Functions exported by this module for coordinating with darshan-core * - ************************************************************************/ - -static void posix_begin_shutdown() -{ - assert(posix_runtime); - - POSIX_LOCK(); - /* disable further instrumentation while Darshan shuts down */ - instrumentation_disabled = 1; - POSIX_UNLOCK(); +/******************************************************************************** + * shutdown function exported by this module for coordinating with darshan-core * + ********************************************************************************/ - return; -} - -static void posix_get_output_data( +static void posix_shutdown( MPI_Comm mod_comm, darshan_record_id *shared_recs, int shared_rec_count, void **posix_buf, int *posix_buf_sz) { - struct posix_file_runtime *file; - struct posix_file_runtime *tmp; - int i; + struct posix_file_record_ref *rec_ref; + struct darshan_posix_file *posix_rec_buf = *(struct darshan_posix_file **)posix_buf; + int posix_rec_count; double posix_time; struct darshan_posix_file *red_send_buf = NULL; struct darshan_posix_file *red_recv_buf = NULL; MPI_Datatype red_type; MPI_Op red_op; + int i; + POSIX_LOCK(); assert(posix_runtime); + posix_rec_count = posix_runtime->file_rec_count; - /* go through file access data for each record and set the 4 
most common - * stride/access size counters. + /* perform any final transformations on POSIX file records before + * writing them out to log file */ - for(i = 0; i < posix_runtime->file_array_ndx; i++) - { - tmp = &(posix_runtime->file_runtime_array[i]); - - /* common accesses */ - darshan_walk_common_vals(tmp->access_root, - &(tmp->file_record->counters[POSIX_ACCESS1_ACCESS]), - &(tmp->file_record->counters[POSIX_ACCESS1_COUNT])); - /* common strides */ - darshan_walk_common_vals(tmp->stride_root, - &(tmp->file_record->counters[POSIX_STRIDE1_STRIDE]), - &(tmp->file_record->counters[POSIX_STRIDE1_COUNT])); - } + darshan_iter_record_refs(posix_runtime->rec_id_hash, &posix_finalize_file_records); /* if there are globally shared files, do a shared file reduction */ /* NOTE: the shared file reduction is also skipped if the @@ -2152,55 +1981,54 @@ static void posix_get_output_data( /* necessary initialization of shared records */ for(i = 0; i < shared_rec_count; i++) { - HASH_FIND(hlink, posix_runtime->file_hash, &shared_recs[i], - sizeof(darshan_record_id), file); - assert(file); + rec_ref = darshan_lookup_record_ref(posix_runtime->rec_id_hash, + &shared_recs[i], sizeof(darshan_record_id)); + assert(rec_ref); posix_time = - file->file_record->fcounters[POSIX_F_READ_TIME] + - file->file_record->fcounters[POSIX_F_WRITE_TIME] + - file->file_record->fcounters[POSIX_F_META_TIME]; + rec_ref->file_rec->fcounters[POSIX_F_READ_TIME] + + rec_ref->file_rec->fcounters[POSIX_F_WRITE_TIME] + + rec_ref->file_rec->fcounters[POSIX_F_META_TIME]; /* initialize fastest/slowest info prior to the reduction */ - file->file_record->counters[POSIX_FASTEST_RANK] = - file->file_record->rank; - file->file_record->counters[POSIX_FASTEST_RANK_BYTES] = - file->file_record->counters[POSIX_BYTES_READ] + - file->file_record->counters[POSIX_BYTES_WRITTEN]; - file->file_record->fcounters[POSIX_F_FASTEST_RANK_TIME] = + rec_ref->file_rec->counters[POSIX_FASTEST_RANK] = + rec_ref->file_rec->base_rec.rank; + 
rec_ref->file_rec->counters[POSIX_FASTEST_RANK_BYTES] = + rec_ref->file_rec->counters[POSIX_BYTES_READ] + + rec_ref->file_rec->counters[POSIX_BYTES_WRITTEN]; + rec_ref->file_rec->fcounters[POSIX_F_FASTEST_RANK_TIME] = posix_time; /* until reduction occurs, we assume that this rank is both * the fastest and slowest. It is up to the reduction operator * to find the true min and max. */ - file->file_record->counters[POSIX_SLOWEST_RANK] = - file->file_record->counters[POSIX_FASTEST_RANK]; - file->file_record->counters[POSIX_SLOWEST_RANK_BYTES] = - file->file_record->counters[POSIX_FASTEST_RANK_BYTES]; - file->file_record->fcounters[POSIX_F_SLOWEST_RANK_TIME] = - file->file_record->fcounters[POSIX_F_FASTEST_RANK_TIME]; - - file->file_record->rank = -1; + rec_ref->file_rec->counters[POSIX_SLOWEST_RANK] = + rec_ref->file_rec->counters[POSIX_FASTEST_RANK]; + rec_ref->file_rec->counters[POSIX_SLOWEST_RANK_BYTES] = + rec_ref->file_rec->counters[POSIX_FASTEST_RANK_BYTES]; + rec_ref->file_rec->fcounters[POSIX_F_SLOWEST_RANK_TIME] = + rec_ref->file_rec->fcounters[POSIX_F_FASTEST_RANK_TIME]; + + rec_ref->file_rec->base_rec.rank = -1; } - /* sort the array of files descending by rank so that we get all of the - * shared files (marked by rank -1) in a contiguous portion at end - * of the array + /* sort the array of records so we get all of the shared records + * (marked by rank -1) in a contiguous portion at end of the array */ - qsort(posix_runtime->file_record_array, posix_runtime->file_array_ndx, - sizeof(struct darshan_posix_file), posix_record_compare); + darshan_record_sort(posix_rec_buf, posix_rec_count, + sizeof(struct darshan_posix_file)); + + /* make send_buf point to the shared files at the end of sorted array */ + red_send_buf = &(posix_rec_buf[posix_rec_count-shared_rec_count]); - /* make *send_buf point to the shared files at the end of sorted array */ - red_send_buf = - &(posix_runtime->file_record_array[posix_runtime->file_array_ndx-shared_rec_count]); - /* 
allocate memory for the reduction output on rank 0 */ if(my_rank == 0) { red_recv_buf = malloc(shared_rec_count * sizeof(struct darshan_posix_file)); if(!red_recv_buf) { + POSIX_UNLOCK(); return; } } @@ -2226,46 +2054,30 @@ static void posix_get_output_data( /* clean up reduction state */ if(my_rank == 0) { - int tmp_ndx = posix_runtime->file_array_ndx - shared_rec_count; - memcpy(&(posix_runtime->file_record_array[tmp_ndx]), red_recv_buf, + int tmp_ndx = posix_rec_count - shared_rec_count; + memcpy(&(posix_rec_buf[tmp_ndx]), red_recv_buf, shared_rec_count * sizeof(struct darshan_posix_file)); free(red_recv_buf); } else { - posix_runtime->file_array_ndx -= shared_rec_count; + posix_rec_count -= shared_rec_count; } DARSHAN_MPI_CALL(PMPI_Type_free)(&red_type); DARSHAN_MPI_CALL(PMPI_Op_free)(&red_op); } - *posix_buf = (void *)(posix_runtime->file_record_array); - *posix_buf_sz = posix_runtime->file_array_ndx * sizeof(struct darshan_posix_file); - - return; -} - -static void posix_shutdown() -{ - struct posix_file_runtime_ref *ref, *tmp; + /* update output buffer size to account for shared file reduction */ + *posix_buf_sz = posix_rec_count * sizeof(struct darshan_posix_file); - assert(posix_runtime); + /* shutdown internal structures used for instrumenting */ + posix_cleanup_runtime(); - HASH_ITER(hlink, posix_runtime->fd_hash, ref, tmp) - { - HASH_DELETE(hlink, posix_runtime->fd_hash, ref); - free(ref); - } - - HASH_CLEAR(hlink, posix_runtime->file_hash); /* these entries are freed all at once below */ + /* disable further instrumentation */ + instrumentation_disabled = 1; - free(posix_runtime->file_runtime_array); - free(posix_runtime->file_record_array); - free(posix_runtime); - posix_runtime = NULL; - instrumentation_disabled = 0; - + POSIX_UNLOCK(); return; } diff --git a/darshan-runtime/share/darshan-mmap-epilog.sh.in b/darshan-runtime/share/darshan-mmap-epilog.sh.in new file mode 100644 index 
0000000000000000000000000000000000000000..62c788c8051448be1092451ed5909d0c29788e11 --- /dev/null +++ b/darshan-runtime/share/darshan-mmap-epilog.sh.in @@ -0,0 +1,79 @@ +#!/bin/bash + +# +# Copyright (C) 2015 University of Chicago. +# See COPYRIGHT notice in top-level directory. +# + +# grab the prefix of the darshan install directory +DARSHAN_INSTALL_DIR=@prefix@ + +# use the log dir specified at configure time +DARSHAN_LOG_DIR=@__DARSHAN_LOG_PATH@ + +JOB_END=$(date +"%s") + +# use the default mmap log directory (/tmp), unless the +# env variable is set to something +if [ -z "$DARSHAN_MMAP_LOGPATH" ]; then + DARSHAN_MMAP_LOG_DIR=/tmp +else + DARSHAN_MMAP_LOG_DIR=$DARSHAN_MMAP_LOGPATH +fi + +DARSHAN_MMAP_LOG_GLOB=${DARSHAN_MMAP_LOG_DIR}/*id${SLURM_JOB_ID}*.darshan + +# if no mmap logs found for this job, we have nothing to do +DARSHAN_MMAP_LOG1=$(ls $DARSHAN_MMAP_LOG_GLOB 2>/dev/null | head -n 1) +if [ -z $DARSHAN_MMAP_LOG1 ]; then + exit 0 +fi + +# get the job start time from the first log file +JOB_START_DATE=$(${DARSHAN_INSTALL_DIR}/bin/darshan-parser $DARSHAN_MMAP_LOG1 | + grep "# start_time_asci" | cut -d':' -f 2- | cut -d' ' -f 2-) +OUTPUT_YEAR=$(date --date="$(printf "$JOB_START_DATE")" +"%Y") +OUTPUT_MON=$(date --date="$(printf "$JOB_START_DATE")" +"%-m") +OUTPUT_DAY=$(date --date="$(printf "$JOB_START_DATE")" +"%-d") +OUTPUT_SECS=$(( + ($(date --date="$(printf "$JOB_START_DATE")" +"%-H") * 60 * 60) + + ($(date --date="$(printf "$JOB_START_DATE")" +"%-M") * 60) + + ($(date --date="$(printf "$JOB_START_DATE")" +"%-S")) +)) + +LOG_NAME_PRE=$(basename $DARSHAN_MMAP_LOG1 | cut -d'_' -f 1-3) + +# construct full name of directory to store output log(s) +OUTPUT_LOG_DIR=${DARSHAN_LOG_DIR}/${OUTPUT_YEAR}/${OUTPUT_MON}/${OUTPUT_DAY}/ +OUTPUT_NAME_PRE=${LOG_NAME_PRE}_${OUTPUT_MON}-${OUTPUT_DAY}-${OUTPUT_SECS} + +if [ $SLURM_NNODES -gt 1 ]; then + NODE_LOG_DIR=${OUTPUT_LOG_DIR}/${OUTPUT_NAME_PRE} + NODE_NAME=$(uname -n) + + # multiple nodes, create a node log directory 
for everyone to write to + mkdir -p $NODE_LOG_DIR + + # construct the per-node log file and store in the output directory + $DARSHAN_INSTALL_DIR/bin/darshan-merge --job-end-time $JOB_END \ + --output ${NODE_LOG_DIR}/${LOG_NAME_PRE}_${NODE_NAME}.darshan \ + $DARSHAN_MMAP_LOG_GLOB +else + TMP_LOG=${OUTPUT_NAME_PRE}.darshan + + # single node, just create the final output darshan log + LOG_WRITE_START=$(date +%s) + $DARSHAN_INSTALL_DIR/bin/darshan-merge --job-end-time $JOB_END \ + --shared-redux --output ${OUTPUT_LOG_DIR}/${TMP_LOG} \ + $DARSHAN_MMAP_LOG_GLOB + LOG_WRITE_END=$(date +%s) + + WRITE_TM=$(($LOG_WRITE_END - $LOG_WRITE_START + 1)) + FINAL_LOG=${OUTPUT_NAME_PRE}-${RANDOM}_${WRITE_TM}.darshan + + mv ${OUTPUT_LOG_DIR}/${TMP_LOG} ${OUTPUT_LOG_DIR}/${FINAL_LOG} +fi + +rm -f $DARSHAN_MMAP_LOG_GLOB + +exit 0 diff --git a/darshan-util/Makefile.in b/darshan-util/Makefile.in index 13264596c11aa7ba36bdf7a85890120f861905bf..eab620bc10cd8246187ad413f3e2aa2a536852bf 100644 --- a/darshan-util/Makefile.in +++ b/darshan-util/Makefile.in @@ -1,4 +1,4 @@ -all: libdarshan-util.a darshan-null-logutils.o darshan-analyzer darshan-convert darshan-diff darshan-parser jenkins-hash-gen +all: libdarshan-util.a darshan-null-logutils.o darshan-analyzer darshan-convert darshan-diff darshan-parser darshan-merge jenkins-hash-gen DESTDIR = srcdir = @srcdir@ @@ -30,7 +30,7 @@ cp_zlib_include_flags = @__DARSHAN_ZLIB_INCLUDE_FLAGS@ # deliberately avoid large file support for host side utilities to avoid # potentially buggy libz 64 bit offset support -CFLAGS = -I . -I $(srcdir) -I $(srcdir)/../ -DDARSHAN_CONFIG_H=\"darshan-util-config.h\" @CFLAGS@ @CPPFLAGS@ +CFLAGS = -I . -I .. 
-I $(srcdir) -I $(srcdir)/../ -DDARSHAN_CONFIG_H=\"darshan-util-config.h\" @CFLAGS@ @CPPFLAGS@ CFLAGS_SHARED = $(CFLAGS) -shared -fpic -DPIC LDFLAGS=@LDFLAGS@ @@ -84,7 +84,7 @@ darshan-bgq-logutils.po: darshan-bgq-logutils.c darshan-logutils.h darshan-bgq-l libdarshan-util.a: darshan-logutils.o $(DARSHAN_STATIC_MOD_OBJS) ar rcs libdarshan-util.a $^ -libdarshan-util.so: darshan-logutils.po $(DARSHAN_DYNAMIC_MOD_OBJS) +libdarshan-util.so: darshan-logutils.po $(DARSHAN_DYNAMIC_MOD_OBJS) $(CC) $(CFLAGS_SHARED) $(LDFLAGS) -o $@ $^ $(LIBS) jenkins-hash-gen: jenkins-hash-gen.c lookup3.o @@ -96,7 +96,7 @@ lookup3.o: lookup3.c darshan-analyzer: darshan-analyzer.c darshan-logutils.h $(DARSHAN_LOG_FORMAT) $(DARSHAN_MOD_LOGUTIL_HEADERS) $(DARSHAN_MOD_LOG_FORMATS) libdarshan-util.a | uthash-1.9.2 $(CC) $(CFLAGS) $(LDFLAGS) $< libdarshan-util.a -o $@ $(LIBS) -darshan-convert: darshan-convert.c darshan-logutils.h $(DARSHAN_LOG_FORMAT) libdarshan-util.a lookup3.o | uthash-1.9.2 +darshan-convert: darshan-convert.c darshan-logutils.h $(DARSHAN_LOG_FORMAT) $(DARSHAN_MOD_LOGUTIL_HEADERS) $(DARSHAN_MOD_LOG_FORMATS) libdarshan-util.a lookup3.o | uthash-1.9.2 $(CC) $(CFLAGS) $(LDFLAGS) $< lookup3.o libdarshan-util.a -o $@ $(LIBS) darshan-diff: darshan-diff.c darshan-logutils.h $(DARSHAN_LOG_FORMAT) $(DARSHAN_MOD_LOGUTIL_HEADERS) $(DARSHAN_MOD_LOG_FORMATS) libdarshan-util.a | uthash-1.9.2 @@ -105,6 +105,9 @@ darshan-diff: darshan-diff.c darshan-logutils.h $(DARSHAN_LOG_FORMAT) $(DARSHAN_ darshan-parser: darshan-parser.c darshan-logutils.h $(DARSHAN_LOG_FORMAT) $(DARSHAN_MOD_LOGUTIL_HEADERS) $(DARSHAN_MOD_LOG_FORMATS) libdarshan-util.a | uthash-1.9.2 $(CC) $(CFLAGS) $(LDFLAGS) $< libdarshan-util.a -o $@ $(LIBS) +darshan-merge: darshan-merge.c darshan-logutils.h $(DARSHAN_LOG_FORMAT) $(DARSHAN_MOD_LOGUTIL_HEADERS) $(DARSHAN_MOD_LOG_FORMATS) libdarshan-util.a | uthash-1.9.2 + $(CC) $(CFLAGS) $(LDFLAGS) $< libdarshan-util.a -o $@ $(LIBS) + #test/gztest: test/gztest.c mktestdir # $(CC) 
$(CFLAGS) $(LDFLAGS) -lz $< -o $@ @@ -120,6 +123,7 @@ install:: all install -m 755 darshan-convert $(bindir) install -m 755 darshan-diff $(bindir) install -m 755 darshan-parser $(bindir) + install -m 755 darshan-merge $(bindir) install -m 755 $(srcdir)/darshan-summary-per-file.sh $(bindir) install -m 755 libdarshan-util.a $(libdir) ifeq ($(DARSHAN_ENABLE_SHARED),1) @@ -140,7 +144,7 @@ endif install -m 644 $(srcdir)/../darshan-bgq-log-format.h $(includedir) install -d $(includedir)/uthash-1.9.2 install -d $(includedir)/uthash-1.9.2/src - install -m 644 uthash-1.9.2/src/uthash.h $(includedir)/uthash-1.9.2/src/ + install -m 644 $(srcdir)/uthash-1.9.2/src/uthash.h $(includedir)/uthash-1.9.2/src/ install -m 644 $(DARSHAN_LOG_FORMAT) $(includedir) install -m 755 darshan-job-summary/bin/darshan-job-summary.pl $(bindir) install -d $(libdir)/TeX @@ -154,7 +158,7 @@ endif clean:: - rm -f *.o *.po *.a darshan-analyzer darshan-convert darshan-diff darshan-parser jenkins-hash-gen + rm -f *.o *.po *.a *.so darshan-analyzer darshan-convert darshan-diff darshan-parser darshan-merge jenkins-hash-gen distclean:: clean rm -f darshan-runtime-config.h aclocal.m4 autom4te.cache/* config.status config.log Makefile util/bin/darshan-job-summary.pl diff --git a/darshan-util/darshan-analyzer.c b/darshan-util/darshan-analyzer.c index 55bfe24c40bb7ab082362df666ba0ccf69675aba..00f5f3c89383a282b84058ae61528bfff1b0b53a 100644 --- a/darshan-util/darshan-analyzer.c +++ b/darshan-util/darshan-analyzer.c @@ -38,7 +38,6 @@ int process_log(const char *fname, double *io_ratio, int *used_mpio, int *used_p struct darshan_job job; struct darshan_mod_logutil_funcs *psx_mod = mod_logutils[DARSHAN_POSIX_MOD]; struct darshan_posix_file psx_rec; - darshan_record_id rec_id; int f_count; double total_io_time; double total_job_time; @@ -53,10 +52,10 @@ int process_log(const char *fname, double *io_ratio, int *used_mpio, int *used_p return -1; } - ret = darshan_log_getjob(file, &job); + ret = 
darshan_log_get_job(file, &job); if (ret < 0) { - fprintf(stderr, "darshan_log_getjob() failed on file %s.\n", fname); + fprintf(stderr, "darshan_log_get_job() failed on file %s.\n", fname); darshan_log_close(file); return -1; } @@ -64,11 +63,11 @@ int process_log(const char *fname, double *io_ratio, int *used_mpio, int *used_p f_count = 0; total_io_time = 0.0; - while((ret = psx_mod->log_get_record(file, &psx_rec, &rec_id)) == 1) + while((ret = psx_mod->log_get_record(file, &psx_rec)) == 1) { f_count += 1; - if (psx_rec.rank == -1) + if (psx_rec.base_rec.rank == -1) *used_shared = 1; else *used_fpp = 1; diff --git a/darshan-util/darshan-bgq-logutils.c b/darshan-util/darshan-bgq-logutils.c index ef35ba2c0a48b81b0524b55c30184eb4b9e5c0b8..2609bbda8ff288ec8da21bd0e5e0836bb095e2a5 100644 --- a/darshan-util/darshan-bgq-logutils.c +++ b/darshan-util/darshan-bgq-logutils.c @@ -30,14 +30,23 @@ char *bgq_f_counter_names[] = { }; #undef X -static int darshan_log_get_bgq_rec(darshan_fd fd, void* bgq_buf, - darshan_record_id* rec_id); +/* old definitions for enforcing backwards compatibility */ +struct darshan_bgq_record_1 +{ + struct darshan_base_record base_rec; + int alignment; + int64_t counters[BGQ_NUM_INDICES]; + double fcounters[BGQ_F_NUM_INDICES]; +}; + +static int darshan_log_get_bgq_rec(darshan_fd fd, void* bgq_buf); static int darshan_log_put_bgq_rec(darshan_fd fd, void* bgq_buf, int ver); static void darshan_log_print_bgq_rec(void *file_rec, char *file_name, char *mnt_pt, char *fs_type, int ver); static void darshan_log_print_bgq_description(void); static void darshan_log_print_bgq_rec_diff(void *file_rec1, char *file_name1, void *file_rec2, char *file_name2); +static void darshan_log_agg_bgq_recs(void *rec, void *agg_rec, int init_flag); struct darshan_mod_logutil_funcs bgq_logutils = { @@ -45,37 +54,63 @@ struct darshan_mod_logutil_funcs bgq_logutils = .log_put_record = &darshan_log_put_bgq_rec, .log_print_record = &darshan_log_print_bgq_rec, 
.log_print_description = &darshan_log_print_bgq_description, - .log_print_diff = &darshan_log_print_bgq_rec_diff + .log_print_diff = &darshan_log_print_bgq_rec_diff, + .log_agg_records = &darshan_log_agg_bgq_recs }; -static int darshan_log_get_bgq_rec(darshan_fd fd, void* bgq_buf, - darshan_record_id* rec_id) +static int darshan_log_get_bgq_rec(darshan_fd fd, void* bgq_buf) { - struct darshan_bgq_record *rec; + int log_rec_len; + struct darshan_bgq_record *rec = + (struct darshan_bgq_record *)bgq_buf; int i; - int ret; + int ret = -1; + + /* read the BGQ record from file, checking the version first so we + * can read it correctly + */ + if(fd->mod_ver[DARSHAN_BGQ_MOD] == 1) + { + struct darshan_bgq_record_1 bgq_rec_1; + log_rec_len = sizeof(struct darshan_bgq_record_1); + + ret = darshan_log_get_mod(fd, DARSHAN_BGQ_MOD, &bgq_rec_1, + log_rec_len); + if(ret == log_rec_len) + { + /* up-convert old BGQ format to new format */ + rec->base_rec = bgq_rec_1.base_rec; + memcpy(rec->counters, bgq_rec_1.counters, + BGQ_NUM_INDICES * sizeof(int64_t)); + memcpy(rec->fcounters, bgq_rec_1.fcounters, + BGQ_F_NUM_INDICES * sizeof(double)); + } + } + else if(fd->mod_ver[DARSHAN_BGQ_MOD] == 2) + { + log_rec_len = sizeof(struct darshan_bgq_record); + + ret = darshan_log_get_mod(fd, DARSHAN_BGQ_MOD, rec, + log_rec_len); + } - ret = darshan_log_getmod(fd, DARSHAN_BGQ_MOD, bgq_buf, - sizeof(struct darshan_bgq_record)); if(ret < 0) return(-1); - else if(ret < sizeof(struct darshan_bgq_record)) + else if(ret < log_rec_len) return(0); else { - rec = (struct darshan_bgq_record *)bgq_buf; if(fd->swap_flag) { /* swap bytes if necessary */ - DARSHAN_BSWAP64(&rec->f_id); - DARSHAN_BSWAP64(&rec->rank); + DARSHAN_BSWAP64(&(rec->base_rec.id)); + DARSHAN_BSWAP64(&(rec->base_rec.rank)); for(i=0; icounters[i]); for(i=0; ifcounters[i]); } - *rec_id = rec->f_id; return(1); } } @@ -85,7 +120,7 @@ static int darshan_log_put_bgq_rec(darshan_fd fd, void* bgq_buf, int ver) struct darshan_bgq_record *rec = 
(struct darshan_bgq_record *)bgq_buf; int ret; - ret = darshan_log_putmod(fd, DARSHAN_BGQ_MOD, rec, + ret = darshan_log_put_mod(fd, DARSHAN_BGQ_MOD, rec, sizeof(struct darshan_bgq_record), ver); if(ret < 0) return(-1); @@ -103,15 +138,17 @@ static void darshan_log_print_bgq_rec(void *file_rec, char *file_name, for(i=0; irank, bgq_file_rec->f_id, bgq_counter_names[i], - bgq_file_rec->counters[i], file_name, mnt_pt, fs_type); + bgq_file_rec->base_rec.rank, bgq_file_rec->base_rec.id, + bgq_counter_names[i], bgq_file_rec->counters[i], + file_name, mnt_pt, fs_type); } for(i=0; irank, bgq_file_rec->f_id, bgq_f_counter_names[i], - bgq_file_rec->fcounters[i], file_name, mnt_pt, fs_type); + bgq_file_rec->base_rec.rank, bgq_file_rec->base_rec.id, + bgq_f_counter_names[i], bgq_file_rec->fcounters[i], + file_name, mnt_pt, fs_type); } return; @@ -149,7 +186,7 @@ static void darshan_log_print_bgq_rec_diff(void *file_rec1, char *file_name1, { printf("- "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD], - file1->rank, file1->f_id, bgq_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, bgq_counter_names[i], file1->counters[i], file_name1, "", ""); } @@ -157,18 +194,18 @@ static void darshan_log_print_bgq_rec_diff(void *file_rec1, char *file_name1, { printf("+ "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD], - file2->rank, file2->f_id, bgq_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, bgq_counter_names[i], file2->counters[i], file_name2, "", ""); } else if(file1->counters[i] != file2->counters[i]) { printf("- "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD], - file1->rank, file1->f_id, bgq_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, bgq_counter_names[i], file1->counters[i], file_name1, "", ""); printf("+ "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD], - file2->rank, file2->f_id, bgq_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, bgq_counter_names[i], 
file2->counters[i], file_name2, "", ""); } } @@ -179,7 +216,7 @@ static void darshan_log_print_bgq_rec_diff(void *file_rec1, char *file_name1, { printf("- "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD], - file1->rank, file1->f_id, bgq_f_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, bgq_f_counter_names[i], file1->fcounters[i], file_name1, "", ""); } @@ -187,18 +224,18 @@ static void darshan_log_print_bgq_rec_diff(void *file_rec1, char *file_name1, { printf("+ "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD], - file2->rank, file2->f_id, bgq_f_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, bgq_f_counter_names[i], file2->fcounters[i], file_name2, "", ""); } else if(file1->fcounters[i] != file2->fcounters[i]) { printf("- "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD], - file1->rank, file1->f_id, bgq_f_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, bgq_f_counter_names[i], file1->fcounters[i], file_name1, "", ""); printf("+ "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD], - file2->rank, file2->f_id, bgq_f_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, bgq_f_counter_names[i], file2->fcounters[i], file_name2, "", ""); } } @@ -207,6 +244,12 @@ static void darshan_log_print_bgq_rec_diff(void *file_rec1, char *file_name1, } +static void darshan_log_agg_bgq_recs(void *rec, void *agg_rec, int init_flag) +{ + /* TODO: how would aggregation work for the BG/Q module ? 
*/ + return; +} + /* * Local variables: * c-indent-level: 4 diff --git a/darshan-util/darshan-convert.c b/darshan-util/darshan-convert.c index 3a28a7ce9c494548334912f12959c39572472c7c..f74c1e85b4b1b3a107fa92347c63e6f75fa84a0f 100644 --- a/darshan-util/darshan-convert.c +++ b/darshan-util/darshan-convert.c @@ -140,21 +140,25 @@ void obfuscate_exe(int key, char *exe) return; } -void obfuscate_filenames(int key, struct darshan_record_ref *rec_hash) +void obfuscate_filenames(int key, struct darshan_name_record_ref *name_hash) { - struct darshan_record_ref *ref, *tmp; + struct darshan_name_record_ref *ref, *tmp; uint32_t hashed; char tmp_string[128] = {0}; + darshan_record_id tmp_id; - HASH_ITER(hlink, rec_hash, ref, tmp) + HASH_ITER(hlink, name_hash, ref, tmp) { - hashed = darshan_hashlittle(ref->rec.name, strlen(ref->rec.name), key); + tmp_id = ref->name_record->id; + hashed = darshan_hashlittle(ref->name_record->name, + strlen(ref->name_record->name), key); sprintf(tmp_string, "%u", hashed); - free(ref->rec.name); - ref->rec.name = malloc(strlen(tmp_string) + 1); - assert(ref->rec.name); - memcpy(ref->rec.name, tmp_string, strlen(tmp_string)); - ref->rec.name[strlen(tmp_string)] = '\0'; + free(ref->name_record); + ref->name_record = malloc(sizeof(struct darshan_name_record) + + strlen(tmp_string)); + assert(ref->name_record); + ref->name_record->id = tmp_id; + strcpy(ref->name_record->name, tmp_string); } return; @@ -202,16 +206,17 @@ void add_annotation (char *annotation, return; } -static void remove_hash_recs(struct darshan_record_ref **rec_hash, darshan_record_id hash) +static void remove_hash_recs(struct darshan_name_record_ref **name_hash, + darshan_record_id hash) { - struct darshan_record_ref *ref, *tmp; + struct darshan_name_record_ref *ref, *tmp; - HASH_ITER(hlink, *rec_hash, ref, tmp) + HASH_ITER(hlink, *name_hash, ref, tmp) { - if(ref->rec.id != hash) + if(ref->name_record->id != hash) { - HASH_DELETE(hlink, *rec_hash, ref); - free(ref->rec.name); + 
HASH_DELETE(hlink, *name_hash, ref); + free(ref->name_record); free(ref); } } @@ -231,8 +236,8 @@ int main(int argc, char **argv) int i; int mount_count; struct darshan_mnt_info *mnt_data_array; - struct darshan_record_ref *rec_hash = NULL; - struct darshan_record_ref *ref, *tmp; + struct darshan_name_record_ref *name_hash = NULL; + struct darshan_name_record_ref *ref, *tmp; char mod_buf[DEF_MOD_BUF_SIZE]; enum darshan_comp_type comp_type; int bzip2; @@ -249,7 +254,7 @@ int main(int argc, char **argv) if(!infile) return(-1); - comp_type = bzip2 ? comp_type = DARSHAN_BZIP2_COMP : DARSHAN_ZLIB_COMP; + comp_type = bzip2 ? DARSHAN_BZIP2_COMP : DARSHAN_ZLIB_COMP; outfile = darshan_log_create(outfile_name, comp_type, infile->partial_flag); if(!outfile) { @@ -258,7 +263,7 @@ int main(int argc, char **argv) } /* read job info */ - ret = darshan_log_getjob(infile, &job); + ret = darshan_log_get_job(infile, &job); if(ret < 0) { darshan_log_close(infile); @@ -271,7 +276,7 @@ int main(int argc, char **argv) if (obfuscate) obfuscate_job(key, &job); if (annotation) add_annotation(annotation, &job); - ret = darshan_log_putjob(outfile, &job); + ret = darshan_log_put_job(outfile, &job); if (ret < 0) { darshan_log_close(infile); @@ -279,7 +284,7 @@ int main(int argc, char **argv) return(-1); } - ret = darshan_log_getexe(infile, tmp_string); + ret = darshan_log_get_exe(infile, tmp_string); if(ret < 0) { darshan_log_close(infile); @@ -290,7 +295,7 @@ int main(int argc, char **argv) if (obfuscate) obfuscate_exe(key, tmp_string); - ret = darshan_log_putexe(outfile, tmp_string); + ret = darshan_log_put_exe(outfile, tmp_string); if(ret < 0) { darshan_log_close(infile); @@ -298,7 +303,7 @@ int main(int argc, char **argv) return(-1); } - ret = darshan_log_getmounts(infile, &mnt_data_array, &mount_count); + ret = darshan_log_get_mounts(infile, &mnt_data_array, &mount_count); if(ret < 0) { darshan_log_close(infile); @@ -307,7 +312,7 @@ int main(int argc, char **argv) return(-1); } - ret = 
darshan_log_putmounts(outfile, mnt_data_array, mount_count); + ret = darshan_log_put_mounts(outfile, mnt_data_array, mount_count); if(ret < 0) { darshan_log_close(infile); @@ -315,7 +320,7 @@ int main(int argc, char **argv) return(-1); } - ret = darshan_log_gethash(infile, &rec_hash); + ret = darshan_log_get_namehash(infile, &name_hash); if(ret < 0) { darshan_log_close(infile); @@ -327,10 +332,10 @@ int main(int argc, char **argv) /* NOTE: obfuscating filepaths breaks the ability to map files * to the corresponding FS & mount info maintained by darshan */ - if(obfuscate) obfuscate_filenames(key, rec_hash); - if(hash) remove_hash_recs(&rec_hash, hash); + if(obfuscate) obfuscate_filenames(key, name_hash); + if(hash) remove_hash_recs(&name_hash, hash); - ret = darshan_log_puthash(outfile, rec_hash); + ret = darshan_log_put_namehash(outfile, name_hash); if(ret < 0) { darshan_log_close(infile); @@ -341,7 +346,7 @@ int main(int argc, char **argv) /* loop over each module and convert it's data to the new format */ for(i=0; imod_map[i].len == 0) @@ -356,7 +361,7 @@ int main(int argc, char **argv) /* we have module data to convert */ memset(mod_buf, 0, DEF_MOD_BUF_SIZE); - ret = mod_logutils[i]->log_get_record(infile, mod_buf, &rec_id); + ret = mod_logutils[i]->log_get_record(infile, mod_buf); if(ret != 1) { fprintf(stderr, "Error: failed to parse the first %s module record.\n", @@ -370,7 +375,9 @@ int main(int argc, char **argv) /* loop over each of the module's records and convert */ do { - if(!hash || hash == rec_id) + base_rec = (struct darshan_base_record *)mod_buf; + + if(!hash || hash == base_rec->id) { ret = mod_logutils[i]->log_put_record(outfile, mod_buf, infile->mod_ver[i]); if(ret < 0) @@ -382,7 +389,7 @@ int main(int argc, char **argv) memset(mod_buf, 0, DEF_MOD_BUF_SIZE); } - } while((ret = mod_logutils[i]->log_get_record(infile, mod_buf, &rec_id)) == 1); + } while((ret = mod_logutils[i]->log_get_record(infile, mod_buf)) == 1); } darshan_log_close(infile); @@ 
-391,10 +398,10 @@ int main(int argc, char **argv) if(mount_count > 0) free(mnt_data_array); - HASH_ITER(hlink, rec_hash, ref, tmp) + HASH_ITER(hlink, name_hash, ref, tmp) { - HASH_DELETE(hlink, rec_hash, ref); - free(ref->rec.name); + HASH_DELETE(hlink, name_hash, ref); + free(ref->name_record); free(ref); } diff --git a/darshan-util/darshan-diff.c b/darshan-util/darshan-diff.c index 602fcf1770ffa60ef1295e18d28fe3404a7bbddc..c737e746e26cdf610c20be22dd17efe46687a8c5 100644 --- a/darshan-util/darshan-diff.c +++ b/darshan-util/darshan-diff.c @@ -15,13 +15,6 @@ #define DEF_MOD_BUF_SIZE 1024 /* 1 KiB is enough for all current mod records ... */ -/* XXX: this structure is a temporary hack to get at the rank for each module's record */ -struct darshan_base_rec -{ - darshan_record_id f_id; - int64_t rank; -}; - struct darshan_mod_record_ref { int rank; @@ -60,13 +53,13 @@ int main(int argc, char *argv[]) darshan_fd file1, file2; struct darshan_job job1, job2; char exe1[4096], exe2[4096]; - struct darshan_record_ref *name_hash1 = NULL, *name_hash2 = NULL; - struct darshan_record_ref *name_ref1, *name_ref2; + struct darshan_name_record_ref *name_hash1 = NULL, *name_hash2 = NULL; + struct darshan_name_record_ref *name_ref1, *name_ref2; struct darshan_file_record_ref *rec_hash1 = NULL, *rec_hash2 = NULL; struct darshan_file_record_ref *rec_ref1, *rec_ref2, *rec_tmp; struct darshan_mod_record_ref *mod_rec1, *mod_rec2; void *mod_buf1, *mod_buf2; - struct darshan_base_rec *base_rec1, *base_rec2; + struct darshan_base_record *base_rec1, *base_rec2; char *file_name1, *file_name2; int i; int ret; @@ -96,7 +89,7 @@ int main(int argc, char *argv[]) } /* get job data for each log file */ - ret = darshan_log_getjob(file1, &job1); + ret = darshan_log_get_job(file1, &job1); if(ret < 0) { darshan_log_close(file1); @@ -105,7 +98,7 @@ int main(int argc, char *argv[]) return(-1); } - ret = darshan_log_getjob(file2, &job2); + ret = darshan_log_get_job(file2, &job2); if(ret < 0) { 
darshan_log_close(file1); @@ -115,7 +108,7 @@ int main(int argc, char *argv[]) } /* get exe string for each log file */ - ret = darshan_log_getexe(file1, exe1); + ret = darshan_log_get_exe(file1, exe1); if(ret < 0) { darshan_log_close(file1); @@ -124,7 +117,7 @@ int main(int argc, char *argv[]) return(-1); } - ret = darshan_log_getexe(file2, exe2); + ret = darshan_log_get_exe(file2, exe2); if(ret < 0) { darshan_log_close(file1); @@ -151,7 +144,7 @@ int main(int argc, char *argv[]) (int64_t)(job2.end_time - job2.start_time + 1)); /* get hash of record ids to file names for each log */ - ret = darshan_log_gethash(file1, &name_hash1); + ret = darshan_log_get_namehash(file1, &name_hash1); if(ret < 0) { darshan_log_close(file1); @@ -160,7 +153,7 @@ int main(int argc, char *argv[]) return(-1); } - ret = darshan_log_gethash(file2, &name_hash2); + ret = darshan_log_get_namehash(file2, &name_hash2); if(ret < 0) { darshan_log_close(file1); @@ -227,9 +220,8 @@ int main(int argc, char *argv[]) else mod_buf2 = mod_rec2->mod_dat; - base_rec1 = (struct darshan_base_rec *)mod_buf1; - base_rec2 = (struct darshan_base_rec *)mod_buf2; - + base_rec1 = (struct darshan_base_record *)mod_buf1; + base_rec2 = (struct darshan_base_record *)mod_buf2; if(!base_rec1 && !base_rec2) { /* break out if there are no more records for this module */ @@ -251,17 +243,17 @@ int main(int argc, char *argv[]) /* get corresponding file name for each record */ if(mod_buf1) { - HASH_FIND(hlink, name_hash1, &(base_rec1->f_id), + HASH_FIND(hlink, name_hash1, &(base_rec1->id), sizeof(darshan_record_id), name_ref1); assert(name_ref1); - file_name1 = name_ref1->rec.name; + file_name1 = name_ref1->name_record->name; } if(mod_buf2) { - HASH_FIND(hlink, name_hash2, &(base_rec2->f_id), + HASH_FIND(hlink, name_hash2, &(base_rec2->id), sizeof(darshan_record_id), name_ref2); assert(name_ref2); - file_name2 = name_ref2->rec.name; + file_name2 = name_ref2->name_record->name; } mod_logutils[i]->log_print_diff(mod_buf1, 
file_name1, mod_buf2, file_name2); @@ -312,19 +304,17 @@ int main(int argc, char *argv[]) */ HASH_ITER(hlink, rec_hash2, rec_ref2, rec_tmp) { - printf("\n"); - for(i = 0; i < DARSHAN_MAX_MODS; i++) { while(rec_ref2->mod_recs[i]) { mod_rec2 = rec_ref2->mod_recs[i]; - base_rec2 = (struct darshan_base_rec *)mod_rec2->mod_dat; + base_rec2 = (struct darshan_base_record *)mod_rec2->mod_dat; - HASH_FIND(hlink, name_hash2, &(base_rec2->f_id), + HASH_FIND(hlink, name_hash2, &(base_rec2->id), sizeof(darshan_record_id), name_ref2); assert(name_ref2); - file_name2 = name_ref2->rec.name; + file_name2 = name_ref2->name_record->name; mod_logutils[i]->log_print_diff(NULL, NULL, mod_rec2->mod_dat, file_name2); @@ -350,13 +340,13 @@ int main(int argc, char *argv[]) HASH_ITER(hlink, name_hash1, name_ref1, name_ref2) { HASH_DELETE(hlink, name_hash1, name_ref1); - free(name_ref1->rec.name); + free(name_ref1->name_record); free(name_ref1); } HASH_ITER(hlink, name_hash2, name_ref2, name_ref1) { HASH_DELETE(hlink, name_hash2, name_ref2); - free(name_ref2->rec.name); + free(name_ref2->name_record); free(name_ref2); } @@ -372,7 +362,7 @@ static int darshan_build_global_record_hash( struct darshan_mod_record_ref *mod_rec; struct darshan_file_record_ref *file_rec; darshan_record_id tmp_rec_id; - struct darshan_base_rec *base_rec; + struct darshan_base_record *base_rec; int i; int ret; @@ -389,7 +379,7 @@ static int darshan_build_global_record_hash( assert(mod_rec); memset(mod_rec, 0, sizeof(struct darshan_mod_record_ref)); - ret = mod_logutils[i]->log_get_record(fd, mod_rec->mod_dat, &tmp_rec_id); + ret = mod_logutils[i]->log_get_record(fd, mod_rec->mod_dat); if(ret < 0) { fprintf(stderr, "Error: unable to read module %s data from log file.\n", @@ -404,7 +394,7 @@ static int darshan_build_global_record_hash( } else { - base_rec = (struct darshan_base_rec *)mod_rec->mod_dat; + base_rec = (struct darshan_base_record *)mod_rec->mod_dat; mod_rec->rank = base_rec->rank; HASH_FIND(hlink, *rec_hash, 
&tmp_rec_id, sizeof(darshan_record_id), file_rec); diff --git a/darshan-util/darshan-hdf5-logutils.c b/darshan-util/darshan-hdf5-logutils.c index 031eb909fae463940c77647947b4e3160273c544..74e60bb402d57a4c955b9701369ca4d170715f3c 100644 --- a/darshan-util/darshan-hdf5-logutils.c +++ b/darshan-util/darshan-hdf5-logutils.c @@ -30,14 +30,14 @@ char *hdf5_f_counter_names[] = { }; #undef X -static int darshan_log_get_hdf5_file(darshan_fd fd, void* hdf5_buf, - darshan_record_id* rec_id); +static int darshan_log_get_hdf5_file(darshan_fd fd, void* hdf5_buf); static int darshan_log_put_hdf5_file(darshan_fd fd, void* hdf5_buf, int ver); static void darshan_log_print_hdf5_file(void *file_rec, char *file_name, char *mnt_pt, char *fs_type, int ver); static void darshan_log_print_hdf5_description(void); static void darshan_log_print_hdf5_file_diff(void *file_rec1, char *file_name1, void *file_rec2, char *file_name2); +static void darshan_log_agg_hdf5_files(void *rec, void *agg_rec, int init_flag); struct darshan_mod_logutil_funcs hdf5_logutils = { @@ -45,17 +45,17 @@ struct darshan_mod_logutil_funcs hdf5_logutils = .log_put_record = &darshan_log_put_hdf5_file, .log_print_record = &darshan_log_print_hdf5_file, .log_print_description = &darshan_log_print_hdf5_description, - .log_print_diff = &darshan_log_print_hdf5_file_diff + .log_print_diff = &darshan_log_print_hdf5_file_diff, + .log_agg_records = &darshan_log_agg_hdf5_files }; -static int darshan_log_get_hdf5_file(darshan_fd fd, void* hdf5_buf, - darshan_record_id* rec_id) +static int darshan_log_get_hdf5_file(darshan_fd fd, void* hdf5_buf) { struct darshan_hdf5_file *file; int i; int ret; - ret = darshan_log_getmod(fd, DARSHAN_HDF5_MOD, hdf5_buf, + ret = darshan_log_get_mod(fd, DARSHAN_HDF5_MOD, hdf5_buf, sizeof(struct darshan_hdf5_file)); if(ret < 0) return(-1); @@ -67,15 +67,14 @@ static int darshan_log_get_hdf5_file(darshan_fd fd, void* hdf5_buf, if(fd->swap_flag) { /* swap bytes if necessary */ - 
DARSHAN_BSWAP64(&file->f_id); - DARSHAN_BSWAP64(&file->rank); + DARSHAN_BSWAP64(&(file->base_rec.id)); + DARSHAN_BSWAP64(&(file->base_rec.rank)); for(i=0; icounters[i]); for(i=0; ifcounters[i]); } - *rec_id = file->f_id; return(1); } } @@ -85,7 +84,7 @@ static int darshan_log_put_hdf5_file(darshan_fd fd, void* hdf5_buf, int ver) struct darshan_hdf5_file *file = (struct darshan_hdf5_file *)hdf5_buf; int ret; - ret = darshan_log_putmod(fd, DARSHAN_HDF5_MOD, file, + ret = darshan_log_put_mod(fd, DARSHAN_HDF5_MOD, file, sizeof(struct darshan_hdf5_file), ver); if(ret < 0) return(-1); @@ -103,15 +102,17 @@ static void darshan_log_print_hdf5_file(void *file_rec, char *file_name, for(i=0; irank, hdf5_file_rec->f_id, hdf5_counter_names[i], - hdf5_file_rec->counters[i], file_name, mnt_pt, fs_type); + hdf5_file_rec->base_rec.rank, hdf5_file_rec->base_rec.id, + hdf5_counter_names[i], hdf5_file_rec->counters[i], + file_name, mnt_pt, fs_type); } for(i=0; irank, hdf5_file_rec->f_id, hdf5_f_counter_names[i], - hdf5_file_rec->fcounters[i], file_name, mnt_pt, fs_type); + hdf5_file_rec->base_rec.rank, hdf5_file_rec->base_rec.id, + hdf5_f_counter_names[i], hdf5_file_rec->fcounters[i], + file_name, mnt_pt, fs_type); } return; @@ -144,7 +145,7 @@ static void darshan_log_print_hdf5_file_diff(void *file_rec1, char *file_name1, { printf("- "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_HDF5_MOD], - file1->rank, file1->f_id, hdf5_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, hdf5_counter_names[i], file1->counters[i], file_name1, "", ""); } @@ -152,18 +153,18 @@ static void darshan_log_print_hdf5_file_diff(void *file_rec1, char *file_name1, { printf("+ "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_HDF5_MOD], - file2->rank, file2->f_id, hdf5_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, hdf5_counter_names[i], file2->counters[i], file_name2, "", ""); } else if(file1->counters[i] != file2->counters[i]) { printf("- "); 
DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_HDF5_MOD], - file1->rank, file1->f_id, hdf5_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, hdf5_counter_names[i], file1->counters[i], file_name1, "", ""); printf("+ "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_HDF5_MOD], - file2->rank, file2->f_id, hdf5_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, hdf5_counter_names[i], file2->counters[i], file_name2, "", ""); } } @@ -174,7 +175,7 @@ static void darshan_log_print_hdf5_file_diff(void *file_rec1, char *file_name1, { printf("- "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_HDF5_MOD], - file1->rank, file1->f_id, hdf5_f_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, hdf5_f_counter_names[i], file1->fcounters[i], file_name1, "", ""); } @@ -182,18 +183,18 @@ static void darshan_log_print_hdf5_file_diff(void *file_rec1, char *file_name1, { printf("+ "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_HDF5_MOD], - file2->rank, file2->f_id, hdf5_f_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, hdf5_f_counter_names[i], file2->fcounters[i], file_name2, "", ""); } else if(file1->fcounters[i] != file2->fcounters[i]) { printf("- "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_HDF5_MOD], - file1->rank, file1->f_id, hdf5_f_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, hdf5_f_counter_names[i], file1->fcounters[i], file_name1, "", ""); printf("+ "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_HDF5_MOD], - file2->rank, file2->f_id, hdf5_f_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, hdf5_f_counter_names[i], file2->fcounters[i], file_name2, "", ""); } } @@ -201,6 +202,54 @@ static void darshan_log_print_hdf5_file_diff(void *file_rec1, char *file_name1, return; } +static void darshan_log_agg_hdf5_files(void *rec, void *agg_rec, int init_flag) +{ + struct darshan_hdf5_file *hdf5_rec = (struct darshan_hdf5_file *)rec; + struct darshan_hdf5_file 
*agg_hdf5_rec = (struct darshan_hdf5_file *)agg_rec; + int i; + + for(i = 0; i < HDF5_NUM_INDICES; i++) + { + switch(i) + { + case HDF5_OPENS: + /* sum */ + agg_hdf5_rec->counters[i] += hdf5_rec->counters[i]; + break; + default: + agg_hdf5_rec->counters[i] = -1; + break; + } + } + + for(i = 0; i < HDF5_F_NUM_INDICES; i++) + { + switch(i) + { + case HDF5_F_OPEN_TIMESTAMP: + /* minimum non-zero */ + if((hdf5_rec->fcounters[i] > 0) && + ((agg_hdf5_rec->fcounters[i] == 0) || + (hdf5_rec->fcounters[i] < agg_hdf5_rec->fcounters[i]))) + { + agg_hdf5_rec->fcounters[i] = hdf5_rec->fcounters[i]; + } + break; + case HDF5_F_CLOSE_TIMESTAMP: + /* maximum */ + if(hdf5_rec->fcounters[i] > agg_hdf5_rec->fcounters[i]) + { + agg_hdf5_rec->fcounters[i] = hdf5_rec->fcounters[i]; + } + break; + default: + agg_hdf5_rec->fcounters[i] = -1; + break; + } + } + + return; +} /* * Local variables: diff --git a/darshan-util/darshan-logutils.c b/darshan-util/darshan-logutils.c index 0bbcdd46f0878cddbd71e26f049b9023eb9f3e20..5c933119644be961ba2cf29ab281cd0ceb2c372c 100644 --- a/darshan-util/darshan-logutils.c +++ b/darshan-util/darshan-logutils.c @@ -27,20 +27,22 @@ */ #define DARSHAN_HEADER_REGION_ID (-3) #define DARSHAN_JOB_REGION_ID (-2) -#define DARSHAN_REC_MAP_REGION_ID (-1) +#define DARSHAN_NAME_MAP_REGION_ID (-1) struct darshan_dz_state { - /* (libz/bzip2) stream data structure for managing - * compression and decompression state */ - void *strm; + /* pointer to arbitrary data structure used for managing + * compression/decompression state (e.g., z_stream + * structure needed for libz) + */ + void *comp_dat; /* buffer for staging compressed data to/from log file */ unsigned char *buf; /* size of staging buffer */ - int size; + unsigned int size; /* for reading logs, flag indicating end of log file region */ int eor; - /* the region we last tried reading/writing */ + /* the region id we last tried reading/writing */ int prev_reg_id; }; @@ -53,47 +55,67 @@ struct darshan_fd_int_state 
int64_t pos; /* flag indicating whether log file was created (and written) */ int creat_flag; - /* compression type used on log file (libz or bzip2) */ - enum darshan_comp_type comp_type; /* log file path name */ char logfile_path[PATH_MAX]; /* pointer to exe & mount data in darshan job data structure */ char *exe_mnt_data; /* whether previous file operations have failed */ int err; + /* log format version-specific function calls for getting + * data from the log file + */ + int (*get_namerecs)(void *, int, int, struct darshan_name_record_ref **); - /* compression/decompression state */ + /* compression/decompression stream read/write state */ struct darshan_dz_state dz; }; +/* each module's implementation of the darshan logutil functions */ +#define X(a, b, c, d) d, +struct darshan_mod_logutil_funcs *mod_logutils[DARSHAN_MAX_MODS] = +{ + DARSHAN_MODULE_IDS +}; +#undef X + +/* internal helper functions */ static int darshan_mnt_info_cmp(const void *a, const void *b); -static int darshan_log_getheader(darshan_fd fd); -static int darshan_log_putheader(darshan_fd fd); +static int darshan_log_get_namerecs(void *name_rec_buf, int buf_len, + int swap_flag, struct darshan_name_record_ref **hash); +static int darshan_log_get_header(darshan_fd fd); +static int darshan_log_put_header(darshan_fd fd); static int darshan_log_seek(darshan_fd fd, off_t offset); static int darshan_log_read(darshan_fd fd, void *buf, int len); static int darshan_log_write(darshan_fd fd, void *buf, int len); -static int darshan_log_dzinit(struct darshan_fd_int_state *state); -static void darshan_log_dzdestroy(struct darshan_fd_int_state *state); +static int darshan_log_dzinit(darshan_fd fd); +static void darshan_log_dzdestroy(darshan_fd fd); static int darshan_log_dzread(darshan_fd fd, int region_id, void *buf, int len); static int darshan_log_dzwrite(darshan_fd fd, int region_id, void *buf, int len); -static int darshan_log_libz_read(darshan_fd fd, int region_id, void *buf, int len); -static int 
darshan_log_libz_write(darshan_fd fd, int region_id, void *buf, int len); +static int darshan_log_libz_read(darshan_fd fd, struct darshan_log_map map, + void *buf, int len, int reset_strm_flag); +static int darshan_log_libz_write(darshan_fd fd, struct darshan_log_map *map_p, + void *buf, int len, int flush_strm_flag); static int darshan_log_libz_flush(darshan_fd fd, int region_id); #ifdef HAVE_LIBBZ2 -static int darshan_log_bzip2_read(darshan_fd fd, int region_id, void *buf, int len); -static int darshan_log_bzip2_write(darshan_fd fd, int region_id, void *buf, int len); +static int darshan_log_bzip2_read(darshan_fd fd, struct darshan_log_map map, + void *buf, int len, int reset_strm_flag); +static int darshan_log_bzip2_write(darshan_fd fd, struct darshan_log_map *map_p, + void *buf, int len, int flush_strm_flag); static int darshan_log_bzip2_flush(darshan_fd fd, int region_id); #endif static int darshan_log_dzload(darshan_fd fd, struct darshan_log_map map); static int darshan_log_dzunload(darshan_fd fd, struct darshan_log_map *map_p); +static int darshan_log_noz_read(darshan_fd fd, struct darshan_log_map map, + void *buf, int len, int reset_strm_flag); -/* each module's implementation of the darshan logutil functions */ -#define X(a, b, c, d) d, -struct darshan_mod_logutil_funcs *mod_logutils[DARSHAN_MAX_MODS] = -{ - DARSHAN_MODULE_IDS -}; -#undef X +/* backwards compatibility functions */ +int darshan_log_get_namerecs_3_00(void *name_rec_buf, int buf_len, + int swap_flag, struct darshan_name_record_ref **hash); + + +/******************************************************** + * publically exposed logutil functions * + ********************************************************/ /* darshan_log_open() * @@ -131,7 +153,7 @@ darshan_fd darshan_log_open(const char *name) strncpy(tmp_fd->state->logfile_path, name, PATH_MAX); /* read the header from the log file to init fd data structures */ - ret = darshan_log_getheader(tmp_fd); + ret = darshan_log_get_header(tmp_fd); 
if(ret < 0) { fprintf(stderr, "Error: failed to read darshan log file header.\n"); @@ -142,7 +164,7 @@ darshan_fd darshan_log_open(const char *name) } /* initialize compression data structures */ - ret = darshan_log_dzinit(tmp_fd->state); + ret = darshan_log_dzinit(tmp_fd); if(ret < 0) { fprintf(stderr, "Error: failed to initialize decompression data structures.\n"); @@ -179,6 +201,7 @@ darshan_fd darshan_log_create(const char *name, enum darshan_comp_type comp_type return(NULL); } memset(tmp_fd->state, 0, sizeof(struct darshan_fd_int_state)); + tmp_fd->comp_type = comp_type; /* create the log for writing, making sure to not overwrite existing log */ tmp_fd->state->fildes = creat(name, 0400); @@ -190,7 +213,6 @@ darshan_fd darshan_log_create(const char *name, enum darshan_comp_type comp_type return(NULL); } tmp_fd->state->creat_flag = 1; - tmp_fd->state->comp_type = comp_type; tmp_fd->partial_flag = partial_flag; strncpy(tmp_fd->state->logfile_path, name, PATH_MAX); @@ -210,7 +232,7 @@ darshan_fd darshan_log_create(const char *name, enum darshan_comp_type comp_type } /* initialize compression data structures */ - ret = darshan_log_dzinit(tmp_fd->state); + ret = darshan_log_dzinit(tmp_fd); if(ret < 0) { fprintf(stderr, "Error: failed to initialize compression data structures.\n"); @@ -224,13 +246,13 @@ darshan_fd darshan_log_create(const char *name, enum darshan_comp_type comp_type return(tmp_fd); } -/* darshan_log_getjob() +/* darshan_log_get_job() * * read job level metadata from the darshan log file * * returns 0 on success, -1 on failure */ -int darshan_log_getjob(darshan_fd fd, struct darshan_job *job) +int darshan_log_get_job(darshan_fd fd, struct darshan_job *job) { struct darshan_fd_int_state *state = fd->state; char job_buf[DARSHAN_JOB_RECORD_SIZE] = {0}; @@ -242,7 +264,7 @@ int darshan_log_getjob(darshan_fd fd, struct darshan_job *job) /* read the compressed job data from the log file */ ret = darshan_log_dzread(fd, DARSHAN_JOB_REGION_ID, job_buf, 
job_buf_sz); - if(ret <= sizeof(*job)) + if(ret <= (int)sizeof(*job)) { fprintf(stderr, "Error: failed to read darshan log file job data.\n"); return(-1); @@ -270,13 +292,13 @@ int darshan_log_getjob(darshan_fd fd, struct darshan_job *job) return(0); } -/* darshan_log_putjob() +/* darshan_log_put_job() * * write job level metadata to darshan log file * * returns 0 on success, -1 on failure */ -int darshan_log_putjob(darshan_fd fd, struct darshan_job *job) +int darshan_log_put_job(darshan_fd fd, struct darshan_job *job) { struct darshan_fd_int_state *state = fd->state; struct darshan_job job_copy; @@ -311,13 +333,13 @@ int darshan_log_putjob(darshan_fd fd, struct darshan_job *job) return(0); } -/* darshan_log_getexe() +/* darshan_log_get_exe() * * reads the application exe name from darshan log file * * returns 0 on success, -1 on failure */ -int darshan_log_getexe(darshan_fd fd, char *buf) +int darshan_log_get_exe(darshan_fd fd, char *buf) { struct darshan_fd_int_state *state = fd->state; char *newline; @@ -329,7 +351,7 @@ int darshan_log_getexe(darshan_fd fd, char *buf) if(!(state->exe_mnt_data)) { struct darshan_job job; - ret = darshan_log_getjob(fd, &job); + ret = darshan_log_get_job(fd, &job); if(ret < 0 || !(state->exe_mnt_data)) return(-1); @@ -345,7 +367,7 @@ int darshan_log_getexe(darshan_fd fd, char *buf) return (0); } -/* darshan_log_putexe() +/* darshan_log_put_exe() * * wrties the application exe name to darshan log file * NOTE: this needs to be called immediately following put_job as it @@ -354,7 +376,7 @@ int darshan_log_getexe(darshan_fd fd, char *buf) * * returns 0 on success, -1 on failure */ -int darshan_log_putexe(darshan_fd fd, char *buf) +int darshan_log_put_exe(darshan_fd fd, char *buf) { struct darshan_fd_int_state *state = fd->state; int len = strlen(buf); @@ -373,7 +395,7 @@ int darshan_log_putexe(darshan_fd fd, char *buf) return(0); } -/* darshan_log_getmounts() +/* darshan_log_get_mounts() * * retrieves mount table information from the 
log. Note that mnt_data_array * is an array that will be allocated by the function and must be @@ -381,7 +403,7 @@ int darshan_log_putexe(darshan_fd fd, char *buf) * * returns 0 on success, -1 on failure */ -int darshan_log_getmounts(darshan_fd fd, struct darshan_mnt_info **mnt_data_array, +int darshan_log_get_mounts(darshan_fd fd, struct darshan_mnt_info **mnt_data_array, int* count) { struct darshan_fd_int_state *state = fd->state; @@ -395,7 +417,7 @@ int darshan_log_getmounts(darshan_fd fd, struct darshan_mnt_info **mnt_data_arra if(!(state->exe_mnt_data)) { struct darshan_job job; - ret = darshan_log_getjob(fd, &job); + ret = darshan_log_get_job(fd, &job); if(ret < 0 || !(state->exe_mnt_data)) return(-1); @@ -441,7 +463,7 @@ int darshan_log_getmounts(darshan_fd fd, struct darshan_mnt_info **mnt_data_arra return(0); } -/* darshan_log_putmounts() +/* darshan_log_put_mounts() * * writes mount information to the darshan log file * NOTE: this function call should follow immediately after the call @@ -450,7 +472,7 @@ int darshan_log_getmounts(darshan_fd fd, struct darshan_mnt_info **mnt_data_arra * * returns 0 on success, -1 on failure */ -int darshan_log_putmounts(darshan_fd fd, struct darshan_mnt_info *mnt_data_array, +int darshan_log_put_mounts(darshan_fd fd, struct darshan_mnt_info *mnt_data_array, int count) { struct darshan_fd_int_state *state = fd->state; @@ -485,205 +507,126 @@ int darshan_log_putmounts(darshan_fd fd, struct darshan_mnt_info *mnt_data_array return(0); } -/* darshan_log_gethash() +/* darshan_log_get_namehash() * - * read the hash of records from the darshan log file + * read the set of name records from the darshan log file and add to the + * given hash table * * returns 0 on success, -1 on failure */ -int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash) +int darshan_log_get_namehash(darshan_fd fd, struct darshan_name_record_ref **hash) { struct darshan_fd_int_state *state = fd->state; - char *hash_buf; - int hash_buf_sz; - 
char *buf_ptr; - darshan_record_id *rec_id_ptr; - uint32_t *path_len_ptr, tmp_path_len; - char *path_ptr; - struct darshan_record_ref *ref; + char *name_rec_buf; + int name_rec_buf_sz; int read; int read_req_sz; - int buf_remaining = 0; + int buf_len = 0; + int buf_processed; assert(state); - /* just return if there is no record mapping data */ - if(fd->rec_map.len == 0) + /* just return if there is no name record mapping data */ + if(fd->name_map.len == 0) { *hash = NULL; return(0); } - /* default to hash buffer twice as big as default compression buf */ - hash_buf = malloc(DARSHAN_DEF_COMP_BUF_SZ * 2); - if(!hash_buf) + /* default to buffer twice as big as default compression buf */ + name_rec_buf_sz = DARSHAN_DEF_COMP_BUF_SZ * 2; + name_rec_buf = malloc(name_rec_buf_sz); + if(!name_rec_buf) return(-1); - memset(hash_buf, 0, DARSHAN_DEF_COMP_BUF_SZ * 2); - hash_buf_sz = DARSHAN_DEF_COMP_BUF_SZ * 2; + memset(name_rec_buf, 0, name_rec_buf_sz); do { - /* read chunks of the darshan record id -> file name mapping from log file, + /* read chunks of the darshan record id -> name mapping from log file, * constructing a hash table in the process */ - read_req_sz = hash_buf_sz - buf_remaining; - read = darshan_log_dzread(fd, DARSHAN_REC_MAP_REGION_ID, - hash_buf + buf_remaining, read_req_sz); + read_req_sz = name_rec_buf_sz - buf_len; + read = darshan_log_dzread(fd, DARSHAN_NAME_MAP_REGION_ID, + name_rec_buf + buf_len, read_req_sz); if(read < 0) { - fprintf(stderr, "Error: failed to read record hash from darshan log file.\n"); - free(hash_buf); + fprintf(stderr, "Error: failed to read name hash from darshan log file.\n"); + free(name_rec_buf); return(-1); } + buf_len += read; - /* work through the hash buffer -- deserialize the mapping data and - * add to the output hash table - * NOTE: these mapping pairs are variable in length, so we have to be able - * to handle incomplete mappings temporarily here - */ - buf_ptr = hash_buf; - buf_remaining += read; - 
while(buf_remaining > (sizeof(darshan_record_id) + sizeof(uint32_t))) - { - /* see if we have enough buf space to read in the next full record */ - tmp_path_len = *(uint32_t *)(buf_ptr + sizeof(darshan_record_id)); - if(fd->swap_flag) - DARSHAN_BSWAP32(&tmp_path_len); - - /* we need to read more before we continue deserializing */ - if(buf_remaining < - (sizeof(darshan_record_id) + sizeof(uint32_t) + tmp_path_len)) - break; - - /* get pointers for each field of this darshan record */ - /* NOTE: darshan record hash serialization method: - * ... darshan_record_id | (uint32_t) path_len | path ... - */ - rec_id_ptr = (darshan_record_id *)buf_ptr; - buf_ptr += sizeof(darshan_record_id); - path_len_ptr = (uint32_t *)buf_ptr; - buf_ptr += sizeof(uint32_t); - path_ptr = (char *)buf_ptr; - - if(fd->swap_flag) - { - /* we need to sort out endianness issues before deserializing */ - DARSHAN_BSWAP64(rec_id_ptr); - DARSHAN_BSWAP32(path_len_ptr); - } - - HASH_FIND(hlink, *hash, rec_id_ptr, sizeof(darshan_record_id), ref); - if(!ref) - { - ref = malloc(sizeof(*ref)); - if(!ref) - { - free(hash_buf); - return(-1); - } - ref->rec.name = malloc(*path_len_ptr + 1); - if(!ref->rec.name) - { - free(ref); - free(hash_buf); - return(-1); - } - - /* set the fields for this record */ - ref->rec.id = *rec_id_ptr; - memcpy(ref->rec.name, path_ptr, *path_len_ptr); - ref->rec.name[*path_len_ptr] = '\0'; - - /* add this record to the hash */ - HASH_ADD(hlink, *hash, rec.id, sizeof(darshan_record_id), ref); - } - - buf_ptr += *path_len_ptr; - buf_remaining -= - (sizeof(darshan_record_id) + sizeof(uint32_t) + *path_len_ptr); - } + /* extract any name records in the buffer */ + buf_processed = state->get_namerecs(name_rec_buf, buf_len, fd->swap_flag, hash); /* copy any leftover data to beginning of buffer to parse next */ - memcpy(hash_buf, buf_ptr, buf_remaining); + memcpy(name_rec_buf, name_rec_buf + buf_processed, buf_len - buf_processed); + buf_len -= buf_processed; /* we keep reading until we 
get a short read informing us we have * read all of the record hash */ } while(read == read_req_sz); - assert(buf_remaining == 0); + assert(buf_len == 0); - free(hash_buf); + free(name_rec_buf); return(0); } -/* darshan_log_puthash() +/* darshan_log_put_namehash() * - * writes the hash table of records to the darshan log file + * writes the hash table of name records to the darshan log file * NOTE: this function call should follow immediately after the call * to darshan_log_putmounts(), as it assumes the darshan log file pointer * is pointing to the offset immediately following the mount information * * returns 0 on success, -1 on failure */ -int darshan_log_puthash(darshan_fd fd, struct darshan_record_ref *hash) +int darshan_log_put_namehash(darshan_fd fd, struct darshan_name_record_ref *hash) { struct darshan_fd_int_state *state = fd->state; - char *hash_buf; - int hash_buf_sz; - struct darshan_record_ref *ref, *tmp; - char *buf_ptr; - int path_len; + struct darshan_name_record_ref *ref, *tmp; + struct darshan_name_record_ref *name_rec; + int name_rec_len; int wrote; assert(state); /* allocate memory for largest possible hash record */ - hash_buf_sz = sizeof(darshan_record_id) + sizeof(uint32_t) + PATH_MAX; - hash_buf = malloc(hash_buf_sz); - if(!hash_buf) + name_rec = malloc(sizeof(struct darshan_name_record) + PATH_MAX); + if(!name_rec) return(-1); - memset(hash_buf, 0, hash_buf_sz); + memset(name_rec, 0, sizeof(struct darshan_name_record) + PATH_MAX); /* individually serialize each hash record and write to log file */ HASH_ITER(hlink, hash, ref, tmp) { - buf_ptr = hash_buf; - path_len = strlen(ref->rec.name); - - /* the hash buffer has space to serialize this record - * NOTE: darshan record hash serialization method: - * ... darshan_record_id | (uint32_t) path_len | path ... 
- */ - *((darshan_record_id *)buf_ptr) = ref->rec.id; - buf_ptr += sizeof(darshan_record_id); - *((uint32_t *)buf_ptr) = path_len; - buf_ptr += sizeof(uint32_t); - memcpy(buf_ptr, ref->rec.name, path_len); - buf_ptr += path_len; + name_rec_len = sizeof(struct darshan_name_record) + strlen(ref->name_record->name); + memcpy(name_rec, ref->name_record, name_rec_len); /* write this hash entry to log file */ - wrote = darshan_log_dzwrite(fd, DARSHAN_REC_MAP_REGION_ID, - hash_buf, (buf_ptr - hash_buf)); - if(wrote != (buf_ptr - hash_buf)) + wrote = darshan_log_dzwrite(fd, DARSHAN_NAME_MAP_REGION_ID, + name_rec, name_rec_len); + if(wrote != name_rec_len) { state->err = -1; - fprintf(stderr, "Error: failed to write record hash to darshan log file.\n"); - free(hash_buf); + fprintf(stderr, "Error: failed to write name hash to darshan log file.\n"); + free(name_rec); return(-1); } } - free(hash_buf); + free(name_rec); return(0); } -/* darshan_log_getmod() +/* darshan_log_get_mod() * * get a chunk of module data from the darshan log file * * returns number of bytes read on success, -1 on failure */ -int darshan_log_getmod(darshan_fd fd, darshan_module_id mod_id, +int darshan_log_get_mod(darshan_fd fd, darshan_module_id mod_id, void *mod_buf, int mod_buf_sz) { struct darshan_fd_int_state *state = fd->state; @@ -713,7 +656,7 @@ int darshan_log_getmod(darshan_fd fd, darshan_module_id mod_id, return(ret); } -/* darshan_log_putmod() +/* darshan_log_put_mod() * * write a chunk of module data to the darshan log file * NOTE: this function call should be called directly after the @@ -725,7 +668,7 @@ int darshan_log_getmod(darshan_fd fd, darshan_module_id mod_id, * * returns number of bytes written on success, -1 on failure */ -int darshan_log_putmod(darshan_fd fd, darshan_module_id mod_id, +int darshan_log_put_mod(darshan_fd fd, darshan_module_id mod_id, void *mod_buf, int mod_buf_sz, int ver) { struct darshan_fd_int_state *state = fd->state; @@ -773,7 +716,7 @@ void 
darshan_log_close(darshan_fd fd) if(state->creat_flag) { /* flush the last region of the log to file */ - switch(state->comp_type) + switch(fd->comp_type) { case DARSHAN_ZLIB_COMP: ret = darshan_log_libz_flush(fd, state->dz.prev_reg_id); @@ -795,7 +738,7 @@ void darshan_log_close(darshan_fd fd) /* if no errors flushing, write the log header before closing */ if(state->err != -1) { - ret = darshan_log_putheader(fd); + ret = darshan_log_put_header(fd); if(ret < 0) state->err = -1; } @@ -811,7 +754,7 @@ void darshan_log_close(darshan_fd fd) unlink(state->logfile_path); } - darshan_log_dzdestroy(state); + darshan_log_dzdestroy(fd); if(state->exe_mnt_data) free(state->exe_mnt_data); free(state); @@ -820,7 +763,9 @@ void darshan_log_close(darshan_fd fd) return; } -/* **************************************************** */ +/******************************************************** + * internal helper functions * + ********************************************************/ static int darshan_mnt_info_cmp(const void *a, const void *b) { @@ -835,14 +780,76 @@ static int darshan_mnt_info_cmp(const void *a, const void *b) return(0); } +static int darshan_log_get_namerecs(void *name_rec_buf, int buf_len, + int swap_flag, struct darshan_name_record_ref **hash) +{ + struct darshan_name_record_ref *ref; + struct darshan_name_record *name_rec; + char *tmp_p; + int buf_processed = 0; + int rec_len; + + /* work through the name record buffer -- deserialize the record data + * and add to the output hash table + * NOTE: these mapping pairs are variable in length, so we have to be able + * to handle incomplete mappings temporarily here + */ + name_rec = (struct darshan_name_record *)name_rec_buf; + while(buf_len > sizeof(darshan_record_id) + 1) + { + if(strnlen(name_rec->name, buf_len - sizeof(darshan_record_id)) == + (buf_len - sizeof(darshan_record_id))) + { + /* if this record name's terminating null character is not + * present, we need to read more of the buffer before continuing + 
*/ + break; + } + rec_len = sizeof(darshan_record_id) + strlen(name_rec->name) + 1; + + if(swap_flag) + { + /* we need to sort out endianness issues before deserializing */ + DARSHAN_BSWAP64(&(name_rec->id)); + } + + HASH_FIND(hlink, *hash, &(name_rec->id), sizeof(darshan_record_id), ref); + if(!ref) + { + ref = malloc(sizeof(*ref)); + if(!ref) + return(-1); + + ref->name_record = malloc(rec_len); + if(!ref->name_record) + { + free(ref); + return(-1); + } + + /* copy the name record over from the hash buffer */ + memcpy(ref->name_record, name_rec, rec_len); + + /* add this record to the hash */ + HASH_ADD(hlink, *hash, name_record->id, sizeof(darshan_record_id), ref); + } + + tmp_p = (char *)name_rec + rec_len; + name_rec = (struct darshan_name_record *)tmp_p; + buf_len -= rec_len; + buf_processed += rec_len; + } + + return(buf_processed); +} + /* read the header of the darshan log and set internal fd data structures * NOTE: this is the only portion of the darshan log that is uncompressed * * returns 0 on success, -1 on failure */ -static int darshan_log_getheader(darshan_fd fd) +static int darshan_log_get_header(darshan_fd fd) { - struct darshan_fd_int_state *state = fd->state; struct darshan_header header; int i; int ret; @@ -863,10 +870,19 @@ static int darshan_log_getheader(darshan_fd fd) } /* other log file versions can be detected and handled here */ - if(strcmp(fd->version, "3.00")) + if(strcmp(fd->version, "3.00") == 0) + { + fd->state->get_namerecs = darshan_log_get_namerecs_3_00; + } + else if(strcmp(fd->version, "3.01") == 0) + { + fd->state->get_namerecs = darshan_log_get_namerecs; + } + else { fprintf(stderr, "Error: incompatible darshan file.\n"); - fprintf(stderr, "Error: expected version %s\n", DARSHAN_LOG_VERSION); + fprintf(stderr, "Error: expected version %s, but got %s\n", + DARSHAN_LOG_VERSION, fd->version); return(-1); } @@ -880,7 +896,7 @@ static int darshan_log_getheader(darshan_fd fd) /* read uncompressed header from log file */ ret = 
darshan_log_read(fd, &header, sizeof(header)); - if(ret != sizeof(header)) + if(ret != (int)sizeof(header)) { fprintf(stderr, "Error: failed to read darshan log file header.\n"); return(-1); @@ -900,12 +916,13 @@ static int darshan_log_getheader(darshan_fd fd) fd->swap_flag = 1; /* swap the log map variables in the header */ - DARSHAN_BSWAP64(&(header.rec_map.off)); - DARSHAN_BSWAP64(&(header.rec_map.len)); + DARSHAN_BSWAP64(&(header.name_map.off)); + DARSHAN_BSWAP64(&(header.name_map.len)); for(i = 0; i < DARSHAN_MAX_MODS; i++) { DARSHAN_BSWAP64(&(header.mod_map[i].off)); DARSHAN_BSWAP64(&(header.mod_map[i].len)); + DARSHAN_BSWAP32(&(header.mod_ver[i])); } } else @@ -917,16 +934,43 @@ static int darshan_log_getheader(darshan_fd fd) } /* set some fd fields based on what's stored in the header */ - state->comp_type = header.comp_type; + fd->comp_type = header.comp_type; fd->partial_flag = header.partial_flag; memcpy(fd->mod_ver, header.mod_ver, DARSHAN_MAX_MODS * sizeof(uint32_t)); /* save the mapping of data within log file to this file descriptor */ - fd->job_map.off = sizeof(struct darshan_header); - fd->job_map.len = header.rec_map.off - fd->job_map.off; - memcpy(&fd->rec_map, &(header.rec_map), sizeof(struct darshan_log_map)); + memcpy(&fd->name_map, &(header.name_map), sizeof(struct darshan_log_map)); memcpy(&fd->mod_map, &(header.mod_map), DARSHAN_MAX_MODS * sizeof(struct darshan_log_map)); + /* there may be nothing following the job data, so safety check map */ + fd->job_map.off = sizeof(struct darshan_header); + if(fd->name_map.off == 0) + { + for(i = 0; i < DARSHAN_MAX_MODS; i++) + { + if(fd->mod_map[i].off != 0) + { + fd->job_map.len = fd->mod_map[i].off - fd->job_map.off; + break; + } + } + + if(fd->job_map.len == 0) + { + struct stat sbuf; + if(fstat(fd->state->fildes, &sbuf) != 0) + { + fprintf(stderr, "Error: unable to stat darshan log file.\n"); + return(-1); + } + fd->job_map.len = sbuf.st_size - fd->job_map.off; + } + } + else + { + fd->job_map.len 
= fd->name_map.off - fd->job_map.off; + } + return(0); } @@ -934,9 +978,8 @@ static int darshan_log_getheader(darshan_fd fd) * * returns 0 on success, -1 on failure */ -static int darshan_log_putheader(darshan_fd fd) +static int darshan_log_put_header(darshan_fd fd) { - struct darshan_fd_int_state *state = fd->state; struct darshan_header header; int ret; @@ -950,16 +993,15 @@ static int darshan_log_putheader(darshan_fd fd) memset(&header, 0, sizeof(header)); strcpy(header.version_string, DARSHAN_LOG_VERSION); header.magic_nr = DARSHAN_MAGIC_NR; - header.comp_type = state->comp_type; + header.comp_type = fd->comp_type; header.partial_flag = fd->partial_flag; - - /* copy the mapping information to the header */ - memcpy(&header.rec_map, &fd->rec_map, sizeof(struct darshan_log_map)); - memcpy(&header.mod_map, &fd->mod_map, DARSHAN_MAX_MODS * sizeof(struct darshan_log_map)); + memcpy(&header.name_map, &fd->name_map, sizeof(struct darshan_log_map)); + memcpy(header.mod_map, fd->mod_map, DARSHAN_MAX_MODS * sizeof(struct darshan_log_map)); + memcpy(header.mod_ver, fd->mod_ver, DARSHAN_MAX_MODS * sizeof(uint32_t)); /* write header to file */ ret = darshan_log_write(fd, &header, sizeof(header)); - if(ret != sizeof(header)) + if(ret != (int)sizeof(header)) { fprintf(stderr, "Error: failed to write Darshan log file header.\n"); return(-1); @@ -994,13 +1036,20 @@ static int darshan_log_read(darshan_fd fd, void* buf, int len) { struct darshan_fd_int_state *state = fd->state; int ret; + unsigned int read_so_far = 0; - /* read data from the log file using the given map */ - ret = read(state->fildes, buf, len); - if(ret > 0) - state->pos += ret; + do + { + ret = read(state->fildes, buf + read_so_far, len - read_so_far); + if(ret <= 0) + break; + read_so_far += ret; + } while(read_so_far < len); + if(ret < 0) + return(-1); - return(ret); + state->pos += read_so_far; + return(read_so_far); } /* return amount written on success, -1 on failure. 
@@ -1009,26 +1058,37 @@ static int darshan_log_write(darshan_fd fd, void* buf, int len) { struct darshan_fd_int_state *state = fd->state; int ret; + unsigned int wrote_so_far = 0; - ret = write(state->fildes, buf, len); - if(ret > 0) - state->pos += ret; + do + { + ret = write(state->fildes, buf + wrote_so_far, len - wrote_so_far); + if(ret <= 0) + break; + wrote_so_far += ret; + } while(wrote_so_far < len); + if(ret < 0) + return(-1); - return(ret); + state->pos += wrote_so_far; + return(wrote_so_far); } -static int darshan_log_dzinit(struct darshan_fd_int_state *state) +static int darshan_log_dzinit(darshan_fd fd) { + struct darshan_fd_int_state *state = fd->state; int ret; - /* initialize buffers for staging compressed data to/from log file */ + /* initialize buffers for staging compressed data + * to/from log file + */ state->dz.buf = malloc(DARSHAN_DEF_COMP_BUF_SZ); if(state->dz.buf == NULL) return(-1); - + state->dz.size = 0; state->dz.prev_reg_id = DARSHAN_HEADER_REGION_ID; - switch(state->comp_type) + switch(fd->comp_type) { case DARSHAN_ZLIB_COMP: { @@ -1063,7 +1123,7 @@ static int darshan_log_dzinit(struct darshan_fd_int_state *state) free(state->dz.buf); return(-1); } - state->dz.strm = tmp_zstrm; + state->dz.comp_dat = tmp_zstrm; break; } #ifdef HAVE_LIBBZ2 @@ -1079,7 +1139,7 @@ static int darshan_log_dzinit(struct darshan_fd_int_state *state) tmp_bzstrm->bzfree = NULL; tmp_bzstrm->opaque = NULL; tmp_bzstrm->avail_in = 0; - tmp_bzstrm->next_in = Z_NULL; + tmp_bzstrm->next_in = NULL; if(!(state->creat_flag)) { @@ -1099,10 +1159,18 @@ static int darshan_log_dzinit(struct darshan_fd_int_state *state) free(state->dz.buf); return(-1); } - state->dz.strm = tmp_bzstrm; + state->dz.comp_dat = tmp_bzstrm; break; } #endif + case DARSHAN_NO_COMP: + { + /* we just track an offset into the staging buffers for no_comp */ + int *buf_off = malloc(sizeof(int)); + *buf_off = 0; + state->dz.comp_dat = buf_off; + break; + } default: fprintf(stderr, "Error: invalid 
compression type.\n"); return(-1); @@ -1111,30 +1179,34 @@ static int darshan_log_dzinit(struct darshan_fd_int_state *state) return(0); } -static void darshan_log_dzdestroy(struct darshan_fd_int_state *state) +static void darshan_log_dzdestroy(darshan_fd fd) { - switch(state->comp_type) + struct darshan_fd_int_state *state = fd->state; + + switch(fd->comp_type) { case DARSHAN_ZLIB_COMP: if(!(state->creat_flag)) - inflateEnd(state->dz.strm); + inflateEnd((z_stream *)state->dz.comp_dat); else - deflateEnd(state->dz.strm); - free(state->dz.strm); + deflateEnd((z_stream *)state->dz.comp_dat); break; #ifdef HAVE_LIBBZ2 case DARSHAN_BZIP2_COMP: if(!(state->creat_flag)) - BZ2_bzDecompressEnd(state->dz.strm); + BZ2_bzDecompressEnd((bz_stream *)state->dz.comp_dat); else - BZ2_bzCompressEnd(state->dz.strm); - free(state->dz.strm); + BZ2_bzCompressEnd((bz_stream *)state->dz.comp_dat); break; #endif + case DARSHAN_NO_COMP: + /* do nothing */ + break; default: fprintf(stderr, "Error: invalid compression type.\n"); } + free(state->dz.comp_dat); free(state->dz.buf); return; } @@ -1142,74 +1214,110 @@ static void darshan_log_dzdestroy(struct darshan_fd_int_state *state) static int darshan_log_dzread(darshan_fd fd, int region_id, void *buf, int len) { struct darshan_fd_int_state *state = fd->state; + struct darshan_log_map map; + int reset_strm_flag = 0; int ret; - switch(state->comp_type) + /* if new log region, we reload buffers and clear eor flag */ + if(region_id != state->dz.prev_reg_id) + { + state->dz.eor = 0; + state->dz.size = 0; + reset_strm_flag = 1; /* reset libz/bzip2 streams */ + } + + if(region_id == DARSHAN_JOB_REGION_ID) + map = fd->job_map; + else if(region_id == DARSHAN_NAME_MAP_REGION_ID) + map = fd->name_map; + else + map = fd->mod_map[region_id]; + + switch(fd->comp_type) { case DARSHAN_ZLIB_COMP: - ret = darshan_log_libz_read(fd, region_id, buf, len); + ret = darshan_log_libz_read(fd, map, buf, len, reset_strm_flag); break; #ifdef HAVE_LIBBZ2 case 
DARSHAN_BZIP2_COMP: - ret = darshan_log_bzip2_read(fd, region_id, buf, len); + ret = darshan_log_bzip2_read(fd, map, buf, len, reset_strm_flag); break; #endif + case DARSHAN_NO_COMP: + ret = darshan_log_noz_read(fd, map, buf, len, reset_strm_flag); + break; default: fprintf(stderr, "Error: invalid compression type.\n"); return(-1); } + state->dz.prev_reg_id = region_id; return(ret); } static int darshan_log_dzwrite(darshan_fd fd, int region_id, void *buf, int len) { struct darshan_fd_int_state *state = fd->state; + struct darshan_log_map *map_p; + int flush_strm_flag = 0; int ret; - switch(state->comp_type) + /* if new log region, finish prev region's zstream and flush to log file */ + if(region_id != state->dz.prev_reg_id) + { + /* error out if the region we are writing to precedes the previous + * region we wrote -- we shouldn't be moving backwards in the log + */ + if(region_id < state->dz.prev_reg_id) + return(-1); + + if(state->dz.prev_reg_id != DARSHAN_HEADER_REGION_ID) + flush_strm_flag = 1; + } + + if(region_id == DARSHAN_JOB_REGION_ID) + map_p = &(fd->job_map); + else if(region_id == DARSHAN_NAME_MAP_REGION_ID) + map_p = &(fd->name_map); + else + map_p = &(fd->mod_map[region_id]); + + switch(fd->comp_type) { case DARSHAN_ZLIB_COMP: - ret = darshan_log_libz_write(fd, region_id, buf, len); + ret = darshan_log_libz_write(fd, map_p, buf, len, flush_strm_flag); break; #ifdef HAVE_LIBBZ2 case DARSHAN_BZIP2_COMP: - ret = darshan_log_bzip2_write(fd, region_id, buf, len); + ret = darshan_log_bzip2_write(fd, map_p, buf, len, flush_strm_flag); break; #endif + case DARSHAN_NO_COMP: + fprintf(stderr, + "Error: uncompressed writing of log files is not supported.\n"); + return(-1); default: fprintf(stderr, "Error: invalid compression type.\n"); return(-1); } + state->dz.prev_reg_id = region_id; return(ret); } -static int darshan_log_libz_read(darshan_fd fd, int region_id, void *buf, int len) +static int darshan_log_libz_read(darshan_fd fd, struct darshan_log_map map, + 
void *buf, int len, int reset_stream_flag) { struct darshan_fd_int_state *state = fd->state; int ret; int total_bytes = 0; int tmp_out_bytes; - struct darshan_log_map map; - z_stream *z_strmp = (z_stream *)state->dz.strm; + z_stream *z_strmp = (z_stream *)state->dz.comp_dat; assert(z_strmp); - /* if new log region, we reload buffers and clear eor flag */ - if(region_id != state->dz.prev_reg_id) - { + if(reset_stream_flag) z_strmp->avail_in = 0; - state->dz.eor = 0; - state->dz.prev_reg_id = region_id; - } - - if(region_id == DARSHAN_JOB_REGION_ID) - map = fd->job_map; - else if(region_id == DARSHAN_REC_MAP_REGION_ID) - map = fd->rec_map; - else - map = fd->mod_map[region_id]; z_strmp->avail_out = len; z_strmp->next_out = buf; @@ -1258,44 +1366,26 @@ static int darshan_log_libz_read(darshan_fd fd, int region_id, void *buf, int le return(total_bytes); } -static int darshan_log_libz_write(darshan_fd fd, int region_id, void *buf, int len) +static int darshan_log_libz_write(darshan_fd fd, struct darshan_log_map *map_p, + void *buf, int len, int flush_strm_flag) { struct darshan_fd_int_state *state = fd->state; int ret; int total_bytes = 0; int tmp_in_bytes; int tmp_out_bytes; - struct darshan_log_map *map_p; - z_stream *z_strmp = (z_stream *)state->dz.strm; + z_stream *z_strmp = (z_stream *)state->dz.comp_dat; assert(z_strmp); - /* if new log region, finish prev region's zstream and flush to log file */ - if(region_id != state->dz.prev_reg_id) + /* flush compressed output buffer if we are moving to a new log region */ + if(flush_strm_flag) { - /* error out if the region we are writing to precedes the previous - * region we wrote -- we shouldn't be moving backwards in the log - */ - if(region_id < state->dz.prev_reg_id) + ret = darshan_log_libz_flush(fd, state->dz.prev_reg_id); + if(ret < 0) return(-1); - - if(state->dz.prev_reg_id != DARSHAN_HEADER_REGION_ID) - { - ret = darshan_log_libz_flush(fd, state->dz.prev_reg_id); - if(ret < 0) - return(-1); - } - - 
state->dz.prev_reg_id = region_id; } - if(region_id == DARSHAN_JOB_REGION_ID) - map_p = &(fd->job_map); - else if(region_id == DARSHAN_REC_MAP_REGION_ID) - map_p = &(fd->rec_map); - else - map_p = &(fd->mod_map[region_id]); - z_strmp->avail_in = len; z_strmp->next_in = buf; @@ -1336,14 +1426,14 @@ static int darshan_log_libz_flush(darshan_fd fd, int region_id) int ret; int tmp_out_bytes; struct darshan_log_map *map_p; - z_stream *z_strmp = (z_stream *)state->dz.strm; + z_stream *z_strmp = (z_stream *)state->dz.comp_dat; assert(z_strmp); if(region_id == DARSHAN_JOB_REGION_ID) map_p = &(fd->job_map); - else if(region_id == DARSHAN_REC_MAP_REGION_ID) - map_p = &(fd->rec_map); + else if(region_id == DARSHAN_NAME_MAP_REGION_ID) + map_p = &(fd->name_map); else map_p = &(fd->mod_map[region_id]); @@ -1377,32 +1467,19 @@ static int darshan_log_libz_flush(darshan_fd fd, int region_id) } #ifdef HAVE_LIBBZ2 - -static int darshan_log_bzip2_read(darshan_fd fd, int region_id, void *buf, int len) +static int darshan_log_bzip2_read(darshan_fd fd, struct darshan_log_map map, + void *buf, int len, int reset_strm_flag) { struct darshan_fd_int_state *state = fd->state; int ret; int total_bytes = 0; int tmp_out_bytes; - struct darshan_log_map map; - bz_stream *bz_strmp = (bz_stream *)state->dz.strm; + bz_stream *bz_strmp = (bz_stream *)state->dz.comp_dat; assert(bz_strmp); - /* if new log region, we reload buffers and clear eor flag */ - if(region_id != state->dz.prev_reg_id) - { + if(reset_strm_flag) bz_strmp->avail_in = 0; - state->dz.eor = 0; - state->dz.prev_reg_id = region_id; - } - - if(region_id == DARSHAN_JOB_REGION_ID) - map = fd->job_map; - else if(region_id == DARSHAN_REC_MAP_REGION_ID) - map = fd->rec_map; - else - map = fd->mod_map[region_id]; bz_strmp->avail_out = len; bz_strmp->next_out = buf; @@ -1454,44 +1531,26 @@ static int darshan_log_bzip2_read(darshan_fd fd, int region_id, void *buf, int l return(total_bytes); } -static int darshan_log_bzip2_write(darshan_fd fd, 
int region_id, void *buf, int len) +static int darshan_log_bzip2_write(darshan_fd fd, struct darshan_log_map *map_p, + void *buf, int len, int flush_strm_flag) { struct darshan_fd_int_state *state = fd->state; int ret; int total_bytes = 0; int tmp_in_bytes; int tmp_out_bytes; - struct darshan_log_map *map_p; - bz_stream *bz_strmp = (bz_stream *)state->dz.strm; + bz_stream *bz_strmp = (bz_stream *)state->dz.comp_dat; assert(bz_strmp); - /* if new log region, finish prev region's zstream and flush to log file */ - if(region_id != state->dz.prev_reg_id) + /* flush compressed output buffer if we are moving to a new log region */ + if(flush_strm_flag) { - /* error out if the region we are writing to precedes the previous - * region we wrote -- we shouldn't be moving backwards in the log - */ - if(region_id < state->dz.prev_reg_id) + ret = darshan_log_bzip2_flush(fd, state->dz.prev_reg_id); + if(ret < 0) return(-1); - - if(state->dz.prev_reg_id != DARSHAN_HEADER_REGION_ID) - { - ret = darshan_log_bzip2_flush(fd, state->dz.prev_reg_id); - if(ret < 0) - return(-1); - } - - state->dz.prev_reg_id = region_id; } - if(region_id == DARSHAN_JOB_REGION_ID) - map_p = &(fd->job_map); - else if(region_id == DARSHAN_REC_MAP_REGION_ID) - map_p = &(fd->rec_map); - else - map_p = &(fd->mod_map[region_id]); - bz_strmp->avail_in = len; bz_strmp->next_in = buf; @@ -1532,14 +1591,14 @@ static int darshan_log_bzip2_flush(darshan_fd fd, int region_id) int ret; int tmp_out_bytes; struct darshan_log_map *map_p; - bz_stream *bz_strmp = (bz_stream *)state->dz.strm; + bz_stream *bz_strmp = (bz_stream *)state->dz.comp_dat; assert(bz_strmp); if(region_id == DARSHAN_JOB_REGION_ID) map_p = &(fd->job_map); - else if(region_id == DARSHAN_REC_MAP_REGION_ID) - map_p = &(fd->rec_map); + else if(region_id == DARSHAN_NAME_MAP_REGION_ID) + map_p = &(fd->name_map); else map_p = &(fd->mod_map[region_id]); @@ -1567,24 +1626,65 @@ static int darshan_log_bzip2_flush(darshan_fd fd, int region_id) bz_strmp->next_out 
= (char *)state->dz.buf; } } while (ret != BZ_STREAM_END); - BZ2_bzCompressEnd(bz_strmp); BZ2_bzCompressInit(bz_strmp, 9, 1, 30); return(0); } - #endif +static int darshan_log_noz_read(darshan_fd fd, struct darshan_log_map map, + void *buf, int len, int reset_strm_flag) +{ + struct darshan_fd_int_state *state = fd->state; + int ret; + int total_bytes = 0; + int cp_size; + int *buf_off = (int *)state->dz.comp_dat; + + if(reset_strm_flag) + *buf_off = state->dz.size; + + /* we just read data from the given log file region until we have + * accumulated 'len' bytes, or until the region ends + */ + while(total_bytes < len) + { + /* check if we need to load more data from log file */ + if(*buf_off == state->dz.size) + { + /* if the eor flag is set, clear it and return -- future + * reads of this log region will restart at the beginning + */ + if(state->dz.eor) + { + state->dz.eor = 0; + break; + } + + /* read more data from input file */ + ret = darshan_log_dzload(fd, map); + if(ret < 0) + return(-1); + assert(state->dz.size > 0); + } + + cp_size = (len > (state->dz.size - *buf_off)) ? + state->dz.size - *buf_off : len; + memcpy(buf, state->dz.buf + *buf_off, cp_size); + total_bytes += cp_size; + *buf_off += cp_size; + } + + return(total_bytes); +} + static int darshan_log_dzload(darshan_fd fd, struct darshan_log_map map) { struct darshan_fd_int_state *state = fd->state; int ret; unsigned int remaining; unsigned int read_size; - unsigned int read_so_far = 0; - - state->dz.size = 0; /* seek to the appropriate portion of the log file, if out of range */ if((state->pos < map.off) || (state->pos >= (map.off + map.len))) @@ -1601,24 +1701,18 @@ static int darshan_log_dzload(darshan_fd fd, struct darshan_log_map map) remaining = (map.off + map.len) - state->pos; read_size = (remaining > DARSHAN_DEF_COMP_BUF_SZ) ? 
DARSHAN_DEF_COMP_BUF_SZ : remaining; - do - { - ret = darshan_log_read(fd, state->dz.buf + read_so_far, - read_size - read_so_far); - if(ret <= 0) - break; - read_so_far += ret; - } while(read_so_far < read_size); - if(ret < 0) + + ret = darshan_log_read(fd, state->dz.buf, read_size); + if(ret < (int)read_size) { fprintf(stderr, "Error: unable to read compressed data from file.\n"); return(-1); } - if((read_size == remaining) || (ret == 0)) + + if(ret == (int)remaining) { state->dz.eor = 1; } - state->dz.size = read_size; return(0); } @@ -1627,30 +1721,101 @@ static int darshan_log_dzunload(darshan_fd fd, struct darshan_log_map *map_p) { struct darshan_fd_int_state *state = fd->state; int ret; - unsigned int write_so_far = 0; /* initialize map structure for this log region */ if(map_p->off == 0) map_p->off = state->pos; /* write more compressed data from staging buffer to file */ - do + ret = darshan_log_write(fd, state->dz.buf, state->dz.size); + if(ret < (int)state->dz.size) { - ret = darshan_log_write(fd, state->dz.buf + write_so_far, - state->dz.size - write_so_far); - if(ret <= 0) - { - fprintf(stderr, "Error: unable to write compressed data to file.\n"); - return(-1); - } - write_so_far += ret; - } while(write_so_far < state->dz.size); + fprintf(stderr, "Error: unable to write compressed data to file.\n"); + return(-1); + } map_p->len += state->dz.size; state->dz.size = 0; return (0); } +/******************************************************** + * backwards compatibility functions * + ********************************************************/ + +int darshan_log_get_namerecs_3_00(void *name_rec_buf, int buf_len, + int swap_flag, struct darshan_name_record_ref **hash) +{ + struct darshan_name_record_ref *ref; + char *buf_ptr; + darshan_record_id *rec_id_ptr; + uint32_t *path_len_ptr; + char *path_ptr; + int rec_len; + int buf_processed = 0; + + /* work through the name record buffer -- deserialize the mapping data and + * add to the output hash table + * NOTE: 
these mapping pairs are variable in length, so we have to be able + * to handle incomplete mappings temporarily here + */ + buf_ptr = name_rec_buf; + while(buf_len > (sizeof(darshan_record_id) + sizeof(uint32_t))) + { + /* see if we have enough buf space to read in the next full record */ + path_len_ptr = (uint32_t *)(buf_ptr + sizeof(darshan_record_id)); + if(swap_flag) + DARSHAN_BSWAP32(path_len_ptr); + rec_len = sizeof(darshan_record_id) + sizeof(uint32_t) + *path_len_ptr; + + /* we need to read more before we continue deserializing */ + if(buf_len < rec_len) + break; + + /* get pointers for each field of this darshan record */ + /* NOTE: darshan record hash serialization method: + * ... darshan_record_id | (uint32_t) path_len | path ... + */ + rec_id_ptr = (darshan_record_id *)buf_ptr; + path_ptr = (char *)(buf_ptr + sizeof(darshan_record_id) + sizeof(uint32_t)); + + if(swap_flag) + /* we need to sort out endianness issues before deserializing */ + DARSHAN_BSWAP64(rec_id_ptr); + + HASH_FIND(hlink, *hash, rec_id_ptr, sizeof(darshan_record_id), ref); + if(!ref) + { + ref = malloc(sizeof(*ref)); + if(!ref) + return(-1); + + ref->name_record = malloc(rec_len - sizeof(uint32_t) + 1); + if(!ref->name_record) + { + free(ref); + return(-1); + } + + /* transform the serialized name record into the zero-length + * array structure darshan uses to track name records + */ + ref->name_record->id = *rec_id_ptr; + memcpy(ref->name_record->name, path_ptr, *path_len_ptr); + ref->name_record->name[*path_len_ptr] = '\0'; + + /* add this record to the hash */ + HASH_ADD(hlink, *hash, name_record->id, sizeof(darshan_record_id), ref); + } + + buf_ptr += rec_len; + buf_len -= rec_len; + buf_processed += rec_len; + } + + return(buf_processed); +} + /* * Local variables: * c-indent-level: 4 diff --git a/darshan-util/darshan-logutils.h b/darshan-util/darshan-logutils.h index 1881bb49722bcf66de4e09bfdf323e94c7ef6684..b76fc31913fbaca7dc6e29aa8662c817b916f4aa 100644 --- 
a/darshan-util/darshan-logutils.h +++ b/darshan-util/darshan-logutils.h @@ -29,9 +29,11 @@ struct darshan_fd_s int swap_flag; /* flag indicating whether a log file contains partial data */ int partial_flag; + /* compression type used on log file */ + enum darshan_comp_type comp_type; /* log file offset/length maps for each log file region */ struct darshan_log_map job_map; - struct darshan_log_map rec_map; + struct darshan_log_map name_map; struct darshan_log_map mod_map[DARSHAN_MAX_MODS]; /* module-specific log-format versions contained in log */ uint32_t mod_ver[DARSHAN_MAX_MODS]; @@ -41,9 +43,9 @@ struct darshan_fd_s }; typedef struct darshan_fd_s* darshan_fd; -struct darshan_record_ref +struct darshan_name_record_ref { - struct darshan_record rec; + struct darshan_name_record *name_record; UT_hash_handle hlink; }; @@ -67,8 +69,7 @@ struct darshan_mod_logutil_funcs */ int (*log_get_record)( darshan_fd fd, - void* buf, - darshan_record_id* rec_id + void* buf ); /* put a single module record into the log file. 
* return 0 on success, -1 on error @@ -104,6 +105,12 @@ struct darshan_mod_logutil_funcs void *rec2, char *name2 ); + /* combine two records into a single aggregate record */ + void (*log_agg_records)( + void *rec, + void *agg_rec, + int init_flag + ); }; extern struct darshan_mod_logutil_funcs *mod_logutils[]; @@ -117,19 +124,19 @@ extern struct darshan_mod_logutil_funcs *mod_logutils[]; darshan_fd darshan_log_open(const char *name); darshan_fd darshan_log_create(const char *name, enum darshan_comp_type comp_type, int partial_flag); -int darshan_log_getjob(darshan_fd fd, struct darshan_job *job); -int darshan_log_putjob(darshan_fd fd, struct darshan_job *job); -int darshan_log_getexe(darshan_fd fd, char *buf); -int darshan_log_putexe(darshan_fd fd, char *buf); -int darshan_log_getmounts(darshan_fd fd, struct darshan_mnt_info **mnt_data_array, +int darshan_log_get_job(darshan_fd fd, struct darshan_job *job); +int darshan_log_put_job(darshan_fd fd, struct darshan_job *job); +int darshan_log_get_exe(darshan_fd fd, char *buf); +int darshan_log_put_exe(darshan_fd fd, char *buf); +int darshan_log_get_mounts(darshan_fd fd, struct darshan_mnt_info **mnt_data_array, int* count); -int darshan_log_putmounts(darshan_fd fd, struct darshan_mnt_info *mnt_data_array, +int darshan_log_put_mounts(darshan_fd fd, struct darshan_mnt_info *mnt_data_array, int count); -int darshan_log_gethash(darshan_fd fd, struct darshan_record_ref **hash); -int darshan_log_puthash(darshan_fd fd, struct darshan_record_ref *hash); -int darshan_log_getmod(darshan_fd fd, darshan_module_id mod_id, +int darshan_log_get_namehash(darshan_fd fd, struct darshan_name_record_ref **hash); +int darshan_log_put_namehash(darshan_fd fd, struct darshan_name_record_ref *hash); +int darshan_log_get_mod(darshan_fd fd, darshan_module_id mod_id, void *mod_buf, int mod_buf_sz); -int darshan_log_putmod(darshan_fd fd, darshan_module_id mod_id, +int darshan_log_put_mod(darshan_fd fd, darshan_module_id mod_id, void *mod_buf, int 
mod_buf_sz, int ver); void darshan_log_close(darshan_fd file); diff --git a/darshan-util/darshan-merge.c b/darshan-util/darshan-merge.c new file mode 100644 index 0000000000000000000000000000000000000000..d1df3916fc205497fb3f40a1a9eb00c46133d68a --- /dev/null +++ b/darshan-util/darshan-merge.c @@ -0,0 +1,484 @@ +#include +#include +#include +#include +#include +#include + +#include "uthash-1.9.2/src/uthash.h" + +#include "darshan-logutils.h" + +#define DEF_MOD_BUF_SIZE 1024 /* 1 KiB is enough for all current mod records ... */ + +struct darshan_shared_record_ref +{ + darshan_record_id id; + int ref_cnt; + char agg_rec[DEF_MOD_BUF_SIZE]; + UT_hash_handle hlink; +}; + +void usage(char *exename) +{ + fprintf(stderr, "Usage: %s --output [options] \n", exename); + fprintf(stderr, "This utility merges multiple Darshan log files into a single output log file.\n"); + fprintf(stderr, "Options:\n"); + fprintf(stderr, "\t--output\t(REQUIRED) Full path of the output darshan log file.\n"); + fprintf(stderr, "\t--shared-redux\tReduce globally shared records into a single record.\n"); + fprintf(stderr, "\t--job-end-time\tSet the output log's job end time (requires argument of seconds since Epoch).\n"); + + exit(1); +} + +void parse_args(int argc, char **argv, char ***infile_list, int *n_files, + char **outlog_path, int *shared_redux, int64_t *job_end_time) +{ + int index; + char *check; + static struct option long_opts[] = + { + {"output", required_argument, NULL, 'o'}, + {"shared-redux", no_argument, NULL, 's'}, + {"job-end-time", required_argument, NULL, 'e'}, + {0, 0, 0, 0} + }; + + *shared_redux = 0; + *outlog_path = NULL; + *job_end_time = 0; + + while(1) + { + int c = getopt_long(argc, argv, "", long_opts, &index); + + if(c == -1) break; + + switch(c) + { + case 's': + *shared_redux = 1; + break; + case 'o': + *outlog_path = optarg; + break; + case 'e': + *job_end_time = strtol(optarg, &check, 10); + if(optarg == check) + { + fprintf(stderr, "Error: unable to parse job end 
time value.\n"); + exit(1); + } + break; + case '?': + default: + usage(argv[0]); + break; + } + } + + if(*outlog_path == NULL) + { + usage(argv[0]); + } + + *infile_list = &argv[optind]; + *n_files = argc - optind; + + return; +} + +int build_mod_shared_rec_hash(char **infile_list, int n_infiles, + darshan_module_id mod_id, int nprocs, char *mod_buf, + struct darshan_shared_record_ref **shared_rec_hash) +{ + darshan_fd in_fd; + struct darshan_base_record *base_rec; + struct darshan_shared_record_ref *ref, *tmp; + int init_rank = -1; + int ret; + int i; + + /* loop over each input log file */ + for(i = 0; i < n_infiles; i++) + { + in_fd = darshan_log_open(infile_list[i]); + if(in_fd == NULL) + { + fprintf(stderr, + "Error: unable to open input Darshan log file %s.\n", + infile_list[i]); + return(-1); + } + + while((ret = mod_logutils[mod_id]->log_get_record(in_fd, mod_buf)) == 1) + { + base_rec = (struct darshan_base_record *)mod_buf; + if(init_rank == -1) + init_rank = base_rec->rank; + + /* initialize the hash with the first rank's records */ + if(base_rec->rank == init_rank) + { + struct darshan_base_record *agg_base; + + /* create a new ref and add to the hash */ + ref = malloc(sizeof(*ref)); + if(!ref) + { + darshan_log_close(in_fd); + return(-1); + } + memset(ref, 0, sizeof(*ref)); + + /* initialize the aggregate record with this rank's record */ + agg_base = (struct darshan_base_record *)ref->agg_rec; + agg_base->id = base_rec->id; + agg_base->rank = -1; + mod_logutils[mod_id]->log_agg_records(mod_buf, ref->agg_rec, 1); + + ref->id = base_rec->id; + ref->ref_cnt = 1; + HASH_ADD(hlink, *shared_rec_hash, id, sizeof(darshan_record_id), ref); + } + else + { + /* search for this record in shared record hash */ + HASH_FIND(hlink, *shared_rec_hash, &(base_rec->id), + sizeof(darshan_record_id), ref); + if(ref) + { + /* if found, aggregate this rank's record into the shared record */ + mod_logutils[mod_id]->log_agg_records(mod_buf, ref->agg_rec, 0); + ref->ref_cnt++; 
+ } + } + } + if(ret < 0) + { + fprintf(stderr, + "Error: unable to read %s module record from input log file %s.\n", + darshan_module_names[mod_id], infile_list[i]); + darshan_log_close(in_fd); + return(-1); + } + + darshan_log_close(in_fd); + } + + /* prune any non-shared records from the hash one last time */ + HASH_ITER(hlink, *shared_rec_hash, ref, tmp) + { + if(ref->ref_cnt != nprocs) + { + HASH_DELETE(hlink, *shared_rec_hash, ref); + free(ref); + } + } + + return(0); +} + +int main(int argc, char *argv[]) +{ + char **infile_list; + int n_infiles; + int shared_redux; + int64_t job_end_time = 0; + char *outlog_path; + darshan_fd in_fd, merge_fd; + struct darshan_job in_job, merge_job; + char merge_exe[DARSHAN_EXE_LEN+1] = {0}; + struct darshan_mnt_info *merge_mnt_array; + int merge_mnt_count = 0; + struct darshan_name_record_ref *in_hash = NULL; + struct darshan_name_record_ref *merge_hash = NULL; + struct darshan_name_record_ref *ref, *tmp, *found; + struct darshan_shared_record_ref *shared_rec_hash = NULL; + struct darshan_shared_record_ref *sref, *stmp; + struct darshan_base_record *base_rec; + char mod_buf[DEF_MOD_BUF_SIZE]; + int i, j; + int ret; + + /* grab command line arguments */ + parse_args(argc, argv, &infile_list, &n_infiles, &outlog_path, &shared_redux, &job_end_time); + + memset(&merge_job, 0, sizeof(struct darshan_job)); + + /* first pass at merging together logs: + * - compose output job-level metadata structure (including exe & mount data) + * - compose output record_id->file_name mapping + */ + for(i = 0; i < n_infiles; i++) + { + memset(&in_job, 0, sizeof(struct darshan_job)); + + in_fd = darshan_log_open(infile_list[i]); + if(in_fd == NULL) + { + fprintf(stderr, + "Error: unable to open input Darshan log file %s.\n", + infile_list[i]); + return(-1); + } + + /* read job-level metadata from the input file */ + ret = darshan_log_get_job(in_fd, &in_job); + if(ret < 0) + { + fprintf(stderr, + "Error: unable to read job data from input Darshan 
log file %s.\n", + infile_list[i]); + darshan_log_close(in_fd); + return(-1); + } + + /* if the input darshan log has metadata set indicating the darshan + * shutdown procedure was called on the log, then we error out. if the + * shutdown procedure was started, then it's possible the log has + * incomplete or corrupt data, so we just throw out the data for now. + */ + if(strstr(in_job.metadata, "darshan_shutdown=yes")) + { + fprintf(stderr, + "Error: potentially corrupt data found in input log file %s.\n", + infile_list[i]); + darshan_log_close(in_fd); + return(-1); + } + + if(i == 0) + { + /* get job data, exe, & mounts directly from the first input log */ + memcpy(&merge_job, &in_job, sizeof(struct darshan_job)); + + ret = darshan_log_get_exe(in_fd, merge_exe); + if(ret < 0) + { + fprintf(stderr, + "Error: unable to read exe string from input Darshan log file %s.\n", + infile_list[i]); + darshan_log_close(in_fd); + return(-1); + } + + ret = darshan_log_get_mounts(in_fd, &merge_mnt_array, &merge_mnt_count); + if(ret < 0) + { + fprintf(stderr, + "Error: unable to read mount info from input Darshan log file %s.\n", + infile_list[i]); + darshan_log_close(in_fd); + return(-1); + } + } + else + { + /* potentially update job timestamps using remaining logs */ + if(in_job.start_time < merge_job.start_time) + merge_job.start_time = in_job.start_time; + if(in_job.end_time > merge_job.end_time) + merge_job.end_time = in_job.end_time; + } + + /* read the hash of ids->names for the input log */ + ret = darshan_log_get_namehash(in_fd, &in_hash); + if(ret < 0) + { + fprintf(stderr, + "Error: unable to read job data from input Darshan log file %s.\n", + infile_list[i]); + darshan_log_close(in_fd); + return(-1); + } + + /* iterate the input hash, copying over record id->name mappings + * that have not already been copied to the output hash + */ + HASH_ITER(hlink, in_hash, ref, tmp) + { + HASH_FIND(hlink, merge_hash, &(ref->name_record->id), + sizeof(darshan_record_id), found); + 
if(!found) + { + HASH_ADD(hlink, merge_hash, name_record->id, + sizeof(darshan_record_id), ref); + } + else if(strcmp(ref->name_record->name, found->name_record->name)) + { + fprintf(stderr, + "Error: invalid Darshan record table entry.\n"); + darshan_log_close(in_fd); + return(-1); + } + } + + darshan_log_close(in_fd); + } + + /* if a job end time was passed in, apply it to the output job */ + if(job_end_time > 0) + merge_job.end_time = job_end_time; + + /* create the output "merged" log */ + merge_fd = darshan_log_create(outlog_path, DARSHAN_ZLIB_COMP, 1); + if(merge_fd == NULL) + { + fprintf(stderr, "Error: unable to create output darshan log.\n"); + return(-1); + } + + /* write the darshan job info, exe string, and mount data to output file */ + ret = darshan_log_put_job(merge_fd, &merge_job); + if(ret < 0) + { + fprintf(stderr, "Error: unable to write job data to output darshan log.\n"); + darshan_log_close(merge_fd); + unlink(outlog_path); + return(-1); + } + + ret = darshan_log_put_exe(merge_fd, merge_exe); + if(ret < 0) + { + fprintf(stderr, "Error: unable to write exe string to output darshan log.\n"); + darshan_log_close(merge_fd); + unlink(outlog_path); + return(-1); + } + + ret = darshan_log_put_mounts(merge_fd, merge_mnt_array, merge_mnt_count); + if(ret < 0) + { + fprintf(stderr, "Error: unable to write mount data to output darshan log.\n"); + darshan_log_close(merge_fd); + unlink(outlog_path); + return(-1); + } + + /* write the merged table of records to output file */ + ret = darshan_log_put_namehash(merge_fd, merge_hash); + if(ret < 0) + { + fprintf(stderr, "Error: unable to write record table to output darshan log.\n"); + darshan_log_close(merge_fd); + unlink(outlog_path); + return(-1); + } + + /* iterate over active darshan modules and gather module data to write + * to the merged output log + */ + for(i = 0; i < DARSHAN_MAX_MODS; i++) + { + if(!mod_logutils[i]) continue; + + if(shared_redux) + { + /* build the hash of records shared globally by 
this module */ + ret = build_mod_shared_rec_hash(infile_list, n_infiles, i, + merge_job.nprocs, mod_buf, &shared_rec_hash); + if(ret < 0) + { + fprintf(stderr, + "Error: unable to build list of %s module's shared records.\n", + darshan_module_names[i]); + darshan_log_close(merge_fd); + unlink(outlog_path); + return(-1); + } + + } + + for(j = 0; j < n_infiles; j++) + { + in_fd = darshan_log_open(infile_list[j]); + if(in_fd == NULL) + { + fprintf(stderr, + "Error: unable to open input Darshan log file %s.\n", + infile_list[j]); + darshan_log_close(merge_fd); + unlink(outlog_path); + return(-1); + } + + if(j == 0 && shared_rec_hash) + { + /* write out the shared records first */ + HASH_ITER(hlink, shared_rec_hash, sref, stmp) + { + ret = mod_logutils[i]->log_put_record(merge_fd, sref->agg_rec, in_fd->mod_ver[i]); + if(ret < 0) + { + fprintf(stderr, + "Error: unable to write %s module record to output darshan log.\n", + darshan_module_names[i]); + darshan_log_close(in_fd); + darshan_log_close(merge_fd); + unlink(outlog_path); + return(-1); + } + } + } + + /* loop over module records and write them to output file */ + while((ret = mod_logutils[i]->log_get_record(in_fd, mod_buf)) == 1) + { + base_rec = (struct darshan_base_record *)mod_buf; + + HASH_FIND(hlink, shared_rec_hash, &(base_rec->id), sizeof(darshan_record_id), sref); + if(sref) + continue; /* skip shared records */ + + ret = mod_logutils[i]->log_put_record(merge_fd, mod_buf, in_fd->mod_ver[i]); + if(ret < 0) + { + fprintf(stderr, + "Error: unable to write %s module record to output log file %s.\n", + darshan_module_names[i], infile_list[j]); + darshan_log_close(in_fd); + darshan_log_close(merge_fd); + unlink(outlog_path); + return(-1); + } + } + if(ret < 0) + { + fprintf(stderr, + "Error: unable to read %s module record from input log file %s.\n", + darshan_module_names[i], infile_list[j]); + darshan_log_close(in_fd); + darshan_log_close(merge_fd); + unlink(outlog_path); + return(-1); + } + + 
darshan_log_close(in_fd); + } + + /* clear the shared record hash for the next module */ + if(shared_redux) + { + HASH_ITER(hlink, shared_rec_hash, sref, stmp) + { + HASH_DELETE(hlink, shared_rec_hash, sref); + free(sref); + } + } + } + + darshan_log_close(merge_fd); + + return(0); +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/darshan-util/darshan-mpiio-logutils.c b/darshan-util/darshan-mpiio-logutils.c index 9e29abb5ce35a3930b748a5dd191b565eac074c7..0ac339f32748eddc567ea0df9ba6e90492b4456e 100644 --- a/darshan-util/darshan-mpiio-logutils.c +++ b/darshan-util/darshan-mpiio-logutils.c @@ -30,14 +30,14 @@ char *mpiio_f_counter_names[] = { }; #undef X -static int darshan_log_get_mpiio_file(darshan_fd fd, void* mpiio_buf, - darshan_record_id* rec_id); +static int darshan_log_get_mpiio_file(darshan_fd fd, void* mpiio_buf); static int darshan_log_put_mpiio_file(darshan_fd fd, void* mpiio_buf, int ver); static void darshan_log_print_mpiio_file(void *file_rec, char *file_name, char *mnt_pt, char *fs_type, int ver); static void darshan_log_print_mpiio_description(void); static void darshan_log_print_mpiio_file_diff(void *file_rec1, char *file_name1, void *file_rec2, char *file_name2); +static void darshan_log_agg_mpiio_files(void *rec, void *agg_rec, int init_flag); struct darshan_mod_logutil_funcs mpiio_logutils = { @@ -45,17 +45,17 @@ struct darshan_mod_logutil_funcs mpiio_logutils = .log_put_record = &darshan_log_put_mpiio_file, .log_print_record = &darshan_log_print_mpiio_file, .log_print_description = &darshan_log_print_mpiio_description, - .log_print_diff = &darshan_log_print_mpiio_file_diff + .log_print_diff = &darshan_log_print_mpiio_file_diff, + .log_agg_records = &darshan_log_agg_mpiio_files }; -static int darshan_log_get_mpiio_file(darshan_fd fd, void* mpiio_buf, - darshan_record_id* rec_id) +static int darshan_log_get_mpiio_file(darshan_fd fd, void* mpiio_buf) { struct 
darshan_mpiio_file *file; int i; int ret; - ret = darshan_log_getmod(fd, DARSHAN_MPIIO_MOD, mpiio_buf, + ret = darshan_log_get_mod(fd, DARSHAN_MPIIO_MOD, mpiio_buf, sizeof(struct darshan_mpiio_file)); if(ret < 0) return(-1); @@ -67,15 +67,14 @@ static int darshan_log_get_mpiio_file(darshan_fd fd, void* mpiio_buf, if(fd->swap_flag) { /* swap bytes if necessary */ - DARSHAN_BSWAP64(&file->f_id); - DARSHAN_BSWAP64(&file->rank); + DARSHAN_BSWAP64(&(file->base_rec.id)); + DARSHAN_BSWAP64(&(file->base_rec.rank)); for(i=0; icounters[i]); for(i=0; ifcounters[i]); } - *rec_id = file->f_id; return(1); } } @@ -85,7 +84,7 @@ static int darshan_log_put_mpiio_file(darshan_fd fd, void* mpiio_buf, int ver) struct darshan_mpiio_file *file = (struct darshan_mpiio_file *)mpiio_buf; int ret; - ret = darshan_log_putmod(fd, DARSHAN_MPIIO_MOD, file, + ret = darshan_log_put_mod(fd, DARSHAN_MPIIO_MOD, file, sizeof(struct darshan_mpiio_file), ver); if(ret < 0) return(-1); @@ -103,15 +102,17 @@ static void darshan_log_print_mpiio_file(void *file_rec, char *file_name, for(i=0; irank, mpiio_file_rec->f_id, mpiio_counter_names[i], - mpiio_file_rec->counters[i], file_name, mnt_pt, fs_type); + mpiio_file_rec->base_rec.rank, mpiio_file_rec->base_rec.id, + mpiio_counter_names[i], mpiio_file_rec->counters[i], + file_name, mnt_pt, fs_type); } for(i=0; irank, mpiio_file_rec->f_id, mpiio_f_counter_names[i], - mpiio_file_rec->fcounters[i], file_name, mnt_pt, fs_type); + mpiio_file_rec->base_rec.rank, mpiio_file_rec->base_rec.id, + mpiio_f_counter_names[i], mpiio_file_rec->fcounters[i], + file_name, mnt_pt, fs_type); } return; @@ -166,7 +167,7 @@ static void darshan_log_print_mpiio_file_diff(void *file_rec1, char *file_name1, { printf("- "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_MPIIO_MOD], - file1->rank, file1->f_id, mpiio_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, mpiio_counter_names[i], file1->counters[i], file_name1, "", ""); } @@ -174,18 +175,18 @@ static void 
darshan_log_print_mpiio_file_diff(void *file_rec1, char *file_name1, { printf("+ "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_MPIIO_MOD], - file2->rank, file2->f_id, mpiio_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, mpiio_counter_names[i], file2->counters[i], file_name2, "", ""); } else if(file1->counters[i] != file2->counters[i]) { printf("- "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_MPIIO_MOD], - file1->rank, file1->f_id, mpiio_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, mpiio_counter_names[i], file1->counters[i], file_name1, "", ""); printf("+ "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_MPIIO_MOD], - file2->rank, file2->f_id, mpiio_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, mpiio_counter_names[i], file2->counters[i], file_name2, "", ""); } } @@ -196,7 +197,7 @@ static void darshan_log_print_mpiio_file_diff(void *file_rec1, char *file_name1, { printf("- "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_MPIIO_MOD], - file1->rank, file1->f_id, mpiio_f_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, mpiio_f_counter_names[i], file1->fcounters[i], file_name1, "", ""); } @@ -204,18 +205,18 @@ static void darshan_log_print_mpiio_file_diff(void *file_rec1, char *file_name1, { printf("+ "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_MPIIO_MOD], - file2->rank, file2->f_id, mpiio_f_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, mpiio_f_counter_names[i], file2->fcounters[i], file_name2, "", ""); } else if(file1->fcounters[i] != file2->fcounters[i]) { printf("- "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_MPIIO_MOD], - file1->rank, file1->f_id, mpiio_f_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, mpiio_f_counter_names[i], file1->fcounters[i], file_name1, "", ""); printf("+ "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_MPIIO_MOD], - file2->rank, file2->f_id, mpiio_f_counter_names[i], + 
file2->base_rec.rank, file2->base_rec.id, mpiio_f_counter_names[i], file2->fcounters[i], file_name2, "", ""); } } @@ -223,6 +224,300 @@ static void darshan_log_print_mpiio_file_diff(void *file_rec1, char *file_name1, return; } +/* simple helper struct for determining time & byte variances */ +struct var_t +{ + double n; + double M; + double S; +}; + +static void darshan_log_agg_mpiio_files(void *rec, void *agg_rec, int init_flag) +{ + struct darshan_mpiio_file *mpi_rec = (struct darshan_mpiio_file *)rec; + struct darshan_mpiio_file *agg_mpi_rec = (struct darshan_mpiio_file *)agg_rec; + int i, j, k; + int total_count; + int64_t tmp_val[4]; + int64_t tmp_cnt[4]; + int tmp_ndx; + double old_M; + double mpi_time = mpi_rec->fcounters[MPIIO_F_READ_TIME] + + mpi_rec->fcounters[MPIIO_F_WRITE_TIME] + + mpi_rec->fcounters[MPIIO_F_META_TIME]; + double mpi_bytes = (double)mpi_rec->counters[MPIIO_BYTES_READ] + + mpi_rec->counters[MPIIO_BYTES_WRITTEN]; + struct var_t *var_time_p = (struct var_t *) + ((char *)rec + sizeof(struct darshan_mpiio_file)); + struct var_t *var_bytes_p = (struct var_t *) + ((char *)var_time_p + sizeof(struct var_t)); + + for(i = 0; i < MPIIO_NUM_INDICES; i++) + { + switch(i) + { + case MPIIO_INDEP_OPENS: + case MPIIO_COLL_OPENS: + case MPIIO_INDEP_READS: + case MPIIO_INDEP_WRITES: + case MPIIO_COLL_READS: + case MPIIO_COLL_WRITES: + case MPIIO_SPLIT_READS: + case MPIIO_SPLIT_WRITES: + case MPIIO_NB_READS: + case MPIIO_NB_WRITES: + case MPIIO_SYNCS: + case MPIIO_HINTS: + case MPIIO_VIEWS: + case MPIIO_BYTES_READ: + case MPIIO_BYTES_WRITTEN: + case MPIIO_RW_SWITCHES: + case MPIIO_SIZE_READ_AGG_0_100: + case MPIIO_SIZE_READ_AGG_100_1K: + case MPIIO_SIZE_READ_AGG_1K_10K: + case MPIIO_SIZE_READ_AGG_10K_100K: + case MPIIO_SIZE_READ_AGG_100K_1M: + case MPIIO_SIZE_READ_AGG_1M_4M: + case MPIIO_SIZE_READ_AGG_4M_10M: + case MPIIO_SIZE_READ_AGG_10M_100M: + case MPIIO_SIZE_READ_AGG_100M_1G: + case MPIIO_SIZE_READ_AGG_1G_PLUS: + case MPIIO_SIZE_WRITE_AGG_0_100: + case 
MPIIO_SIZE_WRITE_AGG_100_1K: + case MPIIO_SIZE_WRITE_AGG_1K_10K: + case MPIIO_SIZE_WRITE_AGG_10K_100K: + case MPIIO_SIZE_WRITE_AGG_100K_1M: + case MPIIO_SIZE_WRITE_AGG_1M_4M: + case MPIIO_SIZE_WRITE_AGG_4M_10M: + case MPIIO_SIZE_WRITE_AGG_10M_100M: + case MPIIO_SIZE_WRITE_AGG_100M_1G: + case MPIIO_SIZE_WRITE_AGG_1G_PLUS: + /* sum */ + agg_mpi_rec->counters[i] += mpi_rec->counters[i]; + break; + case MPIIO_MODE: + /* just set to the input value */ + agg_mpi_rec->counters[i] = mpi_rec->counters[i]; + break; + case MPIIO_MAX_READ_TIME_SIZE: + case MPIIO_MAX_WRITE_TIME_SIZE: + case MPIIO_FASTEST_RANK: + case MPIIO_FASTEST_RANK_BYTES: + case MPIIO_SLOWEST_RANK: + case MPIIO_SLOWEST_RANK_BYTES: + /* these are set with the FP counters */ + break; + case MPIIO_ACCESS1_ACCESS: + /* increment common value counters */ + if(mpi_rec->counters[i] == 0) break; + + /* first, collapse duplicates */ + for(j = i; j < i + 4; j++) + { + for(k = 0; k < 4; k++) + { + if(agg_mpi_rec->counters[i + k] == mpi_rec->counters[j]) + { + agg_mpi_rec->counters[i + k + 4] += mpi_rec->counters[j + 4]; + mpi_rec->counters[j] = mpi_rec->counters[j + 4] = 0; + } + } + } + + /* second, add new counters */ + for(j = i; j < i + 4; j++) + { + tmp_ndx = 0; + memset(tmp_val, 0, 4 * sizeof(int64_t)); + memset(tmp_cnt, 0, 4 * sizeof(int64_t)); + + if(mpi_rec->counters[j] == 0) break; + for(k = 0; k < 4; k++) + { + if(agg_mpi_rec->counters[i + k] == mpi_rec->counters[j]) + { + total_count = agg_mpi_rec->counters[i + k + 4] + + mpi_rec->counters[j + 4]; + break; + } + } + if(k == 4) total_count = mpi_rec->counters[j + 4]; + + for(k = 0; k < 4; k++) + { + if((agg_mpi_rec->counters[i + k + 4] > total_count) || + ((agg_mpi_rec->counters[i + k + 4] == total_count) && + (agg_mpi_rec->counters[i + k] > mpi_rec->counters[j]))) + { + tmp_val[tmp_ndx] = agg_mpi_rec->counters[i + k]; + tmp_cnt[tmp_ndx] = agg_mpi_rec->counters[i + k + 4]; + tmp_ndx++; + } + else break; + } + if(tmp_ndx == 4) break; + + tmp_val[tmp_ndx] = 
mpi_rec->counters[j]; + tmp_cnt[tmp_ndx] = mpi_rec->counters[j + 4]; + tmp_ndx++; + + while(tmp_ndx != 4) + { + if(agg_mpi_rec->counters[i + k] != mpi_rec->counters[j]) + { + tmp_val[tmp_ndx] = agg_mpi_rec->counters[i + k]; + tmp_cnt[tmp_ndx] = agg_mpi_rec->counters[i + k + 4]; + tmp_ndx++; + } + k++; + } + memcpy(&(agg_mpi_rec->counters[i]), tmp_val, 4 * sizeof(int64_t)); + memcpy(&(agg_mpi_rec->counters[i + 4]), tmp_cnt, 4 * sizeof(int64_t)); + } + break; + case MPIIO_ACCESS2_ACCESS: + case MPIIO_ACCESS3_ACCESS: + case MPIIO_ACCESS4_ACCESS: + case MPIIO_ACCESS1_COUNT: + case MPIIO_ACCESS2_COUNT: + case MPIIO_ACCESS3_COUNT: + case MPIIO_ACCESS4_COUNT: + /* these are set all at once with common counters above */ + break; + default: + agg_mpi_rec->counters[i] = -1; + break; + } + } + + for(i = 0; i < MPIIO_F_NUM_INDICES; i++) + { + switch(i) + { + case MPIIO_F_READ_TIME: + case MPIIO_F_WRITE_TIME: + case MPIIO_F_META_TIME: + /* sum */ + agg_mpi_rec->fcounters[i] += mpi_rec->fcounters[i]; + break; + case MPIIO_F_OPEN_TIMESTAMP: + case MPIIO_F_READ_START_TIMESTAMP: + case MPIIO_F_WRITE_START_TIMESTAMP: + /* minimum non-zero */ + if((mpi_rec->fcounters[i] > 0) && + ((agg_mpi_rec->fcounters[i] == 0) || + (mpi_rec->fcounters[i] < agg_mpi_rec->fcounters[i]))) + { + agg_mpi_rec->fcounters[i] = mpi_rec->fcounters[i]; + } + break; + case MPIIO_F_READ_END_TIMESTAMP: + case MPIIO_F_WRITE_END_TIMESTAMP: + case MPIIO_F_CLOSE_TIMESTAMP: + /* maximum */ + if(mpi_rec->fcounters[i] > agg_mpi_rec->fcounters[i]) + { + agg_mpi_rec->fcounters[i] = mpi_rec->fcounters[i]; + } + break; + case MPIIO_F_MAX_READ_TIME: + if(mpi_rec->fcounters[i] > agg_mpi_rec->fcounters[i]) + { + agg_mpi_rec->fcounters[i] = mpi_rec->fcounters[i]; + agg_mpi_rec->counters[MPIIO_MAX_READ_TIME_SIZE] = + mpi_rec->counters[MPIIO_MAX_READ_TIME_SIZE]; + } + break; + case MPIIO_F_MAX_WRITE_TIME: + if(mpi_rec->fcounters[i] > agg_mpi_rec->fcounters[i]) + { + agg_mpi_rec->fcounters[i] = mpi_rec->fcounters[i]; + 
agg_mpi_rec->counters[MPIIO_MAX_WRITE_TIME_SIZE] = + mpi_rec->counters[MPIIO_MAX_WRITE_TIME_SIZE]; + } + break; + case MPIIO_F_FASTEST_RANK_TIME: + if(init_flag) + { + /* set fastest rank counters according to root rank. these counters + * will be determined as the aggregation progresses. + */ + agg_mpi_rec->counters[MPIIO_FASTEST_RANK] = mpi_rec->base_rec.rank; + agg_mpi_rec->counters[MPIIO_FASTEST_RANK_BYTES] = mpi_bytes; + agg_mpi_rec->fcounters[MPIIO_F_FASTEST_RANK_TIME] = mpi_time; + } + + if(mpi_time < agg_mpi_rec->fcounters[MPIIO_F_FASTEST_RANK_TIME]) + { + agg_mpi_rec->counters[MPIIO_FASTEST_RANK] = mpi_rec->base_rec.rank; + agg_mpi_rec->counters[MPIIO_FASTEST_RANK_BYTES] = mpi_bytes; + agg_mpi_rec->fcounters[MPIIO_F_FASTEST_RANK_TIME] = mpi_time; + } + break; + case MPIIO_F_SLOWEST_RANK_TIME: + if(init_flag) + { + /* set slowest rank counters according to root rank. these counters + * will be determined as the aggregation progresses. + */ + agg_mpi_rec->counters[MPIIO_SLOWEST_RANK] = mpi_rec->base_rec.rank; + agg_mpi_rec->counters[MPIIO_SLOWEST_RANK_BYTES] = mpi_bytes; + agg_mpi_rec->fcounters[MPIIO_F_SLOWEST_RANK_TIME] = mpi_time; + } + + if(mpi_time > agg_mpi_rec->fcounters[MPIIO_F_SLOWEST_RANK_TIME]) + { + agg_mpi_rec->counters[MPIIO_SLOWEST_RANK] = mpi_rec->base_rec.rank; + agg_mpi_rec->counters[MPIIO_SLOWEST_RANK_BYTES] = mpi_bytes; + agg_mpi_rec->fcounters[MPIIO_F_SLOWEST_RANK_TIME] = mpi_time; + } + break; + case MPIIO_F_VARIANCE_RANK_TIME: + if(init_flag) + { + var_time_p->n = 1; + var_time_p->M = mpi_time; + var_time_p->S = 0; + } + else + { + old_M = var_time_p->M; + + var_time_p->n++; + var_time_p->M += (mpi_time - var_time_p->M) / var_time_p->n; + var_time_p->S += (mpi_time - var_time_p->M) * (mpi_time - old_M); + + agg_mpi_rec->fcounters[MPIIO_F_VARIANCE_RANK_TIME] = + var_time_p->S / var_time_p->n; + } + break; + case MPIIO_F_VARIANCE_RANK_BYTES: + if(init_flag) + { + var_bytes_p->n = 1; + var_bytes_p->M = mpi_bytes; + var_bytes_p->S = 0; + } 
+ else + { + old_M = var_bytes_p->M; + + var_bytes_p->n++; + var_bytes_p->M += (mpi_bytes - var_bytes_p->M) / var_bytes_p->n; + var_bytes_p->S += (mpi_bytes - var_bytes_p->M) * (mpi_bytes - old_M); + + agg_mpi_rec->fcounters[MPIIO_F_VARIANCE_RANK_BYTES] = + var_bytes_p->S / var_bytes_p->n; + } + break; + default: + agg_mpi_rec->fcounters[i] = -1; + break; + } + } + + return; +} /* * Local variables: diff --git a/darshan-util/darshan-null-logutils.c b/darshan-util/darshan-null-logutils.c index 6d55e7766013a7dc878a9196a0e8e5b23e257809..934aa5e35db725fad6ef8152fd28e7cd49f725ef 100644 --- a/darshan-util/darshan-null-logutils.c +++ b/darshan-util/darshan-null-logutils.c @@ -32,14 +32,14 @@ char *null_f_counter_names[] = { #undef X /* prototypes for each of the NULL module's logutil functions */ -static int darshan_log_get_null_record(darshan_fd fd, void* null_buf, - darshan_record_id* rec_id); +static int darshan_log_get_null_record(darshan_fd fd, void* null_buf); static int darshan_log_put_null_record(darshan_fd fd, void* null_buf, int ver); static void darshan_log_print_null_record(void *file_rec, char *file_name, char *mnt_pt, char *fs_type, int ver); static void darshan_log_print_null_description(void); static void darshan_log_print_null_record_diff(void *file_rec1, char *file_name1, void *file_rec2, char *file_name2); +static void darshan_log_agg_null_records(void *rec, void *agg_rec, int init_flag); /* structure storing each function needed for implementing the darshan * logutil interface. 
these functions are used for reading, writing, and @@ -51,7 +51,8 @@ struct darshan_mod_logutil_funcs null_logutils = .log_put_record = &darshan_log_put_null_record, .log_print_record = &darshan_log_print_null_record, .log_print_description = &darshan_log_print_null_description, - .log_print_diff = &darshan_log_print_null_record_diff + .log_print_diff = &darshan_log_print_null_record_diff, + .log_agg_records = &darshan_log_agg_null_records }; /* retrieve a NULL record from log file descriptor 'fd', storing the @@ -59,15 +60,14 @@ struct darshan_mod_logutil_funcs null_logutils = * 'rec_id'. Return 1 on successful record read, 0 on no more data, * and -1 on error. */ -static int darshan_log_get_null_record(darshan_fd fd, void* null_buf, - darshan_record_id* rec_id) +static int darshan_log_get_null_record(darshan_fd fd, void* null_buf) { struct darshan_null_record *rec; int i; int ret; /* read a NULL module record from the darshan log file */ - ret = darshan_log_getmod(fd, DARSHAN_NULL_MOD, null_buf, + ret = darshan_log_get_mod(fd, DARSHAN_NULL_MOD, null_buf, sizeof(struct darshan_null_record)); if(ret < 0) return(-1); @@ -80,16 +80,14 @@ static int darshan_log_get_null_record(darshan_fd fd, void* null_buf, if(fd->swap_flag) { /* swap bytes if necessary */ - DARSHAN_BSWAP64(&rec->f_id); - DARSHAN_BSWAP64(&rec->rank); + DARSHAN_BSWAP64(&(rec->base_rec.id)); + DARSHAN_BSWAP64(&(rec->base_rec.rank)); for(i=0; icounters[i]); for(i=0; ifcounters[i]); } - /* set the output record id */ - *rec_id = rec->f_id; return(1); } } @@ -103,7 +101,7 @@ static int darshan_log_put_null_record(darshan_fd fd, void* null_buf, int ver) int ret; /* append NULL record to darshan log file */ - ret = darshan_log_putmod(fd, DARSHAN_NULL_MOD, rec, + ret = darshan_log_put_mod(fd, DARSHAN_NULL_MOD, rec, sizeof(struct darshan_null_record), ver); if(ret < 0) return(-1); @@ -124,16 +122,18 @@ static void darshan_log_print_null_record(void *file_rec, char *file_name, { /* macro defined in 
darshan-logutils.h */ DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_NULL_MOD], - null_rec->rank, null_rec->f_id, null_counter_names[i], - null_rec->counters[i], file_name, mnt_pt, fs_type); + null_rec->base_rec.rank, null_rec->base_rec.id, + null_counter_names[i], null_rec->counters[i], + file_name, mnt_pt, fs_type); } for(i=0; irank, null_rec->f_id, null_f_counter_names[i], - null_rec->fcounters[i], file_name, mnt_pt, fs_type); + null_rec->base_rec.rank, null_rec->base_rec.id, + null_f_counter_names[i], null_rec->fcounters[i], + file_name, mnt_pt, fs_type); } return; @@ -166,7 +166,7 @@ static void darshan_log_print_null_record_diff(void *file_rec1, char *file_name1 { printf("- "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_NULL_MOD], - file1->rank, file1->f_id, null_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, null_counter_names[i], file1->counters[i], file_name1, "", ""); } @@ -174,18 +174,18 @@ static void darshan_log_print_null_record_diff(void *file_rec1, char *file_name1 { printf("+ "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_NULL_MOD], - file2->rank, file2->f_id, null_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, null_counter_names[i], file2->counters[i], file_name2, "", ""); } else if(file1->counters[i] != file2->counters[i]) { printf("- "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_NULL_MOD], - file1->rank, file1->f_id, null_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, null_counter_names[i], file1->counters[i], file_name1, "", ""); printf("+ "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_NULL_MOD], - file2->rank, file2->f_id, null_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, null_counter_names[i], file2->counters[i], file_name2, "", ""); } } @@ -196,7 +196,7 @@ static void darshan_log_print_null_record_diff(void *file_rec1, char *file_name1 { printf("- "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_NULL_MOD], - file1->rank, file1->f_id, 
null_f_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, null_f_counter_names[i], file1->fcounters[i], file_name1, "", ""); } @@ -204,18 +204,18 @@ static void darshan_log_print_null_record_diff(void *file_rec1, char *file_name1 { printf("+ "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_NULL_MOD], - file2->rank, file2->f_id, null_f_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, null_f_counter_names[i], file2->fcounters[i], file_name2, "", ""); } else if(file1->fcounters[i] != file2->fcounters[i]) { printf("- "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_NULL_MOD], - file1->rank, file1->f_id, null_f_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, null_f_counter_names[i], file1->fcounters[i], file_name1, "", ""); printf("+ "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_NULL_MOD], - file2->rank, file2->f_id, null_f_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, null_f_counter_names[i], file2->fcounters[i], file_name2, "", ""); } } @@ -223,6 +223,10 @@ static void darshan_log_print_null_record_diff(void *file_rec1, char *file_name1 return; } +static void darshan_log_agg_null_records(void *rec, void *agg_rec, int init_flag) +{ + return; +} /* * Local variables: diff --git a/darshan-util/darshan-parser.c b/darshan-util/darshan-parser.c index 5112d92a00e986873871b7f70129e860603d41e8..cb12ee9b5b06327e226f1960235d45b7a891073a 100644 --- a/darshan-util/darshan-parser.c +++ b/darshan-util/darshan-parser.c @@ -107,13 +107,13 @@ void posix_accum_file(struct darshan_posix_file *pfile, hash_entry_t *hfile, int void posix_accum_perf(struct darshan_posix_file *pfile, perf_data_t *pdata); void posix_calc_file(hash_entry_t *file_hash, file_data_t *fdata); void posix_print_total_file(struct darshan_posix_file *pfile); -void posix_file_list(hash_entry_t *file_hash, struct darshan_record_ref *rec_hash, int detail_flag); +void posix_file_list(hash_entry_t *file_hash, struct darshan_name_record_ref 
*name_hash, int detail_flag); void mpiio_accum_file(struct darshan_mpiio_file *mfile, hash_entry_t *hfile, int64_t nprocs); void mpiio_accum_perf(struct darshan_mpiio_file *mfile, perf_data_t *pdata); void mpiio_calc_file(hash_entry_t *file_hash, file_data_t *fdata); void mpiio_print_total_file(struct darshan_mpiio_file *mfile); -void mpiio_file_list(hash_entry_t *file_hash, struct darshan_record_ref *rec_hash, int detail_flag); +void mpiio_file_list(hash_entry_t *file_hash, struct darshan_name_record_ref *name_hash, int detail_flag); void calc_perf(perf_data_t *pdata, int64_t nprocs); @@ -199,14 +199,16 @@ int main(int argc, char **argv) int mask; int i, j; char *filename; + char *comp_str; char tmp_string[4096] = {0}; darshan_fd fd; struct darshan_job job; - struct darshan_record_ref *rec_hash = NULL; - struct darshan_record_ref *ref, *tmp_ref; + struct darshan_name_record_ref *name_hash = NULL; + struct darshan_name_record_ref *ref, *tmp_ref; int mount_count; struct darshan_mnt_info *mnt_data_array; time_t tmp_time = 0; + int64_t run_time = 0; char *token; char *save; char buffer[DARSHAN_JOB_METADATA_LEN]; @@ -231,7 +233,7 @@ int main(int argc, char **argv) return(-1); /* read darshan job info */ - ret = darshan_log_getjob(fd, &job); + ret = darshan_log_get_job(fd, &job); if(ret < 0) { darshan_log_close(fd); @@ -239,7 +241,7 @@ int main(int argc, char **argv) } /* get the original command line for this job */ - ret = darshan_log_getexe(fd, tmp_string); + ret = darshan_log_get_exe(fd, tmp_string); if(ret < 0) { darshan_log_close(fd); @@ -247,7 +249,7 @@ int main(int argc, char **argv) } /* get the mount information for this log */ - ret = darshan_log_getmounts(fd, &mnt_data_array, &mount_count); + ret = darshan_log_get_mounts(fd, &mnt_data_array, &mount_count); if(ret < 0) { darshan_log_close(fd); @@ -255,15 +257,25 @@ int main(int argc, char **argv) } /* read hash of darshan records */ - ret = darshan_log_gethash(fd, &rec_hash); + ret = 
darshan_log_get_namehash(fd, &name_hash); if(ret < 0) { darshan_log_close(fd); return(-1); } + if(fd->comp_type == DARSHAN_ZLIB_COMP) + comp_str = "ZLIB"; + else if (fd->comp_type == DARSHAN_BZIP2_COMP) + comp_str = "BZIP2"; + else if (fd->comp_type == DARSHAN_NO_COMP) + comp_str = "NONE"; + else + comp_str = "UNKNOWN"; + /* print job summary */ printf("# darshan log version: %s\n", fd->version); + printf("# compression method: %s\n", comp_str); printf("# exe: %s\n", tmp_string); printf("# uid: %" PRId64 "\n", job.uid); printf("# jobid: %" PRId64 "\n", job.jobid); @@ -275,7 +287,9 @@ int main(int argc, char **argv) tmp_time += job.end_time; printf("# end_time_asci: %s", ctime(&tmp_time)); printf("# nprocs: %" PRId64 "\n", job.nprocs); - printf("# run time: %" PRId64 "\n", job.end_time - job.start_time + 1); + if(job.end_time >= job.start_time) + run_time = job.end_time - job.start_time + 1; + printf("# run time: %" PRId64 "\n", run_time); for(token=strtok_r(job.metadata, "\n", &save); token != NULL; token=strtok_r(NULL, "\n", &save)) @@ -302,7 +316,7 @@ int main(int argc, char **argv) printf("# -------------------------------------------------------\n"); printf("# header: %zu bytes (uncompressed)\n", sizeof(struct darshan_header)); printf("# job data: %zu bytes (compressed)\n", fd->job_map.len); - printf("# record table: %zu bytes (compressed)\n", fd->rec_map.len); + printf("# record table: %zu bytes (compressed)\n", fd->name_map.len); for(i=0; imod_map[i].len) @@ -352,7 +366,7 @@ int main(int argc, char **argv) for(i=0; ilog_print_description(); } - ret = mod_logutils[i]->log_get_record(fd, mod_buf, &rec_id); + ret = mod_logutils[i]->log_get_record(fd, mod_buf); if(ret != 1) { fprintf(stderr, "Error: failed to parse the first %s module record.\n", @@ -408,23 +422,35 @@ int main(int argc, char **argv) { char *mnt_pt = NULL; char *fs_type = NULL; + char *rec_name = NULL; hash_entry_t *hfile = NULL; + base_rec = (struct darshan_base_record *)mod_buf; /* get the 
pathname for this record */ - HASH_FIND(hlink, rec_hash, &rec_id, sizeof(darshan_record_id), ref); - assert(ref); + HASH_FIND(hlink, name_hash, &(base_rec->id), sizeof(darshan_record_id), ref); - /* get mount point and fs type associated with this record */ - for(j=0; jrec.name, - strlen(mnt_data_array[j].mnt_path)) == 0) + rec_name = ref->name_record->name; + + /* get mount point and fs type associated with this record */ + for(j=0; jlog_print_record(mod_buf, ref->rec.name, + mod_logutils[i]->log_print_record(mod_buf, rec_name, mnt_pt, fs_type, fd->mod_ver[i]); } @@ -443,7 +469,7 @@ int main(int argc, char **argv) if(i != DARSHAN_POSIX_MOD && i != DARSHAN_MPIIO_MOD) continue; - HASH_FIND(hlink, file_hash, &rec_id, sizeof(darshan_record_id), hfile); + HASH_FIND(hlink, file_hash, &(base_rec->id), sizeof(darshan_record_id), hfile); if(!hfile) { hfile = malloc(sizeof(*hfile)); @@ -455,14 +481,14 @@ int main(int argc, char **argv) /* init */ memset(hfile, 0, sizeof(*hfile)); - hfile->rec_id = rec_id; + hfile->rec_id = base_rec->id; hfile->type = 0; hfile->procs = 0; hfile->rec_dat = NULL; hfile->cumul_time = 0.0; hfile->slowest_time = 0.0; - HASH_ADD(hlink, file_hash, rec_id, sizeof(darshan_record_id), hfile); + HASH_ADD(hlink, file_hash,rec_id, sizeof(darshan_record_id), hfile); } if(i == DARSHAN_POSIX_MOD) @@ -480,7 +506,7 @@ int main(int argc, char **argv) memset(mod_buf, 0, DEF_MOD_BUF_SIZE); - } while((ret = mod_logutils[i]->log_get_record(fd, mod_buf, &rec_id)) == 1); + } while((ret = mod_logutils[i]->log_get_record(fd, mod_buf)) == 1); if (ret < 0) { ret = -1; @@ -584,16 +610,16 @@ int main(int argc, char **argv) if(i == DARSHAN_POSIX_MOD) { if(mask & OPTION_FILE_LIST_DETAILED) - posix_file_list(file_hash, rec_hash, 1); + posix_file_list(file_hash, name_hash, 1); else - posix_file_list(file_hash, rec_hash, 0); + posix_file_list(file_hash, name_hash, 0); } else if(i == DARSHAN_MPIIO_MOD) { if(mask & OPTION_FILE_LIST_DETAILED) - mpiio_file_list(file_hash, 
rec_hash, 1); + mpiio_file_list(file_hash, name_hash, 1); else - mpiio_file_list(file_hash, rec_hash, 0); + mpiio_file_list(file_hash, name_hash, 0); } } @@ -626,10 +652,10 @@ cleanup: free(pdata.rank_cumul_md_time); /* free record hash data */ - HASH_ITER(hlink, rec_hash, ref, tmp_ref) + HASH_ITER(hlink, name_hash, ref, tmp_ref) { - HASH_DELETE(hlink, rec_hash, ref); - free(ref->rec.name); + HASH_DELETE(hlink, name_hash, ref); + free(ref->name_record); free(ref); } @@ -654,7 +680,7 @@ void posix_accum_file(struct darshan_posix_file *pfile, hfile->procs += 1; - if(pfile->rank == -1) + if(pfile->base_rec.rank == -1) { hfile->slowest_time = pfile->fcounters[POSIX_F_SLOWEST_RANK_TIME]; } @@ -666,7 +692,7 @@ void posix_accum_file(struct darshan_posix_file *pfile, pfile->fcounters[POSIX_F_WRITE_TIME])); } - if(pfile->rank == -1) + if(pfile->base_rec.rank == -1) { hfile->procs = nprocs; hfile->type |= FILETYPE_SHARED; @@ -857,7 +883,7 @@ void mpiio_accum_file(struct darshan_mpiio_file *mfile, hfile->procs += 1; - if(mfile->rank == -1) + if(mfile->base_rec.rank == -1) { hfile->slowest_time = mfile->fcounters[MPIIO_F_SLOWEST_RANK_TIME]; } @@ -869,7 +895,7 @@ void mpiio_accum_file(struct darshan_mpiio_file *mfile, mfile->fcounters[MPIIO_F_WRITE_TIME])); } - if(mfile->rank == -1) + if(mfile->base_rec.rank == -1) { hfile->procs = nprocs; hfile->type |= FILETYPE_SHARED; @@ -1028,7 +1054,7 @@ void posix_accum_perf(struct darshan_posix_file *pfile, * by_slowest: use slowest rank time from log data * (most accurate but requires newer log version) */ - if(pfile->rank == -1) + if(pfile->base_rec.rank == -1) { /* by_open */ if(pfile->fcounters[POSIX_F_CLOSE_TIMESTAMP] > @@ -1079,11 +1105,12 @@ void posix_accum_perf(struct darshan_posix_file *pfile, */ else { - pdata->rank_cumul_io_time[pfile->rank] += + pdata->rank_cumul_io_time[pfile->base_rec.rank] += (pfile->fcounters[POSIX_F_META_TIME] + pfile->fcounters[POSIX_F_READ_TIME] + pfile->fcounters[POSIX_F_WRITE_TIME]); - 
pdata->rank_cumul_md_time[pfile->rank] += pfile->fcounters[POSIX_F_META_TIME]; + pdata->rank_cumul_md_time[pfile->base_rec.rank] += + pfile->fcounters[POSIX_F_META_TIME]; } return; @@ -1109,7 +1136,7 @@ void mpiio_accum_perf(struct darshan_mpiio_file *mfile, * by_slowest: use slowest rank time from log data * (most accurate but requires newer log version) */ - if(mfile->rank == -1) + if(mfile->base_rec.rank == -1) { /* by_open */ if(mfile->fcounters[MPIIO_F_CLOSE_TIMESTAMP] > @@ -1160,11 +1187,12 @@ void mpiio_accum_perf(struct darshan_mpiio_file *mfile, */ else { - pdata->rank_cumul_io_time[mfile->rank] += + pdata->rank_cumul_io_time[mfile->base_rec.rank] += (mfile->fcounters[MPIIO_F_META_TIME] + mfile->fcounters[MPIIO_F_READ_TIME] + mfile->fcounters[MPIIO_F_WRITE_TIME]); - pdata->rank_cumul_md_time[mfile->rank] += mfile->fcounters[MPIIO_F_META_TIME]; + pdata->rank_cumul_md_time[mfile->base_rec.rank] += + mfile->fcounters[MPIIO_F_META_TIME]; } return; @@ -1390,13 +1418,13 @@ void mpiio_print_total_file(struct darshan_mpiio_file *mfile) } void posix_file_list(hash_entry_t *file_hash, - struct darshan_record_ref *rec_hash, + struct darshan_name_record_ref *name_hash, int detail_flag) { hash_entry_t *curr = NULL; hash_entry_t *tmp = NULL; struct darshan_posix_file *file_rec = NULL; - struct darshan_record_ref *ref = NULL; + struct darshan_name_record_ref *ref = NULL; int i; /* list of columns: @@ -1454,12 +1482,12 @@ void posix_file_list(hash_entry_t *file_hash, file_rec = (struct darshan_posix_file*)curr->rec_dat; assert(file_rec); - HASH_FIND(hlink, rec_hash, &(curr->rec_id), sizeof(darshan_record_id), ref); + HASH_FIND(hlink, name_hash, &(curr->rec_id), sizeof(darshan_record_id), ref); assert(ref); printf("%" PRIu64 "\t%s\t%" PRId64 "\t%f\t%f", curr->rec_id, - ref->rec.name, + ref->name_record->name, curr->procs, curr->slowest_time, curr->cumul_time/(double)curr->procs); @@ -1481,13 +1509,13 @@ void posix_file_list(hash_entry_t *file_hash, } void 
mpiio_file_list(hash_entry_t *file_hash, - struct darshan_record_ref *rec_hash, + struct darshan_name_record_ref *name_hash, int detail_flag) { hash_entry_t *curr = NULL; hash_entry_t *tmp = NULL; struct darshan_mpiio_file *file_rec = NULL; - struct darshan_record_ref *ref = NULL; + struct darshan_name_record_ref *ref = NULL; int i; /* list of columns: @@ -1548,12 +1576,12 @@ void mpiio_file_list(hash_entry_t *file_hash, file_rec = (struct darshan_mpiio_file*)curr->rec_dat; assert(file_rec); - HASH_FIND(hlink, rec_hash, &(curr->rec_id), sizeof(darshan_record_id), ref); + HASH_FIND(hlink, name_hash, &(curr->rec_id), sizeof(darshan_record_id), ref); assert(ref); printf("%" PRIu64 "\t%s\t%" PRId64 "\t%f\t%f", curr->rec_id, - ref->rec.name, + ref->name_record->name, curr->procs, curr->slowest_time, curr->cumul_time/(double)curr->procs); diff --git a/darshan-util/darshan-pnetcdf-logutils.c b/darshan-util/darshan-pnetcdf-logutils.c index 7cff6bce5ae53ccf57c34360590adaa16c8a1ad1..41fc4ef997e0a1c141c6398559084b73ae0b7f7e 100644 --- a/darshan-util/darshan-pnetcdf-logutils.c +++ b/darshan-util/darshan-pnetcdf-logutils.c @@ -30,14 +30,14 @@ char *pnetcdf_f_counter_names[] = { }; #undef X -static int darshan_log_get_pnetcdf_file(darshan_fd fd, void* pnetcdf_buf, - darshan_record_id* rec_id); +static int darshan_log_get_pnetcdf_file(darshan_fd fd, void* pnetcdf_buf); static int darshan_log_put_pnetcdf_file(darshan_fd fd, void* pnetcdf_buf, int ver); static void darshan_log_print_pnetcdf_file(void *file_rec, char *file_name, char *mnt_pt, char *fs_type, int ver); static void darshan_log_print_pnetcdf_description(void); static void darshan_log_print_pnetcdf_file_diff(void *file_rec1, char *file_name1, void *file_rec2, char *file_name2); +static void darshan_log_agg_pnetcdf_files(void *rec, void *agg_rec, int init_flag); struct darshan_mod_logutil_funcs pnetcdf_logutils = { @@ -45,17 +45,17 @@ struct darshan_mod_logutil_funcs pnetcdf_logutils = .log_put_record = 
&darshan_log_put_pnetcdf_file, .log_print_record = &darshan_log_print_pnetcdf_file, .log_print_description = &darshan_log_print_pnetcdf_description, - .log_print_diff = &darshan_log_print_pnetcdf_file_diff + .log_print_diff = &darshan_log_print_pnetcdf_file_diff, + .log_agg_records = &darshan_log_agg_pnetcdf_files }; -static int darshan_log_get_pnetcdf_file(darshan_fd fd, void* pnetcdf_buf, - darshan_record_id* rec_id) +static int darshan_log_get_pnetcdf_file(darshan_fd fd, void* pnetcdf_buf) { struct darshan_pnetcdf_file *file; int i; int ret; - ret = darshan_log_getmod(fd, DARSHAN_PNETCDF_MOD, pnetcdf_buf, + ret = darshan_log_get_mod(fd, DARSHAN_PNETCDF_MOD, pnetcdf_buf, sizeof(struct darshan_pnetcdf_file)); if(ret < 0) return(-1); @@ -67,15 +67,14 @@ static int darshan_log_get_pnetcdf_file(darshan_fd fd, void* pnetcdf_buf, if(fd->swap_flag) { /* swap bytes if necessary */ - DARSHAN_BSWAP64(&file->f_id); - DARSHAN_BSWAP64(&file->rank); + DARSHAN_BSWAP64(&(file->base_rec.id)); + DARSHAN_BSWAP64(&(file->base_rec.rank)); for(i=0; icounters[i]); for(i=0; ifcounters[i]); } - *rec_id = file->f_id; return(1); } } @@ -85,7 +84,7 @@ static int darshan_log_put_pnetcdf_file(darshan_fd fd, void* pnetcdf_buf, int ve struct darshan_pnetcdf_file *file = (struct darshan_pnetcdf_file *)pnetcdf_buf; int ret; - ret = darshan_log_putmod(fd, DARSHAN_PNETCDF_MOD, file, + ret = darshan_log_put_mod(fd, DARSHAN_PNETCDF_MOD, file, sizeof(struct darshan_pnetcdf_file), ver); if(ret < 0) return(-1); @@ -103,15 +102,17 @@ static void darshan_log_print_pnetcdf_file(void *file_rec, char *file_name, for(i=0; irank, pnetcdf_file_rec->f_id, pnetcdf_counter_names[i], - pnetcdf_file_rec->counters[i], file_name, mnt_pt, fs_type); + pnetcdf_file_rec->base_rec.rank, pnetcdf_file_rec->base_rec.id, + pnetcdf_counter_names[i], pnetcdf_file_rec->counters[i], + file_name, mnt_pt, fs_type); } for(i=0; irank, pnetcdf_file_rec->f_id, pnetcdf_f_counter_names[i], - pnetcdf_file_rec->fcounters[i], file_name, 
mnt_pt, fs_type); + pnetcdf_file_rec->base_rec.rank, pnetcdf_file_rec->base_rec.id, + pnetcdf_f_counter_names[i], pnetcdf_file_rec->fcounters[i], + file_name, mnt_pt, fs_type); } return; @@ -145,7 +146,7 @@ static void darshan_log_print_pnetcdf_file_diff(void *file_rec1, char *file_name { printf("- "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_PNETCDF_MOD], - file1->rank, file1->f_id, pnetcdf_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, pnetcdf_counter_names[i], file1->counters[i], file_name1, "", ""); } @@ -153,18 +154,18 @@ static void darshan_log_print_pnetcdf_file_diff(void *file_rec1, char *file_name { printf("+ "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_PNETCDF_MOD], - file2->rank, file2->f_id, pnetcdf_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, pnetcdf_counter_names[i], file2->counters[i], file_name2, "", ""); } else if(file1->counters[i] != file2->counters[i]) { printf("- "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_PNETCDF_MOD], - file1->rank, file1->f_id, pnetcdf_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, pnetcdf_counter_names[i], file1->counters[i], file_name1, "", ""); printf("+ "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_PNETCDF_MOD], - file2->rank, file2->f_id, pnetcdf_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, pnetcdf_counter_names[i], file2->counters[i], file_name2, "", ""); } } @@ -175,7 +176,7 @@ static void darshan_log_print_pnetcdf_file_diff(void *file_rec1, char *file_name { printf("- "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_PNETCDF_MOD], - file1->rank, file1->f_id, pnetcdf_f_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, pnetcdf_f_counter_names[i], file1->fcounters[i], file_name1, "", ""); } @@ -183,18 +184,18 @@ static void darshan_log_print_pnetcdf_file_diff(void *file_rec1, char *file_name { printf("+ "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_PNETCDF_MOD], - file2->rank, 
file2->f_id, pnetcdf_f_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, pnetcdf_f_counter_names[i], file2->fcounters[i], file_name2, "", ""); } else if(file1->fcounters[i] != file2->fcounters[i]) { printf("- "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_PNETCDF_MOD], - file1->rank, file1->f_id, pnetcdf_f_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, pnetcdf_f_counter_names[i], file1->fcounters[i], file_name1, "", ""); printf("+ "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_PNETCDF_MOD], - file2->rank, file2->f_id, pnetcdf_f_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, pnetcdf_f_counter_names[i], file2->fcounters[i], file_name2, "", ""); } } @@ -202,6 +203,56 @@ static void darshan_log_print_pnetcdf_file_diff(void *file_rec1, char *file_name return; } +static void darshan_log_agg_pnetcdf_files(void *rec, void *agg_rec, int init_flag) +{ + struct darshan_pnetcdf_file *pnc_rec = (struct darshan_pnetcdf_file *)rec; + struct darshan_pnetcdf_file *agg_pnc_rec = (struct darshan_pnetcdf_file *)agg_rec; + int i; + + for(i = 0; i < PNETCDF_NUM_INDICES; i++) + { + switch(i) + { + case PNETCDF_INDEP_OPENS: + case PNETCDF_COLL_OPENS: + /* sum */ + agg_pnc_rec->counters[i] += pnc_rec->counters[i]; + break; + default: + agg_pnc_rec->counters[i] = -1; + break; + } + } + + for(i = 0; i < PNETCDF_F_NUM_INDICES; i++) + { + switch(i) + { + case PNETCDF_F_OPEN_TIMESTAMP: + /* minimum non-zero */ + if((pnc_rec->fcounters[i] > 0) && + ((agg_pnc_rec->fcounters[i] == 0) || + (pnc_rec->fcounters[i] < agg_pnc_rec->fcounters[i]))) + { + agg_pnc_rec->fcounters[i] = pnc_rec->fcounters[i]; + } + break; + case PNETCDF_F_CLOSE_TIMESTAMP: + /* maximum */ + if(pnc_rec->fcounters[i] > agg_pnc_rec->fcounters[i]) + { + agg_pnc_rec->fcounters[i] = pnc_rec->fcounters[i]; + } + break; + default: + agg_pnc_rec->fcounters[i] = -1; + break; + } + } + + return; +} + /* * Local variables: * c-indent-level: 4 diff --git 
a/darshan-util/darshan-posix-logutils.c b/darshan-util/darshan-posix-logutils.c index 34f7db993b8bbb88bb91404389cd51e56a5d6620..bf66a88607a9bb80d616a8f95c814c0b2d86fc77 100644 --- a/darshan-util/darshan-posix-logutils.c +++ b/darshan-util/darshan-posix-logutils.c @@ -30,14 +30,14 @@ char *posix_f_counter_names[] = { }; #undef X -static int darshan_log_get_posix_file(darshan_fd fd, void* posix_buf, - darshan_record_id* rec_id); +static int darshan_log_get_posix_file(darshan_fd fd, void* posix_buf); static int darshan_log_put_posix_file(darshan_fd fd, void* posix_buf, int ver); static void darshan_log_print_posix_file(void *file_rec, char *file_name, char *mnt_pt, char *fs_type, int ver); static void darshan_log_print_posix_description(void); static void darshan_log_print_posix_file_diff(void *file_rec1, char *file_name1, void *file_rec2, char *file_name2); +static void darshan_log_agg_posix_files(void *rec, void *agg_rec, int init_flag); struct darshan_mod_logutil_funcs posix_logutils = { @@ -45,17 +45,17 @@ struct darshan_mod_logutil_funcs posix_logutils = .log_put_record = &darshan_log_put_posix_file, .log_print_record = &darshan_log_print_posix_file, .log_print_description = &darshan_log_print_posix_description, - .log_print_diff = &darshan_log_print_posix_file_diff + .log_print_diff = &darshan_log_print_posix_file_diff, + .log_agg_records = &darshan_log_agg_posix_files, }; -static int darshan_log_get_posix_file(darshan_fd fd, void* posix_buf, - darshan_record_id* rec_id) +static int darshan_log_get_posix_file(darshan_fd fd, void* posix_buf) { struct darshan_posix_file *file; int i; int ret; - ret = darshan_log_getmod(fd, DARSHAN_POSIX_MOD, posix_buf, + ret = darshan_log_get_mod(fd, DARSHAN_POSIX_MOD, posix_buf, sizeof(struct darshan_posix_file)); if(ret < 0) return(-1); @@ -67,15 +67,14 @@ static int darshan_log_get_posix_file(darshan_fd fd, void* posix_buf, if(fd->swap_flag) { /* swap bytes if necessary */ - DARSHAN_BSWAP64(&file->f_id); - 
DARSHAN_BSWAP64(&file->rank); + DARSHAN_BSWAP64(&file->base_rec.id); + DARSHAN_BSWAP64(&file->base_rec.rank); for(i=0; icounters[i]); for(i=0; ifcounters[i]); } - *rec_id = file->f_id; return(1); } } @@ -85,7 +84,7 @@ static int darshan_log_put_posix_file(darshan_fd fd, void* posix_buf, int ver) struct darshan_posix_file *file = (struct darshan_posix_file *)posix_buf; int ret; - ret = darshan_log_putmod(fd, DARSHAN_POSIX_MOD, file, + ret = darshan_log_put_mod(fd, DARSHAN_POSIX_MOD, file, sizeof(struct darshan_posix_file), ver); if(ret < 0) return(-1); @@ -103,15 +102,17 @@ static void darshan_log_print_posix_file(void *file_rec, char *file_name, for(i=0; irank, posix_file_rec->f_id, posix_counter_names[i], - posix_file_rec->counters[i], file_name, mnt_pt, fs_type); + posix_file_rec->base_rec.rank, posix_file_rec->base_rec.id, + posix_counter_names[i], posix_file_rec->counters[i], + file_name, mnt_pt, fs_type); } for(i=0; irank, posix_file_rec->f_id, posix_f_counter_names[i], - posix_file_rec->fcounters[i], file_name, mnt_pt, fs_type); + posix_file_rec->base_rec.rank, posix_file_rec->base_rec.id, + posix_f_counter_names[i], posix_file_rec->fcounters[i], + file_name, mnt_pt, fs_type); } return; @@ -167,26 +168,26 @@ static void darshan_log_print_posix_file_diff(void *file_rec1, char *file_name1, { printf("- "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_POSIX_MOD], - file1->rank, file1->f_id, posix_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, posix_counter_names[i], file1->counters[i], file_name1, "", ""); - + } else if(!file1) { printf("+ "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_POSIX_MOD], - file2->rank, file2->f_id, posix_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, posix_counter_names[i], file2->counters[i], file_name2, "", ""); } else if(file1->counters[i] != file2->counters[i]) { printf("- "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_POSIX_MOD], - file1->rank, file1->f_id, posix_counter_names[i], 
+ file1->base_rec.rank, file1->base_rec.id, posix_counter_names[i], file1->counters[i], file_name1, "", ""); printf("+ "); DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_POSIX_MOD], - file2->rank, file2->f_id, posix_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, posix_counter_names[i], file2->counters[i], file_name2, "", ""); } } @@ -197,26 +198,26 @@ static void darshan_log_print_posix_file_diff(void *file_rec1, char *file_name1, { printf("- "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_POSIX_MOD], - file1->rank, file1->f_id, posix_f_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, posix_f_counter_names[i], file1->fcounters[i], file_name1, "", ""); - + } else if(!file1) { printf("+ "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_POSIX_MOD], - file2->rank, file2->f_id, posix_f_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, posix_f_counter_names[i], file2->fcounters[i], file_name2, "", ""); } else if(file1->fcounters[i] != file2->fcounters[i]) { printf("- "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_POSIX_MOD], - file1->rank, file1->f_id, posix_f_counter_names[i], + file1->base_rec.rank, file1->base_rec.id, posix_f_counter_names[i], file1->fcounters[i], file_name1, "", ""); printf("+ "); DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_POSIX_MOD], - file2->rank, file2->f_id, posix_f_counter_names[i], + file2->base_rec.rank, file2->base_rec.id, posix_f_counter_names[i], file2->fcounters[i], file_name2, "", ""); } } @@ -224,6 +225,323 @@ static void darshan_log_print_posix_file_diff(void *file_rec1, char *file_name1, return; } +/* simple helper struct for determining time & byte variances */ +struct var_t +{ + double n; + double M; + double S; +}; + +static void darshan_log_agg_posix_files(void *rec, void *agg_rec, int init_flag) +{ + struct darshan_posix_file *psx_rec = (struct darshan_posix_file *)rec; + struct darshan_posix_file *agg_psx_rec = (struct darshan_posix_file *)agg_rec; + 
int i, j, k; + int total_count; + int64_t tmp_val[4]; + int64_t tmp_cnt[4]; + int tmp_ndx; + double old_M; + double psx_time = psx_rec->fcounters[POSIX_F_READ_TIME] + + psx_rec->fcounters[POSIX_F_WRITE_TIME] + + psx_rec->fcounters[POSIX_F_META_TIME]; + double psx_bytes = (double)psx_rec->counters[POSIX_BYTES_READ] + + psx_rec->counters[POSIX_BYTES_WRITTEN]; + struct var_t *var_time_p = (struct var_t *) + ((char *)rec + sizeof(struct darshan_posix_file)); + struct var_t *var_bytes_p = (struct var_t *) + ((char *)var_time_p + sizeof(struct var_t)); + + for(i = 0; i < POSIX_NUM_INDICES; i++) + { + switch(i) + { + case POSIX_OPENS: + case POSIX_READS: + case POSIX_WRITES: + case POSIX_SEEKS: + case POSIX_STATS: + case POSIX_MMAPS: + case POSIX_FOPENS: + case POSIX_FREADS: + case POSIX_FWRITES: + case POSIX_FSEEKS: + case POSIX_FSYNCS: + case POSIX_FDSYNCS: + case POSIX_BYTES_READ: + case POSIX_BYTES_WRITTEN: + case POSIX_CONSEC_READS: + case POSIX_CONSEC_WRITES: + case POSIX_SEQ_READS: + case POSIX_SEQ_WRITES: + case POSIX_RW_SWITCHES: + case POSIX_MEM_NOT_ALIGNED: + case POSIX_FILE_NOT_ALIGNED: + case POSIX_SIZE_READ_0_100: + case POSIX_SIZE_READ_100_1K: + case POSIX_SIZE_READ_1K_10K: + case POSIX_SIZE_READ_10K_100K: + case POSIX_SIZE_READ_100K_1M: + case POSIX_SIZE_READ_1M_4M: + case POSIX_SIZE_READ_4M_10M: + case POSIX_SIZE_READ_10M_100M: + case POSIX_SIZE_READ_100M_1G: + case POSIX_SIZE_READ_1G_PLUS: + case POSIX_SIZE_WRITE_0_100: + case POSIX_SIZE_WRITE_100_1K: + case POSIX_SIZE_WRITE_1K_10K: + case POSIX_SIZE_WRITE_10K_100K: + case POSIX_SIZE_WRITE_100K_1M: + case POSIX_SIZE_WRITE_1M_4M: + case POSIX_SIZE_WRITE_4M_10M: + case POSIX_SIZE_WRITE_10M_100M: + case POSIX_SIZE_WRITE_100M_1G: + case POSIX_SIZE_WRITE_1G_PLUS: + /* sum */ + agg_psx_rec->counters[i] += psx_rec->counters[i]; + break; + case POSIX_MODE: + case POSIX_MEM_ALIGNMENT: + case POSIX_FILE_ALIGNMENT: + /* just set to the input value */ + agg_psx_rec->counters[i] = psx_rec->counters[i]; + break; + 
case POSIX_MAX_BYTE_READ: + case POSIX_MAX_BYTE_WRITTEN: + /* max */ + if(psx_rec->counters[i] > agg_psx_rec->counters[i]) + { + agg_psx_rec->counters[i] = psx_rec->counters[i]; + } + break; + case POSIX_MAX_READ_TIME_SIZE: + case POSIX_MAX_WRITE_TIME_SIZE: + case POSIX_FASTEST_RANK: + case POSIX_FASTEST_RANK_BYTES: + case POSIX_SLOWEST_RANK: + case POSIX_SLOWEST_RANK_BYTES: + /* these are set with the FP counters */ + break; + case POSIX_STRIDE1_STRIDE: + case POSIX_ACCESS1_ACCESS: + /* increment common value counters */ + + /* first, collapse duplicates */ + for(j = i; j < i + 4; j++) + { + for(k = 0; k < 4; k++) + { + if(agg_psx_rec->counters[i + k] == psx_rec->counters[j]) + { + agg_psx_rec->counters[i + k + 4] += psx_rec->counters[j + 4]; + psx_rec->counters[j] = psx_rec->counters[j + 4] = 0; + } + } + } + + /* second, add new counters */ + for(j = i; j < i + 4; j++) + { + tmp_ndx = 0; + memset(tmp_val, 0, 4 * sizeof(int64_t)); + memset(tmp_cnt, 0, 4 * sizeof(int64_t)); + + if(psx_rec->counters[j] == 0) break; + for(k = 0; k < 4; k++) + { + if(agg_psx_rec->counters[i + k] == psx_rec->counters[j]) + { + total_count = agg_psx_rec->counters[i + k + 4] + + psx_rec->counters[j + 4]; + break; + } + } + if(k == 4) total_count = psx_rec->counters[j + 4]; + + for(k = 0; k < 4; k++) + { + if((agg_psx_rec->counters[i + k + 4] > total_count) || + ((agg_psx_rec->counters[i + k + 4] == total_count) && + (agg_psx_rec->counters[i + k] > psx_rec->counters[j]))) + { + tmp_val[tmp_ndx] = agg_psx_rec->counters[i + k]; + tmp_cnt[tmp_ndx] = agg_psx_rec->counters[i + k + 4]; + tmp_ndx++; + } + else break; + } + if(tmp_ndx == 4) break; + + tmp_val[tmp_ndx] = psx_rec->counters[j]; + tmp_cnt[tmp_ndx] = psx_rec->counters[j + 4]; + tmp_ndx++; + + while(tmp_ndx != 4) + { + if(agg_psx_rec->counters[i + k] != psx_rec->counters[j]) + { + tmp_val[tmp_ndx] = agg_psx_rec->counters[i + k]; + tmp_cnt[tmp_ndx] = agg_psx_rec->counters[i + k + 4]; + tmp_ndx++; + } + k++; + } + 
memcpy(&(agg_psx_rec->counters[i]), tmp_val, 4 * sizeof(int64_t)); + memcpy(&(agg_psx_rec->counters[i + 4]), tmp_cnt, 4 * sizeof(int64_t)); + } + break; + case POSIX_STRIDE2_STRIDE: + case POSIX_STRIDE3_STRIDE: + case POSIX_STRIDE4_STRIDE: + case POSIX_STRIDE1_COUNT: + case POSIX_STRIDE2_COUNT: + case POSIX_STRIDE3_COUNT: + case POSIX_STRIDE4_COUNT: + case POSIX_ACCESS2_ACCESS: + case POSIX_ACCESS3_ACCESS: + case POSIX_ACCESS4_ACCESS: + case POSIX_ACCESS1_COUNT: + case POSIX_ACCESS2_COUNT: + case POSIX_ACCESS3_COUNT: + case POSIX_ACCESS4_COUNT: + /* these are set all at once with common counters above */ + break; + default: + agg_psx_rec->counters[i] = -1; + break; + } + } + + for(i = 0; i < POSIX_F_NUM_INDICES; i++) + { + switch(i) + { + case POSIX_F_READ_TIME: + case POSIX_F_WRITE_TIME: + case POSIX_F_META_TIME: + /* sum */ + agg_psx_rec->fcounters[i] += psx_rec->fcounters[i]; + break; + case POSIX_F_OPEN_TIMESTAMP: + case POSIX_F_READ_START_TIMESTAMP: + case POSIX_F_WRITE_START_TIMESTAMP: + /* minimum non-zero */ + if((psx_rec->fcounters[i] > 0) && + ((agg_psx_rec->fcounters[i] == 0) || + (psx_rec->fcounters[i] < agg_psx_rec->fcounters[i]))) + { + agg_psx_rec->fcounters[i] = psx_rec->fcounters[i]; + } + break; + case POSIX_F_READ_END_TIMESTAMP: + case POSIX_F_WRITE_END_TIMESTAMP: + case POSIX_F_CLOSE_TIMESTAMP: + /* maximum */ + if(psx_rec->fcounters[i] > agg_psx_rec->fcounters[i]) + { + agg_psx_rec->fcounters[i] = psx_rec->fcounters[i]; + } + break; + case POSIX_F_MAX_READ_TIME: + if(psx_rec->fcounters[i] > agg_psx_rec->fcounters[i]) + { + agg_psx_rec->fcounters[i] = psx_rec->fcounters[i]; + agg_psx_rec->counters[POSIX_MAX_READ_TIME_SIZE] = + psx_rec->counters[POSIX_MAX_READ_TIME_SIZE]; + } + break; + case POSIX_F_MAX_WRITE_TIME: + if(psx_rec->fcounters[i] > agg_psx_rec->fcounters[i]) + { + agg_psx_rec->fcounters[i] = psx_rec->fcounters[i]; + agg_psx_rec->counters[POSIX_MAX_WRITE_TIME_SIZE] = + psx_rec->counters[POSIX_MAX_WRITE_TIME_SIZE]; + } + break; + case 
POSIX_F_FASTEST_RANK_TIME: + if(init_flag) + { + /* set fastest rank counters according to root rank. these counters + * will be determined as the aggregation progresses. + */ + agg_psx_rec->counters[POSIX_FASTEST_RANK] = psx_rec->base_rec.rank; + agg_psx_rec->counters[POSIX_FASTEST_RANK_BYTES] = psx_bytes; + agg_psx_rec->fcounters[POSIX_F_FASTEST_RANK_TIME] = psx_time; + } + + if(psx_time < agg_psx_rec->fcounters[POSIX_F_FASTEST_RANK_TIME]) + { + agg_psx_rec->counters[POSIX_FASTEST_RANK] = psx_rec->base_rec.rank; + agg_psx_rec->counters[POSIX_FASTEST_RANK_BYTES] = psx_bytes; + agg_psx_rec->fcounters[POSIX_F_FASTEST_RANK_TIME] = psx_time; + } + break; + case POSIX_F_SLOWEST_RANK_TIME: + if(init_flag) + { + /* set slowest rank counters according to root rank. these counters + * will be determined as the aggregation progresses. + */ + agg_psx_rec->counters[POSIX_SLOWEST_RANK] = psx_rec->base_rec.rank; + agg_psx_rec->counters[POSIX_SLOWEST_RANK_BYTES] = psx_bytes; + agg_psx_rec->fcounters[POSIX_F_SLOWEST_RANK_TIME] = psx_time; + } + + if(psx_time > agg_psx_rec->fcounters[POSIX_F_SLOWEST_RANK_TIME]) + { + agg_psx_rec->counters[POSIX_SLOWEST_RANK] = psx_rec->base_rec.rank; + agg_psx_rec->counters[POSIX_SLOWEST_RANK_BYTES] = psx_bytes; + agg_psx_rec->fcounters[POSIX_F_SLOWEST_RANK_TIME] = psx_time; + } + break; + case POSIX_F_VARIANCE_RANK_TIME: + if(init_flag) + { + var_time_p->n = 1; + var_time_p->M = psx_time; + var_time_p->S = 0; + } + else + { + old_M = var_time_p->M; + + var_time_p->n++; + var_time_p->M += (psx_time - var_time_p->M) / var_time_p->n; + var_time_p->S += (psx_time - var_time_p->M) * (psx_time - old_M); + + agg_psx_rec->fcounters[POSIX_F_VARIANCE_RANK_TIME] = + var_time_p->S / var_time_p->n; + } + break; + case POSIX_F_VARIANCE_RANK_BYTES: + if(init_flag) + { + var_bytes_p->n = 1; + var_bytes_p->M = psx_bytes; + var_bytes_p->S = 0; + } + else + { + old_M = var_bytes_p->M; + + var_bytes_p->n++; + var_bytes_p->M += (psx_bytes - var_bytes_p->M) / 
var_bytes_p->n; + var_bytes_p->S += (psx_bytes - var_bytes_p->M) * (psx_bytes - old_M); + + agg_psx_rec->fcounters[POSIX_F_VARIANCE_RANK_BYTES] = + var_bytes_p->S / var_bytes_p->n; + } + break; + default: + agg_psx_rec->fcounters[i] = -1; + break; + } + } + + return; +} + /* * Local variables: * c-indent-level: 4 diff --git a/doc/darshan-modularization-design-notes.txt b/doc/darshan-modularization-design-notes.txt deleted file mode 100644 index be6673e7896f6957ba43e7cd5df9484ffa692f32..0000000000000000000000000000000000000000 --- a/doc/darshan-modularization-design-notes.txt +++ /dev/null @@ -1,112 +0,0 @@ -Rough design notes on modularizing Darshan -2014-09-24 ------------------------- - -- Darshan is split into two parts (subdirs in the same repo): - - runtime: runtime instrumentation for MPI programs - - util: post-processing of logs - -Runtime design ----------------- - -- current code has the following responsibilities: - - init: - - set up data structures - - during runtime: - - track file names and handles - - memory allocation - - intercepting function calls - - updating counters - - shutdown: - - identify shared files - - aggregation/reduction - - compression - - write log - -- propose division of code in modular runtime library: - (these aren't literally separate libraries, they are probably all - combined): - - core lib: - - central component that modules register with, coordinates shutdown - - modules: - - posix, mpi-io, pnetcdf, hdf5, asg, etc. 
- - register with the core lib and track statistics for a single API - - common/utility lib: - - contains utility functions - - not mandatory for a module to use this, but may make things easier - -- responsibilities of core library: - - track file names and map them to generic IDs - (keep full path names) - - tell modules how much memory they can consume - - kick off shutdown procedure - - perform generic (zlib) compression - -- at shutdown time, the core library will: - - create output file - - write header and index information - - write out filename->ID mapping - - perform its own aggregation step to identify files shared across ranks - -API: -- core API (presented by core library, used by modules): - - register(const char* name, int* runtime_mem_limit, struct mod_fns *mfns) - - lets module register with the core library, provide its name and table - of function pointers, and get back a limit on how much RAM it can - consume - - lookup_id(void* name, int len, int64* ID, int printable_flag); - - used by module to convert a file name to a generic ID. printable_flag - tells Darshan that the "name" is not a string (as in ASG use case) - -- module API (will be function pointers in struct mod_fns above, this is the - API that each module must present to the core library) - - prep_for_shutdown() - - tells the module that it should stop instrumenting and perform any - module-specific aggregation or custom compression that it wants to do - before Darshan stores its results - - get_output(void **buffer, int size) - - called by core library to get a pointer to the data that should be - written into the log file. Darshan will zlib compress it and put it - in the right position in the output file. - -- how will the asg module fit in? 
- - it doesn't have file names - - will pass in object IDs instead that will still get mapped to generic - Darshan IDs just like a file name would have - - set flag telling Darshan that the "name" won't be printable - -- compiler script: - - how much do we want to modularize here? - - don't need to do this initially, but we could have the compiler script - call out to a predefined directory to look for scripts or files that let - each module describe the linker arguments to add - - avoid extremely large ld arguments - -- utility library: - - this is the part run to process existing logs - - file format: - - - header (endianness, version number, etc.) - - job information (cmd line, start time, end time, etc.) - - indices - - location/size of name->id mapping table - - location/size of each module's opaque data (with name) - - table of name->id mapping - - needs to handle variable length names (some of which won't be - printable) - - format it however makes sense for parsing - - compress this part since it will often contain mostly text - - opaque blobs containing data for each module - - modules will refer to files using ID from name->id table, won't - store full paths here - - - each module can define its own parser, grapher, etc. as needed - - for convenience we may integrate posix and mpi-io support into the default - darshan tools - -- development notes - - do development in git branch - - ignore compatibility (we'll work that out later) - - strip down to basic example - - just do one or two posix counters to start, but exercise all of the - API and code organization stuff diff --git a/doc/darshan-modularization-whiteboard.pdf b/doc/darshan-modularization-whiteboard.pdf deleted file mode 100644 index a8b98a016866695810109ea4794448cb115316b9..0000000000000000000000000000000000000000 Binary files a/doc/darshan-modularization-whiteboard.pdf and /dev/null differ