Commit e245da9d authored by Shane Snyder
Browse files

Merge branch 'dev-270-lustre-ioctl-crash' into 'master'

modify Darshan's Lustre module to use fgetxattr for getting stripe info

See merge request !89
parents 7d4a80ab 332f4ebb
...@@ -623,6 +623,7 @@ ac_subst_vars='LTLIBOBJS ...@@ -623,6 +623,7 @@ ac_subst_vars='LTLIBOBJS
LIBOBJS LIBOBJS
DARSHAN_MDHIM_LD_OPTS DARSHAN_MDHIM_LD_OPTS
BUILD_MDHIM_MODULE BUILD_MDHIM_MODULE
DARSHAN_LUSTRE_LD_FLAGS
BUILD_LUSTRE_MODULE BUILD_LUSTRE_MODULE
BUILD_BGQ_MODULE BUILD_BGQ_MODULE
DARSHAN_HDF5_LD_FLAGS DARSHAN_HDF5_LD_FLAGS
...@@ -684,7 +685,6 @@ infodir ...@@ -684,7 +685,6 @@ infodir
docdir docdir
oldincludedir oldincludedir
includedir includedir
runstatedir
localstatedir localstatedir
sharedstatedir sharedstatedir
sysconfdir sysconfdir
...@@ -779,7 +779,6 @@ datadir='${datarootdir}' ...@@ -779,7 +779,6 @@ datadir='${datarootdir}'
sysconfdir='${prefix}/etc' sysconfdir='${prefix}/etc'
sharedstatedir='${prefix}/com' sharedstatedir='${prefix}/com'
localstatedir='${prefix}/var' localstatedir='${prefix}/var'
runstatedir='${localstatedir}/run'
includedir='${prefix}/include' includedir='${prefix}/include'
oldincludedir='/usr/include' oldincludedir='/usr/include'
docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
...@@ -1032,15 +1031,6 @@ do ...@@ -1032,15 +1031,6 @@ do
| -silent | --silent | --silen | --sile | --sil) | -silent | --silent | --silen | --sile | --sil)
silent=yes ;; silent=yes ;;
-runstatedir | --runstatedir | --runstatedi | --runstated \
| --runstate | --runstat | --runsta | --runst | --runs \
| --run | --ru | --r)
ac_prev=runstatedir ;;
-runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
| --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
| --run=* | --ru=* | --r=*)
runstatedir=$ac_optarg ;;
-sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
ac_prev=sbindir ;; ac_prev=sbindir ;;
-sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
...@@ -1178,7 +1168,7 @@ fi ...@@ -1178,7 +1168,7 @@ fi
for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
datadir sysconfdir sharedstatedir localstatedir includedir \ datadir sysconfdir sharedstatedir localstatedir includedir \
oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
libdir localedir mandir runstatedir libdir localedir mandir
do do
eval ac_val=\$$ac_var eval ac_val=\$$ac_var
# Remove trailing slashes. # Remove trailing slashes.
...@@ -1331,7 +1321,6 @@ Fine tuning of the installation directories: ...@@ -1331,7 +1321,6 @@ Fine tuning of the installation directories:
--sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sysconfdir=DIR read-only single-machine data [PREFIX/etc]
--sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
--localstatedir=DIR modifiable single-machine data [PREFIX/var] --localstatedir=DIR modifiable single-machine data [PREFIX/var]
--runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run]
--libdir=DIR object code libraries [EPREFIX/lib] --libdir=DIR object code libraries [EPREFIX/lib]
--includedir=DIR C header files [PREFIX/include] --includedir=DIR C header files [PREFIX/include]
--oldincludedir=DIR C header files for non-gcc [/usr/include] --oldincludedir=DIR C header files for non-gcc [/usr/include]
...@@ -4338,9 +4327,53 @@ fi ...@@ -4338,9 +4327,53 @@ fi
# if lustre module not disabled, check for needed Lustre module header # if lustre module not disabled, check for needed Lustre module header
if test x$enable_lustre_mod != xno; then if test x$enable_lustre_mod != xno; then
ac_fn_c_check_header_mongrel "$LINENO" "lustre/lustre_user.h" "ac_cv_header_lustre_lustre_user_h" "$ac_includes_default" ac_fn_c_check_header_mongrel "$LINENO" "lustre/lustreapi.h" "ac_cv_header_lustre_lustreapi_h" "$ac_includes_default"
if test "x$ac_cv_header_lustre_lustre_user_h" = xyes; then : if test "x$ac_cv_header_lustre_lustreapi_h" = xyes; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for llapi_layout_get_by_xattr in -llustreapi" >&5
$as_echo_n "checking for llapi_layout_get_by_xattr in -llustreapi... " >&6; }
if ${ac_cv_lib_lustreapi_llapi_layout_get_by_xattr+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
LIBS="-llustreapi $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char llapi_layout_get_by_xattr ();
int
main ()
{
return llapi_layout_get_by_xattr ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
ac_cv_lib_lustreapi_llapi_layout_get_by_xattr=yes
else
ac_cv_lib_lustreapi_llapi_layout_get_by_xattr=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_lustreapi_llapi_layout_get_by_xattr" >&5
$as_echo "$ac_cv_lib_lustreapi_llapi_layout_get_by_xattr" >&6; }
if test "x$ac_cv_lib_lustreapi_llapi_layout_get_by_xattr" = xyes; then :
BUILD_LUSTRE_MODULE=1 BUILD_LUSTRE_MODULE=1
DARSHAN_LUSTRE_LD_FLAGS="-llustreapi"
else
if test "x$enable_lustre_mod" = xyes; then :
as_fn_error $? "Cannot find required llapi_layout_get_by_xattr function for the Lustre module" "$LINENO" 5
fi
fi
else else
if test "x$enable_lustre_mod" = xyes; then : if test "x$enable_lustre_mod" = xyes; then :
as_fn_error $? "Cannot find required headers for the Lustre module" "$LINENO" 5 as_fn_error $? "Cannot find required headers for the Lustre module" "$LINENO" 5
...@@ -5067,6 +5100,7 @@ DARSHAN_VERSION="3.2.1" ...@@ -5067,6 +5100,7 @@ DARSHAN_VERSION="3.2.1"
ac_config_files="$ac_config_files Makefile darshan-mk-log-dirs.pl darshan-gen-cc.pl darshan-gen-cxx.pl darshan-gen-fortran.pl darshan-config share/craype-1.x/darshan-module share/craype-2.x/darshan-module lib/pkgconfig/darshan-runtime.pc share/mpi-profile/darshan-cc.conf share/mpi-profile/darshan-cxx.conf share/mpi-profile/darshan-f.conf share/mpi-profile/darshan-cc-static.conf share/mpi-profile/darshan-cxx-static.conf share/mpi-profile/darshan-f-static.conf share/ld-opts/darshan-base-ld-opts share/ld-opts/darshan-stdio-ld-opts share/ld-opts/darshan-hdf5-ld-opts" ac_config_files="$ac_config_files Makefile darshan-mk-log-dirs.pl darshan-gen-cc.pl darshan-gen-cxx.pl darshan-gen-fortran.pl darshan-config share/craype-1.x/darshan-module share/craype-2.x/darshan-module lib/pkgconfig/darshan-runtime.pc share/mpi-profile/darshan-cc.conf share/mpi-profile/darshan-cxx.conf share/mpi-profile/darshan-f.conf share/mpi-profile/darshan-cc-static.conf share/mpi-profile/darshan-cxx-static.conf share/mpi-profile/darshan-f-static.conf share/ld-opts/darshan-base-ld-opts share/ld-opts/darshan-stdio-ld-opts share/ld-opts/darshan-hdf5-ld-opts"
......
...@@ -291,8 +291,13 @@ AC_ARG_ENABLE(lustre-mod, ...@@ -291,8 +291,13 @@ AC_ARG_ENABLE(lustre-mod,
[enable_lustre_mod=check]) [enable_lustre_mod=check])
# if lustre module not disabled, check for needed Lustre module header # if lustre module not disabled, check for needed Lustre module header
if test x$enable_lustre_mod != xno; then if test x$enable_lustre_mod != xno; then
AC_CHECK_HEADER([lustre/lustre_user.h], AC_CHECK_HEADER([lustre/lustreapi.h],
BUILD_LUSTRE_MODULE=1, [AC_CHECK_LIB(lustreapi, llapi_layout_get_by_xattr,
[BUILD_LUSTRE_MODULE=1
DARSHAN_LUSTRE_LD_FLAGS="-llustreapi"],
AS_IF([test "x$enable_lustre_mod" = xyes],
AC_MSG_ERROR(Cannot find required llapi_layout_get_by_xattr function for the Lustre module),
[]))],
AS_IF([test "x$enable_lustre_mod" = xyes], AS_IF([test "x$enable_lustre_mod" = xyes],
AC_MSG_ERROR(Cannot find required headers for the Lustre module), AC_MSG_ERROR(Cannot find required headers for the Lustre module),
[])) []))
...@@ -530,6 +535,7 @@ AC_SUBST(DARSHAN_HDF5_ADD_DFLUSH_LD_OPTS) ...@@ -530,6 +535,7 @@ AC_SUBST(DARSHAN_HDF5_ADD_DFLUSH_LD_OPTS)
AC_SUBST(DARSHAN_HDF5_LD_FLAGS) AC_SUBST(DARSHAN_HDF5_LD_FLAGS)
AC_SUBST(BUILD_BGQ_MODULE) AC_SUBST(BUILD_BGQ_MODULE)
AC_SUBST(BUILD_LUSTRE_MODULE) AC_SUBST(BUILD_LUSTRE_MODULE)
AC_SUBST(DARSHAN_LUSTRE_LD_FLAGS)
AC_SUBST(BUILD_MDHIM_MODULE) AC_SUBST(BUILD_MDHIM_MODULE)
AC_SUBST(DARSHAN_MDHIM_LD_OPTS) AC_SUBST(DARSHAN_MDHIM_LD_OPTS)
AC_OUTPUT(Makefile AC_OUTPUT(Makefile
......
...@@ -14,13 +14,13 @@ DARSHAN_LOG_ENV="@__DARSHAN_LOG_ENV@" ...@@ -14,13 +14,13 @@ DARSHAN_LOG_ENV="@__DARSHAN_LOG_ENV@"
# app used a library which in turn used one of those HLLs). # app used a library which in turn used one of those HLLs).
PRE_LD_FLAGS="-L$DARSHAN_LIB_PATH $DARSHAN_LD_FLAGS -ldarshan -lz -Wl,@$DARSHAN_SHARE_PATH/ld-opts/darshan-base-ld-opts" PRE_LD_FLAGS="-L$DARSHAN_LIB_PATH $DARSHAN_LD_FLAGS -ldarshan -lz -Wl,@$DARSHAN_SHARE_PATH/ld-opts/darshan-base-ld-opts"
POST_LD_FLAGS="-L$DARSHAN_LIB_PATH -ldarshan -lz -lrt -lpthread" POST_LD_FLAGS="-L$DARSHAN_LIB_PATH -ldarshan @DARSHAN_LUSTRE_LD_FLAGS@ -lz -lrt -lpthread"
# NOTE: # NOTE:
# - when dynamic linking there is no need for wrapping options, we simply # - when dynamic linking there is no need for wrapping options, we simply
# need to get the darshan symbol definitions early enough in the link # need to get the darshan symbol definitions early enough in the link
# order. We also set no-as-needed for linkers that may not identify # order. We also set no-as-needed for linkers that may not identify
DYN_LD_FLAGS="-L$DARSHAN_LIB_PATH $DARSHAN_LD_FLAGS -Wl,-rpath=$DARSHAN_LIB_PATH -Wl,-no-as-needed -ldarshan @DARSHAN_HDF5_LD_FLAGS@" DYN_LD_FLAGS="-L$DARSHAN_LIB_PATH $DARSHAN_LD_FLAGS -Wl,-rpath=$DARSHAN_LIB_PATH -Wl,-no-as-needed -ldarshan @DARSHAN_LUSTRE_LD_FLAGS@ @DARSHAN_HDF5_LD_FLAGS@"
# NOTE: # NOTE:
# - construct complete list of log path options, separated by commas. # - construct complete list of log path options, separated by commas.
......
...@@ -16,9 +16,10 @@ ...@@ -16,9 +16,10 @@
#include <stdlib.h> #include <stdlib.h>
#include <assert.h> #include <assert.h>
#include <pthread.h> #include <pthread.h>
#include <sys/ioctl.h> #include <limits.h>
#include <sys/xattr.h>
#include <lustre/lustre_user.h> #include <lustre/lustreapi.h>
#include "darshan.h" #include "darshan.h"
#include "darshan-dynamic.h" #include "darshan-dynamic.h"
...@@ -50,10 +51,6 @@ static int my_rank = -1; ...@@ -50,10 +51,6 @@ static int my_rank = -1;
#define LUSTRE_LOCK() pthread_mutex_lock(&lustre_runtime_mutex) #define LUSTRE_LOCK() pthread_mutex_lock(&lustre_runtime_mutex)
#define LUSTRE_UNLOCK() pthread_mutex_unlock(&lustre_runtime_mutex) #define LUSTRE_UNLOCK() pthread_mutex_unlock(&lustre_runtime_mutex)
#ifndef LOV_MAX_STRIPE_COUNT /* for Lustre < 2.4 */
#define LOV_MAX_STRIPE_COUNT 2000
#endif
void darshan_instrument_lustre_file(const char* filepath, int fd) void darshan_instrument_lustre_file(const char* filepath, int fd)
{ {
struct lustre_record_ref *rec_ref; struct lustre_record_ref *rec_ref;
...@@ -61,9 +58,12 @@ void darshan_instrument_lustre_file(const char* filepath, int fd) ...@@ -61,9 +58,12 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
struct darshan_fs_info fs_info; struct darshan_fs_info fs_info;
darshan_record_id rec_id; darshan_record_id rec_id;
int i; int i;
struct lov_user_md *lum; void *lustre_xattr_val;
size_t lumsize = sizeof(struct lov_user_md) + size_t lustre_xattr_size = XATTR_SIZE_MAX;
LOV_MAX_STRIPE_COUNT * sizeof(struct lov_user_ost_data); struct llapi_layout *lustre_layout;
uint64_t stripe_size;
uint64_t stripe_count;
uint64_t tmp_ost;
size_t rec_size; size_t rec_size;
int ret; int ret;
...@@ -85,23 +85,40 @@ void darshan_instrument_lustre_file(const char* filepath, int fd) ...@@ -85,23 +85,40 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
&rec_id, sizeof(darshan_record_id)); &rec_id, sizeof(darshan_record_id));
if(!rec_ref) if(!rec_ref)
{ {
/* first issue LUSTRE ioctl to see if we can get stripe data */ if ( (lustre_xattr_val = calloc(1, lustre_xattr_size)) == NULL )
/* if we can't issue ioctl, we have no counter data at all */
if ( (lum = calloc(1, lumsize)) == NULL )
{ {
LUSTRE_UNLOCK(); LUSTRE_UNLOCK();
return; return;
} }
/* find out the OST count of this file so we can allocate memory */ /* -1 means fgetxattr failed, likely because file isn't on Lustre, but maybe because
lum->lmm_magic = LOV_USER_MAGIC; * the Lustre version doesn't support this method of obtaining striping info
lum->lmm_stripe_count = LOV_MAX_STRIPE_COUNT; */
if ( (lustre_xattr_size = fgetxattr( fd, "lustre.lov", lustre_xattr_val, lustre_xattr_size)) == -1 )
{
free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
/* -1 means ioctl failed, likely because file isn't on Lustre */ /* get corresponding Lustre file layout, then extract stripe params */
if ( ioctl( fd, LL_IOC_LOV_GETSTRIPE, (void *)lum ) == -1 ) if ( (lustre_layout = llapi_layout_get_by_xattr(lustre_xattr_val, lustre_xattr_size, 0)) == NULL)
{ {
free(lum); free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
if (llapi_layout_stripe_size_get(lustre_layout, &stripe_size) == -1)
{
llapi_layout_free(lustre_layout);
free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
if (llapi_layout_stripe_count_get(lustre_layout, &stripe_count) == -1)
{
llapi_layout_free(lustre_layout);
free(lustre_xattr_val);
LUSTRE_UNLOCK(); LUSTRE_UNLOCK();
return; return;
} }
...@@ -110,7 +127,8 @@ void darshan_instrument_lustre_file(const char* filepath, int fd) ...@@ -110,7 +127,8 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
rec_ref = malloc(sizeof(*rec_ref)); rec_ref = malloc(sizeof(*rec_ref));
if(!rec_ref) if(!rec_ref)
{ {
free(lum); llapi_layout_free(lustre_layout);
free(lustre_xattr_val);
LUSTRE_UNLOCK(); LUSTRE_UNLOCK();
return; return;
} }
...@@ -120,12 +138,13 @@ void darshan_instrument_lustre_file(const char* filepath, int fd) ...@@ -120,12 +138,13 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
if(ret == 0) if(ret == 0)
{ {
free(rec_ref); free(rec_ref);
free(lum); llapi_layout_free(lustre_layout);
free(lustre_xattr_val);
LUSTRE_UNLOCK(); LUSTRE_UNLOCK();
return; return;
} }
rec_size = LUSTRE_RECORD_SIZE( lum->lmm_stripe_count ); rec_size = LUSTRE_RECORD_SIZE( stripe_count );
/* register a Lustre file record with Darshan */ /* register a Lustre file record with Darshan */
fs_info.fs_type = -1; fs_info.fs_type = -1;
...@@ -142,7 +161,8 @@ void darshan_instrument_lustre_file(const char* filepath, int fd) ...@@ -142,7 +161,8 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
darshan_delete_record_ref(&(lustre_runtime->record_id_hash), darshan_delete_record_ref(&(lustre_runtime->record_id_hash),
&rec_id, sizeof(darshan_record_id)); &rec_id, sizeof(darshan_record_id));
free(rec_ref); free(rec_ref);
free(lum); llapi_layout_free(lustre_layout);
free(lustre_xattr_val);
LUSTRE_UNLOCK(); LUSTRE_UNLOCK();
return; return;
} }
...@@ -161,12 +181,25 @@ void darshan_instrument_lustre_file(const char* filepath, int fd) ...@@ -161,12 +181,25 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
rec->counters[LUSTRE_MDTS] = -1; rec->counters[LUSTRE_MDTS] = -1;
} }
rec->counters[LUSTRE_STRIPE_SIZE] = lum->lmm_stripe_size; rec->counters[LUSTRE_STRIPE_SIZE] = stripe_size;
rec->counters[LUSTRE_STRIPE_WIDTH] = lum->lmm_stripe_count; rec->counters[LUSTRE_STRIPE_WIDTH] = stripe_count;
rec->counters[LUSTRE_STRIPE_OFFSET] = lum->lmm_stripe_offset; rec->counters[LUSTRE_STRIPE_OFFSET] = -1; // no longer captured
for ( i = 0; i < lum->lmm_stripe_count; i++ ) for ( i = 0; i < stripe_count; i++ )
rec->ost_ids[i] = lum->lmm_objects[i].l_ost_idx; {
free(lum); if (llapi_layout_ost_index_get(lustre_layout, i, &tmp_ost) == -1)
{
darshan_delete_record_ref(&(lustre_runtime->record_id_hash),
&rec_id, sizeof(darshan_record_id));
free(rec_ref);
llapi_layout_free(lustre_layout);
free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
rec->ost_ids[i] = (int64_t)tmp_ost;
}
free(lustre_xattr_val);
llapi_layout_free(lustre_layout);
rec->base_rec.id = rec_id; rec->base_rec.id = rec_id;
rec->base_rec.rank = my_rank; rec->base_rec.rank = my_rank;
......
...@@ -15,5 +15,5 @@ darshan_libdir= -L${darshan_prefix}/lib ...@@ -15,5 +15,5 @@ darshan_libdir= -L${darshan_prefix}/lib
darshan_linkopts="-Wl,@${darshan_share}/ld-opts/darshan-base-ld-opts" darshan_linkopts="-Wl,@${darshan_share}/ld-opts/darshan-base-ld-opts"
Cflags: Cflags:
Libs: ${darshan_libdir} -Wl,-rpath=${darshan_prefix}/lib -Wl,-no-as-needed -ldarshan @DARSHAN_HDF5_LD_FLAGS@ Libs: ${darshan_libdir} -Wl,-rpath=${darshan_prefix}/lib -Wl,-no-as-needed -ldarshan @DARSHAN_LUSTRE_LD_FLAGS@ @DARSHAN_HDF5_LD_FLAGS@
Libs.private: ${darshan_linkopts} ${darshan_libdir} -lfmpich -lmpichcxx -ldarshan Libs.private: ${darshan_linkopts} ${darshan_libdir} -lfmpich -lmpichcxx -ldarshan @DARSHAN_LUSTRE_LD_FLAGS@ -lz -lrt -lpthread
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment