Commit e245da9d authored by Shane Snyder
Browse files

Merge branch 'dev-270-lustre-ioctl-crash' into 'master'

modify Darshan's Lustre module to use fgetxattr for getting stripe info

See merge request !89
parents 7d4a80ab 332f4ebb
......@@ -623,6 +623,7 @@ ac_subst_vars='LTLIBOBJS
LIBOBJS
DARSHAN_MDHIM_LD_OPTS
BUILD_MDHIM_MODULE
DARSHAN_LUSTRE_LD_FLAGS
BUILD_LUSTRE_MODULE
BUILD_BGQ_MODULE
DARSHAN_HDF5_LD_FLAGS
......@@ -684,7 +685,6 @@ infodir
docdir
oldincludedir
includedir
runstatedir
localstatedir
sharedstatedir
sysconfdir
......@@ -779,7 +779,6 @@ datadir='${datarootdir}'
sysconfdir='${prefix}/etc'
sharedstatedir='${prefix}/com'
localstatedir='${prefix}/var'
runstatedir='${localstatedir}/run'
includedir='${prefix}/include'
oldincludedir='/usr/include'
docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
......@@ -1032,15 +1031,6 @@ do
| -silent | --silent | --silen | --sile | --sil)
silent=yes ;;
-runstatedir | --runstatedir | --runstatedi | --runstated \
| --runstate | --runstat | --runsta | --runst | --runs \
| --run | --ru | --r)
ac_prev=runstatedir ;;
-runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
| --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
| --run=* | --ru=* | --r=*)
runstatedir=$ac_optarg ;;
-sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
ac_prev=sbindir ;;
-sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
......@@ -1178,7 +1168,7 @@ fi
for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
datadir sysconfdir sharedstatedir localstatedir includedir \
oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
libdir localedir mandir runstatedir
libdir localedir mandir
do
eval ac_val=\$$ac_var
# Remove trailing slashes.
......@@ -1331,7 +1321,6 @@ Fine tuning of the installation directories:
--sysconfdir=DIR read-only single-machine data [PREFIX/etc]
--sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
--localstatedir=DIR modifiable single-machine data [PREFIX/var]
--runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run]
--libdir=DIR object code libraries [EPREFIX/lib]
--includedir=DIR C header files [PREFIX/include]
--oldincludedir=DIR C header files for non-gcc [/usr/include]
......@@ -4338,9 +4327,53 @@ fi
# if lustre module not disabled, check for needed Lustre module header
if test x$enable_lustre_mod != xno; then
ac_fn_c_check_header_mongrel "$LINENO" "lustre/lustre_user.h" "ac_cv_header_lustre_lustre_user_h" "$ac_includes_default"
if test "x$ac_cv_header_lustre_lustre_user_h" = xyes; then :
ac_fn_c_check_header_mongrel "$LINENO" "lustre/lustreapi.h" "ac_cv_header_lustre_lustreapi_h" "$ac_includes_default"
if test "x$ac_cv_header_lustre_lustreapi_h" = xyes; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for llapi_layout_get_by_xattr in -llustreapi" >&5
$as_echo_n "checking for llapi_layout_get_by_xattr in -llustreapi... " >&6; }
if ${ac_cv_lib_lustreapi_llapi_layout_get_by_xattr+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
LIBS="-llustreapi $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char llapi_layout_get_by_xattr ();
int
main ()
{
return llapi_layout_get_by_xattr ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
ac_cv_lib_lustreapi_llapi_layout_get_by_xattr=yes
else
ac_cv_lib_lustreapi_llapi_layout_get_by_xattr=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_lustreapi_llapi_layout_get_by_xattr" >&5
$as_echo "$ac_cv_lib_lustreapi_llapi_layout_get_by_xattr" >&6; }
if test "x$ac_cv_lib_lustreapi_llapi_layout_get_by_xattr" = xyes; then :
BUILD_LUSTRE_MODULE=1
DARSHAN_LUSTRE_LD_FLAGS="-llustreapi"
else
if test "x$enable_lustre_mod" = xyes; then :
as_fn_error $? "Cannot find required llapi_layout_get_by_xattr function for the Lustre module" "$LINENO" 5
fi
fi
else
if test "x$enable_lustre_mod" = xyes; then :
as_fn_error $? "Cannot find required headers for the Lustre module" "$LINENO" 5
......@@ -5067,6 +5100,7 @@ DARSHAN_VERSION="3.2.1"
ac_config_files="$ac_config_files Makefile darshan-mk-log-dirs.pl darshan-gen-cc.pl darshan-gen-cxx.pl darshan-gen-fortran.pl darshan-config share/craype-1.x/darshan-module share/craype-2.x/darshan-module lib/pkgconfig/darshan-runtime.pc share/mpi-profile/darshan-cc.conf share/mpi-profile/darshan-cxx.conf share/mpi-profile/darshan-f.conf share/mpi-profile/darshan-cc-static.conf share/mpi-profile/darshan-cxx-static.conf share/mpi-profile/darshan-f-static.conf share/ld-opts/darshan-base-ld-opts share/ld-opts/darshan-stdio-ld-opts share/ld-opts/darshan-hdf5-ld-opts"
......
......@@ -291,8 +291,13 @@ AC_ARG_ENABLE(lustre-mod,
[enable_lustre_mod=check])
# if lustre module not disabled, check for needed Lustre module header
if test x$enable_lustre_mod != xno; then
AC_CHECK_HEADER([lustre/lustre_user.h],
BUILD_LUSTRE_MODULE=1,
AC_CHECK_HEADER([lustre/lustreapi.h],
[AC_CHECK_LIB(lustreapi, llapi_layout_get_by_xattr,
[BUILD_LUSTRE_MODULE=1
DARSHAN_LUSTRE_LD_FLAGS="-llustreapi"],
AS_IF([test "x$enable_lustre_mod" = xyes],
AC_MSG_ERROR(Cannot find required llapi_layout_get_by_xattr function for the Lustre module),
[]))],
AS_IF([test "x$enable_lustre_mod" = xyes],
AC_MSG_ERROR(Cannot find required headers for the Lustre module),
[]))
......@@ -530,6 +535,7 @@ AC_SUBST(DARSHAN_HDF5_ADD_DFLUSH_LD_OPTS)
AC_SUBST(DARSHAN_HDF5_LD_FLAGS)
AC_SUBST(BUILD_BGQ_MODULE)
AC_SUBST(BUILD_LUSTRE_MODULE)
AC_SUBST(DARSHAN_LUSTRE_LD_FLAGS)
AC_SUBST(BUILD_MDHIM_MODULE)
AC_SUBST(DARSHAN_MDHIM_LD_OPTS)
AC_OUTPUT(Makefile
......
......@@ -14,13 +14,13 @@ DARSHAN_LOG_ENV="@__DARSHAN_LOG_ENV@"
# app used a library which in turn used one of those HLLs).
PRE_LD_FLAGS="-L$DARSHAN_LIB_PATH $DARSHAN_LD_FLAGS -ldarshan -lz -Wl,@$DARSHAN_SHARE_PATH/ld-opts/darshan-base-ld-opts"
POST_LD_FLAGS="-L$DARSHAN_LIB_PATH -ldarshan -lz -lrt -lpthread"
POST_LD_FLAGS="-L$DARSHAN_LIB_PATH -ldarshan @DARSHAN_LUSTRE_LD_FLAGS@ -lz -lrt -lpthread"
# NOTE:
# - when dynamic linking there is no need for wrapping options, we simply
# need to get the darshan symbol definitions early enough in the link
# order. We also set no-as-needed for linkers that may not identify
DYN_LD_FLAGS="-L$DARSHAN_LIB_PATH $DARSHAN_LD_FLAGS -Wl,-rpath=$DARSHAN_LIB_PATH -Wl,-no-as-needed -ldarshan @DARSHAN_HDF5_LD_FLAGS@"
DYN_LD_FLAGS="-L$DARSHAN_LIB_PATH $DARSHAN_LD_FLAGS -Wl,-rpath=$DARSHAN_LIB_PATH -Wl,-no-as-needed -ldarshan @DARSHAN_LUSTRE_LD_FLAGS@ @DARSHAN_HDF5_LD_FLAGS@"
# NOTE:
# - construct complete list of log path options, separated by commas.
......
......@@ -16,9 +16,10 @@
#include <stdlib.h>
#include <assert.h>
#include <pthread.h>
#include <sys/ioctl.h>
#include <limits.h>
#include <sys/xattr.h>
#include <lustre/lustre_user.h>
#include <lustre/lustreapi.h>
#include "darshan.h"
#include "darshan-dynamic.h"
......@@ -50,10 +51,6 @@ static int my_rank = -1;
#define LUSTRE_LOCK() pthread_mutex_lock(&lustre_runtime_mutex)
#define LUSTRE_UNLOCK() pthread_mutex_unlock(&lustre_runtime_mutex)
#ifndef LOV_MAX_STRIPE_COUNT /* for Lustre < 2.4 */
#define LOV_MAX_STRIPE_COUNT 2000
#endif
void darshan_instrument_lustre_file(const char* filepath, int fd)
{
struct lustre_record_ref *rec_ref;
......@@ -61,9 +58,12 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
struct darshan_fs_info fs_info;
darshan_record_id rec_id;
int i;
struct lov_user_md *lum;
size_t lumsize = sizeof(struct lov_user_md) +
LOV_MAX_STRIPE_COUNT * sizeof(struct lov_user_ost_data);
void *lustre_xattr_val;
size_t lustre_xattr_size = XATTR_SIZE_MAX;
struct llapi_layout *lustre_layout;
uint64_t stripe_size;
uint64_t stripe_count;
uint64_t tmp_ost;
size_t rec_size;
int ret;
......@@ -85,23 +85,40 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
&rec_id, sizeof(darshan_record_id));
if(!rec_ref)
{
/* first issue LUSTRE ioctl to see if we can get stripe data */
/* if we can't issue ioctl, we have no counter data at all */
if ( (lum = calloc(1, lumsize)) == NULL )
if ( (lustre_xattr_val = calloc(1, lustre_xattr_size)) == NULL )
{
LUSTRE_UNLOCK();
return;
}
/* find out the OST count of this file so we can allocate memory */
lum->lmm_magic = LOV_USER_MAGIC;
lum->lmm_stripe_count = LOV_MAX_STRIPE_COUNT;
/* -1 means fgetxattr failed, likely because file isn't on Lustre, but maybe because
* the Lustre version doesn't support this method of obtaining striping info
*/
if ( (lustre_xattr_size = fgetxattr( fd, "lustre.lov", lustre_xattr_val, lustre_xattr_size)) == -1 )
{
free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
/* -1 means ioctl failed, likely because file isn't on Lustre */
if ( ioctl( fd, LL_IOC_LOV_GETSTRIPE, (void *)lum ) == -1 )
/* get corresponding Lustre file layout, then extract stripe params */
if ( (lustre_layout = llapi_layout_get_by_xattr(lustre_xattr_val, lustre_xattr_size, 0)) == NULL)
{
free(lum);
free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
if (llapi_layout_stripe_size_get(lustre_layout, &stripe_size) == -1)
{
llapi_layout_free(lustre_layout);
free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
if (llapi_layout_stripe_count_get(lustre_layout, &stripe_count) == -1)
{
llapi_layout_free(lustre_layout);
free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
......@@ -110,7 +127,8 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
rec_ref = malloc(sizeof(*rec_ref));
if(!rec_ref)
{
free(lum);
llapi_layout_free(lustre_layout);
free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
......@@ -120,12 +138,13 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
if(ret == 0)
{
free(rec_ref);
free(lum);
llapi_layout_free(lustre_layout);
free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
rec_size = LUSTRE_RECORD_SIZE( lum->lmm_stripe_count );
rec_size = LUSTRE_RECORD_SIZE( stripe_count );
/* register a Lustre file record with Darshan */
fs_info.fs_type = -1;
......@@ -142,7 +161,8 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
darshan_delete_record_ref(&(lustre_runtime->record_id_hash),
&rec_id, sizeof(darshan_record_id));
free(rec_ref);
free(lum);
llapi_layout_free(lustre_layout);
free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
......@@ -161,12 +181,25 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
rec->counters[LUSTRE_MDTS] = -1;
}
rec->counters[LUSTRE_STRIPE_SIZE] = lum->lmm_stripe_size;
rec->counters[LUSTRE_STRIPE_WIDTH] = lum->lmm_stripe_count;
rec->counters[LUSTRE_STRIPE_OFFSET] = lum->lmm_stripe_offset;
for ( i = 0; i < lum->lmm_stripe_count; i++ )
rec->ost_ids[i] = lum->lmm_objects[i].l_ost_idx;
free(lum);
rec->counters[LUSTRE_STRIPE_SIZE] = stripe_size;
rec->counters[LUSTRE_STRIPE_WIDTH] = stripe_count;
rec->counters[LUSTRE_STRIPE_OFFSET] = -1; // no longer captured
for ( i = 0; i < stripe_count; i++ )
{
if (llapi_layout_ost_index_get(lustre_layout, i, &tmp_ost) == -1)
{
darshan_delete_record_ref(&(lustre_runtime->record_id_hash),
&rec_id, sizeof(darshan_record_id));
free(rec_ref);
llapi_layout_free(lustre_layout);
free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
rec->ost_ids[i] = (int64_t)tmp_ost;
}
free(lustre_xattr_val);
llapi_layout_free(lustre_layout);
rec->base_rec.id = rec_id;
rec->base_rec.rank = my_rank;
......
......@@ -15,5 +15,5 @@ darshan_libdir= -L${darshan_prefix}/lib
darshan_linkopts="-Wl,@${darshan_share}/ld-opts/darshan-base-ld-opts"
Cflags:
Libs: ${darshan_libdir} -Wl,-rpath=${darshan_prefix}/lib -Wl,-no-as-needed -ldarshan @DARSHAN_HDF5_LD_FLAGS@
Libs.private: ${darshan_linkopts} ${darshan_libdir} -lfmpich -lmpichcxx -ldarshan
Libs: ${darshan_libdir} -Wl,-rpath=${darshan_prefix}/lib -Wl,-no-as-needed -ldarshan @DARSHAN_LUSTRE_LD_FLAGS@ @DARSHAN_HDF5_LD_FLAGS@
Libs.private: ${darshan_linkopts} ${darshan_libdir} -lfmpich -lmpichcxx -ldarshan @DARSHAN_LUSTRE_LD_FLAGS@ -lz -lrt -lpthread
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment