Commit a22233d7 authored by Shane Snyder

improve slurm epilog script

epilog will now skip on-node compression if only a single compute
node is used and will generate the final log file instead
parent c276e4c2
......@@ -391,8 +391,7 @@ void darshan_core_shutdown()
if(getenv("DARSHAN_INTERNAL_TIMING"))
internal_timing_flag = 1;
if(internal_timing_flag)
start_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();
start_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();
/* disable darshan-core while we shut down */
DARSHAN_CORE_LOCK();
......
......@@ -11,8 +11,6 @@ DARSHAN_INSTALL_DIR=@prefix@
# use the log dir specified at configure time
DARSHAN_LOG_DIR=@__DARSHAN_LOG_PATH@
NODE_NAME=$(uname -n)
# use the default mmap log directory (/tmp), unless the
# env variable is set to something
if [ -z "$DARSHAN_MMAP_LOGPATH" ]; then
......@@ -24,36 +22,54 @@ fi
DARSHAN_MMAP_LOG_GLOB=${DARSHAN_MMAP_LOG_DIR}/*id${SLURM_JOB_ID}*.darshan
# if no mmap logs found for this job, we have nothing to do
DARSHAN_TEST_LOG=$(ls $DARSHAN_MMAP_LOG_GLOB 2>/dev/null | head -n 1)
if [ -z $DARSHAN_TEST_LOG ]; then
DARSHAN_MMAP_LOG1=$(ls $DARSHAN_MMAP_LOG_GLOB 2>/dev/null | head -n 1)
if [ -z $DARSHAN_MMAP_LOG1 ]; then
exit 0
fi
# get the job start time from the first log file
JOB_START_DATE=$(${DARSHAN_INSTALL_DIR}/bin/darshan-parser $DARSHAN_TEST_LOG |
JOB_START_DATE=$(${DARSHAN_INSTALL_DIR}/bin/darshan-parser $DARSHAN_MMAP_LOG1 |
grep "# start_time_asci" | cut -d':' -f 2- | cut -d' ' -f 2-)
OUTLOG_YEAR=$(date --date="$(printf "$JOB_START_DATE")" +"%Y")
OUTLOG_MON=$(date --date="$(printf "$JOB_START_DATE")" +"%-m")
OUTLOG_DAY=$(date --date="$(printf "$JOB_START_DATE")" +"%-d")
OUTLOG_SECS=$((
OUTPUT_YEAR=$(date --date="$(printf "$JOB_START_DATE")" +"%Y")
OUTPUT_MON=$(date --date="$(printf "$JOB_START_DATE")" +"%-m")
OUTPUT_DAY=$(date --date="$(printf "$JOB_START_DATE")" +"%-d")
OUTPUT_SECS=$((
($(date --date="$(printf "$JOB_START_DATE")" +"%-H") * 60 * 60) +
($(date --date="$(printf "$JOB_START_DATE")" +"%-M") * 60) +
($(date --date="$(printf "$JOB_START_DATE")" +"%-S"))
))
MMAP_LOG_PRE=$(basename $DARSHAN_TEST_LOG | cut -d'_' -f 1-3)
LOG_NAME_PRE=$(basename $DARSHAN_MMAP_LOG1 | cut -d'_' -f 1-3)
# construct full name of directory to store output log(s)
OUTPUT_LOG_DIR=${DARSHAN_LOG_DIR}/${OUTPUT_YEAR}/${OUTPUT_MON}/${OUTPUT_DAY}/
OUTPUT_NAME_PRE=${LOG_NAME_PRE}_${OUTPUT_MON}-${OUTPUT_DAY}-${OUTPUT_SECS}
if [ $SLURM_NNODES -gt 1 ]; then
NODE_LOG_DIR=${OUTPUT_LOG_DIR}/${OUTPUT_NAME_PRE}
NODE_NAME=$(uname -n)
# construct full output log directory name
OUTLOG_DIR=${DARSHAN_LOG_DIR}/${OUTLOG_YEAR}/${OUTLOG_MON}/${OUTLOG_DAY}/
OUTLOG_DIR=${OUTLOG_DIR}/${MMAP_LOG_PRE}_${OUTLOG_MON}-${OUTLOG_DAY}-${OUTLOG_SECS}
# multiple nodes, create a node log directory for everyone to write to
mkdir -p $NODE_LOG_DIR
# TODO: do shared reduction and skip mkdir if just one compute node?
# construct the per-node log file and store in the output directory
$DARSHAN_INSTALL_DIR/bin/darshan-merge \
--output ${NODE_LOG_DIR}/${LOG_NAME_PRE}_${NODE_NAME}.darshan \
$DARSHAN_MMAP_LOG_GLOB
else
TMP_LOG=${OUTPUT_NAME_PRE}.darshan
# single node, just create the final output darshan log
# Time the merge: the elapsed seconds are embedded in the final log name.
LOG_WRITE_START=$(date +%s)
# The darshan-merge invocation spans three lines and must be a single
# command, so every line except the last ends with a continuation
# backslash. Without the backslash after the program name, darshan-merge
# runs with no arguments and "--shared-redux" is executed as a separate
# (nonexistent) command.
# DARSHAN_MMAP_LOG_GLOB is intentionally left unquoted so the shell
# expands it to the list of per-process mmap log files.
"$DARSHAN_INSTALL_DIR"/bin/darshan-merge \
    --shared-redux --output "${OUTPUT_LOG_DIR}/${TMP_LOG}" \
    $DARSHAN_MMAP_LOG_GLOB
LOG_WRITE_END=$(date +%s)
# create the output directory for this job
mkdir -p $OUTLOG_DIR
WRITE_TM=$(($LOG_WRITE_END - $LOG_WRITE_START + 1))
FINAL_LOG=${OUTPUT_NAME_PRE}-${RANDOM}_${WRITE_TM}.darshan
# construct the per-node log files and store in the output directory
$DARSHAN_INSTALL_DIR/bin/darshan-merge --output ${OUTLOG_DIR}/${MMAP_LOG_PRE}_${NODE_NAME}.darshan \
$DARSHAN_MMAP_LOG_GLOB
mv ${OUTPUT_LOG_DIR}/${TMP_LOG} ${OUTPUT_LOG_DIR}/${FINAL_LOG}
fi
exit 0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment