From 875506df4e5afdfb6c47f7b38a27a6be577f4364 Mon Sep 17 00:00:00 2001
From: Phil Carns
Date: Wed, 7 Aug 2019 19:32:55 +0000
Subject: [PATCH] add 3-way tcp shootout script for cooley

- may need some tweaks to confirm/correct which interface is used
---
Notes (below the "---" marker, so not part of the commit message):
the scripts in this revision quote their expansions, use $(...) instead of
backticks, and abort early when SANDBOX is unset; the abbreviated blob ids
on the "index" lines still refer to the originally posted file contents.

 .../cooley/margo-regression-tcp.qsub          | 64 ++++++++++++
 perf-regression/cooley/packages-tcp.yaml      | 94 ++++++++++++++++++
 perf-regression/cooley/run-regression-tcp.sh  | 97 +++++++++++++++++++
 3 files changed, 255 insertions(+)
 create mode 100755 perf-regression/cooley/margo-regression-tcp.qsub
 create mode 100644 perf-regression/cooley/packages-tcp.yaml
 create mode 100755 perf-regression/cooley/run-regression-tcp.sh

diff --git a/perf-regression/cooley/margo-regression-tcp.qsub b/perf-regression/cooley/margo-regression-tcp.qsub
new file mode 100755
index 0000000..9f9467c
--- /dev/null
+++ b/perf-regression/cooley/margo-regression-tcp.qsub
@@ -0,0 +1,64 @@
+#!/bin/bash
+#COBALT -n 2
+#COBALT -t 30
+#COBALT --mode script
+#COBALT -A radix-io
+#COBALT -q ibleaf3-debug
+
+# Cobalt job script: for each TCP-capable Mercury transport, run the
+# margo-p2p-latency and margo-p2p-bw benchmarks as a two-process mpirun job,
+# addressing the first and last hosts listed in $COBALT_NODEFILE.
+# SANDBOX must be supplied by the submitter (qsub --env SANDBOX=...).
+
+export HOME="${SANDBOX:?SANDBOX must be set, e.g. qsub --env SANDBOX=/path}"
+
+. "$SANDBOX/spack/share/spack/setup-env.sh"
+spack load -r ssg
+
+module list
+
+# echo "## MPI (one way, double the latency for round trip):"
+# mpirun -f $COBALT_NODEFILE -n 2 ./osu_latency
+
+echo "### NOTE: all benchmarks are using numactl to keep processes on socket 1"
+
+host0=$(head -n 1 "$COBALT_NODEFILE")
+host1=$(tail -n 1 "$COBALT_NODEFILE")
+
+for xport in bmi+tcp ofi+tcp ofi+sockets; do
+    sleep 1
+
+    echo "## Margo $xport (round trip):"
+    mpirun -f "$COBALT_NODEFILE" -n 1 numactl -N 1 -m 1 ./margo-p2p-latency -i 100000 -n "$xport://$host0:3334" : -n 1 numactl -N 1 -m 1 ./margo-p2p-latency -i 100000 -n "$xport://$host1:3334"
+    echo "## Margo $xport (bw, 1MiB):"
+    mpirun -f "$COBALT_NODEFILE" -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 1048576 -n "$xport://$host0:3334" -c 1 -D 20 : -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 1048576 -n "$xport://$host1:3334" -c 1 -D 20
+    echo "## Margo $xport (bw, 1MiB, 8x concurrency):"
+    mpirun -f "$COBALT_NODEFILE" -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 1048576 -n "$xport://$host0:3334" -c 8 -D 20 : -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 1048576 -n "$xport://$host1:3334" -c 8 -D 20
+    echo "## Margo $xport (bw, 8MiB):"
+    mpirun -f "$COBALT_NODEFILE" -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 8388608 -n "$xport://$host0:3334" -c 1 -D 20 : -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 8388608 -n "$xport://$host1:3334" -c 1 -D 20
+    echo "## Margo $xport (bw, 8MiB, 8x concurrency):"
+    mpirun -f "$COBALT_NODEFILE" -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 8388608 -n "$xport://$host0:3334" -c 8 -D 20 : -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 8388608 -n "$xport://$host1:3334" -c 8 -D 20
+    echo "## Margo $xport (bw, 1MB unaligned):"
+    mpirun -f "$COBALT_NODEFILE" -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 1000000 -n "$xport://$host0:3334" -c 1 -D 20 : -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 1000000 -n "$xport://$host1:3334" -c 1 -D 20
+    echo "## Margo $xport (bw, 1MB unaligned, 8x concurrency):"
+    mpirun -f "$COBALT_NODEFILE" -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 1000000 -n "$xport://$host0:3334" -c 8 -D 20 : -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 1000000 -n "$xport://$host1:3334" -c 8 -D 20
+
+    sleep 1
+
+    echo "## Margo $xport (round trip, Hg busy spin):"
+    mpirun -f "$COBALT_NODEFILE" -n 1 numactl -N 1 -m 1 ./margo-p2p-latency -i 100000 -n "$xport://$host0:3334" -t 0,0 : -n 1 numactl -N 1 -m 1 ./margo-p2p-latency -i 100000 -n "$xport://$host1:3334" -t 0,0
+    echo "## Margo $xport (bw, 1MiB, Hg busy spin):"
+    mpirun -f "$COBALT_NODEFILE" -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 1048576 -n "$xport://$host0:3334" -c 1 -D 20 -t 0,0 : -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 1048576 -n "$xport://$host1:3334" -c 1 -D 20 -t 0,0
+    echo "## Margo $xport (bw, 1MiB, 8x concurrency, Hg busy spin):"
+    mpirun -f "$COBALT_NODEFILE" -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 1048576 -n "$xport://$host0:3334" -c 8 -D 20 -t 0,0 : -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 1048576 -n "$xport://$host1:3334" -c 8 -D 20 -t 0,0
+    echo "## Margo $xport (bw, 8MiB, Hg busy spin):"
+    mpirun -f "$COBALT_NODEFILE" -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 8388608 -n "$xport://$host0:3334" -c 1 -D 20 -t 0,0 : -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 8388608 -n "$xport://$host1:3334" -c 1 -D 20 -t 0,0
+    echo "## Margo $xport (bw, 8MiB, 8x concurrency, Hg busy spin):"
+    mpirun -f "$COBALT_NODEFILE" -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 8388608 -n "$xport://$host0:3334" -c 8 -D 20 -t 0,0 : -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 8388608 -n "$xport://$host1:3334" -c 8 -D 20 -t 0,0
+    echo "## Margo $xport (bw, 1MB unaligned, Hg busy spin):"
+    mpirun -f "$COBALT_NODEFILE" -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 1000000 -n "$xport://$host0:3334" -c 1 -D 20 -t 0,0 : -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 1000000 -n "$xport://$host1:3334" -c 1 -D 20 -t 0,0
+    echo "## Margo $xport (bw, 1MB unaligned, 8x concurrency, Hg busy spin):"
+    mpirun -f "$COBALT_NODEFILE" -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 1000000 -n "$xport://$host0:3334" -c 8 -D 20 -t 0,0 : -n 1 numactl -N 1 -m 1 ./margo-p2p-bw -x 1000000 -n "$xport://$host1:3334" -c 8 -D 20 -t 0,0
+
+done
+
diff --git a/perf-regression/cooley/packages-tcp.yaml b/perf-regression/cooley/packages-tcp.yaml
new file mode 100644
index 0000000..3fc555a
--- /dev/null
+++ b/perf-regression/cooley/packages-tcp.yaml
@@ -0,0 +1,94 @@
+packages:
+    openssl:
+        paths:
+            openssl@1.0.2k: /usr
+        buildable: False
+    mvapich2:
+        paths:
+            mvapich2@2.2: /soft/libraries/mpi/mvapich2/gcc
+        buildable: False
+    cmake:
+        paths:
+            cmake@3.9.1: /soft/buildtools/cmake/3.9.1
+        buildable: False
+    autoconf:
+        paths:
+            autoconf@2.69: /soft/buildtools/autotools/feb2016
+        buildable: False
+    automake:
+        paths:
+            automake@1.15: /soft/buildtools/autotools/feb2016
+        buildable: False
+    bzip2:
+        paths:
+            bzip2@1.0.6: /
+        buildable: False
+    bison:
+        paths:
+            bison@3.0.4: /
+        buildable: False
+    flex:
+        paths:
+            flex@2.5.37: /
+        buildable: False
+    coreutils:
+        paths:
+            coreutils@8.22: /usr
+        buildable: False
+    zlib:
+        paths:
+            zlib@1.2.7: /usr
+        buildable: False
+    tar:
+        paths:
+            tar@1.26: /
+        buildable: False
+    gettext:
+        paths:
+            gettext@0.19: /usr
+        buildable: False
+    ncurses:
+        paths:
+            ncurses@5.9: /usr
+        buildable: False
+    tcl:
+        paths:
+            tcl@8.5.13: /usr
+        buildable: False
+    findutils:
+        paths:
+            findutils@4.5.11: /usr
+        buildable: False
+    gdbm:
+        paths:
+            gdbm@1.10: /usr
+        buildable: False
+    libtool:
+        paths:
+            libtool@2.4.2: /usr
+        buildable: False
+    perl:
+        paths:
+            perl@5.16.3: /usr
+        buildable: False
+    readline:
+        paths:
+            readline@6.2: /usr
+        buildable: False
+    texinfo:
+        paths:
+            texinfo@5.1: /usr
+        buildable: False
+    ssg:
+        variants: +mpi
+    libfabric:
+        variants: fabrics=sockets,tcp,rxm
+    mercury:
+        variants: ~boostsys+ofi+bmi
+    libxml2:
+        paths:
+            libxml2@2.9.1: /usr/
+        buildable: False
+    all:
+        providers:
+            mpi: [mvapich2,mpich,openmpi]
diff --git a/perf-regression/cooley/run-regression-tcp.sh b/perf-regression/cooley/run-regression-tcp.sh
new file mode 100755
index 0000000..301c6db
--- /dev/null
+++ b/perf-regression/cooley/run-regression-tcp.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+
+# This is a shell script to be run from a login node of the Cooley system at
+# the ALCF, that will download, compile, and execute Mochi performance
+# regression tests, including any dependencies
+
+# This variation is meant exclusively to test TCP performance using all
+# available methods in Mercury.
+
+# exit on any error
+set -e
+
+# grab helpful things (especially newer compiler) from softenv
+source /etc/profile.d/00softenv.sh
+soft add +gcc-8.2.0
+soft add +autotools-feb2016
+soft add +cmake-3.9.1
+soft add +mvapich2
+
+# directory the script is launched from; must hold the .qsub and .yaml files
+ORIGIN=$PWD
+# scratch area for builds
+SANDBOX=$PWD/mochi-regression-sandbox-$$
+# install destination
+PREFIX=$PWD/mochi-regression-install-$$
+# job submission dir
+JOBDIR=$PWD/mochi-regression-job-$$
+# modify HOME env variable so that we don't perturb ~/.spack/ files for the
+# users calling this script
+export HOME=$SANDBOX
+
+mkdir "$SANDBOX"
+mkdir "$PREFIX"
+mkdir "$JOBDIR"
+cp "$ORIGIN/margo-regression-tcp.qsub" "$JOBDIR"
+
+# set up build environment
+cd "$SANDBOX"
+git clone https://github.com/spack/spack.git
+git clone https://xgitlab.cels.anl.gov/sds/sds-repo.git
+git clone https://xgitlab.cels.anl.gov/sds/sds-tests.git
+
+echo "=== BUILD SPACK PACKAGES AND LOAD ==="
+. "$SANDBOX/spack/share/spack/setup-env.sh"
+spack compiler find
+spack compilers
+
+# use our own packages.yaml for cooley-specific preferences
+cp "$ORIGIN/packages-tcp.yaml" "$SPACK_ROOT/etc/spack/packages.yaml"
+# add external repo for mochi.  Note that this will not modify the
+# user's ~/.spack/ files because we modified $HOME above
+spack repo add "${SANDBOX}/sds-repo"
+# sanity check
+spack repo list
+# underlying tools needed by spack
+spack bootstrap
+# clean out any stray packages from previous runs, just in case
+spack uninstall -R -y argobots mercury rdma-core libfabric bmi || true
+# ssg acts as our "apex" package here, causing several other packages to build
+spack install ssg^mercury@master
+# deliberately repeat setup-env step after building modules to ensure
+# that we pick up the right module paths
+. "$SANDBOX/spack/share/spack/setup-env.sh"
+# load ssg because it is needed by things compiled outside of
+# spack later in this script
+spack load -r ssg
+
+# sds-tests
+echo "=== BUILDING SDS TEST PROGRAMS ==="
+cd "$SANDBOX/sds-tests"
+libtoolize
+./prepare.sh
+mkdir build
+cd build
+../configure --prefix="$PREFIX" CC=mpicc
+make -j 3
+make install
+
+# set up job to run
+echo "=== SUBMITTING AND WAITING FOR JOB ==="
+cp "$PREFIX/bin/margo-p2p-latency" "$JOBDIR"
+cp "$PREFIX/bin/margo-p2p-bw" "$JOBDIR"
+cd "$JOBDIR"
+
+# note: previously we also set --env LD_LIBRARY_PATH=$PREFIX/lib, hopefully no longer needed
+JOBID=$(qsub --env SANDBOX="$SANDBOX" ./margo-regression-tcp.qsub)
+cqwait "$JOBID"
+
+echo "=== JOB DONE, COLLECTING AND SENDING RESULTS ==="
+# gather output, strip out funny characters, mail
+cat "$JOBID".* > "combined.$JOBID.txt"
+dos2unix "combined.$JOBID.txt"
+mailx -s "mochi-regression (cooley, TCP/IP)" sds-commits@lists.mcs.anl.gov < "combined.$JOBID.txt"
+
+cd /tmp
+# rm -rf $SANDBOX
+# rm -rf $PREFIX
-- 
2.26.2