Commit 97611636 authored by Matthieu Dorier's avatar Matthieu Dorier

added files for MCS workstation

parent 881847be
These are example scripts for executing an automated regression test on the
MCS workstations. The entire process is handled by the
"run-regression.sh" script, which is suitable for execution within a cron job.
Your $HOME/.soft file should have the following packages loaded:
+gcc-8.2.0
+git-2.10.1
+cmake-3.14.3
@default
(Another version of gcc, such as 7.3.0, should also work, as long as it
supports C++14 by default.)
#!/bin/bash
#COBALT -n 2
#COBALT -t 20
#COBALT --mode script
#COBALT -A CSC250STDM12
#COBALT -q debug-cache-quad

# Bake point-to-point bandwidth regression job.
#
# Runs ./bake-p2p-bw on two nodes in four configurations: concurrency 1 and
# 8 ("-c"), each with and without the "-t 0,0" option that the section
# headings call "Hg busy spin". Before every run the pool file in /dev/shm
# on the first node is removed and recreated with bake-mkpool.
#
# Required environment:
#   SANDBOX - directory that contains the spack checkout (spack/share/...)
#
# This script does not use "set -e": a failing benchmark does not stop the
# benchmarks that follow it.

# XXX xalt module currently eating '-M' flag for mercury-runner...disabling for now
# module unload xalt

# fail early with a clear message instead of sourcing a bogus path
: "${SANDBOX:?SANDBOX must be set, e.g. qsub --env SANDBOX=/path/to/sandbox}"

# necessary when using the udreg option in Mercury
export MPICH_GNI_NDREG_ENTRIES=1024

module swap PrgEnv-intel PrgEnv-gnu
module load cce
. "$SANDBOX/spack/share/spack/setup-env.sh"
spack load -r ssg
spack load -r bake
module list

# NOTE: rpath doesn't seem to be set correctly, and the paths we need are
# in LIBRARY_PATH instead of LD_LIBRARY_PATH
export LD_LIBRARY_PATH="$LIBRARY_PATH"

# find nodes in job. We have to do this so that we can manually specify
# in each aprun so that server ranks consistently run on node where we
# set up storage space
# shellcheck disable=SC2207 # word splitting of the node id list is intended
declare -a nodes=($(python /home/carns/bin/run_on_all_nids.py))
if (( ${#nodes[@]} < 2 )); then
  echo "error: expected at least 2 node ids, found ${#nodes[@]}" >&2
  exit 1
fi

# pool file used by every benchmark run
readonly POOL=/dev/shm/foo.dat
# address string handed to bake-p2p-bw via -n
readonly NA_ADDR="ofi+gni://ipogif0:5000"

# Run a command as a single process on the first node of the job.
# Arguments: the command and its arguments
on_first_node() {
  aprun -n 1 -N 1 -L "${nodes[0]}" "$@"
}

# Recreate the pool on the first node, then run one bake-p2p-bw configuration
# across the first two nodes.
# Arguments: $1   - section heading to print
#            $2.. - extra options appended to the bake-p2p-bw command line
run_bake_case() {
  local heading=$1
  shift
  echo "## ${heading}:"
  on_first_node hostname
  on_first_node rm -f "$POOL"
  on_first_node bake-mkpool -s 60G "$POOL"
  on_first_node ls -alh "$POOL"
  aprun -n 2 -N 1 -L "${nodes[0]},${nodes[1]}" numactl -N 0 -m 0 \
    ./bake-p2p-bw -x 16777216 -m 34359738368 -n "$NA_ADDR" -p "$POOL" "$@"
}

echo "### NOTE: all benchmarks are using numactl to keep processes on socket 0"

echo "## testing launcher placement:"
for _ in 1 2 3 4; do
  on_first_node hostname
done

run_bake_case "Bake OFI/GNI" -c 1
run_bake_case "Bake OFI/GNI (8x concurrency)" -c 8
run_bake_case "Bake OFI/GNI (Hg busy spin)" -c 1 -t 0,0
run_bake_case "Bake OFI/GNI (8x concurrency, Hg busy spin)" -c 8 -t 0,0
#!/bin/bash
#COBALT -n 2
#COBALT -t 20
#COBALT --mode script
#COBALT -A CSC250STDM12
#COBALT -q debug-cache-quad

# Margo point-to-point latency and bandwidth regression job.
#
# Runs ./margo-p2p-latency and ./margo-p2p-bw on two nodes (one process per
# node) for several transfer sizes and concurrency levels, first without and
# then with the "-t 0,0" option that the section headings call
# "Hg busy spin".
#
# Required environment:
#   SANDBOX - directory that contains the spack checkout (spack/share/...)
#
# This script does not use "set -e": a failing benchmark does not stop the
# benchmarks that follow it.

# XXX xalt module currently eating '-M' flag for mercury-runner...disabling for now
# module unload xalt

# fail early with a clear message instead of sourcing a bogus path
: "${SANDBOX:?SANDBOX must be set, e.g. qsub --env SANDBOX=/path/to/sandbox}"

# necessary when using the udreg option in Mercury
export MPICH_GNI_NDREG_ENTRIES=1024

module swap PrgEnv-intel PrgEnv-gnu
module load cce
. "$SANDBOX/spack/share/spack/setup-env.sh"
spack load -r ssg
spack load -r bake
module list

export LD_LIBRARY_PATH="$LIBRARY_PATH"

echo "### NOTE: all benchmarks are using numactl to keep processes on socket 0"

#ldd ./margo-p2p-latency

# NOTE: needed as of January 2018 to avoid conflicts between MPI and
#       libfabric GNI provider
# NOTE: doing this with -e option to aprun
# NOTE: update as of September 2018, this is no longer required now that
#       mercury has switched back to alternative registration method for GNI
# export MPICH_GNI_NDREG_ENTRIES=2000

# echo "## MPI (one way, double the latency for round trip):"
# aprun -n 2 -N 1 ./osu_latency

# address string handed to the margo benchmarks via -n
readonly NA_ADDR="ofi+gni://ipogif0:5000"

# Print a section heading and run the round-trip latency benchmark.
# Arguments: $1   - section heading to print
#            $2.. - extra options appended to the margo-p2p-latency command
run_latency() {
  local heading=$1
  shift
  echo "## ${heading}:"
  aprun -n 2 -N 1 ./margo-p2p-latency -i 100000 -n "$NA_ADDR" "$@"
}

# Print a section heading and run the bandwidth benchmark under numactl.
# Arguments: $1   - section heading to print
#            $2   - transfer size in bytes (-x)
#            $3   - concurrency (-c)
#            $4.. - extra options appended to the margo-p2p-bw command
# Building the command line here keeps every option a separate word; the
# hand-written lines previously contained "-D 20-t 0,0" (missing space), so
# two of the "Hg busy spin" runs never received "-t 0,0".
run_bw() {
  local heading=$1 xfer_size=$2 concurrency=$3
  shift 3
  echo "## ${heading}:"
  aprun -n 2 -N 1 numactl -N 0 -m 0 \
    ./margo-p2p-bw -x "$xfer_size" -n "$NA_ADDR" -c "$concurrency" -D 20 "$@"
}

run_latency "Margo OFI/GNI (round trip)"
run_bw "Margo OFI/GNI (bw, 1MiB)" 1048576 1
run_bw "Margo OFI/GNI (bw, 1MiB, 8x concurrency)" 1048576 8
run_bw "Margo OFI/GNI (bw, 8MiB)" 8388608 1
run_bw "Margo OFI/GNI (bw, 8MiB, 8x concurrency)" 8388608 8
run_bw "Margo OFI/GNI (bw, 1MB unaligned)" 1000000 1
run_bw "Margo OFI/GNI (bw, 1MB unaligned, 8x concurrency)" 1000000 8

run_latency "Margo OFI/GNI (round trip, Hg busy spin)" -t 0,0
run_bw "Margo OFI/GNI (bw, 1MiB, Hg busy spin)" 1048576 1 -t 0,0
run_bw "Margo OFI/GNI (bw, 1MiB, 8x concurrency, Hg busy spin)" 1048576 8 -t 0,0
run_bw "Margo OFI/GNI (bw, 8MiB, Hg busy spin)" 8388608 1 -t 0,0
run_bw "Margo OFI/GNI (bw, 8MiB, 8x concurrency, Hg busy spin)" 8388608 8 -t 0,0
run_bw "Margo OFI/GNI (bw, 1MB unaligned, Hg busy spin)" 1000000 1 -t 0,0
run_bw "Margo OFI/GNI (bw, 1MB unaligned, 8x concurrency, Hg busy spin)" 1000000 8 -t 0,0

# echo "## Margo BMI/TCP (round trip):"
# aprun -e MPICH_GNI_NDREG_ENTRIES=2000 -n 2 -N 1 ./margo-p2p-latency -i 100000 -n bmi+tcp://
# echo "## Margo BMI/TCP (bw):"
# aprun -e MPICH_GNI_NDREG_ENTRIES=2000 -n 2 -N 1 numactl -N 0 -m 0 ./margo-p2p-bw -x 1048576 -n bmi+tcp:// -c 1 -D 20
# echo "## Margo BMI/TCP (bw, 8x concurrency):"
# aprun -e MPICH_GNI_NDREG_ENTRIES=2000 -n 2 -N 1 numactl -N 0 -m 0 ./margo-p2p-bw -x 1048576 -n bmi+tcp:// -c 8 -D 20

# echo "## WARNING: Margo OFI/TCP (disabled because it hangs as of 8/10/2017)"
# aprun -n 2 -N 1 ./margo-p2p-latency -i 100000 -n ofi+tcp://ipogif0:5000

# figure out nodes and hostnames for use with fi_
# declare -a nodes=($(python /home/carns/bin/run_on_all_nids.py));
# server_hostname=$(printf "nid%05d\n" ${nodes[0]})
# echo "## fi_pingpong OFI/GNI (one way, double the usec/xfer for round trip):"
# aprun -n 1 -N 1 -L ${nodes[0]} -b -p carns-test ./fi_pingpong -p gni -S 1 -I 10000 > $COBALT_JOBID.fi_pingpong_server.log &
# sleep 5
# aprun -n 1 -N 1 -L ${nodes[1]} -b -p carns-test ./fi_pingpong -p gni -S 1 -I 10000 $server_hostname > $COBALT_JOBID.fi_pingpong_client.log &
# wait
# cat $COBALT_JOBID.fi_pingpong_client.log

# one more margo run, this time with diagnostics enabled
# echo "## Margo OFI/GNI (round trip) with diagnostics:"
# aprun -n 2 -N 1 ./margo-p2p-latency -i 100000 -n ofi+gni://ipogif0:5000 -d $COBALT_JOBID.diag

# echo "## Mercury-runner OFI/GNI (round trip):"
# aprun -e MPICH_GNI_NDREG_ENTRIES=2000 -n 2 -N 1 ./mercury-runner -q -c 100000 -l 1 -m c -M -d `pwd` 1 h0=ofi+gni://ipogif0:5000 h1
# Spack package preferences for the Mochi regression build.
#
# Entries with "paths" and "buildable: False" tell Spack to use the copy
# already installed under the given prefix instead of building the package.
# The remaining entries pin variants, versions or virtual-package providers.
packages:
    all:
        providers:
            mpi: [ mpich, openmpi ]
    autoconf:
        paths:
            autoconf@2.69: /usr
        buildable: False
    automake:
        paths:
            automake@1.14.1: /usr
        buildable: False
    binutils:
        paths:
            binutils@2.24: /usr
        buildable: False
    bison:
        paths:
            bison@3.0.2: /usr
        buildable: False
    bzip2:
        paths:
            bzip2@1.0.6: /usr
        buildable: False
    cmake:
        paths:
            cmake@3.14.3: /soft/apps/packages/cmake-3.14.3
        buildable: False
    coreutils:
        paths:
            coreutils@8.21: /usr
        buildable: False
    flex:
        paths:
            flex@2.5.35: /usr
        buildable: False
    gettext:
        paths:
            gettext@0.13.3: /usr
        buildable: False
    git:
        paths:
            # NOTE(review): every other entry gives an install prefix, but
            # this one points at the git executable itself; it probably
            # should be /soft/apps/packages/git-2.10.1 -- confirm on an MCS
            # workstation before changing.
            git@2.10.1: /soft/apps/packages/git-2.10.1/bin/git
        buildable: False
    libtool:
        paths:
            libtool@2.4.2: /usr
        buildable: False
    m4:
        paths:
            m4@1.4.17: /usr
        buildable: False
    mercury:
        variants: ~boostsys
    openssl:
        paths:
            openssl@1.0.1f: /usr
        buildable: False
    perl:
        paths:
            perl@5.18.2: /usr
        buildable: False
    pkg-config:
        paths:
            pkg-config@0.26: /usr
        buildable: False
    python:
        version: [3.6.5]
    ssg:
        variants: +mpi
    tar:
        paths:
            tar@1.27.1: /usr
        buildable: False
    xz:
        paths:
            xz@5.1.0: /usr
        buildable: False
    zlib:
        paths:
            zlib@1.2.8: /usr
        buildable: False
#!/bin/bash
#COBALT -n 1
#COBALT -t 20
#COBALT --mode script
#COBALT -A CSC250STDM12
#COBALT -q debug-cache-quad

# PMDK bandwidth regression job.
#
# Runs ./pmdk-bw on a single node in four configurations: with and without
# "-T 8" (labelled "8 es" in the section headings), each against a pool file
# sized with truncate and against one filled with zeros by dd (labelled
# "preallocated pool"). The pool file in /dev/shm is removed and recreated
# before every run.
#
# Required environment:
#   SANDBOX - directory that contains the spack checkout (spack/share/...)
#
# This script does not use "set -e": a failing benchmark does not stop the
# benchmarks that follow it.

# XXX xalt module currently eating '-M' flag for mercury-runner...disabling for now
# module unload xalt

# fail early with a clear message instead of sourcing a bogus path
: "${SANDBOX:?SANDBOX must be set, e.g. qsub --env SANDBOX=/path/to/sandbox}"

# necessary when using the udreg option in Mercury
export MPICH_GNI_NDREG_ENTRIES=1024

module swap PrgEnv-intel PrgEnv-gnu
module load cce
. "$SANDBOX/spack/share/spack/setup-env.sh"
spack load -r ssg
spack load -r bake
module list

# NOTE: rpath doesn't seem to be set correctly, and the paths we need are
# in LIBRARY_PATH instead of LD_LIBRARY_PATH
export LD_LIBRARY_PATH="$LIBRARY_PATH"

# find nodes in job. We have to do this so that we can manually specify
# in each aprun so that server ranks consistently run on node where we
# set up storage space
# shellcheck disable=SC2207 # word splitting of the node id list is intended
declare -a nodes=($(python /home/carns/bin/run_on_all_nids.py))
if (( ${#nodes[@]} < 1 )); then
  echo "error: could not determine any node id for this job" >&2
  exit 1
fi

# pool file used by every benchmark run
readonly POOL=/dev/shm/foo.dat

# Run a command as a single process on the first node of the job.
# Arguments: the command and its arguments
on_first_node() {
  aprun -n 1 -N 1 -L "${nodes[0]}" "$@"
}

# Recreate the pool file, initialise it as a PMDK obj pool and run pmdk-bw.
# Arguments: $1   - section heading to print
#            $2   - how to size the backing file: "truncate" (truncate -s 60G)
#                   or "dd" (write 61440 x 1M blocks of zeros)
#            $3.. - extra options appended to the pmdk-bw command line
# Returns:   2 if $2 is not a known mode, otherwise the status of pmdk-bw
run_pmdk_case() {
  local heading=$1 fill_mode=$2
  shift 2
  echo "## ${heading}:"
  on_first_node hostname
  on_first_node rm -f "$POOL"
  case "$fill_mode" in
    truncate)
      on_first_node truncate -s 60G "$POOL"
      ;;
    dd)
      on_first_node dd if=/dev/zero of="$POOL" bs=1M count=61440
      ;;
    *)
      echo "error: unknown fill mode '${fill_mode}'" >&2
      return 2
      ;;
  esac
  on_first_node pmempool create obj "$POOL"
  on_first_node ls -alh "$POOL"
  on_first_node ./pmdk-bw -x 16777216 -m 34359738368 -p "$POOL" "$@"
}

run_pmdk_case "PMDK (8x concurrency)" truncate -c 8
run_pmdk_case "PMDK (8x concurrency, 8 es)" truncate -c 8 -T 8
run_pmdk_case "PMDK (8x concurrency, preallocated pool)" dd -c 8
run_pmdk_case "PMDK (8x concurrency, 8 es, preallocated pool)" dd -c 8 -T 8
#!/bin/bash

# This is a shell script to be run from an MCS workstation
# that will download, compile, and execute the Mochi performance
# regression tests, including any dependencies
#
# The sandbox, install and job directories are created under the current
# working directory. The three *.qsub job scripts and packages.yaml are
# taken from the current directory or, failing that, from the directory
# that holds this script.
#
# NOTE(review): several settings below (CRAYPE_LINK_TYPE, CC=cc, qsub/cqwait,
# the "(theta)" mail subject) look inherited from the Theta version of this
# script -- confirm they are what is wanted on the MCS workstations.

# exit on any error
set -e

# default to O3 optimizations unless otherwise specified
export CFLAGS="-O3"
# dynamic link everything by default
export CRAYPE_LINK_TYPE=dynamic

# files that must sit next to this script
readonly REQUIRED_FILES=(
  margo-regression.qsub
  bake-regression.qsub
  pmdk-regression.qsub
  packages.yaml
)

# location of this script and its companion files. The current directory is
# tried first (historical behaviour); when the files are not there, e.g.
# because cron started us from another directory, fall back to the directory
# the script itself lives in.
ORIGIN=$PWD
if [[ ! -f "$ORIGIN/packages.yaml" ]]; then
  ORIGIN=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
fi
# check inputs up front so we do not clone and build for an hour only to
# fail on a missing file
for required in "${REQUIRED_FILES[@]}"; do
  if [[ ! -f "$ORIGIN/$required" ]]; then
    echo "error: required file not found: $ORIGIN/$required" >&2
    exit 1
  fi
done

# scratch area for builds
SANDBOX=$PWD/mochi-regression-sandbox-$$
# install destination
PREFIX=$PWD/mochi-regression-install-$$
# job submission dir
JOBDIR=$PWD/mochi-regression-job-$$

# modify HOME env variable so that we don't perturb ~/.spack/ files for the
# users calling this script
export HOME=$SANDBOX

mkdir "$SANDBOX"
mkdir "$PREFIX"
mkdir "$JOBDIR"

cp "$ORIGIN/margo-regression.qsub" "$JOBDIR"
cp "$ORIGIN/bake-regression.qsub" "$JOBDIR"
cp "$ORIGIN/pmdk-regression.qsub" "$JOBDIR"

# set up build environment
cd "$SANDBOX"
git clone https://github.com/spack/spack.git
git clone https://xgitlab.cels.anl.gov/sds/sds-repo.git
git clone https://xgitlab.cels.anl.gov/sds/sds-tests.git

echo "=== BUILD SPACK PACKAGES AND LOAD ==="
. "$SANDBOX/spack/share/spack/setup-env.sh"
spack compiler find
spack compilers

# use our own packages.yaml for site-specific preferences
cp "$ORIGIN/packages.yaml" "$SPACK_ROOT/etc/spack"

# add external repo for mochi. Note that this will not modify the
# user's ~/.spack/ files because we modified $HOME above
spack repo add "${SANDBOX}/sds-repo"
# sanity check
spack repo list

# clean out any stray packages from previous runs, just in case
# (failure is expected and ignored when nothing is installed yet)
spack uninstall -R -y argobots mercury libfabric || true

# ior acts as our "apex" package here, causing several other packages to build
spack install ior@develop +mobject

# deliberately repeat setup-env step after building modules to ensure
# that we pick up the right module paths
. "$SANDBOX/spack/share/spack/setup-env.sh"

# load ssg and bake because they are needed by things compiled outside of
# spack later in this script
spack load -r ssg
spack load -r bake

# sds-tests
echo "=== BUILDING SDS TEST PROGRAMS ==="
cd "$SANDBOX/sds-tests"
libtoolize
./prepare.sh
mkdir build
cd build
../configure --prefix="$PREFIX" CC=cc
make -j 3
make install

# set up job to run
echo "=== SUBMITTING AND WAITING FOR JOB ==="
cp "$PREFIX/bin/margo-p2p-latency" "$JOBDIR"
cp "$PREFIX/bin/margo-p2p-bw" "$JOBDIR"
cp "$PREFIX/bin/bake-p2p-bw" "$JOBDIR"
cp "$PREFIX/bin/pmdk-bw" "$JOBDIR"
cd "$JOBDIR"

# the jobs are submitted one after another; each is waited for before the
# next one is queued. SANDBOX is forwarded because the job scripts source
# spack from it.
JOBID=$(qsub --env SANDBOX="$SANDBOX" ./margo-regression.qsub)
cqwait "$JOBID"
JOBID2=$(qsub --env SANDBOX="$SANDBOX" ./bake-regression.qsub)
cqwait "$JOBID2"
JOBID3=$(qsub --env SANDBOX="$SANDBOX" ./pmdk-regression.qsub)
cqwait "$JOBID3"

echo "=== JOB DONE, COLLECTING AND SENDING RESULTS ==="
# gather output, strip out funny characters, mail
cat "$JOBID".* "$JOBID2".* "$JOBID3".* > "combined.$JOBID.txt"
#dos2unix combined.$JOBID.txt
mailx -r carns@mcs.anl.gov -s "mochi-regression (theta)" sds-commits@lists.mcs.anl.gov < "combined.$JOBID.txt"
cat "combined.$JOBID.txt"

# remove the build and install trees; JOBDIR (job scripts, binaries and
# output) is left in place. ":?" makes rm refuse to run on an empty path.
cd /tmp
rm -rf -- "${SANDBOX:?}"
rm -rf -- "${PREFIX:?}"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment