bake-regression.qsub 2.89 KB
Newer Older
1 2 3 4
#!/bin/bash
#COBALT -n 2
#COBALT -t 20
#COBALT --mode script
5
#COBALT -A CSC250STDM12
6 7
#COBALT -q debug-cache-quad

Philip Carns's avatar
Philip Carns committed
8
# Redirect HOME to the sandbox directory for the remainder of the job.
# SANDBOX is not set in this script; it must come from the submitting
# environment.
# NOTE(review): presumably this keeps per-user tool state inside the
# sandbox -- confirm.
HOME=$SANDBOX
export HOME
9 10 11
# XXX xalt module currently eating '-M' flag for mercury-runner...disabling for now
# module unload xalt

12 13 14
# Mercury's udreg option needs this GNI registration-entry setting.
MPICH_GNI_NDREG_ENTRIES=1024
export MPICH_GNI_NDREG_ENTRIES

15 16 17 18
# Switch the programming environment from Intel to GNU and load the cce
# module before spack is initialised.
module swap PrgEnv-intel PrgEnv-gnu
module load cce

# Source spack's shell integration from the sandbox. The path is quoted so a
# SANDBOX containing whitespace is not word-split, and the job stops right
# away when the file is missing (e.g. SANDBOX unset) instead of running every
# later step without spack.
spack_setup="${SANDBOX}/spack/share/spack/setup-env.sh"
if [[ ! -r "${spack_setup}" ]]; then
  echo "error: cannot read ${spack_setup} (is SANDBOX set correctly?)" >&2
  exit 1
fi
. "${spack_setup}"
Philip Carns's avatar
Philip Carns committed
19 20
# Load the Mochi packages this job uses, each with the -r flag.
# NOTE(review): -r is understood to also load dependencies on the spack
# version in the sandbox -- confirm against that version's `spack load -h`.
for mochi_pkg in mochi-ssg mochi-bake; do
  spack load -r "${mochi_pkg}"
done
21

Rob Latham's avatar
Rob Latham committed
22
# Print the spack packages currently loaded in this shell so that they are
# recorded in the job output.
spack find --loaded
23

24 25 26
# NOTE: rpath does not appear to be set correctly in the binaries; the
# directories they need are listed in LIBRARY_PATH rather than
# LD_LIBRARY_PATH, so copy that value across for the runtime loader.
# This replaces (does not extend) any LD_LIBRARY_PATH that was already set.
LD_LIBRARY_PATH=$LIBRARY_PATH
export LD_LIBRARY_PATH
27

28 29 30 31 32
# Find the nodes in this job. Every aprun below names its node(s) explicitly
# with -L so that server ranks consistently run on the node where the storage
# space was set up.
# NOTE(review): run_on_all_nids.py is assumed to print the node IDs separated
# by whitespace -- confirm against the helper script.
declare -a nodes=()
# read -a splits on whitespace (spaces and newlines alike) without the
# pathname expansion that an unquoted $(...) would apply. read returns
# non-zero at end of input, which is expected here and deliberately ignored.
read -r -d '' -a nodes < <(python /home/carns/bin/run_on_all_nids.py)

# Later commands use nodes[0] and nodes[1]; stop now rather than launch
# aprun with an empty or partial -L node list.
if (( ${#nodes[@]} < 2 )); then
  echo "error: need at least 2 nodes from run_on_all_nids.py, got ${#nodes[@]}" >&2
  exit 1
fi

33
# Record in the job output how every benchmark is launched: aprun with
# "-cc none", and numactl to keep processes on socket 0.
printf '%s\n' \
  "### NOTE: all benchmarks are using aprun -cc none to allow processes to run on all available cores; the default aprun settings limit processes to one core and produce poor performance because of contention between internal threads" \
  "### NOTE: all benchmarks are using numactl to keep processes on socket 0"

36
#######################################
# Launch `hostname` through aprun several times, each time pinned to the
# first node of the job, so the job output shows where the launcher places
# a single-process run.
# Globals:   nodes (read) - node IDs of this job; only nodes[0] is used
# Arguments: $1 - number of launches (optional, default 4)
# Outputs:   a heading line followed by the output of each aprun
#######################################
test_launcher_placement() {
  local -r launches=${1:-4}
  local i

  echo "## testing launcher placement:"
  for (( i = 0; i < launches; i++ )); do
    aprun -cc none -n 1 -N 1 -L "${nodes[0]}" hostname
  done
}

test_launcher_placement
41

Philip Carns's avatar
Philip Carns committed
42
#######################################
# Run one Bake point-to-point bandwidth benchmark over OFI/GNI.
#
# Steps, all launched through aprun with "-cc none":
#   1. print the hostname of the first node,
#   2. delete any previous pool file on that node,
#   3. create a fresh 60G pool there with bake-mkpool,
#   4. start ./bake-p2p-bw with 2 processes, one per node, across the first
#      two nodes, under numactl bound to NUMA node 0 (CPU and memory).
#
# The fixed bake-p2p-bw arguments (-x 16777216 -m 34359738368 -n "gni://"
# -p <pool>) are the same for every variant; only the trailing flags differ.
#
# Globals:   nodes (read) - node IDs of this job; nodes[0] holds the pool,
#            nodes[1] is the second node of the two-process run
# Arguments: $1   - label printed in the "## <label>:" heading
#            $2.. - extra flags appended to the bake-p2p-bw command line
# Outputs:   heading plus the output of every launched command
#######################################
run_bake_p2p_bw() {
  local -r label=$1
  shift

  local -r pool_file=/dev/shm/foo.dat
  local -r pool_node=${nodes[0]}
  local -r peer_node=${nodes[1]}
  # Common prefix for the single-process steps that run on the pool node.
  local -a on_pool_node=(aprun -cc none -n 1 -N 1 -L "${pool_node}")

  echo "## ${label}:"
  "${on_pool_node[@]}" hostname
  "${on_pool_node[@]}" rm -f "${pool_file}"
  "${on_pool_node[@]}" bake-mkpool -s 60G "${pool_file}"
  aprun -cc none -n 2 -N 1 -L "${pool_node},${peer_node}" \
    numactl -N 0 -m 0 \
    ./bake-p2p-bw -x 16777216 -m 34359738368 -n "gni://" -p "${pool_file}" "$@"
}

run_bake_p2p_bw "Bake OFI/GNI" -c 1

run_bake_p2p_bw "Bake OFI/GNI (8x concurrency)" -c 8

run_bake_p2p_bw "Bake OFI/GNI (Hg busy spin)" -c 1 -t 0,0

run_bake_p2p_bw "Bake OFI/GNI (8x concurrency, Hg busy spin)" -c 8 -t 0,0
65