#!/bin/bash
#COBALT -n 2
#COBALT -t 20
#COBALT --mode script
#COBALT -A ecp-testbed-01
#COBALT -q debug-cache-quad

# margo-regression.qsub: batch job that launches the margo-p2p-latency and
# margo-p2p-bw benchmarks with aprun over the ofi+gni transport.
# The shebang must be the very first line of the file and the #COBALT
# directives follow it directly; keep this commentary below them.

# XXX xalt module currently eating '-M' flag for mercury-runner...disabling for now
# module unload xalt

#######################################
# Print the library path(s) listed in the Mercury tcl module file.
#
# Asks spack for the name of the mercury tcl module, runs `module show` on
# it, and prints the third whitespace-separated field of every output line
# that contains the text LIBRARY_PATH (substring match).
# Globals:   none
# Arguments: none
# Outputs:   the matching field(s) on stdout, one per line
# Returns:   exit status of the final awk stage
#######################################
get_mercury_lib_path() {
  # stdout and stderr of `module show` are both sent down the pipe.
  # The command substitution is deliberately unquoted so that each module
  # name printed by spack is passed as a separate argument.
  module show $(spack module tcl find mercury) 2>&1 \
    | awk '/LIBRARY_PATH/ { print $3 }'
}

# Replace the PrgEnv-intel module with PrgEnv-gnu, then load the cce module.
module swap PrgEnv-intel PrgEnv-gnu
module load cce

# Pull spack's shell integration into this shell. The path is quoted so a
# SANDBOX containing spaces still works, and the job stops right here with a
# clear message if SANDBOX is unset or empty instead of sourcing /spack/...
. "${SANDBOX:?SANDBOX must be set to the directory that contains the spack checkout}/spack/share/spack/setup-env.sh"
# NOTE(review): -r is presumably spack's "also load dependencies" switch.
spack load -r ssg

# Record the loaded modules in the job output.
module list

# NOTE: as of Sept 2018, the rpath isn't being set correctly for Mercury libraries in the regression
#       test programs.  Not sure why.  This hack manually adds the correct path to LD_LIBRARY_PATH.
LIB_PATH_HACK=$(get_mercury_lib_path)
# The helper prints one path per matching line; join several with ':' so no
# newline ends up inside LD_LIBRARY_PATH.
LIB_PATH_HACK=${LIB_PATH_HACK//$'\n'/:}
if [[ -n "$LIB_PATH_HACK" ]]; then
  # Add the ':' separator only when LD_LIBRARY_PATH already has content: an
  # empty entry (leading or trailing ':') makes the dynamic loader search the
  # current working directory.
  export LD_LIBRARY_PATH="${LIB_PATH_HACK}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
else
  echo "WARNING: could not determine the Mercury library path; LD_LIBRARY_PATH left unchanged" >&2
fi
echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"

#ldd ./margo-p2p-latency

# NOTE: needed as of January 2018 to avoid conflicts between MPI and
#       libfabric GNI provider
# NOTE: doing this with -e option to aprun
# NOTE: update as of September 2018, this is no longer required now that
#       mercury has switched back to an alternative registration method for GNI
# export MPICH_GNI_NDREG_ENTRIES=2000

# echo "## MPI (one way, double the latency for round trip):"
# aprun -n 2 -N 1 ./osu_latency

# Baseline Margo benchmark runs over ofi+gni. Each aprun line uses -n 2 -N 1
# (two processes, one per node). Each echo labels the output of the run that
# follows it.
echo "## Margo OFI/GNI (round trip):"
aprun -n 2 -N 1 ./margo-p2p-latency -i 100000 -n ofi+gni://ipogif0:5000

# Bandwidth runs. Going by the labels, -x is the transfer size in bytes and
# -c the concurrency level. NOTE(review): -D 20 is presumably the run
# duration in seconds -- confirm against margo-p2p-bw usage.
echo "## Margo OFI/GNI (bw, 1MiB):"
aprun -n 2 -N 1 ./margo-p2p-bw -x 1048576 -n ofi+gni://ipogif0:5000 -c 1 -D 20
echo "## Margo OFI/GNI (bw, 1MiB, 8x concurrency):"
aprun -n 2 -N 1 ./margo-p2p-bw -x 1048576 -n ofi+gni://ipogif0:5000 -c 8 -D 20
echo "## Margo OFI/GNI (bw, 8MiB):"
aprun -n 2 -N 1 ./margo-p2p-bw -x 8388608 -n ofi+gni://ipogif0:5000 -c 1 -D 20
echo "## Margo OFI/GNI (bw, 8MiB, 8x concurrency):"
aprun -n 2 -N 1 ./margo-p2p-bw -x 8388608 -n ofi+gni://ipogif0:5000 -c 8 -D 20
echo "## Margo OFI/GNI (bw, 1MB unaligned):"
aprun -n 2 -N 1 ./margo-p2p-bw -x 1000000 -n ofi+gni://ipogif0:5000 -c 1 -D 20
echo "## Margo OFI/GNI (bw, 1MB unaligned, 8x concurrency):"
aprun -n 2 -N 1 ./margo-p2p-bw -x 1000000 -n ofi+gni://ipogif0:5000 -c 8 -D 20


# Latency and 1MiB bandwidth runs with "-t 0,0" added; the labels describe
# this setting as Mercury (Hg) busy spin.
echo "## Margo OFI/GNI (round trip, Hg busy spin):"
aprun -n 2 -N 1 ./margo-p2p-latency -i 100000 -n ofi+gni://ipogif0:5000 -t 0,0
echo "## Margo OFI/GNI (bw, 1MiB, Hg busy spin):"
aprun -n 2 -N 1 ./margo-p2p-bw -x 1048576 -n ofi+gni://ipogif0:5000 -c 1 -D 20 -t 0,0
echo "## Margo OFI/GNI (bw, 1MiB, 8x concurrency, Hg busy spin):"
aprun -n 2 -N 1 ./margo-p2p-bw -x 1048576 -n ofi+gni://ipogif0:5000 -c 8 -D 20 -t 0,0
# 8MiB and unaligned-1MB bandwidth runs with "-t 0,0" (labelled Hg busy spin).
# Keep a space between the -D value and -t: written as "20-t 0,0" the program
# receives "20-t" as the -D argument and never sees the -t option, so the run
# would not match its label.
echo "## Margo OFI/GNI (bw, 8MiB, Hg busy spin):"
aprun -n 2 -N 1 ./margo-p2p-bw -x 8388608 -n ofi+gni://ipogif0:5000 -c 1 -D 20 -t 0,0
echo "## Margo OFI/GNI (bw, 8MiB, 8x concurrency, Hg busy spin):"
aprun -n 2 -N 1 ./margo-p2p-bw -x 8388608 -n ofi+gni://ipogif0:5000 -c 8 -D 20 -t 0,0
echo "## Margo OFI/GNI (bw, 1MB unaligned, Hg busy spin):"
aprun -n 2 -N 1 ./margo-p2p-bw -x 1000000 -n ofi+gni://ipogif0:5000 -c 1 -D 20 -t 0,0
echo "## Margo OFI/GNI (bw, 1MB unaligned, 8x concurrency, Hg busy spin):"
aprun -n 2 -N 1 ./margo-p2p-bw -x 1000000 -n ofi+gni://ipogif0:5000 -c 8 -D 20 -t 0,0

# echo "## Margo BMI/TCP (round trip):"
# aprun -e MPICH_GNI_NDREG_ENTRIES=2000 -n 2 -N 1 ./margo-p2p-latency -i 100000 -n bmi+tcp://
# echo "## Margo BMI/TCP (bw):"
# aprun -e MPICH_GNI_NDREG_ENTRIES=2000 -n 2 -N 1 ./margo-p2p-bw -x 1048576 -n bmi+tcp:// -c 1 -D 20
# echo "## Margo BMI/TCP (bw, 8x concurrency):"
# aprun -e MPICH_GNI_NDREG_ENTRIES=2000 -n 2 -N 1 ./margo-p2p-bw -x 1048576 -n bmi+tcp:// -c 8 -D 20

# echo "## WARNING: Margo OFI/TCP (disabled because it hangs as of 8/10/2017)"
# aprun -n 2 -N 1 ./margo-p2p-latency -i 100000 -n ofi+tcp://ipogif0:5000


# figure out nodes and hostnames for use with fi_pingpong
# declare -a nodes=($(python /home/carns/bin/run_on_all_nids.py));
# server_hostname=$(printf "nid%05d\n" ${nodes[0]})

# echo "## fi_pingpong OFI/GNI (one way, double the usec/xfer for round trip):"
# aprun -n 1 -N 1 -L ${nodes[0]} -b -p carns-test ./fi_pingpong -p gni -S 1 -I 10000 > $COBALT_JOBID.fi_pingpong_server.log &
# sleep 5
# aprun -n 1 -N 1 -L ${nodes[1]} -b -p carns-test ./fi_pingpong -p gni -S 1 -I 10000 $server_hostname > $COBALT_JOBID.fi_pingpong_client.log &

# wait
# cat $COBALT_JOBID.fi_pingpong_client.log

# one more margo run, this time with diagnostics enabled
# echo "## Margo OFI/GNI (round trip) with diagnostics:"
# aprun -n 2 -N 1 ./margo-p2p-latency -i 100000 -n ofi+gni://ipogif0:5000 -d $COBALT_JOBID.diag

# echo "## Mercury-runner OFI/GNI (round trip):"
# aprun -e MPICH_GNI_NDREG_ENTRIES=2000 -n 2 -N 1 ./mercury-runner -q -c 100000 -l 1 -m c -M -d `pwd` 1 h0=ofi+gni://ipogif0:5000 h1