Commit 0a6bdb06 authored by Philip Carns

progress on theta regression script

- set LD_LIBRARY_PATH more broadly to avoid library not found problems
- force placement to specific nodes to match target location
- concurrent job crashing for unknown reasons at this point
parent 240a9441
......@@ -21,25 +21,42 @@ module list
# in LIBRARY_PATH instead of LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$LIBRARY_PATH
# Find the nodes in this job. We need the node list so that we can manually
# specify placement (-L) in each aprun, so that server ranks consistently run
# on the node where we set up storage space.
# NOTE(review): the unquoted $(...) relies on word splitting to fill the
# array, so the helper script is assumed to print whitespace-separated node
# IDs -- confirm against run_on_all_nids.py.
declare -a nodes=($(python /home/carns/bin/run_on_all_nids.py));
echo "## testing launcher placement:"
# Launch hostname on the first node of the list four times in a row; the
# reported host should be identical on every run if -L placement is honored.
aprun -n 1 -N 1 -L ${nodes[0]} hostname
aprun -n 1 -N 1 -L ${nodes[0]} hostname
aprun -n 1 -N 1 -L ${nodes[0]} hostname
aprun -n 1 -N 1 -L ${nodes[0]} hostname
# Bake point-to-point bandwidth test over ofi+gni (-c 1, no -t option).
echo "## Bake OFI/GNI:"
# NOTE(review): the next five commands prepare /dev/shm/foo.dat on the host
# running this script and launch the benchmark without -L placement. They look
# like the pre-change variant of the pinned commands that follow (this text
# appears to come from a rendered diff) -- confirm which set should remain.
hostname
rm -f /dev/shm/foo.dat
bake-mkpool -s 60G /dev/shm/foo.dat
ls -alh /dev/shm/foo.dat
aprun -n 2 -N 1 ./bake-p2p-bw -x 16777216 -m 34359738368 -n "ofi+gni://ipogif0:5000" -p /dev/shm/foo.dat -c 1
# Pinned variant: every setup step is launched through aprun on nodes[0], so
# the pool file is removed, recreated with bake-mkpool (-s 60G) and listed on
# that node.
aprun -n 1 -N 1 -L ${nodes[0]} hostname
aprun -n 1 -N 1 -L ${nodes[0]} rm -f /dev/shm/foo.dat
aprun -n 1 -N 1 -L ${nodes[0]} bake-mkpool -s 60G /dev/shm/foo.dat
aprun -n 1 -N 1 -L ${nodes[0]} ls -alh /dev/shm/foo.dat
# Two processes, one per node, restricted to nodes[0] and nodes[1].
# NOTE(review): presumably the rank that opens the pool lands on nodes[0] --
# confirm the rank-to-node ordering aprun uses for a -L list.
aprun -n 2 -N 1 -L ${nodes[0]},${nodes[1]} ./bake-p2p-bw -x 16777216 -m 34359738368 -n "ofi+gni://ipogif0:5000" -p /dev/shm/foo.dat -c 1
# Same benchmark with -c 8 instead of -c 1.
# NOTE(review): -c is presumably the concurrency level, per the banner text
# below -- confirm against bake-p2p-bw usage.
echo "## Bake OFI/GNI (8x concurrency):"
# NOTE(review): these three commands recreate the pool on the host running
# this script and launch without -L placement. They look like the pre-change
# variant of the pinned commands that follow (this text appears to come from a
# rendered diff) -- confirm which set should remain.
rm -f /dev/shm/foo.dat
bake-mkpool -s 60G /dev/shm/foo.dat
aprun -n 2 -N 1 ./bake-p2p-bw -x 16777216 -m 34359738368 -n "ofi+gni://ipogif0:5000" -p /dev/shm/foo.dat -c 8
# Pinned variant: remove, recreate and list the pool file on nodes[0] via
# aprun, then run the two-process benchmark on nodes[0] and nodes[1].
aprun -n 1 -N 1 -L ${nodes[0]} hostname
aprun -n 1 -N 1 -L ${nodes[0]} rm -f /dev/shm/foo.dat
aprun -n 1 -N 1 -L ${nodes[0]} bake-mkpool -s 60G /dev/shm/foo.dat
aprun -n 1 -N 1 -L ${nodes[0]} ls -alh /dev/shm/foo.dat
aprun -n 2 -N 1 -L ${nodes[0]},${nodes[1]} ./bake-p2p-bw -x 16777216 -m 34359738368 -n "ofi+gni://ipogif0:5000" -p /dev/shm/foo.dat -c 8
# Same benchmark with -c 1 plus the extra option "-t 0,0".
# NOTE(review): "-t 0,0" presumably selects the Mercury (Hg) busy-spin mode
# named in the banner below -- confirm against bake-p2p-bw usage.
echo "## Bake OFI/GNI (Hg busy spin):"
# NOTE(review): these three commands recreate the pool on the host running
# this script and launch without -L placement. They look like the pre-change
# variant of the pinned commands that follow (this text appears to come from a
# rendered diff) -- confirm which set should remain.
rm -f /dev/shm/foo.dat
bake-mkpool -s 60G /dev/shm/foo.dat
aprun -n 2 -N 1 ./bake-p2p-bw -x 16777216 -m 34359738368 -n "ofi+gni://ipogif0:5000" -p /dev/shm/foo.dat -c 1 -t 0,0
# Pinned variant: remove, recreate and list the pool file on nodes[0] via
# aprun, then run the two-process benchmark on nodes[0] and nodes[1].
aprun -n 1 -N 1 -L ${nodes[0]} hostname
aprun -n 1 -N 1 -L ${nodes[0]} rm -f /dev/shm/foo.dat
aprun -n 1 -N 1 -L ${nodes[0]} bake-mkpool -s 60G /dev/shm/foo.dat
aprun -n 1 -N 1 -L ${nodes[0]} ls -alh /dev/shm/foo.dat
aprun -n 2 -N 1 -L ${nodes[0]},${nodes[1]} ./bake-p2p-bw -x 16777216 -m 34359738368 -n "ofi+gni://ipogif0:5000" -p /dev/shm/foo.dat -c 1 -t 0,0
# Same benchmark combining -c 8 with "-t 0,0".
# NOTE(review): the option meanings (-c as concurrency, -t 0,0 as Hg busy
# spin) are inferred from the banner text below -- confirm against
# bake-p2p-bw usage.
echo "## Bake OFI/GNI (8x concurrency, Hg busy spin):"
# NOTE(review): these three commands recreate the pool on the host running
# this script and launch without -L placement. They look like the pre-change
# variant of the pinned commands that follow (this text appears to come from a
# rendered diff) -- confirm which set should remain.
rm -f /dev/shm/foo.dat
bake-mkpool -s 60G /dev/shm/foo.dat
aprun -n 2 -N 1 ./bake-p2p-bw -x 16777216 -m 34359738368 -n "ofi+gni://ipogif0:5000" -p /dev/shm/foo.dat -c 8 -t 0,0
# Pinned variant: remove, recreate and list the pool file on nodes[0] via
# aprun, then run the two-process benchmark on nodes[0] and nodes[1].
aprun -n 1 -N 1 -L ${nodes[0]} hostname
aprun -n 1 -N 1 -L ${nodes[0]} rm -f /dev/shm/foo.dat
aprun -n 1 -N 1 -L ${nodes[0]} bake-mkpool -s 60G /dev/shm/foo.dat
aprun -n 1 -N 1 -L ${nodes[0]} ls -alh /dev/shm/foo.dat
aprun -n 2 -N 1 -L ${nodes[0]},${nodes[1]} ./bake-p2p-bw -x 16777216 -m 34359738368 -n "ofi+gni://ipogif0:5000" -p /dev/shm/foo.dat -c 8 -t 0,0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment