Commit b6c05379 authored by Francois Tessier's avatar Francois Tessier

Update Makefile to support BG/Q and XC40 sharing the same home dir. Add MPI-IO version of 1D-array benchmark.

Update Makefile to support BG/Q and XC40 sharing the same home dir. Add MPI-IO version of 1D-array benchmark.
parent 2cd0e737
MPICXX = mpixlcxx include ../make.inc/Makefile.$(ARCHI)
MPI_CFLAGS = -g -I$(HOME)/install_bgq/include/ -DBGQ -DDEBUG -DTIMING all: main_tapioca main_mpiio
MPI_LDFLAGS = -L$(HOME)/install_bgq/lib/ -ltapioca
all: main_tapioca main_tapioca:main_tapioca.cpp utils.cpp
$(MPICXX) $(MPI_CFLAGS) $? -o $@ $(MPI_LDFLAGS)
main_tapioca:main_tapioca.cpp main_mpiio:main_mpiio.cpp utils.cpp
$(MPICXX) $(MPI_CFLAGS) $? -o $@ $(MPI_LDFLAGS) $(MPICXX) $(MPI_CFLAGS) $? -o $@ $(MPI_LDFLAGS)
clean: clean:
rm main_tapioca rm main_tapioca main_mpiio
# Platform fragment for IBM BG/Q: builds the simple 1D I/O benchmarks
# (TAPIOCA and plain MPI-IO variants) with the XL MPI compiler wrapper,
# against the TAPIOCA install under $(HOME)/install_bgq.
MPICXX      = mpixlcxx
MPI_CFLAGS  = -g -I$(HOME)/install_bgq/include/ -DBGQ -DDEBUG -DTIMING
MPI_LDFLAGS = -L$(HOME)/install_bgq/lib/ -ltapioca

# 'all' and 'clean' are commands, not files: declare them phony so a stray
# file with either name cannot silently shadow the target.
.PHONY: all clean

all: simple_io_tapioca simple_io_mpiio

# Use $^ (all prerequisites) rather than $? (only newer-than-target ones):
# with additional prerequisites (e.g. headers) $? can expand to a partial
# source list and produce a broken compile/link line.
simple_io_tapioca: simple_io_tapioca.cpp
	$(MPICXX) $(MPI_CFLAGS) $^ -o $@ $(MPI_LDFLAGS)

simple_io_mpiio: simple_io_mpiio.cpp
	$(MPICXX) $(MPI_CFLAGS) $^ -o $@ $(MPI_LDFLAGS)

# $(RM) (= rm -f) so clean succeeds even when the binaries are absent.
clean:
	$(RM) simple_io_tapioca simple_io_mpiio
# Platform fragment for Cray XC40 (Theta): builds the simple 1D I/O
# benchmarks with the Cray CC wrapper, linking TAPIOCA, liblustreapi, and
# dmapp (whole-archive, as required for the RMA-over-DMAPP path).
MPICXX      = CC
MPI_CFLAGS  = -g -I$(HOME)/install/include/ -DXC40 -DDEBUG -DTIMING
MPI_LDFLAGS = -L$(HOME)/install/lib/ -ltapioca -llustreapi -Wl,--whole-archive,-ldmapp,--no-whole-archive

# 'all' and 'clean' are commands, not files: declare them phony so a stray
# file with either name cannot silently shadow the target.
.PHONY: all clean

all: simple_io_tapioca simple_io_mpiio

# Use $^ (all prerequisites) rather than $? (only newer-than-target ones):
# with additional prerequisites (e.g. headers) $? can expand to a partial
# source list and produce a broken compile/link line.
simple_io_tapioca: simple_io_tapioca.cpp
	$(MPICXX) $(MPI_CFLAGS) $^ -o $@ $(MPI_LDFLAGS)

simple_io_mpiio: simple_io_mpiio.cpp
	$(MPICXX) $(MPI_CFLAGS) $^ -o $@ $(MPI_LDFLAGS)

# $(RM) (= rm -f) so clean succeeds even when the binaries are absent.
clean:
	$(RM) simple_io_tapioca simple_io_mpiio
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <stdint.h>
#include <time.h>
#include <mpi.h>
#include "tapioca.hpp"
#include "utils.h"
#define RED "\x1b[31m"
#define GREEN "\x1b[32m"
#define BLUE "\x1b[34m"
#define RESET "\x1b[0m"
/*
 * Pick a pseudo-random per-rank particle count in [10, max + 9].
 *
 * The PRNG is reseeded from time(NULL) + rank so that concurrently started
 * ranks draw different counts.  Note the result may exceed `max` by up to 9,
 * matching the original benchmark behavior.
 *
 * max  - upper bound used for the modulo draw (argv[1] of the benchmark)
 * rank - MPI rank, mixed into the seed
 *
 * Returns the particle count; for max <= 0 the minimum (10) is returned,
 * since rand() % max would otherwise be undefined (division by zero).
 */
int64_t particles (int64_t max, int rank) {
  int64_t n_part;

  /* Guard: modulo by a non-positive value is undefined behavior. */
  if (max <= 0)
    return 10;

  srand (time (NULL) + rank);
  n_part = (rand () % max) + 10;

  return n_part;
}
/*
 * Simple 1D-array collective I/O benchmark (MPI-IO path).
 *
 * Each rank writes num_particles floats into a shared file, reads them back
 * collectively, and verifies the content.  Ranks are grouped into
 * sub-communicators by BG/Q bridge-node id, producing one output file per
 * bridge node; write/read bandwidth is reported from the slowest rank.
 *
 * Usage: <binary> <particles> [randomize]
 *   one argument  -> every rank writes exactly atoi(argv[1]) particles
 *   two arguments -> each rank writes a random count from particles()
 */
int main (int argc, char * argv[])
{
/* NOTE(review): i, file_id, hdr and the chunk* arrays are never used in
   this MPI-IO variant; they look like leftovers from the TAPIOCA version. */
int world_numtasks, world_myrank, mycolor, mykey, sub_numtasks, sub_myrank, i, file_id;
int64_t num_particles;
int64_t sub_particles, tot_particles, particle_size, file_size, tot_size;
int64_t scan_size = 0, offset, hdr = 0;
double start_time, end_time, tot_time, max_time;
double io_bw;
MPI_Comm sub_comm;
MPI_File file_handle;
MPI_Info info;
MPI_Status status;
char output[100];
int64_t chunkCount[1], chunkOffset[1];
int chunkSize[1];
Tapioca tp;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &world_numtasks);
MPI_Comm_rank(MPI_COMM_WORLD, &world_myrank);
/* Split COMM_WORLD by bridge-node id: one sub-communicator (and one output
   file) per bridge node.  BridgeNodeId() comes from the TAPIOCA topology
   helper — presumably the I/O bridge node of the calling rank; confirm. */
mycolor = tp.topology.BridgeNodeId ();
//mycolor = 42;
mykey = world_myrank;
MPI_Comm_split (MPI_COMM_WORLD, mycolor, mykey, &sub_comm);
MPI_Comm_size(sub_comm, &sub_numtasks);
MPI_Comm_rank(sub_comm, &sub_myrank);
/* NOTE(review): argv[2] is read without checking argc — launching with no
   arguments reads past argv[argc] (undefined behavior) and atoi(argv[1])
   would dereference NULL. */
if ( argv[2] != NULL )
num_particles = particles (atoi ( argv[1] ), world_myrank);
else
num_particles = atoi ( argv[1] );
/* Per-platform output path; the color (bridge-node id) in the file name
   yields one file per sub-communicator. */
#ifdef BGQ
snprintf (output, 100, "/projects/visualization/ftessier/debug/SimpleIO-%08d.dat", mycolor);
#elif XC40
snprintf (output, 100, "/lus/theta-fs0/projects/Performance/ftessier/HACC/SimpleIO-%08d.dat", mycolor);
#else
snprintf (output, 100, "./SimpleIO-%08d.dat", mycolor);
#endif
/*****************/
/* WRITE */
/*****************/
/* Fill the write buffer with a known pattern (0, 1, 2, ...) so the read
   phase can verify it. */
float *xx;
xx = new float[num_particles];
for (uint64_t i = 0; i< num_particles; i++)
xx[i] = (float)i;
/* Aggregate particle counts: per-file total (sub_comm) and global total. */
MPI_Allreduce(&num_particles, &sub_particles, 1, MPI_LONG_LONG, MPI_SUM, sub_comm);
MPI_Allreduce(&num_particles, &tot_particles, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
particle_size = sizeof(float);
file_size = particle_size * sub_particles;
tot_size = particle_size * tot_particles;
/* Pre-size the shared file from one rank per sub-communicator so the
   collective write below does not have to extend it. */
if (sub_myrank == 0) {
MPI_File_open(MPI_COMM_SELF, output,
MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &file_handle);
MPI_File_set_size(file_handle, file_size);
MPI_File_close (&file_handle);
}
/* Exclusive prefix sum of counts -> this rank's element offset within the
   shared file (scan_size stays 0 on sub-rank 0). */
MPI_Exscan (&num_particles, &scan_size, 1, MPI_LONG_LONG, MPI_SUM, sub_comm);
if (0 == sub_myrank) {
fprintf (stdout, GREEN "[INFO]" RESET " [%08d] TAPIOCA Simple I/O\n", mycolor);
fprintf (stdout, GREEN "[INFO]" RESET " [%08d] Write output file\n", mycolor);
fprintf (stdout, GREEN "[INFO]" RESET " [%08d] --> %lld particles per rank\n", mycolor, num_particles);
fprintf (stdout, GREEN "[INFO]" RESET " [%08d] --> File size: %.2f MB (%lld particles)\n",
mycolor, (double)file_size/(1024*1024), sub_particles);
}
/* Steer the number of collective-buffering aggregators from the same env
   var TAPIOCA uses, so both code paths are comparable.
   NOTE(review): getenv() returns NULL when TAPIOCA_NBAGGR is unset, and
   MPI_Info_set with a NULL value is erroneous — confirm the env is set. */
MPI_Info_create ( &info );
MPI_Info_set ( info, "cb_nodes", getenv("TAPIOCA_NBAGGR") );
MPI_Info_set ( info, "bg_nodes_pset", getenv("TAPIOCA_NBAGGR") );
MPI_File_open(sub_comm, output,
MPI_MODE_WRONLY, info, &file_handle);
MPIIOInfo ( file_handle );
/* Timed collective write; the file close is included in the measurement. */
start_time = MPI_Wtime();
offset = scan_size * particle_size;
MPI_File_write_at_all (file_handle, offset, xx, num_particles, MPI_FLOAT, &status);
MPI_File_close (&file_handle);
end_time = MPI_Wtime();
tot_time = end_time - start_time;
/* Bandwidth is computed from the slowest rank (MPI_MAX of elapsed time). */
MPI_Reduce (&tot_time, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
if (0 == world_myrank) {
io_bw = (double)tot_size / max_time / (1024 * 1024);
fprintf (stdout, BLUE "[TIMING]" RESET " Write I/O bandwidth: %.2f MBps (%.2f MB in %.2f ms)\n",
io_bw, (double)tot_size/(1024*1024), max_time * 1000);
}
MPI_Barrier (MPI_COMM_WORLD);
/*****************/
/* READ */
/*****************/
float *xx_r;
xx_r = new float[num_particles];
MPI_File_open(sub_comm, output,
MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handle);
if (0 == sub_myrank)
fprintf (stdout, GREEN "[INFO]" RESET " [%08d] Read output file\n", mycolor);
/* Timed collective read-back at the same offsets as the write. */
start_time = MPI_Wtime();
offset = scan_size * particle_size;
MPI_File_read_at_all (file_handle, offset, xx_r, num_particles, MPI_FLOAT, &status);
MPI_File_close (&file_handle);
end_time = MPI_Wtime();
tot_time = end_time - start_time;
MPI_Reduce (&tot_time, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
if (0 == world_myrank) {
io_bw = (double)tot_size / max_time / (1024 * 1024);
fprintf (stdout, BLUE "[TIMING]" RESET " Read I/O bandwidth: %.2f MBps (%.2f MB in %.2f ms)\n",
io_bw, (double)tot_size/(1024*1024), max_time * 1000);
}
/*****************/
/* VERIFICATION */
/*****************/
/* Compare the read-back data against what was written; abort on the first
   mismatch.  NOTE(review): %d with a uint64_t index is a format-specifier
   mismatch — should be %llu / PRIu64. */
for (uint64_t i = 0; i< num_particles; i++) {
if (xx[i] != xx_r[i])
{
fprintf (stdout, RED "[ERROR]" RESET " Wrong value for particle %d\n", i);
MPI_Abort (MPI_COMM_WORLD, -1);
}
}
if (0 == sub_myrank)
fprintf (stdout, GREEN "[INFO]" RESET " [%08d] Content verified and consistent\n", mycolor);
/*****************/
/* FREE */
/*****************/
delete [] xx;
delete [] xx_r;
/* NOTE(review): the MPI_Info created for the write phase is never freed
   (MPI_Info_free), and sub_comm is never freed (MPI_Comm_free). */
MPI_Finalize ();
}
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <stdint.h> #include <stdint.h>
#include <mpi.h> #include <mpi.h>
#include "utils.h"
#include "tapioca.hpp" #include "tapioca.hpp"
#define RED "\x1b[31m" #define RED "\x1b[31m"
...@@ -40,8 +41,8 @@ int main (int argc, char * argv[]) ...@@ -40,8 +41,8 @@ int main (int argc, char * argv[])
MPI_Comm_size(MPI_COMM_WORLD, &world_numtasks); MPI_Comm_size(MPI_COMM_WORLD, &world_numtasks);
MPI_Comm_rank(MPI_COMM_WORLD, &world_myrank); MPI_Comm_rank(MPI_COMM_WORLD, &world_myrank);
//mycolor = tp.topology.BridgeNodeId (); mycolor = tp.topology.BridgeNodeId ();
mycolor = 42; //mycolor = 42;
mykey = world_myrank; mykey = world_myrank;
MPI_Comm_split (MPI_COMM_WORLD, mycolor, mykey, &sub_comm); MPI_Comm_split (MPI_COMM_WORLD, mycolor, mykey, &sub_comm);
...@@ -110,6 +111,8 @@ int main (int argc, char * argv[]) ...@@ -110,6 +111,8 @@ int main (int argc, char * argv[])
MPI_File_open(sub_comm, output, MPI_File_open(sub_comm, output,
MPI_MODE_WRONLY, MPI_INFO_NULL, &file_handle); MPI_MODE_WRONLY, MPI_INFO_NULL, &file_handle);
MPIIOInfo ( file_handle );
offset = scan_size * particle_size; offset = scan_size * particle_size;
...@@ -127,6 +130,7 @@ int main (int argc, char * argv[]) ...@@ -127,6 +130,7 @@ int main (int argc, char * argv[])
io_bw, (double)tot_size/(1024*1024), max_time * 1000); io_bw, (double)tot_size/(1024*1024), max_time * 1000);
} }
tp.Finalize ();
MPI_Barrier (MPI_COMM_WORLD); MPI_Barrier (MPI_COMM_WORLD);
/*****************/ /*****************/
......
#!/bin/bash #!/bin/bash
NODES=4 NODES=1024
PPN=2 PPN=16
NPROCS=$((NODES*PPN)) NPROCS=$((NODES*PPN))
TARGET="/projects/visualization/ftessier/debug" TARGET="/projects/visualization/ftessier/debug"
...@@ -10,8 +10,8 @@ cd $HOME/TAPIOCA/examples/1D-Array ...@@ -10,8 +10,8 @@ cd $HOME/TAPIOCA/examples/1D-Array
export TAPIOCA_DEVNULL=false export TAPIOCA_DEVNULL=false
export TAPIOCA_COMMSPLIT=true export TAPIOCA_COMMSPLIT=true
export TAPIOCA_STRATEGY=TOPOLOGY_AWARE export TAPIOCA_STRATEGY=TOPOLOGY_AWARE
export TAPIOCA_NBAGGR=1 export TAPIOCA_NBAGGR=4
export TAPIOCA_BUFFERSIZE=1048576 export TAPIOCA_BUFFERSIZE=33554432
export TAPIOCA_PIPELINING=true export TAPIOCA_PIPELINING=true
function updateSettings() function updateSettings()
...@@ -31,6 +31,7 @@ function updateSettings() ...@@ -31,6 +31,7 @@ function updateSettings()
SETTINGS="$SETTINGS BGLOCKLESSMPIO_F_TYPE=0x47504653" SETTINGS="$SETTINGS BGLOCKLESSMPIO_F_TYPE=0x47504653"
SETTINGS="$SETTINGS GPFSMPIO_BALANCECONTIG=1" SETTINGS="$SETTINGS GPFSMPIO_BALANCECONTIG=1"
SETTINGS="$SETTINGS GPFSMPIO_P2PCONTIG=1" SETTINGS="$SETTINGS GPFSMPIO_P2PCONTIG=1"
#SETTINGS="$SETTINGS MPICH_MPIIO_HINTS=*:bg_nodes_pset=$TAPIOCA_NBAGGR"
} }
function outputFile () function outputFile ()
...@@ -40,8 +41,22 @@ function outputFile () ...@@ -40,8 +41,22 @@ function outputFile ()
} }
PARTICLES=50000 PARTICLES=25000
rm $TARGET/* for RUN in 1 2 3 4 5
updateSettings do
outputFile "TAPIOCA" for AGGR in 4 8 16 32 64
runjob --block $COBALT_PARTNAME --envs $SETTINGS -p $PPN --np $NPROCS : ./main_tapioca $PARTICLES > $OUTPUT do
export TAPIOCA_NBAGGR=$AGGR
updateSettings
rm $TARGET/*
outputFile "TAPIOCA"
runjob --block $COBALT_PARTNAME --envs $SETTINGS -p $PPN --np $NPROCS : ./main_tapioca $PARTICLES >> $OUTPUT
sleep 5
rm $TARGET/*
outputFile "MPIIO"
runjob --block $COBALT_PARTNAME --envs $SETTINGS -p $PPN --np $NPROCS : ./main_mpiio $PARTICLES >> $OUTPUT
sleep 5
done
done
\ No newline at end of file
#include "utils.h"
/*
 * Print (on rank 0 of MPI_COMM_WORLD only) the MPI-IO hints currently
 * attached to an open file handle: collective-buffering settings and the
 * ROMIO/BG-specific knobs this benchmark cares about.
 *
 * fileHandle - an open MPI_File whose effective hints are queried.
 *
 * Fixes over the original: the value buffer is no longer printed
 * uninitialized when a hint is absent (flag == 0), and the MPI_Info
 * object returned by MPI_File_get_info is freed.
 */
void MPIIOInfo ( MPI_File fileHandle )
{
  MPI_Info info;
  int flag, rank, i;
  char value[1024];

  /* Hints to report, in the original output order. */
  const char *keys[] = {
    "cb_buffer_size", "cb_nodes", "bg_nodes_pset",
    "romio_cb_read", "romio_cb_write", "romio_no_indep_rw"
  };
  const int nkeys = (int)( sizeof (keys) / sizeof (keys[0]) );

  MPI_Comm_rank ( MPI_COMM_WORLD, &rank );

  if ( rank == 0 ) {
    /* MPI_File_get_info returns a copy of the hints; it must be freed. */
    MPI_File_get_info ( fileHandle, &info );

    fprintf ( stdout, "[INFO] MPI Two-phases I/O\n" );

    for ( i = 0; i < nkeys; i++ ) {
      value[0] = '\0';  /* avoid printing stack garbage if the hint is unset */
      MPI_Info_get ( info, keys[i], 1024, value, &flag );
      fprintf ( stdout, "[INFO] %s = %s\n", keys[i], flag ? value : "(not set)" );
    }

    MPI_Info_free ( &info );
  }
}
#ifndef UTILS_H
#define UTILS_H
#include <mpi.h>
/* Print the MPI-IO hints (cb_nodes, buffer sizes, ROMIO collective settings)
   attached to an open file handle; only rank 0 of MPI_COMM_WORLD prints.
   Implemented in utils.cpp. */
void MPIIOInfo ( MPI_File fileHandle );
#endif /* UTILS_H */
...@@ -5,7 +5,7 @@ PPN=16 ...@@ -5,7 +5,7 @@ PPN=16
NPROCS=$((NODES*PPN)) NPROCS=$((NODES*PPN))
TARGET="/projects/visualization/ftessier/debug" TARGET="/projects/visualization/ftessier/debug"
cd $HOME/TAPIOCA/examples/HACC-IO cd $HOME/install/$ARCHI/bin/
export TAPIOCA_DEVNULL=false export TAPIOCA_DEVNULL=false
export TAPIOCA_COMMSPLIT=true export TAPIOCA_COMMSPLIT=true
...@@ -35,22 +35,22 @@ function updateSettings() ...@@ -35,22 +35,22 @@ function updateSettings()
function outputFile () function outputFile ()
{ {
OUTPUT="$HOME/PUB-IPDPS18/dat/hacc_rw" OUTPUT="$HOME/PUB-IPDPS18/dat/hacc_rw"
OUTPUT="${OUTPUT}/HACC_BGQ_${BIN}_${NODES}_${PPN}" OUTPUT="${OUTPUT}/HACC_BGQ_MIRA_${BIN}_${NODES}_${PPN}"
OUTPUT="${OUTPUT}_${TAPIOCA_STRATEGY}_${TAPIOCA_NBAGGR}_${TAPIOCA_BUFFERSIZE}_${PARTICLES}" OUTPUT="${OUTPUT}_${TAPIOCA_STRATEGY}_${TAPIOCA_NBAGGR}_${TAPIOCA_BUFFERSIZE}_${PARTICLES}"
} }
updateSettings updateSettings
for run in {0..4} for run in {0..9}
do do
for PARTICLES in 100000 for PARTICLES in 5000 15000 25000 35000 50000 100000
do do
for BIN in miniHACC-AoS miniHACC-SoA miniHACC-AoS-MPIIO miniHACC-SoA-MPIIO for BIN in miniHACC-AoS miniHACC-SoA miniHACC-AoS-MPIIO miniHACC-SoA-MPIIO
do do
rm $TARGET/* rm $TARGET/*
outputFile outputFile
runjob --block $COBALT_PARTNAME --envs $SETTINGS -p $PPN --np $NPROCS : ./$BIN $PARTICLES >> $OUTPUT runjob --block $COBALT_PARTNAME --envs $SETTINGS -p $PPN --np $NPROCS : ./$BIN $PARTICLES >> $OUTPUT
sleep 5 sleep 3
done done
done done
done done
......
#!/bin/bash #!/bin/bash
NODES=1024 NODES=512
PPN=16 PPN=16
NPROCS=$((NODES*PPN)) NPROCS=$((NODES*PPN))
TARGET="/lus/theta-fs0/projects/Performance/ftessier/HACC" TARGET="/lus/theta-fs0/projects/Performance/ftessier/HACC"
...@@ -8,19 +8,13 @@ DDT="/soft/debuggers/forge/bin/ddt --connect" ...@@ -8,19 +8,13 @@ DDT="/soft/debuggers/forge/bin/ddt --connect"
STRIPE_COUNT=48 STRIPE_COUNT=48
STRIPE_SIZE=8388608 STRIPE_SIZE=8388608
cd $HOME/TAPIOCA/examples/HACC-IO cd $HOME/install/$ARCHI/bin/
export TAPIOCA_DEVNULL=false export TAPIOCA_DEVNULL=false
export TAPIOCA_COMMSPLIT=true export TAPIOCA_COMMSPLIT=true
export TAPIOCA_STRATEGY=TOPOLOGY_AWARE export TAPIOCA_STRATEGY=TOPOLOGY_AWARE
export TAPIOCA_NBAGGR=64 export TAPIOCA_NBAGGR=96
#export TAPIOCA_BUFFERSIZE=2097152
#export TAPIOCA_BUFFERSIZE=4194304
#export TAPIOCA_BUFFERSIZE=8388608
export TAPIOCA_BUFFERSIZE=16777216 export TAPIOCA_BUFFERSIZE=16777216
#export TAPIOCA_BUFFERSIZE=33554432
VARS="-e MPICH_RMA_OVER_DMAPP=1 -e MPICH_MPIIO_AGGREGATOR_PLACEMENT_DISPLAY=1 -e MPICH_MPIIO_HINTS=*:cray_cb_nodes_multiplier=1"
function setLustreFS () function setLustreFS ()
{ {
...@@ -39,28 +33,32 @@ function updateSettings() ...@@ -39,28 +33,32 @@ function updateSettings()
SETTINGS="$SETTINGS -e TAPIOCA_BUFFERSIZE=$TAPIOCA_BUFFERSIZE" SETTINGS="$SETTINGS -e TAPIOCA_BUFFERSIZE=$TAPIOCA_BUFFERSIZE"
SETTINGS="$SETTINGS -e MPICH_RMA_OVER_DMAPP=1" SETTINGS="$SETTINGS -e MPICH_RMA_OVER_DMAPP=1"
SETTINGS="$SETTINGS -e MPICH_MPIIO_AGGREGATOR_PLACEMENT_DISPLAY=1"
SETTINGS="$SETTINGS -e MPICH_MPIIO_HINTS=*:cray_cb_nodes_multiplier=2"
} }
for PARTICLES in 5000 15000 25000 35000 50000 100000 function outputFile ()
{
OUTPUT="$HOME/PUB-IPDPS18/dat/hacc_rw"
OUTPUT="${OUTPUT}/HACC_XC40_${BIN}_${NODES}_${PPN}"
OUTPUT="${OUTPUT}_${TAPIOCA_STRATEGY}_${TAPIOCA_NBAGGR}_${TAPIOCA_BUFFERSIZE}_${PARTICLES}"
}
updateSettings
for run in {0..9}
do do
updateSettings for PARTICLES in 5000 15000 25000 35000 50000 100000
setLustreFS do
for BIN in miniHACC-AoS miniHACC-SoA miniHACC-AoS-MPIIO miniHACC-SoA-MPIIO
rm $TARGET/* do
aprun $VARS $SETTINGS -n $NPROCS -N $PPN ./miniHACC-AoS $PARTICLES 1 outputFile
sleep 5 setLustreFS >> $OUTPUT
aprun $SETTINGS -n $NPROCS -N $PPN ./$BIN $PARTICLES >> $OUTPUT
rm $TARGET/* sleep 5
aprun $VARS $SETTINGS -n $NPROCS -N $PPN ./miniHACC-AoS-MPIIO $PARTICLES 1 done
sleep 5 done
rm $TARGET/*
aprun $VARS $SETTINGS -n $NPROCS -N $PPN ./miniHACC-SoA $PARTICLES 1
sleep 5
rm $TARGET/*
aprun $VARS $SETTINGS -n $NPROCS -N $PPN ./miniHACC-SoA-MPIIO $PARTICLES 1
sleep 5
done done
######################### #########################
......
...@@ -25,8 +25,10 @@ void Tapioca::Finalize () ...@@ -25,8 +25,10 @@ void Tapioca::Finalize ()
MPI_Comm_free (&this->subComm_); MPI_Comm_free (&this->subComm_);
free (this->buffer1); if ( this->amAnAggr_ ) {
free (this->buffer2); free (this->buffer1);
free (this->buffer2);
}
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment