Commits (3)
@@ -6,6 +6,11 @@
/**********************/
/* |-- Nodes */
/**********************/
int Topology::IONodesPerFile ( char* filename, int *nodesList ) {
return 1;
}
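/* Note (assumption): MPIX_IO_node_id is an IBM Blue Gene/Q MPIX extension
(mpix.h) returning the I/O node associated with the calling rank; other
platforms would need a stub or their own mapping. */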
int Topology::IONodeId () {
return MPIX_IO_node_id ();
}
@@ -20,6 +25,13 @@ int Topology::ComputeNodeId () {
return 0;
}
int Topology::GlobalCoreId () {
return 0;
}
int Topology::LocalCoreId () {
return 0;
}
int Topology::ProcessPerNode () {
MPIX_Hardware_t hw;
@@ -28,12 +40,6 @@ int Topology::ProcessPerNode () {
return hw.ppn;
}
int Topology::IONodesPerFile ( char* filename, int *nodesList ) {
return 1;
}
/**********************/
/* |-- Network */
/**********************/
......
@@ -21,11 +21,13 @@ public:
/**********************/
/* |-- Nodes */
/**********************/
int IONodesPerFile ( char* filename, int *nodesList );
int IONodeId ();
int BridgeNodeId ();
int ComputeNodeId ();
int GlobalCoreId ();
int LocalCoreId ();
int ProcessPerNode ();
int IONodesPerFile ( char* filename, int *nodesList );
/**********************/
/* |-- Network */
......
@@ -44,6 +44,7 @@ void Memory::memAlloc ( int64_t buffSize, mem_t mem, bool masterRank, char* file
printMsg ( DEBUG, "Connect to the RAN volume (%s:%d)\n", __FILE__, __LINE__ );
}
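/* Reading of this sequence (assumed KDSA semantics): every rank connects to
the same RAN volume, and the barrier keeps any rank from issuing remote
accesses before all connections are established. */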
kdsa_connect(this->RAN_volume_, 0, &this->RAN_handle_);
MPI_Barrier (this->comm_);
break;
case NLS:
case PFS:
@@ -129,6 +130,7 @@ void Memory::memFree ( ) {
}
break;
case NAM:
sleep (20);
kdsa_disconnect (this->RAN_handle_);
break;
case NLS:
@@ -175,14 +177,38 @@ int Memory::memWrite ( void* srcBuffer, int64_t srcSize, int64_t offset, int des
err = MPI_Put ( srcBuffer, srcSize, MPI_BYTE, destRank, offset, srcSize, MPI_BYTE, this->RMAWin_ );
break;
case NAM:
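/* Assumed KDSA write pattern: register the local source buffer, push it to
the RAN volume at the target offset, fence so the data is globally visible,
then drop the registration. The sleep() calls around the KDSA paths appear
to be temporary debugging delays. */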
sleep (2);
kdsa_mem_key_t key;
printMsg ( DEBUG, "kdsa_write at offset %lld, size %lld\n", offset, srcSize );
kdsa_register_mem (this->RAN_handle_, srcBuffer, srcSize, &key);
kdsa_write (this->RAN_handle_, key, offset, srcBuffer, srcSize);
kdsa_memory_barrier (this->RAN_handle_);
kdsa_deregister_mem(key);
break;
case NLS:
case PFS:
err = MPI_File_write_at ( this->fileHandle_, offset, srcBuffer, srcSize, MPI_BYTE, &status );
if ( this->memSrc_ == NAM ) {
sleep (10);
printMsg ( DEBUG, "BEG kdsa_read at offset %lld, size %lld (PFS)\n", offset, srcSize );
kdsa_mem_key_t key;
void *buffer;
buffer = malloc ( srcSize );
printMsg ( DEBUG, "malloc\n");
sleep (1);
kdsa_register_mem (this->RAN_handle_, buffer, srcSize, &key);
printMsg ( DEBUG, "kdsa_register_mem\n");
sleep (1);
kdsa_read (this->RAN_handle_, key, 0, buffer, srcSize);
//kdsa_read_unregistered (this->RAN_handle_, 0, buffer, srcSize);
printMsg ( DEBUG, "kdsa_read\n");
kdsa_memory_barrier (this->RAN_handle_);
printMsg ( DEBUG, "kdsa_memory_barrier\n");
kdsa_deregister_mem(key);
printMsg ( DEBUG, "END kdsa_read at offset %lld, size %lld (PFS)\n", offset, srcSize );
err = MPI_File_write_at ( this->fileHandle_, offset, srcBuffer, srcSize, MPI_BYTE, &status );
}
else
err = MPI_File_write_at ( this->fileHandle_, offset, srcBuffer, srcSize, MPI_BYTE, &status );
break;
default:
printMsg ( ERROR, "Error while writing data (mem = %s)\n", this->memName () );
@@ -212,6 +238,13 @@ int Memory::memRead ( void* srcBuffer, int64_t srcSize, int64_t offset, int des
case NLS:
case PFS:
err = MPI_File_read_at ( this->fileHandle_, offset, srcBuffer, srcSize, MPI_BYTE, &status );
if ( this->memSrc_ == NAM ) {
kdsa_mem_key_t key;
kdsa_register_mem (this->RAN_handle_, srcBuffer, srcSize, &key);
kdsa_write (this->RAN_handle_, key, 0, srcBuffer, srcSize);
kdsa_memory_barrier (this->RAN_handle_);
kdsa_deregister_mem(key);
}
break;
default:
printMsg ( ERROR, "Error while reading data (mem = %s)\n", this->memName () );
@@ -234,6 +267,7 @@ int Memory::memFlush ( ) {
MPI_Win_fence ( 0, this->RMAWin_ );
break;
case NAM:
sleep (2);
kdsa_memory_barrier (this->RAN_handle_);
break;
case NLS:
......
@@ -161,7 +161,7 @@ void Topology::IONodeCoordinates ( int* coord ) {
/* |---- Distance */
int Topology::DistanceToIONode ( int srcRank ) {
return 0;
return 1;
}
/* Minimal distance between two ranks considering a dragonfly network */
......
@@ -85,6 +85,8 @@ int main (int argc, char * argv[])
MPI_Comm_size(MPI_COMM_WORLD, &world_numtasks);
MPI_Comm_rank(MPI_COMM_WORLD, &world_myrank);
sleep (60);
parseArgs ( argc, argv );
if ( ! strcmp ( "FPN", subfiling ) ) mycolor = tp.topology.ComputeNodeId ();
......
@@ -3,7 +3,10 @@ include ../make.inc/Makefile.$(ARCHI)
CPP_FILES := $(wildcard 1D*.cpp)
OUT_FILES := $(CPP_FILES:.cpp=)
all:bin/utils.o $(OUT_FILES)
all:bin/utils.o $(OUT_FILES) bin/normrand
bin/normrand:normrand.c
$(MPICC) $? -o $@ -lm
bin/utils.o:utils.cpp
$(MPICXX) $(MPI_CFLAGS) -c $? -o $@ $(MPI_LDFLAGS)
@@ -12,8 +15,8 @@ bin/utils.o:utils.cpp
$(MPICXX) $(MPI_CFLAGS) $? -o bin/$@ $(MPI_LDFLAGS)
install:
cd bin/; mv $(OUT_FILES) $(INSTALL_PATH)/bin
cd bin/; mv $(OUT_FILES) normrand $(INSTALL_PATH)/bin
clean:
cd bin/; rm -f $(OUT_FILES) *.o
cd $(INSTALL_PATH)/bin/; rm -f $(OUT_FILES) *.o
cd bin/; rm -f $(OUT_FILES) normrand *.o
cd $(INSTALL_PATH)/bin/; rm -f $(OUT_FILES) normrand *.o
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <inttypes.h> /* int64_t, PRId64 */
#include <float.h> /* DBL_MAX */
#define NSUM 10000
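/* Central-limit-theorem Gaussian: the sum of NSUM uniform(0,1) draws has
mean NSUM/2 and variance NSUM/12, so after centering and scaling x is
approximately N(0,1); the return line then maps roughly [-4,+2] standard
deviations onto [0, max]. */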
double gaussrand( int64_t max )
{
double x = 0;
int i;
for(i = 0; i < NSUM; i++)
x += (double)rand() / RAND_MAX;
x -= NSUM / 2.0;
x /= sqrt(NSUM / 12.0);
return ( (x + 4) * max ) / 6 ;
}
int main (int argc, char * argv[])
{
int *distrib, i, j, mod=20, div;
double min = DBL_MAX, max = 0, value; /* min sentinel must exceed any drawn value */
char buf[32]; /* room for a 20-digit 64-bit value plus newline */
int64_t nval, part, *vals;
static const char filename[] = "randtmp";
FILE *file = fopen(filename, "w");
if ( argc < 3 ) {
fprintf (stderr, "usage: %s <nval> <part>\n", argv[0]);
return 1;
}
srand(time(NULL));
nval = (int64_t)atoll (argv[1]);
part = (int64_t)atoll (argv[2]);
div = (int)(part / mod);
if ( div <= 0 ) /* guard: part < mod would make div zero */
div = 1;
distrib = (int *)calloc ( mod, sizeof (int ));
vals = (int64_t *)malloc (nval * sizeof(int64_t));
for ( i = 0; i < mod; i ++ )
distrib[i] = 0;
for ( i = 0; i < nval; i ++ ) {
value = gaussrand ( part );
if ( value < 0 )
value = 10;
if ( value < min )
min = value;
if ( value > max )
max = value;
vals[i] = (int64_t)value;
}
for ( i = 0; i < nval; i ++ ) {
vals[i] = vals[i] - (int64_t)min + 10;
j = (int)(vals[i] / div); /* clamp: the tails can land outside [0, mod) */
if ( j < 0 ) j = 0;
if ( j >= mod ) j = mod - 1;
distrib[j]++;
sprintf (buf, "%" PRId64 "\n", vals[i]);
fputs (buf, file);
}
for ( i = 0; i < mod; i ++ ) {
for ( j = 0; j < distrib[i]; j++ )
fprintf (stdout, "*");
fprintf (stdout, "\n");
}
fprintf (stdout, "range [%.2f ; %.2f]\n", 10.0, max - min);
fclose (file);
return 0;
}
@@ -9,10 +9,10 @@ cd $HOME/install/$ARCHI/bin/
export TAPIOCA_STRATEGY=TOPOLOGY_AWARE
export TAPIOCA_NBAGGR=2
export TAPIOCA_NBBUFFERS=2
export TAPIOCA_BUFFERSIZE=8388608
export TAPIOCA_AGGRTIER=DDR
export TAPIOCA_NBAGGR=1
export TAPIOCA_NBBUFFERS=1
export TAPIOCA_BUFFERSIZE=33554432
export TAPIOCA_AGGRTIER=NAM
export TAPIOCA_DEVNULL=false
export TAPIOCA_COMMSPLIT=true
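# Reading of these knobs, inferred from their names rather than documentation:
# NBAGGR = number of aggregators, NBBUFFERS = buffers per aggregator for
# pipelining, BUFFERSIZE = aggregation buffer size in bytes (33554432 = 32 MiB),
# AGGRTIER = memory tier hosting the buffers (DDR vs. the NAM appliance).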
@@ -51,12 +51,12 @@ function generateRand ()
done
}
PARTICLES=250000
PARTICLES=1000
updateSettings
generateRand
rm $TARGET/*
for BIN in 1D-Array-Tapioca-W 1D-Array-Tapioca-R 1D-Array-MPIIO-W 1D-Array-MPIIO-R
for BIN in 1D-Array-Tapioca-W #1D-Array-Tapioca-R #1D-Array-MPIIO-W 1D-Array-MPIIO-R
do
mpirun $SETTINGS -f $COBALT_NODEFILE -n $NPROCS $BIN -e $PARTICLES -o $TARGET -s SSF
echo
......
#!/bin/bash
NODES=16
NODES=8
PPN=16
NPROCS=$((NODES*PPN))
TARGET="/lus/theta-fs0/projects/Performance/ftessier/TAPIOCA"
#TARGET="/lus/theta-fs0/projects/Performance/ftessier/TAPIOCA"
TARGET="/local/scratch"
DDT="/soft/debuggers/ddt/bin/ddt --connect"
STRIPE_COUNT=16
STRIPE_COUNT=48
STRIPE_SIZE=8388608
AGGR_MULT=1
cd $HOME/install/$ARCHI/bin/
export TAPIOCA_STRATEGY=MEMORY_AWARE
export TAPIOCA_STRATEGY=TOPOLOGY_AWARE
export TAPIOCA_NBAGGR=2
export TAPIOCA_NBBUFFERS=3
export TAPIOCA_BUFFERSIZE=16777216
export TAPIOCA_NBAGGR=48
export TAPIOCA_NBBUFFERS=2
export TAPIOCA_BUFFERSIZE=8388608
export TAPIOCA_PERSISTENCY=false
export TAPIOCA_AGGRTIER=DDR
export TAPIOCA_DEVNULL=false
@@ -52,6 +54,13 @@ function generateRand ()
done
}
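# generateNormRand (below) runs the normrand helper once on a single rank;
# normrand draws normally-distributed values into "randtmp", presumably
# consumed later via randElemFromFile to get skewed per-rank element counts.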
function generateNormRand ()
{
echo "---------- RAND ----------"
aprun -n 1 -N 1 ./normrand $NODES $PARTICLES
echo "--------------------------"
}
function setLustreFS ()
{
rm $TARGET/*
@@ -64,7 +73,7 @@ function outputFileMPIIO ()
{
OUTPUT="$HOME/xp_tapioca/paper/1D-Array/"
OUTPUT="${OUTPUT}/1D_XC40_${BIN}_${NODES}_${PPN}"
OUTPUT="${OUTPUT}_MPIIO_DDR_to_HDD_${SUBFILING}_${PARTICLES}"
OUTPUT="${OUTPUT}_MPIIO_DDR_to_HDD_SSF_${PARTICLES}_RANDNORM"
}
function outputFileTAPIOCA ()
@@ -72,21 +81,35 @@ function outputFileTAPIOCA ()
OUTPUT="$HOME/xp_tapioca/paper/1D-Array/"
OUTPUT="${OUTPUT}/1D_XC40_${BIN}_${NODES}_${PPN}"
OUTPUT="${OUTPUT}_${TAPIOCA_STRATEGY}_${TAPIOCA_NBAGGR}"
OUTPUT="${OUTPUT}_${TAPIOCA_BUFFERSIZE}_${TAPIOCA_MEMAGGR}_to_${TAPIOCA_MEMTARGET}_${SUBFILING}_${PARTICLES}"
OUTPUT="${OUTPUT}_${TAPIOCA_BUFFERSIZE}_DDR_to_HDD_SSF_${PARTICLES}_RANDNORM"
}
PARTICLES=1000000
PARTICLES=25000
updateSettings
setLustreFS
generateRand
rm $TARGET/*
for BIN in 1D-Array-Tapioca-W 1D-Array-Tapioca-R #1D-Array-MPIIO-W 1D-Array-MPIIO-R
for RUN in {0..0}
do
aprun $SETTINGS -n $NPROCS -N $PPN $BIN -e $PARTICLES -o $TARGET -s SSF
echo
echo "---------------------------------"
echo
sleep 5
#generateNormRand
rm $TARGET/*
for BIN in 1D-Array-Tapioca-W 1D-Array-Tapioca-R
do
outputFileTAPIOCA
aprun $SETTINGS -n $NPROCS -N $PPN $BIN -e $PARTICLES -o $TARGET -s NPF -n 2
echo
echo "---------------------------------"
echo
sleep 5
done
rm $TARGET/*
for BIN in 1D-Array-MPIIO-W 1D-Array-MPIIO-R
do
outputFileMPIIO
aprun $SETTINGS -n $NPROCS -N $PPN $BIN -e $PARTICLES -o $TARGET -s NPF -n 2
echo
echo "---------------------------------"
echo
sleep 5
done
done
@@ -75,6 +75,53 @@ int64_t randElemFromFile ( int64_t max, int nodeId ) {
}
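/* Rank 0 draws one approximately normal value per rank (the same
central-limit-theorem trick as normrand.c: sum nsum uniforms, center,
rescale to roughly [0, max]), prints an ASCII histogram, then scatters one
value to each rank so element counts follow a normal distribution. */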
int64_t randElemNormalDist ( int64_t max, int nodeId ) {
double x = 0;
int i, j, s, nsum=10000, mod=40, size, rank, *dist_count, div;
int64_t px, *distrib = NULL; /* allocated and used on rank 0 only */
MPI_Comm_rank ( MPI_COMM_WORLD, &rank );
MPI_Comm_size ( MPI_COMM_WORLD, &size );
if ( rank == 0 ) {
srand ( time(NULL) );
distrib = (int64_t *)malloc (size * sizeof(int64_t));
div = (int)(max / mod);
if ( div <= 0 )
div = 1;
dist_count = (int *)malloc ( mod * sizeof (int));
for ( i = 0; i < mod; i++ )
dist_count[i] = 0;
for ( s = 0; s < size; s++ ) {
for(i = 0; i < nsum; i++)
x += (double)rand() / RAND_MAX;
x -= nsum / 2.0;
x /= sqrt(nsum / 12.0);
px = (int64_t)((x+3) * max / 6);
if ( px < 0 )
px = 10;
distrib[s] = px;
j = (int)(px / div); /* clamp: the tails can land outside [0, mod) */
if ( j < 0 ) j = 0;
if ( j >= mod ) j = mod - 1;
dist_count[j]++;
x = 0;
}
for ( i = 0; i < mod; i++ ) {
for ( j = 0; j < dist_count[i]; j++ )
fprintf (stdout, "*");
fprintf (stdout, "\n");
}
free ( dist_count );
}
MPI_Barrier (MPI_COMM_WORLD);
MPI_Scatter (distrib, 1, MPI_LONG_LONG, &px, 1, MPI_LONG_LONG, 0, MPI_COMM_WORLD);
if ( rank == 0 )
free ( distrib );
return px;
}
void MPIIOInfo ( MPI_File fileHandle )
{
MPI_Info info;
......
@@ -4,6 +4,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <mpi.h>
void parseArgs (int argc, char * argv[]);
@@ -11,6 +12,7 @@ void printUsage (char *bin);
int64_t randElem ( int64_t max, int nodeId );
int64_t randElemFromFile ( int64_t max, int nodeId );
int64_t randElemNormalDist ( int64_t max, int nodeId );
void MPIIOInfo ( MPI_File fileHandle );
......
#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* strcmp, strcat */
#include <unistd.h> /* getopt */
#include <assert.h>
#include <stdint.h>
#include <mpi.h>
#include "tapioca.hpp"
#define RED "\x1b[31m"
#define GREEN "\x1b[32m"
#define BLUE "\x1b[34m"
#define RESET "\x1b[0m"
static int64_t num_particles = 25000; /* ~1MB */
static char output[4096] = {0};
static char subfiling[8] = {0};
static bool aos = true;
void printUsage () {
int rank;
MPI_Comm_rank (MPI_COMM_WORLD, &rank);
if (rank == 0) {
fprintf ( stderr, "Usage: ./HACC-IO-Tapioca-R -p <particles> -o <output directory> -s <subfiling> -l <layout>\n" );
fprintf ( stderr, " -p : Number of particles per rank (38B/part., 25K part. ~= 1MB)\n" );
fprintf ( stderr, " -o : Path of the output directory\n" );
fprintf ( stderr, " -s : Subfiling method: FPN, FPION, SSF (default)\n" );
fprintf ( stderr, " -l : Layout: SOA, AOS (default)\n" );
}
}
void parseArgs (int argc, char * argv[])
{
char flags[] = "hp:o:s:l:";
int opt = 0;
while ((opt = getopt (argc, argv, flags)) != -1) {
switch ( opt )
{
case('h'):
printUsage ();
break;
case('p'):
sscanf ( optarg, "%lld", &num_particles );
break;
case('o'):
sprintf ( output, "%s", optarg );
break;
case('s'):
sprintf ( subfiling, "%s", optarg );
break;
case('l'):
if ( ! strcmp ( "SOA", optarg ) )
aos = false;
break;
}
}
if ( num_particles <= 0 ) {
printUsage ();
fprintf (stdout, RED "[WARNING]" RESET " Wrong number of particles. Reset to the default value (25000/rank)\n");
num_particles = 25000;
}
if ( strcmp ( "FPN", subfiling ) &&
strcmp ( "FPION", subfiling ) &&
strcmp ( "SSF", subfiling )) {
printUsage ();
fprintf (stdout, RED "[WARNING]" RESET " Wrong subfiling method. Reset to the default value (SSF: Single Shared File)\n");
sprintf ( subfiling, "SSF" );
}
}
int main (int argc, char * argv[])
{
int world_numtasks, world_myrank, mycolor, mykey, sub_numtasks, sub_myrank, i, file_id;
int64_t sub_particles, tot_particles, particle_size, file_size, tot_size;
int64_t scan_size = 0, offset, hdr = 0;
double start_time, end_time, tot_time, max_time;
double io_bw;
MPI_Comm sub_comm;
MPI_Status status;
Tapioca tp;
char filename[256];
int64_t chunkCount[9], chunkOffset[9];
int chunkSize[9];
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &world_numtasks);
MPI_Comm_rank(MPI_COMM_WORLD, &world_myrank);
parseArgs ( argc, argv );
if ( ! strcmp ( "FPN", subfiling ) ) mycolor = tp.topology.ComputeNodeId ();
if ( ! strcmp ( "FPION", subfiling ) ) mycolor = tp.topology.BridgeNodeId ();
if ( ! strcmp ( "SSF", subfiling ) ) mycolor = 42;
mykey = world_myrank;
MPI_Comm_split (MPI_COMM_WORLD, mycolor, mykey, &sub_comm);
MPI_Comm_size(sub_comm, &sub_numtasks);
MPI_Comm_rank(sub_comm, &sub_myrank);
//snprintf (filename, 256, "/HACC-IO-%s-%08d.dat", aos ? "AoS" : "SoA", mycolor);
snprintf (filename, 256, "/tp_buffer_0.agg");
strcat (output, filename);
/*****************/
/* READ */
/*****************/
float *xx, *yy, *zz, *vx, *vy, *vz, *phi;
int64_t* pid;
uint16_t* mask;
xx = new float[num_particles];
yy = new float[num_particles];
zz = new float[num_particles];
vx = new float[num_particles];
vy = new float[num_particles];
vz = new float[num_particles];
phi = new float[num_particles];
pid = new int64_t[num_particles];
mask = new uint16_t[num_particles];
for (uint64_t i = 0; i< num_particles; i++)
{
xx[i] = (float)i;
yy[i] = (float)i;
zz[i] = (float)i;
vx[i] = (float)i;
vy[i] = (float)i;
vz[i] = (float)i;
phi[i] = (float)i;
pid[i] = (int64_t)i;
mask[i] = (uint16_t)world_myrank;
}
MPI_Allreduce(&num_particles, &sub_particles, 1, MPI_LONG_LONG, MPI_SUM, sub_comm);
MPI_Allreduce(&num_particles, &tot_particles, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
particle_size = (7 * sizeof(float)) + sizeof(int64_t) + sizeof(uint16_t);
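/* 7*4 + 8 + 2 = 38 bytes per particle, so the default 25000 particles per
rank is about 0.9 MB, matching the "~1MB" note in the usage text. */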
file_size = particle_size * sub_particles;
tot_size = particle_size * tot_particles;
MPI_Exscan (&num_particles, &scan_size, 1, MPI_LONG_LONG, MPI_SUM, sub_comm);
if (0 == sub_myrank) {
fprintf (stdout, GREEN "[INFO]" RESET " [%08d] HACC-IO\n", mycolor );
fprintf (stdout, GREEN "[INFO]" RESET " [%08d] Read input file (%s data layout)\n", mycolor, aos ? "AoS" : "SoA");
fprintf (stdout, GREEN "[INFO]" RESET " [%08d] --> %lld particles per rank\n", mycolor, num_particles);
fprintf (stdout, GREEN "[INFO]" RESET " [%08d] --> File size: %.2f MB (%lld particles)\n",
mycolor, (double)file_size/(1024*1024), sub_particles);
}
float *xx_r, *yy_r, *zz_r, *vx_r, *vy_r, *vz_r, *phi_r;
int64_t* pid_r;
uint16_t* mask_r;
xx_r = new float[num_particles];
yy_r = new float[num_particles];
zz_r = new float[num_particles];
vx_r = new float[num_particles];
vy_r = new float[num_particles];
vz_r = new float[num_particles];
phi_r = new float[num_particles];
pid_r = new int64_t[num_particles];
mask_r = new uint16_t[num_particles];
/*****************/
/* INIT TAPIOCA */
/*****************/
for ( i = 0; i < 9; i++ ) {
chunkCount[i] = num_particles;
}
chunkSize[0] = sizeof(float);
chunkSize[1] = sizeof(float);
chunkSize[2] = sizeof(float);
chunkSize[3] = sizeof(float);
chunkSize[4] = sizeof(float);
chunkSize[5] = sizeof(float);
chunkSize[6] = sizeof(float);
chunkSize[7] = sizeof(int64_t);
chunkSize[8] = sizeof(uint16_t);
if ( aos ) {
chunkOffset[0] = hdr + scan_size * particle_size;
for ( i = 1; i < 9; i++ ) {
chunkOffset[i] = chunkOffset[i - 1] + chunkCount[i - 1] * chunkSize[i - 1];
}
}
else {
chunkOffset[0] = hdr + scan_size * chunkSize[0];
for ( i = 1; i < 9; i++ ) {
chunkOffset[i] = chunkOffset[i - 1];
chunkOffset[i] += (sub_particles - scan_size) * chunkSize[i - 1];
chunkOffset[i] += scan_size * chunkSize[i];
}
}
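/* Worked example of the offset math (my reading): with 2 ranks of N
particles each, AoS puts rank 1 at byte N*38 with its nine chunks back to
back; in SoA, rank 1's xx chunk starts at byte N*4 and each following chunk
skips the remaining (sub_particles - scan_size) elements of the previous
variable plus this rank's scan_size elements of the next one. */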
tp.Init (chunkCount, chunkSize, chunkOffset, 9, hdr, output, sub_comm);
/*****************/
start_time = MPI_Wtime();
offset = aos ? scan_size * particle_size : scan_size * sizeof(float);
//MPI_Pcontrol(1);
tp.Read (offset, xx_r, num_particles, MPI_FLOAT, &status);
offset += aos ? num_particles * sizeof(float) : (sub_particles - scan_size) * sizeof(float) + scan_size * sizeof(float);
tp.Read (offset, yy_r, num_particles, MPI_FLOAT, &status);
offset += aos ? num_particles * sizeof(float) : (sub_particles - scan_size) * sizeof(float) + scan_size * sizeof(float);
tp.Read (offset, zz_r, num_particles, MPI_FLOAT, &status);
offset += aos ? num_particles * sizeof(float) : (sub_particles - scan_size) * sizeof(float) + scan_size * sizeof(float);
tp.Read (offset, vx_r, num_particles, MPI_FLOAT, &status);
offset += aos ? num_particles * sizeof(float) : (sub_particles - scan_size) * sizeof(float) + scan_size * sizeof(float);
tp.Read (offset, vy_r, num_particles, MPI_FLOAT, &status);
offset += aos ? num_particles * sizeof(float) : (sub_particles - scan_size) * sizeof(float) + scan_size * sizeof(float);
tp.Read (offset, vz_r, num_particles, MPI_FLOAT, &status);
offset += aos ? num_particles * sizeof(float) : (sub_particles - scan_size) * sizeof(float) + scan_size * sizeof(float);
tp.Read (offset, phi_r, num_particles, MPI_FLOAT, &status);
offset += aos ? num_particles * sizeof(float) : (sub_particles - scan_size) * sizeof(float) + scan_size * sizeof(int64_t);
tp.Read (offset, pid_r, num_particles, MPI_LONG_LONG, &status);
offset += aos ? num_particles * sizeof(int64_t) : (sub_particles - scan_size) * sizeof(int64_t) + scan_size * sizeof(uint16_t);
tp.Read (offset, mask_r, num_particles, MPI_UNSIGNED_SHORT, &status);
tp.Finalize ();
//MPI_Pcontrol(0);
end_time = MPI_Wtime();
tot_time = end_time - start_time;
MPI_Reduce (&tot_time, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
if (0 == world_myrank) {
io_bw = (double)tot_size / max_time / (1024 * 1024);
fprintf (stdout, BLUE "[TIMING]" RESET " Read TAPIOCA I/O bandwidth: %.2f MBps (%.2f MB in %.2f ms)\n",
io_bw, (double)tot_size/(1024*1024), max_time * 1000);
}
MPI_Barrier (MPI_COMM_WORLD);
/*****************/
/* VERIFICATION */
/*****************/
for (uint64_t i = 0; i< num_particles; i++) {
if ((xx[i] != xx_r[i]) || (yy[i] != yy_r[i]) || (zz[i] != zz_r[i])
|| (vx[i] != vx_r[i]) || (vy[i] != vy_r[i]) || (vz[i] != vz_r[i])
|| (phi[i] != phi_r[i])|| (pid[i] != pid_r[i]) || (mask[i] != mask_r[i]))
{
fprintf (stdout, RED "[ERROR]" RESET "[%08d][%02d] Wrong value for particle %d (%.2f, %.2f, %.2f, %.2f, %.2f, %.2f, %.2f, %lld, %u )\n",
mycolor, world_myrank, i, xx_r[i], yy_r[i], zz_r[i], vx_r[i], vy_r[i], vz_r[i], phi_r[i], pid_r[i], mask_r[i] );
MPI_Abort (MPI_COMM_WORLD, -1);
}
}
if (0 == sub_myrank)
fprintf (stdout, GREEN "[INFO]" RESET " [%08d] Content verified and consistent\n", mycolor);
/*****************/
/* FREE */
/*****************/
delete [] xx;
delete [] xx_r;
delete [] yy;
delete [] yy_r;
delete [] zz;
delete [] zz_r;
delete [] vx;
delete [] vx_r;
delete [] vy;
delete [] vy_r;
delete [] vz;
delete [] vz_r;
delete [] phi;
delete [] phi_r;
delete [] pid;
delete [] pid_r;
delete [] mask;
delete [] mask_r;
MPI_Finalize ();
}
@@ -103,7 +103,7 @@ int main (int argc, char * argv[])
MPI_Comm_rank(sub_comm, &sub_myrank);
snprintf (filename, 256, "/HACC-IO-%s-%08d.dat", aos ? "AoS" : "SoA", mycolor);
strcat (output, filename);
/*****************/
/* READ */
......
#!/bin/bash
NODES=8
NODES=64
PPN=12
NPROCS=$((NODES*PPN))
TARGET="/projects/Performance/ftessier/TAPICOA"
@@ -12,8 +12,9 @@ export TAPIOCA_STRATEGY=TOPOLOGY_AWARE
export TAPIOCA_NBAGGR=2
export TAPIOCA_NBBUFFERS=2
export TAPIOCA_BUFFERSIZE=8388608
export TAPIOCA_AGGRTIER=DDR
export TAPIOCA_BUFFERSIZE=16777216
export TAPIOCA_PERSISTENCY=false
export TAPIOCA_AGGRTIER=NVR
export TAPIOCA_DEVNULL=false
export TAPIOCA_COMMSPLIT=true
@@ -47,22 +48,142 @@ function updateSettings()
}
function outputFileMPIIO ()
{
OUTPUT="$HOME/xp_tapioca/paper/Workflow/"
OUTPUT="${OUTPUT}/COOLEY_${BIN}_${NODES}_${PPN}"
OUTPUT="${OUTPUT}_MPIIO_DDR_to_PFS_${LAYOUT}_${SUB}_${PARTICLES}"
}
function outputFileTAPIOCA ()
{
OUTPUT="$HOME/xp_tapioca/paper/Workflow/"
OUTPUT="${OUTPUT}/COOLEY_${BIN}_${NODES}_${PPN}"
OUTPUT="${OUTPUT}_${TAPIOCA_STRATEGY}_${TAPIOCA_NBAGGR}"
OUTPUT="${OUTPUT}_${TAPIOCA_BUFFERSIZE}_${TAPIOCA_AGGRTIER}_to_${TAPIOCA_TRGTTIER}"
OUTPUT="${OUTPUT}_${LAYOUT}_${SUB}_${PARTICLES}"
}
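# uncache (my reading): between measurements, write and re-read large
# unrelated files on both the parallel file system and node-local /scratch
# so previously written benchmark data is evicted from the page caches and
# subsequent reads are not served from memory.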
function uncache ()
{
ELEMENTS=250000
BIN=1D-Array-MPIIO-W
updateSettings
mpirun $SETTINGS -f $COBALT_NODEFILE -n $NPROCS $BIN -e $ELEMENTS -o $TARGET -s FPN
sleep 5
BIN=1D-Array-MPIIO-R
updateSettings
mpirun $SETTINGS -f $COBALT_NODEFILE -n $NPROCS $BIN -e $ELEMENTS -o $TARGET -s FPN
sleep 5
LOCAL="/scratch"
BIN=1D-Array-MPIIO-W
updateSettings
mpirun $SETTINGS -f $COBALT_NODEFILE -n $NPROCS $BIN -e $ELEMENTS -o $LOCAL -s FPN
sleep 5
BIN=1D-Array-MPIIO-R
updateSettings
mpirun $SETTINGS -f $COBALT_NODEFILE -n $NPROCS $BIN -e $ELEMENTS -o $LOCAL -s FPN
sleep 5
}
PARTICLES=25000
updateSettings
rm $TARGET/*
for LAYOUT in AOS
for RUN in {0..11}
do
for SUB in SSF
for LAYOUT in AOS
do
for BIN in HACC-IO-Tapioca-W HACC-IO-Tapioca-R HACC-IO-Tapioca-WR HACC-IO-MPIIO-W HACC-IO-MPIIO-R HACC-IO-MPIIO-WR
do
mpirun --env $SETTINGS -f $COBALT_NODEFILE -n $NPROCS $BIN -p $PARTICLES -o $TARGET -s $SUB -l $LAYOUT
for SUB in FPN
do
BIN=HACC-IO-Tapioca-W
TAPIOCA_AGGRTIER=NVR
TAPIOCA_TRGTTIER=PFS
updateSettings
outputFileTAPIOCA
mpirun --env $SETTINGS -f $COBALT_NODEFILE -n $NPROCS $BIN -p $PARTICLES -o $TARGET -s $SUB -l $LAYOUT >> $OUTPUT
echo
echo "---------------------------------"
echo
sleep 5
done
uncache
BIN=HACC-IO-Tapioca-R
TAPIOCA_AGGRTIER=DDR
TAPIOCA_TRGTTIER=PFS
updateSettings
outputFileTAPIOCA
mpirun --env $SETTINGS -f $COBALT_NODEFILE -n $NPROCS $BIN -p $PARTICLES -o $TARGET -s $SUB -l $LAYOUT >> $OUTPUT
echo
echo "---------------------------------"
echo
sleep 5
uncache
BIN=HACC-IO-Tapioca-R-NLS
TAPIOCA_AGGRTIER=DDR
TAPIOCA_TRGTTIER=NLS
LOCAL_TARGET="/scratch"
updateSettings
outputFileTAPIOCA
mpirun --env $SETTINGS -f $COBALT_NODEFILE -n $NPROCS $BIN -p $PARTICLES -o $LOCAL_TARGET -s $SUB -l $LAYOUT >> $OUTPUT
echo
echo "---------------------------------"
echo
sleep 5
rm $TARGET/*
BIN=HACC-IO-Tapioca-W
TAPIOCA_AGGRTIER=DDR
TAPIOCA_TRGTTIER=PFS
updateSettings
outputFileTAPIOCA
mpirun --env $SETTINGS -f $COBALT_NODEFILE -n $NPROCS $BIN -p $PARTICLES -o $TARGET -s $SUB -l $LAYOUT >> $OUTPUT
echo
echo "---------------------------------"
echo
sleep 5
uncache
BIN=HACC-IO-Tapioca-R
TAPIOCA_AGGRTIER=DDR
TAPIOCA_TRGTTIER=PFS
updateSettings
outputFileTAPIOCA
mpirun --env $SETTINGS -f $COBALT_NODEFILE -n $NPROCS $BIN -p $PARTICLES -o $TARGET -s $SUB -l $LAYOUT >> $OUTPUT
echo
echo "---------------------------------"
echo
sleep 5
# rm $TARGET/*
# BIN=HACC-IO-MPIIO-W
# outputFileMPIIO
# mpirun --env $SETTINGS -f $COBALT_NODEFILE -n $NPROCS $BIN -p $PARTICLES -o $TARGET -s $SUB -l $LAYOUT >> $OUTPUT
# echo
# echo "---------------------------------"
# echo
# sleep 5
# uncache
# BIN=HACC-IO-MPIIO-R
# outputFileMPIIO
# mpirun --env $SETTINGS -f $COBALT_NODEFILE -n $NPROCS $BIN -p $PARTICLES -o $TARGET -s $SUB -l $LAYOUT >> $OUTPUT
# echo
# echo "---------------------------------"
# echo
# sleep 5
rm $TARGET/*
done
done
done
#HACC-IO-Tapioca-W HACC-IO-Tapioca-R HACC-IO-Tapioca-WR HACC-IO-MPIIO-W HACC-IO-MPIIO-R HACC-IO-MPIIO-WR
@@ -5,11 +5,11 @@ MODE
==========================================================================================
GRID DIMENSION PARAMETERS
==========================================================================================
1024 - global number of grid points in the x-direction (nx_g)
1024 - global number of grid points in the y-direction (ny_g)
512 - global number of grid points in the z-direction (nz_g)
32 - number of processors in x-direction (npx)
32 - number of processors in y-direction (npy)
512 - global number of grid points in the x-direction (nx_g)
512 - global number of grid points in the y-direction (ny_g)
512 - global number of grid points in the z-direction (nz_g)
16 - number of processors in x-direction (npx)
16 - number of processors in y-direction (npy)
16 - number of processors in z-direction (npz)
==========================================================================================
RUN-TIME PARAMETERS
......
@@ -7,16 +7,16 @@ TARGET="/lus/theta-fs0/projects/Performance/ftessier/TAPIOCA"
DDT="/soft/debuggers/forge/bin/ddt --connect"
STRIPE_COUNT=48
STRIPE_SIZE=16777216
AGGR_MULT=1
AGGR_MULT=2
cd $HOME/TAPIOCA/examples/S3D-IO/run/
export TAPIOCA_STRATEGY=MEMORY_AWARE
export TAPIOCA_NBAGGR=48
export TAPIOCA_NBBUFFERS=2
export TAPIOCA_NBAGGR=96
export TAPIOCA_NBBUFFERS=3
export TAPIOCA_BUFFERSIZE=16777216
export TAPIOCA_AGGRTIER=DDR
export TAPIOCA_AGGRTIER=NVR
export TAPIOCA_DEVNULL=false
export TAPIOCA_COMMSPLIT=true
@@ -57,7 +57,7 @@ updateSettings
cp ../input/s3d.in.mpiio.$NODES ../input/s3d.in
for RUN in {0..10}
do
aprun $SETTINGS -n $NPROCS -N $PPN ./s3d_f90.x >> /home/ftessier/xp_tapioca/paper/S3DIO/mpiio.$NODES.$TAPIOCA_NBBUFFERS
aprun $SETTINGS -n $NPROCS -N $PPN ./s3d_f90.x >> /home/ftessier/xp_tapioca/paper/S3DIO/mpiio.$NODES.$TAPIOCA_NBBUFFERS.$TAPIOCA_NBAGGR.$STRIPE_COUNT.DDR
sleep 10
rm $TARGET/*
done
@@ -65,7 +65,7 @@ done
cp ../input/s3d.in.tapioca.$NODES ../input/s3d.in
for RUN in {0..10}
do
aprun $SETTINGS -n $NPROCS -N $PPN ./s3d_f90.x >> /home/ftessier/xp_tapioca/paper/S3DIO/tapioca.$NODES.$TAPIOCA_NBBUFFERS
aprun $SETTINGS -n $NPROCS -N $PPN ./s3d_f90.x >> /home/ftessier/xp_tapioca/paper/S3DIO/tapioca.$NODES.$TAPIOCA_NBBUFFERS.$TAPIOCA_NBAGGR.$STRIPE_COUNT.DDR
sleep 10
rm $TARGET/*
done
@@ -7,14 +7,14 @@ TARGET="/lus/theta-fs0/projects/Performance/ftessier/TAPIOCA"
DDT="/soft/debuggers/forge/bin/ddt --connect"
STRIPE_COUNT=48
STRIPE_SIZE=16777216
AGGR_MULT=4
AGGR_MULT=8
cd $HOME/TAPIOCA/examples/S3D-IO/run/
export TAPIOCA_STRATEGY=MEMORY_AWARE
export TAPIOCA_NBAGGR=192
export TAPIOCA_NBBUFFERS=2
export TAPIOCA_NBAGGR=384
export TAPIOCA_NBBUFFERS=3
export TAPIOCA_BUFFERSIZE=16777216
export TAPIOCA_AGGRTIER=DDR
@@ -57,7 +57,7 @@ updateSettings
cp ../input/s3d.in.mpiio.$NODES ../input/s3d.in
for RUN in {0..10}
do
aprun $SETTINGS -n $NPROCS -N $PPN ./s3d_f90.x >> /home/ftessier/xp_tapioca/paper/S3DIO/mpiio.$NODES
aprun $SETTINGS -n $NPROCS -N $PPN ./s3d_f90.x >> /home/ftessier/xp_tapioca/paper/S3DIO/mpiio.$NODES.$TAPIOCA_NBAGGR.$TAPIOCA_NBBUFFERS
sleep 10
rm $TARGET/*
done
@@ -65,7 +65,7 @@ done
cp ../input/s3d.in.tapioca.$NODES ../input/s3d.in
for RUN in {0..10}
do
aprun $SETTINGS -n $NPROCS -N $PPN ./s3d_f90.x >> /home/ftessier/xp_tapioca/paper/S3DIO/tapioca.$NODES
aprun $SETTINGS -n $NPROCS -N $PPN ./s3d_f90.x >> /home/ftessier/xp_tapioca/paper/S3DIO/tapioca.$NODES.$TAPIOCA_NBAGGR.$TAPIOCA_NBBUFFERS
sleep 10
rm $TARGET/*
done
MPICXX = mpicxx
MPIF90 = mpif90
MPICC = mpicc
MPI_CFLAGS = -g -O0 -I$(HOME)/install/$(ARCHI)/include/ -DDBG -DCOLOR #-DTIMING
MPI_LDFLAGS = -L$(HOME)/install/$(ARCHI)/lib/ -ltapioca #-lmpiP -lunwind -lbfd #-ldarshan
MPI_CFLAGS = -g -O0 -Wno-write-strings -I$(HOME)/install/$(ARCHI)/include/ -I/soft/RAN/kdsa_api/current/include/ -DDBG -DCOLOR #-DTIMING
MPI_LDFLAGS = -L$(HOME)/install/$(ARCHI)/lib/ -L/soft/RAN/kdsa_api/current/lib64/ -ltapioca -lkdsa #-lmpiP -lunwind -lbfd #-ldarshan
MPI_F90_FLAGS = $(MPI_CFLAGS)
MPI_F90_LDFLAGS = $(MPI_LDFLAGS)
......
MPICXX = mpif90
MPICXX = mpixlcxx
MPI_CFLAGS = -g -O3 -I./ -I./architectures -I./architectures/$(ARCHI)
MPI_CFLAGS += -DDBG -DCOLOR #-DTIMING
......