Commit 31b984fa authored by Francois Tessier

First implementation of memory abstraction for aggregation buffers. Ongoing XP.

parent dbe6daa4
Makefile:

@@ -12,7 +12,7 @@ libtapioca.a:$(OBJ_FILES)
 	$(MPICXX) $(MPI_CFLAGS) -c $? -o $@ $(MPI_LDFLAGS)
 
 install:
-	cp -r tapioca.hpp $(TP_INCLUDE) $(INSTALL_PATH)/include
+	cp -r tapioca.hpp tp_utils.hpp $(TP_INCLUDE) $(INSTALL_PATH)/include
 	cp libtapioca.a $(INSTALL_PATH)/lib
 
 clean:
...
Memory implementation (DDR backend):

@@ -14,12 +14,12 @@ public:
   /**********************/
   /* |-- Allocation */
   /**********************/
-  void memAlloc ( int64_t buffSize, mem_t mem, int masterRank, char* fileName, MPI_Comm comm ) {
+  void memAlloc ( int64_t buffSize, mem_t mem, bool masterRank, char* fileName, MPI_Comm comm ) {
     int rank;
 
     this->mem_ = mem;
     this->masterRank_ = masterRank;
-    this->comm_ = comm;
+    MPI_Comm_dup ( comm, &this->comm_ );
     this->buffSize_ = buffSize;
     strcpy ( this->fileName_, fileName);

@@ -28,7 +28,7 @@ public:
     switch ( this->mem_ )
       {
       case DDR:
-        if ( rank == this->masterRank_ ) {
+        if ( this->masterRank_ ) {
          this->buffer_ = malloc ( this->buffSize_ );
          MPI_Win_create ( this->buffer_, this->buffSize_, 1, MPI_INFO_NULL, this->comm_, &this->RMAWin_ );
         }

@@ -51,7 +51,7 @@ public:
       {
       case DDR:
        MPI_Win_free ( &this->RMAWin_ );
-       if ( rank == this->masterRank_ )
+       if ( this->masterRank_ )
          free ( this->buffer_ );
        break;
       default:

@@ -63,14 +63,14 @@ public:
   /**********************/
   /* |-- I/O */
   /**********************/
-  int memWrite ( void* srcBuffer, int64_t srcSize, int64_t offset ) {
+  int memWrite ( void* srcBuffer, int64_t srcSize, int64_t offset, int destRank ) {
     int err;
     MPI_Status status;
 
     switch ( this->mem_ )
       {
       case DDR:
-       err = MPI_Put ( srcBuffer, srcSize, MPI_BYTE, 0, offset, srcSize, MPI_BYTE, this->RMAWin_ );
+       err = MPI_Put ( srcBuffer, srcSize, MPI_BYTE, destRank, offset, srcSize, MPI_BYTE, this->RMAWin_ );
        break;
       default:
        printMsg ( ERROR, "Error while writing data (mem = %s)\n", this->memName () );

@@ -80,14 +80,14 @@ public:
     return err;
   }
 
-  int memRead ( void* srcBuffer, int64_t srcSize, int64_t offset ) {
+  int memRead ( void* srcBuffer, int64_t srcSize, int64_t offset, int destRank ) {
     int err;
     MPI_Status status;
 
     switch ( this->mem_ )
       {
       case DDR:
-       err = MPI_Get ( srcBuffer, srcSize, MPI_BYTE, 0, offset, srcSize, MPI_BYTE, this->RMAWin_ );
+       err = MPI_Get ( srcBuffer, srcSize, MPI_BYTE, destRank, offset, srcSize, MPI_BYTE, this->RMAWin_ );
        break;
       default:
        printMsg ( ERROR, "Error while reading data (mem = %s)\n", this->memName () );
...
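The DDR backend above is a thin wrapper around MPI one-sided communication: only the aggregator (the rank with masterRank_ set) backs the RMA window with host memory, every rank of the communicator then targets that rank with MPI_Put / MPI_Get, and MPI_Win_fence acts as the flush. The self-contained sketch below reproduces that pattern outside of TAPIOCA; the aggregator rank, buffer size and payload are illustrative values, not taken from this commit, and the window is created collectively with a zero size on non-aggregator ranks since MPI_Win_create is a collective call.

```cpp
// Sketch of the RMA pattern behind memAlloc/memWrite/memFlush/memFree for DDR.
// Only the "aggregator" rank exposes memory; the others attach a zero-size
// window and push their data with MPI_Put. All values are illustrative.
#include <mpi.h>
#include <cstdlib>
#include <cstring>

int main ( int argc, char **argv )
{
  MPI_Init ( &argc, &argv );

  int rank;
  MPI_Comm_rank ( MPI_COMM_WORLD, &rank );

  const int aggrRank = 0;          /* plays the role of masterRank_ */
  const int buffSize = 1 << 20;    /* plays the role of buffSize_   */

  /* memAlloc(): only the aggregator backs the window with memory. */
  void *buffer = ( rank == aggrRank ) ? malloc ( buffSize ) : NULL;
  MPI_Win win;
  MPI_Win_create ( buffer, ( rank == aggrRank ) ? buffSize : 0, 1,
                   MPI_INFO_NULL, MPI_COMM_WORLD, &win );

  /* memWrite(): each rank puts 8 bytes at offset rank*8 on the aggregator. */
  char chunk[8];
  memset ( chunk, rank, 8 );
  MPI_Win_fence ( 0, win );
  MPI_Put ( chunk, 8, MPI_BYTE, aggrRank, (MPI_Aint) rank * 8, 8, MPI_BYTE, win );
  MPI_Win_fence ( 0, win );        /* memFlush(): completes the RMA epoch */

  /* memFree() */
  MPI_Win_free ( &win );
  if ( rank == aggrRank ) free ( buffer );

  MPI_Finalize ();
  return 0;
}
```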
Abstract memory interface:

@@ -11,37 +11,39 @@ public:
   /************************/
   /* |-- Allocation */
   /************************/
-  virtual void memAlloc ( int64_t buffSize, mem_t mem, int masterRank, char* fileName, MPI_Comm comm );
-  virtual void memFree ( );
+  virtual void memAlloc ( int64_t buffSize, mem_t mem, bool masterRank, char* fileName, MPI_Comm comm ) = 0;
+  virtual void memFree ( ) = 0;
 
   /************************/
   /* |-- I/O */
   /************************/
   //virtual int memWrite ( iMemory srcMem, int64_t srcSize, int64_t offset );
   //virtual int memRead ( iMemory srcMem, int64_t srcSize, int64_t offset );
-  virtual int memWrite ( void* srcBuffer, int64_t srcSize, int64_t offset );
-  virtual int memRead ( void* srcBuffer, int64_t srcSize, int64_t offset );
-  virtual int memFlush ( );
+  virtual int memWrite ( void* srcBuffer, int64_t srcSize, int64_t offset, int destRank ) = 0;
+  virtual int memRead ( void* srcBuffer, int64_t srcSize, int64_t offset, int destRank ) = 0;
+  virtual int memFlush ( ) = 0;
 
   /************************/
   /* |-- Utils */
   /************************/
-  virtual char* memName ( );
-  virtual mem_t memType ( char* name );
+  virtual char* memName ( ) = 0;
+  virtual mem_t memType ( char* name ) = 0;
 
   /************************/
   /* |-- Characteristics */
   /************************/
-  virtual int64_t memBandwidth ( );
-  virtual int64_t memLatency ( );
-  virtual int64_t memCapacity ( );
+  virtual int64_t memBandwidth ( ) = 0;
+  virtual int64_t memLatency ( ) = 0;
+  virtual int64_t memCapacity ( ) = 0;
+
+  /* Temporary */
+  void *buffer_;
 
 protected:
   mem_t mem_;
-  int masterRank_;
+  bool masterRank_;
   MPI_Comm comm_;
-  void *buffer_;
   int64_t buffSize_;
   MPI_Win RMAWin_;
...
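With every method now declared pure virtual (= 0), the class becomes a strict interface: each memory tier must supply its own allocation, RMA-style read/write with an explicit destination rank, flush, and characteristics (bandwidth, latency, capacity), DDR being the only backend provided in this commit. Moving buffer_ into the public section under /* Temporary */ presumably keeps the MPI-IO calls in Push and Pull working, since they still read and write the raw aggregation buffer directly rather than going through the abstraction.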
Theta (XC40) run script:

 #!/bin/bash
-NODES=1024
+NODES=32
 PPN=16
 NPROCS=$((NODES*PPN))
 TARGET="/lus/theta-fs0/projects/Performance/ftessier/HACC"
 DDT="/soft/debuggers/forge/bin/ddt --connect"
-STRIPE_COUNT=48
+STRIPE_COUNT=4
 STRIPE_SIZE=8388608
 AGGR_MULT=1

@@ -14,7 +14,7 @@ cd $HOME/install/$ARCHI/bin/
 export TAPIOCA_DEVNULL=false
 export TAPIOCA_COMMSPLIT=true
 export TAPIOCA_STRATEGY=TOPOLOGY_AWARE
-export TAPIOCA_NBAGGR=192
+export TAPIOCA_NBAGGR=4
 export TAPIOCA_BUFFERSIZE=16777216
 
 function setLustreFS ()

@@ -41,29 +41,23 @@ function updateSettings()
 function outputFile ()
 {
-    OUTPUT="$HOME/PUB-IPDPS18/dat/aggr_count"
+    OUTPUT="$HOME/PUB-IPDPS18/dat/mem_abstraction"
     OUTPUT="${OUTPUT}/HACC_XC40_${BIN}_${NODES}_${PPN}"
     OUTPUT="${OUTPUT}_${TAPIOCA_STRATEGY}_${TAPIOCA_NBAGGR}_${TAPIOCA_BUFFERSIZE}_${PARTICLES}"
 }
 
-for run in {0..9}
+for run in {0..2}
 do
     for PARTICLES in 25000
     do
-        for AGGR in 48 96 144 192
-        do
-            AGGR_MULT=$((AGGR/STRIPE_COUNT))
-            echo "cray_cb_nodes_multiplier=$AGGR_MULT"
-            export TAPIOCA_NBAGGR=$AGGR
-            updateSettings
-
-            for BIN in miniHACC-AoS miniHACC-SoA miniHACC-AoS-MPIIO miniHACC-SoA-MPIIO
-            do
-                outputFile
-                setLustreFS >> $OUTPUT
-                aprun $SETTINGS -n $NPROCS -N $PPN ./$BIN $PARTICLES >> $OUTPUT
-                sleep 5
-            done
-        done
+        updateSettings
+
+        for BIN in miniHACC-AoS miniHACC-SoA miniHACC-AoS-MPIIO miniHACC-SoA-MPIIO
+        do
+            outputFile
+            setLustreFS >> $OUTPUT
+            aprun $SETTINGS -n $NPROCS -N $PPN ./$BIN $PARTICLES >> $OUTPUT
+            sleep 5
+        done
     done
 done
...
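For scale, the new settings amount to NPROCS = 32 × 16 = 512 MPI ranks served by TAPIOCA_NBAGGR = 4 aggregators, and TAPIOCA_BUFFERSIZE = 16777216 bytes is 16 MiB. Assuming bufferSize_ follows TAPIOCA_BUFFERSIZE, the two aggregation buffers allocated in InitAggregators below (memBuffer0 and memBuffer1) then pin roughly 32 MiB of DDR per aggregator.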
Tapioca core (Finalize, GlobalFence, InitAggregators):

@@ -20,15 +20,9 @@ void Tapioca::Finalize ()
   this->commDataSize_ = 0;
 
-  MPI_Win_free (&this->RMAWin1);
-  MPI_Win_free (&this->RMAWin2);
+  this->memBuffer0.memFree ();
+  this->memBuffer1.memFree ();
 
   MPI_Comm_free (&this->subComm_);
-
-  if ( this->amAnAggr_ ) {
-    free (this->buffer1);
-    free (this->buffer2);
-  }
 }

@@ -41,10 +35,10 @@ void Tapioca::GlobalFence ()
   switch (buffer)
     {
     case 0:
-      MPI_Win_fence (0, this->RMAWin1);
+      this->memBuffer0.memFlush ();
       break;
     case 1:
-      MPI_Win_fence (0, this->RMAWin2);
+      this->memBuffer1.memFlush ();
       break;
     }

@@ -420,26 +414,8 @@ void Tapioca::InitAggregators ()
 {
   int aggr, retval;
 
-  if ( this->amAnAggr_ ) {
-    this->buffer1 = malloc (this->bufferSize_);
-    this->buffer2 = malloc (this->bufferSize_);
-    retval = MPI_Win_create (this->buffer1, this->bufferSize_, 1, MPI_INFO_NULL, this->subComm_, &this->RMAWin1);
-    this->HandleMPIError (retval);
-    MPI_Win_create (this->buffer2, this->bufferSize_, 1, MPI_INFO_NULL, this->subComm_, &this->RMAWin2);
-    this->HandleMPIError (retval);
-  }
-  else {
-    retval = MPI_Win_create (NULL, 0, 1, MPI_INFO_NULL, this->subComm_, &this->RMAWin1);
-    this->HandleMPIError (retval);
-    retval = MPI_Win_create (NULL, 0, 1, MPI_INFO_NULL, this->subComm_, &this->RMAWin2);
-    this->HandleMPIError (retval);
-  }
-
-  retval = MPI_Win_fence (0, this->RMAWin1);
-  this->HandleMPIError (retval);
-  retval = MPI_Win_fence (0, this->RMAWin2);
-  this->HandleMPIError (retval);
+  this->memBuffer0.memAlloc ( this->bufferSize_, DDR, this->amAnAggr_, NULL, this->subComm_ );
+  this->memBuffer1.memAlloc ( this->bufferSize_, DDR, this->amAnAggr_, NULL, this->subComm_ );
 
 #ifdef DEBUG
   if (this->commRank_ == MASTER) {
...
Tapioca class header:

@@ -19,6 +19,7 @@
 #include "mpi.h"
 #include "tp_network.hpp"
+#include "tp_memory.hpp"
 
 enum MAPPING_STRATEGY
 {

@@ -157,11 +158,9 @@ class Tapioca
   std::vector<int> dataSize;
   std::vector< std::vector<int> > chunksIndexMatching;
 
-  void *buffer1;
-  void *buffer2;
-
-  MPI_Win RMAWin1;
-  MPI_Win RMAWin2;
+  Memory memBuffer0;
+  Memory memBuffer1;
+  //Memory memTarget;
 
   /* AGGREGATOR */
   bool amAnAggr_;
...
Read path (Tapioca::Read, Tapioca::Pull):

@@ -130,14 +130,10 @@ int Tapioca::Read (MPI_File fileHandle, MPI_Offset offset, void *buf,
   switch (buffer)
     {
     case 0:
-      retval = MPI_Get (static_cast<char*>(buf) + bufOffset, subChunkDataSize, MPI_BYTE,
-                        targetAggr, winOffset, subChunkDataSize, MPI_BYTE, this->RMAWin1);
-      this->HandleMPIError (retval);
+      this->memBuffer0.memRead ( static_cast<char*>(buf) + bufOffset, subChunkDataSize, winOffset, targetAggr );
       break;
     case 1:
-      retval = MPI_Get (static_cast<char*>(buf) + bufOffset, subChunkDataSize, MPI_BYTE,
-                        targetAggr, winOffset, subChunkDataSize, MPI_BYTE, this->RMAWin2);
-      this->HandleMPIError (retval);
+      this->memBuffer1.memRead ( static_cast<char*>(buf) + bufOffset, subChunkDataSize, winOffset, targetAggr );
       break;
     }

@@ -198,11 +194,11 @@ void Tapioca::Pull (MPI_File fileHandle, MPI_Request *request)
   switch (buffer)
     {
     case 0:
-      MPI_File_iread_at (fileHandle, offset, buffer1, dataSize, MPI_BYTE, request);
+      MPI_File_iread_at (fileHandle, offset, this->memBuffer0.buffer_, dataSize, MPI_BYTE, request);
       MPI_Wait ( request, &status );
       break;
     case 1:
-      MPI_File_iread_at (fileHandle, offset, buffer2, dataSize, MPI_BYTE, request);
+      MPI_File_iread_at (fileHandle, offset, this->memBuffer1.buffer_, dataSize, MPI_BYTE, request);
       MPI_Wait ( request, &status );
       break;
     }
...
Write path (Tapioca::Write, Tapioca::Push):

@@ -126,14 +126,10 @@ int Tapioca::Write (MPI_File fileHandle, MPI_Offset offset, void *buf,
   switch (buffer)
     {
     case 0:
-      retval = MPI_Put (static_cast<char*>(buf) + bufOffset, subChunkDataSize, MPI_BYTE,
-                        targetAggr, winOffset, subChunkDataSize, MPI_BYTE, this->RMAWin1);
-      this->HandleMPIError (retval);
+      this->memBuffer0.memWrite ( static_cast<char*>(buf) + bufOffset, subChunkDataSize, winOffset, targetAggr );
       break;
     case 1:
-      retval = MPI_Put (static_cast<char*>(buf) + bufOffset, subChunkDataSize, MPI_BYTE,
-                        targetAggr, winOffset, subChunkDataSize, MPI_BYTE, this->RMAWin2);
-      this->HandleMPIError (retval);
+      this->memBuffer1.memWrite ( static_cast<char*>(buf) + bufOffset, subChunkDataSize, winOffset, targetAggr );
       break;
     }

@@ -196,15 +192,15 @@ void Tapioca::Push (MPI_File fileHandle, MPI_Request *request)
     {
     case 0:
       if ( this->writeDevNull_ )
-        MPI_File_iwrite_at (this->devNullFileHandle_, 0, buffer1, dataSize, MPI_BYTE, request);
+        MPI_File_iwrite_at (this->devNullFileHandle_, 0, this->memBuffer0.buffer_, dataSize, MPI_BYTE, request);
       else
-        MPI_File_iwrite_at (fileHandle, offset, buffer1, dataSize, MPI_BYTE, request);
+        MPI_File_iwrite_at (fileHandle, offset, this->memBuffer0.buffer_, dataSize, MPI_BYTE, request);
       break;
     case 1:
       if ( this->writeDevNull_ )
-        MPI_File_iwrite_at (this->devNullFileHandle_, 0, buffer2, dataSize, MPI_BYTE, request);
+        MPI_File_iwrite_at (this->devNullFileHandle_, 0, this->memBuffer1.buffer_, dataSize, MPI_BYTE, request);
       else
-        MPI_File_iwrite_at (fileHandle, offset, buffer2, dataSize, MPI_BYTE, request);
+        MPI_File_iwrite_at (fileHandle, offset, this->memBuffer1.buffer_, dataSize, MPI_BYTE, request);
       break;
     }
...
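Taken together, Write() fills the active aggregation buffer with one-sided puts (now through memWrite) and Push() drains a full buffer to the file with MPI_File_iwrite_at while the other buffer keeps receiving data. The sketch below reproduces that double-buffered pipeline as a standalone MPI program; the file name, sizes, rank 0 as the single aggregator and the round count are illustrative assumptions, not TAPIOCA defaults.

```cpp
// Standalone sketch of the double-buffered aggregation pipeline: ranks put
// into the active RMA buffer, the aggregator writes the sealed buffer with
// MPI_File_iwrite_at while the next round fills the other buffer.
#include <mpi.h>
#include <cstdlib>
#include <cstring>

int main ( int argc, char **argv )
{
  MPI_Init ( &argc, &argv );

  int rank, nprocs;
  MPI_Comm_rank ( MPI_COMM_WORLD, &rank );
  MPI_Comm_size ( MPI_COMM_WORLD, &nprocs );

  const int aggr     = 0;              /* single aggregator for the sketch  */
  const int chunk    = 8;              /* bytes contributed per rank, round */
  const int buffSize = chunk * nprocs;
  const int rounds   = 4;

  /* Two buffers, each backed by its own window (cf. memBuffer0/memBuffer1). */
  char   *buffers[2] = { NULL, NULL };
  MPI_Win wins[2];
  for ( int b = 0; b < 2; b++ ) {
    if ( rank == aggr ) buffers[b] = (char *) malloc ( buffSize );
    MPI_Win_create ( buffers[b], ( rank == aggr ) ? buffSize : 0, 1,
                     MPI_INFO_NULL, MPI_COMM_WORLD, &wins[b] );
  }

  MPI_File fh;
  MPI_File_open ( MPI_COMM_WORLD, "aggr_sketch.dat",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh );

  MPI_Request request = MPI_REQUEST_NULL;
  for ( int round = 0; round < rounds; round++ ) {
    int b = round % 2;                 /* active buffer this round */

    char payload[8];
    memset ( payload, rank + round, 8 );

    /* Write(): every rank pushes its chunk into the active buffer. */
    MPI_Win_fence ( 0, wins[b] );
    MPI_Put ( payload, chunk, MPI_BYTE, aggr, (MPI_Aint) rank * chunk,
              chunk, MPI_BYTE, wins[b] );
    MPI_Win_fence ( 0, wins[b] );      /* GlobalFence() equivalent */

    /* Push(): the aggregator completes the previous round's file write
       (issued on the other buffer), then drains the buffer just sealed,
       so RMA filling and file writing overlap by one round. */
    if ( rank == aggr ) {
      MPI_Wait ( &request, MPI_STATUS_IGNORE );
      MPI_File_iwrite_at ( fh, (MPI_Offset) round * buffSize, buffers[b],
                           buffSize, MPI_BYTE, &request );
    }
  }

  if ( rank == aggr ) MPI_Wait ( &request, MPI_STATUS_IGNORE );
  MPI_File_close ( &fh );

  for ( int b = 0; b < 2; b++ ) {
    MPI_Win_free ( &wins[b] );
    if ( rank == aggr ) free ( buffers[b] );
  }

  MPI_Finalize ();
  return 0;
}
```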