Commit 3bdff8ec authored by Francois Tessier

Improve the network abstraction (interface and implementation for Cray XC40)

parent 164e11fe
......@@ -49,20 +49,33 @@ void Memory::memAlloc ( int64_t buffSize, mem_t mem, bool masterRank, char* file
MPI_Win_fence (0, this->RMAWin_ );
break;
case HDD:
case NLS:
case PFS:
char *name;
int resultlen;
name = (char *) malloc ( MPI_MAX_PROCESSOR_NAME * sizeof ( char ) );
MPI_Get_processor_name( name, &resultlen );
strcpy ( this->fileName_, fileName );
if ( this->masterRank_ ) {
printMsg ( DEBUG, "Open file %s on HDD (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
printMsg ( DEBUG, "Open file %s on %s from node %s (%s:%d)\n", this->fileName_, this->memName (), name, __FILE__, __LINE__ );
}
err = MPI_File_open( this->comm_, this->fileName_, MPI_MODE_RDWR | MPI_MODE_CREATE,
MPI_INFO_NULL, &this->fileHandle_ );
// Preallocate the file
if ( err != MPI_SUCCESS )
printMsg ( ERROR, "Error while opening the file %s on %s from node %s (%s:%d)\n", this->fileName_, this->memName (), name, __FILE__, __LINE__ );
break;
case NVR:
strcpy ( this->fileName_, fileName );
if ( this->mmapAllocatorRank_ == rank ) {
sprintf ( this->fileName_, "%s/%s", this->memPath (), fileName );
if ( this->masterRank_ ) {
printMsg ( DEBUG, "Map file %s in DRAM (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
// Add test for memory capacity
/*
* O_RDWR | O_CREAT : read, write, create
* S_IRWXU : read, write, execute/search by owner
*/
this->fd_ = open(this->fileName_, O_RDWR | O_CREAT, S_IRWXU);
if ( this->fd_ == -1 ) {
......@@ -75,18 +88,28 @@ void Memory::memAlloc ( int64_t buffSize, mem_t mem, bool masterRank, char* file
MPI_Abort ( MPI_COMM_WORLD, -1 );
}
this->ptrMap_ = mmap(0, this->buffSize_, PROT_READ | PROT_WRITE, MAP_SHARED, this->fd_, 0);
if (this->ptrMap_ == MAP_FAILED) {
/*
* PROT_READ | PROT_WRITE : read, write
* MAP_SHARED : Updates to the mapping are visible to other
* processes mapping the same region, and (in
* the case of file-backed mappings) are carried
* through to the underlying file.
*/
this->buffer_ = mmap(0, this->buffSize_, PROT_READ | PROT_WRITE, MAP_SHARED, this->fd_, 0);
if (this->buffer_ == MAP_FAILED) {
printMsg ( ERROR, "Mmap of file %s has failed (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
MPI_Abort ( MPI_COMM_WORLD, -1 );
}
MPI_Win_create ( this->ptrMap_, this->buffSize_, 1, MPI_INFO_NULL, this->comm_, &this->RMAWin_ );
err = MPI_Win_create ( this->buffer_, this->buffSize_, 1, MPI_INFO_NULL, this->comm_, &this->RMAWin_ );
}
else
MPI_Win_create ( NULL, 0, 1, MPI_INFO_NULL, this->comm_, &this->RMAWin_ );
err = MPI_Win_create ( NULL, 0, 1, MPI_INFO_NULL, this->comm_, &this->RMAWin_ );
MPI_Win_fence (0, this->RMAWin_ );
if ( err != MPI_SUCCESS)
printMsg ( ERROR, "Unable to create RMA window on mmapped file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__);
this->memFlush ();
break;
default:
printMsg ( ERROR, "Unable to allocate memory (mem = %s)\n", this->memName () );
......@@ -115,25 +138,28 @@ void Memory::memFree ( ) {
hbw_free ( this->buffer_ );
}
break;
case HDD:
case NLS:
case PFS:
if ( this->masterRank_ ) {
printMsg ( DEBUG, "Close file %s on HDD (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
printMsg ( DEBUG, "Close file %s on %s (%s:%d)\n", this->fileName_, this->memName (), __FILE__, __LINE__ );
}
MPI_File_close ( &this->fileHandle_ );
break;
case NVR:
this->memFlush ();
MPI_Win_free ( &this->RMAWin_ );
if ( this->mmapAllocatorRank_ == rank ) {
if ( this->masterRank_ ) {
printMsg ( DEBUG, "Free memory on NVRAM and unmap file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
err = munmap ( this->ptrMap_, this->buffSize_ );
close ( this->fd_ );
err = munmap ( this->buffer_, this->buffSize_ );
if ( err == -1 ) {
printMsg ( ERROR, "Error while unmaping the file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
MPI_Abort ( MPI_COMM_WORLD, -1 );
}
close ( this->fd_ );
}
err = MPI_Win_free ( &this->RMAWin_ );
if ( err != MPI_SUCCESS)
printMsg ( ERROR, "Unable to free RMA window on mmapped file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__);
break;
default:
printMsg ( ERROR, "Unable to free memory (mem = %s)\n", this->memName () );
......@@ -156,7 +182,8 @@ int Memory::memWrite ( void* srcBuffer, int64_t srcSize, int64_t offset, int des
case NVR:
err = MPI_Put ( srcBuffer, srcSize, MPI_BYTE, destRank, offset, srcSize, MPI_BYTE, this->RMAWin_ );
break;
case HDD:
case NLS:
case PFS:
err = MPI_File_iwrite_at ( this->fileHandle_, offset, srcBuffer, srcSize, MPI_BYTE, &this->request_ );
break;
default:
......@@ -179,7 +206,8 @@ int Memory::memRead ( void* srcBuffer, int64_t srcSize, int64_t offset, int des
case NVR:
err = MPI_Get ( srcBuffer, srcSize, MPI_BYTE, destRank, offset, srcSize, MPI_BYTE, this->RMAWin_ );
break;
case HDD:
case NLS:
case PFS:
err = MPI_File_iread_at ( this->fileHandle_, offset, srcBuffer, srcSize, MPI_BYTE, &this->request_ );
break;
default:
......@@ -203,20 +231,26 @@ int Memory::memFlush ( ) {
case HBM:
MPI_Win_fence ( 0, this->RMAWin_ );
break;
case HDD:
case NLS:
case PFS:
if ( this->request_ != NULL )
MPI_Wait ( &this->request_, &status );
break;
case NVR:
if ( this->mmapAllocatorRank_ == rank ) {
err = MPI_Win_fence ( 0, this->RMAWin_ );
if ( err != MPI_SUCCESS)
printMsg ( ERROR, "Unable to perform a synchronization on the RMA window (mem = %s)\n", this->memName () );
if ( this->masterRank_ ) {
printMsg ( DEBUG, "Sync memory and file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
err = msync( this->ptrMap_, this->buffSize_, MS_SYNC );
err = msync( this->buffer_, this->buffSize_, MS_ASYNC );
if ( err == -1 ) {
printMsg ( ERROR, "Error while syncing memory and file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
MPI_Abort ( MPI_COMM_WORLD, -1 );
}
}
//MPI_Win_fence ( 0, this->RMAWin_ );
err = MPI_Win_fence ( 0, this->RMAWin_ );
break;
default:
printMsg ( ERROR, "Error while flushing data (mem = %s)\n", this->memName () );
......@@ -244,11 +278,11 @@ char* Memory::memName ( ) {
case HBM:
return "HBM";
break;
case SSD:
return "SSD";
case PFS:
return "PFS";
break;
case HDD:
return "HDD";
case NLS:
return "NLS";
break;
case NVR:
return "NVR";
......@@ -260,11 +294,11 @@ char* Memory::memName ( ) {
}
mem_t Memory::memType ( char* name ) {
mem_t Memory::memTypeByName ( char* name ) {
if ( ! strcmp ( "DDR", name ) ) return DDR;
if ( ! strcmp ( "HBM", name ) ) return HBM;
if ( ! strcmp ( "SSD", name ) ) return SSD;
if ( ! strcmp ( "HDD", name ) ) return HDD;
if ( ! strcmp ( "PFS", name ) ) return PFS;
if ( ! strcmp ( "NLS", name ) ) return NLS;
if ( ! strcmp ( "NVR", name ) ) return NVR;
printMsg ( ERROR, "Wrong memory name!\n" );
......@@ -272,6 +306,15 @@ mem_t Memory::memType ( char* name ) {
}
/**
 * Map a file path to the memory tier it belongs to.
 *
 * The decision is a plain prefix match:
 *   "/lus/theta-fs0..."  -> PFS
 *   "/local/scratch..."  -> NLS
 *
 * @param path  NUL-terminated path to classify. A NULL pointer is reported
 *              as an unknown tier instead of being handed to strncmp.
 * @return PFS or NLS on a match. Without a match an error is printed and
 *         MPI_Abort is called on MPI_COMM_WORLD; UNSET is returned after
 *         that call only so that every control path yields a value.
 */
mem_t Memory::memTypeByPath ( char* path ) {
  static const char pfsPrefix[] = "/lus/theta-fs0";
  static const char nlsPrefix[] = "/local/scratch";

  if ( path != NULL ) {
    /* sizeof - 1 drops the terminating NUL, i.e. the length of the prefix */
    if ( ! strncmp ( path, pfsPrefix, sizeof ( pfsPrefix ) - 1 ) ) return PFS;
    if ( ! strncmp ( path, nlsPrefix, sizeof ( nlsPrefix ) - 1 ) ) return NLS;
  }

  printMsg ( ERROR, "No memory tier corresponding to %s!\n", path != NULL ? path : "(null)" );
  MPI_Abort ( MPI_COMM_WORLD, -1 );
  return UNSET;
}
/************************/
/* |-- Characteristics */
/************************/
......@@ -293,3 +336,11 @@ int64_t Memory::memCapacity ( ) {
bool Memory::memPersistency ( ) {
return false;
}
/**
 * Base directory associated with the current memory tier.
 *
 * @return "/local/scratch" when mem_ is NVR, an empty string for any other
 *         tier. The returned pointer refers to a string literal: callers
 *         must neither write through it nor free it.
 */
char* Memory::memPath ( ) {
  /*
   * The signature hands out a non-const char*, but ISO C++11 no longer
   * allows the implicit string-literal -> char* conversion; the cast makes
   * it explicit without changing what callers receive.
   */
  if ( this->mem_ == NVR )
    return const_cast<char*> ( "/local/scratch" );

  return const_cast<char*> ( "" );
}
......@@ -36,8 +36,9 @@ public:
/**********************/
/* |-- Utils */
/**********************/
char* memName ( );
mem_t memType ( char* name );
char* memName ( );
mem_t memTypeByName ( char* name );
mem_t memTypeByPath ( char* path );
/************************/
/* |-- Characteristics */
......@@ -46,6 +47,7 @@ public:
int64_t memLatency ( );
int64_t memCapacity ( );
bool memPersistency ( );
char* memPath ( );
};
#endif // TP_MEMORY_H
......
......@@ -14,7 +14,34 @@ int Topology::BridgeNodeId () {
int Topology::ComputeNodeId () {
return 0;
char *name;
int resultlen;
int nodeId;
name = (char *) malloc ( MPI_MAX_PROCESSOR_NAME * sizeof ( char ) );
MPI_Get_processor_name( name, &resultlen );
while ( name[0] == 'n' ||
name[0] == 'i' ||
name[0] == 'd' )
name++;
nodeId = atoi ( name );
return nodeId;
}
/**
 * Identifier combining the compute node and the local core of the caller.
 *
 * Computed as ComputeNodeId() * 100 + LocalCoreId(). Two different
 * (node, core) pairs are guaranteed distinct IDs only while
 * LocalCoreId() stays in the range [0, 99].
 *
 * @return The combined node/core identifier.
 */
int Topology::GlobalCoreId () {
  const int node = this->ComputeNodeId ();
  const int core = this->LocalCoreId ();

  return 100 * node + core;
}
/**
 * CPU on which the calling thread is currently running.
 *
 * @return The value reported by sched_getcpu(), passed through unchanged.
 */
int Topology::LocalCoreId () {
  const int cpu = sched_getcpu();
  return cpu;
}
......
......@@ -25,6 +25,8 @@ public:
int IONodeId ();
int BridgeNodeId ();
int ComputeNodeId ();
int GlobalCoreId ();
int LocalCoreId ();
int ProcessPerNode ();
int IONodesPerFile ( char* filename, int *nodesList );
......
......@@ -4,7 +4,7 @@
#include <stdio.h>
#include <stdlib.h>
/*
 * Memory/storage tiers handled by the iMemory implementations.
 * Enumerators keep their implicit values, starting at UNSET == 0.
 */
typedef enum {
  UNSET,  /* first enumerator (0); presumably "no tier selected" -- confirm */
  DDR,
  HBM,
  NVR,    /* memPath() reports "/local/scratch" for this tier */
  NLS,    /* returned by memTypeByPath() for paths under "/local/scratch" */
  PFS     /* returned by memTypeByPath() for paths under "/lus/theta-fs0" */
} mem_t;
class iMemory {
public:
......@@ -25,8 +25,9 @@ public:
/************************/
/* |-- Utils */
/************************/
virtual char* memName ( ) = 0;
virtual mem_t memType ( char* name ) = 0;
virtual char* memName ( ) = 0;
virtual mem_t memTypeByName ( char* name ) = 0;
virtual mem_t memTypeByPath ( char* path ) = 0;
/************************/
/* |-- Characteristics */
......@@ -35,6 +36,8 @@ public:
virtual int64_t memLatency ( ) = 0;
virtual int64_t memCapacity ( ) = 0;
virtual bool memPersistency ( ) = 0;
virtual char* memPath ( ) = 0;
/* Temporary */
void *buffer_;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment