Commit 3062b7f2 authored by Francois Tessier

Improve the network interconnect and memory abstraction for Cooley

parent 4bff31b1
......@@ -16,7 +16,7 @@ Memory::~Memory ()
/* |-- Allocation */
/**********************/
void Memory::memAlloc ( int64_t buffSize, mem_t mem, bool masterRank, char* fileName, MPI_Comm comm ) {
int rank, err, wrank;
int rank, err;
this->mem_ = mem;
this->masterRank_ = masterRank;
......@@ -24,7 +24,6 @@ void Memory::memAlloc ( int64_t buffSize, mem_t mem, bool masterRank, char* file
this->buffSize_ = buffSize;
MPI_Comm_rank ( this->comm_, &rank );
MPI_Comm_rank ( MPI_COMM_WORLD, &wrank );
switch ( this->mem_ )
{
......@@ -39,18 +38,26 @@ void Memory::memAlloc ( int64_t buffSize, mem_t mem, bool masterRank, char* file
MPI_Win_fence (0, this->RMAWin_ );
break;
case HDD:
case NLS:
case PFS:
char *name;
int resultlen;
name = (char *) malloc ( MPI_MAX_PROCESSOR_NAME * sizeof ( char ) );
MPI_Get_processor_name( name, &resultlen );
strcpy ( this->fileName_, fileName );
if ( this->masterRank_ ) {
printMsg ( DEBUG, "Open file %s on HDD (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
printMsg ( DEBUG, "Open file %s on %s from node %s (%s:%d)\n", this->fileName_, this->memName (), name, __FILE__, __LINE__ );
}
err = MPI_File_open( this->comm_, this->fileName_, MPI_MODE_RDWR | MPI_MODE_CREATE,
MPI_INFO_NULL, &this->fileHandle_ );
// Preallocate the file
if ( err != MPI_SUCCESS )
printMsg ( ERROR, "Error while opening the file %s on %s from node %s (%s:%d)\n", this->fileName_, this->memName (), name, __FILE__, __LINE__ );
break;
case NVR:
strcpy ( this->fileName_, fileName );
//if ( this->mmapAllocatorRank_ == rank ) {
sprintf ( this->fileName_, "%s/%s", this->memPath (), fileName );
if ( this->masterRank_ ) {
printMsg ( DEBUG, "Map file %s in DRAM (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
// Add test for memory capacity
......@@ -101,10 +108,9 @@ void Memory::memAlloc ( int64_t buffSize, mem_t mem, bool masterRank, char* file
void Memory::memFree ( ) {
int rank, wrank, err;
int rank, err;
MPI_Comm_rank ( this->comm_, &rank );
MPI_Comm_rank ( MPI_COMM_WORLD, &wrank );
switch ( this->mem_ )
{
......@@ -115,15 +121,14 @@ void Memory::memFree ( ) {
free ( this->buffer_ );
}
break;
case HDD:
case NLS:
case PFS:
if ( this->masterRank_ ) {
printMsg ( DEBUG, "Close file %s on HDD (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
printMsg ( DEBUG, "Close file %s on %s (%s:%d)\n", this->fileName_, this->memName (), __FILE__, __LINE__ );
}
//MPI_File_sync ( this->fileHandle_ );
MPI_File_close ( &this->fileHandle_ );
break;
case NVR:
//if ( this->mmapAllocatorRank_ == rank ) {
if ( this->masterRank_ ) {
printMsg ( DEBUG, "Free memory on NVRAM and unmap file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
close ( this->fd_ );
......@@ -132,7 +137,6 @@ void Memory::memFree ( ) {
printMsg ( ERROR, "Error while unmaping the file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
MPI_Abort ( MPI_COMM_WORLD, -1 );
}
unlink ( this->fileName_ );
}
err = MPI_Win_free ( &this->RMAWin_ );
......@@ -151,27 +155,18 @@ void Memory::memFree ( ) {
/* |-- I/O */
/**********************/
int Memory::memWrite ( void* srcBuffer, int64_t srcSize, int64_t offset, int destRank ) {
int err, wrank, len;
char estring[MPI_MAX_ERROR_STRING];
int err;
MPI_Status status;
MPI_Comm_rank ( MPI_COMM_WORLD, &wrank );
switch ( this->mem_ )
{
case DDR:
case NVR:
err = MPI_Put ( srcBuffer, srcSize, MPI_BYTE, destRank, offset, srcSize, MPI_BYTE, this->RMAWin_ );
if ( err != MPI_SUCCESS)
printMsg ( ERROR, "Error while writing data (mem = %s)\n", this->memName () );
break;
case HDD:
//err = MPI_File_iwrite_at ( this->fileHandle_, offset, srcBuffer, srcSize, MPI_BYTE, &this->request_ );
err = MPI_File_write_at ( this->fileHandle_, offset, srcBuffer, srcSize, MPI_BYTE, &status );
if ( err != MPI_SUCCESS) {
MPI_Error_string ( err, estring, &len );
printMsg ( ERROR, "Error while writing data (mem = %s): %s\n", this->memName (), estring );
}
case NLS:
case PFS:
err = MPI_File_iwrite_at ( this->fileHandle_, offset, srcBuffer, srcSize, MPI_BYTE, &this->request_ );
break;
default:
printMsg ( ERROR, "Error while writing data (mem = %s)\n", this->memName () );
......@@ -183,25 +178,18 @@ int Memory::memWrite ( void* srcBuffer, int64_t srcSize, int64_t offset, int des
int Memory::memRead ( void* srcBuffer, int64_t srcSize, int64_t offset, int destRank ) {
int err, wrank;
int err;
MPI_Status status;
MPI_Comm_rank ( MPI_COMM_WORLD, &wrank );
switch ( this->mem_ )
{
case DDR:
case NVR:
err = MPI_Get ( srcBuffer, srcSize, MPI_BYTE, destRank, offset, srcSize, MPI_BYTE, this->RMAWin_ );
if ( err != MPI_SUCCESS)
printMsg ( ERROR, "Error while reading data (mem = %s)\n", this->memName () );
break;
case HDD:
//err = MPI_File_iread_at ( this->fileHandle_, offset, srcBuffer, srcSize, MPI_BYTE, &this->request_ );
err = MPI_File_read_at ( this->fileHandle_, offset, srcBuffer, srcSize, MPI_BYTE, &status );
if ( err != MPI_SUCCESS)
printMsg ( ERROR, "Error while reading data (mem = %s)\n", this->memName () );
case NLS:
case PFS:
err = MPI_File_iread_at ( this->fileHandle_, offset, srcBuffer, srcSize, MPI_BYTE, &this->request_ );
break;
default:
printMsg ( ERROR, "Error while reading data (mem = %s)\n", this->memName () );
......@@ -213,31 +201,29 @@ int Memory::memRead ( void* srcBuffer, int64_t srcSize, int64_t offset, int des
int Memory::memFlush ( ) {
int err, rank, wrank;
int err, rank;
MPI_Status status;
MPI_Comm_rank ( this->comm_, &rank );
MPI_Comm_rank ( MPI_COMM_WORLD, &wrank );
switch ( this->mem_ )
{
case DDR:
MPI_Win_fence ( 0, this->RMAWin_ );
break;
case HDD:
case NLS:
case PFS:
if ( this->request_ != NULL )
MPI_Wait ( &this->request_, &status );
MPI_File_sync ( this->fileHandle_ );
break;
case NVR:
err = MPI_Win_fence ( 0, this->RMAWin_ );
if ( err != MPI_SUCCESS)
printMsg ( ERROR, "Unable to perform a synchronization on the RMA window (mem = %s)\n", this->memName () );
//if ( this->mmapAllocatorRank_ == rank ) {
if ( this->masterRank_ ) {
printMsg ( DEBUG, "Sync memory and file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
err = msync( this->buffer_, this->buffSize_, MS_SYNC );
err = msync( this->buffer_, this->buffSize_, MS_ASYNC );
if ( err == -1 ) {
printMsg ( ERROR, "Error while syncing memory and file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
MPI_Abort ( MPI_COMM_WORLD, -1 );
......@@ -264,19 +250,21 @@ int Memory::memUncache ( ) {
/* |-- Utils */
/**********************/
/*
 * Name of the memory tier this object is bound to.
 *
 * Forwards this->mem_ to the memName ( mem_t ) overload.
 *
 * @return Whatever memName ( mem_t ) returns for this->mem_.
 */
char* Memory::memName ( ) {
  /* The return must be a plain statement: when it is the body of an unlabelled
   * switch it is never executed and the function runs off its end. */
  return this->memName ( this->mem_ );
}
char* Memory::memName ( mem_t mem ) {
switch ( mem )
{
case DDR:
return "DDR";
break;
case HBM:
return "HBM";
break;
case SSD:
return "SSD";
case PFS:
return "PFS";
break;
case HDD:
return "HDD";
case NLS:
return "NLS";
break;
case NVR:
return "NVR";
......@@ -288,11 +276,10 @@ char* Memory::memName ( ) {
}
mem_t Memory::memType ( char* name ) {
mem_t Memory::memTypeByName ( char* name ) {
if ( ! strcmp ( "DDR", name ) ) return DDR;
if ( ! strcmp ( "HBM", name ) ) return HBM;
if ( ! strcmp ( "SSD", name ) ) return SSD;
if ( ! strcmp ( "HDD", name ) ) return HDD;
if ( ! strcmp ( "PFS", name ) ) return PFS;
if ( ! strcmp ( "NLS", name ) ) return NLS;
if ( ! strcmp ( "NVR", name ) ) return NVR;
printMsg ( ERROR, "Wrong memory name!\n" );
......@@ -300,6 +287,15 @@ mem_t Memory::memType ( char* name ) {
}
/*
 * Map a file path to a memory tier from its first eight characters.
 *
 * Paths starting with "/project" map to PFS and paths starting with
 * "/scratch" map to NLS. Because only the first eight characters are
 * compared, longer prefixes such as "/projects" match as well.
 *
 * @param path  Path to classify. A NULL pointer is treated as an unknown path.
 * @return      PFS or NLS on a match. Otherwise an error is printed and
 *              MPI_Abort is called on MPI_COMM_WORLD.
 *
 * NOTE(review): nothing is returned after MPI_Abort; this relies on MPI_Abort
 * not handing control back. No "unknown" mem_t value is visible from here to
 * return instead -- confirm against the mem_t declaration.
 */
mem_t Memory::memTypeByPath ( char* path ) {
  /* strncmp on a NULL pointer is undefined, so only compare real strings */
  if ( path != NULL ) {
    if ( ! strncmp ( path, "/project", 8 ) ) return PFS;
    if ( ! strncmp ( path, "/scratch", 8 ) ) return NLS;
  }

  /* %s with a NULL argument is undefined as well: print a placeholder instead */
  printMsg ( ERROR, "No memory tier corresponding to %s!\n", path != NULL ? path : "(null)" );
  MPI_Abort ( MPI_COMM_WORLD, -1 );
}
/************************/
/* |-- Characteristics */
/************************/
......@@ -321,3 +317,72 @@ int64_t Memory::memCapacity ( ) {
bool Memory::memPersistency ( ) {
return false;
}
/*
 * Base directory associated with the memory tier of this object.
 *
 * @return "/scratch" when this->mem_ is NVR, an empty string otherwise.
 *         The pointer refers to a string literal: callers must not write
 *         through it.
 */
char* Memory::memPath ( ) {
  /* The interface returns char*, but a string literal no longer converts to
   * char* implicitly since C++11; spell the conversion out so the code does
   * not depend on -Wno-write-strings or a compiler extension. */
  if ( this->mem_ == NVR )
    return const_cast<char*> ( "/scratch" );

  return const_cast<char*> ( "" );
}
/*
 * Bandwidth of a memory tier, in kBps.
 *
 * @param mem  Memory tier to look up.
 * @return     The hard-coded value for DDR, PFS, NLS or NVR. For any other
 *             tier an error is printed and MPI_Abort is called on
 *             MPI_COMM_WORLD; -1 is returned should MPI_Abort hand control
 *             back.
 */
int64_t Memory::memBandwidth ( mem_t mem ) {
  switch ( mem )
    {
    case DDR:
      return 90000000;
    case PFS:
      return 1800000;
    case NLS:
      return 1800000;
    case NVR:
      return 100000;
    default:
      printMsg ( ERROR, "Wrong memory type!\n" );
      MPI_Abort ( MPI_COMM_WORLD, -1 );
    }

  /* MPI_Abort is not declared noreturn: give the function a defined result
   * instead of letting control run off the end of a non-void function. */
  return -1;
}
/*
 * Latency of a memory tier, in ms.
 *
 * @param mem  Memory tier to look up.
 * @return     The hard-coded value for DDR, PFS, NLS or NVR. For any other
 *             tier an error is printed and MPI_Abort is called on
 *             MPI_COMM_WORLD; -1 is returned should MPI_Abort hand control
 *             back.
 */
int64_t Memory::memLatency ( mem_t mem ) {
  switch ( mem )
    {
    case DDR:
      return 2;
    case PFS:
      return 30;
    case NLS:
      return 5;
    case NVR:
      return 10;
    default:
      printMsg ( ERROR, "Wrong memory type!\n" );
      MPI_Abort ( MPI_COMM_WORLD, -1 );
    }

  /* MPI_Abort is not declared noreturn: give the function a defined result
   * instead of letting control run off the end of a non-void function. */
  return -1;
}
/*
 * Capacity of a memory tier.
 *
 * Placeholder: the tier is not inspected and the result is always 0.
 *
 * @param mem  Memory tier (currently ignored).
 * @return     Always 0.
 */
int64_t Memory::memCapacity ( mem_t mem ) {
  const int64_t capacity = 0;
  return capacity;
}
/*
 * Persistency of a memory tier.
 *
 * Placeholder: the tier is not inspected and the result is always false.
 *
 * @param mem  Memory tier (currently ignored).
 * @return     Always false.
 */
bool Memory::memPersistency ( mem_t mem ) {
  const bool persistent = false;
  return persistent;
}
......@@ -36,7 +36,9 @@ public:
/* |-- Utils */
/**********************/
char* memName ( );
mem_t memType ( char* name );
char* memName ( mem_t mem );
mem_t memTypeByName ( char* name );
mem_t memTypeByPath ( char* path );
/************************/
/* |-- Characteristics */
......@@ -45,6 +47,15 @@ public:
int64_t memLatency ( );
int64_t memCapacity ( );
bool memPersistency ( );
char* memPath ( );
int64_t memBandwidth ( mem_t mem );
int64_t memLatency ( mem_t mem );
int64_t memCapacity ( mem_t mem );
bool memPersistency ( mem_t mem );
private:
double ioTime;
};
#endif // TP_MEMORY_H
......
#include "tp_network.hpp"
#include "tp_topology.hpp"
#define MAX_IONODES 1
......@@ -30,6 +30,18 @@ int Topology::ComputeNodeId () {
return nodeId;
}
/*
 * Identifier combining the compute node id and the local core id.
 *
 * Computed as ComputeNodeId () * 100 + LocalCoreId ().
 * NOTE(review): the result is only unique across nodes while the local core
 * id stays within [0, 99] -- confirm for the target machine.
 *
 * @return The combined identifier.
 */
int Topology::GlobalCoreId () {
  /* Query the node first, then the core, in two separate statements */
  const int node = this->ComputeNodeId ();
  const int core = this->LocalCoreId ();

  return core + 100 * node;
}
/*
 * Identifier of the core, as reported by sched_getcpu ().
 *
 * @return The value returned by sched_getcpu (), passed through unchanged
 *         (including any error value it may report).
 */
int Topology::LocalCoreId () {
  const int cpu = sched_getcpu ();
  return cpu;
}
int Topology::ProcessPerNode () {
return 12;
......@@ -41,6 +53,14 @@ int Topology::IONodesPerFile ( char* filename, int *nodesList ) {
}
/*
 * Fill memList with the memory tiers of this architecture.
 *
 * Writes DDR, NAM and NVR, in that order, to the first three entries.
 * NOTE(review): NAM is not handled by the Memory helpers visible alongside
 * this code (name, bandwidth, latency) -- confirm it is the intended tier.
 *
 * @param memList  Output array; must have room for at least three entries.
 * @return         Number of entries written (3).
 */
int Topology::ListOfMemoryTiers ( mem_t* memList ) {
  const mem_t tiers[] = { DDR, NAM, NVR };
  const int tierCount = sizeof ( tiers ) / sizeof ( tiers[0] );

  for ( int idx = 0; idx < tierCount; idx++ )
    memList[idx] = tiers[idx];

  return tierCount;
}
/**********************/
/* |-- Network */
/**********************/
......@@ -49,6 +69,16 @@ int Topology::NetworkDimensions () {
}
/*
 * Bandwidth of the network interconnect.
 *
 * NOTE(review): the unit is not stated here; presumably kBps, as for
 * Memory::memBandwidth -- confirm.
 *
 * @return The hard-coded value 1800000.
 */
int64_t Topology::NetworkBandwidth () {
  const int64_t bandwidth = 1800000;
  return bandwidth;
}
/*
 * Latency of the network interconnect.
 *
 * NOTE(review): the unit is not stated here; presumably ms, as for
 * Memory::memLatency -- confirm.
 *
 * @return The hard-coded value 30.
 */
int64_t Topology::NetworkLatency () {
  const int64_t latency = 30;
  return latency;
}
/* |---- Coordinates */
void Topology::RankToCoordinates ( int rank, int* coord ) {
int size, node, core;
......
#ifndef TP_NETWORK_H
#define TP_NETWORK_H
#ifndef TP_TOPOLOGY_H
#define TP_TOPOLOGY_H
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <sched.h>
#include <mpi.h>
#include "tp_network_interface.hpp"
#include "tp_topology_interface.hpp"
#define MAX_IONODES 1
......@@ -18,13 +18,18 @@ public:
int IONodeId ();
int BridgeNodeId ();
int ComputeNodeId ();
int GlobalCoreId ();
int LocalCoreId ();
int ProcessPerNode ();
int IONodesPerFile ( char* filename, int *nodesList );
int ListOfMemoryTiers ( mem_t* memList );
/**********************/
/* |-- Network */
/**********************/
int NetworkDimensions ();
int64_t NetworkBandwidth ();
int64_t NetworkLatency ();
/* |---- Coordinates */
void RankToCoordinates ( int rank, int* coord );
......@@ -42,4 +47,4 @@ public:
void LinksList ( int* linksList );
};
#endif // TP_NETWORK_H
#endif // TP_TOPOLOGY_H
MPICXX = mpicxx
MPI_CFLAGS = -g -O3 -I./ -I./architectures -I./architectures/$(ARCHI)
MPI_CFLAGS = -g -Wno-write-strings -O3 -I./ -I./architectures -I./architectures/$(ARCHI)
MPI_CFLAGS += -DDBG -DCOLOR #-DTIMING
TP_INCLUDE = architectures/tp_network_interface.hpp \
MPI_LDFLAGS = -L/$(HOME)/install/$(ARCHI)/lib
TP_INCLUDE = architectures/tp_topology_interface.hpp \
architectures/tp_memory_interface.hpp \
architectures/$(ARCHI)/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment