/* tp_memory.cpp */
#include "tp_memory.hpp"

/*
 *  Default constructor.
 *
 *  Only puts the object in a known idle state: there is no pending
 *  non-blocking file request and the mmap allocator rank is 0.
 *  No buffer, window or file is set up here.
 *
 *  request_ is an MPI_Request (its address is handed to
 *  MPI_File_iwrite_at / MPI_File_iread_at), so the idle value is the MPI
 *  null handle MPI_REQUEST_NULL rather than NULL, which is not a valid
 *  request handle. MPI_Wait on MPI_REQUEST_NULL returns immediately.
 */
Memory::Memory ()
{
  this->request_           = MPI_REQUEST_NULL;
  this->mmapAllocatorRank_ = 0;
}


/*
 *  Destructor. Intentionally empty: nothing is released here, the
 *  buffer, window and file handles are torn down by memFree ().
 */
Memory::~Memory () { }


/**********************/
/*  |-- Allocation    */
/**********************/
void Memory::memAlloc ( int64_t buffSize, mem_t mem, bool masterRank, char* fileName, MPI_Comm comm ) {
  int rank, err;
    
  this->mem_        = mem;
  this->masterRank_ = masterRank;
  MPI_Comm_dup( comm, &this->comm_ );
  this->buffSize_   = buffSize;
    
  MPI_Comm_rank ( this->comm_, &rank );

  switch ( this->mem_ )
    {
    case DDR:
      if ( this->masterRank_ ) {
	printMsg ( DEBUG, "Allocate memory on DDR (%s:%d)\n", __FILE__, __LINE__ );
	this->buffer_ = malloc ( this->buffSize_ );
	MPI_Win_create ( this->buffer_, this->buffSize_, 1, MPI_INFO_NULL, this->comm_, &this->RMAWin_ );
      }
      else
	MPI_Win_create ( NULL, 0, 1, MPI_INFO_NULL, this->comm_, &this->RMAWin_ );
	
      MPI_Win_fence (0, this->RMAWin_ );
      break;
    case HBM:
      if ( this->masterRank_ ) {
	printMsg ( DEBUG, "Allocate memory on HBM (%s:%d)\n", __FILE__, __LINE__ );
	this->buffer_ = hbw_malloc ( this->buffSize_ );
	MPI_Win_create ( this->buffer_, this->buffSize_, 1, MPI_INFO_NULL, this->comm_, &this->RMAWin_ );
      }
      else
	MPI_Win_create ( NULL, 0, 1, MPI_INFO_NULL, this->comm_, &this->RMAWin_ );
	
      MPI_Win_fence (0, this->RMAWin_ );
      break;
52 53 54 55 56 57 58
    case NLS:
    case PFS:
      char *name;
      int resultlen;
      name = (char *) malloc ( MPI_MAX_PROCESSOR_NAME * sizeof ( char ) );
      MPI_Get_processor_name( name, &resultlen );
      
59 60
      strcpy ( this->fileName_, fileName );
      if ( this->masterRank_ ) {
61
	printMsg ( DEBUG, "Open file %s on %s from node %s (%s:%d)\n", this->fileName_, this->memName (), name, __FILE__, __LINE__ );
62 63 64
      }
      err = MPI_File_open( this->comm_, this->fileName_, MPI_MODE_RDWR | MPI_MODE_CREATE,
			   MPI_INFO_NULL, &this->fileHandle_ );
65 66 67
      
      if ( err != MPI_SUCCESS )
	printMsg ( ERROR, "Error while opening the file %s on %s from node %s (%s:%d)\n", this->fileName_, this->memName (), name, __FILE__, __LINE__ );
68 69
      break;
    case NVR:
70 71 72
      sprintf ( this->fileName_, "%s/%s", this->memPath (), fileName );
      
      if ( this->masterRank_ ) {
73 74
	printMsg ( DEBUG, "Map file %s in DRAM (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
	// Add test for memory capacity
75 76 77 78
	/*
	 *  O_RDWR | O_CREAT : read, write, create
	 *  S_IRWXU : read, write, execute/search by owner 
	 */
79 80 81 82 83 84 85 86 87 88 89 90
	this->fd_ = open(this->fileName_, O_RDWR | O_CREAT, S_IRWXU);
	
	if ( this->fd_ == -1 ) {
	  printMsg ( ERROR, "Error while opening the file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
	  MPI_Abort ( MPI_COMM_WORLD, -1 );	  
	}

	if ( ftruncate ( this->fd_, this->buffSize_ ) == -1 ) {
	  printMsg ( ERROR, "Error while truncating the file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
	  MPI_Abort ( MPI_COMM_WORLD, -1 );	  
	}
	
91 92 93 94 95 96 97 98 99
	/*
	 *  PROT_READ | PROT_WRITE : read, write
	 *  MAP_SHARED : Updates to the mapping are visible to other
	 *               processes mapping the same region, and (in
	 *               the case of file-backed mappings) are carried
	 *               through to the underlying file. 
	 */
	this->buffer_ = mmap(0, this->buffSize_, PROT_READ | PROT_WRITE, MAP_SHARED, this->fd_, 0);
	if (this->buffer_ == MAP_FAILED) {
100 101 102 103
	  printMsg ( ERROR, "Mmap of file %s has failed (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
          MPI_Abort ( MPI_COMM_WORLD, -1 );
	}
	
104
	err = MPI_Win_create ( this->buffer_, this->buffSize_, 1, MPI_INFO_NULL, this->comm_, &this->RMAWin_ );
105 106
      }
      else
107
	err = MPI_Win_create ( NULL, 0, 1, MPI_INFO_NULL, this->comm_, &this->RMAWin_ );
108

109 110 111 112
      if ( err != MPI_SUCCESS)
	printMsg ( ERROR, "Unable to create RMA window on mmapped file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__);
      
      this->memFlush ();
113 114 115 116 117 118 119 120 121
      break;
    default:
      printMsg ( ERROR, "Unable to allocate memory (mem = %s)\n", this->memName () );
      MPI_Abort ( MPI_COMM_WORLD, -1 );
    }
}


void Memory::memFree  ( ) {
122
  int rank, err;
123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
  MPI_Comm_rank ( MPI_COMM_WORLD, &rank );
    
  switch ( this->mem_ )
    {
    case DDR:
      MPI_Win_free ( &this->RMAWin_ );
      if ( this->masterRank_ ) {
	printMsg ( DEBUG, "Free memory on DDR (%s:%d)\n", __FILE__, __LINE__ );
	free ( this->buffer_ );
      }
      break;
    case HBM:
      MPI_Win_free ( &this->RMAWin_ );
      if ( this->masterRank_ ) {
	printMsg ( DEBUG, "Free memory on HBM (%s:%d)\n", __FILE__, __LINE__ );
	hbw_free ( this->buffer_ );
      }
      break;
141 142
    case NLS:
    case PFS:
143
      if ( this->masterRank_ ) {
144
	printMsg ( DEBUG, "Close file %s on %s (%s:%d)\n", this->fileName_, this->memName (), __FILE__, __LINE__ );
145 146 147
      }
      MPI_File_close ( &this->fileHandle_ );
      break;
148
    case NVR:
149
      if ( this->masterRank_ ) {
150
	printMsg ( DEBUG, "Free memory on NVRAM and unmap file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
151 152
	close ( this->fd_ );
	err = munmap ( this->buffer_, this->buffSize_ );
153 154 155 156 157
	if ( err == -1 ) {
	  printMsg ( ERROR, "Error while unmaping the file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
          MPI_Abort ( MPI_COMM_WORLD, -1 );
	}
      }
158 159 160 161 162
      
      err = MPI_Win_free ( &this->RMAWin_ );
      if ( err != MPI_SUCCESS)
        printMsg ( ERROR, "Unable to free RMA window on mmapped file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__);
      
163
      break;
164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
    default:
      printMsg ( ERROR, "Unable to free memory (mem = %s)\n", this->memName () );
      MPI_Abort ( MPI_COMM_WORLD, -1 );
    }
}


/**********************/
/*  |-- I/O           */
/**********************/
int Memory::memWrite ( void* srcBuffer, int64_t srcSize, int64_t offset, int destRank ) {
  int err;
  MPI_Status status;

  switch ( this->mem_ )
    {
    case DDR:
    case HBM:
182
    case NVR:
183 184
      err = MPI_Put ( srcBuffer, srcSize, MPI_BYTE, destRank, offset, srcSize, MPI_BYTE, this->RMAWin_ );
      break;
185 186
    case NLS:
    case PFS:
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
      err = MPI_File_iwrite_at ( this->fileHandle_, offset, srcBuffer, srcSize, MPI_BYTE, &this->request_ );
      break;
    default:
      printMsg ( ERROR, "Error while writing data (mem = %s)\n", this->memName () );
      MPI_Abort ( MPI_COMM_WORLD, -1 );
    }

  return err;
}


int Memory::memRead  ( void* srcBuffer, int64_t srcSize, int64_t offset, int destRank ) {
  int err;
  MPI_Status status;

  switch ( this->mem_ )
    {
    case DDR:
    case HBM:
206
    case NVR:
207 208
      err = MPI_Get ( srcBuffer, srcSize, MPI_BYTE, destRank, offset, srcSize, MPI_BYTE, this->RMAWin_ );
      break;
209 210
    case NLS:
    case PFS:
211 212 213 214 215 216 217 218 219 220 221 222
      err = MPI_File_iread_at ( this->fileHandle_, offset, srcBuffer, srcSize, MPI_BYTE, &this->request_ );
      break;
    default:
      printMsg ( ERROR, "Error while reading data (mem = %s)\n", this->memName () );
      MPI_Abort ( MPI_COMM_WORLD, -1 );
    }

  return err;
}

 
int Memory::memFlush ( ) {
223
  int err, rank;
224 225
  MPI_Status status;

226 227
  MPI_Comm_rank ( MPI_COMM_WORLD, &rank );

228 229 230 231 232 233
  switch ( this->mem_ )
    {
    case DDR:
    case HBM:
      MPI_Win_fence ( 0, this->RMAWin_ );
      break;
234 235
    case NLS:
    case PFS:
236 237 238
      if ( this->request_ != NULL )
	MPI_Wait ( &this->request_, &status );
      break;
239
    case NVR:
240 241 242 243 244
      err = MPI_Win_fence ( 0, this->RMAWin_ );
      if ( err != MPI_SUCCESS)
       	printMsg ( ERROR, "Unable to perform a synchronization on the RMA window (mem = %s)\n", this->memName () );
      
      if ( this->masterRank_ ) {
245
	printMsg ( DEBUG, "Sync memory and file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
246
	err = msync( this->buffer_, this->buffSize_, MS_ASYNC );
247 248 249 250 251
	if ( err == -1 ) {
	  printMsg ( ERROR, "Error while syncing memory and file %s (%s:%d)\n", this->fileName_, __FILE__, __LINE__ );
          MPI_Abort ( MPI_COMM_WORLD, -1 );
	}
      }
252 253
      
      err = MPI_Win_fence ( 0, this->RMAWin_ );
254
      break;
255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
    default:
      printMsg ( ERROR, "Error while flushing data (mem = %s)\n", this->memName () );
      MPI_Abort ( MPI_COMM_WORLD, -1 );
    }

  return err;
}


/*
 *  Placeholder: performs no work and always reports 0.
 */
int Memory::memUncache  ( )
{
  const int status = 0;
  return status;
}


/**********************/
/*  |-- Utils         */
/**********************/
/*
 *  Name of the tier bound to this object ("DDR", "HBM", "PFS", "NLS" or
 *  "NVR").
 *
 *  Forwards to memName ( mem_t ) with mem_, so both overloads share one
 *  table and the same handling of an invalid tier ("Wrong memory type!"
 *  followed by MPI_Abort on MPI_COMM_WORLD).
 *
 *  The returned string must be treated as read-only.
 */
char* Memory::memName  ( ) {
  return this->memName ( this->mem_ );
}


/*
 *  Name of the tier `mem`: "DDR", "HBM", "PFS", "NLS" or "NVR".
 *
 *  Any other value prints "Wrong memory type!" and calls MPI_Abort on
 *  MPI_COMM_WORLD.
 *
 *  The names are string literals: the char* return type is kept for the
 *  existing callers, but the returned string must be treated as read-only.
 */
char* Memory::memName  ( mem_t mem ) {
  const char* label = NULL;

  switch ( mem )
    {
    case DDR:
      label = "DDR";
      break;
    case HBM:
      label = "HBM";
      break;
    case PFS:
      label = "PFS";
      break;
    case NLS:
      label = "NLS";
      break;
    case NVR:
      label = "NVR";
      break;
    default:
      printMsg ( ERROR, "Wrong memory type!\n" );
      MPI_Abort ( MPI_COMM_WORLD, -1 );
      /* Not expected to be reached; keeps every path returning a valid string */
      label = "UNKNOWN";
    }

  return const_cast<char*>( label );
}


/*
 *  Tier whose name is exactly `name` ("DDR", "HBM", "PFS", "NLS", "NVR").
 *
 *  A NULL or unknown name prints "Wrong memory name!" and calls MPI_Abort
 *  on MPI_COMM_WORLD.
 */
mem_t Memory::memTypeByName  ( char* name ) {
  static const struct { const char* label; mem_t tier; } known[] = {
    { "DDR", DDR },
    { "HBM", HBM },
    { "PFS", PFS },
    { "NLS", NLS },
    { "NVR", NVR }
  };

  if ( name != NULL ) {
    for ( size_t i = 0; i < sizeof ( known ) / sizeof ( known[0] ); i++ ) {
      if ( ! strcmp ( known[i].label, name ) ) return known[i].tier;
    }
  }

  printMsg ( ERROR, "Wrong memory name!\n" );
  MPI_Abort ( MPI_COMM_WORLD, -1 );

  /* Not expected to be reached; only keeps the function from falling off its end */
  return DDR;
}


/*
 *  Tier deduced from the beginning of `path`:
 *    "/lus/theta-fs0..." -> PFS
 *    "/local/scratch..." -> NLS
 *
 *  A NULL path or any other prefix prints an error and calls MPI_Abort on
 *  MPI_COMM_WORLD.
 */
mem_t Memory::memTypeByPath  ( char* path ) {
  /* Arrays, so that sizeof gives the prefix length (plus the final '\0') */
  static const char pfsPrefix[] = "/lus/theta-fs0";
  static const char nlsPrefix[] = "/local/scratch";

  if ( path != NULL ) {
    if ( ! strncmp ( path, pfsPrefix, sizeof ( pfsPrefix ) - 1 ) ) return PFS;
    if ( ! strncmp ( path, nlsPrefix, sizeof ( nlsPrefix ) - 1 ) ) return NLS;
  }

  printMsg ( ERROR, "No memory tier corresponding to %s!\n", path != NULL ? path : "(null)" );
  MPI_Abort ( MPI_COMM_WORLD, -1 );

  /* Not expected to be reached; only keeps the function from falling off its end */
  return PFS;
}


/************************/
/*  |-- Characteristics */
/************************/
/*
 *  Bandwidth of the tier bound to this object.
 *  Placeholder: the value returned is always 0.
 */
int64_t Memory::memBandwidth ( )
{
  const int64_t bandwidth = 0;
  return bandwidth;
}


/*
 *  Latency of the tier bound to this object.
 *  Placeholder: the value returned is always 0.
 */
int64_t Memory::memLatency   ( )
{
  const int64_t latency = 0;
  return latency;
}


/*
 *  Capacity of the tier bound to this object.
 *  Placeholder: the value returned is always 0.
 */
int64_t Memory::memCapacity  ( )
{
  const int64_t capacity = 0;
  return capacity;
}


/*
 *  Whether the tier bound to this object is persistent.
 *  Placeholder: the answer is always false.
 */
bool Memory::memPersistency  ( )
{
  const bool persistent = false;
  return persistent;
}


/*
 *  Directory under which memAlloc () creates the mapped file:
 *  "/local/scratch" when the tier is NVR, the empty string otherwise.
 *
 *  Both values are string literals: the char* return type is kept for the
 *  existing callers, but the returned string must be treated as read-only.
 */
char* Memory::memPath ( ) {
  const char* root = ( this->mem_ == NVR ) ? "/local/scratch" : "";
  return const_cast<char*>( root );
}


/*
 *  Bandwidth figure, in kBps, associated with the tier `mem`:
 *    DDR 90000000, HBM 350000000, PFS 1800000, NLS 1800000, NVR 400000
 *
 *  Any other value prints "Wrong memory type!" and calls MPI_Abort on
 *  MPI_COMM_WORLD.
 */
int64_t Memory::memBandwidth ( mem_t mem ) {
  int64_t bandwidth = 0;

  switch ( mem )
    {
    case DDR:
      bandwidth = 90000000;
      break;
    case HBM:
      bandwidth = 350000000;
      break;
    case PFS:
      bandwidth = 1800000;
      break;
    case NLS:
      bandwidth = 1800000;
      break;
    case NVR:
      bandwidth = 400000;
      break;
    default:
      printMsg ( ERROR, "Wrong memory type!\n" );
      MPI_Abort ( MPI_COMM_WORLD, -1 );
      /* Not expected to be reached; the function then returns 0 instead of falling off its end */
    }

  return bandwidth;
}

/*
 *  Latency figure, in ms, associated with the tier `mem`:
 *    DDR 2, HBM 1, PFS 30, NLS 5, NVR 5
 *
 *  Any other value prints "Wrong memory type!" and calls MPI_Abort on
 *  MPI_COMM_WORLD.
 */
int64_t Memory::memLatency  ( mem_t mem ) {
  int64_t latency = 0;

  switch ( mem )
    {
    case DDR:
      latency = 2;
      break;
    case HBM:
      latency = 1;
      break;
    case PFS:
      latency = 30;
      break;
    case NLS:
      latency = 5;
      break;
    case NVR:
      latency = 5;
      break;
    default:
      printMsg ( ERROR, "Wrong memory type!\n" );
      MPI_Abort ( MPI_COMM_WORLD, -1 );
      /* Not expected to be reached; the function then returns 0 instead of falling off its end */
    }

  return latency;
}


/*
 *  Capacity associated with the tier `mem`.
 *  Placeholder: `mem` is not looked at, the value returned is always 0.
 */
int64_t Memory::memCapacity  ( mem_t mem )
{
  (void) mem;
  const int64_t capacity = 0;
  return capacity;
}


/*
 *  Whether the tier `mem` is persistent.
 *  Placeholder: `mem` is not looked at, the answer is always false.
 */
bool Memory::memPersistency  ( mem_t mem )
{
  (void) mem;
  const bool persistent = false;
  return persistent;
}