Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
TAPIOCA
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Francois Tessier
TAPIOCA
Commits
2af0cf67
Commit
2af0cf67
authored
Mar 14, 2018
by
Francois Tessier
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Memory-aware aggregator placement strategy
parent
06f19beb
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
232 additions
and
22 deletions
+232
-22
TODO
TODO
+1
-2
architectures/linux-sles_12-x86_64/tp_memory.cpp
architectures/linux-sles_12-x86_64/tp_memory.cpp
+90
-0
architectures/linux-sles_12-x86_64/tp_memory.hpp
architectures/linux-sles_12-x86_64/tp_memory.hpp
+6
-0
architectures/linux-sles_12-x86_64/tp_topology.cpp
architectures/linux-sles_12-x86_64/tp_topology.cpp
+19
-0
architectures/linux-sles_12-x86_64/tp_topology.hpp
architectures/linux-sles_12-x86_64/tp_topology.hpp
+4
-0
architectures/tp_memory_interface.hpp
architectures/tp_memory_interface.hpp
+5
-0
architectures/tp_topology_interface.hpp
architectures/tp_topology_interface.hpp
+5
-0
tapioca.cpp
tapioca.cpp
+5
-0
tapioca.hpp
tapioca.hpp
+2
-2
tp_placement.cpp
tp_placement.cpp
+95
-18
No files found.
TODO
View file @
2af0cf67
...
...
@@ -4,8 +4,6 @@
* Include and adapt the getopt function (miniHACC-AoS-Tapioca-W.cpp)
* Adapt the running scripts to the binary parameters (getopt)
- Implement the Memory-aware aggregators placement
- Change fprintf to the customized printMsg
- Write a getopt function to set the subfiling/output/tiers for the benchmarks
...
...
@@ -15,6 +13,7 @@
- Ask Silvio for Vl3D for Cooley
- If NVR, reset #agg and aggregator buffer size (persistency on local storage)
- If NLS, MPI-IO VS mmap+RMA depending on processes location
- README and LICENCE
...
...
architectures/linux-sles_12-x86_64/tp_memory.cpp
View file @
2af0cf67
...
...
@@ -294,6 +294,31 @@ char* Memory::memName ( ) {
}
/*
 * Human-readable name of the given memory tier.
 * Aborts the whole MPI job on an unknown tier value.
 */
char* Memory::memName ( mem_t mem ) {
  switch ( mem )
    {
    case DDR:
      return "DDR";
    case HBM:
      return "HBM";
    case PFS:
      return "PFS";
    case NLS:
      return "NLS";
    case NVR:
      return "NVR";
    default:
      printMsg ( ERROR, "Wrong memory type!\n" );
      MPI_Abort ( MPI_COMM_WORLD, -1 );
      /* Fix: MPI_Abort does not return control here in any portable sense
       * the compiler can see; without this return, control could reach the
       * end of a non-void function (undefined behavior). Also removed the
       * dead `break` statements after each `return`. */
      return NULL;
    }
}
mem_t
Memory
::
memTypeByName
(
char
*
name
)
{
if
(
!
strcmp
(
"DDR"
,
name
)
)
return
DDR
;
if
(
!
strcmp
(
"HBM"
,
name
)
)
return
HBM
;
...
...
@@ -344,3 +369,68 @@ char* Memory::memPath ( ) {
else
return
""
;
}
/*
 * Peak bandwidth of the given memory tier, in kBps.
 * Hard-coded measured values for this architecture (linux-sles_12-x86_64).
 * Aborts the whole MPI job on an unknown tier value.
 */
int64_t Memory::memBandwidth ( mem_t mem ) {
  switch ( mem )
    {
    case DDR:
      return 90000000;
    case HBM:
      return 350000000;
    case PFS:
      return 1800000;
    case NLS:
      return 1800000;
    case NVR:
      return 400000;
    default:
      printMsg ( ERROR, "Wrong memory type!\n" );
      MPI_Abort ( MPI_COMM_WORLD, -1 );
      /* Fix: unreachable in practice, but without it control could reach
       * the end of a non-void function (undefined behavior). Also removed
       * the dead `break` statements after each `return`. */
      return 0;
    }
}
/*
 * Access latency of the given memory tier, in ms.
 * Hard-coded measured values for this architecture (linux-sles_12-x86_64).
 * Aborts the whole MPI job on an unknown tier value.
 */
int64_t Memory::memLatency ( mem_t mem ) {
  switch ( mem )
    {
    case DDR:
      return 2;
    case HBM:
      return 1;
    case PFS:
      return 30;
    case NLS:
      return 5;
    case NVR:
      return 5;
    default:
      printMsg ( ERROR, "Wrong memory type!\n" );
      MPI_Abort ( MPI_COMM_WORLD, -1 );
      /* Fix: unreachable in practice, but without it control could reach
       * the end of a non-void function (undefined behavior). Also removed
       * the dead `break` statements after each `return`. */
      return 0;
    }
}
/*
 * Capacity of the given memory tier (stub).
 * NOTE(review): always returns 0 regardless of the tier — capacity
 * reporting is not implemented for this architecture yet. Callers must
 * not treat 0 as a real capacity (and must not divide by it).
 */
int64_t Memory::memCapacity ( mem_t mem ) {
  return 0;
}
/*
 * Whether the given memory tier is persistent (stub).
 * NOTE(review): always returns false, even for tiers such as NVR that the
 * project TODO describes in terms of persistency on local storage —
 * presumably a placeholder to be filled in later; confirm before relying
 * on this for data-retention decisions.
 */
bool Memory::memPersistency ( mem_t mem ) {
  return false;
}
architectures/linux-sles_12-x86_64/tp_memory.hpp
View file @
2af0cf67
...
...
@@ -37,6 +37,7 @@ public:
/* |-- Utils */
/**********************/
char
*
memName
(
);
char
*
memName
(
mem_t
mem
);
mem_t
memTypeByName
(
char
*
name
);
mem_t
memTypeByPath
(
char
*
path
);
...
...
@@ -48,6 +49,11 @@ public:
int64_t
memCapacity
(
);
bool
memPersistency
(
);
char
*
memPath
(
);
int64_t
memBandwidth
(
mem_t
mem
);
int64_t
memLatency
(
mem_t
mem
);
int64_t
memCapacity
(
mem_t
mem
);
bool
memPersistency
(
mem_t
mem
);
};
#endif // TP_MEMORY_H
...
...
architectures/linux-sles_12-x86_64/tp_topology.cpp
View file @
2af0cf67
...
...
@@ -99,6 +99,15 @@ int Topology::IONodesPerFile ( char* filename, int *nodesList ) {
return
nLnets
;
}
/*
 * Fill memList with the memory tiers available on this architecture and
 * return how many were written (here: DDR, HBM and NVR, in that order).
 * The caller must supply a buffer large enough for every tier.
 */
int Topology::ListOfMemoryTiers ( mem_t *memList ) {
  static const mem_t kAvailableTiers[] = { DDR, HBM, NVR };
  const int tierCount = (int) ( sizeof ( kAvailableTiers ) / sizeof ( kAvailableTiers[0] ) );

  for ( int i = 0; i < tierCount; i++ )
    memList[i] = kAvailableTiers[i];

  return tierCount;
}
/**********************/
...
...
@@ -118,6 +127,16 @@ int Topology::NetworkDimensions () {
}
/*
 * Peak bandwidth of a network link, hard-coded for this architecture.
 * NOTE(review): unit is presumably kBps, matching the "kBps" convention of
 * Memory::memBandwidth with which this value is compared — confirm.
 */
int64_t Topology::NetworkBandwidth () {
  return 1800000;
}
/*
 * Latency of a network hop, hard-coded for this architecture.
 * NOTE(review): unit is presumably ms, matching the "ms" convention of
 * Memory::memLatency with which this value is compared — confirm.
 */
int64_t Topology::NetworkLatency () {
  return 30;
}
/* |---- Coordinates */
void
Topology
::
RankToCoordinates
(
int
rank
,
int
*
coord
)
{
pmi_mesh_coord_t
xyz
;
...
...
architectures/linux-sles_12-x86_64/tp_topology.hpp
View file @
2af0cf67
...
...
@@ -29,11 +29,15 @@ public:
int
LocalCoreId
();
int
ProcessPerNode
();
int
IONodesPerFile
(
char
*
filename
,
int
*
nodesList
);
int
ListOfMemoryTiers
(
mem_t
*
memList
);
/**********************/
/* |-- Network */
/**********************/
int
NetworkDimensions
();
int64_t
NetworkBandwidth
();
int64_t
NetworkLatency
();
/* |---- Coordinates */
void
RankToCoordinates
(
int
rank
,
int
*
coord
);
...
...
architectures/tp_memory_interface.hpp
View file @
2af0cf67
...
...
@@ -26,6 +26,7 @@ public:
/* |-- Utils */
/************************/
virtual
char
*
memName
(
)
=
0
;
virtual
char
*
memName
(
mem_t
mem
)
=
0
;
virtual
mem_t
memTypeByName
(
char
*
name
)
=
0
;
virtual
mem_t
memTypeByPath
(
char
*
path
)
=
0
;
...
...
@@ -38,6 +39,10 @@ public:
virtual
bool
memPersistency
(
)
=
0
;
virtual
char
*
memPath
(
)
=
0
;
virtual
int64_t
memBandwidth
(
mem_t
mem
)
=
0
;
virtual
int64_t
memLatency
(
mem_t
mem
)
=
0
;
virtual
int64_t
memCapacity
(
mem_t
mem
)
=
0
;
virtual
bool
memPersistency
(
mem_t
mem
)
=
0
;
/* Temporary */
void
*
buffer_
;
...
...
architectures/tp_topology_interface.hpp
View file @
2af0cf67
...
...
@@ -4,6 +4,8 @@
#include <stdio.h>
#include <stdlib.h>
#include "tp_memory_interface.hpp"
class
iTopology
{
public:
/**********************/
...
...
@@ -16,11 +18,14 @@ class iTopology {
virtual
int
GlobalCoreId
()
=
0
;
virtual
int
LocalCoreId
()
=
0
;
virtual
int
ProcessPerNode
()
=
0
;
virtual
int
ListOfMemoryTiers
(
mem_t
*
memList
)
=
0
;
/**********************/
/* |-- Network */
/**********************/
virtual
int
NetworkDimensions
()
=
0
;
virtual
int64_t
NetworkBandwidth
()
=
0
;
virtual
int64_t
NetworkLatency
()
=
0
;
/* |---- Coordinates */
virtual
void
RankToCoordinates
(
int
rank
,
int
*
coord
)
=
0
;
...
...
tapioca.cpp
View file @
2af0cf67
...
...
@@ -217,6 +217,7 @@ void Tapioca::ParseEnvVariables ()
strcmp
(
envStrategy
,
"SHORTEST_PATH"
)
?
0
:
this
->
strategy_
=
SHORTEST_PATH
;
strcmp
(
envStrategy
,
"LONGEST_PATH"
)
?
0
:
this
->
strategy_
=
LONGEST_PATH
;
strcmp
(
envStrategy
,
"TOPOLOGY_AWARE"
)
?
0
:
this
->
strategy_
=
TOPOLOGY_AWARE
;
strcmp
(
envStrategy
,
"MEMORY_AWARE"
)
?
0
:
this
->
strategy_
=
MEMORY_AWARE
;
strcmp
(
envStrategy
,
"CONTENTION_AWARE"
)
?
0
:
this
->
strategy_
=
CONTENTION_AWARE
;
strcmp
(
envStrategy
,
"UNIFORM"
)
?
0
:
this
->
strategy_
=
UNIFORM
;
strcmp
(
envStrategy
,
"RANDOM"
)
?
0
:
this
->
strategy_
=
RANDOM
;
...
...
@@ -557,6 +558,9 @@ void Tapioca::ElectAggregators ()
case
TOPOLOGY_AWARE
:
aggrRank
=
this
->
RankTopologyAware
(
aggrComm
,
color
);
break
;
case
MEMORY_AWARE
:
aggrRank
=
this
->
RankMemoryAware
(
aggrComm
,
color
);
break
;
case
CONTENTION_AWARE
:
aggrRank
=
this
->
RankContentionAware
(
aggrComm
,
color
);
break
;
...
...
@@ -646,6 +650,7 @@ const char* Tapioca::getStrategyName ()
case
SHORTEST_PATH
:
return
"Shortest path"
;
case
LONGEST_PATH
:
return
"Longest path"
;
case
TOPOLOGY_AWARE
:
return
"Topology-aware placement"
;
case
MEMORY_AWARE
:
return
"Memory-aware placement"
;
case
CONTENTION_AWARE
:
return
"Contention-aware placement"
;
case
UNIFORM
:
return
"Uniform placement"
;
case
RANDOM
:
return
"Random placement"
;
...
...
tapioca.hpp
View file @
2af0cf67
...
...
@@ -2,8 +2,6 @@
#define TAPIOCA_H
#define MASTER 0
#define LATENCY 30
#define BANDWIDTH 1800000
#include <stdio.h>
#include <stdlib.h>
...
...
@@ -25,6 +23,7 @@ enum MAPPING_STRATEGY
SHORTEST_PATH
,
LONGEST_PATH
,
TOPOLOGY_AWARE
,
MEMORY_AWARE
,
CONTENTION_AWARE
,
UNIFORM
,
RANDOM
...
...
@@ -99,6 +98,7 @@ protected:
int
RankShortestPath
(
MPI_Comm
aggrComm
,
int64_t
dataSize
);
int
RankLongestPath
(
MPI_Comm
aggrComm
,
int64_t
dataSize
);
int
RankTopologyAware
(
MPI_Comm
aggrComm
,
int64_t
dataSize
);
int
RankMemoryAware
(
MPI_Comm
aggrComm
,
int64_t
dataSize
);
int
RankContentionAware
(
MPI_Comm
aggrComm
,
int64_t
dataSize
);
int
RankUniformDistribution
(
MPI_Comm
aggrComm
,
int64_t
dataSize
);
int
RankRandom
(
MPI_Comm
aggrComm
,
int64_t
dataSize
);
...
...
tp_placement.cpp
View file @
2af0cf67
...
...
@@ -89,26 +89,16 @@ int Tapioca::RankTopologyAware (MPI_Comm aggrComm, int64_t dataSize)
if
(
rank
!=
aggrCommRank
)
{
distance
=
topology
.
DistanceBetweenRanks
(
globalRanks
[
rank
],
worldRank
);
// aggrCost.cost = std::max ( distance *
LATENCY + (double)dataDistrib[rank] / BANDWIDTH
,
// aggrCost.cost = std::max ( distance *
this->topology.NetworkLatency () + (double)dataDistrib[rank] / this->topology.NetworkBandwidth ()
,
// aggrCost.cost );
aggrCost
.
cost
+=
(
distance
*
LATENCY
+
(
double
)
dataDistrib
[
rank
]
/
BANDWIDTH
);
aggrCost
.
cost
+=
(
distance
*
this
->
topology
.
NetworkLatency
()
+
(
double
)
dataDistrib
[
rank
]
/
this
->
topology
.
NetworkBandwidth
()
);
}
}
// IOnodesList = (int *) malloc ( MAX_IONODES * sizeof ( int ) );
// nIOnodes = topology.IONodesPerFile (this->filename_, IOnodesList);
// if ( this->commRank_ == 0 ) {
// fprintf (stdout, "[LUSTRE] nLnet = %d\n", nIOnodes);
// fprintf (stdout, "[LUSTRE] list = ");
// for ( int i = 0; i < nIOnodes; i++ )
// fprintf (stdout, "%d ", IOnodesList[i]);
// fprintf (stdout, "\n");
// }
#ifdef BGQ
aggrCost
.
cost
+=
topology
.
DistanceToIONode
(
worldRank
)
*
LATENCY
+
(
double
)
aggregatedData
/
BANDWIDTH
;
#endif
if
(
topology
.
DistanceToIONode
(
worldRank
)
!=
0
)
aggrCost
.
cost
+=
topology
.
DistanceToIONode
(
worldRank
)
*
this
->
topology
.
NetworkLatency
()
+
(
double
)
aggregatedData
/
this
->
topology
.
NetworkBandwidth
();
if
(
this
->
excludedNode
[
this
->
hostId_
]
)
aggrCost
.
cost
=
DBL_MAX
;
...
...
@@ -134,6 +124,93 @@ int Tapioca::RankTopologyAware (MPI_Comm aggrComm, int64_t dataSize)
}
/*
 * Memory-aware aggregator election.
 *
 * Among the ranks of aggrComm, elect the one — together with the memory
 * tier for its aggregation buffer — that minimizes the cost of gathering
 * every rank's data onto the aggregator. For each candidate tier the cost
 * of receiving from a peer uses the tier's own latency/bandwidth when the
 * peer is on the same node (distance 0), otherwise the worse of the tier
 * and the network. The cost of draining the aggregated data to the I/O
 * node is then added, and excluded nodes are priced out with DBL_MAX.
 *
 * @param aggrComm  sub-communicator of candidate aggregators
 * @param dataSize  number of bytes this rank contributes
 * @return          world rank (commRank_) of the elected aggregator
 *
 * Side effects: sets amAnAggr_ on the winner, memAggr_ (the elected tier)
 * on every rank, and reduces the total data size into aggrDataSize_ on
 * the winner.
 */
int Tapioca::RankMemoryAware (MPI_Comm aggrComm, int64_t dataSize)
{
  /* Layout matches MPI_DOUBLE_INT for the MPI_MINLOC reduction below. */
  struct { double cost; int rank; } aggrCost, minCost;
  double bestCost = DBL_MAX;
  int aggrCommRank, aggrCommSize, worldRank, rank, distance, aggrRank, memCount, m;
  /* NOTE(review): memList is a fixed 10-slot buffer — assumes
   * ListOfMemoryTiers never reports more than 10 tiers; confirm. */
  mem_t memList[10], best_mem = DDR;
  Memory mem;
  int64_t *dataDistrib, aggregatedData = 0, latency, bandwidth;
  int *globalRanks;

  MPI_Comm_rank (aggrComm, &aggrCommRank);
  MPI_Comm_size (aggrComm, &aggrCommSize);
  MPI_Comm_rank (MPI_COMM_WORLD, &worldRank);

  aggrCost.rank = aggrCommRank;
  aggrCost.cost = 0;

  dataDistrib = (int64_t *) malloc (aggrCommSize * sizeof (int64_t));
  globalRanks = (int *) malloc (aggrCommSize * sizeof (int));

  /* Everyone learns every candidate's world rank and data volume. */
  MPI_Allgather (&worldRank, 1, MPI_INT, globalRanks, 1, MPI_INT, aggrComm);
  MPI_Allgather (&dataSize, 1, MPI_LONG_LONG, dataDistrib, 1, MPI_LONG_LONG, aggrComm);

  memCount = topology.ListOfMemoryTiers (memList);

  /* Fix: the total data volume is independent of the memory tier, so it is
   * accumulated once. The original accumulated it inside the per-tier loop,
   * inflating it by a factor of memCount and overweighting the I/O-node
   * drain term below. */
  for (rank = 0; rank < aggrCommSize; rank++)
    aggregatedData += dataDistrib[rank];

  /* Evaluate the gather cost once per candidate tier; keep the cheapest. */
  for (m = 0; m < memCount; m++) {
    for (rank = 0; rank < aggrCommSize; rank++) {
      if (rank != aggrCommRank) {
        distance = topology.DistanceBetweenRanks (globalRanks[rank], worldRank);

        if (distance == 0) {
          /* Same node: only the memory tier limits the transfer. */
          latency   = mem.memLatency (memList[m]);
          bandwidth = mem.memBandwidth (memList[m]);
        }
        else {
          /* Remote peer: bounded by the slower of memory and network. */
          latency   = std::max (mem.memLatency (memList[m]), this->topology.NetworkLatency ());
          bandwidth = std::min (mem.memBandwidth (memList[m]), this->topology.NetworkBandwidth ());
        }

        aggrCost.cost += (distance * latency + (double) dataDistrib[rank] / bandwidth);
      }
    }

    if (aggrCost.cost < bestCost) {
      bestCost = aggrCost.cost;
      best_mem = memList[m];
    }

    aggrCost.cost = 0;
  }

  aggrCost.cost = bestCost;

  /* Cost of draining the aggregated data toward the I/O node. */
  if (topology.DistanceToIONode (worldRank) != 0)
    aggrCost.cost += topology.DistanceToIONode (worldRank) * this->topology.NetworkLatency ()
      + (double) aggregatedData / this->topology.NetworkBandwidth ();

  /* Nodes already hosting an aggregator (or otherwise excluded) never win. */
  if (this->excludedNode[this->hostId_])
    aggrCost.cost = DBL_MAX;

  MPI_Allreduce (&aggrCost, &minCost, 1, MPI_DOUBLE_INT, MPI_MINLOC, aggrComm);
  MPI_Reduce (&dataSize, &this->aggrDataSize_, 1, MPI_LONG_LONG, MPI_SUM, minCost.rank, aggrComm);

  if (minCost.rank == aggrCommRank) {
    aggrRank = this->commRank_;
    this->amAnAggr_ = true;
  }

  MPI_Bcast (&aggrRank, 1, MPI_INT, minCost.rank, aggrComm);
  /* NOTE(review): best_mem is a mem_t enum broadcast as MPI_INT — assumes
   * sizeof(mem_t) == sizeof(int) on every target compiler; confirm. */
  MPI_Bcast (&best_mem, 1, MPI_INT, minCost.rank, aggrComm);
  this->memAggr_ = best_mem;

  /* Fix: the original leaked both scratch buffers on every election. */
  free (dataDistrib);
  free (globalRanks);

#ifdef DBG
  if (minCost.rank == aggrCommRank)
    fprintf (stdout, "[DEBUG] Aggr. rank %d in aggrComm, distance to I/O node %d hops, cost: %.4f, mem: %s\n",
             minCost.rank, topology.DistanceToIONode (worldRank), minCost.cost, mem.memName (best_mem));
#endif

  return aggrRank;
}
int
Tapioca
::
RankContentionAware
(
MPI_Comm
aggrComm
,
int64_t
dataSize
)
{
struct
{
double
cost
;
int
rank
;
}
aggrCost
,
minCost
;
...
...
@@ -226,14 +303,14 @@ int Tapioca::RankContentionAware (MPI_Comm aggrComm, int64_t dataSize)
if
(
rank
!=
aggrCommRank
)
{
aggrCost
.
cost
=
std
::
max
(
(
double
)
dataDistrib
[
rank
]
/
(
BANDWIDTH
/
routeCost
[
srcNode
]
),
aggrCost
.
cost
=
std
::
max
(
(
double
)
dataDistrib
[
rank
]
/
(
this
->
topology
.
NetworkBandwidth
()
/
routeCost
[
srcNode
]
),
aggrCost
.
cost
);
}
}
/* I/O Node */
srcNode
=
this
->
worldRank_
/
ppn
;
aggrCost
.
cost
+=
aggregatedData
/
(
BANDWIDTH
/
routeCost
[
srcNode
]
);
aggrCost
.
cost
+=
aggregatedData
/
(
this
->
topology
.
NetworkBandwidth
()
/
routeCost
[
srcNode
]
);
if
(
this
->
excludedNode
[
this
->
hostId_
]
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment