Commit 56ae532d authored by Swann Perarnau's avatar Swann Perarnau
Browse files

Merge branch 'topology_query' into 'staging'

hwloc topology query in aml

See merge request !136
parents a60466bb 926dcfb5
Pipeline #10557 passed with stages
in 3 minutes and 28 seconds
......@@ -47,8 +47,7 @@ int max_bandwidth_area()
err = aml_area_hwloc_preferred_create(
&area, initiator,
HWLOC_DISTANCES_KIND_FROM_OS |
HWLOC_DISTANCES_KIND_MEANS_LATENCY |
HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES);
HWLOC_DISTANCES_KIND_MEANS_LATENCY);
if (err != AML_SUCCESS) {
fprintf(stderr, "aml_area_hwloc_preferred_create: %s\n",
......
......@@ -181,7 +181,7 @@ struct aml_area_hwloc_options {
**/
int aml_area_hwloc_preferred_create(struct aml_area **area,
hwloc_obj_t initiator,
const enum hwloc_distances_kind_e kind);
enum hwloc_distances_kind_e kind);
/**
* Allocate an area with "preferred" policy with all the available numanodes
......@@ -200,8 +200,8 @@ int aml_area_hwloc_preferred_create(struct aml_area **area,
* @return AML_SUCCESS on success.
* @see <hwloc/distances.h>
**/
int aml_area_hwloc_preferred_local_create(
struct aml_area **area, const enum hwloc_distances_kind_e kind);
int aml_area_hwloc_preferred_local_create(struct aml_area **area,
enum hwloc_distances_kind_e kind);
/**
* Free memory space allocated for an aml_area_hwloc_preferred.
......
......@@ -29,12 +29,21 @@ hwloc_const_bitmap_t allowed_nodeset;
int aml_topology_init(void)
{
char *topology_input = getenv("AML_TOPOLOGY");
if (hwloc_topology_init(&aml_topology) == -1)
return -1;
if (hwloc_topology_set_flags(
aml_topology,
HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES) == -1)
HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES |
HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM) == -1)
return -1;
if (topology_input != NULL &&
hwloc_topology_set_xml(aml_topology, topology_input) == -1)
return -1;
if (hwloc_topology_load(aml_topology) == -1)
return -1;
return 0;
......@@ -50,7 +59,6 @@ int aml_init(int *argc, char **argv[])
// Initialize topology
#if HAVE_HWLOC == 1
int err_hwloc;
err_hwloc = aml_topology_init();
if (err_hwloc < 0)
return AML_FAILURE;
......
......@@ -235,7 +235,7 @@ static int aml_area_hwloc_preferred_alloc(struct aml_area **area)
struct aml_area_hwloc_preferred_data);
policy->numanodes = AML_INNER_MALLOC_GET_ARRAY(
policy, hwloc_obj_t, struct aml_area,
a, hwloc_obj_t, struct aml_area,
struct aml_area_hwloc_preferred_data);
policy->num_nodes = (unsigned)num_nodes;
......@@ -340,6 +340,72 @@ static int aml_area_hwloc_hwloc_lt(const void *a_ptr, const void *b_ptr)
return a->distance < b->distance;
}
/**
* Allocate elements separately because when they are added to the
* topology, hwloc free them separately.
**/
static int aml_hwloc_distances_alloc(const hwloc_obj_type_t t0,
const hwloc_obj_type_t t1,
struct hwloc_distances_s **out,
unsigned *nt0,
unsigned *nt1)
{
unsigned n;
*nt0 = hwloc_get_nbobjs_by_type(aml_topology, t0);
if (t0 == t1) {
*nt1 = *nt0;
n = *nt0;
} else {
*nt1 = hwloc_get_nbobjs_by_type(aml_topology, t1);
n = *nt0 + *nt1;
}
*out = malloc(sizeof(**out));
if (*out == NULL)
return -AML_ENOMEM;
(*out)->objs = malloc(n * sizeof(hwloc_obj_t));
if ((*out)->objs == NULL)
goto err_with_distances;
(*out)->values = malloc(n * n * sizeof(*((*out)->values)));
if ((*out)->values == NULL)
goto err_with_objs;
(*out)->nbobjs = n;
for (unsigned it0 = 0; it0 < *nt0; it0++)
(*out)->objs[it0] =
hwloc_get_obj_by_type(aml_topology, t0, it0);
if (t0 != t1) {
for (unsigned it1 = 0; it1 < *nt1; it1++)
(*out)->objs[*nt0 + it1] =
hwloc_get_obj_by_type(aml_topology, t1, it1);
}
return AML_SUCCESS;
err_with_objs:
free((*out)->objs);
err_with_distances:
free(*out);
return -AML_ENOMEM;
}
/**
 * Access (read or write) the distance between two objects in a distance
 * matrix, using the objects' logical indexes as coordinates.
 * `values` is indexed as a row-major `nbobjs` x `nbobjs` array:
 * row is `(i)->logical_index + row_stride`,
 * column is `(j)->logical_index + col_stride`.
 * Strides select the block of the matrix to address, e.g. the number of
 * objects of the first type when `i` or `j` belongs to the second type.
 * Every argument is parenthesized so that any expression can be passed.
 **/
#define OBJ_DIST(dist, i, j, row_stride, col_stride)                           \
	(dist)->values[((i)->logical_index + (row_stride)) * (dist)->nbobjs +  \
		       (col_stride) + (j)->logical_index]

/**
 * Access (read or write) the element at row `i`, column `j` of the
 * row-major `nbobjs` x `nbobjs` `values` array of a distance matrix.
 **/
#define IND_DIST(dist, i, j) (dist)->values[(i) * (dist)->nbobjs + (j)]
/**
 * Release a distance matrix whose structure, `objs` array and `values`
 * array were each obtained from a separate malloc() call.
 * Passing NULL is a no-op, like free().
 * @param dist: The distance matrix to free. May be NULL.
 **/
static void aml_hwloc_distances_free(struct hwloc_distances_s *dist)
{
	if (dist == NULL)
		return;
	free(dist->objs);
	free(dist->values);
	free(dist);
}
/**
* Get distance matrix in hops between two types of objects.
* Distance matrix comes in the format used in <hwloc/distances.h>.
......@@ -392,36 +458,21 @@ static int aml_hwloc_distance_hop(hwloc_const_obj_t a, hwloc_const_obj_t b)
return dist;
}
/**
* Create a distance matrix of topology hops between two object types.
**/
int aml_hwloc_distance_hop_matrix(const hwloc_obj_type_t ta,
const hwloc_obj_type_t tb,
struct hwloc_distances_s *s)
struct hwloc_distances_s **s)
{
const unsigned na = hwloc_get_nbobjs_by_type(aml_topology, ta);
const unsigned nb =
ta == tb ? 0 : hwloc_get_nbobjs_by_type(aml_topology, tb);
const unsigned n = na + nb;
hwloc_obj_t *o;
hwloc_uint64_t d, *v;
// Allocation
o = malloc(n * sizeof(*o));
if (o == NULL)
return -1;
v = malloc(n * n * sizeof(*v));
if (v == NULL) {
free(o);
return -1;
}
hwloc_uint64_t d;
unsigned na, nb;
s->nbobjs = n;
s->objs = o;
s->values = v;
s->kind = HWLOC_DISTANCES_KIND_FROM_USER |
HWLOC_DISTANCES_KIND_MEANS_LATENCY;
if (aml_hwloc_distances_alloc(ta, tb, s, &na, &nb) != AML_SUCCESS)
return -AML_ENOMEM;
// Store objects a
for (unsigned i = 0; i < na; i++)
o[i] = hwloc_get_obj_by_type(aml_topology, ta, i);
(*s)->kind = HWLOC_DISTANCES_KIND_FROM_USER |
HWLOC_DISTANCES_KIND_MEANS_LATENCY;
// Store distances of same type a
for (unsigned i = 0; i < na; i++)
......@@ -429,19 +480,15 @@ int aml_hwloc_distance_hop_matrix(const hwloc_obj_type_t ta,
d = aml_hwloc_distance_hop(
hwloc_get_obj_by_type(aml_topology, ta, i),
hwloc_get_obj_by_type(aml_topology, ta, j));
v[i * n + j] = d;
v[j * n + i] = d;
IND_DIST(*s, i, j) = d;
IND_DIST(*s, j, i) = d;
}
// If both types are equal, then we stored everything
if (ta == tb)
return 0;
else
s->kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES;
// Store objects b
for (unsigned i = 0; i < nb; i++)
o[i + na] = hwloc_get_obj_by_type(aml_topology, tb, i);
(*s)->kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES;
// Store distances of same type b
for (unsigned i = 0; i < nb; i++)
......@@ -449,8 +496,8 @@ int aml_hwloc_distance_hop_matrix(const hwloc_obj_type_t ta,
d = aml_hwloc_distance_hop(
hwloc_get_obj_by_type(aml_topology, tb, i),
hwloc_get_obj_by_type(aml_topology, tb, j));
v[(na + i) * n + (j + na)] = d;
v[(na + j) * n + (i + na)] = d;
IND_DIST(*s, na + i, na + j) = d;
IND_DIST(*s, na + j, na + i) = d;
}
// Store distances ab, ba
......@@ -459,115 +506,279 @@ int aml_hwloc_distance_hop_matrix(const hwloc_obj_type_t ta,
d = aml_hwloc_distance_hop(
hwloc_get_obj_by_type(aml_topology, ta, i),
hwloc_get_obj_by_type(aml_topology, tb, j));
v[(na + j) * n + i] = d;
v[i * n + (j + na)] = d;
IND_DIST(*s, na + j, i) = d;
IND_DIST(*s, i, na + j) = d;
}
return 0;
}
/**
 * Derive a distance between two arbitrary objects from a distance matrix
 * holding a single type T0 of objects.
 * Objects of `aml_topology` located at the depth of the first object of
 * the matrix are matched against the cpuset of `obj0` and of `obj1`.
 * Distances sum up when several T0 objects match an input object.
 * This allows computing a distance between any two objects as soon as
 * some distance matrix exists.
 * NOTE(review): matrix lookup relies on the logical index of T0 objects;
 * this presumably requires the matrix to cover every object of that
 * level in logical order -- confirm against the matrices provided.
 * @param dist: The distance matrix to read from.
 * @param obj0: The first object. Its cpuset is read.
 * @param obj1: The second object. Its cpuset is read.
 * @param d0: Where to store the summed distance from obj0 to obj1.
 * @param d1: Where to store the summed distance from obj1 to obj0.
 * @return 0 on success.
 * @return -AML_EINVAL if no pair of T0 objects matched the input objects.
 * `*d0` and `*d1` are then both 0.
 **/
static int aml_hwloc_distance_match(struct hwloc_distances_s *dist,
				    const hwloc_obj_t obj0,
				    const hwloc_obj_t obj1,
				    hwloc_uint64_t *d0,
				    hwloc_uint64_t *d1)
{
	// hwloc depths are signed: special levels have negative depths.
	const int depth = dist->objs[0]->depth;
	// Number of (l0, l1) pairs accumulated.
	unsigned matched = 0;
	hwloc_obj_t l0, l1;

	*d0 = 0;
	*d1 = 0;

	// Iterators are tested before being dereferenced: the lookup
	// returns NULL right away when nothing matches the cpuset.
	for (l0 = hwloc_get_next_obj_covering_cpuset_by_depth(
		     aml_topology, obj0->cpuset, depth, NULL);
	     l0 != NULL; l0 = hwloc_get_next_obj_covering_cpuset_by_depth(
				 aml_topology, obj0->cpuset, depth, l0)) {
		for (l1 = hwloc_get_next_obj_covering_cpuset_by_depth(
			     aml_topology, obj1->cpuset, depth, NULL);
		     l1 != NULL;
		     l1 = hwloc_get_next_obj_covering_cpuset_by_depth(
			     aml_topology, obj1->cpuset, depth, l1)) {
			*d0 += OBJ_DIST(dist, l0, l1, 0, 0);
			*d1 += OBJ_DIST(dist, l1, l0, 0, 0);
			matched++;
		}
	}

	// No distance can be derived without at least one matching pair.
	if (matched == 0)
		return -AML_EINVAL;
	return 0;
}
/**
 * Take a distance matrix and convert it to another matrix with different
 * object types.
 * The output matrix is allocated by this function. Its rows and columns
 * list objects of type `t0` first, then (if `t1 != t0`) objects of type
 * `t1`. Each distance is derived from `dist` with
 * aml_hwloc_distance_match(); distance from an object to itself is 0.
 * The latency/bandwidth meaning of `dist` is carried over to the output.
 * @param dist: The input matrix. It must not hold heterogeneous types.
 * @param out: Where to store the new matrix. On success it has to be
 * freed with aml_hwloc_distances_free(). It is set to NULL if a distance
 * could not be derived.
 * @param t0: The first type of objects in the output matrix.
 * @param t1: The second type of objects in the output matrix.
 * @return AML_SUCCESS on success.
 * @return -AML_EINVAL if `dist` holds heterogeneous types.
 * @return -AML_ENOMEM if the output matrix could not be allocated.
 * @return -AML_FAILURE if a distance could not be derived from `dist`.
 **/
static int aml_hwloc_distances_reshape(struct hwloc_distances_s *dist,
				       struct hwloc_distances_s **out,
				       const hwloc_obj_type_t t0,
				       const hwloc_obj_type_t t1)
{
	if (dist->kind & HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES)
		return -AML_EINVAL;

	unsigned nt0, nt1;
	// hwloc depths are signed: special levels have negative depths.
	const int depth0 = hwloc_get_type_depth(aml_topology, t0);
	const int depth1 = hwloc_get_type_depth(aml_topology, t1);
	hwloc_obj_t obj0, obj1;
	hwloc_uint64_t d0, d1;

	if (aml_hwloc_distances_alloc(t0, t1, out, &nt0, &nt1) != AML_SUCCESS)
		return -AML_ENOMEM;

	// Set kind.
	(*out)->kind = HWLOC_DISTANCES_KIND_FROM_USER;
	if (dist->kind & HWLOC_DISTANCES_KIND_MEANS_LATENCY)
		(*out)->kind |= HWLOC_DISTANCES_KIND_MEANS_LATENCY;
	if (dist->kind & HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH)
		(*out)->kind |= HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH;

	// Set distances t0 <-> t0 (top-left block).
	for (obj0 = hwloc_get_next_obj_by_depth(aml_topology, depth0, NULL);
	     obj0 != NULL;
	     obj0 = hwloc_get_next_obj_by_depth(aml_topology, depth0, obj0)) {
		OBJ_DIST(*out, obj0, obj0, 0, 0) = 0;
		// Only visit objects after obj0: both directions are
		// written at once.
		for (obj1 = hwloc_get_next_obj_by_depth(aml_topology, depth0,
							obj0);
		     obj1 != NULL;
		     obj1 = hwloc_get_next_obj_by_depth(aml_topology, depth0,
							obj1)) {
			if (aml_hwloc_distance_match(dist, obj0, obj1, &d0,
						     &d1) != 0)
				goto err_with_out;
			OBJ_DIST(*out, obj0, obj1, 0, 0) = d0;
			OBJ_DIST(*out, obj1, obj0, 0, 0) = d1;
		}
	}

	if (t0 == t1)
		return AML_SUCCESS;
	(*out)->kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES;

	// Set distances t0 <-> t1 (top-right and bottom-left blocks).
	for (obj0 = hwloc_get_next_obj_by_depth(aml_topology, depth0, NULL);
	     obj0 != NULL;
	     obj0 = hwloc_get_next_obj_by_depth(aml_topology, depth0, obj0)) {
		// obj1 walks the t1 level: it must be advanced with depth1,
		// otherwise the walk stops after the first t1 object.
		for (obj1 = hwloc_get_next_obj_by_depth(aml_topology, depth1,
							NULL);
		     obj1 != NULL;
		     obj1 = hwloc_get_next_obj_by_depth(aml_topology, depth1,
							obj1)) {
			if (aml_hwloc_distance_match(dist, obj0, obj1, &d0,
						     &d1) != 0)
				goto err_with_out;
			OBJ_DIST(*out, obj0, obj1, 0, nt0) = d0;
			OBJ_DIST(*out, obj1, obj0, nt0, 0) = d1;
		}
	}

	// Set distances t1 <-> t1 (bottom-right block).
	for (obj0 = hwloc_get_next_obj_by_depth(aml_topology, depth1, NULL);
	     obj0 != NULL;
	     obj0 = hwloc_get_next_obj_by_depth(aml_topology, depth1, obj0)) {
		OBJ_DIST(*out, obj0, obj0, nt0, nt0) = 0;
		for (obj1 = hwloc_get_next_obj_by_depth(aml_topology, depth1,
							obj0);
		     obj1 != NULL;
		     obj1 = hwloc_get_next_obj_by_depth(aml_topology, depth1,
							obj1)) {
			if (aml_hwloc_distance_match(dist, obj0, obj1, &d0,
						     &d1) != 0)
				goto err_with_out;
			OBJ_DIST(*out, obj0, obj1, nt0, nt0) = d0;
			OBJ_DIST(*out, obj1, obj0, nt0, nt0) = d1;
		}
	}

	return AML_SUCCESS;

err_with_out:
	aml_hwloc_distances_free(*out);
	// Do not hand a freed pointer back to the caller.
	*out = NULL;
	return -AML_FAILURE;
}
/**
 * Get a distance matrix between an initiator type and NUMANODEs.
 * If hwloc provides no distance matrix of the requested kind, one is
 * created with hops as distance.
 * Otherwise a matrix is picked among the ones provided by hwloc:
 * a matrix of the initiator type is preferred; else a NUMANODE matrix;
 * else the finest grain matrix available. The picked matrix is then
 * reshaped to fit a (initiator numa) x (initiator numa) matrix.
 * Matrices obtained from hwloc are released before returning.
 * @param type: The type of initiator objects.
 * @param kind: The kind of distance matrix requested from hwloc.
 * @param out: Where to store the resulting matrix. On success, it has
 * to be freed with aml_hwloc_distances_free().
 * @return AML_SUCCESS on success.
 * @return -AML_ENOMEM if the matrix could not be created or reshaped.
 **/
int aml_hwloc_get_NUMA_distance(const hwloc_obj_type_t type,
				enum hwloc_distances_kind_e kind,
				struct hwloc_distances_s **out)
{
	// Maximum number of hwloc matrices stored in handle.
	enum { max_matrices = 32 };
	// In: capacity of handle. Out: number of matrices found by hwloc.
	unsigned int nr = max_matrices;
	// hwloc distance matrices
	struct hwloc_distances_s *handle[max_matrices], *dist = NULL;
	int err = AML_SUCCESS;

	// Collect distances. If fail, fallback on hop distances.
	if (hwloc_distances_get(aml_topology, &nr, handle, kind, 0) != 0 ||
	    nr == 0) {
		if (aml_hwloc_distance_hop_matrix(type, HWLOC_OBJ_NUMANODE,
						  out) != AML_SUCCESS)
			return -AML_ENOMEM;
		return AML_SUCCESS;
	}

	// hwloc reports the number of matrices found even when they did
	// not all fit in handle. Only the stored ones can be accessed.
	if (nr > max_matrices)
		nr = max_matrices;

	for (unsigned i = 0; i < nr; i++) {
		// If we found a matrix with same type as initiator type
		// then we pick this one.
		if (handle[i]->objs[0]->type == type) {
			dist = handle[i];
			break;
		}
		// We pick any distance
		if (dist == NULL)
			dist = handle[i];
		// If we find one that is a NUMANODE distance, we choose this
		// one over a default choice.
		if (handle[i]->objs[0]->type == HWLOC_OBJ_NUMANODE)
			dist = handle[i];
		// If we find a distance that is finer grain than default,
		// then we choose this one.
		if (dist->objs[0]->type != HWLOC_OBJ_NUMANODE &&
		    dist->objs[0]->depth < handle[i]->objs[0]->depth)
			dist = handle[i];
	}

	if (dist == NULL) {
		// If we were not able to find any matrix, we craft one.
		if (aml_hwloc_distance_hop_matrix(type, HWLOC_OBJ_NUMANODE,
						  out) != AML_SUCCESS)
			err = -AML_ENOMEM;
	} else {
		// We reshape whatever matrix we got to be a distance to
		// NUMANODEs matrix.
		if (aml_hwloc_distances_reshape(dist, out, type,
						HWLOC_OBJ_NUMANODE) !=
		    AML_SUCCESS)
			err = -AML_ENOMEM;
	}

	// The output matrix is a separate allocation: matrices handed out
	// by hwloc are not needed anymore and must be given back.
	for (unsigned i = 0; i < nr; i++)
		hwloc_distances_release(aml_topology, handle[i]);

	return err;
}
int aml_area_hwloc_preferred_create(struct aml_area **area,
hwloc_obj_t initiator,
const enum hwloc_distances_kind_e kind)
enum hwloc_distances_kind_e kind)
{
int err;
// The number of nodes in this system.
const unsigned num_nodes =
hwloc_get_nbobjs_by_type(aml_topology, HWLOC_OBJ_NUMANODE);
// The number of initiator
const unsigned num_initiator =
hwloc_get_nbobjs_by_type(aml_topology, initiator->type);
// output area
struct aml_area *ar = NULL;
// area numanodes
struct aml_area_hwloc_preferred_data *data;
// number of hwloc matrices to return in handle
unsigned int i, nr = 1;
// hwloc distance matrix
struct hwloc_distances_s handle[nr];
// Distances
struct hwloc_distances_s *dist;
// array of distances to sort
struct aml_area_hwloc_distance distances[num_nodes];
// node iterator
hwloc_obj_t node =
hwloc_get_obj_by_type(aml_topology, HWLOC_OBJ_NUMANODE, 0);
// distances from/to initiator, to/from target.
hwloc_uint64_t itot = 0, ttoi = 0;
// Check input
if (area == NULL)
return -AML_EINVAL;
if (initiator == NULL || initiator->cpuset == NULL ||
hwloc_bitmap_weight(initiator->cpuset) == 0)
return -AML_EINVAL;
if (initiator->depth < node->parent->depth)
if (initiator == NULL)
return -AML_EINVAL;
// Allocate structures
err = aml_area_hwloc_preferred_alloc(&ar);
aml_area_hwloc_preferred_alloc(&ar);
if (ar == NULL)
return -AML_ENOMEM;
data = (struct aml_area_hwloc_preferred_data *)ar->data;
// Collect distances
err = hwloc_distances_get(
aml_topology, &nr, (struct hwloc_distances_s **)(&handle),
kind | HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES, 0);
// If fail, fallback on hop distances.
if ((err != 0 || nr == 0) &&
aml_hwloc_distance_hop_matrix(initiator->type, HWLOC_OBJ_NUMANODE,
handle) == -1) {
err = -AML_ENOTSUP;
goto err_with_area;
}
if (aml_hwloc_get_NUMA_distance(initiator->type, kind, &dist) !=
AML_SUCCESS)
return -AML_ENOMEM;
// For each numanode compute distance to initiator
for (i = 0; i < data->num_nodes; i++) {
node = hwloc_get_obj_by_type(aml_topology, HWLOC_OBJ_NUMANODE,
i);
try_again:
err = hwloc_distances_obj_pair_values(&handle[0], initiator,
node, &itot, &ttoi);
// There were no matrix for this (initiator, numanode).
// Let's try again with initiator's parent.
if (err != 0 && initiator->depth > node->parent->depth) {
initiator = initiator->parent;
goto try_again;
}
// Did not work either... then give up.
if (err != 0) {
err = -AML_FAILURE;
goto err_with_handle;
for (unsigned i = 0; i < num_nodes; i++) {
hwloc_obj_t target = hwloc_get_obj_by_type(
aml_topology, HWLOC_OBJ_NUMANODE, i);
if (initiator->type == HWLOC_OBJ_NUMANODE) {
itot = OBJ_DIST(dist, initiator, target, 0, 0);
ttoi = OBJ_DIST(dist, target, initiator, 0, 0);
} else {
itot = OBJ_DIST(dist, initiator, target, 0,
num_initiator);
ttoi = OBJ_DIST(dist, target, initiator, num_initiator,
0);
}
// Store average distance (back and forth)
distances[i].distance = (itot + ttoi) / 2;
distances[i].index = i;
}
// Sort distances
qsort(distances, data->num_nodes, sizeof(*distances),
qsort(distances, num_nodes, sizeof(*distances),
aml_area_hwloc_hwloc_lt);
// Store sorted nodes in area data.
for (i = 0; i < data->num_nodes; i++) {
node = hwloc_get_obj_by_type(aml_topology, HWLOC_OBJ_NUMANODE,
distances[i].index);
for (unsigned i = 0; i < num_nodes; i++) {
hwloc_obj_t node = hwloc_get_obj_by_type(
aml_topology, HWLOC_OBJ_NUMANODE, distances[i].index);
data->numanodes[i] = node;
}
// Cleanup
for (i = 0; i < nr; i++)
hwloc_distances_release(aml_topology, &handle[i]);
aml_hwloc_distances_free(dist);
// Success !
*area = ar;
return AML_SUCCESS;
// Error
err_with_handle:
for (i = 0; i < nr; i++)
hwloc_distances_release(aml_topology, &handle[i]);
err_with_area:
free(ar);
return err;
}
int aml_area_hwloc_preferred_local_create(
struct aml_area **area, const enum hwloc_distances_kind_e kind)
int aml_area_hwloc_preferred_local_create(struct aml_area **area,
enum hwloc_distances_kind_e kind)
{
int err;
hwloc_cpuset_t cpuset;
......@@ -588,7 +799,7 @@ int aml_area_hwloc_preferred_local_create(
/** Match cpuset with a location on machine **/
err = hwloc_get_largest_objs_inside_cpuset(aml_topology, cpuset,
&initiator, 1);
if (err != 0) {
if (err == -1) {
err = -AML_FAILURE;
goto err_with_cpuset;
}
......
......@@ -9,6 +9,7 @@
*******************************************************************************/
#include <assert.h>
#include <hwloc.h>
#include "aml.h"
......@@ -16,6 +17,16 @@
extern hwloc_topology_t aml_topology;
const char *xml_topology_path = "aml_topology.xml";
int aml_hwloc_distance_hop_matrix(const hwloc_obj_type_t ta,
const hwloc_obj_type_t tb,
struct hwloc_distances_s **s);
//------------------------------------------------------------------------------
// Test basic API
//------------------------------------------------------------------------------
/** Number of sizes to test **/
#define ns 3
size_t sizes[ns] = {
......@@ -118,9 +129,95 @@ void check_areas()
hwloc_bitmap_free(nodeset);
}
//------------------------------------------------------------------------------
// Test preferred API
//------------------------------------------------------------------------------
void create_topology()
{
unsigned nr = 1;
struct hwloc_distances_s *hops, *xml_hops;
// Get distance matrix
assert(aml_hwloc_distance_hop_matrix(HWLOC_OBJ_CORE, HWLOC_OBJ_CORE,
&hops) == 0);
// Add matrix to topology
assert(hwloc_distances_add(aml_topology, hops->nbobjs, hops->objs,