Commit 73e99c2b authored by Nicolas Denoyelle's avatar Nicolas Denoyelle
Browse files

Any distance matrix

Compute a distance matrix no matter what.
It never happens that the system has a distance matrix between
anything other than NUMANODEs. Sometimes this matrix does not even
exist.
This commit computes a hop distance matrix if the system does
not have any distance matrix. It also reshapes an existing distance
matrix to match the distance query.
parent b3c75e1c
Pipeline #10556 passed with stages
in 3 minutes and 23 seconds
......@@ -47,8 +47,7 @@ int max_bandwidth_area()
err = aml_area_hwloc_preferred_create(
&area, initiator,
HWLOC_DISTANCES_KIND_FROM_OS |
HWLOC_DISTANCES_KIND_MEANS_LATENCY |
HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES);
HWLOC_DISTANCES_KIND_MEANS_LATENCY);
if (err != AML_SUCCESS) {
fprintf(stderr, "aml_area_hwloc_preferred_create: %s\n",
......
......@@ -181,7 +181,7 @@ struct aml_area_hwloc_options {
**/
int aml_area_hwloc_preferred_create(struct aml_area **area,
hwloc_obj_t initiator,
const enum hwloc_distances_kind_e kind);
enum hwloc_distances_kind_e kind);
/**
* Allocate an area with "preferred" policy with all the available numanodes
......@@ -200,8 +200,8 @@ int aml_area_hwloc_preferred_create(struct aml_area **area,
* @return AML_SUCCESS on success.
* @see <hwloc/distances.h>
**/
int aml_area_hwloc_preferred_local_create(
struct aml_area **area, const enum hwloc_distances_kind_e kind);
int aml_area_hwloc_preferred_local_create(struct aml_area **area,
enum hwloc_distances_kind_e kind);
/**
* Free memory space allocated for a aml_area_hwloc_preferred.
......
......@@ -34,17 +34,15 @@ int aml_topology_init(void)
if (hwloc_topology_init(&aml_topology) == -1)
return -1;
if (topology_input == NULL) {
if (hwloc_topology_set_flags(
aml_topology,
HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES) ==
-1)
return -1;
} else {
if (hwloc_topology_set_xml(aml_topology, topology_input) ==
-1)
return -1;
}
if (hwloc_topology_set_flags(
aml_topology,
HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES |
HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM) == -1)
return -1;
if (topology_input != NULL &&
hwloc_topology_set_xml(aml_topology, topology_input) == -1)
return -1;
if (hwloc_topology_load(aml_topology) == -1)
return -1;
......
......@@ -235,7 +235,7 @@ static int aml_area_hwloc_preferred_alloc(struct aml_area **area)
struct aml_area_hwloc_preferred_data);
policy->numanodes = AML_INNER_MALLOC_GET_ARRAY(
policy, hwloc_obj_t, struct aml_area,
a, hwloc_obj_t, struct aml_area,
struct aml_area_hwloc_preferred_data);
policy->num_nodes = (unsigned)num_nodes;
......@@ -340,6 +340,72 @@ static int aml_area_hwloc_hwloc_lt(const void *a_ptr, const void *b_ptr)
return a->distance < b->distance;
}
/**
* Allocate elements separately because when they are added to the
* topology, hwloc free them separately.
**/
static int aml_hwloc_distances_alloc(const hwloc_obj_type_t t0,
const hwloc_obj_type_t t1,
struct hwloc_distances_s **out,
unsigned *nt0,
unsigned *nt1)
{
unsigned n;
*nt0 = hwloc_get_nbobjs_by_type(aml_topology, t0);
if (t0 == t1) {
*nt1 = *nt0;
n = *nt0;
} else {
*nt1 = hwloc_get_nbobjs_by_type(aml_topology, t1);
n = *nt0 + *nt1;
}
*out = malloc(sizeof(**out));
if (*out == NULL)
return -AML_ENOMEM;
(*out)->objs = malloc(n * sizeof(hwloc_obj_t));
if ((*out)->objs == NULL)
goto err_with_distances;
(*out)->values = malloc(n * n * sizeof(*((*out)->values)));
if ((*out)->values == NULL)
goto err_with_objs;
(*out)->nbobjs = n;
for (unsigned it0 = 0; it0 < *nt0; it0++)
(*out)->objs[it0] =
hwloc_get_obj_by_type(aml_topology, t0, it0);
if (t0 != t1) {
for (unsigned it1 = 0; it1 < *nt1; it1++)
(*out)->objs[*nt0 + it1] =
hwloc_get_obj_by_type(aml_topology, t1, it1);
}
return AML_SUCCESS;
err_with_objs:
free((*out)->objs);
err_with_distances:
free(*out);
return -AML_ENOMEM;
}
/**
 * OBJ_DIST: cell of the matrix `dist` at row `i`, column `j`, where `i` and
 * `j` are objects addressed by their `logical_index`. `row_stride` and
 * `col_stride` are added to the row and column index respectively.
 * IND_DIST: cell of the matrix `dist` at raw row index `i`, column index `j`.
 * Both expand to an lvalue. Every argument is parenthesized so that compound
 * expressions can be passed safely; `dist`, is evaluated more than once.
 **/
#define OBJ_DIST(dist, i, j, row_stride, col_stride)                           \
	((dist)->values[((i)->logical_index + (row_stride)) * (dist)->nbobjs + \
	                (col_stride) + (j)->logical_index])
#define IND_DIST(dist, i, j) ((dist)->values[(i) * (dist)->nbobjs + (j)])
/**
 * Release a distance matrix: its `objs` array, its `values` array and the
 * structure itself are freed with free().
 * Passing NULL is a no-op, like free(NULL).
 *
 * @param dist: The matrix to free, or NULL.
 **/
static void aml_hwloc_distances_free(struct hwloc_distances_s *dist)
{
	if (dist == NULL)
		return;
	free(dist->objs);
	free(dist->values);
	free(dist);
}
/**
* Get distance matrix in hops between two types of objects.
* Distance matrix comes in the format used in <hwloc/distance.h>.
......@@ -392,36 +458,21 @@ static int aml_hwloc_distance_hop(hwloc_const_obj_t a, hwloc_const_obj_t b)
return dist;
}
/**
* Create a distance matrix of topology hops between two object types.
**/
int aml_hwloc_distance_hop_matrix(const hwloc_obj_type_t ta,
const hwloc_obj_type_t tb,
struct hwloc_distances_s *s)
struct hwloc_distances_s **s)
{
const unsigned na = hwloc_get_nbobjs_by_type(aml_topology, ta);
const unsigned nb =
ta == tb ? 0 : hwloc_get_nbobjs_by_type(aml_topology, tb);
const unsigned n = na + nb;
hwloc_obj_t *o;
hwloc_uint64_t d, *v;
// Allocation
o = malloc(n * sizeof(*o));
if (o == NULL)
return -1;
v = malloc(n * n * sizeof(*v));
if (v == NULL) {
free(o);
return -1;
}
hwloc_uint64_t d;
unsigned na, nb;
s->nbobjs = n;
s->objs = o;
s->values = v;
s->kind = HWLOC_DISTANCES_KIND_FROM_USER |
HWLOC_DISTANCES_KIND_MEANS_LATENCY;
if (aml_hwloc_distances_alloc(ta, tb, s, &na, &nb) != AML_SUCCESS)
return -AML_ENOMEM;
// Store objects a
for (unsigned i = 0; i < na; i++)
o[i] = hwloc_get_obj_by_type(aml_topology, ta, i);
(*s)->kind = HWLOC_DISTANCES_KIND_FROM_USER |
HWLOC_DISTANCES_KIND_MEANS_LATENCY;
// Store distances of same type a
for (unsigned i = 0; i < na; i++)
......@@ -429,19 +480,15 @@ int aml_hwloc_distance_hop_matrix(const hwloc_obj_type_t ta,
d = aml_hwloc_distance_hop(
hwloc_get_obj_by_type(aml_topology, ta, i),
hwloc_get_obj_by_type(aml_topology, ta, j));
v[i * n + j] = d;
v[j * n + i] = d;
IND_DIST(*s, i, j) = d;
IND_DIST(*s, j, i) = d;
}
// If both types are equal, then we stored everything
if (ta == tb)
return 0;
else
s->kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES;
// Store objects b
for (unsigned i = 0; i < nb; i++)
o[i + na] = hwloc_get_obj_by_type(aml_topology, tb, i);
(*s)->kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES;
// Store distances of same type b
for (unsigned i = 0; i < nb; i++)
......@@ -449,8 +496,8 @@ int aml_hwloc_distance_hop_matrix(const hwloc_obj_type_t ta,
d = aml_hwloc_distance_hop(
hwloc_get_obj_by_type(aml_topology, tb, i),
hwloc_get_obj_by_type(aml_topology, tb, j));
v[(na + i) * n + (j + na)] = d;
v[(na + j) * n + (i + na)] = d;
IND_DIST(*s, na + i, na + j) = d;
IND_DIST(*s, na + j, na + i) = d;
}
// Store distances ab, ba
......@@ -459,115 +506,279 @@ int aml_hwloc_distance_hop_matrix(const hwloc_obj_type_t ta,
d = aml_hwloc_distance_hop(
hwloc_get_obj_by_type(aml_topology, ta, i),
hwloc_get_obj_by_type(aml_topology, tb, j));
v[(na + j) * n + i] = d;
v[i * n + (j + na)] = d;
IND_DIST(*s, na + j, i) = d;
IND_DIST(*s, i, na + j) = d;
}
return 0;
}
/**
 * Derive a distance between two arbitrary objects from a distance matrix
 * that holds a single object type T0.
 *
 * Every T0 object whose cpuset intersects `obj0`'s cpuset is paired with
 * every T0 object whose cpuset intersects `obj1`'s cpuset, and the matrix
 * values of all pairs are summed. This allows computing a distance between
 * any two objects as long as any distance matrix exists.
 *
 * The matrix is indexed by logical index of the T0 objects.
 * NOTE(review): this assumes `dist->objs` is ordered by logical index and
 * covers every object at that depth -- confirm against hwloc's guarantees.
 *
 * @param dist: Source matrix; the depth of its first object selects T0.
 * @param obj0: First object (needs a cpuset).
 * @param obj1: Second object (needs a cpuset).
 * @param d0: Set to the summed distance from `obj0` to `obj1`.
 * @param d1: Set to the summed distance from `obj1` to `obj0`.
 * @return 0 on success.
 * @return -AML_FAILURE if the matrix is empty, an object has no cpuset,
 * no pair of T0 objects matches (e.g. a cpuset with no CPU), or a matched
 * object falls outside of the matrix. `*d0` and `*d1` are then unspecified
 * sums and must not be used.
 **/
static int aml_hwloc_distance_match(struct hwloc_distances_s *dist,
                                    const hwloc_obj_t obj0,
                                    const hwloc_obj_t obj1,
                                    hwloc_uint64_t *d0,
                                    hwloc_uint64_t *d1)
{
	hwloc_obj_t l0, l1;
	unsigned matched = 0;
	int depth;

	*d0 = 0;
	*d1 = 0;

	if (dist->nbobjs == 0 || dist->objs[0] == NULL)
		return -AML_FAILURE;
	if (obj0->cpuset == NULL || obj1->cpuset == NULL)
		return -AML_FAILURE;

	// hwloc depths are signed: special levels use negative values.
	depth = dist->objs[0]->depth;

	// Plain for loops: the first lookup may already return NULL when no
	// object at this depth intersects the cpuset.
	for (l0 = hwloc_get_next_obj_covering_cpuset_by_depth(
	             aml_topology, obj0->cpuset, depth, NULL);
	     l0 != NULL; l0 = hwloc_get_next_obj_covering_cpuset_by_depth(
	                         aml_topology, obj0->cpuset, depth, l0)) {
		for (l1 = hwloc_get_next_obj_covering_cpuset_by_depth(
		             aml_topology, obj1->cpuset, depth, NULL);
		     l1 != NULL;
		     l1 = hwloc_get_next_obj_covering_cpuset_by_depth(
		             aml_topology, obj1->cpuset, depth, l1)) {
			// Never read outside of the source matrix.
			if (l0->logical_index >= dist->nbobjs ||
			    l1->logical_index >= dist->nbobjs)
				return -AML_FAILURE;
			*d0 += OBJ_DIST(dist, l0, l1, 0, 0);
			*d1 += OBJ_DIST(dist, l1, l0, 0, 0);
			matched++;
		}
	}

	return matched != 0 ? 0 : -AML_FAILURE;
}
/**
* Take a distance matrix and convert it to another matrix with different
* object types.
**/
static int aml_hwloc_distances_reshape(struct hwloc_distances_s *dist,
struct hwloc_distances_s **out,
const hwloc_obj_type_t t0,
const hwloc_obj_type_t t1)
{
if (dist->kind & HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES)
return -AML_EINVAL;
unsigned nt0, nt1;
const unsigned depth0 = hwloc_get_type_depth(aml_topology, t0);
const unsigned depth1 = hwloc_get_type_depth(aml_topology, t1);
hwloc_obj_t obj0 = NULL, obj1 = NULL;
hwloc_uint64_t d0, d1;
if (aml_hwloc_distances_alloc(t0, t1, out, &nt0, &nt1) != AML_SUCCESS)
return -AML_ENOMEM;
// Set kind.
(*out)->kind = HWLOC_DISTANCES_KIND_FROM_USER;
if (dist->kind & HWLOC_DISTANCES_KIND_MEANS_LATENCY)
(*out)->kind |= HWLOC_DISTANCES_KIND_MEANS_LATENCY;
if (dist->kind & HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH)
(*out)->kind |= HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH;
// Set distances t0 <-> t0.
obj0 = hwloc_get_next_obj_by_depth(aml_topology, depth0, NULL);
while (obj0 != NULL) {
OBJ_DIST(*out, obj0, obj0, 0, 0) = 0;
obj1 = hwloc_get_next_obj_by_depth(aml_topology, depth0, obj0);
while (obj1 != NULL) {
if (aml_hwloc_distance_match(dist, obj0, obj1, &d0,
&d1) != 0)
goto err_with_out;
OBJ_DIST(*out, obj0, obj1, 0, 0) = d0;
OBJ_DIST(*out, obj1, obj0, 0, 0) = d1;
obj1 = hwloc_get_next_obj_by_depth(aml_topology, depth0,
obj1);
}
obj0 = hwloc_get_next_obj_by_depth(aml_topology, depth0, obj0);
}
if (t0 == t1)
return AML_SUCCESS;
(*out)->kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES;
// Set distances t0 <-> t1.
obj0 = hwloc_get_next_obj_by_depth(aml_topology, depth0, NULL);
while (obj0 != NULL) {
obj1 = hwloc_get_next_obj_by_depth(aml_topology, depth1, NULL);
while (obj1 != NULL) {
if (aml_hwloc_distance_match(dist, obj0, obj1, &d0,
&d1) != 0)
goto err_with_out;
OBJ_DIST(*out, obj0, obj1, 0, nt0) = d0;
OBJ_DIST(*out, obj1, obj0, nt0, 0) = d1;
obj1 = hwloc_get_next_obj_by_depth(aml_topology, depth0,
obj1);
}
obj0 = hwloc_get_next_obj_by_depth(aml_topology, depth0, obj0);
}
// Set distances t1 <-> t1.
obj0 = hwloc_get_next_obj_by_depth(aml_topology, depth1, NULL);
while (obj0 != NULL) {
OBJ_DIST(*out, obj0, obj0, nt0, nt0) = 0;
obj1 = hwloc_get_next_obj_by_depth(aml_topology, depth1, obj0);
while (obj1 != NULL) {
if (aml_hwloc_distance_match(dist, obj0, obj1, &d0,
&d1) != 0)
goto err_with_out;
OBJ_DIST(*out, obj0, obj1, nt0, nt0) = d0;
OBJ_DIST(*out, obj1, obj0, nt0, nt0) = d1;
obj1 = hwloc_get_next_obj_by_depth(aml_topology, depth1,
obj1);
}
obj0 = hwloc_get_next_obj_by_depth(aml_topology, depth1, obj0);
}
return AML_SUCCESS;
err_with_out:
aml_hwloc_distances_free(*out);
return -AML_FAILURE;
}
/**
* Get a distance matrix between an initiator type and NUMANODEs
* This method never fails.
* If no distance matrix exist, one is created with hops as distance.
* If no matrix with initiator type or NUMANODE type is found, any
* available matrix is used. The obtained matrix is reshaped to fit
* (initiator numa) x (initiator numa) matrix.
**/
int aml_hwloc_get_NUMA_distance(const hwloc_obj_type_t type,
enum hwloc_distances_kind_e kind,
struct hwloc_distances_s **out)
{
// number of hwloc matrices to return in handle
unsigned int nr = 32;
// hwloc distance matrix
struct hwloc_distances_s *handle[nr], *dist = NULL;
// Collect distances. If fail, fallback on hop distances.
if (hwloc_distances_get(aml_topology, &nr, handle, kind, 0) != 0 ||
nr == 0) {
if (aml_hwloc_distance_hop_matrix(type, HWLOC_OBJ_NUMANODE,
out) == -1)
return -1;
return 0;
}
for (unsigned i = 0; i < nr; i++) {
// If we found a matrix with same type as initiator type
// then we pick this one.
if (handle[i]->objs[0]->type == type) {
dist = handle[i];
break;
}
// We pick any distance
if (dist == NULL)
dist = handle[i];
// If we find one that is a NUMANODE distance, we chose this one
// over a default choice.
if (handle[i]->objs[0]->type == HWLOC_OBJ_NUMANODE)
dist = handle[i];
// If we find a distance that is finer grain than default,
// then we chose this one.
if (dist->objs[0]->type != HWLOC_OBJ_NUMANODE &&
dist->objs[0]->depth < handle[i]->objs[0]->depth)
dist = handle[i];
}
// If we were not able to find any matrix, we craft one.
if (dist == NULL) {
if (aml_hwloc_distance_hop_matrix(type, HWLOC_OBJ_NUMANODE,
out) != AML_SUCCESS)
return -AML_ENOMEM;
return AML_SUCCESS;
}
// We reshape whatever matrix we got to be a distance to NUMANODEs
// matrix.
if (aml_hwloc_distances_reshape(dist, out, type, HWLOC_OBJ_NUMANODE) !=
AML_SUCCESS)
return -AML_ENOMEM;
return AML_SUCCESS;
}
int aml_area_hwloc_preferred_create(struct aml_area **area,
hwloc_obj_t initiator,
const enum hwloc_distances_kind_e kind)
enum hwloc_distances_kind_e kind)
{
int err;
// The number of nodes in this system.
const unsigned num_nodes =
hwloc_get_nbobjs_by_type(aml_topology, HWLOC_OBJ_NUMANODE);
// The number of initiator
const unsigned num_initiator =
hwloc_get_nbobjs_by_type(aml_topology, initiator->type);
// output area
struct aml_area *ar = NULL;
// area numanodes
struct aml_area_hwloc_preferred_data *data;
// number of hwloc matrices to return in handle
unsigned int i, nr = 1;
// hwloc distance matrix
struct hwloc_distances_s handle[nr];
// Distances
struct hwloc_distances_s *dist;
// array of distances to sort
struct aml_area_hwloc_distance distances[num_nodes];
// node iterator
hwloc_obj_t node =
hwloc_get_obj_by_type(aml_topology, HWLOC_OBJ_NUMANODE, 0);
// distances from/to initiator, to/from target.
hwloc_uint64_t itot = 0, ttoi = 0;
// Check input
if (area == NULL)
return -AML_EINVAL;
if (initiator == NULL || initiator->cpuset == NULL ||
hwloc_bitmap_weight(initiator->cpuset) == 0)
return -AML_EINVAL;
if (initiator->depth < node->parent->depth)
if (initiator == NULL)
return -AML_EINVAL;
// Allocate structures
err = aml_area_hwloc_preferred_alloc(&ar);
aml_area_hwloc_preferred_alloc(&ar);
if (ar == NULL)
return -AML_ENOMEM;
data = (struct aml_area_hwloc_preferred_data *)ar->data;
// Collect distances
err = hwloc_distances_get(
aml_topology, &nr, (struct hwloc_distances_s **)(&handle),
kind | HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES, 0);
// If fail, fallback on hop distances.
if ((err != 0 || nr == 0) &&
aml_hwloc_distance_hop_matrix(initiator->type, HWLOC_OBJ_NUMANODE,
handle) == -1) {
err = -AML_ENOTSUP;
goto err_with_area;
}
if (aml_hwloc_get_NUMA_distance(initiator->type, kind, &dist) !=
AML_SUCCESS)
return -AML_ENOMEM;
// For each numanode compute distance to initiator
for (i = 0; i < data->num_nodes; i++) {
node = hwloc_get_obj_by_type(aml_topology, HWLOC_OBJ_NUMANODE,
i);
try_again:
err = hwloc_distances_obj_pair_values(&handle[0], initiator,
node, &itot, &ttoi);
// There were no matrix for this (initiator, numanode).
// Let's try again with initiator's parent.
if (err != 0 && initiator->depth > node->parent->depth) {
initiator = initiator->parent;
goto try_again;
}
// Did not work either... then give up.
if (err != 0) {
err = -AML_FAILURE;
goto err_with_handle;
for (unsigned i = 0; i < num_nodes; i++) {
hwloc_obj_t target = hwloc_get_obj_by_type(
aml_topology, HWLOC_OBJ_NUMANODE, i);
if (initiator->type == HWLOC_OBJ_NUMANODE) {
itot = OBJ_DIST(dist, initiator, target, 0, 0);
ttoi = OBJ_DIST(dist, target, initiator, 0, 0);
} else {
itot = OBJ_DIST(dist, initiator, target, 0,
num_initiator);
ttoi = OBJ_DIST(dist, target, initiator, num_initiator,
0);
}
// Store average distance (back and forth)
distances[i].distance = (itot + ttoi) / 2;
distances[i].index = i;
}
// Sort distances
qsort(distances, data->num_nodes, sizeof(*distances),
qsort(distances, num_nodes, sizeof(*distances),
aml_area_hwloc_hwloc_lt);
// Store sorted nodes in area data.
for (i = 0; i < data->num_nodes; i++) {
node = hwloc_get_obj_by_type(aml_topology, HWLOC_OBJ_NUMANODE,
distances[i].index);
for (unsigned i = 0; i < num_nodes; i++) {
hwloc_obj_t node = hwloc_get_obj_by_type(
aml_topology, HWLOC_OBJ_NUMANODE, distances[i].index);
data->numanodes[i] = node;
}
// Cleanup
for (i = 0; i < nr; i++)
hwloc_distances_release(aml_topology, &handle[i]);
aml_hwloc_distances_free(dist);
// Success !
*area = ar;
return AML_SUCCESS;
// Error
err_with_handle:
for (i = 0; i < nr; i++)
hwloc_distances_release(aml_topology, &handle[i]);
err_with_area:
free(ar);
return err;
}
int aml_area_hwloc_preferred_local_create(
struct aml_area **area, const enum hwloc_distances_kind_e kind)
int aml_area_hwloc_preferred_local_create(struct aml_area **area,
enum hwloc_distances_kind_e kind)
{
int err;
hwloc_cpuset_t cpuset;
......@@ -588,7 +799,7 @@ int aml_area_hwloc_preferred_local_create(
/** Match cpuset with a location on machine **/
err = hwloc_get_largest_objs_inside_cpuset(aml_topology, cpuset,
&initiator, 1);
if (err != 0) {
if (err == -1) {
err = -AML_FAILURE;
goto err_with_cpuset;
}
......
......@@ -21,7 +21,7 @@ const char *xml_topology_path = "aml_topology.xml";
int aml_hwloc_distance_hop_matrix(const hwloc_obj_type_t ta,
const hwloc_obj_type_t tb,
struct hwloc_distances_s *s);
struct hwloc_distances_s **s);
//------------------------------------------------------------------------------
// Test basic API
......@@ -133,18 +133,18 @@ void check_areas()
// Test preferred API
//------------------------------------------------------------------------------
void test_preferred()
void create_topology()
{
unsigned nr = 1;
struct hwloc_distances_s hops, *xml_hops;
struct hwloc_distances_s *hops, *xml_hops;
// Get distance matrix
assert(aml_hwloc_distance_hop_matrix(HWLOC_OBJ_CORE, HWLOC_OBJ_CORE,
&hops) == 0);
// Add matrix to topology
assert(hwloc_distances_add(aml_topology, hops.nbobjs, hops.objs,
hops.values, hops.kind,
assert(hwloc_distances_add(aml_topology, hops->nbobjs, hops->objs,
hops->values, hops->kind,
HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE) ==
0);
......@@ -167,11 +167,47 @@ void test_preferred()
// At least one distance matrix is present
assert(nr > 0);
// Same number of objects
assert(hops.nbobjs == xml_hops->nbobjs);
assert(hops->nbobjs == xml_hops->nbobjs);
// Same values
assert(!memcmp(hops.values, xml_hops->values,
hops.nbobjs * sizeof(*hops.values)));
assert(!memcmp(hops->values, xml_hops->values,
hops->nbobjs * sizeof(*hops->values)));
}
void test_preferred()
{
// bind ourselves to the last NUMANODE.