Commit 26f79619 authored by Pavan Balaji's avatar Pavan Balaji
Browse files

[svn-r5653] Update the HYDT_bind_info structure to make it more flexible and

future-proof. Also, extend the UI to allow users to make binding
requests more intelligently.

The HYDT_bind_info structure itself should provide enough flexibility
for hwloc (barring any comments from Guillaume). The user still cannot
specify arbitrarily complex binding requests; it is not clear how this can be
done without overly complicating the command-line interface.

This should fix ticket #911.
parent df37492c
......@@ -12,8 +12,8 @@
/* bind */
HYD_status HYDT_bind_init(char *binding, char *bindlib);
void HYDT_bind_finalize(void);
HYD_status HYDT_bind_process(int proc_unit_id);
int HYDT_bind_get_proc_unit_id(int process_id);
HYD_status HYDT_bind_process(int os_index);
int HYDT_bind_get_os_index(int process_id);
/* checkpointing */
HYD_status HYDT_ckpoint_init(char *ckpointlib, char *ckpoint_prefix);
......
......@@ -160,8 +160,8 @@ struct HYD_thread_context {
#endif /* HAVE_THREAD_SUPPORT */
HYD_status HYDU_create_process(char **client_arg, HYD_env_t * env_list,
int *in, int *out, int *err, int *pid, int proc_unit_id);
HYD_status HYDU_fork_and_exit(int proc_unit_id);
int *in, int *out, int *err, int *pid, int os_index);
HYD_status HYDU_fork_and_exit(int os_index);
#if defined HAVE_THREAD_SUPPORT
HYD_status HYDU_create_thread(void *(*func) (void *), void *args,
struct HYD_thread_context *ctxt);
......
......@@ -489,7 +489,7 @@ HYD_status HYD_pmcd_pmi_proxy_procinfo(int fd)
HYD_status HYD_pmcd_pmi_proxy_launch_procs(void)
{
int i, j, arg, stdin_fd, process_id, proc_unit_id, pmi_id;
int i, j, arg, stdin_fd, process_id, os_index, pmi_id;
char *str, *envstr, *list;
char *client_args[HYD_NUM_TMP_STRINGS];
HYD_env_t *env, *prop_env = NULL;
......@@ -650,14 +650,14 @@ HYD_status HYD_pmcd_pmi_proxy_launch_procs(void)
client_args[arg++] = HYDU_strdup(exec->exec[j]);
client_args[arg++] = NULL;
proc_unit_id = HYDT_bind_get_proc_unit_id(process_id);
os_index = HYDT_bind_get_os_index(process_id);
if (pmi_id == 0) {
status = HYDU_create_process(client_args, prop_env,
&HYD_pmcd_pmip.downstream.in,
&HYD_pmcd_pmip.downstream.out[process_id],
&HYD_pmcd_pmip.downstream.err[process_id],
&HYD_pmcd_pmip.downstream.pid[process_id],
proc_unit_id);
os_index);
HYD_pmcd_pmip.local.stdin_buf_offset = 0;
HYD_pmcd_pmip.local.stdin_buf_count = 0;
......@@ -675,7 +675,7 @@ HYD_status HYD_pmcd_pmi_proxy_launch_procs(void)
&HYD_pmcd_pmip.downstream.out[process_id],
&HYD_pmcd_pmip.downstream.err[process_id],
&HYD_pmcd_pmip.downstream.pid[process_id],
proc_unit_id);
os_index);
}
HYDU_ERR_POP(status, "create process returned error\n");
......
......@@ -20,20 +20,23 @@ struct HYDT_bind_info HYDT_bind_info;
HYD_status HYDT_bind_init(char *binding, char *bindlib)
{
char *bindstr, *bindentry;
int sock, core, thread, i, j;
char *bindstr, *bindentry, *obj;
int i, j, k, use_topo_obj[HYDT_TOPO_END] = { 0 }, child_id, found_obj;
HYDT_topo_obj_type_t topo_end;
struct HYDT_topo_obj *topo_obj[HYDT_TOPO_END];
HYD_status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
HYDT_bind_info.support_level = HYDT_BIND_NONE;
HYDT_bind_info.topology = NULL;
HYDT_bind_info.bindlib = HYDU_strdup(bindlib);
HYDT_bind_info.bindmap = NULL;
/***************************** NONE *****************************/
if (!binding || !strcmp(binding, "none")) {
/* If no binding is given, we just set all mappings to -1 */
HYDU_MALLOC(HYDT_bind_info.bindmap, int *, sizeof(int), status);
HYDT_bind_info.num_procs = 1;
HYDT_bind_info.total_proc_units = 1;
HYDT_bind_info.bindmap[0] = -1;
goto fn_exit;
......@@ -58,28 +61,27 @@ HYD_status HYDT_bind_init(char *binding, char *bindlib)
* all mappings to -1 */
if (HYDT_bind_info.support_level == HYDT_BIND_NONE) {
HYDU_MALLOC(HYDT_bind_info.bindmap, int *, sizeof(int), status);
HYDT_bind_info.num_procs = 1;
HYDT_bind_info.total_proc_units = 1;
HYDT_bind_info.bindmap[0] = -1;
goto fn_exit;
}
if (!strncmp(binding, "user:", strlen("user:"))) {
/* If the user specified the binding, we don't need to
* initialize anything */
/***************************** USER *****************************/
if (!strncmp(binding, "user:", strlen("user:"))) {
/* Find the number of processing elements */
bindstr = HYDU_strdup(binding + strlen("user:"));
HYDT_bind_info.num_procs = 0;
HYDT_bind_info.total_proc_units = 0;
bindentry = strtok(bindstr, ",");
while (bindentry) {
HYDT_bind_info.num_procs++;
HYDT_bind_info.total_proc_units++;
bindentry = strtok(NULL, ",");
}
/* Find the actual processing elements */
HYDU_MALLOC(HYDT_bind_info.bindmap, int *, HYDT_bind_info.num_procs * sizeof(int),
status);
HYDU_MALLOC(HYDT_bind_info.bindmap, int *,
HYDT_bind_info.total_proc_units * sizeof(int), status);
i = 0;
bindstr = HYDU_strdup(binding + strlen("user:"));
bindentry = strtok(bindstr, ",");
......@@ -91,53 +93,133 @@ HYD_status HYDT_bind_init(char *binding, char *bindlib)
goto fn_exit;
}
HYDU_MALLOC(HYDT_bind_info.bindmap, int *, HYDT_bind_info.num_procs * sizeof(int),
status);
for (i = 0; i < HYDT_bind_info.num_procs; i++) {
/***************************** RR *****************************/
HYDU_MALLOC(HYDT_bind_info.bindmap, int *,
HYDT_bind_info.total_proc_units * sizeof(int), status);
/* RR is supported at the basic binding level */
if (!strcmp(binding, "rr")) {
/* RR is supported at the basic binding level */
if (!strcmp(binding, "rr")) {
for (i = 0; i < HYDT_bind_info.total_proc_units; i++)
HYDT_bind_info.bindmap[i] = i;
continue;
}
/* If we reached here, the user requested for topology aware
* binding. */
if (HYDT_bind_info.support_level != HYDT_BIND_TOPO)
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"topology binding not supported on this platform\n");
goto fn_exit;
}
if (!strcmp(binding, "buddy")) {
thread = i / (HYDT_bind_info.num_sockets * HYDT_bind_info.num_cores);
core = i % (HYDT_bind_info.num_sockets * HYDT_bind_info.num_cores);
core /= HYDT_bind_info.num_sockets;
/* If we reached here, the user requested for topology aware
* binding. */
if (HYDT_bind_info.support_level != HYDT_BIND_TOPO)
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"topology binding not supported on this platform\n");
sock = i % HYDT_bind_info.num_sockets;
}
else if (!strcmp(binding, "pack")) {
sock = i / (HYDT_bind_info.num_cores * HYDT_bind_info.num_threads);
core = i % (HYDT_bind_info.num_cores * HYDT_bind_info.num_threads);
core /= HYDT_bind_info.num_threads;
/***************************** TOPO *****************************/
if (!strncmp(binding, "topo", strlen("topo"))) {
bindstr = HYDU_strdup(binding);
bindentry = strtok(bindstr, ":");
bindentry = strtok(NULL, ":");
thread = i % HYDT_bind_info.num_threads;
if (bindentry == NULL) {
/* No extension option specified; use all resources */
for (i = HYDT_TOPO_MACHINE; i < HYDT_TOPO_END; i++)
use_topo_obj[i] = 1;
}
else {
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "unknown binding option\n");
obj = strtok(bindentry, ",");
do {
if (!strcmp(obj, "node"))
use_topo_obj[HYDT_TOPO_NODE] = 1;
else if (!strcmp(obj, "socket") || !strcmp(obj, "sockets"))
use_topo_obj[HYDT_TOPO_SOCKET] = 1;
else if (!strcmp(obj, "core") || !strcmp(obj, "cores"))
use_topo_obj[HYDT_TOPO_CORE] = 1;
else if (!strcmp(obj, "thread") || !strcmp(obj, "threads"))
use_topo_obj[HYDT_TOPO_THREAD] = 1;
else
HYDU_ERR_POP(status, "unrecognized binding option\n");
obj = strtok(NULL, ",");
} while (obj);
}
for (i = HYDT_TOPO_END - 1; i > HYDT_TOPO_MACHINE; i--) {
if (use_topo_obj[i])
use_topo_obj[i - 1] = 1;
}
for (j = 0; j < HYDT_bind_info.num_procs; j++) {
if (HYDT_bind_info.topology[j].socket_rank == sock &&
HYDT_bind_info.topology[j].core_rank == core &&
HYDT_bind_info.topology[j].thread_rank == thread) {
HYDT_bind_info.bindmap[i] = HYDT_bind_info.topology[j].processor_id;
topo_end = HYDT_TOPO_END;
for (i = HYDT_TOPO_MACHINE; i < HYDT_TOPO_END; i++) {
if (use_topo_obj[i] == 0) {
topo_end = i;
break;
}
}
/* Initialize indices and topology objects */
topo_obj[HYDT_TOPO_MACHINE] = &HYDT_bind_info.machine;
for (j = HYDT_TOPO_MACHINE; j < HYDT_TOPO_END; j++) {
if (j)
topo_obj[j] = topo_obj[j - 1]->children;
}
for (i = 0; i < HYDT_bind_info.total_proc_units; i++) {
HYDT_bind_info.bindmap[i] = topo_obj[HYDT_TOPO_END - 1]->os_index;
/* If we are done, break out */
if (i == HYDT_bind_info.total_proc_units - 1)
break;
/* If not, increment the object structure */
found_obj = 0;
for (j = HYDT_TOPO_END - 1; j > HYDT_TOPO_MACHINE; j--) {
/* If our topology depth is greater than what the user
* requested, don't try to find any more siblings */
if (j >= topo_end)
continue;
child_id = HYDT_TOPO_CHILD_ID(topo_obj[j]);
if (child_id < topo_obj[j]->parent->num_children - 1) {
/* This object is not the last of the siblings;
* move to the next sibling */
topo_obj[j] = &topo_obj[j]->parent->children[child_id + 1];
for (k = j + 1; k < HYDT_TOPO_END; k++)
topo_obj[k] = topo_obj[k - 1]->children;
found_obj = 1;
break;
}
}
if (!found_obj) {
/* Initialize indices and topology objects */
topo_obj[HYDT_TOPO_MACHINE] = &HYDT_bind_info.machine;
for (j = HYDT_TOPO_MACHINE; j < HYDT_TOPO_END; j++) {
if (j)
topo_obj[j] = topo_obj[j - 1]->children;
}
}
}
goto fn_exit;
}
/* If we reached here, the user requested memory-topology-aware
* binding. */
if (HYDT_bind_info.support_level != HYDT_BIND_MEMTOPO)
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"memory topology binding not supported on this platform\n");
/***************************** TOPOMEM *****************************/
if (!strncmp(binding, "topomem", strlen("topomem"))) {
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"memory-aware topology binding is not implemented yet\n");
}
fn_exit:
HYDU_FUNC_EXIT();
return status;
......@@ -146,6 +228,20 @@ HYD_status HYDT_bind_init(char *binding, char *bindlib)
goto fn_exit;
}
/*
 * Release the heap storage owned by one topology object and, recursively,
 * by every object below it.
 *
 * The object is received by value, so only the memory it points to is
 * released; the pointers stored in the caller's copy of the structure are
 * not reset and must not be dereferenced afterwards.
 */
static void cleanup_topo_level(struct HYDT_topo_obj level)
{
    int i;

    if (level.shared_memory_depth)
        HYDU_FREE(level.shared_memory_depth);

    if (level.children) {
        /* Descend first: each child owns its own children array and
         * shared-memory information. */
        for (i = 0; i < level.num_children; i++)
            cleanup_topo_level(level.children[i]);

        /* The children array is a separate allocation from its parent,
         * so it has to be released once all children are cleaned up. */
        HYDU_FREE(level.children);
    }
}
void HYDT_bind_finalize(void)
{
if (HYDT_bind_info.bindmap)
......@@ -154,11 +250,10 @@ void HYDT_bind_finalize(void)
if (HYDT_bind_info.bindlib)
HYDU_FREE(HYDT_bind_info.bindlib);
if (HYDT_bind_info.topology)
HYDU_FREE(HYDT_bind_info.topology);
cleanup_topo_level(HYDT_bind_info.machine);
}
HYD_status HYDT_bind_process(int core)
HYD_status HYDT_bind_process(int os_index)
{
HYD_status status = HYD_SUCCESS;
......@@ -166,14 +261,14 @@ HYD_status HYDT_bind_process(int core)
#if defined HAVE_PLPA
if (!strcmp(HYDT_bind_info.bindlib, "plpa")) {
status = HYDT_bind_plpa_process(core);
status = HYDT_bind_plpa_process(os_index);
HYDU_ERR_POP(status, "PLPA failure binding process to core\n");
}
#endif /* HAVE_PLPA */
#if defined HAVE_HWLOC
if (!strcmp(HYDT_bind_info.bindlib, "hwloc")) {
status = HYDT_bind_hwloc_process(core);
status = HYDT_bind_hwloc_process(os_index);
HYDU_ERR_POP(status, "HWLOC failure binding process to core\n");
}
#endif /* HAVE_HWLOC */
......@@ -186,8 +281,10 @@ HYD_status HYDT_bind_process(int core)
goto fn_exit;
}
int HYDT_bind_get_proc_unit_id(int id)
int HYDT_bind_get_os_index(int process_id)
{
return HYDT_bind_info.bindmap[id % HYDT_bind_info.num_procs];
/* TODO: Allow the binding layer to export CPU sets instead of
* single units */
return HYDT_bind_info.bindmap[process_id % HYDT_bind_info.total_proc_units];
}
......@@ -9,35 +9,51 @@
#include "hydra_utils.h"
/*
 * Position of a topology object within its parent's children array,
 * computed from the byte offset between the object and the first child.
 *
 * The argument is evaluated twice, so it must not have side effects, and
 * it must point to an object whose parent pointer is valid (the macro
 * dereferences obj->parent).
 */
#define HYDT_TOPO_CHILD_ID(obj) \
    ((((char *) (obj)) - ((char *) (obj)->parent->children)) / sizeof(struct HYDT_topo_obj))
typedef enum {
HYDT_BIND_NONE = 0,
HYDT_BIND_BASIC,
HYDT_BIND_TOPO
HYDT_BIND_TOPO,
HYDT_BIND_MEMTOPO
} HYDT_bind_support_level_t;
struct HYDT_bind_info {
HYDT_bind_support_level_t support_level;
/*
 * Levels of the hardware topology tree, listed from the outermost
 * container down to the innermost processing unit.
 */
typedef enum {
    /* Cache-coherent set of processors */
    HYDT_TOPO_MACHINE = 0,
    /* Sockets sharing memory dimms */
    HYDT_TOPO_NODE = 1,
    HYDT_TOPO_SOCKET = 2,
    HYDT_TOPO_CORE = 3,
    HYDT_TOPO_THREAD = 4,
    /* The last element; also used to size per-level arrays */
    HYDT_TOPO_END = 5
} HYDT_topo_obj_type_t;
int num_procs;
int num_sockets;
int num_cores;
int num_threads;
struct HYDT_topo_obj {
HYDT_topo_obj_type_t type;
int *bindmap;
char *bindlib;
int os_index; /* OS index */
struct HYDT_topo_obj *parent;
struct HYDT_topology {
int processor_id;
int num_children;
struct HYDT_topo_obj *children;
int socket_rank;
int socket_id;
/* Depth of the shared memory regions. This is a pointer to
* accommodate multiple levels of memory shared by this set of
* processing units. */
int *shared_memory_depth;
};
struct HYDT_bind_info {
HYDT_bind_support_level_t support_level;
char *bindlib;
int *bindmap;
int core_rank;
int core_id;
/* This is needed for all binding levels, except "NONE" */
int total_proc_units;
int thread_rank;
int thread_id;
} *topology;
struct HYDT_topo_obj machine;
};
extern struct HYDT_bind_info HYDT_bind_info;
......
......@@ -11,6 +11,6 @@
#include <assert.h>
HYD_status HYDT_bind_hwloc_init(HYDT_bind_support_level_t * support_level);
HYD_status HYDT_bind_hwloc_process(int core);
HYD_status HYDT_bind_hwloc_process(int os_index);
#endif /* BIND_PLPA_H_INCLUDED */
......@@ -16,8 +16,8 @@ struct HYDT_bind_info HYDT_bind_info;
HYD_status HYDT_bind_plpa_init(HYDT_bind_support_level_t * support_level)
{
PLPA_NAME(api_type_t) p;
int ret, i, j, max, id;
int processor, sock, core, thread;
int ret, i, j, k, proc_id, socket_id, core_id, max, total_cores;
struct HYDT_topo_obj *node, *sock, *core, *thread;
HYD_status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
......@@ -30,106 +30,116 @@ HYD_status HYDT_bind_plpa_init(HYDT_bind_support_level_t * support_level)
/* Find the maximum number of processing elements */
ret = PLPA_NAME(get_processor_data) (PLPA_NAME_CAPS(COUNT_ONLINE),
&HYDT_bind_info.num_procs, &max);
&HYDT_bind_info.total_proc_units, &max);
if (ret) {
/* Unable to get number of processors */
HYDU_warn_printf("plpa get processor data failed\n");
goto fn_fail;
}
HYDU_MALLOC(HYDT_bind_info.topology, struct HYDT_topology *,
HYDT_bind_info.num_procs * sizeof(struct HYDT_topology), status);
for (i = 0; i < HYDT_bind_info.num_procs; i++) {
HYDT_bind_info.topology[i].processor_id = -1;
HYDT_bind_info.topology[i].socket_rank = -1;
HYDT_bind_info.topology[i].socket_id = -1;
HYDT_bind_info.topology[i].core_rank = -1;
HYDT_bind_info.topology[i].core_id = -1;
HYDT_bind_info.topology[i].thread_rank = -1;
HYDT_bind_info.topology[i].thread_id = -1;
}
for (i = 0; i < HYDT_bind_info.num_procs; i++) {
ret = PLPA_NAME(get_processor_id) (i, PLPA_NAME_CAPS(COUNT_ALL), &processor);
if (ret) {
/* Unable to get processor ID */
HYDU_warn_printf("plpa get processor id failed\n");
if (HYDT_bind_info.topology)
HYDU_FREE(HYDT_bind_info.topology);
goto fn_fail;
}
HYDT_bind_info.topology[i].processor_id = processor;
}
/* We have qualified for basic binding support level */
*support_level = HYDT_BIND_BASIC;
/* PLPA only gives information about sockets and cores */
ret = PLPA_NAME(get_socket_info) (&HYDT_bind_info.num_sockets, &max);
/* Setup the machine level */
HYDT_bind_info.machine.type = HYDT_TOPO_MACHINE;
HYDT_bind_info.machine.os_index = -1; /* This is a set, not a single unit */
HYDT_bind_info.machine.parent = NULL;
HYDT_bind_info.machine.num_children = 1;
HYDU_MALLOC(HYDT_bind_info.machine.children, struct HYDT_topo_obj *,
sizeof(struct HYDT_topo_obj), status);
HYDT_bind_info.machine.shared_memory_depth = NULL;
/* Setup the node level */
node = &HYDT_bind_info.machine.children[0];
node->type = HYDT_TOPO_NODE;
node->os_index = -1;
node->parent = &HYDT_bind_info.machine;
ret = PLPA_NAME(get_socket_info) (&node->num_children, &max);
if (ret) {
/* Unable to get number of sockets */
HYDU_warn_printf("plpa get socket info failed\n");
goto fn_fail;
}
HYDU_MALLOC(node->children, struct HYDT_topo_obj *,
sizeof(struct HYDT_topo_obj) * node->num_children, status);
node->shared_memory_depth = NULL;
/* Setup the socket level */
total_cores = 0;
for (i = 0; i < node->num_children; i++) {
sock = &node->children[i];
sock->type = HYDT_TOPO_SOCKET;
sock->os_index = -1;
sock->parent = node;
ret = PLPA_NAME(get_socket_id) (i, &socket_id);
if (ret) {
HYDU_warn_printf("plpa get socket id failed\n");
goto fn_fail;
}
ret = PLPA_NAME(get_core_info) (0, &HYDT_bind_info.num_cores, &max);
if (ret) {
/* Unable to get number of cores */
HYDU_warn_printf("plpa get core info failed\n");
goto fn_fail;
}
HYDT_bind_info.num_threads = HYDT_bind_info.num_procs /
(HYDT_bind_info.num_sockets * HYDT_bind_info.num_cores);
/* Find the socket and core IDs for all processor IDs */
for (i = 0; i < HYDT_bind_info.num_procs; i++) {
ret = PLPA_NAME(map_to_socket_core) (HYDT_bind_info.topology[i].processor_id,
&sock, &core);
ret = PLPA_NAME(get_core_info) (socket_id, &sock->num_children, &max);
if (ret) {
/* Unable to get number of cores */
HYDU_warn_printf("plpa unable to map socket to core\n");
HYDU_warn_printf("plpa get core info failed\n");
goto fn_fail;
}
HYDU_MALLOC(sock->children, struct HYDT_topo_obj *,
sizeof(struct HYDT_topo_obj) * sock->num_children, status);
sock->shared_memory_depth = NULL;
HYDT_bind_info.topology[i].socket_id = sock;
HYDT_bind_info.topology[i].core_id = core;
total_cores += sock->num_children;
}
thread = -1;
for (j = 0; j < i; j++)
if (HYDT_bind_info.topology[j].socket_id == sock &&
HYDT_bind_info.topology[j].core_id == core)
thread = HYDT_bind_info.topology[j].thread_id;
thread++;
if (HYDT_bind_info.total_proc_units % total_cores) {
HYDU_warn_printf("total processing units is not a multiple of total cores\n");
goto fn_fail;
}
HYDT_bind_info.topology[i].thread_id = thread;
HYDT_bind_info.topology[i].thread_rank = thread;
/* Core level objects */
for (i = 0; i < node->num_children; i++) {
sock = &node->children[i];
for (j = 0; j < sock->num_children; j++) {
core = &sock->children[j];
core->type = HYDT_TOPO_CORE;
core->os_index = -1;
core->parent = sock;
core->num_children = HYDT_bind_info.total_proc_units / total_cores;
HYDU_MALLOC(core->children, struct HYDT_topo_obj *,
sizeof(struct HYDT_topo_obj) * sock->num_children, status);
core->shared_memory_depth = NULL;
for (k = 0; k < core->num_children; k++) {
thread = &core->children[k];
thread->type = HYDT_TOPO_THREAD;
thread->os_index = -1;
thread->parent = core;
thread->num_children = 0;
thread->children = NULL;
thread->shared_memory_depth = NULL;
}
}
}
/* Find the rank of each socket ID */
for (i = 0; i < HYDT_bind_info.num_sockets; i++) {
ret = PLPA_NAME(get_socket_id) (i, &id);
for (i = 0; i < HYDT_bind_info.total_proc_units; i++) {
ret = PLPA_NAME(get_processor_id) (i, PLPA_NAME_CAPS(COUNT_ONLINE), &proc_id);
if (ret) {
/* Unable to get socket id */
HYDU_warn_printf("plpa unable to get socket id\n");
HYDU_warn_printf("plpa unable to get processor id\n");
goto fn_fail;
}