Commit c559add6 authored by Pavan Balaji
Browse files

[svn-r7008] Bug fix for dynamic processes: the process mapping is specific to each process group; we were using the value for PG0 for all process groups.
parent c4ecaf2c
......@@ -12,78 +12,6 @@ struct HYD_pmcd_pmi_handle *HYD_pmcd_pmi_handle = { 0 };
struct HYD_pmcd_pmi_publish *HYD_pmcd_pmi_publish_list = NULL;
/*
 * Build the process-mapping string for the proxies reachable from
 * HYD_handle.pg_list.proxy_list.
 *
 * Consecutive proxies whose node.core_count is equal are merged into one
 * block.  The result has the form
 *
 *     (vector,(start,count,cores),(start,count,cores),...)
 *
 * where, for each block, `start` is the index of its first node, `count` is
 * the number of consecutive nodes in the block and `cores` is their common
 * core count.  Nodes are numbered sequentially from 0 in proxy-list order.
 *
 * process_mapping_str: out parameter; receives the string produced by
 *                      HYDU_str_alloc_and_join() (presumably heap allocated
 *                      and owned by the caller -- confirm against that helper).
 *
 * Returns HYD_SUCCESS, or the failing status reported by a helper.
 *
 * NOTE(review): the HYDU_MALLOC / HYDU_STRLIST_CONSOLIDATE / HYDU_ERR_POP
 * macros receive `status` and are presumed to jump to fn_fail on error; the
 * cleanup below is arranged so that the block list is released on that path
 * too.
 */
HYD_status HYD_pmcd_pmi_process_mapping(char **process_mapping_str)
{
    int i, node_id;
    char *tmp[HYD_NUM_TMP_STRINGS];
    struct HYD_proxy *proxy;
    struct block {
        int num_blocks;         /* number of consecutive nodes in this block */
        int block_size;         /* core count shared by those nodes */
        struct block *next;
    } *blocklist_head = NULL, *blocklist_tail = NULL, *block, *nblock;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* Run-length encode the proxy list by core count. */
    for (proxy = HYD_handle.pg_list.proxy_list; proxy; proxy = proxy->next) {
        if (blocklist_tail && blocklist_tail->block_size == proxy->node.core_count) {
            blocklist_tail->num_blocks++;
            continue;
        }

        /* Allocate into a scratch pointer and link only once the block is
         * fully initialised, so the list is well formed at every point a
         * failure jump could occur. */
        HYDU_MALLOC(nblock, struct block *, sizeof(struct block), status);
        nblock->block_size = proxy->node.core_count;
        nblock->num_blocks = 1;
        nblock->next = NULL;

        if (blocklist_tail)
            blocklist_tail->next = nblock;
        else
            blocklist_head = nblock;
        blocklist_tail = nblock;
    }

    i = 0;
    tmp[i++] = HYDU_strdup("(");
    tmp[i++] = HYDU_strdup("vector,");
    node_id = 0;
    for (block = blocklist_head; block; block = block->next) {
        tmp[i++] = HYDU_strdup("(");
        tmp[i++] = HYDU_int_to_str(node_id);
        tmp[i++] = HYDU_strdup(",");
        tmp[i++] = HYDU_int_to_str(block->num_blocks);
        tmp[i++] = HYDU_strdup(",");
        tmp[i++] = HYDU_int_to_str(block->block_size);
        tmp[i++] = HYDU_strdup(")");
        if (block->next)
            tmp[i++] = HYDU_strdup(",");
        /* NOTE(review): if consolidation fails, strings already stored in
         * tmp are not released here because the state of tmp/i after a
         * failed consolidation is not visible from this function. */
        HYDU_STRLIST_CONSOLIDATE(tmp, i, status);

        /* The next block starts after all nodes covered by this one; a
         * plain increment would make node ranges overlap whenever a block
         * spans more than one node. */
        node_id += block->num_blocks;
    }
    tmp[i++] = HYDU_strdup(")");
    tmp[i++] = NULL;

    status = HYDU_str_alloc_and_join(tmp, process_mapping_str);
    /* tmp is NULL terminated here, so release it whether or not the join
     * succeeded, before the error check can jump away. */
    HYDU_free_strlist(tmp);
    HYDU_ERR_POP(status, "error while joining strings\n");

  fn_exit:
    /* Shared by the success and failure paths so the temporary block list
     * never leaks. */
    for (block = blocklist_head; block; block = nblock) {
        nblock = block->next;
        HYDU_FREE(block);
    }
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
struct HYD_proxy *HYD_pmcd_pmi_find_proxy(int fd)
{
struct HYD_pg *pg;
......
......@@ -50,7 +50,6 @@ struct HYD_pmcd_pmi_publish {
};
struct HYD_proxy *HYD_pmcd_pmi_find_proxy(int fd);
/* Builds a "(vector,(index,count,cores),...)" mapping string from the
 * proxies in HYD_handle.pg_list.proxy_list, emitting one tuple per run of
 * consecutive proxies with equal core count, and stores the joined string
 * through process_mapping.  Returns an HYD_status.
 * NOTE(review): the string is presumably allocated for the caller to free --
 * confirm against HYDU_str_alloc_and_join. */
HYD_status HYD_pmcd_pmi_process_mapping(char **process_mapping);
HYD_status HYD_pmcd_pmi_finalize(void);
HYD_status HYD_pmcd_pmi_free_publish(struct HYD_pmcd_pmi_publish *publish);
HYD_status HYD_pmcd_pmi_publish(char *name, char *port, int *success);
......
......@@ -99,6 +99,78 @@ HYD_status HYD_pmcd_pmi_fill_in_proxy_args(char **proxy_args, char *control_port
goto fn_exit;
}
/*
 * Build the process-mapping string for one process group.
 *
 * Walks pg->proxy_list and merges consecutive proxies whose node.core_count
 * is equal into one block.  The result has the form
 *
 *     (vector,(start,count,cores),(start,count,cores),...)
 *
 * where, for each block, `start` is the index of its first node, `count` is
 * the number of consecutive nodes in the block and `cores` is their common
 * core count.  Nodes are numbered sequentially from 0 in proxy-list order.
 *
 * pg:                  process group whose proxy list is described.
 * process_mapping_str: out parameter; receives the string produced by
 *                      HYDU_str_alloc_and_join() (presumably heap allocated
 *                      and owned by the caller -- confirm against that helper).
 *
 * Returns HYD_SUCCESS, or the failing status reported by a helper.
 *
 * NOTE(review): the HYDU_MALLOC / HYDU_STRLIST_CONSOLIDATE / HYDU_ERR_POP
 * macros receive `status` and are presumed to jump to fn_fail on error; the
 * cleanup below is arranged so that the block list is released on that path
 * too.
 */
static HYD_status pmi_process_mapping(struct HYD_pg *pg, char **process_mapping_str)
{
    int i, node_id;
    char *tmp[HYD_NUM_TMP_STRINGS];
    struct HYD_proxy *proxy;
    struct block {
        int num_blocks;         /* number of consecutive nodes in this block */
        int block_size;         /* core count shared by those nodes */
        struct block *next;
    } *blocklist_head = NULL, *blocklist_tail = NULL, *block, *nblock;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* Run-length encode the proxy list by core count. */
    for (proxy = pg->proxy_list; proxy; proxy = proxy->next) {
        if (blocklist_tail && blocklist_tail->block_size == proxy->node.core_count) {
            blocklist_tail->num_blocks++;
            continue;
        }

        /* Allocate into a scratch pointer and link only once the block is
         * fully initialised, so the list is well formed at every point a
         * failure jump could occur. */
        HYDU_MALLOC(nblock, struct block *, sizeof(struct block), status);
        nblock->block_size = proxy->node.core_count;
        nblock->num_blocks = 1;
        nblock->next = NULL;

        if (blocklist_tail)
            blocklist_tail->next = nblock;
        else
            blocklist_head = nblock;
        blocklist_tail = nblock;
    }

    i = 0;
    tmp[i++] = HYDU_strdup("(");
    tmp[i++] = HYDU_strdup("vector,");
    node_id = 0;
    for (block = blocklist_head; block; block = block->next) {
        tmp[i++] = HYDU_strdup("(");
        tmp[i++] = HYDU_int_to_str(node_id);
        tmp[i++] = HYDU_strdup(",");
        tmp[i++] = HYDU_int_to_str(block->num_blocks);
        tmp[i++] = HYDU_strdup(",");
        tmp[i++] = HYDU_int_to_str(block->block_size);
        tmp[i++] = HYDU_strdup(")");
        if (block->next)
            tmp[i++] = HYDU_strdup(",");
        /* NOTE(review): if consolidation fails, strings already stored in
         * tmp are not released here because the state of tmp/i after a
         * failed consolidation is not visible from this function. */
        HYDU_STRLIST_CONSOLIDATE(tmp, i, status);

        /* The next block starts after all nodes covered by this one; a
         * plain increment would make node ranges overlap whenever a block
         * spans more than one node. */
        node_id += block->num_blocks;
    }
    tmp[i++] = HYDU_strdup(")");
    tmp[i++] = NULL;

    status = HYDU_str_alloc_and_join(tmp, process_mapping_str);
    /* tmp is NULL terminated here, so release it whether or not the join
     * succeeded, before the error check can jump away. */
    HYDU_free_strlist(tmp);
    HYDU_ERR_POP(status, "error while joining strings\n");

  fn_exit:
    /* Shared by the success and failure paths so the temporary block list
     * never leaks. */
    for (block = blocklist_head; block; block = nblock) {
        nblock = block->next;
        HYDU_FREE(block);
    }
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
HYD_status HYD_pmcd_pmi_fill_in_exec_launch_info(struct HYD_pg *pg)
{
int i, arg, process_id;
......@@ -114,7 +186,7 @@ HYD_status HYD_pmcd_pmi_fill_in_exec_launch_info(struct HYD_pg *pg)
int pmi_rank, ret;
HYD_status status = HYD_SUCCESS;
status = HYD_pmcd_pmi_process_mapping(&mapping);
status = pmi_process_mapping(pg, &mapping);
HYDU_ERR_POP(status, "Unable to get process mapping information\n");
/* Make sure the mapping is within the size allowed by PMI */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment