Commit 8f2dc211 authored by Shane Snyder
Browse files

for now, do ssg lookups serially

At least for the na+sm protocol, running multiple lookups in
parallel is causing address resolution to point at the wrong
process.
parent fa6c5482
......@@ -308,7 +308,7 @@ static ssg_t ssg_init_internal(margo_instance_id mid, int self_rank,
s = NULL;
goto fini;
}
SSG_DEBUG(s, "group lookup succesful\n");
SSG_DEBUG(s, "group lookup successful\n");
#if USE_SWIM_FD
// initialize swim failure detector
......@@ -375,19 +375,27 @@ static hg_return_t ssg_lookup(ssg_t s, char **addr_strs)
args[r].ssg = s;
args[r].rank = r;
args[r].addr_str = addr_strs[r];
#if 0
int aret = ABT_thread_create(*margo_get_handler_pool(s->mid), &lookup_ult,
&args[r], ABT_THREAD_ATTR_NULL, &ults[r]);
if (aret != ABT_SUCCESS) {
hret = HG_OTHER_ERROR;
goto fini;
}
#endif
}
// wait on all
for (int i = 1; i < s->view.group_size; i++) {
int r = (s->view.self_rank + i) % s->view.group_size;
int aret = ABT_thread_join(ults[r]);
int aret = ABT_thread_create(*margo_get_handler_pool(s->mid), &lookup_ult,
&args[r], ABT_THREAD_ATTR_NULL, &ults[r]);
if (aret != ABT_SUCCESS) {
hret = HG_OTHER_ERROR;
goto fini;
}
aret = ABT_thread_join(ults[r]);
//int aret = ABT_thread_join(ults[r]);
ABT_thread_free(&ults[r]);
ults[r] = ABT_THREAD_NULL; // in case of cascading failure from join
if (aret != ABT_SUCCESS) {
......
......@@ -141,6 +141,7 @@ static int swim_send_dping(ssg_t s, int target)
return(ret);
SSG_DEBUG(s, "recv dping ack from %d\n", dping_resp.msg.source_rank);
assert((int)dping_resp.msg.source_rank == target);
/* extract target's membership state from response */
swim_unpack_message(s, &(dping_resp.msg));
......
......@@ -130,9 +130,6 @@ cleanup:
// cleanup
if(s) ssg_finalize(s);
if(mid != MARGO_INSTANCE_NULL) margo_finalize(mid);
/* XXX: hg does not shut down properly if we simulate failures or
* if a member receives RPCs before registering SWIM RPC handlers
*/
if(hgctx && 0) HG_Context_destroy(hgctx);
if(hgcl && 0) HG_Finalize(hgcl);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment