Commit 695b9225 authored by Shane Snyder

bunch of mods on group descriptors & locking

parent 03a3135f
...@@ -28,8 +28,8 @@ extern "C" { ...@@ -28,8 +28,8 @@ extern "C" {
#define SSG_FAILURE (-1) #define SSG_FAILURE (-1)
/* opaque SSG group ID type */ /* opaque SSG group ID type */
typedef struct ssg_group_descriptor *ssg_group_id_t; typedef uint64_t ssg_group_id_t;
#define SSG_GROUP_ID_NULL ((ssg_group_id_t)NULL) #define SSG_GROUP_ID_INVALID 0
/* SSG group member ID type */ /* SSG group member ID type */
typedef uint64_t ssg_member_id_t; typedef uint64_t ssg_member_id_t;
...@@ -49,8 +49,8 @@ typedef void (*ssg_membership_update_cb)( ...@@ -49,8 +49,8 @@ typedef void (*ssg_membership_update_cb)(
ssg_member_update_type_t update_type); ssg_member_update_type_t update_type);
/* HG proc routine prototypes for SSG types */ /* HG proc routine prototypes for SSG types */
#define hg_proc_ssg_member_id_t hg_proc_int64_t #define hg_proc_ssg_group_id_t hg_proc_uint64_t
hg_return_t hg_proc_ssg_group_id_t(hg_proc_t proc, void *data); #define hg_proc_ssg_member_id_t hg_proc_uint64_t
/*************************************************** /***************************************************
*** SSG runtime initialization/shutdown routines *** *** SSG runtime initialization/shutdown routines ***
...@@ -209,22 +209,6 @@ hg_addr_t ssg_get_group_addr( ...@@ -209,22 +209,6 @@ hg_addr_t ssg_get_group_addr(
ssg_group_id_t group_id, ssg_group_id_t group_id,
ssg_member_id_t member_id); ssg_member_id_t member_id);
/**
* Duplicates the given SSG group identifier.
*
* @param[in] group_id SSG group ID
* @returns SSG group identifier on success, SSG_GROUP_ID_NULL otherwise
*/
ssg_group_id_t ssg_group_id_dup(
ssg_group_id_t group_id);
/** Frees the given SSG group identifier.
*
* @param[in] group_id SSG group ID
*/
void ssg_group_id_free(
ssg_group_id_t group_id);
/** /**
* Retrieves the HG address string associated with an SSG group identifier. * Retrieves the HG address string associated with an SSG group identifier.
* *
......
...@@ -47,22 +47,24 @@ typedef struct ssg_instance ...@@ -47,22 +47,24 @@ typedef struct ssg_instance
margo_instance_id mid; margo_instance_id mid;
char *self_addr_str; char *self_addr_str;
ssg_member_id_t self_id; ssg_member_id_t self_id;
struct ssg_group *group_table; struct ssg_group_descriptor *g_desc_table;
#if 0
struct ssg_attached_group *attached_group_table; struct ssg_attached_group *attached_group_table;
#endif
#ifdef SSG_HAVE_PMIX #ifdef SSG_HAVE_PMIX
size_t pmix_failure_evhdlr_ref; size_t pmix_failure_evhdlr_ref;
#endif #endif
ABT_rwlock lock; ABT_rwlock lock;
} ssg_instance_t; } ssg_instance_t;
/* TODO: associate a version number with a descriptor? */
typedef struct ssg_group_descriptor typedef struct ssg_group_descriptor
{ {
uint64_t magic_nr; uint64_t magic_nr;
uint64_t name_hash; ssg_group_id_t g_id;
char *addr_str; char *addr_str;
int owner_status; int owner_status;
int ref_count; struct ssg_group *g;
UT_hash_handle hh;
} ssg_group_descriptor_t; } ssg_group_descriptor_t;
enum ssg_group_descriptor_owner_status enum ssg_group_descriptor_owner_status
...@@ -101,7 +103,6 @@ typedef struct ssg_group ...@@ -101,7 +103,6 @@ typedef struct ssg_group
#ifdef DEBUG #ifdef DEBUG
FILE *dbg_log; FILE *dbg_log;
#endif #endif
UT_hash_handle hh;
} ssg_group_t; } ssg_group_t;
typedef struct ssg_attached_group typedef struct ssg_attached_group
...@@ -111,7 +112,6 @@ typedef struct ssg_attached_group ...@@ -111,7 +112,6 @@ typedef struct ssg_attached_group
ssg_group_view_t view; ssg_group_view_t view;
ssg_group_descriptor_t *descriptor; ssg_group_descriptor_t *descriptor;
ABT_rwlock lock; ABT_rwlock lock;
UT_hash_handle hh;
} ssg_attached_group_t; } ssg_attached_group_t;
typedef struct ssg_member_update typedef struct ssg_member_update
......
...@@ -24,9 +24,9 @@ ...@@ -24,9 +24,9 @@
/* NOTE: keep in sync with ssg_group_descriptor_t definition in ssg-internal.h */ /* NOTE: keep in sync with ssg_group_descriptor_t definition in ssg-internal.h */
MERCURY_GEN_STRUCT_PROC(ssg_group_descriptor_t, \ MERCURY_GEN_STRUCT_PROC(ssg_group_descriptor_t, \
((uint64_t) (magic_nr)) \ ((uint64_t) (magic_nr)) \
((uint64_t) (name_hash)) \ ((ssg_group_id_t) (g_id)) \
((hg_string_t) (addr_str))); ((hg_string_t) (addr_str)));
MERCURY_GEN_PROC(ssg_group_join_request_t, \ MERCURY_GEN_PROC(ssg_group_join_request_t, \
((ssg_group_descriptor_t) (group_descriptor)) ((ssg_group_descriptor_t) (group_descriptor))
...@@ -36,7 +36,7 @@ MERCURY_GEN_PROC(ssg_group_join_response_t, \ ...@@ -36,7 +36,7 @@ MERCURY_GEN_PROC(ssg_group_join_response_t, \
((hg_string_t) (group_name)) \ ((hg_string_t) (group_name)) \
((uint32_t) (group_size)) \ ((uint32_t) (group_size)) \
((hg_size_t) (view_buf_size)) ((hg_size_t) (view_buf_size))
((uint8_t) (ret))); ((uint8_t) (ret)));
MERCURY_GEN_PROC(ssg_group_leave_request_t, \ MERCURY_GEN_PROC(ssg_group_leave_request_t, \
((ssg_group_descriptor_t) (group_descriptor)) ((ssg_group_descriptor_t) (group_descriptor))
...@@ -201,7 +201,7 @@ static void ssg_group_join_recv_ult( ...@@ -201,7 +201,7 @@ static void ssg_group_join_recv_ult(
hg_handle_t handle) hg_handle_t handle)
{ {
const struct hg_info *hgi = NULL; const struct hg_info *hgi = NULL;
ssg_group_t *g = NULL; ssg_group_descriptor_t *g_desc = NULL;
ssg_group_join_request_t join_req; ssg_group_join_request_t join_req;
ssg_group_join_response_t join_resp; ssg_group_join_response_t join_resp;
hg_size_t view_size_requested; hg_size_t view_size_requested;
...@@ -224,15 +224,15 @@ static void ssg_group_join_recv_ult( ...@@ -224,15 +224,15 @@ static void ssg_group_join_recv_ult(
view_size_requested = margo_bulk_get_size(join_req.bulk_handle); view_size_requested = margo_bulk_get_size(join_req.bulk_handle);
/* look for the given group in my local table of groups */ /* look for the given group in my local table of groups */
HASH_FIND(hh, ssg_inst->group_table, &join_req.group_descriptor.name_hash, HASH_FIND(hh, ssg_inst->g_desc_table, &join_req.group_descriptor.g_id,
sizeof(uint64_t), g); sizeof(uint64_t), g_desc);
if (!g) if (!g_desc)
{ {
margo_free_input(handle, &join_req); margo_free_input(handle, &join_req);
goto fini; goto fini;
} }
sret = ssg_group_serialize(g, &view_buf, &view_buf_size); sret = ssg_group_serialize(g_desc->g, &view_buf, &view_buf_size);
if (sret != SSG_SUCCESS) if (sret != SSG_SUCCESS)
{ {
margo_free_input(handle, &join_req); margo_free_input(handle, &join_req);
...@@ -261,13 +261,13 @@ static void ssg_group_join_recv_ult( ...@@ -261,13 +261,13 @@ static void ssg_group_join_recv_ult(
/* apply group join locally */ /* apply group join locally */
join_update.type = SSG_MEMBER_JOINED; join_update.type = SSG_MEMBER_JOINED;
join_update.u.member_addr_str = join_req.addr_str; join_update.u.member_addr_str = join_req.addr_str;
ssg_apply_member_updates(g, &join_update, 1); ssg_apply_member_updates(g_desc->g, &join_update, 1);
} }
margo_free_input(handle, &join_req); margo_free_input(handle, &join_req);
/* set the response and send back */ /* set the response and send back */
join_resp.group_name = g->name; join_resp.group_name = g_desc->g->name;
join_resp.group_size = (int)g->view.size; join_resp.group_size = (int)g_desc->g->view.size;
join_resp.view_buf_size = view_buf_size; join_resp.view_buf_size = view_buf_size;
join_resp.ret = SSG_SUCCESS; join_resp.ret = SSG_SUCCESS;
fini: fini:
...@@ -324,7 +324,7 @@ static void ssg_group_leave_recv_ult( ...@@ -324,7 +324,7 @@ static void ssg_group_leave_recv_ult(
hg_handle_t handle) hg_handle_t handle)
{ {
const struct hg_info *hgi = NULL; const struct hg_info *hgi = NULL;
ssg_group_t *g = NULL; ssg_group_descriptor_t *g_desc = NULL;
ssg_group_leave_request_t leave_req; ssg_group_leave_request_t leave_req;
ssg_group_leave_response_t leave_resp; ssg_group_leave_response_t leave_resp;
ssg_member_update_t leave_update; ssg_member_update_t leave_update;
...@@ -341,9 +341,9 @@ static void ssg_group_leave_recv_ult( ...@@ -341,9 +341,9 @@ static void ssg_group_leave_recv_ult(
if (hret != HG_SUCCESS) goto fini; if (hret != HG_SUCCESS) goto fini;
/* look for the given group in my local table of groups */ /* look for the given group in my local table of groups */
HASH_FIND(hh, ssg_inst->group_table, &leave_req.group_descriptor.name_hash, HASH_FIND(hh, ssg_inst->g_desc_table, &leave_req.group_descriptor.g_id,
sizeof(uint64_t), g); sizeof(uint64_t), g_desc);
if (!g) if (!g_desc)
{ {
margo_free_input(handle, &leave_req); margo_free_input(handle, &leave_req);
goto fini; goto fini;
...@@ -352,7 +352,7 @@ static void ssg_group_leave_recv_ult( ...@@ -352,7 +352,7 @@ static void ssg_group_leave_recv_ult(
/* apply group leave locally */ /* apply group leave locally */
leave_update.type = SSG_MEMBER_LEFT; leave_update.type = SSG_MEMBER_LEFT;
leave_update.u.member_id = leave_req.member_id; leave_update.u.member_id = leave_req.member_id;
ssg_apply_member_updates(g, &leave_update, 1); ssg_apply_member_updates(g_desc->g, &leave_update, 1);
margo_free_input(handle, &leave_req); margo_free_input(handle, &leave_req);
leave_resp.ret = SSG_SUCCESS; leave_resp.ret = SSG_SUCCESS;
...@@ -482,7 +482,7 @@ static void ssg_group_attach_recv_ult( ...@@ -482,7 +482,7 @@ static void ssg_group_attach_recv_ult(
hg_handle_t handle) hg_handle_t handle)
{ {
const struct hg_info *hgi = NULL; const struct hg_info *hgi = NULL;
ssg_group_t *g = NULL; ssg_group_descriptor_t *g_desc = NULL;
ssg_group_attach_request_t attach_req; ssg_group_attach_request_t attach_req;
ssg_group_attach_response_t attach_resp; ssg_group_attach_response_t attach_resp;
hg_size_t view_size_requested; hg_size_t view_size_requested;
...@@ -502,15 +502,15 @@ static void ssg_group_attach_recv_ult( ...@@ -502,15 +502,15 @@ static void ssg_group_attach_recv_ult(
view_size_requested = margo_bulk_get_size(attach_req.bulk_handle); view_size_requested = margo_bulk_get_size(attach_req.bulk_handle);
/* look for the given group in my local table of groups */ /* look for the given group in my local table of groups */
HASH_FIND(hh, ssg_inst->group_table, &attach_req.group_descriptor.name_hash, HASH_FIND(hh, ssg_inst->g_desc_table, &attach_req.group_descriptor.g_id,
sizeof(uint64_t), g); sizeof(uint64_t), g_desc);
if (!g) if (!g_desc)
{ {
margo_free_input(handle, &attach_req); margo_free_input(handle, &attach_req);
goto fini; goto fini;
} }
sret = ssg_group_serialize(g, &view_buf, &view_buf_size); sret = ssg_group_serialize(g_desc->g, &view_buf, &view_buf_size);
if (sret != SSG_SUCCESS) if (sret != SSG_SUCCESS)
{ {
margo_free_input(handle, &attach_req); margo_free_input(handle, &attach_req);
...@@ -538,8 +538,8 @@ static void ssg_group_attach_recv_ult( ...@@ -538,8 +538,8 @@ static void ssg_group_attach_recv_ult(
} }
/* set the response and send back */ /* set the response and send back */
attach_resp.group_name = g->name; attach_resp.group_name = g_desc->g->name;
attach_resp.group_size = (int)g->view.size; attach_resp.group_size = (int)g_desc->g->view.size;
attach_resp.view_buf_size = view_buf_size; attach_resp.view_buf_size = view_buf_size;
margo_respond(handle, &attach_resp); margo_respond(handle, &attach_resp);
...@@ -564,6 +564,8 @@ static int ssg_group_serialize( ...@@ -564,6 +564,8 @@ static int ssg_group_serialize(
*buf = NULL; *buf = NULL;
*buf_size = 0; *buf_size = 0;
ABT_rwlock_rdlock(g->lock);
/* first determine size */ /* first determine size */
group_buf_size = strlen(ssg_inst->self_addr_str) + 1; group_buf_size = strlen(ssg_inst->self_addr_str) + 1;
HASH_ITER(hh, g->view.member_map, member_state, tmp) HASH_ITER(hh, g->view.member_map, member_state, tmp)
...@@ -574,6 +576,7 @@ static int ssg_group_serialize( ...@@ -574,6 +576,7 @@ static int ssg_group_serialize(
group_buf = malloc(group_buf_size); group_buf = malloc(group_buf_size);
if(!group_buf) if(!group_buf)
{ {
ABT_rwlock_unlock(g->lock);
return SSG_FAILURE; return SSG_FAILURE;
} }
...@@ -590,62 +593,13 @@ static int ssg_group_serialize( ...@@ -590,62 +593,13 @@ static int ssg_group_serialize(
*buf = group_buf; *buf = group_buf;
*buf_size = group_buf_size; *buf_size = group_buf_size;
ABT_rwlock_unlock(g->lock);
return SSG_SUCCESS; return SSG_SUCCESS;
} }
/* custom SSG RPC proc routines */ /* custom SSG RPC proc routines */
/*
 * Mercury proc routine for an SSG group ID that is represented as a pointer
 * to a group descriptor.
 *
 * `data` is interpreted as `ssg_group_descriptor_t **`. Behavior depends on
 * the operation reported by hg_proc_get_op(proc):
 *   - HG_ENCODE: serializes the descriptor that `*data` points to.
 *   - HG_DECODE: allocates a zeroed descriptor, deserializes into it, and
 *                sets its ref_count to 1.
 *   - HG_FREE:   releases the descriptor (and its addr_str) when ref_count
 *                is 1, otherwise just decrements ref_count.
 *
 * Returns the result of the nested descriptor proc on a successful
 * encode/decode, HG_SUCCESS for HG_FREE, HG_NOMEM_ERROR if the decode
 * allocation fails, and HG_PROTOCOL_ERROR for a failed nested proc call or
 * an unrecognized operation.
 */
hg_return_t hg_proc_ssg_group_id_t(
hg_proc_t proc, void *data)
{
ssg_group_descriptor_t **group_descriptor = (ssg_group_descriptor_t **)data;
/* default result; stays in effect if the op matches none of the cases below */
hg_return_t hret = HG_PROTOCOL_ERROR;
switch(hg_proc_get_op(proc))
{
case HG_ENCODE:
/* delegate to the generated struct proc for the pointed-to descriptor */
hret = hg_proc_ssg_group_descriptor_t(proc, *group_descriptor);
if (hret != HG_SUCCESS)
{
/* any nested failure is reported uniformly as a protocol error */
hret = HG_PROTOCOL_ERROR;
return hret;
}
break;
case HG_DECODE:
/* the decoded descriptor is heap-allocated here and handed back via *data */
*group_descriptor = malloc(sizeof(**group_descriptor));
if (!(*group_descriptor))
{
hret = HG_NOMEM_ERROR;
return hret;
}
/* zero first so fields not covered by the struct proc start out cleared */
memset(*group_descriptor, 0, sizeof(**group_descriptor));
hret = hg_proc_ssg_group_descriptor_t(proc, *group_descriptor);
if (hret != HG_SUCCESS)
{
/* NOTE(review): the descriptor allocated above is not freed on this
 * path -- looks like a leak unless the caller cleans up; confirm */
hret = HG_PROTOCOL_ERROR;
return hret;
}
/* a freshly decoded descriptor starts with a single reference */
(*group_descriptor)->ref_count = 1;
break;
case HG_FREE:
/* NOTE(review): *group_descriptor is dereferenced without a NULL check;
 * presumably it is always set by a prior successful decode -- confirm */
if ((*group_descriptor)->ref_count == 1)
{
/* last reference: release the address string and the descriptor itself */
free((*group_descriptor)->addr_str);
free(*group_descriptor);
}
else
{
/* other references remain: only drop this one */
(*group_descriptor)->ref_count--;
}
hret = HG_SUCCESS;
break;
default:
/* unrecognized op: fall through with hret == HG_PROTOCOL_ERROR */
break;
}
return hret;
}
hg_return_t hg_proc_ssg_member_update_t( hg_return_t hg_proc_ssg_member_update_t(
hg_proc_t proc, void *data) hg_proc_t proc, void *data)
{ {
......
This diff is collapsed.
...@@ -70,6 +70,8 @@ struct swim_context ...@@ -70,6 +70,8 @@ struct swim_context
ABT_pool swim_pool; ABT_pool swim_pool;
/* swim protocol ULT handle */ /* swim protocol ULT handle */
ABT_thread prot_thread; ABT_thread prot_thread;
/* swim protocol lock */
ABT_rwlock swim_lock;
}; };
/* SWIM ping function prototypes */ /* SWIM ping function prototypes */
......
...@@ -181,6 +181,7 @@ static void swim_dping_req_recv_ult( ...@@ -181,6 +181,7 @@ static void swim_dping_req_recv_ult(
if (group == NULL || group->swim_ctx == NULL) if (group == NULL || group->swim_ctx == NULL)
{ {
fprintf(stderr, "SWIM dping req recv error -- invalid group state\n"); fprintf(stderr, "SWIM dping req recv error -- invalid group state\n");
margo_destroy(handle);
return; return;
} }
...@@ -243,6 +244,7 @@ static void swim_dping_ack_recv_ult( ...@@ -243,6 +244,7 @@ static void swim_dping_ack_recv_ult(
if (group == NULL || group->swim_ctx == NULL) if (group == NULL || group->swim_ctx == NULL)
{ {
fprintf(stderr, "SWIM dping ack recv error -- invalid group state\n"); fprintf(stderr, "SWIM dping ack recv error -- invalid group state\n");
margo_destroy(handle);
return; return;
} }
...@@ -353,11 +355,11 @@ void swim_iping_req_send_ult( ...@@ -353,11 +355,11 @@ void swim_iping_req_send_ult(
} }
swim_ctx = group->swim_ctx; swim_ctx = group->swim_ctx;
ABT_rwlock_wrlock(group->lock); ABT_rwlock_wrlock(swim_ctx->swim_lock);
iping_target_id = swim_ctx->iping_target_ids[swim_ctx->iping_target_ndx]; iping_target_id = swim_ctx->iping_target_ids[swim_ctx->iping_target_ndx];
iping_target_addr = swim_ctx->iping_target_addrs[swim_ctx->iping_target_ndx]; iping_target_addr = swim_ctx->iping_target_addrs[swim_ctx->iping_target_ndx];
swim_ctx->iping_target_ndx++; swim_ctx->iping_target_ndx++;
ABT_rwlock_unlock(group->lock); ABT_rwlock_unlock(swim_ctx->swim_lock);
hret = margo_create(swim_ctx->mid, iping_target_addr, swim_iping_req_rpc_id, &handle); hret = margo_create(swim_ctx->mid, iping_target_addr, swim_iping_req_rpc_id, &handle);
if(hret != HG_SUCCESS) if(hret != HG_SUCCESS)
...@@ -402,6 +404,7 @@ static void swim_iping_req_recv_ult(hg_handle_t handle) ...@@ -402,6 +404,7 @@ static void swim_iping_req_recv_ult(hg_handle_t handle)
if (group == NULL || group->swim_ctx == NULL) if (group == NULL || group->swim_ctx == NULL)
{ {
fprintf(stderr, "SWIM iping req recv error -- invalid group state\n"); fprintf(stderr, "SWIM iping req recv error -- invalid group state\n");
margo_destroy(handle);
return; return;
} }
...@@ -471,6 +474,7 @@ static void swim_iping_ack_recv_ult(hg_handle_t handle) ...@@ -471,6 +474,7 @@ static void swim_iping_ack_recv_ult(hg_handle_t handle)
if (group == NULL || group->swim_ctx == NULL) if (group == NULL || group->swim_ctx == NULL)
{ {
fprintf(stderr, "SWIM iping ack recv error -- invalid group state\n"); fprintf(stderr, "SWIM iping ack recv error -- invalid group state\n");
margo_destroy(handle);
return; return;
} }
......
This diff is collapsed.
...@@ -130,7 +130,7 @@ int main(int argc, char *argv[]) ...@@ -130,7 +130,7 @@ int main(int argc, char *argv[])
{ {
struct group_launch_opts opts; struct group_launch_opts opts;
margo_instance_id mid = MARGO_INSTANCE_NULL; margo_instance_id mid = MARGO_INSTANCE_NULL;
ssg_group_id_t g_id = SSG_GROUP_ID_NULL; ssg_group_id_t g_id = SSG_GROUP_ID_INVALID;
ssg_member_id_t my_id; ssg_member_id_t my_id;
int group_size; int group_size;
int sret; int sret;
...@@ -180,7 +180,7 @@ int main(int argc, char *argv[]) ...@@ -180,7 +180,7 @@ int main(int argc, char *argv[])
if(strcmp(opts.group_mode, "pmix") == 0) if(strcmp(opts.group_mode, "pmix") == 0)
g_id = ssg_group_create_pmix(opts.group_name, proc, NULL, NULL); g_id = ssg_group_create_pmix(opts.group_name, proc, NULL, NULL);
#endif #endif
DIE_IF(g_id == SSG_GROUP_ID_NULL, "ssg_group_create"); DIE_IF(g_id == SSG_GROUP_ID_INVALID, "ssg_group_create");
/* store the gid if requested */ /* store the gid if requested */
if (opts.gid_file) if (opts.gid_file)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment