Commit 3c5fb239 authored by Shane Snyder's avatar Shane Snyder

rest of ssg refactor changes

parent d76bef34
...@@ -36,21 +36,17 @@ typedef uint64_t ssg_member_id_t; ...@@ -36,21 +36,17 @@ typedef uint64_t ssg_member_id_t;
#define SSG_MEMBER_ID_INVALID 0 #define SSG_MEMBER_ID_INVALID 0
/* SSG group member update types */ /* SSG group member update types */
typedef enum ssg_update_type typedef enum ssg_member_update_type
{ {
SSG_MEMBER_JOINED = 0, SSG_MEMBER_JOINED = 0,
SSG_MEMBER_LEFT, SSG_MEMBER_LEFT,
SSG_MEMBER_DIED SSG_MEMBER_DIED
} ssg_update_type_t; } ssg_member_update_type_t;
typedef struct ssg_member_update
{
ssg_member_id_t id;
int type;
} ssg_member_update_t;
typedef void (*ssg_membership_update_cb)( typedef void (*ssg_membership_update_cb)(
ssg_member_update_t, void *); void * group_data,
ssg_member_id_t member_id,
ssg_member_update_type_t update_type);
/* HG proc routine prototypes for SSG types */ /* HG proc routine prototypes for SSG types */
#define hg_proc_ssg_member_id_t hg_proc_int64_t #define hg_proc_ssg_member_id_t hg_proc_int64_t
......
...@@ -48,13 +48,12 @@ extern "C" { ...@@ -48,13 +48,12 @@ extern "C" {
} while(0) } while(0)
/* debug printing macro for SSG */ /* debug printing macro for SSG */
/* TODO: how do we debug attachers? */
#ifdef DEBUG #ifdef DEBUG
#define SSG_DEBUG(__g, __fmt, ...) do { \ #define SSG_DEBUG(__g, __fmt, ...) do { \
double __now = ABT_get_wtime(); \ double __now = ABT_get_wtime(); \
fprintf(g->dbg_log, "[%.6lf] %20"PRIu64" (%s): SSG " __fmt, __now, \ fprintf(__g->dbg_log, "[%.6lf] %20"PRIu64" (%s): " __fmt, __now, \
__g->self_id, __g->name, ## __VA_ARGS__); \ __g->self_id, __g->name, ## __VA_ARGS__); \
fflush(g->dbg_log); \ fflush(__g->dbg_log); \
} while(0) } while(0)
#else #else
#define SSG_DEBUG(__g, __fmt, ...) do { \ #define SSG_DEBUG(__g, __fmt, ...) do { \
...@@ -63,6 +62,16 @@ extern "C" { ...@@ -63,6 +62,16 @@ extern "C" {
/* SSG internal dataypes */ /* SSG internal dataypes */
/* TODO: associate a version number with a descriptor? */
typedef struct ssg_group_descriptor
{
uint64_t magic_nr;
uint64_t name_hash;
char *addr_str;
int owner_status;
int ref_count;
} ssg_group_descriptor_t;
typedef struct ssg_member_state typedef struct ssg_member_state
{ {
ssg_member_id_t id; ssg_member_id_t id;
...@@ -72,15 +81,15 @@ typedef struct ssg_member_state ...@@ -72,15 +81,15 @@ typedef struct ssg_member_state
UT_hash_handle hh; UT_hash_handle hh;
} ssg_member_state_t; } ssg_member_state_t;
/* TODO: associate a version number with a descriptor */ typedef struct ssg_member_update
typedef struct ssg_group_descriptor
{ {
uint64_t magic_nr; ssg_member_update_type_t type;
uint64_t name_hash; union
char *addr_str; {
int owner_status; char *member_addr_str;
int ref_count; ssg_member_id_t member_id;
} ssg_group_descriptor_t; } u;
} ssg_member_update_t;
typedef struct ssg_group_view typedef struct ssg_group_view
{ {
...@@ -88,26 +97,17 @@ typedef struct ssg_group_view ...@@ -88,26 +97,17 @@ typedef struct ssg_group_view
ssg_member_state_t *member_map; ssg_member_state_t *member_map;
} ssg_group_view_t; } ssg_group_view_t;
typedef struct ssg_group_target_list
{
ssg_member_state_t **targets;
unsigned int nslots;
unsigned int len;
unsigned int dping_ndx;
} ssg_group_target_list_t;
typedef struct ssg_group typedef struct ssg_group
{ {
char *name; char *name;
ssg_member_id_t self_id; ssg_member_id_t self_id;
ssg_group_view_t view; ssg_group_view_t view;
ssg_group_target_list_t target_list;
ssg_member_state_t *dead_members; ssg_member_state_t *dead_members;
ssg_group_descriptor_t *descriptor; ssg_group_descriptor_t *descriptor;
swim_context_t *swim_ctx; swim_context_t *swim_ctx;
ABT_rwlock lock;
ssg_membership_update_cb update_cb; ssg_membership_update_cb update_cb;
void *update_cb_dat; void *update_cb_dat;
ABT_rwlock lock;
#ifdef DEBUG #ifdef DEBUG
FILE *dbg_log; FILE *dbg_log;
#endif #endif
...@@ -168,10 +168,12 @@ int ssg_group_attach_send( ...@@ -168,10 +168,12 @@ int ssg_group_attach_send(
char ** group_name, char ** group_name,
int * group_size, int * group_size,
void ** view_buf); void ** view_buf);
void ssg_apply_swim_user_updates( void ssg_apply_member_updates(
void *group_data, ssg_group_t * g,
swim_user_update_t *updates, ssg_member_update_t * updates,
hg_size_t update_count); hg_size_t update_count);
hg_return_t hg_proc_ssg_member_update_t(
hg_proc_t proc, void *data);
extern ssg_instance_t *ssg_inst; extern ssg_instance_t *ssg_inst;
......
...@@ -18,17 +18,6 @@ ...@@ -18,17 +18,6 @@
#define SSG_VIEW_BUF_DEF_SIZE (128 * 1024) #define SSG_VIEW_BUF_DEF_SIZE (128 * 1024)
#define SSG_USER_UPDATE_SERIALIZE(__type, __data, __size, __update) do { \
__update.size = sizeof(uint8_t) + __size; \
__update.data = malloc(__update.size); \
if (__update.data) { \
void *__p = __update.data; \
*(uint8_t *)__p = __type; \
__p += sizeof(uint8_t); \
memcpy(__p, __data, __size); \
} \
} while(0)
/* SSG RPC types and (de)serialization routines */ /* SSG RPC types and (de)serialization routines */
/* TODO join and attach are nearly identical -- refactor */ /* TODO join and attach are nearly identical -- refactor */
...@@ -224,7 +213,7 @@ static void ssg_group_join_recv_ult( ...@@ -224,7 +213,7 @@ static void ssg_group_join_recv_ult(
void *view_buf = NULL; void *view_buf = NULL;
hg_size_t view_buf_size; hg_size_t view_buf_size;
hg_bulk_t bulk_handle = HG_BULK_NULL; hg_bulk_t bulk_handle = HG_BULK_NULL;
swim_user_update_t join_update; ssg_member_update_t join_update;
int sret; int sret;
hg_return_t hret; hg_return_t hret;
...@@ -274,13 +263,10 @@ static void ssg_group_join_recv_ult( ...@@ -274,13 +263,10 @@ static void ssg_group_join_recv_ult(
goto fini; goto fini;
} }
/* create an SSG join update and register with SWIM to be gossiped */
SSG_USER_UPDATE_SERIALIZE(SSG_MEMBER_JOINED, join_req.addr_str,
strlen(join_req.addr_str) + 1, join_update);
swim_register_user_update(g->swim_ctx, join_update);
/* apply group join locally */ /* apply group join locally */
ssg_apply_swim_user_updates(g, &join_update, 1); join_update.type = SSG_MEMBER_JOINED;
join_update.u.member_addr_str = join_req.addr_str;
ssg_apply_member_updates(g, &join_update, 1);
} }
margo_free_input(handle, &join_req); margo_free_input(handle, &join_req);
...@@ -346,7 +332,7 @@ static void ssg_group_leave_recv_ult( ...@@ -346,7 +332,7 @@ static void ssg_group_leave_recv_ult(
ssg_group_t *g = NULL; ssg_group_t *g = NULL;
ssg_group_leave_request_t leave_req; ssg_group_leave_request_t leave_req;
ssg_group_leave_response_t leave_resp; ssg_group_leave_response_t leave_resp;
swim_user_update_t leave_update; ssg_member_update_t leave_update;
hg_return_t hret; hg_return_t hret;
leave_resp.ret = SSG_FAILURE; leave_resp.ret = SSG_FAILURE;
...@@ -368,15 +354,12 @@ static void ssg_group_leave_recv_ult( ...@@ -368,15 +354,12 @@ static void ssg_group_leave_recv_ult(
goto fini; goto fini;
} }
/* create an SSG join update and register with SWIM to be gossiped */ /* apply group leave locally */
SSG_USER_UPDATE_SERIALIZE(SSG_MEMBER_LEFT, &leave_req.member_id, leave_update.type = SSG_MEMBER_LEFT;
sizeof(leave_req.member_id), leave_update); leave_update.u.member_id = leave_req.member_id;
swim_register_user_update(g->swim_ctx, leave_update); ssg_apply_member_updates(g, &leave_update, 1);
margo_free_input(handle, &leave_req);
/* apply group join locally */
ssg_apply_swim_user_updates(g, &leave_update, 1);
margo_free_input(handle, &leave_req);
leave_resp.ret = SSG_SUCCESS; leave_resp.ret = SSG_SUCCESS;
fini: fini:
/* respond */ /* respond */
...@@ -656,7 +639,7 @@ hg_return_t hg_proc_ssg_group_id_t( ...@@ -656,7 +639,7 @@ hg_return_t hg_proc_ssg_group_id_t(
(*group_descriptor)->ref_count = 1; (*group_descriptor)->ref_count = 1;
break; break;
case HG_FREE: case HG_FREE:
if((*group_descriptor)->ref_count == 1) if ((*group_descriptor)->ref_count == 1)
{ {
free((*group_descriptor)->addr_str); free((*group_descriptor)->addr_str);
free(*group_descriptor); free(*group_descriptor);
...@@ -673,3 +656,93 @@ hg_return_t hg_proc_ssg_group_id_t( ...@@ -673,3 +656,93 @@ hg_return_t hg_proc_ssg_group_id_t(
return hret; return hret;
} }
hg_return_t hg_proc_ssg_member_update_t(
hg_proc_t proc, void *data)
{
ssg_member_update_t *update = (ssg_member_update_t *)data;
hg_return_t hret = HG_PROTOCOL_ERROR;
switch(hg_proc_get_op(proc))
{
case HG_ENCODE:
hret = hg_proc_uint8_t(proc, &(update->type));
if (hret != HG_SUCCESS)
{
hret = HG_PROTOCOL_ERROR;
return hret;
}
if (update->type == SSG_MEMBER_JOINED)
{
hret = hg_proc_hg_string_t(proc, &(update->u.member_addr_str));
if (hret != HG_SUCCESS)
{
hret = HG_PROTOCOL_ERROR;
return hret;
}
}
else if (update->type == SSG_MEMBER_LEFT)
{
hret = hg_proc_ssg_member_id_t(proc, &(update->u.member_id));
if (hret != HG_SUCCESS)
{
hret = HG_PROTOCOL_ERROR;
return hret;
}
}
else
{
hret = HG_PROTOCOL_ERROR;
}
break;
case HG_DECODE:
hret = hg_proc_uint8_t(proc, &(update->type));
if (hret != HG_SUCCESS)
{
hret = HG_PROTOCOL_ERROR;
return hret;
}
if (update->type == SSG_MEMBER_JOINED)
{
hret = hg_proc_hg_string_t(proc, &(update->u.member_addr_str));
if (hret != HG_SUCCESS)
{
hret = HG_PROTOCOL_ERROR;
return hret;
}
}
else if (update->type == SSG_MEMBER_LEFT)
{
hret = hg_proc_ssg_member_id_t(proc, &(update->u.member_id));
if (hret != HG_SUCCESS)
{
hret = HG_PROTOCOL_ERROR;
return hret;
}
}
else
{
hret = HG_PROTOCOL_ERROR;
}
break;
case HG_FREE:
if (update->type == SSG_MEMBER_JOINED)
{
hret = hg_proc_hg_string_t(proc, &(update->u.member_addr_str));
if (hret != HG_SUCCESS)
{
hret = HG_PROTOCOL_ERROR;
return hret;
}
}
else
{
hret = HG_SUCCESS;
}
break;
default:
break;
}
return hret;
}
This diff is collapsed.
...@@ -9,6 +9,8 @@ ...@@ -9,6 +9,8 @@
#include <abt.h> #include <abt.h>
#include <margo.h> #include <margo.h>
#include "ssg.h"
#include "ssg-internal.h"
#include "swim-fd.h" #include "swim-fd.h"
#include "utlist.h" #include "utlist.h"
...@@ -24,59 +26,55 @@ extern "C" { ...@@ -24,59 +26,55 @@ extern "C" {
#define SWIM_MAX_PIGGYBACK_ENTRIES 8 #define SWIM_MAX_PIGGYBACK_ENTRIES 8
#define SWIM_MAX_PIGGYBACK_TX_COUNT 50 #define SWIM_MAX_PIGGYBACK_TX_COUNT 50
/* debug printing macro for SWIM */ typedef struct swim_ping_target_list
#ifdef DEBUG {
#define SWIM_DEBUG(__swim_ctx, __fmt, ...) do { \ ssg_member_state_t **targets;
double __now = ABT_get_wtime(); \ unsigned int nslots;
fprintf(stdout, "[%.6lf] %20"PRIu64": SWIM " __fmt, __now, \ unsigned int len;
__swim_ctx->self_id, ## __VA_ARGS__); \ unsigned int dping_ndx;
fflush(stdout); \ } swim_ping_target_list_t;
} while(0)
#else typedef struct swim_member_update
#define SWIM_DEBUG(__swim_ctx, __fmt, ...) do { \ {
} while(0) ssg_member_id_t id;
#endif swim_member_state_t state;
} swim_member_update_t;
/* internal swim context implementation */ /* internal swim context implementation */
struct swim_context struct swim_context
{ {
margo_instance_id mid; margo_instance_id mid;
/* void pointer to user group data */
void *group_data;
/* group management callbacks */
swim_group_mgmt_callbacks_t swim_callbacks;
/* SWIM protocol parameters */ /* SWIM protocol parameters */
double prot_period_len; double prot_period_len;
int prot_susp_timeout; int prot_susp_timeout;
int prot_subgroup_sz; int prot_subgroup_sz;
/* SWIM protocol internal state */ /* SWIM protocol internal state */
swim_member_id_t self_id;
swim_member_inc_nr_t self_inc_nr; swim_member_inc_nr_t self_inc_nr;
swim_member_id_t dping_target_id; ssg_member_id_t dping_target_id;
swim_member_inc_nr_t dping_target_inc_nr; swim_member_inc_nr_t dping_target_inc_nr;
hg_addr_t dping_target_addr; hg_addr_t dping_target_addr;
double dping_timeout; double dping_timeout;
swim_member_id_t iping_target_ids[SWIM_MAX_SUBGROUP_SIZE]; ssg_member_id_t iping_target_ids[SWIM_MAX_SUBGROUP_SIZE];
hg_addr_t iping_target_addrs[SWIM_MAX_SUBGROUP_SIZE]; hg_addr_t iping_target_addrs[SWIM_MAX_SUBGROUP_SIZE];
int iping_target_ndx; int iping_target_ndx;
int ping_target_acked; int ping_target_acked;
int shutdown_flag; int shutdown_flag;
/* list of currently supspected SWIM members */ /* list of SWIM ping targets */
swim_ping_target_list_t target_list;
/* list of currently supspected SWIM targets */
void *suspect_list; void *suspect_list;
/* lists of SWIM membership updates and user-supplied updates */ /* lists of SWIM and SSG membership updates to gossip */
void *swim_update_list; void *swim_update_list;
void *user_update_list; void *ssg_update_list;
/* argobots pool for launching SWIM threads */ /* argobots pool for launching SWIM threads */
ABT_pool swim_pool; ABT_pool swim_pool;
/* mutex for modifying SWIM group state */
ABT_mutex swim_mutex;
/* swim protocol ULT handle */ /* swim protocol ULT handle */
ABT_thread prot_thread; ABT_thread prot_thread;
}; };
/* SWIM ping function prototypes */ /* SWIM ping function prototypes */
void swim_register_ping_rpcs( void swim_register_ping_rpcs(
swim_context_t * swim_ctx); ssg_group_t * group);
void swim_dping_send_ult( void swim_dping_send_ult(
void * t_arg); void * t_arg);
void swim_iping_send_ult( void swim_iping_send_ult(
...@@ -84,16 +82,16 @@ void swim_iping_send_ult( ...@@ -84,16 +82,16 @@ void swim_iping_send_ult(
/* SWIM update function prototypes */ /* SWIM update function prototypes */
void swim_retrieve_member_updates( void swim_retrieve_member_updates(
swim_context_t *swim_ctx, ssg_group_t * group,
swim_member_update_t *updates, swim_member_update_t * updates,
hg_size_t *update_count); hg_size_t *update_count);
void swim_retrieve_user_updates( void swim_retrieve_ssg_member_updates(
swim_context_t *swim_ctx, ssg_group_t * group,
swim_user_update_t *updates, ssg_member_update_t * updates,
hg_size_t *update_count); hg_size_t *update_count);
void swim_apply_member_updates( void swim_apply_member_updates(
swim_context_t *swim_ctx, ssg_group_t * group,
swim_member_update_t *updates, swim_member_update_t * updates,
hg_size_t update_count); hg_size_t update_count);
#ifdef __cplusplus #ifdef __cplusplus
......
This diff is collapsed.
This diff is collapsed.
...@@ -9,6 +9,9 @@ ...@@ -9,6 +9,9 @@
#include <stdint.h> #include <stdint.h>
#include <inttypes.h> #include <inttypes.h>
#include "ssg.h"
#include "ssg-internal.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
...@@ -17,7 +20,6 @@ extern "C" { ...@@ -17,7 +20,6 @@ extern "C" {
typedef struct swim_context swim_context_t; typedef struct swim_context swim_context_t;
/* swim member specific types */ /* swim member specific types */
typedef uint64_t swim_member_id_t;
typedef uint32_t swim_member_inc_nr_t; typedef uint32_t swim_member_inc_nr_t;
typedef enum swim_member_status typedef enum swim_member_status
{ {
...@@ -33,142 +35,40 @@ typedef struct swim_member_state ...@@ -33,142 +35,40 @@ typedef struct swim_member_state
swim_member_status_t status; swim_member_status_t status;
} swim_member_state_t; } swim_member_state_t;
/* SWIM protocol update */ /* forward declarations to work around weird SSG/SWIM circular dependency */
typedef struct swim_member_update struct ssg_group;
{ struct ssg_member_state;
swim_member_id_t id; struct ssg_member_update;
swim_member_state_t state;
} swim_member_update_t;
/* generic SWIM user update */
typedef struct swim_user_update
{
hg_size_t size;
void *data;
} swim_user_update_t;
#define SWIM_MEMBER_SET_ALIVE(__ms) do { \
__ms.inc_nr = 0; \
__ms.status = SWIM_MEMBER_ALIVE; \
} while(0)
#define SWIM_MEMBER_SET_DEAD(__ms) do { \
__ms.status = SWIM_MEMBER_DEAD; \
} while(0)
#define SWIM_MEMBER_IS_DEAD(__ms) (__ms.status == SWIM_MEMBER_DEAD)
/* SWIM callbacks for integrating with an overlying group management layer */
typedef struct swim_group_mgmt_callbacks
{
/**
* Retrieve a (non-dead) random group member from the group
* management layer to send a direct ping request to.
* NOTE: to ensure time-bounded detection of faulty members,
* round-robin selection of members is required.
*
* @param[in] group_data void pointer to group managment data
* @param[out] target_id ID of selected direct ping target
* @param[out] inc_nr SWIM incarnation number of target
* @param[out] target_addr HG address of target
* @returns 0 on successful selection of a target, -1 if no available targets
*/
int (*get_dping_target)(
void *group_data,
swim_member_id_t *target_id,
swim_member_inc_nr_t *inc_nr,
hg_addr_t *target_addr
);
/**
* Retrieve a set of (non-dead) random group members from the group
* management layer to send indirect ping requests to.
*
* @param[in] group_data void pointer to group managment data
* @param[in] dping_target_id corresponding dping target ID
* @param[in/out] num_targets on input, maximum number of indirect ping
* targets to select. on output, the actual
* number of selected targets
* @param[out] target_ids IDs of selected indirect ping targets
* @param[out] target_addrs HG addresses of targets
* @returns 0 on successful selection of targets, -1 if no available targets
*/
int (*get_iping_targets)(
void *group_data,
swim_member_id_t dping_target_id,
int *num_targets,
swim_member_id_t *target_ids,
hg_addr_t *target_addrs
);
/**
* Get the HG address corresponding to a given member ID.
*
* @param[in] group_data void pointer to group managment data
* @param[in] id member ID to query
* @param[out] addr HG address of given member
*/
void (*get_member_addr)(
void *group_data,
swim_member_id_t id,
hg_addr_t *addr
);
/**
* Get the SWIM protocol state corresponding to a given member ID.
*
* @param[in] group_data void pointer to group managment data
* @param[in] id member ID to query
* @param[out] state pointer to given member's SWIM state
*/
void (*get_member_state)(
void *group_data,
swim_member_id_t id,
swim_member_state_t **state
);
/**
* Apply a SWIM protocol update in the group management layer.
*
* @param[in] group_data void pointer to group managment data
* @param[in] update SWIM member update to apply to group
*/
void (*apply_member_update)(
void *group_data,
swim_member_update_t update
);
void (*apply_user_updates)(
void *group_data,
swim_user_update_t *updates,
hg_size_t update_count
);
} swim_group_mgmt_callbacks_t;
/** /**
* Initialize the SWIM protocol. * Initialize SWIM protocol for the given SSG group and Margo instance.
* *
* @param[in] group pointer to SSG group associated with this SWIM context
* @param[in] mid Margo instance ID * @param[in] mid Margo instance ID
* @param[in] group_data void pointer to group management data
* @param[in] self_id ID
* @param[in] swim_callbacks SWIM callbacks to group management layer
* @param[in] active boolean value indicating whether member should actively ping * @param[in] active boolean value indicating whether member should actively ping
* @returns SWIM context pointer on success, NULL otherwise * @returns SSG_SUCCESS on success, SSG_FAILURE otherwise
*/ */
swim_context_t * swim_init( int swim_init(
struct ssg_group * group,
margo_instance_id mid, margo_instance_id mid,
void *group_data,
swim_member_id_t self_id,
swim_group_mgmt_callbacks_t swim_callbacks,
int active); int active);
/** /**
* Finalize the SWIM protocol. * Finalize the given SSG group's SWIM protocol.
* *
* @param[in] swim_ctx SWIM context pointer * @param[in] group pointer to SSG group to finalize SWIM for
*/ */
void swim_finalize( void swim_finalize(
swim_context_t *swim_ctx); struct ssg_group * group);
/** /**
* *
* @returns SSG_SUCCESS on success, SSG_FAILURE otherwise
*/ */
void swim_register_user_update( int swim_apply_ssg_member_update(
swim_context_t *swim_ctx, struct ssg_group * group,
swim_user_update_t update); struct ssg_member_state * ms,
struct ssg_member_update update);
#ifdef __cplusplus #ifdef __cplusplus
} }
......
...@@ -146,7 +146,7 @@ int main(int argc, char *argv[]) ...@@ -146,7 +146,7 @@ int main(int argc, char *argv[])
ssg_group_id_free(in_g_id); ssg_group_id_free(in_g_id);
/* sleep for given duration to allow group time to run */ /* sleep for given duration to allow group time to run */
if (opts.leave_time > 0) if (opts.leave_time >= 0)
{ {
margo_thread_sleep(mid, (opts.leave_time - opts.join_time) * 1000.0); margo_thread_sleep(mid, (opts.leave_time - opts.join_time) * 1000.0);
...@@ -155,20 +155,17 @@ int main(int argc, char *argv[]) ...@@ -155,20 +155,17 @@ int main(int argc, char *argv[])
sret = ssg_group_leave(out_g_id); sret = ssg_group_leave(out_g_id);
DIE_IF(sret != SSG_SUCCESS, "ssg_group_leave"); DIE_IF(sret != SSG_SUCCESS, "ssg_group_leave");
goto cleanup;
}
if (opts.leave_time