Commit 71102045 authored by Shane Snyder's avatar Shane Snyder

bunch of changes to get attach working

parent d39c6689
......@@ -22,8 +22,8 @@ extern "C" {
/**
* Creates an SSG group from a given MPI communicator.
*
* @param[in] group_name Name of the SSG group
* @param[in] comm MPI communicator containing group members
* @param[in] group_name Name of the SSG group
* @param[in] comm MPI communicator containing group members
* @returns SSG group identifier on success, SSG_GROUP_ID_NULL otherwise
*/
ssg_group_id_t ssg_group_create_mpi(
......
......@@ -32,6 +32,9 @@ struct ssg_group_descriptor;
typedef struct ssg_group_descriptor *ssg_group_id_t;
#define SSG_GROUP_ID_NULL ((ssg_group_id_t)NULL)
/* HG proc routine prototype for ssg_group_id_t */
hg_return_t hg_proc_ssg_group_id_t(hg_proc_t proc, void *data);
/***************************************************
*** SSG runtime initialization/shutdown routines ***
***************************************************/
......@@ -60,9 +63,9 @@ int ssg_finalize(
/**
* Creates an SSG group from a given list of HG address strings.
*
* @param[in] group_name Name of the SSG group
* @param[in] group_addr_strs Array of HG address strings for each group member
* @param[in] group_size Number of group members
* @param[in] group_name Name of the SSG group
* @param[in] group_addr_strs Array of HG address strings for each group member
* @param[in] group_size Number of group members
* @returns SSG group identifier on success, SSG_GROUP_ID_NULL otherwise
*
* NOTE: The HG address string of the caller of this function must be present in
......@@ -78,11 +81,11 @@ ssg_group_id_t ssg_group_create(
* Creates an SSG group from a given config file containing the HG address strings
* of all group members.
*
* @param[in] group_name Name of the SSG group
* @param[in] file_name Name of the config file containing the corresponding
* @param[in] group_name Name of the SSG group
* @param[in] file_name Name of the config file containing the corresponding
* HG address strings for this group
* @param[out] group_id Pointer to output SSG group ID
* @returns SSG group identifier on success, SSG_GROUP_ID_NULL otherwise
*
*
* NOTE: The HG address string of the caller of this function must be present in
* the list of address strings given in the config file. That is, the caller of
......@@ -156,6 +159,29 @@ hg_addr_t ssg_get_addr(
ssg_group_id_t group_id,
ssg_member_id_t member_id);
/**
* Duplicates the given SSG group identifier.
*
* @param[in] group_id SSG group ID
* @returns SSG group identifier on success, SSG_GROUP_ID_NULL otherwise
*/
ssg_group_id_t ssg_group_id_dup(
ssg_group_id_t group_id);
/** Frees the given SSG group identifier.
*
* @param[in] group_id SSG group ID
*/
void ssg_group_id_free(
ssg_group_id_t group_id);
/** Dumps details of caller's membership in a given group to stdout.
*
* @param[in] group_id SSG group ID
*/
void ssg_group_dump(
ssg_group_id_t group_id);
#ifdef __cplusplus
}
#endif
......@@ -40,42 +40,61 @@ extern "C" {
/* SSG internal datatypes */
typedef struct ssg_member_state ssg_member_state_t;
typedef struct ssg_view ssg_view_t;
typedef struct ssg_group ssg_group_t;
typedef struct ssg_instance ssg_instance_t;
struct ssg_member_state
typedef struct ssg_member_state
{
char *addr_str;
hg_addr_t addr;
int is_member;
};
uint8_t is_member;
} ssg_member_state_t;
struct ssg_view
/* TODO: these really need to be ref-counted, else I don't think
* duplicated references can be kept in sync...
*/
/* TODO: associate a version number with a descriptor */
/* Descriptor for an SSG group; this is the structure that the public
 * ssg_group_id_t handle type points to.
 */
typedef struct ssg_group_descriptor
{
uint64_t magic_nr; /* NOTE(review): presumably a constant used to sanity-check descriptors -- confirm */
uint64_t name_hash; /* NOTE(review): presumably a hash of the group name -- confirm */
char *addr_str; /* HG address string; NOTE(review): whose address this is isn't shown here -- confirm */
uint8_t owner_status; /* NOTE(review): presumably an enum ssg_group_descriptor_owner_status value -- confirm */
} ssg_group_descriptor_t;
typedef struct ssg_group_view
{
uint32_t size;
ssg_member_state_t *member_states;
};
MERCURY_GEN_PROC(ssg_group_descriptor_t, \
((uint64_t) (magic_nr)) \
((uint64_t) (name_hash)) \
((hg_string_t) (addr_str)));
} ssg_group_view_t;
struct ssg_group
typedef struct ssg_group
{
char *group_name;
ssg_group_descriptor_t *group_descriptor;
ssg_view_t group_view;
char *name;
ssg_group_descriptor_t *descriptor;
ssg_member_id_t self_id;
ssg_group_view_t view;
void *fd_ctx; /* failure detector context (currently just SWIM) */
UT_hash_handle hh;
};
} ssg_group_t;
/* Per-group state for entries of the SSG instance's attached_group_table. */
typedef struct ssg_attached_group
{
char *name; /* group name */
ssg_group_descriptor_t *descriptor; /* descriptor associated with this group */
ssg_group_view_t view; /* group view (size and member states) */
UT_hash_handle hh; /* hash handle; NOTE(review): presumably makes this struct usable in a uthash table -- confirm */
} ssg_attached_group_t;
struct ssg_instance
typedef struct ssg_instance
{
margo_instance_id mid;
ssg_group_t *group_table;
ssg_attached_group_t *attached_group_table;
} ssg_instance_t;
/* Ownership states for a group descriptor.
 * NOTE(review): presumably these are the values stored in
 * ssg_group_descriptor_t.owner_status -- confirm against the implementation.
 */
enum ssg_group_descriptor_owner_status
{
SSG_OWNER_IS_UNASSOCIATED = 0,
SSG_OWNER_IS_MEMBER,
SSG_OWNER_IS_ATTACHER
};
/* SSG internal function prototypes */
......@@ -85,11 +104,11 @@ extern void hashlittle2(const void *key, size_t length, uint32_t *pc, uint32_t *
void ssg_register_rpcs(
void);
hg_return_t ssg_group_lookup(
ssg_group_t * g,
const char * const addr_strs[]);
hg_return_t ssg_group_attach_send(
ssg_group_descriptor_t *group_descriptor);
int ssg_group_attach_send(
ssg_group_descriptor_t * group_descriptor,
char ** group_name,
int * group_size,
void ** view_buf);
/* XXX: is this right? can this be a global? */
extern ssg_instance_t *ssg_inst;
......
This diff is collapsed.
This diff is collapsed.
......@@ -10,7 +10,6 @@
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <time.h>
#include <abt.h>
#include <margo.h>
......@@ -71,9 +70,6 @@ swim_context_t *swim_init(
if (g == NULL) return NULL;
/* seed RNG with time+rank combination to avoid identical seeds */
srand(time(NULL) + g->self_rank);
/* allocate structure for storing swim context */
swim_ctx = malloc(sizeof(*swim_ctx));
if (!swim_ctx) return NULL;
......
......@@ -31,6 +31,9 @@
} \
} while(0)
DECLARE_MARGO_RPC_HANDLER(group_id_forward_recv_ult)
static void usage()
{
fprintf(stderr,
......@@ -72,6 +75,12 @@ static void parse_args(int argc, char *argv[], int *sleep_time, const char **add
return;
}
/* Data registered with the "group_id_forward" RPC (via HG_Register_data) so
 * that the RPC handler can reach the margo instance and the caller's group
 * identifier storage.
 */
struct group_id_forward_context
{
margo_instance_id mid; /* margo instance the handler uses to respond */
ssg_group_id_t *g_id_p; /* location the handler decodes the received group ID into */
};
int main(int argc, char *argv[])
{
hg_class_t *hgcl = NULL;
......@@ -81,7 +90,14 @@ int main(int argc, char *argv[])
const char *addr_str;
const char *group_name = "simple_group";
ssg_group_id_t g_id;
int group_id_forward_rpc_id;
struct group_id_forward_context group_id_forward_ctx;
int is_attacher = 0;
hg_addr_t attacher_addr;
char attacher_addr_str[128];
hg_size_t attacher_addr_str_sz = 128;
hg_handle_t handle = HG_HANDLE_NULL;
hg_return_t hret;
int sret;
parse_args(argc, argv, &sleep_time, &addr_str);
......@@ -105,6 +121,14 @@ int main(int argc, char *argv[])
sret = ssg_init(mid);
DIE_IF(sret != SSG_SUCCESS, "ssg_init");
/* register RPC for forwarding an SSG group identifier */
group_id_forward_rpc_id = MERCURY_REGISTER(hgcl, "group_id_forward",
ssg_group_id_t, void, group_id_forward_recv_ult_handler);
group_id_forward_ctx.mid = mid;
group_id_forward_ctx.g_id_p = &g_id;
hret = HG_Register_data(hgcl, group_id_forward_rpc_id, &group_id_forward_ctx, NULL);
DIE_IF(hret != HG_SUCCESS, "HG_Register_data");
#ifdef SSG_HAVE_MPI
int my_world_rank;
int world_size;
......@@ -140,12 +164,35 @@ int main(int argc, char *argv[])
DIE_IF(g_id == SSG_GROUP_ID_NULL, "ssg_group_create");
if (my_world_rank == 1)
MPI_Send(&g_id, sizeof(g_id), MPI_BYTE, 0, 0, MPI_COMM_WORLD);
{
MPI_Recv(attacher_addr_str, 128, MPI_BYTE, 0, 0, MPI_COMM_WORLD,
MPI_STATUS_IGNORE);
/* send the identifier for the created group back to the attacher */
hret = margo_addr_lookup(mid, attacher_addr_str, &attacher_addr);
DIE_IF(hret != HG_SUCCESS, "margo_addr_lookup");
hret = HG_Create(margo_get_context(mid), attacher_addr,
group_id_forward_rpc_id, &handle);
DIE_IF(hret != HG_SUCCESS, "HG_Create");
hret = margo_forward(mid, handle, &g_id);
DIE_IF(hret != HG_SUCCESS, "margo_forward");
HG_Addr_free(hgcl, attacher_addr);
HG_Destroy(handle);
}
}
else
{
MPI_Recv(&g_id, sizeof(g_id), MPI_BYTE, 1, 0, MPI_COMM_WORLD,
MPI_STATUS_IGNORE);
hret = HG_Addr_self(hgcl, &attacher_addr);
DIE_IF(hret != HG_SUCCESS, "HG_Addr_self");
hret = HG_Addr_to_string(hgcl, attacher_addr_str, &attacher_addr_str_sz,
attacher_addr);
DIE_IF(hret != HG_SUCCESS, "HG_Addr_to_string");
HG_Addr_free(hgcl, attacher_addr);
/* send the attacher's address to a group member, so the group
* member can send us back the corresponding SSG group identifier
*/
MPI_Send(attacher_addr_str, 128, MPI_BYTE, 1, 0, MPI_COMM_WORLD);
}
#endif
......@@ -160,6 +207,9 @@ int main(int argc, char *argv[])
DIE_IF(sret != SSG_SUCCESS, "ssg_group_attach");
}
/* have everyone dump their group state */
ssg_group_dump(g_id);
/* XXX: for now, just sleep to give the attacher a chance to attach */
if (sleep_time > 0) margo_thread_sleep(mid, sleep_time * 1000.0);
......@@ -186,3 +236,26 @@ int main(int argc, char *argv[])
return 0;
}
/**
 * RPC handler ULT for "group_id_forward": receives an SSG group identifier.
 *
 * Fetches the group_id_forward_context registered for this RPC id, decodes
 * the RPC input directly into the ssg_group_id_t that the context points at,
 * and sends back an empty response. Every fallible step is checked with
 * DIE_IF, matching the error handling used elsewhere in this file.
 *
 * @param[in] handle HG handle of the incoming RPC; destroyed before returning
 */
static void group_id_forward_recv_ult(hg_handle_t handle)
{
    const struct hg_info *info;
    struct group_id_forward_context *group_id_forward_ctx;
    hg_return_t hret;

    info = HG_Get_info(handle);
    DIE_IF(info == NULL, "HG_Get_info");

    /* recover the context that main() attached to this RPC id */
    group_id_forward_ctx = (struct group_id_forward_context *)HG_Registered_data(
        info->hg_class, info->id);
    DIE_IF(group_id_forward_ctx == NULL, "HG_Registered_data");

    /* decode the forwarded group identifier into the caller's storage */
    hret = HG_Get_input(handle, group_id_forward_ctx->g_id_p);
    DIE_IF(hret != HG_SUCCESS, "HG_Get_input");

    /* the sender blocks in margo_forward until we respond, so a failed
     * response must not go unnoticed
     */
    hret = margo_respond(group_id_forward_ctx->mid, handle, NULL);
    DIE_IF(hret != HG_SUCCESS, "margo_respond");

    /* NOTE(review): this releases the input through the same pointer that
     * main() later reads as g_id. Whether that invalidates the identifier
     * depends on the free path of hg_proc_ssg_group_id_t, which is not
     * visible here -- confirm, and consider decoding into a temporary and
     * keeping an ssg_group_id_dup() copy if it does.
     */
    HG_Free_input(handle, group_id_forward_ctx->g_id_p);
    HG_Destroy(handle);
}
DEFINE_MARGO_RPC_HANDLER(group_id_forward_recv_ult)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment