ssg.c 52.1 KB
Newer Older
1 2 3 4 5 6
/*
 * Copyright (c) 2016 UChicago Argonne, LLC
 *
 * See COPYRIGHT in top-level directory.
 */

7
#include "ssg-config.h"
Shane Snyder's avatar
Shane Snyder committed
8

Jonathan Jenkins's avatar
Jonathan Jenkins committed
9 10 11 12
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
13
#include <errno.h>
Jonathan Jenkins's avatar
Jonathan Jenkins committed
14 15
#include <stdlib.h>
#include <string.h>
16
#include <time.h>
17
#include <linux/limits.h>
Jonathan Jenkins's avatar
Jonathan Jenkins committed
18
#include <assert.h>
Shane Snyder's avatar
Shane Snyder committed
19
#ifdef SSG_HAVE_MPI
20 21
#include <mpi.h>
#endif
Shane Snyder's avatar
Shane Snyder committed
22 23 24
#ifdef SSG_HAVE_PMIX
#include <pmix.h>
#endif
Jonathan Jenkins's avatar
Jonathan Jenkins committed
25

Shane Snyder's avatar
Shane Snyder committed
26
#include <mercury.h>
27
#include <abt.h>
Shane Snyder's avatar
Shane Snyder committed
28
#include <margo.h>
29

30
#include "ssg.h"
Shane Snyder's avatar
Shane Snyder committed
31 32 33
#ifdef SSG_HAVE_MPI
#include "ssg-mpi.h"
#endif
Shane Snyder's avatar
Shane Snyder committed
34 35 36
#ifdef SSG_HAVE_PMIX
#include "ssg-pmix.h"
#endif
37
#include "ssg-internal.h"
Shane Snyder's avatar
Shane Snyder committed
38
#include "swim-fd/swim-fd.h"
39

40 41 42
/* arguments for group lookup ULTs */
struct ssg_group_lookup_ult_args
{
43
    const char *addr_str;
44
    ssg_group_view_t *view;
Shane Snyder's avatar
Shane Snyder committed
45
    ABT_rwlock lock;
46
    int out;
47 48 49
};
static void ssg_group_lookup_ult(void * arg);

50
/* SSG helper routine prototypes */
51
static ssg_group_id_t ssg_group_create_internal(
Shane Snyder's avatar
Shane Snyder committed
52 53 54 55 56
    const char * group_name, const char * const group_addr_strs[],
    int group_size, ssg_membership_update_cb update_cb, void *update_cb_dat);
static int ssg_group_view_create(
    const char * const group_addr_strs[], int group_size,
    const char * self_addr_str, ABT_rwlock view_lock,
57
    ssg_group_view_t * view);
58
static ssg_member_state_t * ssg_group_view_add_member(
59 60
    const char * addr_str, hg_addr_t addr, ssg_member_id_t member_id,
    ssg_group_view_t * view);
61
static ssg_group_descriptor_t * ssg_group_descriptor_create(
62
    ssg_group_id_t g_id, const char * leader_addr_str, int owner_status);
63 64 65 66
static void ssg_group_destroy_internal(
    ssg_group_t * g);
static void ssg_attached_group_destroy(
    ssg_attached_group_t * ag);
Shane Snyder's avatar
Shane Snyder committed
67 68 69 70 71 72
static void ssg_group_view_destroy(
    ssg_group_view_t * view);
static void ssg_group_descriptor_free(
    ssg_group_descriptor_t * descriptor);
static ssg_member_id_t ssg_gen_member_id(
    const char * addr_str);
73 74
static const char ** ssg_addr_str_buf_to_list(
    const char * buf, int num_addrs);
75 76 77 78 79 80 81 82
#ifdef SSG_HAVE_PMIX
void ssg_pmix_proc_failure_notify_fn(
    size_t evhdlr_registration_id, pmix_status_t status, const pmix_proc_t *source,
    pmix_info_t info[], size_t ninfo, pmix_info_t results[], size_t nresults,
    pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata);
void ssg_pmix_proc_failure_reg_cb(
    pmix_status_t status, size_t evhdlr_ref, void *cbdata);
#endif 
83

84
/* XXX: we ultimately need per-mid ssg instances rather than 1 global */
85
ssg_instance_t *ssg_inst = NULL;
86

87 88 89
/****************************************************
 *** SSG runtime initialization/shutdown routines ***
 ****************************************************/
90

91 92
/*
 * Initialize the single global SSG instance (ssg_inst) for a margo instance.
 *
 * Allocates and zeroes the instance, creates its rwlock, registers the SSG
 * RPCs, seeds the C RNG, and caches this process's address string and the
 * member ID generated from it.
 *
 * mid: margo instance to bind the SSG instance to.
 *
 * Returns SSG_SUCCESS on success. Returns SSG_FAILURE if an instance already
 * exists or if any step fails; on failure everything allocated here is
 * released and ssg_inst is reset to NULL so that the rest of the library does
 * not see a dangling instance and initialization can be retried.
 */
int ssg_init(
    margo_instance_id mid)
{
    struct timespec ts;
    hg_addr_t self_addr;
    hg_size_t self_addr_str_size = 0;
    int have_self_addr = 0;

    if (ssg_inst)
        return SSG_FAILURE;

    /* initialize an SSG instance for this margo instance */
    ssg_inst = malloc(sizeof(*ssg_inst));
    if (!ssg_inst)
        return SSG_FAILURE;
    memset(ssg_inst, 0, sizeof(*ssg_inst));
    ssg_inst->mid = mid;
    if (ABT_rwlock_create(&ssg_inst->lock) != ABT_SUCCESS)
    {
        /* no lock to release yet -- just drop the instance */
        free(ssg_inst);
        ssg_inst = NULL;
        return SSG_FAILURE;
    }

    ssg_register_rpcs();

    /* seed RNG */
    clock_gettime(CLOCK_MONOTONIC, &ts);
    srand(ts.tv_nsec + getpid());

    /* get my self address string and ID (which are constant per-mid) */
    if (margo_addr_self(mid, &self_addr) != HG_SUCCESS)
        goto fail;
    have_self_addr = 1;

    /* first call with a NULL buffer only queries the required size */
    if (margo_addr_to_string(mid, NULL, &self_addr_str_size, self_addr) != HG_SUCCESS)
        goto fail;

    ssg_inst->self_addr_str = malloc(self_addr_str_size);
    if (ssg_inst->self_addr_str == NULL)
        goto fail;

    if (margo_addr_to_string(mid, ssg_inst->self_addr_str, &self_addr_str_size,
            self_addr) != HG_SUCCESS)
        goto fail;

    ssg_inst->self_id = ssg_gen_member_id(ssg_inst->self_addr_str);

    margo_addr_free(mid, self_addr);
    return SSG_SUCCESS;

fail:
    /* undo everything above and make sure the global does not dangle */
    if (have_self_addr)
        margo_addr_free(mid, self_addr);
    free(ssg_inst->self_addr_str); /* NULL (from memset) if never allocated */
    ABT_rwlock_free(&ssg_inst->lock);
    free(ssg_inst);
    ssg_inst = NULL;
    return SSG_FAILURE;
}
Jonathan Jenkins's avatar
Jonathan Jenkins committed
146

147
int ssg_finalize()
148
{
149 150
    ssg_group_descriptor_t *g_desc, *g_desc_tmp;
#if 0
151
    ssg_attached_group_t *ag, *ag_tmp;
152
#endif
153 154 155 156

    if (!ssg_inst)
        return SSG_FAILURE;

157

158
    /* destroy all active groups */
159 160
    ABT_rwlock_wrlock(ssg_inst->lock);
    HASH_ITER(hh, ssg_inst->g_desc_table, g_desc, g_desc_tmp)
161
    {
162
        HASH_DELETE(hh, ssg_inst->g_desc_table, g_desc);
163
        ABT_rwlock_unlock(ssg_inst->lock);
164 165
        ssg_group_destroy_internal(g_desc->g);
        ssg_group_descriptor_free(g_desc);
166
        ABT_rwlock_wrlock(ssg_inst->lock);
167
    }
168
    ABT_rwlock_unlock(ssg_inst->lock);
169

170
#if 0
171 172 173
    /* detach from all attached groups */
    HASH_ITER(hh, ssg_inst->attached_group_table, ag, ag_tmp)
    {
174
        HASH_DELETE(hh, ssg_inst->attached_group_table, ag);
175 176
        ssg_attached_group_destroy(ag);
    }
177 178 179 180 181 182
#endif

#ifdef SSG_HAVE_PMIX
    if (ssg_inst->pmix_failure_evhdlr_ref)
        PMIx_Deregister_event_handler(ssg_inst->pmix_failure_evhdlr_ref, NULL, NULL);
#endif
183

184 185
    ABT_rwlock_free(&ssg_inst->lock);

186
    free(ssg_inst->self_addr_str);
187 188 189
    free(ssg_inst);
    ssg_inst = NULL;

190
    return SSG_SUCCESS;
191
}
Jonathan Jenkins's avatar
Jonathan Jenkins committed
192

193 194 195
/*************************************
 *** SSG group management routines ***
 *************************************/
Jonathan Jenkins's avatar
Jonathan Jenkins committed
196

Shane Snyder's avatar
Shane Snyder committed
197 198 199 200 201 202
/*
 * Create an SSG group from an explicit list of member address strings.
 *
 * Thin public wrapper: all arguments are forwarded unchanged to
 * ssg_group_create_internal() and its result is returned as-is.
 */
ssg_group_id_t ssg_group_create(
    const char * group_name,
    const char * const group_addr_strs[],
    int group_size,
    ssg_membership_update_cb update_cb,
    void * update_cb_dat)
{
    return ssg_group_create_internal(group_name, group_addr_strs,
        group_size, update_cb, update_cb_dat);
}
Shane Snyder's avatar
Shane Snyder committed
211

Shane Snyder's avatar
Shane Snyder committed
212 213 214 215 216
/*
 * Create an SSG group whose member addresses are listed in a config file.
 *
 * The file is read in full and split on whitespace (space, tab, CR, LF);
 * every token is treated as one member address string. The tokens are packed
 * back-to-back (each NUL-terminated) into a single buffer, turned into an
 * address list, and passed to ssg_group_create().
 *
 * group_name:    name of the group to create.
 * file_name:     path of the config file to read.
 * update_cb:     membership update callback forwarded to ssg_group_create().
 * update_cb_dat: opaque pointer forwarded alongside update_cb.
 *
 * Returns the ID produced by ssg_group_create(), or SSG_GROUP_ID_INVALID if
 * the file cannot be opened/stat'ed/read, contains no addresses, or an
 * allocation fails. All temporary buffers are released before returning.
 */
ssg_group_id_t ssg_group_create_config(
    const char * group_name,
    const char * file_name,
    ssg_membership_update_cb update_cb,
    void * update_cb_dat)
{
    int fd;
    struct stat st;
    char *rd_buf = NULL;
    ssize_t rd_buf_size;
    char *tok;
    void *addr_str_buf = NULL;
    int addr_str_buf_len = 0, num_addrs = 0;
    const char **addr_strs = NULL;
    int ret;
    ssg_group_id_t g_id = SSG_GROUP_ID_INVALID;

    /* open config file for reading */
    fd = open(file_name, O_RDONLY);
    if (fd == -1)
    {
        fprintf(stderr, "Error: SSG unable to open config file %s for group %s\n",
            file_name, group_name);
        goto fini;
    }

    /* get file size and allocate a buffer to store it */
    ret = fstat(fd, &st);
    if (ret == -1)
    {
        fprintf(stderr, "Error: SSG unable to stat config file %s for group %s\n",
            file_name, group_name);
        goto fini;
    }
    rd_buf = malloc(st.st_size+1);
    if (rd_buf == NULL) goto fini;

    /* load it all in one fell swoop */
    rd_buf_size = read(fd, rd_buf, st.st_size);
    if (rd_buf_size != st.st_size)
    {
        fprintf(stderr, "Error: SSG unable to read config file %s for group %s\n",
            file_name, group_name);
        goto fini;
    }
    rd_buf[rd_buf_size]='\0';

    /* strtok the result - each space-delimited address is assumed to be
     * a unique mercury address
     */
    tok = strtok(rd_buf, "\r\n\t ");
    if (tok == NULL) goto fini;

    /* build up the address buffer
     *
     * Every token is stored with its own NUL terminator. All tokens but the
     * last are followed by at least one delimiter byte in the file, but the
     * last one need not be (no trailing newline), so the packed form can be
     * one byte LARGER than the file: "a b" is 3 bytes on disk and 4 bytes
     * packed. Hence the +1.
     */
    addr_str_buf = malloc((size_t)rd_buf_size + 1);
    if (addr_str_buf == NULL) goto fini;
    do
    {
        size_t tok_size = strlen(tok);
        memcpy((char*)addr_str_buf + addr_str_buf_len, tok, tok_size+1);
        addr_str_buf_len += (int)(tok_size+1);
        num_addrs++;
        tok = strtok(NULL, "\r\n\t ");
    } while (tok != NULL);
    if (addr_str_buf_len < rd_buf_size + 1)
    {
        /* trim the unused tail; if the shrink fails the original buffer is
         * still valid, so just keep using it
         */
        void *tmp = realloc(addr_str_buf, addr_str_buf_len);
        if (tmp != NULL) addr_str_buf = tmp;
    }

    /* set up address string array for group members */
    addr_strs = ssg_addr_str_buf_to_list(addr_str_buf, num_addrs);
    if (!addr_strs) goto fini;

    /* invoke the generic group create routine using our list of addrs */
    g_id = ssg_group_create(group_name, addr_strs, num_addrs,
        update_cb, update_cb_dat);

fini:
    /* cleanup before returning */
    if (fd != -1) close(fd);
    free(rd_buf);
    free(addr_str_buf);
    free(addr_strs);

    return g_id;
}

Shane Snyder's avatar
Shane Snyder committed
302 303 304 305 306 307
#ifdef SSG_HAVE_MPI
/*
 * Create an SSG group containing every rank of an MPI communicator.
 *
 * Each rank contributes its own address string (NUL terminator included).
 * The per-rank sizes are allgathered first, turned into displacements, and
 * then the strings themselves are allgathered into one packed buffer that is
 * converted to an address list for ssg_group_create().
 *
 * Returns the ID from ssg_group_create(), or SSG_GROUP_ID_INVALID if SSG is
 * not initialized or an allocation / list conversion fails.
 */
ssg_group_id_t ssg_group_create_mpi(
    const char * group_name,
    MPI_Comm comm,
    ssg_membership_update_cb update_cb,
    void * update_cb_dat)
{
    ssg_group_id_t g_id = SSG_GROUP_ID_INVALID;
    const char **addr_strs = NULL;
    char *addr_str_buf = NULL;
    int *sizes = NULL;
    int *sizes_psum = NULL;
    int comm_size = 0, comm_rank = 0;
    int self_addr_str_size = 0;
    int i;

    if (ssg_inst == NULL) goto fini;

    /* gather the buffer sizes: fill in our own slot, then exchange in place */
    MPI_Comm_size(comm, &comm_size);
    MPI_Comm_rank(comm, &comm_rank);
    sizes = malloc(comm_size * sizeof(*sizes));
    if (!sizes) goto fini;
    self_addr_str_size = (int)strlen(ssg_inst->self_addr_str) + 1;
    sizes[comm_rank] = self_addr_str_size;
    MPI_Allgather(MPI_IN_PLACE, 0, MPI_BYTE, sizes, 1, MPI_INT, comm);

    /* exclusive prefix sum of the sizes; entry [comm_size] holds the total */
    sizes_psum = malloc((comm_size+1) * sizeof(*sizes_psum));
    if (!sizes_psum) goto fini;
    sizes_psum[0] = 0;
    for (i = 0; i < comm_size; i++)
    {
        sizes_psum[i+1] = sizes_psum[i] + sizes[i];
    }

    /* allgather the addresses, using the prefix sums as displacements */
    addr_str_buf = malloc(sizes_psum[comm_size]);
    if (!addr_str_buf) goto fini;
    MPI_Allgatherv(ssg_inst->self_addr_str, self_addr_str_size, MPI_BYTE,
            addr_str_buf, sizes, sizes_psum, MPI_BYTE, comm);

    /* set up address string array for group members */
    addr_strs = ssg_addr_str_buf_to_list(addr_str_buf, comm_size);
    if (addr_strs == NULL) goto fini;

    /* invoke the generic group create routine using our list of addrs */
    g_id = ssg_group_create(group_name, addr_strs, comm_size,
        update_cb, update_cb_dat);

fini:
    /* cleanup before returning */
    free(sizes);
    free(sizes_psum);
    free(addr_str_buf);
    free(addr_strs);

    return g_id;
}
Shane Snyder's avatar
Shane Snyder committed
361
#endif
Shane Snyder's avatar
Shane Snyder committed
362

Shane Snyder's avatar
Shane Snyder committed
363 364 365
#ifdef SSG_HAVE_PMIX
ssg_group_id_t ssg_group_create_pmix(
    const char * group_name,
Shane Snyder's avatar
Shane Snyder committed
366
    const pmix_proc_t proc,
Shane Snyder's avatar
Shane Snyder committed
367 368 369
    ssg_membership_update_cb update_cb,
    void * update_cb_dat)
{
Shane Snyder's avatar
Shane Snyder committed
370 371
    pmix_proc_t tmp_proc;
    pmix_value_t value;
372
    pmix_value_t *val_p;
Shane Snyder's avatar
Shane Snyder committed
373 374
    pmix_value_t *addr_vals = NULL;
    unsigned int nprocs;
375
    char key[512];
Shane Snyder's avatar
Shane Snyder committed
376 377 378 379 380
    pmix_info_t *info;
    bool flag;
    const char **addr_strs = NULL;
    unsigned int n;
    pmix_status_t ret;
381
    ssg_group_id_t g_id = SSG_GROUP_ID_INVALID;
Shane Snyder's avatar
Shane Snyder committed
382

Shane Snyder's avatar
Shane Snyder committed
383 384
    if (!ssg_inst || !PMIx_Initialized()) goto fini;

385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405
    /* XXX config switch for this functionality */
    /* if not already done, register for PMIx process failure notifications */
    if (!ssg_inst->pmix_failure_evhdlr_ref)
    {
        /* use PMIx event registrations to inform us of terminated/aborted procs */
        pmix_status_t err_codes[2] = {PMIX_PROC_TERMINATED, PMIX_ERR_PROC_ABORTED};
        PMIx_Register_event_handler(err_codes, 2, NULL, 0,
            ssg_pmix_proc_failure_notify_fn, ssg_pmix_proc_failure_reg_cb,
            &ssg_inst->pmix_failure_evhdlr_ref);

        /* exchange information needed to map PMIx ranks to SSG member IDs */
        snprintf(key, 512, "ssg-%s-%d-id", proc.nspace, proc.rank);
        PMIX_VALUE_LOAD(&value, &ssg_inst->self_id, PMIX_UINT64);
        ret = PMIx_Put(PMIX_GLOBAL, key, &value);
        if (ret != PMIX_SUCCESS)
        {
            fprintf(stderr, "Warning: skipping PMIx event notification registration -- "\
                "Unable to put PMIx rank mapping\n");
            PMIx_Deregister_event_handler(ssg_inst->pmix_failure_evhdlr_ref, NULL, NULL);
        }
    }
Shane Snyder's avatar
Shane Snyder committed
406 407 408 409 410 411 412 413 414 415

    /* XXX note we are assuming every process in the job wants to join this group... */
    /* get the total nprocs in the job */
    PMIX_PROC_LOAD(&tmp_proc, proc.nspace, PMIX_RANK_WILDCARD);
    ret = PMIx_Get(&tmp_proc, PMIX_JOB_SIZE, NULL, 0, &val_p);
    if (ret != PMIX_SUCCESS) goto fini;
    nprocs = (int)val_p->data.uint32;
    PMIX_VALUE_RELEASE(val_p);

    /* put my address string using a well-known key */
416 417
    snprintf(key, 512, "ssg-%s-%s-%d-hg-addr", group_name, proc.nspace, proc.rank);
    PMIX_VALUE_LOAD(&value, ssg_inst->self_addr_str, PMIX_STRING);
418
    ret = PMIx_Put(PMIX_GLOBAL, key, &value);
Shane Snyder's avatar
Shane Snyder committed
419 420 421 422 423 424 425 426 427 428 429
    if (ret != PMIX_SUCCESS) goto fini;

    /* commit the put data to the local pmix server */
    ret = PMIx_Commit();
    if (ret != PMIX_SUCCESS) goto fini;

    /* barrier, additionally requesting to collect relevant process data */
    PMIX_INFO_CREATE(info, 1);
    flag = true;
    PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL);
    ret = PMIx_Fence(&proc, 1, info, 1);
430
    if (ret != PMIX_SUCCESS) goto fini;
Shane Snyder's avatar
Shane Snyder committed
431 432 433 434 435 436 437 438 439 440
    PMIX_INFO_FREE(info, 1);

    addr_strs = malloc(nprocs * sizeof(*addr_strs));
    if (addr_strs == NULL) goto fini;

    /* finalize exchange by getting each member's address */
    PMIX_VALUE_CREATE(addr_vals, nprocs);
    for (n = 0; n < nprocs; n++)
    {
        /* skip ourselves */
441 442
        if(n == proc.rank)
        {
443
            addr_strs[n] = ssg_inst->self_addr_str;
444 445
            continue;
        }
Shane Snyder's avatar
Shane Snyder committed
446

447 448
        if (snprintf(key, 128, "ssg-%s-%s-%d-hg-addr", group_name,
            proc.nspace, n) >= 128) goto fini;
Shane Snyder's avatar
Shane Snyder committed
449 450 451 452 453 454 455 456

        tmp_proc.rank = n;
        val_p = &addr_vals[n];
        ret = PMIx_Get(&tmp_proc, key, NULL, 0, &val_p);
        if (ret != PMIX_SUCCESS) goto fini;

        addr_strs[n] = val_p->data.string;
    }
Shane Snyder's avatar
Shane Snyder committed
457

Shane Snyder's avatar
Shane Snyder committed
458
    /* invoke the generic group create routine using our list of addrs */
459
    g_id = ssg_group_create(group_name, addr_strs, nprocs,
Shane Snyder's avatar
Shane Snyder committed
460
        update_cb, update_cb_dat);
Shane Snyder's avatar
Shane Snyder committed
461 462

fini:
Shane Snyder's avatar
Shane Snyder committed
463 464 465
    /* cleanup before returning */
    free(addr_strs);
    PMIX_VALUE_FREE(addr_vals, nprocs);
Shane Snyder's avatar
Shane Snyder committed
466

467
    return g_id;
Shane Snyder's avatar
Shane Snyder committed
468 469 470
}
#endif 

Shane Snyder's avatar
Shane Snyder committed
471 472
/*
 * Destroy a group previously created on this process.
 *
 * The descriptor is looked up by ID and unlinked from the instance's
 * descriptor table under the instance write lock; the group itself and the
 * descriptor are then destroyed after the lock has been released.
 *
 * Returns SSG_SUCCESS on success, SSG_FAILURE if SSG is not initialized, the
 * ID is SSG_GROUP_ID_INVALID, or no descriptor with that ID is registered.
 */
int ssg_group_destroy(
    ssg_group_id_t group_id)
{
    ssg_group_descriptor_t *desc = NULL;

    if (ssg_inst == NULL || group_id == SSG_GROUP_ID_INVALID)
        return SSG_FAILURE;

    /* find the group descriptor and detach it from the table */
    ABT_rwlock_wrlock(ssg_inst->lock);
    HASH_FIND(hh, ssg_inst->g_desc_table, &group_id, sizeof(ssg_group_id_t), desc);
    if (desc != NULL)
    {
        HASH_DELETE(hh, ssg_inst->g_desc_table, desc);
    }
    ABT_rwlock_unlock(ssg_inst->lock);

    if (desc == NULL)
    {
        fprintf(stderr, "Error: SSG unable to find expected group ID\n");
        return SSG_FAILURE;
    }

    /* destroy the group, free the descriptor */
    ssg_group_destroy_internal(desc->g);
    ssg_group_descriptor_free(desc);

    return SSG_SUCCESS;
}

499
#if 0
Shane Snyder's avatar
Shane Snyder committed
500 501
ssg_group_id_t ssg_group_join(
    ssg_group_id_t in_group_id,
502 503
    ssg_membership_update_cb update_cb,
    void * update_cb_dat)
504
{
Shane Snyder's avatar
Shane Snyder committed
505
    ssg_group_descriptor_t *in_group_descriptor = (ssg_group_descriptor_t *)in_group_id;
506
    hg_addr_t group_target_addr = HG_ADDR_NULL;
Shane Snyder's avatar
Shane Snyder committed
507 508 509 510
    char *group_name = NULL;
    int group_size;
    void *view_buf = NULL;
    const char **addr_strs = NULL;
511
    hg_return_t hret;
512
    int sret;
Shane Snyder's avatar
Shane Snyder committed
513 514
    ssg_group_t *g = NULL;
    ssg_group_id_t g_id = SSG_GROUP_ID_NULL;
515

Shane Snyder's avatar
Shane Snyder committed
516
    if (!ssg_inst || in_group_id == SSG_GROUP_ID_NULL) goto fini;
517

Shane Snyder's avatar
Shane Snyder committed
518
    if (in_group_descriptor->owner_status == SSG_OWNER_IS_MEMBER)
519
    {
Shane Snyder's avatar
Shane Snyder committed
520
        fprintf(stderr, "Error: SSG unable to join a group it is already a member of\n");
521 522
        goto fini;
    }
Shane Snyder's avatar
Shane Snyder committed
523
    else if (in_group_descriptor->owner_status == SSG_OWNER_IS_ATTACHER)
524
    {
Shane Snyder's avatar
Shane Snyder committed
525
        fprintf(stderr, "Error: SSG unable to join a group it is attached to\n");
526 527
        goto fini;
    }
528

529 530 531 532 533 534 535 536 537
    /* lookup the address of the target group member in the GID */
    hret = margo_addr_lookup(ssg_inst->mid, in_group_descriptor->addr_str,
        &group_target_addr);
    if (hret != HG_SUCCESS) goto fini;

    sret = ssg_group_join_send(in_group_descriptor, group_target_addr,
        &group_name, &group_size, &view_buf);
    if (sret != SSG_SUCCESS || !group_name || !view_buf) goto fini;

Shane Snyder's avatar
Shane Snyder committed
538 539
    /* set up address string array for all group members */
    addr_strs = ssg_addr_str_buf_to_list(view_buf, group_size);
540 541
    if (!addr_strs) goto fini;

Shane Snyder's avatar
Shane Snyder committed
542 543 544
    /* append self address string to list of group member address strings */
    addr_strs = realloc(addr_strs, (group_size+1)*sizeof(char *));
    if(!addr_strs) goto fini;
545
    addr_strs[group_size++] = ssg_inst->self_addr_str;
Shane Snyder's avatar
Shane Snyder committed
546 547 548 549 550

    g = ssg_group_create_internal(group_name, addr_strs, group_size,
            update_cb, update_cb_dat);
    if (g)
    {
Shane Snyder's avatar
Shane Snyder committed
551
        g_id = (ssg_group_id_t)g->descriptor;
552 553 554

        /* don't free on success */
        group_name = NULL;
Shane Snyder's avatar
Shane Snyder committed
555
    }
Shane Snyder's avatar
Shane Snyder committed
556 557

fini:
558 559
    if (group_target_addr != HG_ADDR_NULL)
        margo_addr_free(ssg_inst->mid, group_target_addr);
Shane Snyder's avatar
Shane Snyder committed
560 561 562
    free(addr_strs);
    free(view_buf);
    free(group_name);
563

Shane Snyder's avatar
Shane Snyder committed
564
    return g_id;
Shane Snyder's avatar
Shane Snyder committed
565 566
}

Shane Snyder's avatar
Shane Snyder committed
567
int ssg_group_leave(
568
    ssg_group_id_t group_id)
Shane Snyder's avatar
Shane Snyder committed
569
{
Shane Snyder's avatar
Shane Snyder committed
570
    ssg_group_descriptor_t *group_descriptor = (ssg_group_descriptor_t *)group_id;
571 572 573 574
    ssg_group_t *g = NULL;
    hg_addr_t group_target_addr = HG_ADDR_NULL;
    hg_return_t hret;
    int sret = SSG_FAILURE;
575

576
    if (!ssg_inst || group_id == SSG_GROUP_ID_NULL) goto fini;
577

578 579
    if (group_descriptor->owner_status != SSG_OWNER_IS_MEMBER)
    {
Shane Snyder's avatar
Shane Snyder committed
580
        fprintf(stderr, "Error: SSG unable to leave group it is not a member of\n");
581
        goto fini;
582 583
    }

584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601
    ABT_rwlock_rdlock(ssg_inst->lock);
    HASH_FIND(hh, ssg_inst->group_table, &group_descriptor->name_hash,
        sizeof(uint64_t), g);
    if (!g)
    {
        ABT_rwlock_unlock(ssg_inst->lock);
        goto fini;
    }

    /* send the leave req to the first member in the view */
    hret = margo_addr_dup(ssg_inst->mid, g->view.member_map->addr, &group_target_addr);
    if (hret != HG_SUCCESS)
    {
        ABT_rwlock_unlock(ssg_inst->lock);
        goto fini;
    }
    ABT_rwlock_unlock(ssg_inst->lock);

602
    sret = ssg_group_leave_send(group_descriptor, ssg_inst->self_id, group_target_addr);
603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618
    if (sret != SSG_SUCCESS) goto fini;

    /* at least one group member knows of the leave request -- safe to
     * shutdown the group locally
     */

    /* re-lookup the group as we don't hold the lock while sending the leave req */
    ABT_rwlock_wrlock(ssg_inst->lock);
    HASH_FIND(hh, ssg_inst->group_table, &group_descriptor->name_hash,
        sizeof(uint64_t), g);
    if (g)
    {
        HASH_DELETE(hh, ssg_inst->group_table, g);
        ABT_rwlock_unlock(ssg_inst->lock);
        ssg_group_destroy_internal(g);
    }
Shane Snyder's avatar
Shane Snyder committed
619 620
    else
        ABT_rwlock_unlock(ssg_inst->lock);
621 622 623 624 625 626 627 628

    sret = SSG_SUCCESS;

fini:
    if (group_target_addr != HG_ADDR_NULL)
        margo_addr_free(ssg_inst->mid, group_target_addr);

    return sret;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
629 630
}

631 632 633
/*
 * Attach to a group as a non-member observer.
 *
 * Sends an attach request to obtain the group's name, size and packed
 * membership view, builds an attached-group structure with its own view,
 * and registers it in the instance's attached-group table.
 *
 * Returns SSG_SUCCESS on success. On any failure the partially built
 * attached group and all temporaries are released and a non-success status
 * is returned (the status from ssg_group_attach_send() if the request itself
 * failed, SSG_FAILURE otherwise).
 *
 * NOTE(review): compiled out (#if 0). ag->lock is passed to
 * ssg_group_view_create() straight after the memset; no ABT_rwlock_create()
 * for it is visible here -- confirm before re-enabling.
 */
int ssg_group_attach(
    ssg_group_id_t group_id)
{
    ssg_group_descriptor_t *group_descriptor = (ssg_group_descriptor_t *)group_id;
    ssg_attached_group_t *ag = NULL;
    char *group_name = NULL;
    int group_size;
    void *view_buf = NULL;
    const char **addr_strs = NULL;
    int sret = SSG_FAILURE;

    if (!ssg_inst || group_id == SSG_GROUP_ID_NULL) goto fini;

    if (group_descriptor->owner_status == SSG_OWNER_IS_MEMBER)
    {
        fprintf(stderr, "Error: SSG unable to attach a group it is a member of\n");
        goto fini;
    }
    else if (group_descriptor->owner_status == SSG_OWNER_IS_ATTACHER)
    {
        fprintf(stderr, "Error: SSG unable to attach a group it is" \
            " already attached to\n");
        goto fini;
    }

    /* send the attach request to a group member to initiate a bulk transfer
     * of the group's membership view
     */
    sret = ssg_group_attach_send(group_descriptor, &group_name,
        &group_size, &view_buf);
    if (sret != SSG_SUCCESS) goto fini;

    /* the request succeeded, so sret now holds SSG_SUCCESS; reset it so that
     * the local failures below are not reported to the caller as success
     */
    sret = SSG_FAILURE;
    if (!group_name || !view_buf) goto fini;

    /* set up address string array for all group members */
    addr_strs = ssg_addr_str_buf_to_list(view_buf, group_size);
    if (!addr_strs) goto fini;

    /* allocate an SSG attached group data structure and initialize some of it */
    ag = malloc(sizeof(*ag));
    if (!ag) goto fini;
    memset(ag, 0, sizeof(*ag));
    ag->name = strdup(group_name);
    if (!ag->name) goto fini;
    ag->descriptor = ssg_group_descriptor_dup(group_descriptor);
    if (!ag->descriptor) goto fini;
    ag->descriptor->owner_status = SSG_OWNER_IS_ATTACHER;

    /* create the view for the group */
    sret = ssg_group_view_create(addr_strs, group_size, NULL, ag->lock, &ag->view);
    if (sret != SSG_SUCCESS) goto fini;

    /* add this group reference to our group table */
    HASH_ADD(hh, ssg_inst->attached_group_table, descriptor->name_hash,
        sizeof(uint64_t), ag);

    sret = SSG_SUCCESS;

    /* don't free on success */
    group_name = NULL;
    ag = NULL;
fini:
    if (ag) ssg_attached_group_destroy(ag);
    free(addr_strs);
    free(view_buf);
    free(group_name);

    return sret;
}

/*
 * Detach from a group this process previously attached to.
 *
 * Finds the attached-group entry by the descriptor's name hash, removes it
 * from the instance's attached-group table and destroys it.
 *
 * Returns SSG_SUCCESS on success, SSG_FAILURE if SSG is not initialized, the
 * ID is SSG_GROUP_ID_NULL, the descriptor is not marked as attached, or no
 * matching entry exists. (Compiled out via #if 0.)
 */
int ssg_group_detach(
    ssg_group_id_t group_id)
{
    ssg_group_descriptor_t *group_descriptor = (ssg_group_descriptor_t *)group_id;
    ssg_attached_group_t *attached = NULL;

    if (ssg_inst == NULL || group_id == SSG_GROUP_ID_NULL)
        return SSG_FAILURE;

    if (group_descriptor->owner_status != SSG_OWNER_IS_ATTACHER)
    {
        fprintf(stderr, "Error: SSG unable to detach from group that" \
            " was never attached\n");
        return SSG_FAILURE;
    }

    /* find the attached group structure and destroy it */
    HASH_FIND(hh, ssg_inst->attached_group_table, &group_descriptor->name_hash,
        sizeof(uint64_t), attached);
    if (attached == NULL)
    {
        fprintf(stderr, "Error: SSG unable to find expected group attached\n");
        return SSG_FAILURE;
    }

    HASH_DELETE(hh, ssg_inst->attached_group_table, attached);
    ssg_attached_group_destroy(attached);

    return SSG_SUCCESS;
}
726
#endif
727

728 729 730
/*********************************************************
 *** SSG routines for obtaining self/group information ***
 *********************************************************/
Shane Snyder's avatar
Shane Snyder committed
731

732 733
/*
 * Return this process's SSG member ID, or SSG_MEMBER_ID_INVALID if SSG has
 * not been initialized.
 *
 * mid is currently unused.
 */
ssg_member_id_t ssg_get_self_id(
    margo_instance_id mid)
{
    /* XXX eventually mid needed to distinguish multiple ssg contexts */

    return (ssg_inst != NULL) ? ssg_inst->self_id : SSG_MEMBER_ID_INVALID;
}

742 743
/*
 * Return the number of members in a group, counting the caller.
 *
 * The descriptor is looked up by ID under the instance read lock. For groups
 * the caller is a member of, the view size is read under the group's own
 * read lock and incremented by one for the caller itself.
 *
 * Returns 0 if SSG is not initialized, the ID is SSG_GROUP_ID_INVALID, the
 * ID is unknown, or the caller is not a member of the group.
 */
int ssg_get_group_size(
    ssg_group_id_t group_id)
{
    ssg_group_descriptor_t *g_desc = NULL;
    int group_size = 0;

    if (ssg_inst == NULL || group_id == SSG_GROUP_ID_INVALID)
        return 0;

    ABT_rwlock_rdlock(ssg_inst->lock);

    /* find the group descriptor */
    HASH_FIND(hh, ssg_inst->g_desc_table, &group_id, sizeof(ssg_group_id_t), g_desc);
    if (g_desc == NULL)
    {
        ABT_rwlock_unlock(ssg_inst->lock);
        fprintf(stderr, "Error: SSG unable to find expected group ID\n");
        return 0;
    }

    if (g_desc->owner_status == SSG_OWNER_IS_MEMBER)
    {
        /* the view size is read without the +1; add ourself to view size */
        ABT_rwlock_rdlock(g_desc->g->lock);
        group_size = 1 + g_desc->g->view.size;
        ABT_rwlock_unlock(g_desc->g->lock);
    }
#if 0
    else if (group_descriptor->owner_status == SSG_OWNER_IS_ATTACHER)
    {
        ssg_attached_group_t *ag;

        HASH_FIND(hh, ssg_inst->attached_group_table, &group_descriptor->name_hash,
            sizeof(uint64_t), ag);
        if (ag)
        {
            ABT_rwlock_rdlock(ag->lock);
            group_size = ag->view.size;
            ABT_rwlock_unlock(ag->lock);
        }
    }
#endif
    else
    {
        fprintf(stderr, "Error: SSG can only obtain size of groups that the caller" \
            " is a member of or an attacher of\n");
    }

    ABT_rwlock_unlock(ssg_inst->lock);

    return group_size;
}

793
hg_addr_t ssg_get_group_addr(
794 795
    ssg_group_id_t group_id,
    ssg_member_id_t member_id)
Shane Snyder's avatar
Shane Snyder committed
796
{
797
    ssg_group_descriptor_t *g_desc;
798
    ssg_member_state_t *member_state;
Shane Snyder's avatar
Shane Snyder committed
799
    hg_addr_t member_addr = HG_ADDR_NULL;
800

801
    if (!ssg_inst || group_id == SSG_GROUP_ID_INVALID ||
802
            member_id == SSG_MEMBER_ID_INVALID)
803 804
        return HG_ADDR_NULL;

805
    ABT_rwlock_rdlock(ssg_inst->lock);
806

807 808 809 810
    /* find the group descriptor */
    HASH_FIND(hh, ssg_inst->g_desc_table, &group_id, sizeof(ssg_group_id_t), g_desc);
    if (!g_desc)
    {
811
        ABT_rwlock_unlock(ssg_inst->lock);
812 813 814 815 816 817 818 819 820 821 822 823
        fprintf(stderr, "Error: SSG unable to find expected group ID\n");
        return HG_ADDR_NULL;
    }

    if (g_desc->owner_status == SSG_OWNER_IS_MEMBER)
    {
        ABT_rwlock_rdlock(g_desc->g->lock);
        HASH_FIND(hh, g_desc->g->view.member_map, &member_id,
            sizeof(ssg_member_id_t), member_state);
        if (member_state) 
            member_addr = member_state->addr;
        ABT_rwlock_unlock(g_desc->g->lock);
824
    }
825
#if 0
826 827 828 829 830 831 832
    else if (group_descriptor->owner_status == SSG_OWNER_IS_ATTACHER)
    {
        ssg_attached_group_t *ag;

        HASH_FIND(hh, ssg_inst->attached_group_table, &group_descriptor->name_hash,
            sizeof(uint64_t), ag);
        if (ag)
Shane Snyder's avatar
Shane Snyder committed
833 834 835 836 837 838 839 840
        {
            ABT_rwlock_rdlock(ag->lock);
            HASH_FIND(hh, ag->view.member_map, &member_id, sizeof(ssg_member_id_t),
                member_state);
            if (member_state) 
                member_addr = member_state->addr;
            ABT_rwlock_unlock(ag->lock);
        }
841
    }
842
#endif
843 844 845 846 847 848
    else
    {
        fprintf(stderr, "Error: SSG can only obtain member addresses of groups" \
            " that the caller is a member of or an attacher of\n");
    }

849 850
    ABT_rwlock_unlock(ssg_inst->lock);

Shane Snyder's avatar
Shane Snyder committed
851
    return member_addr;
852 853
}

854
char *ssg_group_id_get_addr_str(
855 856
    ssg_group_id_t group_id)
{
857 858
    ssg_group_descriptor_t *g_desc;
    char *addr_str;
859

860
    if (!ssg_inst || group_id == SSG_GROUP_ID_INVALID) return 0;
861

862
    ABT_rwlock_rdlock(ssg_inst->lock);
863

864 865 866 867 868 869 870 871
    /* find the group descriptor */
    HASH_FIND(hh, ssg_inst->g_desc_table, &group_id, sizeof(ssg_group_id_t), g_desc);
    if (!g_desc)
    {
        ABT_rwlock_unlock(ssg_inst->lock);
        fprintf(stderr, "Error: SSG unable to find expected group ID\n");
        return NULL;
    }
872

873
    addr_str = strdup(g_desc->addr_str);
874

875 876 877
    ABT_rwlock_unlock(ssg_inst->lock);

    return addr_str;
878 879
}

880 881 882 883 884
/*
 * Serialize the descriptor for group_id into a newly allocated buffer.
 *
 * Buffer layout, in order:
 *   - uint64_t magic number
 *   - ssg_group_id_t group ID
 *   - NUL-terminated group address string
 *
 * On success *buf_p points to a malloc()'d buffer that the caller must
 * free() and *buf_size_p holds its size in bytes. On any failure (ssg_inst
 * is NULL, group_id is SSG_GROUP_ID_INVALID, the descriptor is not found, or
 * allocation fails) *buf_p is NULL and *buf_size_p is 0.
 */
void ssg_group_id_serialize(
    ssg_group_id_t group_id,
    char ** buf_p,
    size_t * buf_size_p)
{
    ssg_group_descriptor_t *g_desc;
    uint64_t magic_nr;
    ssg_group_id_t g_id;
    size_t addr_str_size;
    size_t alloc_size;
    char *gid_buf, *p;

    *buf_p = NULL;
    *buf_size_p = 0;

    if (!ssg_inst || group_id == SSG_GROUP_ID_INVALID) return;

    ABT_rwlock_rdlock(ssg_inst->lock);

    /* find the group descriptor */
    HASH_FIND(hh, ssg_inst->g_desc_table, &group_id, sizeof(ssg_group_id_t), g_desc);
    if (!g_desc)
    {
        ABT_rwlock_unlock(ssg_inst->lock);
        fprintf(stderr, "Error: SSG unable to find expected group ID\n");
        return;
    }

    /* copy fields into locals of the exact types that go into the buffer */
    magic_nr = g_desc->magic_nr;
    g_id = g_desc->g_id;
    addr_str_size = strlen(g_desc->addr_str) + 1; /* include the terminator */

    /* determine needed buffer size */
    alloc_size = sizeof(magic_nr) + sizeof(g_id) + addr_str_size;

    gid_buf = malloc(alloc_size);
    if (!gid_buf)
    {
        /* release the instance lock on this path too, or it is held forever */
        ABT_rwlock_unlock(ssg_inst->lock);
        return;
    }

    /* serialize; memcpy avoids storing through a cast (possibly misaligned) pointer */
    p = gid_buf;
    memcpy(p, &magic_nr, sizeof(magic_nr));
    p += sizeof(magic_nr);
    memcpy(p, &g_id, sizeof(g_id));
    p += sizeof(g_id);
    memcpy(p, g_desc->addr_str, addr_str_size);
    /* the rest of the descriptor is stateful and not appropriate for serializing... */

    ABT_rwlock_unlock(ssg_inst->lock);

    *buf_p = gid_buf;
    *buf_size_p = alloc_size;

    return;
}

/*
 * Rebuild a group descriptor from a serialized group ID buffer and register
 * it in the global descriptor table.
 *
 * Expected buffer layout, in order:
 *   - uint64_t magic number (must equal SSG_MAGIC_NR)
 *   - ssg_group_id_t group ID
 *   - NUL-terminated group address string
 *
 * buf:        serialized bytes
 * buf_size:   number of valid bytes in buf
 * group_id_p: receives the deserialized group ID on success, and
 *             SSG_GROUP_ID_INVALID on any failure
 *
 * The new descriptor is created with SSG_OWNER_IS_UNASSOCIATED status.
 */
void ssg_group_id_deserialize(
    const char * buf,
    size_t buf_size,
    ssg_group_id_t * group_id_p)
{
    const size_t hdr_size = sizeof(uint64_t) + sizeof(ssg_group_id_t);
    uint64_t magic_nr;
    ssg_group_id_t g_id;
    const char *addr_str;
    ssg_group_descriptor_t *g_desc;

    *group_id_p = SSG_GROUP_ID_INVALID;

    if (!ssg_inst || !buf || buf_size == 0) return;

    /* check to ensure the buffer contains enough data to make a group ID:
     * the fixed-size header plus at least the address string terminator
     */
    if (buf_size < hdr_size + 1)
    {
        fprintf(stderr, "Error: Serialized buffer does not contain a valid SSG group ID\n");
        return;
    }

    /* deserialize; memcpy is used because buf has no alignment guarantee */
    memcpy(&magic_nr, buf, sizeof(magic_nr));
    if (magic_nr != SSG_MAGIC_NR)
    {
        fprintf(stderr, "Error: Magic number mismatch when deserializing SSG group ID\n");
        return;
    }
    memcpy(&g_id, buf + sizeof(magic_nr), sizeof(g_id));
    addr_str = buf + hdr_size;

    /* the address string is consumed as a C string below, so it must be
     * terminated inside the buffer -- otherwise we would read past the end
     */
    if (memchr(addr_str, '\0', buf_size - hdr_size) == NULL)
    {
        fprintf(stderr, "Error: Serialized buffer does not contain a valid SSG group ID\n");
        return;
    }

    g_desc = ssg_group_descriptor_create(g_id, addr_str, SSG_OWNER_IS_UNASSOCIATED);
    if (!g_desc)
        return;

    /* add this group descriptor to our global table */
    /* NOTE: g_id is not associated with any group -- caller must join or attach first */
    /* NOTE(review): no check is made for an existing entry with the same
     * g_id before HASH_ADD -- confirm callers never deserialize an ID that
     * is already present in the table
     */
    ABT_rwlock_wrlock(ssg_inst->lock);
    HASH_ADD(hh, ssg_inst->g_desc_table, g_id, sizeof(ssg_group_id_t), g_desc);
    ABT_rwlock_unlock(ssg_inst->lock);

    *group_id_p = g_id;

    return;
}

/*
 * Serialize group_id and write it to file_name.
 *
 * The file is created if needed (mode 0644) and truncated. Serialization is
 * done before the file is opened so that a group ID that cannot be
 * serialized never creates or clobbers the file.
 *
 * Returns SSG_SUCCESS when the whole serialized buffer was written and the
 * file closed cleanly, SSG_FAILURE otherwise.
 */
int ssg_group_id_store(
    const char * file_name,
    ssg_group_id_t group_id)
{
    int fd;
    char *buf;
    const char *p;
    size_t buf_size;
    size_t bytes_left;
    int ret = SSG_FAILURE;

    ssg_group_id_serialize(group_id, &buf, &buf_size);
    if (buf == NULL)
    {
        fprintf(stderr, "Error: Unable to serialize SSG group ID.\n");
        return SSG_FAILURE;
    }

    fd = open(file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd < 0)
    {
        fprintf(stderr, "Error: Unable to open file %s for storing SSG group ID\n",
            file_name);
        free(buf);
        return SSG_FAILURE;
    }

    /* write() may legitimately write fewer bytes than asked or be
     * interrupted by a signal, so keep going until everything is out
     */
    p = buf;
    bytes_left = buf_size;
    while (bytes_left > 0)
    {
        ssize_t bytes_written = write(fd, p, bytes_left);
        if (bytes_written < 0 && errno == EINTR)
            continue;
        if (bytes_written <= 0)
            break;
        p += bytes_written;
        bytes_left -= (size_t)bytes_written;
    }

    if (bytes_left == 0)
        ret = SSG_SUCCESS;
    else
        fprintf(stderr, "Error: Unable to write SSG group ID to file %s\n", file_name);

    /* close() can report deferred write errors; do not claim success then */
    if (close(fd) != 0 && ret == SSG_SUCCESS)
    {
        fprintf(stderr, "Error: Unable to write SSG group ID to file %s\n", file_name);
        ret = SSG_FAILURE;
    }

    free(buf);
    return ret;
}

/*
 * Read a serialized group ID from file_name and deserialize it.
 *
 * file_name:  path of the file to read
 * group_id_p: receives the deserialized group ID on success, and
 *             SSG_GROUP_ID_INVALID on any failure
 *
 * Returns SSG_SUCCESS on success, SSG_FAILURE if the file cannot be opened,
 * stat'ed or fully read, is empty, or does not deserialize to a valid ID.
 */
int ssg_group_id_load(
    const char * file_name,
    ssg_group_id_t * group_id_p)
{
    int fd;
    struct stat fstats;
    char *buf = NULL;
    size_t file_size;
    ssize_t bytes_read;
    int ret = SSG_FAILURE;

    *group_id_p = SSG_GROUP_ID_INVALID;

    fd = open(file_name, O_RDONLY);
    if (fd < 0)
    {
        fprintf(stderr, "Error: Unable to open file %s for loading SSG group ID\n",
            file_name);
        return SSG_FAILURE;
    }

    if (fstat(fd, &fstats) != 0)
    {
        fprintf(stderr, "Error: Unable to stat file %s\n", file_name);
        goto fini;
    }
    if (fstats.st_size <= 0)
    {
        fprintf(stderr, "Error: SSG group ID file %s is empty\n", file_name);
        goto fini;
    }
    file_size = (size_t)fstats.st_size;

    /* one extra byte for a terminator: the payload ends in a string, and
     * file contents are untrusted, so make sure string parsing can never
     * run off the end of the buffer
     */
    buf = malloc(file_size + 1);
    if (buf == NULL)
        goto fini;

    bytes_read = read(fd, buf, file_size);
    if (bytes_read != (ssize_t)file_size)
    {
        fprintf(stderr, "Error: Unable to read SSG group ID from file %s\n", file_name);
        goto fini;
    }
    buf[file_size] = '\0';

    /* only the bytes that came from the file are reported as valid data */
    ssg_group_id_deserialize(buf, file_size, group_id_p);
    if (*group_id_p == SSG_GROUP_ID_INVALID)
    {
        fprintf(stderr, "Error: Unable to deserialize SSG group ID\n");
        goto fini;
    }

    ret = SSG_SUCCESS;

fini:
    close(fd);
    free(buf);
    return ret;
}

1083 1084 1085
/*
 * Print membership information for the group identified by group_id to
 * stdout: group name, caller's role, host name, caller's ID, group size and
 * one line per member in the group view.
 *
 * Only groups for which the descriptor's owner_status is SSG_OWNER_IS_MEMBER
 * are supported; support for attached groups is currently compiled out.
 *
 * Does nothing when ssg_inst is NULL or group_id is SSG_GROUP_ID_INVALID;
 * prints an error to stderr when the descriptor is not found or the caller
 * is not a member of the group.
 */
void ssg_group_dump(
    ssg_group_id_t group_id)
{
    ssg_group_descriptor_t *g_desc;
    ssg_group_view_t *group_view = NULL;
    ABT_rwlock group_lock; /* only meaningful when group_view is non-NULL */
    int group_size = 0;
    char *group_name = NULL;
    char group_role[32];
    char group_self_id[32];

    if (!ssg_inst || group_id == SSG_GROUP_ID_INVALID) return;

    ABT_rwlock_rdlock(ssg_inst->lock);

    /* find the group descriptor */
    HASH_FIND(hh, ssg_inst->g_desc_table, &group_id, sizeof(ssg_group_id_t), g_desc);
    if (!g_desc)
    {
        ABT_rwlock_unlock(ssg_inst->lock);
        fprintf(stderr, "Error: SSG unable to find expected group ID\n");
        return;
    }

    if (g_desc->owner_status == SSG_OWNER_IS_MEMBER)
    {
        ABT_rwlock_rdlock(g_desc->g->lock);
        group_view = &g_desc->g->view;
        group_lock = g_desc->g->lock;
        group_size = g_desc->g->view.size + 1; /* add ourself to view size */
        group_name = g_desc->g->name;
        strcpy(group_role, "member");
        snprintf(group_self_id, sizeof(group_self_id), "%lu", ssg_inst->self_id);
        ABT_rwlock_unlock(g_desc->g->lock);
    }
#if 0
    else if (group_descriptor->owner_status == SSG_OWNER_IS_ATTACHER)
    {
        ssg_attached_group_t *ag;

        HASH_FIND(hh, ssg_inst->attached_group_table, &group_descriptor->name_hash,
            sizeof(uint64_t), ag);
        if (ag)
        {
            group_view = &ag->view;
            group_size = ag->view.size;
            group_name = ag->name;
            strcpy(group_role, "attacher");
        }
    }
#endif
    else
    {
        fprintf(stderr, "Error: SSG can only dump membership information for" \
            " groups that the caller is a member of or an attacher of\n");
    }

    if (group_view)
    {
        ssg_member_state_t *member_state, *tmp_ms;
        char hostname[1024];

        /* gethostname() can fail, and on truncation it is unspecified
         * whether the result is terminated -- never print a buffer that is
         * uninitialized or unterminated
         */
        if (gethostname(hostname, sizeof(hostname)) != 0)
            strcpy(hostname, "(unknown)");
        hostname[sizeof(hostname) - 1] = '\0';

        printf("SSG membership information for group '%s':\n", group_name);
        printf("\trole: %s\n", group_role);
        printf("\thost: %s\n", hostname);
        if (strcmp(group_role, "member") == 0)
            printf("\tself_id: %s\n", group_self_id);
        printf("\tsize: %d\n", group_size);
        printf("\tview:\n");
        /* the member map is walked under the group's own lock */
        ABT_rwlock_rdlock(group_lock);
        HASH_ITER(hh, group_view->member_map, member_state, tmp_ms)
        {
            printf("\t\tid: %20lu\taddr: %s\n", member_state->id,
                member_state->addr_str);
        }
        ABT_rwlock_unlock(group_lock);
    }

    ABT_rwlock_unlock(ssg_inst->lock);

    return;
}

1167 1168 1169
/************************************
 *** SSG internal helper routines ***
 ************************************/
1170

1171
static ssg_group_id_t ssg_group_create_internal(
Shane Snyder's avatar
Shane Snyder committed
1172 1173
    const char * group_name, const char * const group_addr_strs[],
    int group_size, ssg_membership_update_cb update_cb, void *update_cb_dat)
1174
{
1175 1176 1177
    ssg_group_id_t g_id;
    ssg_group_descriptor_t *g_desc = NULL, *g_desc_check;
    ssg_group_t *g;
Shane Snyder's avatar
Shane Snyder committed
1178
    int success = 0;
1179
    int sret;
1180

1181 1182 1183
    if (!ssg_inst) goto fini;

    g_id = ssg_hash64_str(group_name);
Shane Snyder's avatar
Shane Snyder committed
1184

1185 1186 1187 1188 1189
    /* make sure we aren't re-creating an existing group */
    ABT_rwlock_rdlock(ssg_inst->lock);
    HASH_FIND(hh, ssg_inst->g_desc_table, &g_id, sizeof(ssg_group_id_t), g_desc_check);
    ABT_rwlock_unlock(ssg_inst->lock);
    if (g_desc_check) goto fini;
Shane Snyder's avatar
Shane Snyder committed
1190 1191 1192 1193 1194 1195 1196

    /* allocate an SSG group data structure and initialize some of it */
    g = malloc(sizeof(*g));
    if (!g) goto fini;
    memset(g, 0, sizeof(*g));
    g->name = strdup(group_name);
    if (!g->name) goto fini;
1197
    g->ssg_inst = ssg_inst;
Shane Snyder's avatar
Shane Snyder committed
1198 1199 1200 1201 1202
    g->update_cb = update_cb;
    g->update_cb_dat = update_cb_dat;
    ABT_rwlock_create(&g->lock);

    /* initialize the group view */
1203 1204
    sret = ssg_group_view_create(group_addr_strs, group_size, ssg_inst->self_addr_str,
        g->lock, &g->view);
Shane Snyder's avatar
Shane Snyder committed
1205
    if (sret != SSG_SUCCESS) goto fini;
1206

1207 1208 1209 1210 1211 1212 1213
#ifdef DEBUG
    /* set debug output pointer */
    char *dbg_log_dir = getenv("SSG_DEBUG_LOGDIR");
    if (dbg_log_dir)
    {
        char dbg_log_path[PATH_MAX];
        snprintf(dbg_log_path, PATH_MAX, "%s/ssg-%s-%lu.log",
1214
            dbg_log_dir, g->name, g->ssg_inst->self_id);