ssg.c 13.5 KB
Newer Older
1 2 3 4 5 6
/*
 * Copyright (c) 2016 UChicago Argonne, LLC
 *
 * See COPYRIGHT in top-level directory.
 */

Shane Snyder's avatar
Shane Snyder committed
7 8
#include <ssg-config.h>

Jonathan Jenkins's avatar
Jonathan Jenkins committed
9 10 11 12
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
13
#include <errno.h>
Jonathan Jenkins's avatar
Jonathan Jenkins committed
14 15
#include <stdlib.h>
#include <string.h>
Jonathan Jenkins's avatar
Jonathan Jenkins committed
16
#include <assert.h>
Jonathan Jenkins's avatar
Jonathan Jenkins committed
17

Shane Snyder's avatar
Shane Snyder committed
18
#include <mercury.h>
19
#include <abt.h>
Shane Snyder's avatar
Shane Snyder committed
20
#include <margo.h>
21

22
#include <ssg.h>
23
#include "ssg-internal.h"
24 25 26 27
#if USE_SWIM_FD
#include "swim-fd/swim-fd.h"
#endif

28 29 30 31 32
/* SSG helper routine prototypes */
/* look up the HG address of each group member other than the caller and
 * store it in the group's view; addr_strs is indexed by rank
 */
static hg_return_t ssg_group_lookup(
    ssg_group_t * g, const char * const addr_strs[]);
/* build a malloc'd array of num_addrs pointers into buf, which holds
 * num_addrs back-to-back NUL-terminated address strings
 */
static const char ** ssg_setup_addr_str_list(
    char * buf, int num_addrs);
33

34 35
/* Margo instance shared by the routines in this file: stored by ssg_init()
 * and read by the group creation and address lookup code.
 * XXX: is this right? can this be a global?
 */
static margo_instance_id ssg_mid = MARGO_INSTANCE_NULL;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
36

37 38 39
/****************************************************
 *** SSG runtime initialization/shutdown routines ***
 ****************************************************/
40

41 42
/* Initialize the SSG runtime.
 *
 * Stores the given Margo instance in the file-scope ssg_mid variable, where
 * the group creation and lookup routines in this file read it.
 *
 * mid: Margo instance to use for subsequent SSG calls
 *
 * Returns SSG_SUCCESS; there is currently no failure path.
 */
int ssg_init(
    margo_instance_id mid)
{
    int status = SSG_SUCCESS;

    ssg_mid = mid;

    return status;
}
Jonathan Jenkins's avatar
Jonathan Jenkins committed
48

49 50 51 52
/* Shut down the SSG runtime.
 *
 * Currently a no-op: nothing is released and the stored Margo instance is
 * left untouched. Declared with an explicit (void) parameter list so the
 * compiler rejects calls that pass arguments.
 */
void ssg_finalize(void)
{
    return;
}
Jonathan Jenkins's avatar
Jonathan Jenkins committed
53

54 55 56
/*************************************
 *** SSG group management routines ***
 *************************************/
Jonathan Jenkins's avatar
Jonathan Jenkins committed
57

58 59 60 61 62
ssg_group_id_t ssg_group_create(
    const char * group_name,
    const char * const group_addr_strs[],
    int group_size)
{
63
    hg_class_t *hgcl = NULL;
Shane Snyder's avatar
Shane Snyder committed
64
    hg_addr_t self_addr = HG_ADDR_NULL;
65
    char *self_addr_str = NULL;
Shane Snyder's avatar
Shane Snyder committed
66
    hg_size_t self_addr_size = 0;
67 68 69
    const char *self_addr_substr = NULL;
    const char *addr_substr = NULL;
    int i;
70
    hg_return_t hret;
71 72
    ssg_group_t *g = NULL;
    ssg_group_id_t g_id = SSG_GROUP_ID_NULL;
73

74 75
    hgcl = margo_get_class(ssg_mid);
    if (!hgcl) goto fini;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
76

77
    /* get my address */
78 79 80 81 82 83 84 85 86
    hret = HG_Addr_self(hgcl, &self_addr);
    if (hret != HG_SUCCESS) goto fini;
    hret = HG_Addr_to_string(hgcl, NULL, &self_addr_size, self_addr);
    if (hret != HG_SUCCESS) goto fini;
    self_addr_str = malloc(self_addr_size);
    if (self_addr_str == NULL) goto fini;
    hret = HG_Addr_to_string(hgcl, self_addr_str, &self_addr_size, self_addr);
    if (hret != HG_SUCCESS) goto fini;

87 88 89 90
    /* strstr is used here b/c there may be inconsistencies in whether the class
     * is included in the address or not (it should not be in HG_Addr_to_string,
     * but it's possible that it is in the list of group address strings)
     */
91
    self_addr_substr = strstr(self_addr_str, "://");
Shane Snyder's avatar
Shane Snyder committed
92 93 94 95
    if (self_addr_substr == NULL)
        self_addr_substr = self_addr_str;
    else
        self_addr_substr += 3;
Shane Snyder's avatar
Shane Snyder committed
96

97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
    /* allocate an SSG group data structure and initialize some of it */
    g = malloc(sizeof(*g));
    if (!g) goto fini;
    memset(g, 0, sizeof(*g));
    g->name = strdup(group_name);
    if (!g->name) goto fini;
    g->self_rank = -1;
    g->view.group_size = group_size;
    g->view.member_states = malloc(group_size * sizeof(*g->view.member_states));
    if (!g->view.member_states) goto fini;
    memset(g->view.member_states, 0, group_size * sizeof(*g->view.member_states));

    /* resolve my rank within the group */
    for (i = 0; i < group_size; i++)
    {
        addr_substr = strstr(group_addr_strs[i], "://");
Shane Snyder's avatar
Shane Snyder committed
113 114 115 116
        if (addr_substr == NULL)
            addr_substr = group_addr_strs[i];
        else
            addr_substr += 3;
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
        if (strcmp(self_addr_substr, addr_substr) == 0)
        {
            /* this is my address -- my rank is the offset in the address array */
            g->self_rank = i;
            g->view.member_states[i].addr = self_addr;
        }
        else
        {
            /* initialize group member addresses to NULL before looking them up */
            g->view.member_states[i].addr = HG_ADDR_NULL;
        }
        g->view.member_states[i].is_member = 1;
    }
    /* if unable to resolve my rank within the group, error out */
    if(g->self_rank == -1)
    {
        fprintf(stderr, "Error: SSG unable to resolve rank in group %s\n",
            group_name);
        goto fini;
    }

    /* lookup hg addresses information for all group members */
    hret = ssg_group_lookup(g, group_addr_strs);
    if (hret != HG_SUCCESS)
    {
        fprintf(stderr, "Error: SSG unable to complete lookup for group %s\n",
            group_name);
        goto fini;
    }
Shane Snyder's avatar
Shane Snyder committed
146
    SSG_DEBUG(g, "group lookup successful (size=%d)\n", group_size);
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164

#if 0
#if USE_SWIM_FD
    // initialize swim failure detector
    // TODO: we should probably barrier or sync somehow to avoid rpc failures
    // due to timing skew of different ranks initializing swim
    s->swim_ctx = swim_init(s, 1);
    if (s->swim_ctx == NULL)
    {
        ssg_finalize(s);
        s = NULL;
    }
#endif
#endif

    /* TODO: last step => add reference to this group to SSG runtime state */

    /* don't free these pointers on success */
Shane Snyder's avatar
Shane Snyder committed
165
    //self_addr = HG_ADDR_NULL; /* TODO: free this in ssg_group_destroy */
166 167
    g = NULL;
fini:
Shane Snyder's avatar
Shane Snyder committed
168 169
    if (hgcl && self_addr != HG_ADDR_NULL) HG_Addr_free(hgcl, self_addr);
    free(self_addr_str);
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
    if (g)
    {
        free(g->name);
        free(g->view.member_states);
        free(g);
    }

    return g_id;
}

/* Create an SSG group from a config file listing member addresses.
 *
 * The file is read in full and split on whitespace ("\r\n\t "); each token
 * is taken to be one member's address string, in rank order. The resulting
 * list is handed to ssg_group_create().
 *
 * group_name: name of the group
 * file_name:  path of the config file to read
 *
 * Returns the value produced by ssg_group_create(), or SSG_GROUP_ID_NULL if
 * an argument is NULL, the file cannot be opened/stat'd/read in one call,
 * the file contains no addresses, or an allocation fails.
 */
ssg_group_id_t ssg_group_create_config(
    const char * group_name,
    const char * file_name)
{
    int fd = -1;
    struct stat st;
    char *rd_buf = NULL;
    ssize_t rd_buf_sz;
    char *tok;
    char *addr_buf = NULL;
    size_t addr_buf_len = 0;
    int num_addrs = 0;
    int ret;
    const char **addr_strs = NULL;
    ssg_group_id_t g_id = SSG_GROUP_ID_NULL;

    if (group_name == NULL || file_name == NULL) goto fini;

    /* open config file for reading */
    fd = open(file_name, O_RDONLY);
    if (fd == -1)
    {
        fprintf(stderr, "Error: SSG unable to open config file %s for group %s\n",
            file_name, group_name);
        goto fini;
    }

    /* get file size and allocate a buffer to store it */
    ret = fstat(fd, &st);
    if (ret == -1)
    {
        fprintf(stderr, "Error: SSG unable to stat config file %s for group %s\n",
            file_name, group_name);
        goto fini;
    }
    rd_buf = malloc(st.st_size+1);
    if (rd_buf == NULL) goto fini;

    /* load it all in one fell swoop */
    rd_buf_sz = read(fd, rd_buf, st.st_size);
    if (rd_buf_sz != st.st_size)
    {
        fprintf(stderr, "Error: SSG unable to read config file %s for group %s\n",
            file_name, group_name);
        goto fini;
    }
    rd_buf[rd_buf_sz]='\0';

    /* strtok the result - each space-delimited address is assumed to be
     * a unique mercury address
     */
    tok = strtok(rd_buf, "\r\n\t ");
    if (tok == NULL) goto fini;

    /* build up the address buffer: every token is copied together with its
     * terminating NUL. Each token is followed in rd_buf either by a delimiter
     * byte or by the extra '\0' appended above, so the copies need at most
     * rd_buf_sz + 1 bytes (e.g. "a b" is 3 bytes on disk but packs to 4).
     */
    addr_buf = malloc((size_t)rd_buf_sz + 1);
    if (addr_buf == NULL) goto fini;
    do {
        size_t tok_sz = strlen(tok);
        memcpy(addr_buf + addr_buf_len, tok, tok_sz+1);
        addr_buf_len += tok_sz+1;
        num_addrs++;
        tok = strtok(NULL, "\r\n\t ");
    } while (tok != NULL);

    /* set up address string array for group members */
    addr_strs = ssg_setup_addr_str_list(addr_buf, num_addrs);
    if (!addr_strs) goto fini;

    /* invoke the generic group create routine using our list of addrs */
    g_id = ssg_group_create(group_name, addr_strs, num_addrs);

fini:
    /* cleanup before returning */
    if (fd != -1) close(fd);
    free(rd_buf);
    free(addr_buf);
    free(addr_strs);

    return g_id;
}

#ifdef HAVE_MPI
267 268 269
/* Create an SSG group whose members are the processes of an MPI communicator.
 *
 * Every process converts its own HG address to a string, the strings are
 * exchanged with MPI_Allgather/MPI_Allgatherv so that each process holds the
 * full rank-ordered list, and the list is passed to ssg_group_create().
 *
 * group_name: name of the group
 * comm:       communicator whose processes form the group
 *
 * Returns the value produced by ssg_group_create(), or SSG_GROUP_ID_NULL if
 * an earlier step fails.
 */
ssg_group_id_t ssg_group_create_mpi(
    const char * group_name,
    MPI_Comm comm)
{
    hg_class_t *hg_cls = NULL;
    hg_addr_t my_addr = HG_ADDR_NULL;
    char *my_addr_str = NULL;
    hg_size_t my_addr_len = 0;
    int my_addr_len_int = 0; /* MPI counts are ints */
    char *all_addrs = NULL;
    int *lens = NULL;
    int *offsets = NULL;
    int nprocs = 0, me = 0;
    int p;
    const char **addr_list = NULL;
    hg_return_t hret;
    ssg_group_id_t gid = SSG_GROUP_ID_NULL;

    hg_cls = margo_get_class(ssg_mid);
    if (hg_cls == NULL) goto fini;

    /* obtain my own address, query its string length, then stringify it */
    hret = HG_Addr_self(hg_cls, &my_addr);
    if (hret != HG_SUCCESS) goto fini;
    hret = HG_Addr_to_string(hg_cls, NULL, &my_addr_len, my_addr);
    if (hret != HG_SUCCESS) goto fini;
    my_addr_str = malloc(my_addr_len);
    if (my_addr_str == NULL) goto fini;
    hret = HG_Addr_to_string(hg_cls, my_addr_str, &my_addr_len, my_addr);
    if (hret != HG_SUCCESS) goto fini;
    my_addr_len_int = (int)my_addr_len; /* null char included in call */

    /* exchange the per-process string lengths (in place: each process
     * fills in its own slot before the gather)
     */
    MPI_Comm_size(comm, &nprocs);
    MPI_Comm_rank(comm, &me);
    lens = malloc(nprocs * sizeof(*lens));
    if (lens == NULL) goto fini;
    lens[me] = my_addr_len_int;
    MPI_Allgather(MPI_IN_PLACE, 0, MPI_BYTE, lens, 1, MPI_INT, comm);

    /* compute an exclusive prefix sum of the lengths; the extra final slot
     * holds the total
     */
    offsets = malloc((nprocs+1) * sizeof(*offsets));
    if (offsets == NULL) goto fini;
    offsets[0] = 0;
    for (p = 0; p < nprocs; p++)
        offsets[p+1] = offsets[p] + lens[p];

    /* gather every process's address string into one packed buffer */
    all_addrs = malloc(offsets[nprocs]);
    if (all_addrs == NULL) goto fini;
    MPI_Allgatherv(my_addr_str, my_addr_len_int, MPI_BYTE,
            all_addrs, lens, offsets, MPI_BYTE, comm);

    /* index the packed buffer as an array of strings, one per rank */
    addr_list = ssg_setup_addr_str_list(all_addrs, nprocs);
    if (addr_list == NULL) goto fini;

    /* hand the address list to the generic group create routine */
    gid = ssg_group_create(group_name, addr_list, nprocs);

fini:
    /* release all temporaries, whether or not group creation succeeded */
    free(lens);
    free(offsets);
    free(all_addrs);
    if (hg_cls && my_addr != HG_ADDR_NULL) HG_Addr_free(hg_cls, my_addr);
    free(my_addr_str);
    free(addr_list);

    return gid;
}
#endif

341 342
/* Destroy an SSG group.
 *
 * group_id: identifier of the group to destroy
 *
 * Returns SSG_SUCCESS in every case. No resources are released yet: the
 * teardown sketch below is compiled out.
 */
int ssg_group_destroy(
    ssg_group_id_t group_id)
{
    if (group_id != SSG_GROUP_ID_NULL)
    {
        /* TODO: teardown is not implemented; disabled sketch follows */
#if 0
#if USE_SWIM_FD
        if(s->swim_ctx)
            swim_finalize(s->swim_ctx);
#endif
        for (int i = 0; i < s->view.group_size; i++) {
            if (s->view.member_states[i].addr != HG_ADDR_NULL)
                HG_Addr_free(margo_get_class(s->mid), s->view.member_states[i].addr);
        }
        free(s->view.member_states);
        free(s);
#endif
    }

    return SSG_SUCCESS;
}

361 362 363 364 365
/***************************
 *** SSG helper routines ***
 ***************************/

static void ssg_lookup_ult(void * arg);
366 367
struct lookup_ult_args
{
368
    ssg_group_t *g;
369
    int rank;
370
    const char *addr_str;
371 372 373
    hg_return_t out;
};

374 375
/* Look up the HG address of every group member except the caller.
 *
 * Ranks are visited starting from self_rank + 1 and wrapping around. For
 * each rank a ULT running ssg_lookup_ult is created on the Margo handler
 * pool and joined immediately, so lookups run one at a time. Each resolved
 * address is written into g->view.member_states[rank].addr by the ULT.
 *
 * g:         group whose view is filled in (self_rank and group_size must
 *            already be set)
 * addr_strs: address strings indexed by rank
 *
 * Returns HG_SUCCESS if every lookup succeeded, HG_INVALID_PARAM for a NULL
 * argument, HG_NOMEM_ERROR on allocation failure, HG_OTHER_ERROR if a ULT
 * could not be created or joined, or the failing lookup's own error code.
 * Processing stops at the first failure.
 */
static hg_return_t ssg_group_lookup(
    ssg_group_t * g, const char * const addr_strs[])
{
    ABT_thread *ults;
    struct lookup_ult_args *args;
    int i, r;
    int aret;
    hg_return_t hret = HG_SUCCESS;

    if (g == NULL || addr_strs == NULL) return HG_INVALID_PARAM;

    /* initialize ULTs */
    ults = malloc(g->view.group_size * sizeof(*ults));
    if (ults == NULL) return HG_NOMEM_ERROR;
    args = malloc(g->view.group_size * sizeof(*args));
    if (args == NULL) {
        free(ults);
        return HG_NOMEM_ERROR;
    }
    for (i = 0; i < g->view.group_size; i++)
        ults[i] = ABT_THREAD_NULL;

    /* create and wait on one lookup ULT per remote rank */
    for (i = 1; i < g->view.group_size; i++) {
        r = (g->self_rank + i) % g->view.group_size;
        args[r].g = g;
        args[r].rank = r;
        args[r].addr_str = addr_strs[r];

        aret = ABT_thread_create(*margo_get_handler_pool(ssg_mid), &ssg_lookup_ult,
                &args[r], ABT_THREAD_ATTR_NULL, &ults[r]);
        if (aret != ABT_SUCCESS) {
            /* don't let cleanup touch a handle that was never created */
            ults[r] = ABT_THREAD_NULL;
            hret = HG_OTHER_ERROR;
            goto fini;
        }

        aret = ABT_thread_join(ults[r]);
        ABT_thread_free(&ults[r]);
        ults[r] = ABT_THREAD_NULL; // in case of cascading failure from join
        if (aret != ABT_SUCCESS) {
            hret = HG_OTHER_ERROR;
            break;
        }
        else if (args[r].out != HG_SUCCESS) {
            fprintf(stderr, "Error: SSG unable to lookup HG address for rank %d"
                "(err=%d)\n", r, args[r].out);
            hret = args[r].out;
            break;
        }
    }

fini:
    /* cleanup: ABT_thread_free takes a pointer to the handle */
    for (i = 0; i < g->view.group_size; i++) {
        if (ults[i] != ABT_THREAD_NULL) {
            ABT_thread_cancel(ults[i]);
            ABT_thread_free(&ults[i]);
        }
    }
    free(ults);
    free(args);

    return hret;
}
Jonathan Jenkins's avatar
Jonathan Jenkins committed
450

451 452
static void ssg_lookup_ult(
    void * arg)
Jonathan Jenkins's avatar
Jonathan Jenkins committed
453
{
454 455
    struct lookup_ult_args *l = arg;
    ssg_group_t *g = l->g;
456

457 458 459 460
    l->out = margo_addr_lookup(ssg_mid, l->addr_str,
        &g->view.member_states[l->rank].addr);
    return;
}
461

462 463 464 465 466 467 468 469 470 471
/* Build an index over a buffer of packed address strings.
 *
 * buf:       num_addrs NUL-terminated strings stored back to back
 * num_addrs: number of strings in buf
 *
 * Returns a malloc'd array of num_addrs pointers, where entry i points at
 * the start of the i-th string inside buf. The caller frees the array (not
 * the strings, which still belong to buf). Returns NULL if buf is NULL,
 * num_addrs is not positive, or the allocation fails.
 */
static const char ** ssg_setup_addr_str_list(
    char * buf, int num_addrs)
{
    const char **ret;

    /* a zero-length list has no slot 0 to store buf in */
    if (buf == NULL || num_addrs <= 0) return NULL;

    ret = malloc((size_t)num_addrs * sizeof(*ret));
    if (ret == NULL) return NULL;

    ret[0] = buf;
    for (int i = 1; i < num_addrs; i++) {
        /* the next string starts right after the previous one's NUL */
        const char * a = ret[i-1];
        ret[i] = a + strlen(a) + 1;
    }
    return ret;
}

476 477 478 479

#if 0
/*** SSG group membership view access routines */

480
int ssg_get_group_rank(const ssg_t s)
Jonathan Jenkins's avatar
Jonathan Jenkins committed
481
{
482
    return s->view.self_rank;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
483 484
}

485
int ssg_get_group_size(const ssg_t s)
Jonathan Jenkins's avatar
Jonathan Jenkins committed
486
{
487
    return s->view.group_size;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
488 489
}

Jonathan Jenkins's avatar
Jonathan Jenkins committed
490
hg_addr_t ssg_get_addr(const ssg_t s, int rank)
Jonathan Jenkins's avatar
Jonathan Jenkins committed
491
{
492 493
    if (rank >= 0 && rank < s->view.group_size)
        return s->view.member_states[rank].addr;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
494
    else
Jonathan Jenkins's avatar
Jonathan Jenkins committed
495
        return HG_ADDR_NULL;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
496
}
Shane Snyder's avatar
Shane Snyder committed
497
#endif