ssg.c 13.5 KB
Newer Older
1 2 3 4 5 6
/*
 * Copyright (c) 2016 UChicago Argonne, LLC
 *
 * See COPYRIGHT in top-level directory.
 */

7
#include "ssg-config.h"
Shane Snyder's avatar
Shane Snyder committed
8

Jonathan Jenkins's avatar
Jonathan Jenkins committed
9 10 11 12
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
13
#include <errno.h>
Jonathan Jenkins's avatar
Jonathan Jenkins committed
14 15
#include <stdlib.h>
#include <string.h>
Jonathan Jenkins's avatar
Jonathan Jenkins committed
16
#include <assert.h>
Jonathan Jenkins's avatar
Jonathan Jenkins committed
17

Shane Snyder's avatar
Shane Snyder committed
18
#include <mercury.h>
19
#include <abt.h>
Shane Snyder's avatar
Shane Snyder committed
20
#include <margo.h>
21

22
#include "ssg.h"
23
#include "ssg-internal.h"
24 25 26 27
#if USE_SWIM_FD
#include "swim-fd/swim-fd.h"
#endif

28 29 30 31 32
/* SSG helper routine prototypes */
/* looks up the HG address of every group member other than the caller */
static hg_return_t ssg_group_lookup(
    ssg_group_t * g, const char * const addr_strs[]);
/* builds an array of pointers to the num_addrs consecutive NUL-terminated
 * strings packed back-to-back in buf
 */
static const char ** ssg_setup_addr_str_list(
    char * buf, int num_addrs);
33

34
/* XXX: is this right? can this be a global? */
35
/* margo instance used by the SSG routines in this file; assigned by ssg_init() */
margo_instance_id ssg_mid = MARGO_INSTANCE_NULL;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
36

37 38 39
/****************************************************
 *** SSG runtime initialization/shutdown routines ***
 ****************************************************/
40

41 42
/* Initialize the SSG runtime by recording the margo instance that the
 * rest of this file uses (stored in the global ssg_mid).
 *
 * mid - margo instance to use for subsequent SSG calls
 *
 * Always returns SSG_SUCCESS.
 */
int ssg_init(
    margo_instance_id mid)
{
    /* no validation is performed on mid; it is stored as given */
    ssg_mid = mid;

    return SSG_SUCCESS;
}
Jonathan Jenkins's avatar
Jonathan Jenkins committed
48

49 50 51 52
/* Shut down the SSG runtime. Currently a no-op: nothing is released and
 * ssg_mid is left untouched.
 */
void ssg_finalize()
{
    /* intentionally empty */
}
Jonathan Jenkins's avatar
Jonathan Jenkins committed
53

54 55 56
/*************************************
 *** SSG group management routines ***
 *************************************/
Jonathan Jenkins's avatar
Jonathan Jenkins committed
57

58 59 60 61 62
ssg_group_id_t ssg_group_create(
    const char * group_name,
    const char * const group_addr_strs[],
    int group_size)
{
63
    hg_class_t *hgcl = NULL;
Shane Snyder's avatar
Shane Snyder committed
64
    hg_addr_t self_addr = HG_ADDR_NULL;
65
    char *self_addr_str = NULL;
Shane Snyder's avatar
Shane Snyder committed
66
    hg_size_t self_addr_size = 0;
67 68 69
    const char *self_addr_substr = NULL;
    const char *addr_substr = NULL;
    int i;
70
    hg_return_t hret;
71 72
    ssg_group_t *g = NULL;
    ssg_group_id_t g_id = SSG_GROUP_ID_NULL;
73

74 75
    hgcl = margo_get_class(ssg_mid);
    if (!hgcl) goto fini;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
76

77
    /* get my address */
78 79 80 81 82 83 84 85 86
    hret = HG_Addr_self(hgcl, &self_addr);
    if (hret != HG_SUCCESS) goto fini;
    hret = HG_Addr_to_string(hgcl, NULL, &self_addr_size, self_addr);
    if (hret != HG_SUCCESS) goto fini;
    self_addr_str = malloc(self_addr_size);
    if (self_addr_str == NULL) goto fini;
    hret = HG_Addr_to_string(hgcl, self_addr_str, &self_addr_size, self_addr);
    if (hret != HG_SUCCESS) goto fini;

87 88 89 90
    /* strstr is used here b/c there may be inconsistencies in whether the class
     * is included in the address or not (it should not be in HG_Addr_to_string,
     * but it's possible that it is in the list of group address strings)
     */
91
    self_addr_substr = strstr(self_addr_str, "://");
Shane Snyder's avatar
Shane Snyder committed
92 93 94 95
    if (self_addr_substr == NULL)
        self_addr_substr = self_addr_str;
    else
        self_addr_substr += 3;
Shane Snyder's avatar
Shane Snyder committed
96

97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
    /* allocate an SSG group data structure and initialize some of it */
    g = malloc(sizeof(*g));
    if (!g) goto fini;
    memset(g, 0, sizeof(*g));
    g->name = strdup(group_name);
    if (!g->name) goto fini;
    g->self_rank = -1;
    g->view.group_size = group_size;
    g->view.member_states = malloc(group_size * sizeof(*g->view.member_states));
    if (!g->view.member_states) goto fini;
    memset(g->view.member_states, 0, group_size * sizeof(*g->view.member_states));

    /* resolve my rank within the group */
    for (i = 0; i < group_size; i++)
    {
        addr_substr = strstr(group_addr_strs[i], "://");
Shane Snyder's avatar
Shane Snyder committed
113 114 115 116
        if (addr_substr == NULL)
            addr_substr = group_addr_strs[i];
        else
            addr_substr += 3;
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
        if (strcmp(self_addr_substr, addr_substr) == 0)
        {
            /* this is my address -- my rank is the offset in the address array */
            g->self_rank = i;
            g->view.member_states[i].addr = self_addr;
        }
        else
        {
            /* initialize group member addresses to NULL before looking them up */
            g->view.member_states[i].addr = HG_ADDR_NULL;
        }
        g->view.member_states[i].is_member = 1;
    }
    /* if unable to resolve my rank within the group, error out */
    if(g->self_rank == -1)
    {
        fprintf(stderr, "Error: SSG unable to resolve rank in group %s\n",
            group_name);
        goto fini;
    }

    /* lookup hg addresses information for all group members */
    hret = ssg_group_lookup(g, group_addr_strs);
    if (hret != HG_SUCCESS)
    {
        fprintf(stderr, "Error: SSG unable to complete lookup for group %s\n",
            group_name);
        goto fini;
    }
Shane Snyder's avatar
Shane Snyder committed
146
    SSG_DEBUG(g, "group lookup successful (size=%d)\n", group_size);
147 148

#if USE_SWIM_FD
149
    /* initialize swim failure detector */
150 151
    // TODO: we should probably barrier or sync somehow to avoid rpc failures
    // due to timing skew of different ranks initializing swim
152 153
    g->fd_ctx = (void *)swim_init(g, 1);
    if (g->fd_ctx == NULL) goto fini;
154 155 156 157 158
#endif

    /* TODO: last step => add reference to this group to SSG runtime state */

    /* don't free these pointers on success */
159
    self_addr = HG_ADDR_NULL; /* TODO: free this in ssg_group_destroy */
160 161
    g = NULL;
fini:
Shane Snyder's avatar
Shane Snyder committed
162 163
    if (hgcl && self_addr != HG_ADDR_NULL) HG_Addr_free(hgcl, self_addr);
    free(self_addr_str);
164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222
    if (g)
    {
        free(g->name);
        free(g->view.member_states);
        free(g);
    }

    return g_id;
}

/* Create an SSG group whose member addresses are listed in a config file.
 *
 * The file is read in full and split on whitespace (space, tab, CR, LF);
 * every token is treated as one member address. The resulting list is
 * handed to ssg_group_create().
 *
 * group_name - name of the group to create
 * file_name  - path of the config file to read
 *
 * Returns whatever ssg_group_create() returns, or SSG_GROUP_ID_NULL if the
 * file cannot be opened/read, contains no tokens, or memory runs out.
 */
ssg_group_id_t ssg_group_create_config(
    const char * group_name,
    const char * file_name)
{
    int fd = -1;
    struct stat st;
    char *rd_buf = NULL;
    ssize_t rd_buf_sz = 0;
    char *tok;
    char *addr_buf = NULL;
    size_t addr_buf_len = 0;
    int num_addrs = 0;
    int ret;
    const char **addr_strs = NULL;
    ssg_group_id_t g_id = SSG_GROUP_ID_NULL;

    if (!group_name || !file_name) goto fini;

    /* open config file for reading */
    fd = open(file_name, O_RDONLY);
    if (fd == -1)
    {
        fprintf(stderr, "Error: SSG unable to open config file %s for group %s\n",
            file_name, group_name);
        goto fini;
    }

    /* get file size and allocate a buffer to store it */
    ret = fstat(fd, &st);
    if (ret == -1)
    {
        fprintf(stderr, "Error: SSG unable to stat config file %s for group %s\n",
            file_name, group_name);
        goto fini;
    }
    rd_buf = malloc(st.st_size+1);
    if (rd_buf == NULL) goto fini;

    /* load the whole file; read() may legitimately return fewer bytes than
     * requested or be interrupted by a signal, so keep going until done
     */
    while (rd_buf_sz < st.st_size)
    {
        ssize_t n = read(fd, rd_buf + rd_buf_sz,
            (size_t)(st.st_size - rd_buf_sz));
        if (n > 0)
            rd_buf_sz += n;
        else if (n == -1 && errno == EINTR)
            continue;
        else
            break; /* hard error or premature end of file */
    }
    if (rd_buf_sz != st.st_size)
    {
        fprintf(stderr, "Error: SSG unable to read config file %s for group %s\n",
            file_name, group_name);
        goto fini;
    }
    rd_buf[rd_buf_sz]='\0';

    /* strtok the result - each space-delimited address is assumed to be
     * a unique mercury address
     */
    tok = strtok(rd_buf, "\r\n\t ");
    if (tok == NULL) goto fini;

    /* build up the address buffer. Every token is stored with its own NUL
     * terminator; when the file does not end in a delimiter the last
     * terminator has no counterpart in the file, so the packed form can
     * need one byte more than the file size.
     */
    addr_buf = malloc((size_t)rd_buf_sz + 1);
    if (addr_buf == NULL) goto fini;
    do {
        size_t tok_sz = strlen(tok);
        memcpy(addr_buf + addr_buf_len, tok, tok_sz+1);
        addr_buf_len += tok_sz+1;
        num_addrs++;
        tok = strtok(NULL, "\r\n\t ");
    } while (tok != NULL);
    if (addr_buf_len != (size_t)rd_buf_sz + 1)
    {
        /* trim the buffer to what was actually used; failing to shrink is
         * harmless, so keep the larger buffer in that case
         */
        char *tmp = realloc(addr_buf, addr_buf_len);
        if (tmp != NULL) addr_buf = tmp;
    }

    /* set up address string array for group members */
    addr_strs = ssg_setup_addr_str_list(addr_buf, num_addrs);
    if (!addr_strs) goto fini;

    /* invoke the generic group create routine using our list of addrs */
    g_id = ssg_group_create(group_name, addr_strs, num_addrs);

fini:
    /* cleanup before returning */
    if (fd != -1) close(fd);
    free(rd_buf);
    free(addr_buf);
    free(addr_strs);

    return g_id;
}

#ifdef HAVE_MPI
261 262 263
/* Create an SSG group whose members are the ranks of an MPI communicator.
 *
 * Every rank converts its own Mercury address to a string, the strings are
 * exchanged with MPI_Allgather/MPI_Allgatherv, and the resulting list is
 * handed to ssg_group_create().
 *
 * group_name - name of the group to create
 * comm       - communicator whose ranks form the group
 *
 * Returns whatever ssg_group_create() returns, or SSG_GROUP_ID_NULL if an
 * earlier step fails.
 */
ssg_group_id_t ssg_group_create_mpi(
    const char * group_name,
    MPI_Comm comm)
{
    ssg_group_id_t gid = SSG_GROUP_ID_NULL;
    hg_class_t *hg_cls = NULL;
    hg_addr_t my_addr = HG_ADDR_NULL;
    char *my_addr_str = NULL;
    hg_size_t my_addr_len = 0;
    int my_addr_len_int = 0; /* MPI counts are ints */
    char *packed_addrs = NULL;
    int *counts = NULL;
    int *displs = NULL;
    int nprocs = 0, my_rank = 0;
    const char **addr_list = NULL;
    int p;

    hg_cls = margo_get_class(ssg_mid);
    if (hg_cls == NULL) goto fini;

    /* stringify my own address: size query first, then the conversion */
    if (HG_Addr_self(hg_cls, &my_addr) != HG_SUCCESS)
        goto fini;
    if (HG_Addr_to_string(hg_cls, NULL, &my_addr_len, my_addr) != HG_SUCCESS)
        goto fini;
    my_addr_str = malloc(my_addr_len);
    if (my_addr_str == NULL) goto fini;
    if (HG_Addr_to_string(hg_cls, my_addr_str, &my_addr_len, my_addr)
            != HG_SUCCESS)
        goto fini;
    my_addr_len_int = (int)my_addr_len; /* null char included in call */

    /* every rank learns the string length of every other rank */
    MPI_Comm_size(comm, &nprocs);
    MPI_Comm_rank(comm, &my_rank);
    counts = malloc(nprocs * sizeof(*counts));
    if (counts == NULL) goto fini;
    counts[my_rank] = my_addr_len_int;
    MPI_Allgather(MPI_IN_PLACE, 0, MPI_BYTE, counts, 1, MPI_INT, comm);

    /* exclusive prefix sum of the lengths; the extra final slot holds the
     * grand total
     */
    displs = malloc((nprocs+1) * sizeof(*displs));
    if (displs == NULL) goto fini;
    displs[0] = 0;
    for (p = 0; p < nprocs; p++)
        displs[p+1] = displs[p] + counts[p];

    /* gather all address strings back-to-back into one buffer */
    packed_addrs = malloc(displs[nprocs]);
    if (packed_addrs == NULL) goto fini;
    MPI_Allgatherv(my_addr_str, my_addr_len_int, MPI_BYTE,
            packed_addrs, counts, displs, MPI_BYTE, comm);

    /* index the packed strings so they can be passed as an array */
    addr_list = ssg_setup_addr_str_list(packed_addrs, nprocs);
    if (addr_list == NULL) goto fini;

    /* hand off to the generic group create routine */
    gid = ssg_group_create(group_name, addr_list, nprocs);

fini:
    /* release everything acquired above, whether or not we succeeded */
    free(counts);
    free(displs);
    free(packed_addrs);
    if (hg_cls && my_addr != HG_ADDR_NULL) HG_Addr_free(hg_cls, my_addr);
    free(my_addr_str);
    free(addr_list);

    return gid;
}
#endif

335 336
/* Destroy an SSG group.
 *
 * group_id - id of the group to destroy; SSG_GROUP_ID_NULL is accepted
 *
 * Always returns SSG_SUCCESS. The actual teardown logic is compiled out
 * (#if 0) for now, so no resources are released by this call.
 */
int ssg_group_destroy(
    ssg_group_id_t group_id)
{
    if (group_id != SSG_GROUP_ID_NULL)
    {
#if 0
#if USE_SWIM_FD
        if(s->swim_ctx)
            swim_finalize(s->swim_ctx);
#endif
        for (int i = 0; i < s->view.group_size; i++) {
            if (s->view.member_states[i].addr != HG_ADDR_NULL)
                HG_Addr_free(margo_get_class(s->mid), s->view.member_states[i].addr);
        }
        free(s->view.member_states);
        free(s);
#endif
    }

    return SSG_SUCCESS;
}

355 356 357 358 359
/***************************
 *** SSG helper routines ***
 ***************************/

static void ssg_lookup_ult(void * arg);
360 361
struct lookup_ult_args
{
362
    ssg_group_t *g;
363
    int rank;
364
    const char *addr_str;
365 366 367
    hg_return_t out;
};

368 369
/* Look up the HG address of every group member other than the caller.
 *
 * For each rank r != g->self_rank (visited in order starting just after
 * self_rank and wrapping around), a ULT running ssg_lookup_ult() is created
 * on the margo handler pool; it stores the looked-up address in
 * g->view.member_states[r].addr.
 *
 * ULT creation is currently serialized: each ULT is created and joined
 * before the next one is created (the "#if 1" section). The concurrent
 * variant, which would create all ULTs up front, is kept under "#if 0".
 *
 * g         - group to populate; g->self_rank and g->view must be set
 * addr_strs - address strings indexed by rank, g->view.group_size entries
 *
 * Returns HG_SUCCESS if every lookup succeeded, HG_INVALID_PARAM for NULL
 * arguments, HG_NOMEM_ERROR on allocation failure, HG_OTHER_ERROR on an
 * Argobots failure, or the error reported by the first failing lookup.
 */
static hg_return_t ssg_group_lookup(
    ssg_group_t * g, const char * const addr_strs[])
{
    ABT_thread *ults;
    struct lookup_ult_args *args;
    int i, r;
    int aret;
    hg_return_t hret = HG_SUCCESS;

    if (g == NULL || addr_strs == NULL) return HG_INVALID_PARAM;

    /* initialize ULTs */
    ults = malloc(g->view.group_size * sizeof(*ults));
    if (ults == NULL) return HG_NOMEM_ERROR;
    args = malloc(g->view.group_size * sizeof(*args));
    if (args == NULL) {
        free(ults);
        return HG_NOMEM_ERROR;
    }
    for (i = 0; i < g->view.group_size; i++)
        ults[i] = ABT_THREAD_NULL;

    /* fill in the per-rank arguments (and, if enabled, launch all ULTs) */
    for (i = 1; i < g->view.group_size; i++) {
        r = (g->self_rank + i) % g->view.group_size;
        args[r].g = g;
        args[r].rank = r;
        args[r].addr_str = addr_strs[r];
#if 0
        aret = ABT_thread_create(*margo_get_handler_pool(ssg_mid), &ssg_lookup_ult,
                &args[r], ABT_THREAD_ATTR_NULL, &ults[r]);
        if (aret != ABT_SUCCESS) {
            hret = HG_OTHER_ERROR;
            goto fini;
        }
#endif
    }

    /* wait on all */
    for (i = 1; i < g->view.group_size; i++) {
        r = (g->self_rank + i) % g->view.group_size;
#if 1
        aret = ABT_thread_create(*margo_get_handler_pool(ssg_mid), &ssg_lookup_ult,
                &args[r], ABT_THREAD_ATTR_NULL, &ults[r]);
        if (aret != ABT_SUCCESS) {
            hret = HG_OTHER_ERROR;
            goto fini;
        }
#endif
        aret = ABT_thread_join(ults[r]);
        ABT_thread_free(&ults[r]);
        ults[r] = ABT_THREAD_NULL; // in case of cascading failure from join
        if (aret != ABT_SUCCESS) {
            hret = HG_OTHER_ERROR;
            break;
        }
        else if (args[r].out != HG_SUCCESS) {
            fprintf(stderr, "Error: SSG unable to lookup HG address for rank %d"
                "(err=%d)\n", r, args[r].out);
            hret = args[r].out;
            break;
        }
    }

fini:
    /* cleanup: cancel and release any ULT that is still outstanding.
     * ABT_thread_free takes a pointer to the handle, exactly as in the
     * join loop above.
     */
    for (i = 0; i < g->view.group_size; i++) {
        if (ults[i] != ABT_THREAD_NULL) {
            ABT_thread_cancel(ults[i]);
            ABT_thread_free(&ults[i]);
        }
    }
    free(ults);
    free(args);

    return hret;
}
Jonathan Jenkins's avatar
Jonathan Jenkins committed
444

445 446
static void ssg_lookup_ult(
    void * arg)
Jonathan Jenkins's avatar
Jonathan Jenkins committed
447
{
448 449
    struct lookup_ult_args *l = arg;
    ssg_group_t *g = l->g;
450

451 452 453 454
    l->out = margo_addr_lookup(ssg_mid, l->addr_str,
        &g->view.member_states[l->rank].addr);
    return;
}
455

456 457 458 459 460 461 462 463 464 465
/* Build an index over a buffer of packed address strings.
 *
 * buf       - num_addrs NUL-terminated strings stored back-to-back
 * num_addrs - number of strings in buf; must be > 0
 *
 * Returns a malloc'd array of num_addrs pointers, where entry i points at
 * the start of the i-th string inside buf (no string data is copied), or
 * NULL if buf is NULL, num_addrs is not positive, or allocation fails.
 * The caller frees the array; buf must outlive it.
 */
static const char ** ssg_setup_addr_str_list(
    char * buf, int num_addrs)
{
    const char **ret;

    /* with no strings there is no slot for ret[0]; a zero-size allocation
     * may still return non-NULL, so reject this case up front
     */
    if (buf == NULL || num_addrs <= 0) return NULL;

    ret = malloc((size_t)num_addrs * sizeof(*ret));
    if (ret == NULL) return NULL;

    ret[0] = buf;
    for (int i = 1; i < num_addrs; i++) {
        /* the next string starts right after the previous terminator */
        const char * a = ret[i-1];
        ret[i] = a + strlen(a) + 1;
    }
    return ret;
}

470 471 472 473

#if 0
/*** SSG group membership view access routines (currently compiled out) ***/

/* returns the caller's rank as recorded in the group view */
int ssg_get_group_rank(const ssg_t s)
{
    return s->view.self_rank;
}

/* returns the number of members recorded in the group view */
int ssg_get_group_size(const ssg_t s)
{
    return s->view.group_size;
}

/* returns the address stored for the given rank, or HG_ADDR_NULL when the
 * rank is outside [0, group_size)
 */
hg_addr_t ssg_get_addr(const ssg_t s, int rank)
{
    if (rank < 0 || rank >= s->view.group_size)
        return HG_ADDR_NULL;

    return s->view.member_states[rank].addr;
}
#endif