ssg.c 19.3 KB
Newer Older
1
2
3
4
5
6
/*
 * Copyright (c) 2016 UChicago Argonne, LLC
 *
 * See COPYRIGHT in top-level directory.
 */

Jonathan Jenkins's avatar
Jonathan Jenkins committed
7
8
9
10
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
11
#include <errno.h>
Jonathan Jenkins's avatar
Jonathan Jenkins committed
12
13
14
15
16
17
18
19
20
21
22
23
#include <stdlib.h>
#include <string.h>

#include <mercury_proc.h>

#include <ssg.h>
#include <ssg-config.h>
#include "def.h"

#ifdef HAVE_MPI
#include <ssg-mpi.h>
#endif
Jonathan Jenkins's avatar
Jonathan Jenkins committed
24
25
26
#ifdef HAVE_MARGO
#include <ssg-margo.h>
#endif
Jonathan Jenkins's avatar
Jonathan Jenkins committed
27
28

// helpers for looking up a server
Jonathan Jenkins's avatar
Jonathan Jenkins committed
29
30
31
// completion callback handed to HG_Addr_lookup
static hg_return_t lookup_serv_addr_cb(const struct hg_cb_info *info);

// looks up a single address string, driving progress until it completes
static hg_addr_t lookup_serv_addr(hg_context_t *hgctx, const char *info_str);

// builds a table of pointers to the strings packed back-to-back in buf
static char** setup_addr_str_list(int num_addrs, char * buf);


36
ssg_t ssg_init_config(const char * fname, int is_member)
Jonathan Jenkins's avatar
Jonathan Jenkins committed
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
{
    // file to read
    int fd = -1;
    struct stat st;

    // file content to parse
    char *rdbuf = NULL;
    ssize_t rdsz;

    // parse metadata (strtok)
    char *tok;

    // vars to build up the addr string list
    int addr_cap = 128;
    int addr_len = 0;
    int num_addrs = 0;
    void *buf = NULL;
    char **addr_strs = NULL;

    // return var
    ssg_t s = NULL;

    // misc return codes
    int ret;

    // open file for reading
    fd = open(fname, O_RDONLY);
    if (fd == -1) goto fini;

    // get file size
    ret = fstat(fd, &st);
    if (ret == -1) goto fini;

    // slurp file in all at once
    rdbuf = malloc(st.st_size+1);
    if (rdbuf == NULL) goto fini;

    // load it all in one fell swoop
    rdsz = read(fd, rdbuf, st.st_size);
    if (rdsz <= 0) goto fini;
    if (rdsz != st.st_size) { free(rdbuf); close(fd); return NULL; }
    rdbuf[rdsz]='\0';

    // strtok the result - each space-delimited address is assumed to be
    // a unique mercury address
    tok = strtok(rdbuf, "\r\n\t ");
    if (tok == NULL) goto fini;

    // build up the address buffer
    buf = malloc(addr_cap);
    if (buf == NULL) goto fini;
    do {
        int tok_sz = strlen(tok);
        if (tok_sz + addr_len + 1 > addr_cap) {
            void * tmp;
            addr_cap *= 2;
            tmp = realloc(buf, addr_cap);
            if (tmp == NULL) goto fini;
            buf = tmp;
        }
        memcpy((char*)buf + addr_len, tok, tok_sz+1);
        addr_len += tok_sz+1;
        num_addrs++;
        tok = strtok(NULL, "\r\n\t ");
    } while (tok != NULL);

    // set up the list of addresses
    addr_strs = malloc(num_addrs * sizeof(*addr_strs));
    if (addr_strs == NULL) goto fini;
    tok = (char*)buf;
    for (int i = 0; i < num_addrs; i++) {
        addr_strs[i] = tok;
        tok += strlen(tok) + 1;
    }

    // done parsing - setup the return structure
    s = malloc(sizeof(*s));
    if (s == NULL) goto fini;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
115
    s->hgcl = NULL;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
116
117
    s->addrs = malloc(num_addrs*sizeof(*s->addrs));
    if (s->addrs == NULL) goto fini;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
118
    for (int i = 0; i < num_addrs; i++) s->addrs[i] = HG_ADDR_NULL;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
119
120
121
122
    s->addr_strs = addr_strs; addr_strs = NULL;
    s->backing_buf = buf; buf = NULL;
    s->num_addrs = num_addrs;
    s->buf_size = addr_len;
123
    s->rank = is_member ? SSG_RANK_UNKNOWN : SSG_EXTERNAL_RANK;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
124
125
126
127
128
129

fini:
    if (fd != -1) close(fd);
    free(rdbuf);
    free(addr_strs);
    free(buf);
130
    if (s != NULL && s->addrs == NULL) { free(s); s = NULL; }
Jonathan Jenkins's avatar
Jonathan Jenkins committed
131
132
133
134
    return s;
}

#ifdef HAVE_MPI
Jonathan Jenkins's avatar
Jonathan Jenkins committed
135
/*
 * Build a group descriptor collectively over an MPI communicator.
 *
 * Every rank converts its own mercury address to a string and the strings
 * are all-gathered, so each rank ends up with the full address list in
 * communicator rank order.
 *
 * hgcl - mercury class used to obtain this process's own address
 * comm - communicator whose ranks make up the group
 *
 * Returns a newly allocated ssg_t whose rank is the caller's rank in comm
 * and whose own addrs entry is already filled in (all others are
 * HG_ADDR_NULL), or NULL on a mercury or allocation failure.
 */
ssg_t ssg_init_mpi(hg_class_t *hgcl, MPI_Comm comm)
{
    // my addr
    hg_addr_t self_addr = HG_ADDR_NULL;
    char * self_addr_str = NULL;
    hg_size_t self_addr_size = 0;
    int self_addr_size_int = 0; // for mpi-friendly conversion

    // collective helpers
    char * buf = NULL;
    int * sizes = NULL;
    int * sizes_psum = NULL;
    int comm_size = 0;
    int comm_rank = 0;

    // hg addresses
    hg_addr_t *addrs = NULL;

    // return data
    char **addr_strs = NULL;
    ssg_t s = NULL;

    // misc return codes
    hg_return_t hret;

    // get my address
    hret = HG_Addr_self(hgcl, &self_addr);
    if (hret != HG_SUCCESS) goto fini;
    // first call (NULL buffer) only reports the size needed
    hret = HG_Addr_to_string(hgcl, NULL, &self_addr_size, self_addr);
    if (hret != HG_SUCCESS) goto fini;
    self_addr_str = malloc(self_addr_size);
    if (self_addr_str == NULL) goto fini;
    hret = HG_Addr_to_string(hgcl, self_addr_str, &self_addr_size, self_addr);
    if (hret != HG_SUCCESS) goto fini;
    self_addr_size_int = (int)self_addr_size; // null char included in call

    // gather the buffer sizes
    MPI_Comm_size(comm, &comm_size);
    MPI_Comm_rank(comm, &comm_rank);
    sizes = malloc(comm_size * sizeof(*sizes));
    if (sizes == NULL) goto fini;
    sizes[comm_rank] = self_addr_size_int;
    MPI_Allgather(MPI_IN_PLACE, 0, MPI_BYTE, sizes, 1, MPI_INT, comm);

    // compute a exclusive prefix sum of the data sizes,
    // including the total at the end
    sizes_psum = malloc((comm_size+1) * sizeof(*sizes_psum));
    if (sizes_psum == NULL) goto fini;
    sizes_psum[0] = 0;
    for (int i = 1; i < comm_size+1; i++)
        sizes_psum[i] = sizes_psum[i-1] + sizes[i-1];

    // allgather the addresses
    buf = malloc(sizes_psum[comm_size]);
    if (buf == NULL) goto fini;
    MPI_Allgatherv(self_addr_str, self_addr_size_int, MPI_BYTE,
            buf, sizes, sizes_psum, MPI_BYTE, comm);

    // set the addresses
    addr_strs = setup_addr_str_list(comm_size, buf);
    if (addr_strs == NULL) goto fini;

    // init peer addresses
    addrs = malloc(comm_size*sizeof(*addrs));
    if (addrs == NULL) goto fini;
    for (int i = 0; i < comm_size; i++) addrs[i] = HG_ADDR_NULL;
    addrs[comm_rank] = self_addr;

    // set up the output
    s = malloc(sizeof(*s));
    if (s == NULL) goto fini;
    // the group already owns one address (our own), so remember the class
    // it came from: ssg_finalize needs it to release that address even if
    // no lookup is ever performed
    s->hgcl = hgcl;
    s->addr_strs = addr_strs; addr_strs = NULL;
    s->addrs = addrs; addrs = NULL;
    s->backing_buf = buf; buf = NULL;
    s->num_addrs = comm_size;
    s->buf_size = sizes_psum[comm_size];
    s->rank = comm_rank;
    self_addr = HG_ADDR_NULL; // don't free this on success

fini:
    if (self_addr != HG_ADDR_NULL) HG_Addr_free(hgcl, self_addr);
    free(self_addr_str);
    free(buf);
    free(sizes);
    free(sizes_psum);
    free(addr_strs);
    free(addrs);
    return s;
}
#endif

225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
/*
 * Determine the caller's rank by finding its own address in the group.
 *
 * Does nothing (and returns HG_SUCCESS) unless s->rank is SSG_RANK_UNKNOWN.
 * Otherwise the caller's address string is compared, ignoring everything up
 * to and including "://", against each entry of s->addr_strs. On a match the
 * rank is recorded and the self address is stored in s->addrs[rank].
 *
 * s    - group to resolve the rank in
 * hgcl - mercury class used to obtain this process's own address
 *
 * Returns HG_SUCCESS, HG_NOMEM_ERROR if the string buffer cannot be
 * allocated, HG_INVALID_PARAM if an address lacks "://" or no entry matches,
 * or the error reported by mercury.
 */
hg_return_t ssg_resolve_rank(ssg_t s, hg_class_t *hgcl)
{
    if (s->rank == SSG_EXTERNAL_RANK ||
            s->rank != SSG_RANK_UNKNOWN)
        return HG_SUCCESS;

    // helpers
    hg_addr_t self_addr = HG_ADDR_NULL;
    char * self_addr_str = NULL;
    const char * self_addr_substr = NULL;
    hg_size_t self_addr_size = 0;
    const char * addr_substr = NULL;
    int rank = 0;
    hg_return_t hret;

    // get my address
    hret = HG_Addr_self(hgcl, &self_addr);
    if (hret != HG_SUCCESS) goto end;
    // first call (NULL buffer) only reports the size needed
    hret = HG_Addr_to_string(hgcl, NULL, &self_addr_size, self_addr);
    if (hret != HG_SUCCESS) goto end;
    self_addr_str = malloc(self_addr_size);
    if (self_addr_str == NULL) { hret = HG_NOMEM_ERROR; goto end; }
    hret = HG_Addr_to_string(hgcl, self_addr_str, &self_addr_size, self_addr);
    if (hret != HG_SUCCESS) goto end;

    // strstr is used here b/c there may be inconsistencies in whether the class
    // is included in the address or not (it's not in HG_Addr_to_string, it
    // should be in ssg_init_config)
    self_addr_substr = strstr(self_addr_str, "://");
    if (self_addr_substr == NULL) { hret = HG_INVALID_PARAM; goto end; }
    self_addr_substr += 3;
    for (rank = 0; rank < s->num_addrs; rank++) {
        addr_substr = strstr(s->addr_strs[rank], "://");
        if (addr_substr == NULL) { hret = HG_INVALID_PARAM; goto end; }
        addr_substr+= 3;
        if (strcmp(self_addr_substr, addr_substr) == 0)
            break;
    }
    if (rank == s->num_addrs) {
        // our address is not in the list
        hret = HG_INVALID_PARAM;
        goto end;
    }

    // success - set; the group takes ownership of the self address
    s->rank = rank;
    s->addrs[rank] = self_addr; self_addr = HG_ADDR_NULL;

end:
    if (self_addr != HG_ADDR_NULL) HG_Addr_free(hgcl, self_addr);
    free(self_addr_str);

    return hret;
}

279
/*
 * Look up the mercury address of every group member, one at a time.
 *
 * Records the context's class in s->hgcl, resolves the caller's rank if it
 * is still SSG_RANK_UNKNOWN, then looks up each address string in s->addr_strs
 * and stores the result in s->addrs. A member skips its own entry; a
 * non-member (SSG_EXTERNAL_RANK) looks up every entry.
 *
 * s     - group to fill in
 * hgctx - mercury context used for the lookups
 *
 * Returns HG_SUCCESS, HG_INVALID_PARAM if the context has no class or the
 * group is empty, the error from ssg_resolve_rank, or HG_PROTOCOL_ERROR as
 * soon as one lookup yields no address.
 */
hg_return_t ssg_lookup(ssg_t s, hg_context_t *hgctx)
{
    // "effective" rank for the lookup loop
    int eff_rank = 0;
    // 1 when our own slot must be looked up as well (non-members only)
    int include_self = 0;

    // set the hg class up front - need for destructing addrs
    s->hgcl = HG_Context_get_class(hgctx);
    if (s->hgcl == NULL) return HG_INVALID_PARAM;

    // an empty group has nothing to look up, and would make the modulo
    // operations below divide by zero
    if (s->num_addrs <= 0) return HG_INVALID_PARAM;

    // perform search for my rank if not already set
    if (s->rank == SSG_RANK_UNKNOWN) {
        hg_return_t hret = ssg_resolve_rank(s, s->hgcl);
        if (hret != HG_SUCCESS) return hret;
    }

    if (s->rank == SSG_EXTERNAL_RANK) {
        // do a completely arbitrary effective rank determination to try and
        // prevent everyone talking to the same member at once
        eff_rank = (((intptr_t)hgctx)/sizeof(hgctx)) % s->num_addrs;
        include_self = 1;
    }
    else
        eff_rank = s->rank;

    // rank is set, perform lookup: walk the ring starting just after
    // eff_rank and wrapping around; the final step lands on eff_rank itself
    // and is only taken by non-members
    for (int step = 1; step < s->num_addrs + include_self; step++) {
        int i = (eff_rank + step) % s->num_addrs;
        s->addrs[i] = lookup_serv_addr(hgctx, s->addr_strs[i]);
        if (s->addrs[i] == HG_ADDR_NULL) return HG_PROTOCOL_ERROR;
    }

    return HG_SUCCESS;
}

Jonathan Jenkins's avatar
Jonathan Jenkins committed
320
#ifdef HAVE_MARGO
Jonathan Jenkins's avatar
Jonathan Jenkins committed
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337

// Argument/result record for one lookup_ult invocation; ssg_lookup_margo
// keeps one of these per address it looks up.
struct lookup_ult_args
{
    ssg_t ssg;              // group whose addr_strs/addrs entries are used
    margo_instance_id mid;  // margo instance passed to margo_addr_lookup
    int rank;               // index of the address to look up
    hg_return_t out;        // result of margo_addr_lookup, set by lookup_ult
};

static void lookup_ult(void *arg)
{
    struct lookup_ult_args *l = arg;

    l->out = margo_addr_lookup(l->mid, l->ssg->addr_strs[l->rank],
            &l->ssg->addrs[l->rank]);
}

Jonathan Jenkins's avatar
Jonathan Jenkins committed
338
339
340
341
// TODO: refactor - code is mostly a copy of ssg_lookup
/*
 * Look up the mercury address of every group member concurrently, using one
 * ULT per address created on the margo handler pool.
 *
 * Records the margo instance's class in s->hgcl, resolves the caller's rank
 * if it is still SSG_RANK_UNKNOWN, then starts a lookup ULT for each entry
 * (a member skips its own entry; a non-member looks up all of them) and
 * waits for them.
 *
 * s   - group to fill in
 * mid - margo instance providing the context, class, and handler pool
 *
 * Returns HG_SUCCESS, HG_INVALID_PARAM if the instance has no context/class
 * or the group is empty, HG_NOMEM_ERROR on allocation failure, the error
 * from ssg_resolve_rank, HG_OTHER_ERROR if a ULT cannot be created or
 * joined, or the first failing lookup's return code.
 */
hg_return_t ssg_lookup_margo(ssg_t s, margo_instance_id mid)
{
    hg_context_t *hgctx;
    ABT_thread *ults = NULL;
    struct lookup_ult_args *args = NULL;
    hg_return_t hret = HG_SUCCESS;

    // "effective" rank for the lookup loop
    int eff_rank = 0;
    // 1 when our own slot must be looked up as well (non-members only)
    int include_self = 0;

    // set the hg class up front - need for destructing addrs
    hgctx = margo_get_context(mid);
    if (hgctx == NULL) return HG_INVALID_PARAM;
    s->hgcl = margo_get_class(mid);
    if (s->hgcl == NULL) return HG_INVALID_PARAM;

    // an empty group has nothing to look up, and would make the modulo
    // operations below divide by zero
    if (s->num_addrs <= 0) return HG_INVALID_PARAM;

    // perform search for my rank if not already set
    if (s->rank == SSG_RANK_UNKNOWN) {
        hret = ssg_resolve_rank(s, s->hgcl);
        if (hret != HG_SUCCESS) return hret;
    }

    if (s->rank == SSG_EXTERNAL_RANK) {
        // do a completely arbitrary effective rank determination to try and
        // prevent everyone talking to the same member at once
        eff_rank = (((intptr_t)hgctx)/sizeof(hgctx)) % s->num_addrs;
        include_self = 1;
    }
    else
        eff_rank = s->rank;

    // initialize ULTs
    ults = malloc(s->num_addrs * sizeof(*ults));
    if (ults == NULL) return HG_NOMEM_ERROR;
    args = malloc(s->num_addrs * sizeof(*args));
    if (args == NULL) {
        free(ults);
        return HG_NOMEM_ERROR;
    }
    for (int i = 0; i < s->num_addrs; i++)
        ults[i] = ABT_THREAD_NULL;

    // start one ULT per address: walk the ring starting just after eff_rank
    // and wrapping around; the final step lands on eff_rank itself and is
    // only taken by non-members
    for (int step = 1; step < s->num_addrs + include_self; step++) {
        int i = (eff_rank + step) % s->num_addrs;

        args[i].ssg = s;
        args[i].mid = mid;
        args[i].rank = i;

        int aret = ABT_thread_create(*margo_get_handler_pool(mid), &lookup_ult,
                &args[i], ABT_THREAD_ATTR_NULL, &ults[i]);
        if (aret != ABT_SUCCESS) {
            hret = HG_OTHER_ERROR;
            goto fin;
        }
    }

    // wait on all
    for (int i = 0; i < s->num_addrs; i++) {
        if (ults[i] != ABT_THREAD_NULL) {
            int aret = ABT_thread_join(ults[i]);
            ABT_thread_free(&ults[i]);
            ults[i] = ABT_THREAD_NULL; // in case of cascading failure from join
            if (aret != ABT_SUCCESS) {
                hret = HG_OTHER_ERROR;
                break;
            }
            else if (args[i].out != HG_SUCCESS) {
                hret = args[i].out;
                break;
            }
        }
    }

fin:
    // cleanup: anything still non-null here was started but never joined
    for (int i = 0; i < s->num_addrs; i++) {
        if (ults[i] != ABT_THREAD_NULL) {
            ABT_thread_cancel(ults[i]);
            // ABT_thread_free takes the address of the handle
            ABT_thread_free(&ults[i]);
        }
    }
    free(ults);
    free(args);

    return hret;
}
#endif

Jonathan Jenkins's avatar
Jonathan Jenkins committed
451
452
/*
 * Release a group descriptor: every non-null mercury address it holds is
 * freed against s->hgcl, followed by the backing buffer, the string table,
 * the address table, and the descriptor itself. SSG_NULL is accepted and
 * ignored.
 */
void ssg_finalize(ssg_t s)
{
    if (s != SSG_NULL) {
        int idx = 0;

        while (idx < s->num_addrs) {
            hg_addr_t member = s->addrs[idx];
            if (member != HG_ADDR_NULL)
                HG_Addr_free(s->hgcl, member);
            idx++;
        }

        free(s->backing_buf);
        free(s->addr_strs);
        free(s->addrs);
        free(s);
    }
}

// Returns the rank stored in the group descriptor (no validation of s).
int ssg_get_rank(const ssg_t s)
{
    const int my_rank = s->rank;

    return my_rank;
}

// Returns the number of addresses held by the group (no validation of s).
int ssg_get_count(const ssg_t s)
{
    const int member_count = s->num_addrs;

    return member_count;
}

Jonathan Jenkins's avatar
Jonathan Jenkins committed
474
hg_addr_t ssg_get_addr(const ssg_t s, int rank)
Jonathan Jenkins's avatar
Jonathan Jenkins committed
475
{
476
    if (rank >= 0 && rank < s->num_addrs)
Jonathan Jenkins's avatar
Jonathan Jenkins committed
477
478
        return s->addrs[rank];
    else
Jonathan Jenkins's avatar
Jonathan Jenkins committed
479
        return HG_ADDR_NULL;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
480
481
482
483
}

const char * ssg_get_addr_str(const ssg_t s, int rank)
{
484
    if (rank >= 0 && rank < s->num_addrs)
Jonathan Jenkins's avatar
Jonathan Jenkins committed
485
486
487
488
489
490
491
        return s->addr_strs[rank];
    else
        return NULL;
}

// serialization format looks like:
// < num members, buffer size, buffer... >
Jonathan Jenkins's avatar
Jonathan Jenkins committed
492
// doesn't attempt to grab hg_addr's, string buffers, etc. - client will be
Jonathan Jenkins's avatar
Jonathan Jenkins committed
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
// responsible for doing a separate address lookup routine
hg_return_t hg_proc_ssg_t(hg_proc_t proc, ssg_t *s)
{
    // error and return handling
    hg_return_t hret = HG_SUCCESS;
    char * err_str = NULL;

    // input/output vars + helpers for ssg decode setup
    ssg_t ss = NULL;

    switch(hg_proc_get_op(proc)) {
        case HG_ENCODE:
            ss = *s;
            // encode address count
            hret = hg_proc_int32_t(proc, &ss->num_addrs);
            if (hret != HG_SUCCESS) { err_str = "ssg num addrs"; goto end; }
            // encode addr
            hret = hg_proc_int32_t(proc, &ss->buf_size);
            if (hret != HG_SUCCESS) { err_str = "ssg buf size"; goto end; }
            // encode addr string, simple as blitting the backing buffer
            hret = hg_proc_memcpy(proc, ss->backing_buf, ss->buf_size);
            if (hret != HG_SUCCESS) { err_str = "ssg addr buf"; goto end; }
            break;

        case HG_DECODE:
            // create the output
            *s = NULL;
            ss = malloc(sizeof(*ss));
            if (ss == NULL) {
                err_str = "ssg alloc";
                hret = HG_NOMEM_ERROR;
                goto end;
            }
            ss->addr_strs = NULL;
            ss->addrs = NULL;
            ss->backing_buf = NULL;
            // get address count
            hret = hg_proc_int32_t(proc, &ss->num_addrs);
            if (hret != HG_SUCCESS) { err_str = "ssg num addrs"; goto end; }
            // get number of bytes for the address
            hret = hg_proc_int32_t(proc, &ss->buf_size);
            if (hret != HG_SUCCESS) { err_str = "ssg buf size"; goto end; }
            // allocate output buffer
            ss->backing_buf = malloc(ss->buf_size);
            if (hret != HG_SUCCESS) {
                err_str = "ssg buf alloc";
                hret = HG_NOMEM_ERROR;
                goto end;
            }
            hret = hg_proc_memcpy(proc, ss->backing_buf, ss->buf_size);
            if (hret != HG_SUCCESS) { err_str = "ssg addr buf"; goto end; }

            // set the remaining ssg vars

            ss->addr_strs = NULL; ss->addrs = NULL;
            ss->rank = -1; // receivers aren't part of the group

            ss->addr_strs = setup_addr_str_list(ss->num_addrs, ss->backing_buf);
            if (ss->addr_strs == NULL) {
                err_str = "ssg addr strs alloc";
                hret = HG_NOMEM_ERROR;
                goto end;
            }

            ss->addrs = malloc(ss->num_addrs * sizeof(*ss->addrs));
            if (ss->addrs == NULL) {
                err_str = "ssg addrs alloc";
                hret = HG_NOMEM_ERROR;
                goto end;
            }
            for (int i = 0; i < ss->num_addrs; i++) {
Jonathan Jenkins's avatar
Jonathan Jenkins committed
564
                ss->addrs[i] = HG_ADDR_NULL;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
            }

            // success: set the output
            *s = ss;
            break;

        case HG_FREE:
            if (s != NULL && *s != NULL) {
                err_str = "ssg shouldn't be freed via HG_Free_*";
                hret = HG_INVALID_PARAM;
            }
            goto end;

        default:
            err_str = "bad proc mode";
            hret = HG_INVALID_PARAM;
    }
end:
    if (err_str) {
        HG_LOG_ERROR("Proc error: %s", err_str);
        if (hg_proc_get_op(proc) == HG_DECODE) {
            free(ss->addr_strs);
            free(ss->addrs);
            free(ss->backing_buf);
            free(ss);
        }
    }
    return hret;
}

595
596
597
598
599
600
601
602
603
604
605
606
607
608
/*
 * Write the group's address strings to a new file, one per line.
 *
 * s     - group to dump
 * fname - path of the file to create; it is opened with O_CREAT | O_EXCL,
 *         so the call fails if the file already exists
 *
 * Returns 0 on success, -1 on failure. errno is ENOMEM if the scratch copy
 * cannot be allocated and EINVAL if the backing buffer holds fewer
 * terminators than s->num_addrs; otherwise it is whatever the failing
 * open/write/close call set.
 */
int ssg_dump(const ssg_t s, const char *fname)
{
    // file to write to
    int fd = -1;

    // string to xform and dump
    char * addrs_dup = NULL;
    char * tok = NULL;
    char * addrs_dup_end = NULL;

    // write cursor
    const char * out = NULL;
    size_t left = 0;

    // return code
    int ret = 0;

    // copy the backing buffer, replacing all null chars with
    // newlines
    addrs_dup = malloc(s->buf_size);
    if (addrs_dup == NULL) { errno = ENOMEM; ret = -1; goto end; }
    memcpy(addrs_dup, s->backing_buf, s->buf_size);
    tok = addrs_dup;
    addrs_dup_end = addrs_dup + s->buf_size;
    for (int i = 0; i < s->num_addrs; i++) {
        // tok sits on the previous (now '\n') terminator, so the search
        // finds the next one
        tok = memchr(tok, '\0', addrs_dup_end - tok);
        if (tok == NULL) { errno = EINVAL; ret = -1; goto end; }
        *tok = '\n';
    }

    // open the file; refuse to clobber an existing one
    fd = open(fname, O_WRONLY | O_CREAT | O_EXCL, 0644);
    if (fd == -1) { ret = -1; goto end; }

    // write() may accept fewer bytes than asked for or be interrupted by a
    // signal without that being a failure, so keep going until all is out
    out = addrs_dup;
    left = (size_t)s->buf_size;
    while (left > 0) {
        ssize_t written = write(fd, out, left);
        if (written == -1) {
            if (errno == EINTR) continue;
            ret = -1;
            break;
        }
        if (written == 0) {
            // no progress and no error reported: bail out rather than spin
            errno = EIO;
            ret = -1;
            break;
        }
        out += written;
        left -= (size_t)written;
    }

end:
    {
        // free() and close() may disturb errno; report the first failure
        int saved_errno = errno;
        free(addrs_dup);
        if (fd != -1 && close(fd) == -1 && ret == 0) {
            // a failed close can be the only sign that data was not stored
            ret = -1;
            saved_errno = errno;
        }
        errno = saved_errno;
    }

    return ret;
}

Jonathan Jenkins's avatar
Jonathan Jenkins committed
636
637
typedef struct serv_addr_out
{
Jonathan Jenkins's avatar
Jonathan Jenkins committed
638
    hg_addr_t addr;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
639
640
641
    int set;
} serv_addr_out_t;

Jonathan Jenkins's avatar
Jonathan Jenkins committed
642
static hg_return_t lookup_serv_addr_cb(const struct hg_cb_info *info)
Jonathan Jenkins's avatar
Jonathan Jenkins committed
643
644
645
646
{
    serv_addr_out_t *out = info->arg;
    out->addr = info->info.lookup.addr;
    out->set = 1;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
647
    return HG_SUCCESS;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
648
649
}

Jonathan Jenkins's avatar
Jonathan Jenkins committed
650
651
// Looks up one address string and blocks, driving the context's trigger and
// progress calls, until the lookup callback has fired.
//
// Returns the address handed to the callback, or HG_ADDR_NULL if the lookup
// could not be posted or progress failed with anything other than a timeout
// before the callback ran.
static hg_addr_t lookup_serv_addr(
        hg_context_t *hgctx,
        const char *info_str)
{
    serv_addr_out_t result;
    hg_return_t rc;

    result.addr = HG_ADDR_NULL;
    result.set = 0;

    rc = HG_Addr_lookup(hgctx, &lookup_serv_addr_cb, &result,
            info_str, HG_OP_ID_IGNORE);
    if (rc != HG_SUCCESS)
        return HG_ADDR_NULL;

    for (;;) {
        unsigned int triggered = 0;

        // drain every callback that is ready to run
        do {
            rc = HG_Trigger(hgctx, 0, 1, &triggered);
        } while (rc == HG_SUCCESS && triggered > 0);

        // our callback has fired - done
        if (result.set != 0)
            break;

        // otherwise make progress; a timeout just means "try again"
        rc = HG_Progress(hgctx, 5000);
        if (rc != HG_SUCCESS && rc != HG_TIMEOUT)
            break;
    }

    return result.addr;
}

// Builds a table of num_addrs pointers into buf, which must hold that many
// null-terminated strings packed back-to-back. Entry i points at the start
// of the i-th string; nothing is copied, so the table is only valid while
// buf is.
//
// Returns the malloc'd table (caller frees it), or NULL if num_addrs is not
// positive, buf is NULL, or the allocation fails.
static char** setup_addr_str_list(int num_addrs, char * buf)
{
    char ** ret;

    // a zero count would make ret[0] below write past a zero-sized
    // allocation, and a negative one would turn into a huge size
    if (num_addrs <= 0 || buf == NULL) return NULL;

    ret = malloc(num_addrs * sizeof(*ret));
    if (ret == NULL) return NULL;

    ret[0] = buf;
    for (int i = 1; i < num_addrs; i++) {
        // each string starts right after the previous one's terminator
        char * a = ret[i-1];
        ret[i] = a + strlen(a) + 1;
    }
    return ret;
}