margo.c 43.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11

/*
 * (C) 2015 The University of Chicago
 * 
 * See COPYRIGHT in top-level directory.
 */

#include <assert.h>
#include <unistd.h>
#include <errno.h>
#include <abt.h>
12
#include <stdlib.h>
13 14 15

#include <margo-config.h>
#ifdef HAVE_ABT_SNOOZER
16
#include <abt-snoozer.h>
17
#endif
18
#include <time.h>
Philip Carns's avatar
Philip Carns committed
19
#include <math.h>
20 21

#include "margo.h"
22
#include "margo-timer.h"
Philip Carns's avatar
Philip Carns committed
23
#include "utlist.h"
24
#include "uthash.h"
25

26
#define DEFAULT_MERCURY_PROGRESS_TIMEOUT_UB 100 /* 100 milliseconds */
Shane Snyder's avatar
Shane Snyder committed
27
#define DEFAULT_MERCURY_HANDLE_CACHE_SIZE 32
28

29
struct provider_element
30 31 32
{
    hg_id_t id;
    ABT_pool pool;
33 34
    void* user_data;
    void(*user_free_callback)(void*);
35 36 37
    UT_hash_handle hh;
};

Philip Carns's avatar
Philip Carns committed
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
/* Running statistics for one diagnostic measurement. */
struct diag_data
{
    double min;        /* smallest sample folded in so far */
    double max;        /* largest sample folded in so far */
    double cumulative; /* sum of all samples */
    int count;         /* number of samples */
};

/*
 * Fold one sample into a struct diag_data.
 *
 * __data: an lvalue of type struct diag_data (any lvalue expression, e.g.
 *         `*ptr` or `mid->diag_trigger_elapsed`).
 * __time: the sample; evaluated exactly once and converted to double, so
 *         expressions with side effects are safe to pass.
 */
#define __DIAG_UPDATE(__data, __time)\
do {\
    struct diag_data *diag_target_ = &(__data); \
    const double diag_sample_ = (__time); \
    diag_target_->count++; \
    diag_target_->cumulative += diag_sample_; \
    if(diag_sample_ > diag_target_->max) diag_target_->max = diag_sample_; \
    if(diag_sample_ < diag_target_->min) diag_target_->min = diag_sample_; \
} while(0)

Shane Snyder's avatar
Shane Snyder committed
54 55 56 57 58 59 60
/*
 * One element of the Mercury handle cache.  The same element type carries
 * both a uthash link and a singly-linked-list link; margo_instance keeps a
 * free list (free_handle_list) and an in-use hash (used_handle_hash) of
 * these elements.
 */
struct margo_handle_cache_el
{
    hg_handle_t handle; /* the cached Mercury handle */
    UT_hash_handle hh; /* in-use hash link */
    struct margo_handle_cache_el *next; /* free list link */
};

61 62 63 64 65 66 67
/*
 * Node of the singly-linked list of user callbacks that margo_cleanup()
 * invokes (and frees) when an instance is torn down.  New nodes are pushed
 * at the head by margo_push_finalize_callback().
 */
struct margo_finalize_cb
{
    void(*callback)(void*);         /* function to call at cleanup time */
    void* uargs;                    /* argument passed to callback */
    struct margo_finalize_cb* next; /* next node, or NULL at the tail */
};

68 69
struct margo_timer_list; /* defined in margo-timer.c */

70 71
struct margo_instance
{
Shane Snyder's avatar
Shane Snyder committed
72
    /* mercury/argobots state */
73 74
    hg_context_t *hg_context;
    hg_class_t *hg_class;
75 76 77
    ABT_pool handler_pool;
    ABT_pool progress_pool;

78
    /* internal to margo for this particular instance */
79
    int margo_init;
80
    int abt_init;
81 82
    ABT_thread hg_progress_tid;
    int hg_progress_shutdown_flag;
83
    ABT_xstream progress_xstream;
84 85 86
    int owns_progress_pool;
    ABT_xstream *rpc_xstreams;
    int num_handler_pool_threads;
87
    unsigned int hg_progress_timeout_ub;
88 89 90

    /* control logic for callers waiting on margo to be finalized */
    int finalize_flag;
91
    int refcount;
92 93
    ABT_mutex finalize_mutex;
    ABT_cond finalize_cond;
94
    struct margo_finalize_cb* finalize_cb;
95

96 97 98 99 100 101
    /* control logic to prevent margo_finalize from destroying
       the instance when some operations are pending */
    unsigned pending_operations;
    ABT_mutex pending_operations_mtx;
    int finalize_requested;

Matthieu Dorier's avatar
Matthieu Dorier committed
102 103 104 105
    /* control logic for shutting down */
    hg_id_t shutdown_rpc_id;
    int enable_remote_shutdown;

106 107 108
    /* timer data */
    struct margo_timer_list* timer_list;

109 110
    /* hash table to track provider IDs registered with margo */
    struct provider_element *provider_table;
Philip Carns's avatar
Philip Carns committed
111

Shane Snyder's avatar
Shane Snyder committed
112 113 114
    /* linked list of free hg handles and a hash of in-use handles */
    struct margo_handle_cache_el *free_handle_list;
    struct margo_handle_cache_el *used_handle_hash;
115
    ABT_mutex handle_cache_mtx; /* mutex protecting access to above caches */
Shane Snyder's avatar
Shane Snyder committed
116

Philip Carns's avatar
Philip Carns committed
117 118 119 120 121 122 123 124 125 126 127
    /* optional diagnostics data tracking */
    /* NOTE: technically the following fields are subject to races if they
     * are updated from more than one thread at a time.  We will be careful
     * to only update the counters from the progress_fn,
     * which will serialize access.
     */
    int diag_enabled;
    struct diag_data diag_trigger_elapsed;
    struct diag_data diag_progress_elapsed_zero_timeout;
    struct diag_data diag_progress_elapsed_nonzero_timeout;
    struct diag_data diag_progress_timeout_value;
128 129
};

130 131 132 133 134 135
/*
 * Per-RPC record that margo retrieves with HG_Registered_data().  It ties a
 * registered RPC id to its margo instance and to optional user data set via
 * margo_register_data().
 */
struct margo_rpc_data
{
	margo_instance_id mid;
	void* user_data;                    /* user pointer returned by margo_registered_data() */
	void (*user_free_callback)(void *); /* optional; called on user_data before it is replaced */
};
136

Matthieu Dorier's avatar
Matthieu Dorier committed
137 138
MERCURY_GEN_PROC(margo_shutdown_out_t, ((int32_t)(ret)))

139
static void hg_progress_fn(void* foo);
140
static void margo_rpc_data_free(void* ptr);
Matthieu Dorier's avatar
Matthieu Dorier committed
141 142
static void remote_shutdown_ult(hg_handle_t handle);
DECLARE_MARGO_RPC_HANDLER(remote_shutdown_ult);
143

144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
/*
 * Split a combined RPC id into its two parts.
 *
 * in:          combined id
 * base_id:     receives `in` with the low __MARGO_PROVIDER_ID_SIZE bytes zeroed
 * provider_id: receives the value held in those low bytes
 */
static inline void demux_id(hg_id_t in, hg_id_t* base_id, uint16_t *provider_id)
{
    /* the provider id occupies the low-order bits */
    const hg_id_t low_bits = in & ((1<<(__MARGO_PROVIDER_ID_SIZE*8))-1);
    *provider_id = (uint16_t)low_bits;

    /* shifting right then left wipes those same bits from the base id */
    *base_id = (in >> (__MARGO_PROVIDER_ID_SIZE*8))
                   << (__MARGO_PROVIDER_ID_SIZE*8);
}

/*
 * Combine a base RPC id with a provider id: the low
 * __MARGO_PROVIDER_ID_SIZE bytes of base_id are cleared and replaced by
 * provider_id.  Returns the combined id.
 */
static inline hg_id_t mux_id(hg_id_t base_id, uint16_t provider_id)
{
    const hg_id_t cleared = (base_id >> (__MARGO_PROVIDER_ID_SIZE*8))
                                     << (__MARGO_PROVIDER_ID_SIZE*8);

    return cleared | provider_id;
}

/*
 * Derive an RPC id from a function name and a provider id: the name is
 * hashed with HASH_JEN, the hash is shifted left by
 * __MARGO_PROVIDER_ID_SIZE bytes, and provider_id is OR-ed into the low bits.
 */
static inline hg_id_t gen_id(const char* func_name, uint16_t provider_id)
{
    unsigned name_hash;
    hg_id_t result;

    HASH_JEN(func_name, strlen(func_name), name_hash);

    /* the shift is carried out in `unsigned` before widening to hg_id_t */
    result = name_hash << (__MARGO_PROVIDER_ID_SIZE*8);
    result |= provider_id;

    return result;
}

Shane Snyder's avatar
Shane Snyder committed
180 181 182 183 184 185
static hg_return_t margo_handle_cache_init(margo_instance_id mid);
static void margo_handle_cache_destroy(margo_instance_id mid);
static hg_return_t margo_handle_cache_get(margo_instance_id mid,
    hg_addr_t addr, hg_id_t id, hg_handle_t *handle);
static hg_return_t margo_handle_cache_put(margo_instance_id mid,
    hg_handle_t handle);
186 187
static void delete_provider_hash(margo_instance_id mid);
static int margo_lookup_provider(margo_instance_id mid, hg_id_t id, uint16_t provider_id, ABT_pool *pool);
188 189
static hg_id_t margo_register_internal(margo_instance_id mid, hg_id_t id,
    hg_proc_cb_t in_proc_cb, hg_proc_cb_t out_proc_cb, hg_rpc_cb_t rpc_cb);
Shane Snyder's avatar
Shane Snyder committed
190

191
margo_instance_id margo_init(const char *addr_str, int mode,
192
    int use_progress_thread, int rpc_thread_count)
193
{
194 195 196 197 198
    ABT_xstream progress_xstream = ABT_XSTREAM_NULL;
    ABT_pool progress_pool = ABT_POOL_NULL;
    ABT_xstream *rpc_xstreams = NULL;
    ABT_xstream rpc_xstream = ABT_XSTREAM_NULL;
    ABT_pool rpc_pool = ABT_POOL_NULL;
199 200
    hg_class_t *hg_class = NULL;
    hg_context_t *hg_context = NULL;
201
    int listen_flag = (mode == MARGO_CLIENT_MODE) ? HG_FALSE : HG_TRUE;
202
    int abt_init = 0;
203
    int i;
204 205 206
    int ret;
    struct margo_instance *mid = MARGO_INSTANCE_NULL;

207
    if(mode != MARGO_CLIENT_MODE && mode != MARGO_SERVER_MODE) goto err;
208

209 210 211 212 213 214 215 216 217 218 219 220
    /* NOTE: Margo is very likely to create a single producer (the
     * progress function), multiple consumer usage pattern that
     * causes excess memory consumption in some versions of
     * Argobots.  See
     * https://xgitlab.cels.anl.gov/sds/margo/issues/40 for details.
     * We therefore manually set the ABT_MEM_MAX_NUM_STACKS parameter 
     * for Argobots to a low value so that RPC handler threads do not
     * queue large numbers of stacks for reuse in per-ES data 
     * structures.
     */
    putenv("ABT_MEM_MAX_NUM_STACKS=8");

221 222 223 224 225 226
    if (ABT_initialized() == ABT_ERR_UNINITIALIZED)
    {
        ret = ABT_init(0, NULL); /* XXX: argc/argv not currently used by ABT ... */
        if(ret != 0) goto err;
        abt_init = 1;
    }
227

228
    /* set caller (self) ES to idle without polling */
229
#ifdef HAVE_ABT_SNOOZER
230 231
    ret = ABT_snoozer_xstream_self_set();
    if(ret != 0) goto err;
232
#endif
233 234 235

    if (use_progress_thread)
    {
236
#ifdef HAVE_ABT_SNOOZER
237
        ret = ABT_snoozer_xstream_create(1, &progress_pool, &progress_xstream);
238 239 240 241 242 243 244
		if (ret != ABT_SUCCESS) goto err;
#else
		ret = ABT_xstream_create(ABT_SCHED_NULL, &progress_xstream);
		if (ret != ABT_SUCCESS) goto err;
		ret = ABT_xstream_get_main_pools(progress_xstream, 1, &progress_pool);
		if (ret != ABT_SUCCESS) goto err;
#endif
245 246 247 248 249 250 251 252 253
    }
    else
    {
        ret = ABT_xstream_self(&progress_xstream);
        if (ret != ABT_SUCCESS) goto err;
        ret = ABT_xstream_get_main_pools(progress_xstream, 1, &progress_pool);
        if (ret != ABT_SUCCESS) goto err;
    }

254
    if (rpc_thread_count > 0)
255
    {
256 257
        rpc_xstreams = calloc(rpc_thread_count, sizeof(*rpc_xstreams));
        if (rpc_xstreams == NULL) goto err;
258
#ifdef HAVE_ABT_SNOOZER
259 260 261
        ret = ABT_snoozer_xstream_create(rpc_thread_count, &rpc_pool,
                rpc_xstreams);
        if (ret != ABT_SUCCESS) goto err;
262
#else
263 264 265 266 267
        int j;
        ret = ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE, &rpc_pool);
        if (ret != ABT_SUCCESS) goto err;
        for(j=0; j<rpc_thread_count; j++) {
            ret = ABT_xstream_create(ABT_SCHED_NULL, rpc_xstreams+j);
268 269
            if (ret != ABT_SUCCESS) goto err;
        }
270 271 272 273 274 275 276 277 278 279 280 281
#endif
    }
    else if (rpc_thread_count == 0)
    {
        ret = ABT_xstream_self(&rpc_xstream);
        if (ret != ABT_SUCCESS) goto err;
        ret = ABT_xstream_get_main_pools(rpc_xstream, 1, &rpc_pool);
        if (ret != ABT_SUCCESS) goto err;
    }
    else
    {
        rpc_pool = progress_pool;
282 283
    }

284 285 286 287 288 289
    hg_class = HG_Init(addr_str, listen_flag);
    if(!hg_class) goto err;

    hg_context = HG_Context_create(hg_class);
    if(!hg_context) goto err;

290 291 292
    mid = margo_init_pool(progress_pool, rpc_pool, hg_context);
    if (mid == MARGO_INSTANCE_NULL) goto err;

293
    mid->margo_init = 1;
294
    mid->abt_init = abt_init;
295 296 297
    mid->owns_progress_pool = use_progress_thread;
    mid->progress_xstream = progress_xstream;
    mid->num_handler_pool_threads = rpc_thread_count < 0 ? 0 : rpc_thread_count;
298
    mid->rpc_xstreams = rpc_xstreams;
299

300 301 302
    return mid;

err:
303 304
    if(mid)
    {
305
        margo_timer_list_free(mid->timer_list);
306 307 308 309
        ABT_mutex_free(&mid->finalize_mutex);
        ABT_cond_free(&mid->finalize_cond);
        free(mid);
    }
310 311 312 313 314 315 316 317 318 319 320 321 322 323
    if (use_progress_thread && progress_xstream != ABT_XSTREAM_NULL)
    {
        ABT_xstream_join(progress_xstream);
        ABT_xstream_free(&progress_xstream);
    }
    if (rpc_thread_count > 0 && rpc_xstreams != NULL)
    {
        for (i = 0; i < rpc_thread_count; i++)
        {
            ABT_xstream_join(rpc_xstreams[i]);
            ABT_xstream_free(&rpc_xstreams[i]);
        }
        free(rpc_xstreams);
    }
324 325 326 327
    if(hg_context)
        HG_Context_destroy(hg_context);
    if(hg_class)
        HG_Finalize(hg_class);
328 329
    if(abt_init)
        ABT_finalize();
330 331 332 333
    return MARGO_INSTANCE_NULL;
}

margo_instance_id margo_init_pool(ABT_pool progress_pool, ABT_pool handler_pool,
334
    hg_context_t *hg_context)
335 336
{
    int ret;
Shane Snyder's avatar
Shane Snyder committed
337
    hg_return_t hret;
338 339
    struct margo_instance *mid;

Matthieu Dorier's avatar
Matthieu Dorier committed
340
    mid = calloc(1,sizeof(*mid));
341
    if(!mid) goto err;
342
    memset(mid, 0, sizeof(*mid));
343

344 345 346
    ABT_mutex_create(&mid->finalize_mutex);
    ABT_cond_create(&mid->finalize_cond);

347 348
    mid->progress_pool = progress_pool;
    mid->handler_pool = handler_pool;
349
    mid->hg_class = HG_Context_get_class(hg_context);
350
    mid->hg_context = hg_context;
351
    mid->hg_progress_timeout_ub = DEFAULT_MERCURY_PROGRESS_TIMEOUT_UB;
352
    mid->provider_table = NULL;
353
    mid->refcount = 1;
354
    mid->finalize_cb = NULL;
Matthieu Dorier's avatar
Matthieu Dorier committed
355
    mid->enable_remote_shutdown = 0;
356

357 358 359 360
    mid->pending_operations = 0;
    ABT_mutex_create(&mid->pending_operations_mtx);
    mid->finalize_requested = 0;

361 362
    mid->timer_list = margo_timer_list_create();
    if(mid->timer_list == NULL) goto err;
363

Shane Snyder's avatar
Shane Snyder committed
364 365 366 367
    /* initialize the handle cache */
    hret = margo_handle_cache_init(mid);
    if(hret != HG_SUCCESS) goto err;

368
    ret = ABT_thread_create(mid->progress_pool, hg_progress_fn, mid, 
369
        ABT_THREAD_ATTR_NULL, &mid->hg_progress_tid);
370 371
    if(ret != 0) goto err;

Matthieu Dorier's avatar
Matthieu Dorier committed
372 373 374
    mid->shutdown_rpc_id = MARGO_REGISTER(mid, "__shutdown__", 
            void, margo_shutdown_out_t, remote_shutdown_ult);

375 376
    return mid;

377 378
err:
    if(mid)
379
    {
Shane Snyder's avatar
Shane Snyder committed
380
        margo_handle_cache_destroy(mid);
381
        margo_timer_list_free(mid->timer_list);
382 383
        ABT_mutex_free(&mid->finalize_mutex);
        ABT_cond_free(&mid->finalize_cond);
384
        ABT_mutex_free(&mid->pending_operations_mtx);
385
        free(mid);
386
    }
387
    return MARGO_INSTANCE_NULL;
388 389
}

390 391 392 393
/*
 * Release everything owned by an instance.  Runs the registered finalize
 * callbacks first, then frees timers, the provider table, synchronization
 * objects, the execution streams margo created, the handle cache and, for
 * instances created through margo_init(), Mercury (and Argobots when margo
 * initialized it).  `mid` is freed and must not be used afterwards.
 */
static void margo_cleanup(margo_instance_id mid)
{
    int i;

    /* call finalize callbacks */
    struct margo_finalize_cb* fcb = mid->finalize_cb;
    while(fcb) {
        (fcb->callback)(fcb->uargs);
        struct margo_finalize_cb* tmp = fcb;
        fcb = fcb->next;
        free(tmp);
    }

    margo_timer_list_free(mid->timer_list);

    /* delete the hash used for provider IDs */
    delete_provider_hash(mid);

    ABT_mutex_free(&mid->finalize_mutex);
    ABT_cond_free(&mid->finalize_cond);
    ABT_mutex_free(&mid->pending_operations_mtx);

    if (mid->owns_progress_pool)
    {
        ABT_xstream_join(mid->progress_xstream);
        ABT_xstream_free(&mid->progress_xstream);
    }

    if (mid->num_handler_pool_threads > 0)
    {
        for (i = 0; i < mid->num_handler_pool_threads; i++)
        {
            ABT_xstream_join(mid->rpc_xstreams[i]);
            ABT_xstream_free(&mid->rpc_xstreams[i]);
        }
        free(mid->rpc_xstreams);
    }

    margo_handle_cache_destroy(mid);

    /* Mercury/Argobots are only torn down when margo_init() created them */
    if (mid->margo_init)
    {
        if (mid->hg_context)
            HG_Context_destroy(mid->hg_context);
        if (mid->hg_class)
            HG_Finalize(mid->hg_class);
        if (mid->abt_init)
            ABT_finalize();
    }

    free(mid);
}

443
/*
 * Shut an instance down.
 *
 * If operations are still pending, only records that finalization was
 * requested and returns (NOTE(review): the code that completes pending
 * operations is presumably responsible for acting on finalize_requested;
 * it is not visible here).  Otherwise stops the progress ULT, wakes up
 * callers blocked in margo_wait_for_finalize(), and cleans up the instance
 * if no such caller still holds a reference.
 */
void margo_finalize(margo_instance_id mid)
{
    int do_cleanup;
    int pending;

    /* check if there are pending operations; finalize_requested is set
     * while the mutex is still held so that an operation completing right
     * after the check cannot miss the request */
    ABT_mutex_lock(mid->pending_operations_mtx);
    pending = mid->pending_operations;
    if(pending)
        mid->finalize_requested = 1;
    ABT_mutex_unlock(mid->pending_operations_mtx);
    if(pending)
        return;

    /* tell progress thread to wrap things up */
    mid->hg_progress_shutdown_flag = 1;

    /* wait for it to shutdown cleanly */
    ABT_thread_join(mid->hg_progress_tid);
    ABT_thread_free(&mid->hg_progress_tid);

    ABT_mutex_lock(mid->finalize_mutex);
    mid->finalize_flag = 1;
    ABT_cond_broadcast(mid->finalize_cond);

    mid->refcount--;
    do_cleanup = mid->refcount == 0;

    ABT_mutex_unlock(mid->finalize_mutex);

    /* if there was no one waiting on the finalize at the time of the finalize
     * broadcast, then we're safe to clean up. Otherwise, let the finalizer do
     * it */
    if (do_cleanup)
        margo_cleanup(mid);

    return;
}

/*
 * Block the calling ULT until margo_finalize() has set the finalize flag on
 * this instance.  The caller holds a reference while waiting; whichever of
 * the waiter or the finalizer drops the last reference performs the cleanup.
 */
void margo_wait_for_finalize(margo_instance_id mid)
{
    int do_cleanup;

    ABT_mutex_lock(mid->finalize_mutex);

        mid->refcount++;

        while(!mid->finalize_flag)
            ABT_cond_wait(mid->finalize_cond, mid->finalize_mutex);

        mid->refcount--;
        do_cleanup = mid->refcount == 0;

    ABT_mutex_unlock(mid->finalize_mutex);

    if (do_cleanup)
        margo_cleanup(mid);

    return;
}

504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520
/*
 * Register a callback to be run by margo_cleanup() when the instance is
 * torn down.  Callbacks are pushed at the head of the list, so they run in
 * reverse order of registration.
 *
 * mid:   instance to attach the callback to
 * cb:    callback; a NULL callback is ignored
 * uargs: argument passed to cb
 *
 * The function has no way to report failure: if the list node cannot be
 * allocated the callback is not registered.
 */
void margo_push_finalize_callback(
            margo_instance_id mid,
            void(*cb)(void*),
            void* uargs)
{
    struct margo_finalize_cb* fcb;

    if(cb == NULL) return;

    fcb = malloc(sizeof(*fcb));
    if(fcb == NULL) return; /* out of memory: avoid dereferencing NULL */

    fcb->callback = cb;
    fcb->uargs = uargs;
    fcb->next = mid->finalize_cb;
    mid->finalize_cb = fcb;
}

Matthieu Dorier's avatar
Matthieu Dorier committed
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536
/* Set the instance's enable_remote_shutdown flag. */
void margo_enable_remote_shutdown(margo_instance_id mid)
{
    struct margo_instance *instance = mid;

    instance->enable_remote_shutdown = 1;
}

/*
 * Send the internal shutdown RPC to a remote margo instance and wait for
 * the reply.
 *
 * mid:         local instance used to issue the RPC
 * remote_addr: address of the instance to shut down
 *
 * Returns the `ret` field of the remote reply, or -1 if the RPC could not be
 * created, forwarded, or its output decoded.
 */
int margo_shutdown_remote_instance(
        margo_instance_id mid,
        hg_addr_t remote_addr)
{
    hg_return_t hret;
    hg_handle_t handle;
    margo_shutdown_out_t out;
    int remote_ret;

    hret = margo_create(mid, remote_addr,
                        mid->shutdown_rpc_id, &handle);
    if(hret != HG_SUCCESS) return -1;

    hret = margo_forward(handle, NULL);
    if(hret != HG_SUCCESS)
    {
        margo_destroy(handle);
        return -1;
    }

    hret = margo_get_output(handle, &out);
    if(hret != HG_SUCCESS)
    {
        margo_destroy(handle);
        return -1;
    }

    /* copy the result out before the output structure is released */
    remote_ret = out.ret;

    margo_free_output(handle, &out);
    margo_destroy(handle);

    return remote_ret;
}

558
/*
 * Register an RPC by name for a given provider.
 *
 * The RPC id is derived from func_name and provider_id (see gen_id).  For a
 * non-default provider the (id, pool) pair is also recorded in the
 * instance's provider table, unless an entry for that id already exists.
 *
 * provider_id must not exceed MARGO_MAX_PROVIDER_ID (asserted).
 *
 * Returns the RPC id, or 0 if registration or allocation failed.
 */
hg_id_t margo_provider_register_name(margo_instance_id mid, const char *func_name,
    hg_proc_cb_t in_proc_cb, hg_proc_cb_t out_proc_cb, hg_rpc_cb_t rpc_cb,
    uint16_t provider_id, ABT_pool pool)
{
    struct provider_element *element;
    hg_id_t id;
    hg_id_t registered_id;

    assert(provider_id <= MARGO_MAX_PROVIDER_ID);

    id = gen_id(func_name, provider_id);

    /* keep the result in an hg_id_t: margo_register_internal returns an id,
     * and narrowing it to int could misreport it */
    registered_id = margo_register_internal(mid, id, in_proc_cb, out_proc_cb, rpc_cb);
    if(registered_id == 0)
        return(0);

    /* nothing to do, we'll let the handler pool take this directly */
    if(provider_id == MARGO_DEFAULT_PROVIDER_ID)
        return(id);

    HASH_FIND(hh, mid->provider_table, &id, sizeof(id), element);
    if(element)
        return(id);

    element = calloc(1,sizeof(*element));
    if(!element)
        return(0);
    element->id = id;
    element->pool = pool;

    HASH_ADD(hh, mid->provider_table, id, sizeof(id), element);

    return(id);
}

593 594
/*
 * Compute the id of the RPC named func_name (provider id 0) into *id and ask
 * Mercury whether that id is registered; *flag receives the answer.
 * Returns the result of HG_Registered().
 */
hg_return_t margo_registered_name(margo_instance_id mid, const char *func_name,
    hg_id_t *id, hg_bool_t *flag)
{
    *id = gen_id(func_name, 0);
    return(HG_Registered(mid->hg_class, *id, flag));
}

600
/*
 * Check whether the RPC named func_name is registered for provider_id.
 *
 * *id receives the id derived from (func_name, provider_id).  *flag is set
 * to the Mercury registration state when provider_id is 0 or Mercury does
 * not know the id; otherwise it reflects whether the id is present in the
 * instance's provider table.
 *
 * Returns HG_SUCCESS, or the error reported by HG_Registered() (in which
 * case *flag is left untouched).
 */
hg_return_t margo_provider_registered_name(margo_instance_id mid, const char *func_name,
    uint16_t provider_id, hg_id_t *id, hg_bool_t *flag)
{
    hg_bool_t b;
    hg_return_t ret;
    struct provider_element *element;

    *id = gen_id(func_name, provider_id);

    ret = HG_Registered(mid->hg_class, *id, &b);
    if(ret != HG_SUCCESS)
        return ret;
    if((!b) || (!provider_id)) {
        *flag = b;
        return ret;
    }

    HASH_FIND(hh, mid->provider_table, id, sizeof(*id), element);
    if(!element) {
        *flag = 0;
        return HG_SUCCESS;
    }

    assert(element->id == *id);

    *flag = 1;
    return HG_SUCCESS;
}

630 631 632 633 634 635 636
/*
 * Attach user data to a registered RPC id.
 *
 * If data was already attached and a free callback had been supplied for
 * it, that callback is invoked on the old data before it is replaced.
 *
 * Returns HG_SUCCESS, or HG_OTHER_ERROR when Mercury has no registered data
 * for this id.
 */
hg_return_t margo_register_data(
    margo_instance_id mid,
    hg_id_t id,
    void *data,
    void (*free_callback)(void *))
{
    struct margo_rpc_data* margo_data
        = (struct margo_rpc_data*) HG_Registered_data(mid->hg_class, id);
    if(!margo_data) return HG_OTHER_ERROR;
    if(margo_data->user_data && margo_data->user_free_callback) {
        (margo_data->user_free_callback)(margo_data->user_data);
    }
    margo_data->user_data = data;
    margo_data->user_free_callback = free_callback;
    return HG_SUCCESS;
}

/*
 * Return the user data attached to a registered RPC id, or NULL when
 * Mercury has no registered data for that id.
 */
void* margo_registered_data(margo_instance_id mid, hg_id_t id)
{
    struct margo_rpc_data* rpc_data =
        (struct margo_rpc_data*) HG_Registered_data(margo_get_class(mid), id);

    return rpc_data ? rpc_data->user_data : NULL;
}

655 656 657 658
/*
 * Forward to HG_Registered_disable_response() using the instance's Mercury
 * class; returns whatever that call returns.
 */
hg_return_t margo_registered_disable_response(
    margo_instance_id mid,
    hg_id_t id,
    int disable_flag)
{
    return(HG_Registered_disable_response(mid->hg_class, id, disable_flag));
}
662

663
struct lookup_cb_evt
664
{
665
    hg_return_t hret;
666 667 668 669 670 671
    hg_addr_t addr;
};

static hg_return_t margo_addr_lookup_cb(const struct hg_cb_info *info)
{
    struct lookup_cb_evt evt;
672
    evt.hret = info->ret;
673
    evt.addr = info->info.lookup.addr;
Matthieu Dorier's avatar
Matthieu Dorier committed
674
    ABT_eventual eventual = (ABT_eventual)(info->arg);
675 676

    /* propagate return code out through eventual */
Matthieu Dorier's avatar
Matthieu Dorier committed
677
    ABT_eventual_set(eventual, &evt, sizeof(evt));
678

679 680 681
    return(HG_SUCCESS);
}

682 683 684 685
/*
 * Resolve an address string, blocking the calling ULT until Mercury
 * completes the lookup.
 *
 * mid:  instance whose Mercury context performs the lookup
 * name: address string to resolve
 * addr: receives the address reported by the lookup callback
 *
 * Returns HG_NOMEM_ERROR if the eventual cannot be created, the error from
 * HG_Addr_lookup() if the lookup cannot be started, and otherwise the
 * completion status of the lookup.
 */
hg_return_t margo_addr_lookup(
    margo_instance_id mid,
    const char   *name,
    hg_addr_t    *addr)
{
    hg_return_t hret;
    struct lookup_cb_evt *evt;
    ABT_eventual eventual;
    int ret;

    ret = ABT_eventual_create(sizeof(*evt), &eventual);
    if(ret != 0)
    {
        return(HG_NOMEM_ERROR);
    }

    hret = HG_Addr_lookup(mid->hg_context, margo_addr_lookup_cb,
        (void*)eventual, name, HG_OP_ID_IGNORE);
    if(hret == HG_SUCCESS)
    {
        ABT_eventual_wait(eventual, (void**)&evt);
        *addr = evt->addr;
        hret = evt->hret;
    }

    ABT_eventual_free(&eventual);

    return(hret);
}

/* Forward to HG_Addr_free() using the instance's Mercury class. */
hg_return_t margo_addr_free(
    margo_instance_id mid,
    hg_addr_t addr)
{
    return(HG_Addr_free(mid->hg_class, addr));
}
718

719 720 721 722 723
/* Forward to HG_Addr_self() using the instance's Mercury class. */
hg_return_t margo_addr_self(
    margo_instance_id mid,
    hg_addr_t *addr)
{
    return(HG_Addr_self(mid->hg_class, addr));
}

726 727 728 729 730 731 732 733 734
/* Forward to HG_Addr_dup() using the instance's Mercury class; the
 * duplicate is stored in *new_addr. */
hg_return_t margo_addr_dup(
    margo_instance_id mid,
    hg_addr_t addr,
    hg_addr_t *new_addr)
{
    hg_class_t *cls = mid->hg_class;

    return HG_Addr_dup(cls, addr, new_addr);
}

hg_return_t margo_addr_to_string(
735
    margo_instance_id mid,
736 737 738 739 740 741 742 743 744 745
    char *buf,
    hg_size_t *buf_size,
    hg_addr_t addr)
{
    return(HG_Addr_to_string(mid->hg_class, buf, buf_size, addr));
}

/*
 * Obtain a Mercury handle for (addr, id).  A handle from the instance's
 * handle cache is used when one is available; otherwise a new handle is
 * created with HG_Create().  Returns the status of whichever path produced
 * the result.
 */
hg_return_t margo_create(margo_instance_id mid, hg_addr_t addr,
    hg_id_t id, hg_handle_t *handle)
{
    hg_return_t hret;

    /* look for a handle to reuse */
    hret = margo_handle_cache_get(mid, addr, id, handle);
    if(hret != HG_SUCCESS)
    {
        /* else try creating a new handle */
        hret = HG_Create(mid->hg_context, addr, id, handle);
    }

    return hret;
}

759
/*
 * Release a handle obtained from margo_create().  The handle is returned to
 * its instance's handle cache when the cache accepts it; otherwise it is
 * destroyed with HG_Destroy().
 */
hg_return_t margo_destroy(hg_handle_t handle)
{
    margo_instance_id mid;
    hg_return_t hret;

    /* use the handle to get the associated mid */
    mid = margo_hg_handle_get_instance(handle);

    /* recycle this handle if it came from the handle cache */
    hret = margo_handle_cache_put(mid, handle);
    if(hret != HG_SUCCESS)
    {
        /* else destroy the handle manually */
        hret = HG_Destroy(handle);
    }

    return hret;
}

static hg_return_t margo_cb(const struct hg_cb_info *info)
{
    hg_return_t hret = info->ret;
Matthieu Dorier's avatar
Matthieu Dorier committed
781
    ABT_eventual eventual = (ABT_eventual)(info->arg);
782 783

    /* propagate return code out through eventual */
Matthieu Dorier's avatar
Matthieu Dorier committed
784
    ABT_eventual_set(eventual, &hret, sizeof(hret));
785 786 787 788
    
    return(HG_SUCCESS);
}

789
hg_return_t margo_provider_forward(
790
    uint16_t provider_id,
791 792
    hg_handle_t handle,
    void *in_struct)
793 794 795
{
	hg_return_t hret;
	margo_request req;
796
	hret = margo_provider_iforward(provider_id, handle, in_struct, &req);
797 798 799 800 801
	if(hret != HG_SUCCESS) 
		return hret;
	return margo_wait(req);
}

802
hg_return_t margo_provider_iforward(
803
    uint16_t provider_id,
804 805 806
    hg_handle_t handle,
    void *in_struct,
    margo_request* req)
807 808
{
    hg_return_t hret = HG_TIMEOUT;
809
    ABT_eventual eventual;
810
    int ret;
811 812 813 814 815 816 817 818
    const struct hg_info* hgi; 
    hg_id_t id;
    hg_proc_cb_t in_cb, out_cb;
    hg_bool_t flag;

    assert(provider_id <= MARGO_MAX_PROVIDER_ID);

    hgi = HG_Get_info(handle);
819
    id = mux_id(hgi->id, provider_id);
820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837

    /* TODO: if we reset the handle here, is there any reason to do so in
     * the handle cache?
     */
    ret = HG_Reset(handle, hgi->addr, id);
    if(ret == HG_NO_MATCH)
    {
        /* if Mercury does not recognize this ID (with provider id included)
         * then register it now
         */
        /* find encoders for base ID */
        ret = HG_Registered_proc_cb(hgi->hg_class, hgi->id, &flag, &in_cb, &out_cb);
        if(ret != HG_SUCCESS)
            return(ret);
        if(!flag)
            return(HG_NO_MATCH);

        /* register new ID that includes provider id */
838 839 840 841
        ret = margo_register_internal(margo_hg_info_get_instance(hgi), 
            id, in_cb, out_cb, NULL);
        if(ret == 0)
            return(HG_OTHER_ERROR);
842 843 844 845 846

        /* should be able to reset now */
        ret = HG_Reset(handle, hgi->addr, id);
        if(ret != HG_SUCCESS)
            return(ret);
847
    }
848 849 850 851 852 853 854

    ret = ABT_eventual_create(sizeof(hret), &eventual);
    if(ret != 0)
    {
        return(HG_NOMEM_ERROR);        
    }

855
    *req = eventual;
856

Matthieu Dorier's avatar
Matthieu Dorier committed
857
    return HG_Forward(handle, margo_cb, (void*)eventual, in_struct);
858
}
859

860 861 862 863
/*
 * Block the calling ULT until the operation behind `req` completes, then
 * release the request.
 *
 * Returns the completion status stored by the Mercury callback, or
 * HG_OTHER_ERROR if waiting on the request itself failed (the request is
 * not freed in that case).
 */
hg_return_t margo_wait(margo_request req)
{
    hg_return_t* waited_hret;
    hg_return_t  hret;

    /* waited_hret is only valid when the wait succeeded */
    if(ABT_eventual_wait(req, (void**)&waited_hret) != ABT_SUCCESS)
        return(HG_OTHER_ERROR);

    hret = *waited_hret;
    ABT_eventual_free(&req);

    return(hret);
}

Matthieu Dorier's avatar
Matthieu Dorier committed
872 873 874 875 876
/*
 * Non-blocking completion check on a request: forwards to
 * ABT_eventual_test() (without retrieving the stored value) and returns its
 * result; *flag receives the readiness indication.
 */
int margo_test(margo_request req, int* flag)
{
    const int abt_ret = ABT_eventual_test(req, NULL, flag);

    return abt_ret;
}

877 878 879 880
/* Argument handed to margo_forward_timeout_cb(): identifies the Mercury
 * operation to cancel when the timer fires. */
typedef struct
{
    hg_handle_t handle; /* handle of the forward being timed */
} margo_forward_timeout_cb_dat;
881

882 883 884 885 886 887 888 889 890 891 892
/*
 * Timer callback used by margo_forward_timed().  `arg` points to a
 * margo_forward_timeout_cb_dat; the Mercury operation on its handle is
 * cancelled.
 */
static void margo_forward_timeout_cb(void *arg)
{
    margo_forward_timeout_cb_dat *dat = arg;

    /* the forward did not complete in time: cancel it */
    HG_Cancel(dat->handle);
}

hg_return_t margo_forward_timed(
893
    hg_handle_t handle,
894 895
    void *in_struct,
    double timeout_ms)
896 897
{
    int ret;
898
    hg_return_t hret;
899
    margo_instance_id mid;
900
    ABT_eventual eventual;
901
    hg_return_t* waited_hret;
902 903
    margo_timer_t forward_timer;
    margo_forward_timeout_cb_dat timeout_cb_dat;
904 905 906 907 908 909 910

    ret = ABT_eventual_create(sizeof(hret), &eventual);
    if(ret != 0)
    {
        return(HG_NOMEM_ERROR);        
    }

911 912 913
    /* use the handle to get the associated mid */
    mid = margo_hg_handle_get_instance(handle);

914 915 916 917 918
    /* set a timer object to expire when this forward times out */
    timeout_cb_dat.handle = handle;
    margo_timer_init(mid, &forward_timer, margo_forward_timeout_cb,
        &timeout_cb_dat, timeout_ms);

Matthieu Dorier's avatar
Matthieu Dorier committed
919
    hret = HG_Forward(handle, margo_cb, (void*)eventual, in_struct);
920
    if(hret == HG_SUCCESS)
Jonathan Jenkins's avatar
Jonathan Jenkins committed
921 922 923 924 925
    {
        ABT_eventual_wait(eventual, (void**)&waited_hret);
        hret = *waited_hret;
    }

926 927 928 929 930 931 932 933
    /* convert HG_CANCELED to HG_TIMEOUT to indicate op timed out */
    if(hret == HG_CANCELED)
        hret = HG_TIMEOUT;

    /* remove timer if it is still in place (i.e., not timed out) */
    if(hret != HG_TIMEOUT)
        margo_timer_destroy(mid, &forward_timer);

Jonathan Jenkins's avatar
Jonathan Jenkins committed
934 935 936 937 938 939 940 941
    ABT_eventual_free(&eventual);

    return(hret);
}

/*
 * Blocking respond: issue the response with margo_irespond() and wait for
 * its completion.  Returns the error from the non-blocking call if it could
 * not be issued, otherwise the completion status from margo_wait().
 */
hg_return_t margo_respond(
    hg_handle_t handle,
    void *out_struct)
{
    hg_return_t hret;
    margo_request req;

    hret = margo_irespond(handle,out_struct,&req);
    if(hret != HG_SUCCESS)
        return hret;
    return margo_wait(req);
}

/*
 * Non-blocking respond.
 *
 * handle:     handle of the RPC being answered
 * out_struct: output passed to HG_Respond()
 * req:        on success receives the request to pass to margo_wait() or
 *             margo_test(); set to ABT_EVENTUAL_NULL if HG_Respond() fails
 *
 * Returns HG_NOMEM_ERROR if the eventual cannot be created, otherwise the
 * result of HG_Respond().
 */
hg_return_t margo_irespond(
    hg_handle_t handle,
    void *out_struct,
    margo_request* req)
{
    ABT_eventual eventual;
    hg_return_t hret;
    int ret;

    ret = ABT_eventual_create(sizeof(hg_return_t), &eventual);
    if(ret != 0)
    {
        return(HG_NOMEM_ERROR);
    }

    hret = HG_Respond(handle, margo_cb, (void*)eventual, out_struct);
    if(hret != HG_SUCCESS)
    {
        /* nothing was posted, so the eventual would never be set or
         * waited on: release it instead of leaking it */
        ABT_eventual_free(&eventual);
        *req = ABT_EVENTUAL_NULL;
        return(hret);
    }

    *req = eventual;
    return(HG_SUCCESS);
}

970 971 972 973 974 975 976
/* Forward to HG_Bulk_create() using the instance's Mercury class; all other
 * arguments are passed through unchanged. */
hg_return_t margo_bulk_create(
    margo_instance_id mid,
    hg_uint32_t count,
    void **buf_ptrs,
    const hg_size_t *buf_sizes,
    hg_uint8_t flags,
    hg_bulk_t *handle)
{
    return(HG_Bulk_create(mid->hg_class, count,
        buf_ptrs, buf_sizes, flags, handle));
}
981

982 983 984 985
/* Forward to HG_Bulk_free(). */
hg_return_t margo_bulk_free(
    hg_bulk_t handle)
{
    return(HG_Bulk_free(handle));
}

988 989 990 991 992 993 994 995
/* Forward to HG_Bulk_deserialize() using the instance's Mercury class; the
 * resulting bulk handle is stored in *handle. */
hg_return_t margo_bulk_deserialize(
    margo_instance_id mid,
    hg_bulk_t *handle,
    const void *buf,
    hg_size_t buf_size)
{
    hg_class_t *cls = mid->hg_class;

    return HG_Bulk_deserialize(cls, handle, buf, buf_size);
}
996

997
hg_return_t margo_bulk_transfer(
998
    margo_instance_id mid,
999
    hg_bulk_op_t op,
1000
    hg_addr_t origin_addr,
1001 1002 1003 1004
    hg_bulk_t origin_handle,
    size_t origin_offset,
    hg_bulk_t local_handle,
    size_t local_offset,
1005
    size_t size)
1006 1007 1008 1009
{  
    margo_request req;
    hg_return_t hret = margo_bulk_itransfer(mid,op,origin_addr,
                          origin_handle, origin_offset, local_handle,
Matthieu Dorier's avatar
Matthieu Dorier committed
1010
                          local_offset, size, &req);
1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025
    if(hret != HG_SUCCESS)
        return hret;
    return margo_wait(req);
}

/*
 * Non-blocking bulk transfer.
 *
 * The transfer arguments are passed unchanged to HG_Bulk_transfer() on the
 * instance's Mercury context.
 *
 * req: on success receives the request to pass to margo_wait() or
 *      margo_test(); set to ABT_EVENTUAL_NULL if HG_Bulk_transfer() fails
 *
 * Returns HG_NOMEM_ERROR if the eventual cannot be created, otherwise the
 * result of HG_Bulk_transfer().
 */
hg_return_t margo_bulk_itransfer(
    margo_instance_id mid,
    hg_bulk_op_t op,
    hg_addr_t origin_addr,
    hg_bulk_t origin_handle,
    size_t origin_offset,
    hg_bulk_t local_handle,
    size_t local_offset,
    size_t size,
    margo_request* req)
{
    hg_return_t hret;
    ABT_eventual eventual;
    int ret;

    ret = ABT_eventual_create(sizeof(hret), &eventual);
    if(ret != 0)
    {
        return(HG_NOMEM_ERROR);
    }

    hret = HG_Bulk_transfer(mid->hg_context, margo_cb,
        (void*)eventual, op, origin_addr, origin_handle, origin_offset, local_handle,
        local_offset, size, HG_OP_ID_IGNORE);
    if(hret != HG_SUCCESS)
    {
        /* nothing was posted, so the eventual would never be set or
         * waited on: release it instead of leaking it */
        ABT_eventual_free(&eventual);
        *req = ABT_EVENTUAL_NULL;
        return(hret);
    }

    *req = eventual;
    return(HG_SUCCESS);
}

1046 1047 1048 1049
/* State shared between margo_thread_sleep() and its timer callback. */
typedef struct
{
    ABT_mutex mutex; /* protects is_asleep */
    ABT_cond cond;   /* signalled by the timer callback */
    char is_asleep;  /* 1 while the sleeper should keep waiting, 0 once woken */
} margo_thread_sleep_cb_dat;

static void margo_thread_sleep_cb(void *arg)
{
    margo_thread_sleep_cb_dat *sleep_cb_dat =
        (margo_thread_sleep_cb_dat *)arg;

    /* wake up the sleeping thread */
    ABT_mutex_lock(sleep_cb_dat->mutex);
1060
    sleep_cb_dat->is_asleep = 0;
1061 1062 1063 1064 1065 1066 1067
    ABT_cond_signal(sleep_cb_dat->cond);
    ABT_mutex_unlock(sleep_cb_dat->mutex);

    return;
}

void margo_thread_sleep(
1068
    margo_instance_id mid,
1069 1070 1071 1072 1073 1074 1075 1076
    double timeout_ms)
{
    margo_timer_t sleep_timer;
    margo_thread_sleep_cb_dat sleep_cb_dat;

    /* set data needed for sleep callback */
    ABT_mutex_create(&(sleep_cb_dat.mutex));
    ABT_cond_create(&(sleep_cb_dat.cond));
1077
    sleep_cb_dat.is_asleep = 1;
1078 1079

    /* initialize the sleep timer */
1080
    margo_timer_init(mid, &sleep_timer, margo_thread_sleep_cb,
1081 1082 1083 1084
        &sleep_cb_dat, timeout_ms);

    /* yield thread for specified timeout */
    ABT_mutex_lock(sleep_cb_dat.mutex);
1085 1086
    while(sleep_cb_dat.is_asleep)
        ABT_cond_wait(sleep_cb_dat.cond, sleep_cb_dat.mutex);
1087 1088
    ABT_mutex_unlock(sleep_cb_dat.mutex);

1089 1090 1091 1092
    /* clean up */
    ABT_mutex_free(&sleep_cb_dat.mutex);
    ABT_cond_free(&sleep_cb_dat.cond);

1093 1094 1095
    return;
}

1096
/* Stores the instance's handler pool in *pool.
 * Returns 0 on success, or -1 (leaving *pool untouched) if mid is null.
 */
int margo_get_handler_pool(margo_instance_id mid, ABT_pool* pool)
{
    if(!mid)
        return -1;

    *pool = mid->handler_pool;
    return 0;
}
1105

1106 1107 1108 1109
/* Returns the Mercury context stored in the margo instance. */
hg_context_t* margo_get_context(margo_instance_id mid)
{
    hg_context_t *context = mid->hg_context;

    return context;
}
1110

1111 1112 1113
/* Returns the Mercury class stored in the margo instance. */
hg_class_t* margo_get_class(margo_instance_id mid)
{
    hg_class_t *class_ptr = mid->hg_class;

    return class_ptr;
}
Philip Carns's avatar
Philip Carns committed
1115

1116
/* Finds the Argobots pool associated with the RPC that handle h belongs to.
 *
 * The handle's RPC id is split into a base id and a provider id, which are
 * looked up in the owning instance's provider table.  If the lookup
 * succeeds but yields ABT_POOL_NULL, the instance's handler pool is used
 * instead.  Returns ABT_POOL_NULL if the handle info or the registered RPC
 * data is unavailable, or if the provider lookup fails.
 */
ABT_pool margo_hg_handle_get_handler_pool(hg_handle_t h)
{
    const struct hg_info *hinfo;
    struct margo_rpc_data *rpc_data;
    hg_id_t base_id;
    uint16_t provider_id;
    ABT_pool found;

    hinfo = HG_Get_info(h);
    if(!hinfo)
        return ABT_POOL_NULL;

    rpc_data = (struct margo_rpc_data*) HG_Registered_data(hinfo->hg_class, hinfo->id);
    if(!rpc_data)
        return ABT_POOL_NULL;

    /* TODO: if we stored a pointer to the pool in the margo_rpc_data struct
     * then we wouldn't have to search the hash table for it here.
     */
    demux_id(hinfo->id, &base_id, &provider_id);
    if(margo_lookup_provider(rpc_data->mid, base_id, provider_id, &found) != 0)
        return ABT_POOL_NULL;

    /* the lookup succeeded without yielding a pool: use the instance's
     * handler pool instead
     */
    if(found == ABT_POOL_NULL)
        margo_get_handler_pool(rpc_data->mid, &found);

    return found;
}
1143

1144 1145 1146 1147 1148 1149 1150 1151
/* Returns the margo instance recorded in the data registered for the RPC
 * id found in info, or MARGO_INSTANCE_NULL if no data is registered.
 */
margo_instance_id margo_hg_info_get_instance(const struct hg_info *info)
{
    struct margo_rpc_data *rpc_data;

    rpc_data = (struct margo_rpc_data*) HG_Registered_data(info->hg_class, info->id);
    if(rpc_data)
        return rpc_data->mid;

    return MARGO_INSTANCE_NULL;
}

1152 1153 1154 1155 1156 1157 1158
/* Returns the margo instance recorded in the data registered for the RPC
 * that handle h belongs to.  Returns MARGO_INSTANCE_NULL if the handle
 * info cannot be retrieved or no data is registered for the RPC id.
 */
margo_instance_id margo_hg_handle_get_instance(hg_handle_t h)
{
    const struct hg_info *hinfo = HG_Get_info(h);
    struct margo_rpc_data *rpc_data;

    if(!hinfo)
        return MARGO_INSTANCE_NULL;

    rpc_data = (struct margo_rpc_data*) HG_Registered_data(hinfo->hg_class, hinfo->id);
    if(!rpc_data)
        return MARGO_INSTANCE_NULL;

    return rpc_data->mid;
}

1166
/* Attaches user data (and an optional free callback) to the provider table
 * entry for the given RPC id / provider id pair.
 *
 * If the entry already carries data together with a free callback, that
 * callback is invoked on the old data before it is replaced.
 * Returns 0 on success, -1 if no entry exists for the pair.
 */
int margo_provider_register_data(margo_instance_id mid, hg_id_t id, uint16_t provider_id, void* data, void (*free_callback)(void *))
{
    struct provider_element *entry;
    hg_id_t key = mux_id(id, provider_id);

    HASH_FIND(hh, mid->provider_table, &key, sizeof(key), entry);
    if(!entry)
        return -1;

    assert(entry->id == key);

    /* release whatever was registered previously */
    if(entry->user_data && entry->user_free_callback)
        entry->user_free_callback(entry->user_data);

    entry->user_free_callback = free_callback;
    entry->user_data = data;

    return 0;
}

1188
/* Returns the user data stored in the provider table entry for the given
 * RPC id / provider id pair, or NULL if no entry exists for the pair.
 */
void* margo_provider_registered_data(margo_instance_id mid, hg_id_t id, uint16_t provider_id)
{
    struct provider_element *entry;
    hg_id_t key = mux_id(id, provider_id);

    HASH_FIND(hh, mid->provider_table, &key, sizeof(key), entry);
    if(entry)
    {
        assert(entry->id == key);
        return entry->user_data;
    }

    return NULL;
}
1203
static void margo_rpc_data_free(void* ptr)
Philip Carns's avatar
Philip Carns committed
1204
{
1205 1206 1207 1208 1209 1210
	struct margo_rpc_data* data = (struct margo_rpc_data*) ptr;
	if(data->user_data && data->user_free_callback) {
		data->user_free_callback(data->user_data);
	}
	free(ptr);
}
1211

1212
/* Empties the instance's provider table.  For every entry: the user free
 * callback (if both it and the user data are set) is run on the user data,
 * then the entry is unlinked from the hash and freed.
 */
static void delete_provider_hash(margo_instance_id mid)
{
    struct provider_element *entry, *next_entry;

    HASH_ITER(hh, mid->provider_table, entry, next_entry) {
        void *payload = entry->user_data;
        void (*release)(void*) = entry->user_free_callback;

        if(payload && release)
            release(payload);

        HASH_DEL(mid->provider_table, entry);
        free(entry);
    }
}

1224 1225 1226 1227 1228 1229 1230
/* dedicated thread function to drive Mercury progress */
static void hg_progress_fn(void* foo)
{
    int ret;
    unsigned int actual_count;
    struct margo_instance *mid = (struct margo_instance *)foo;
    size_t size;
1231
    unsigned int hg_progress_timeout = mid->hg_progress_timeout_ub;
1232 1233
    double next_timer_exp;
    int trigger_happened;
1234 1235
    double tm1, tm2;
    int diag_enabled = 0;
1236

1237 1238 1239 1240
    while(!mid->hg_progress_shutdown_flag)
    {
        trigger_happened = 0;
        do {
1241 1242 1243 1244
            /* save value of instance diag variable, in case it is modified
             * while we are in loop 
             */
            diag_enabled = mid->diag_enabled;
1245

1246
            if(diag_enabled) tm1 = ABT_get_wtime();
1247
            ret = HG_Trigger(mid->hg_context, 0, 1, &actual_count);
1248 1249 1250 1251 1252
            if(diag_enabled)
            {
                tm2 = ABT_get_wtime();
                __DIAG_UPDATE(mid->diag_trigger_elapsed, (tm2-tm1));
            }
1253

1254 1255 1256
            if(ret == HG_SUCCESS && actual_count > 0)
                trigger_happened = 1;
        } while((ret == HG_SUCCESS) && actual_count && !mid->hg_progress_shutdown_flag);
1257

1258 1259
        if(trigger_happened)
            ABT_thread_yield();
1260

1261
        ABT_pool_get_size(mid->progress_pool, &size);
1262
        /* Are there any other threads executing in this pool that are *not*
1263 1264 1265 1266
         * blocked ?  If so then, we can't sleep here or else those threads 
         * will not get a chance to execute.
         * TODO: check is ABT_pool_get_size returns the number of ULT/tasks
         * that can be executed including this one, or not including this one.
1267
         */
1268
        if(size > 0)
1269 1270 1271 1272 1273 1274 1275 1276
        {
            /* TODO: this is being executed more than is necessary (i.e.
             * in cases where there are other legitimate ULTs eligible
             * for execution that are not blocking on any events, Margo
             * or otherwise). Maybe we need an abt scheduling tweak here
             * to make sure that this ULT is the lowest priority in that
             * scenario.
             */
1277
            if(diag_enabled) tm1 = ABT_get_wtime();
1278
            ret = HG_Progress(mid->hg_context, 0);
1279 1280 1281 1282 1283 1284
            if(diag_enabled)
            {
                tm2 = ABT_get_wtime();
                __DIAG_UPDATE(mid->diag_progress_elapsed_zero_timeout, (tm2-tm1));
                __DIAG_UPDATE(mid->diag_progress_timeout_value, 0);
            }
1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303
            if(ret == HG_SUCCESS)
            {
                /* Mercury completed something; loop around to trigger
                 * callbacks 
                 */
            }
            else if(ret == HG_TIMEOUT)
            {
                /* No completion; yield here to allow other ULTs to run */
                ABT_thread_yield();
            }
            else
            {
                /* TODO: error handling */
                fprintf(stderr, "WARNING: unexpected return code (%d) from HG_Progress()\n", ret);
            }
        }
        else
        {
1304
            hg_progress_timeout = mid->hg_progress_timeout_ub;
1305 1306 1307 1308 1309 1310 1311 1312 1313
            ret = margo_timer_get_next_expiration(mid, &next_timer_exp);
            if(ret == 0)
            {
                /* there is a queued timer, don't block long enough
                 * to keep this timer waiting
                 */
                if(next_timer_exp >= 0.0)
                {
                    next_timer_exp *= 1000; /* convert to milliseconds */
1314
                    if(next_timer_exp < mid->hg_progress_timeout_ub)
1315 1316 1317 1318 1319 1320 1321
                        hg_progress_timeout = (unsigned int)next_timer_exp;
                }
                else
                {
                    hg_progress_timeout = 0;
                }
            }
1322
            if(diag_enabled) tm1 = ABT_get_wtime();
1323
            ret = HG_Progress(mid->hg_context, hg_progress_timeout);
1324 1325 1326 1327 1328 1329 1330 1331 1332 1333
            if(diag_enabled)
            {
                tm2 = ABT_get_wtime();
                if(hg_progress_timeout == 0)
                    __DIAG_UPDATE(mid->diag_progress_elapsed_zero_timeout, (tm2-tm1));
                else
                    __DIAG_UPDATE(mid->diag_progress_elapsed_nonzero_timeout, (tm2-tm1));
                    
                __DIAG_UPDATE(mid->diag_progress_timeout_value, hg_progress_timeout);
            }
1334 1335 1336 1337 1338 1339
            if(ret != HG_SUCCESS && ret != HG_TIMEOUT)
            {
                /* TODO: error handling */
                fprintf(stderr, "WARNING: unexpected return code (%d) from HG_Progress()\n", ret);
            }
        }
1340

1341 1342 1343
        /* check for any expired timers */
        margo_check_timers(mid);
    }
1344

1345
    return;
Philip Carns's avatar
Philip Carns committed
1346
}
Philip Carns's avatar
Philip Carns committed
1347 1348 1349 1350 1351 1352 1353


/* Turns on diagnostics collection for the instance by setting its
 * diag_enabled flag to 1.
 */
void margo_diag_start(margo_instance_id mid)
{
    mid->diag_enabled = 1;
}

1354
static void print_diag_data(FILE *file, const char* name, const char *description, struct diag_data *data)
Philip Carns's avatar
Philip Carns committed
1355
{
1356 1357 1358 1359 1360 1361 1362 1363
    double avg;

    fprintf(file, "# %s\n", description);
    if(data->count != 0)
        avg = data->cumulative/data->count;
    else
        avg = 0;
    fprintf(file, "%s\t%.9f\t%.9f\t%.9f\t%d\n", name, avg, data->min, data->max, data->count);
Philip Carns's avatar
Philip Carns committed
1364 1365 1366
    return;
}

1367
/* Appends the instance's diagnostic counters to a file.
 *
 * mid      - margo instance; diagnostics must already be enabled (asserted).
 * file     - output path, or "-" to write to stdout.
 * uniquify - if non-zero, "-<hostname>-<pid>" is appended to the path.
 *
 * The output path is built in a fixed-size buffer; if it does not fit, an
 * error is printed to stderr and nothing is written.  If the file cannot
 * be opened, perror() reports the failure and nothing is written.
 */
void margo_diag_dump(margo_instance_id mid, const char* file, int uniquify)
{
    FILE *outfile;
    time_t ltime;
    char revised_file_name[256] = {0};
    int name_len;

    assert(mid->diag_enabled);

    if(uniquify)
    {
        char hostname[128] = {0};
        int pid;

        /* leave the last byte alone so that the zero-initialized buffer
         * stays terminated even if the host name is truncated
         */
        gethostname(hostname, sizeof(hostname) - 1);
        pid = getpid();

        name_len = snprintf(revised_file_name, sizeof(revised_file_name),
            "%s-%s-%d", file, hostname, pid);
    }
    else
    {
        name_len = snprintf(revised_file_name, sizeof(revised_file_name),
            "%s", file);
    }

    if(strcmp("-", file) == 0)
    {
        outfile = stdout;
    }
    else
    {
        /* refuse to open a silently truncated path */
        if(name_len < 0 || (size_t)name_len >= sizeof(revised_file_name))
        {
            fprintf(stderr, "margo_diag_dump: output file name is too long\n");
            return;
        }

        outfile = fopen(revised_file_name, "a");
        if(!outfile)
        {
            perror("fopen");
            return;
        }
    }

    /* TODO: retrieve self addr and include in output */
    /* TODO: support pattern substitution in file name to create unique
     * output files per process
     */

    time(&ltime);
    fprintf(outfile, "# Margo diagnostics\n");
    fprintf(outfile, "# %s\n", ctime(&ltime));
    fprintf(outfile, "# <stat>\t<avg>\t<min>\t<max>\t<count>\n");
    print_diag_data(outfile, "trigger_elapsed", 
        "Time consumed by HG_Trigger()", 
        &mid->diag_trigger_elapsed);
    print_diag_data(outfile, "progress_elapsed_zero_timeout", 
        "Time consumed by HG_Progress() when called with timeout==0", 
        &mid->diag_progress_elapsed_zero_timeout);
    print_diag_data(outfile, "progress_elapsed_nonzero_timeout", 
        "Time consumed by HG_Progress() when called with timeout!=0", 
        &mid->diag_progress_elapsed_nonzero_timeout);
    print_diag_data(outfile, "progress_timeout_value", 
        "Timeout values passed to HG_Progress()", 
        &mid->diag_progress_timeout_value);

    if(outfile != stdout)
        fclose(outfile);
    
    return;
}
1431

1432
/* Sets a tunable on the instance.  The only option handled is
 * MARGO_PARAM_PROGRESS_TIMEOUT_UB, for which param must point to an
 * unsigned int that becomes the new hg_progress_timeout_ub.  Any other
 * option is ignored and param is not read.
 */
void margo_set_param(margo_instance_id mid, int option, const void *param)
{
    if(option == MARGO_PARAM_PROGRESS_TIMEOUT_UB)
    {
        const unsigned int *new_value = (const unsigned int*)param;

        mid->hg_progress_timeout_ub = *new_value;
    }
}

1444
/* Reads a tunable from the instance.  The only option handled is
 * MARGO_PARAM_PROGRESS_TIMEOUT_UB, for which param must point to an
 * unsigned int that receives the current hg_progress_timeout_ub.  Any
 * other option is ignored and param is not written.
 */
void margo_get_param(margo_instance_id mid, int option, void *param)
{
    if(option == MARGO_PARAM_PROGRESS_TIMEOUT_UB)
    {
        unsigned int *out_value = (unsigned int*)param;

        *out_value = mid->hg_progress_timeout_ub;
    }
}
Shane Snyder's avatar
Shane Snyder committed
1456 1457 1458 1459 1460 1461

static hg_return_t margo_handle_cache_init(margo_instance_id mid)
{
    int i;
    struct margo_handle_cache_el *el;
    hg_return_t hret = HG_SUCCESS;
1462

1463
    ABT_mutex_create(&(mid->handle_cache_mtx));
Shane Snyder's avatar
Shane Snyder committed
1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502

    for(i = 0; i < DEFAULT_MERCURY_HANDLE_CACHE_SIZE; i++)
    {
        el = malloc(sizeof(*el));
        if(!el)
        {
            hret = HG_NOMEM_ERROR;
            margo_handle_cache_destroy(mid);
            break;
        }