Commit 321c6534 authored by Philip Carns's avatar Philip Carns

Merge branch 'carns/dev-rpc-breadcrumb-redo2' into 'master'

Margo RPC instrumentation mechanism

See merge request !16
parents 065632ad d835964d
......@@ -129,6 +129,11 @@ also need to specify
We do not recommend using any BMI methods besides TCP. Its usage is very similar to the CCI/TCP examples above, except that "bmi+" should be substituted for "cci+".
## Instrumentation
See the [Instrumentation documentation](doc/instrumentation.md) for
information on how to extract diagnostic instrumentation from Margo.
## Design details
![Margo architecture](doc/fig/margo-diagram.png)
......
# Margo instrumentation
This file documents instrumentation capabilities that are built into the
margo library. See the [top level README.md](../README.md) for general
information about margo.
Margo includes two forms of instrumentation. The first measures time spent
executing key Mercury functions within the communication progress
loop. The second measures time spent invoking remote procedure calls.
## Usage
Both can be enabled at run time by calling `margo_diag_start()` at any
time after `margo_init()` on the process that you wish to instrument.
Statistics from both can then be emitted at any time prior to
`margo_finalize()` by calling the `margo_diag_dump()` function.
The arguments to `margo_diag_dump()` are as follows:
* `mid`: the margo instance to retrieve instrumentation from
* `file`: name of the file to write the (text) data to. If the "-" string
is used, then data will be written to `STDOUT`.
* `uniquify`: flag indicating that the file name should be suffixed with
additional characters to make it unique from other diagnostic files emitted
on the same node.
## Output format
Example output from `margo_diag_dump()` will look like this for a given
process:
```
# Margo diagnostics
# Wed Jul 31 11:15:13 2019
# RPC breadcrumbs for RPCs that were registered on this process:
# 0x5f22 data_xfer_read
# 0xa1ef delegator_read
# 0x5f22 data_xfer_read
# 0x9245 my_shutdown_rpc
# <stat> <avg> <min> <max> <count>
# Time consumed by HG_Trigger()
trigger_elapsed 0.000000036 0.000000238 0.000114679 3911094
# Time consumed by HG_Progress() when called with timeout==0
progress_elapsed_zero_timeout 0.000004716 0.000000238 0.016073227 3909480
# Time consumed by HG_Progress() when called with timeout!=0
progress_elapsed_nonzero_timeout 0.051754011 0.000023842 0.100308180 411
# Timeout values passed to HG_Progress()
progress_timeout_value 0.010511802 0.000000000 100.000000000 3909891
# RPC statistics
0x5f22 0xa1ef 0x0000 0x0000 0.001448274 0.001207113 0.007883787 100
```
Key components of the output are:
* A table of RPC names registered on that process. Each has a 16-bit
hexadecimal identifier and a string name. There may be duplicates in the
table if the same RPC is registered more than once on the process.
* A set of statistics for Mercury functions used to drive communication and
completion progress. There are counters and elapsed time measurements for
the `HG_Trigger()` function and the `HG_Progress()` function (when called with
or without a timeout value, as Margo varies its polling strategy). There
is also a category that records statistics about the actual timeout values
used.
* A set of statistics for each RPC that was _issued_ by the process (in the
"RPC statistics" category at the end). Each RPC will be identified by a
set of up to 4 hexadecimal identifiers. The set of identifiers represents a
stack that shows the heritage of up to 4 chained RPCs that led to this
measurement. Each identifier will match a name in the table at the top.
In the above example, only one RPC was issued by this
process: a "data_xfer_read" RPC that was issued as a side effect of a
"delegator_read" RPC.
## Implementation
## Future directions and use cases
......@@ -60,6 +60,7 @@ int main(int argc, char **argv)
fprintf(stderr, "Error: margo_init()\n");
return(-1);
}
margo_diag_start(mid);
/* register core RPC */
my_rpc_shutdown_id = MARGO_REGISTER(mid, "my_shutdown_rpc",
......@@ -145,6 +146,7 @@ int main(int argc, char **argv)
margo_addr_free(mid, data_xfer_svr_addr);
/* shut down everything */
margo_diag_dump(mid, "-", 0);
margo_finalize(mid);
free(buffer);
......
......@@ -42,6 +42,7 @@ static void my_rpc_shutdown_ult(hg_handle_t handle)
* margo_wait_for_finalize() to suspend until this RPC executes, so there
* is no need to send any extra signal to notify it.
*/
margo_diag_dump(mid, "-", 0);
margo_finalize(mid);
return;
......@@ -80,6 +81,7 @@ int main(int argc, char **argv)
fprintf(stderr, "Error: margo_init()\n");
return(-1);
}
margo_diag_start(mid);
/* figure out what address this server is listening on */
hret = margo_addr_self(mid, &addr_self);
......
......@@ -962,6 +962,20 @@ void __margo_internal_incr_pending(margo_instance_id mid);
*/
void __margo_internal_decr_pending(margo_instance_id mid);
/**
* @private
* Internal function used by DEFINE_MARGO_RPC_HANDLER, not supposed to be
* called by users!
*
* The ULT wrapper generated by DEFINE_MARGO_RPC_HANDLER calls this hook
* immediately before it invokes the user's RPC handler function.
* NOTE(review): presumably this is where per-RPC instrumentation state is
* set up; the implementation is not visible from this header -- confirm.
*
* @param [in] mid    margo instance associated with the handle
* @param [in] handle handle of the RPC that is about to be serviced
*/
void __margo_internal_pre_wrapper_hooks(margo_instance_id mid, hg_handle_t handle);
/**
* @private
* Internal function used by DEFINE_MARGO_RPC_HANDLER, not supposed to be
* called by users!
*
* The ULT wrapper generated by DEFINE_MARGO_RPC_HANDLER calls this hook
* after the user's RPC handler function has returned.
* NOTE(review): presumably this now carries the pending-operation decrement
* and deferred-finalize check that the wrapper used to perform inline; the
* implementation is not visible from this header -- confirm.
*
* @param [in] mid margo instance associated with the RPC that just completed
*/
void __margo_internal_post_wrapper_hooks(margo_instance_id mid);
/**
* macro that registers a function as an RPC.
*/
......@@ -969,33 +983,31 @@ void __margo_internal_decr_pending(margo_instance_id mid);
margo_provider_register_name(__mid, __func_name, \
BOOST_PP_CAT(hg_proc_, __in_t), \
BOOST_PP_CAT(hg_proc_, __out_t), \
__handler##_handler, \
_handler_for_##__handler, \
MARGO_DEFAULT_PROVIDER_ID, ABT_POOL_NULL);
#define MARGO_REGISTER_PROVIDER(__mid, __func_name, __in_t, __out_t, __handler, __provider_id, __pool) \
margo_provider_register_name(__mid, __func_name, \
BOOST_PP_CAT(hg_proc_, __in_t), \
BOOST_PP_CAT(hg_proc_, __out_t), \
__handler##_handler, \
_handler_for_##__handler, \
__provider_id, __pool);
#define NULL_handler NULL
#define _handler_for_NULL NULL
/**
* macro that defines a function to glue an RPC handler to a ult handler
* @param [in] __name name of handler function
*/
#define DEFINE_MARGO_RPC_HANDLER(__name) \
void __name##_wrapper(hg_handle_t handle) { \
#define __MARGO_INTERNAL_RPC_WRAPPER_BODY(__name) \
margo_instance_id __mid; \
__mid = margo_hg_handle_get_instance(handle); \
__margo_internal_pre_wrapper_hooks(__mid, handle); \
__name(handle); \
__margo_internal_decr_pending(__mid); \
if(__margo_internal_finalize_requested(__mid)) { \
margo_finalize(__mid); \
} \
} \
hg_return_t __name##_handler(hg_handle_t handle) { \
__margo_internal_post_wrapper_hooks(__mid);
#define __MARGO_INTERNAL_RPC_WRAPPER(__name) \
void _wrapper_for_##__name(hg_handle_t handle) { \
__MARGO_INTERNAL_RPC_WRAPPER_BODY(__name) \
}
#define __MARGO_INTERNAL_RPC_HANDLER_BODY(__name) \
int __ret; \
ABT_pool __pool; \
margo_instance_id __mid; \
......@@ -1004,19 +1016,31 @@ hg_return_t __name##_handler(hg_handle_t handle) { \
if(__margo_internal_finalize_requested(__mid)) { return(HG_CANCELED); } \
__pool = margo_hg_handle_get_handler_pool(handle); \
__margo_internal_incr_pending(__mid); \
__ret = ABT_thread_create(__pool, (void (*)(void *))__name##_wrapper, handle, ABT_THREAD_ATTR_NULL, NULL); \
__ret = ABT_thread_create(__pool, (void (*)(void *))_wrapper_for_##__name, handle, ABT_THREAD_ATTR_NULL, NULL); \
if(__ret != 0) { \
return(HG_NOMEM_ERROR); \
} \
return(HG_SUCCESS); \
return(HG_SUCCESS);
#define __MARGO_INTERNAL_RPC_HANDLER(__name) \
hg_return_t _handler_for_##__name(hg_handle_t handle) { \
__MARGO_INTERNAL_RPC_HANDLER_BODY(__name) \
}
/**
* macro that defines a function to glue an RPC handler to a ult handler
* @param [in] __name name of handler function
*/
#define DEFINE_MARGO_RPC_HANDLER(__name) \
__MARGO_INTERNAL_RPC_WRAPPER(__name) \
__MARGO_INTERNAL_RPC_HANDLER(__name)
/**
* macro that declares the prototype for a function to glue an RPC
* handler to a ult
* @param [in] __name name of handler function
*/
#define DECLARE_MARGO_RPC_HANDLER(__name) hg_return_t __name##_handler(hg_handle_t handle);
#define DECLARE_MARGO_RPC_HANDLER(__name) hg_return_t _handler_for_##__name(hg_handle_t handle);
#ifdef __cplusplus
}
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment