Commit d78f8310 authored by Junchao Zhang
Browse files

Port instr variables in rma code to MPI_T pvars

Fixes #1962

Signed-off-by: Junchao Zhang <jczhang@mcs.anl.gov> (Reviewed by Bill Gropp)
parent fb4c0182
......@@ -14,9 +14,7 @@
#define MPIDI_CH3_PAGESIZE_MASK (~(MPIDI_CH3_PAGESIZE-1))
#define MPIDI_CH3_ROUND_UP_PAGESIZE(x) ((((MPI_Aint)x)+(~MPIDI_CH3_PAGESIZE_MASK)) & MPIDI_CH3_PAGESIZE_MASK)
#ifdef USE_MPIU_INSTR
MPIU_INSTR_DURATION_EXTERN_DECL(wincreate_allgather);
#endif
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_wincreate_allgather);
static int MPIDI_CH3I_Win_allocate_shm(MPI_Aint size, int disp_unit, MPID_Info *info, MPID_Comm *comm_ptr,
void *base_ptr, MPID_Win **win_ptr);
......@@ -113,7 +111,7 @@ static int MPIDI_CH3I_Win_allocate_shm(MPI_Aint size, int disp_unit, MPID_Info *
node_size = node_comm_ptr->local_size;
node_rank = node_comm_ptr->rank;
MPIU_INSTR_DURATION_START(wincreate_allgather);
MPIR_T_PVAR_TIMER_START(RMA, rma_wincreate_allgather);
/* allocate memory for the base addresses, disp_units, and
completion counters of all processes */
MPIU_CHKPMEM_MALLOC((*win_ptr)->base_addrs, void **,
......@@ -151,7 +149,7 @@ static int MPIDI_CH3I_Win_allocate_shm(MPI_Aint size, int disp_unit, MPID_Info *
mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
tmp_buf, 3 * sizeof(MPI_Aint), MPI_BYTE,
(*win_ptr)->comm_ptr, &errflag);
MPIU_INSTR_DURATION_END(wincreate_allgather);
MPIR_T_PVAR_TIMER_END(RMA, rma_wincreate_allgather);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
......
......@@ -9,11 +9,9 @@
#include "mpl_utlist.h"
#include "mpidi_ch3_impl.h"
#ifdef USE_MPIU_INSTR
MPIU_INSTR_DURATION_EXTERN_DECL(wincreate_allgather);
MPIU_INSTR_DURATION_EXTERN_DECL(winfree_rs);
MPIU_INSTR_DURATION_EXTERN_DECL(winfree_complete);
#endif
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_wincreate_allgather);
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_winfree_rs);
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_winfree_complete);
typedef enum MPIDI_RMA_Op_type {
MPIDI_RMA_PUT = 23,
......@@ -1062,20 +1060,20 @@ static inline int MPIDI_CH3I_Wait_for_pt_ops_finish(MPID_Win *win_ptr)
MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_WAIT_FOR_PT_OPS_FINISH);
comm_ptr = win_ptr->comm_ptr;
MPIU_INSTR_DURATION_START(winfree_rs);
MPIR_T_PVAR_TIMER_START(RMA, rma_winfree_rs);
mpi_errno = MPIR_Reduce_scatter_block_impl(win_ptr->pt_rma_puts_accs,
&total_pt_rma_puts_accs, 1,
MPI_INT, MPI_SUM, comm_ptr, &errflag);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
MPIU_INSTR_DURATION_END(winfree_rs);
MPIR_T_PVAR_TIMER_END(RMA, rma_winfree_rs);
if (total_pt_rma_puts_accs != win_ptr->my_pt_rma_puts_accs)
{
MPID_Progress_state progress_state;
/* poke the progress engine until the two are equal */
MPIU_INSTR_DURATION_START(winfree_complete);
MPIR_T_PVAR_TIMER_START(RMA, rma_winfree_complete);
MPID_Progress_start(&progress_state);
while (total_pt_rma_puts_accs != win_ptr->my_pt_rma_puts_accs)
{
......@@ -1089,7 +1087,7 @@ static inline int MPIDI_CH3I_Wait_for_pt_ops_finish(MPID_Win *win_ptr)
/* --END ERROR HANDLING-- */
}
MPID_Progress_end(&progress_state);
MPIU_INSTR_DURATION_END(winfree_complete);
MPIR_T_PVAR_TIMER_END(RMA, rma_winfree_complete);
}
fn_exit:
......
......@@ -6,11 +6,9 @@
#include "mpidrma.h"
#ifdef USE_MPIU_INSTR
MPIU_INSTR_DURATION_EXTERN_DECL(rmaqueue_alloc);
MPIU_INSTR_DURATION_EXTERN_DECL(rmaqueue_set);
extern void MPIDI_CH3_RMA_InitInstr(void);
#endif
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_rmaqueue_alloc);
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_rmaqueue_set);
extern void MPIDI_CH3_RMA_Init_Pvars(void);
#undef FUNCNAME
#define FUNCNAME MPIDI_Get_accumulate
......@@ -82,14 +80,14 @@ int MPIDI_Get_accumulate(const void *origin_addr, int origin_count,
MPIDI_RMA_Op_t *new_ptr = NULL;
/* Append the operation to the window's RMA ops queue */
MPIU_INSTR_DURATION_START(rmaqueue_alloc);
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
MPIU_INSTR_DURATION_END(rmaqueue_alloc);
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_alloc);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
/* TODO: Can we use the MPIDI_RMA_ACC_CONTIG optimization? */
MPIU_INSTR_DURATION_START(rmaqueue_set);
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
new_ptr->type = MPIDI_RMA_GET_ACCUMULATE;
/* Cast away const'ness for origin_address as MPIDI_RMA_Op_t
* contain both PUT and GET like ops */
......@@ -104,7 +102,7 @@ int MPIDI_Get_accumulate(const void *origin_addr, int origin_count,
new_ptr->target_count = target_count;
new_ptr->target_datatype = target_datatype;
new_ptr->op = op;
MPIU_INSTR_DURATION_END(rmaqueue_set);
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
/* if source or target datatypes are derived, increment their
reference counts */
......@@ -194,12 +192,12 @@ int MPIDI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
MPIDI_RMA_Op_t *new_ptr = NULL;
/* Append this operation to the RMA ops queue */
MPIU_INSTR_DURATION_START(rmaqueue_alloc);
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
MPIU_INSTR_DURATION_END(rmaqueue_alloc);
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_alloc);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
MPIU_INSTR_DURATION_START(rmaqueue_set);
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
new_ptr->type = MPIDI_RMA_COMPARE_AND_SWAP;
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = 1;
......@@ -214,7 +212,7 @@ int MPIDI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
new_ptr->compare_addr = (void *) compare_addr;
new_ptr->compare_count = 1;
new_ptr->compare_datatype = datatype;
MPIU_INSTR_DURATION_END(rmaqueue_set);
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
}
fn_exit:
......@@ -286,12 +284,12 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
MPIDI_RMA_Op_t *new_ptr = NULL;
/* Append this operation to the RMA ops queue */
MPIU_INSTR_DURATION_START(rmaqueue_alloc);
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
MPIU_INSTR_DURATION_END(rmaqueue_alloc);
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_alloc);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
MPIU_INSTR_DURATION_START(rmaqueue_set);
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
new_ptr->type = MPIDI_RMA_FETCH_AND_OP;
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = 1;
......@@ -304,7 +302,7 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
new_ptr->result_count = 1;
new_ptr->result_datatype = datatype;
new_ptr->op = op;
MPIU_INSTR_DURATION_END(rmaqueue_set);
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
}
fn_exit:
......
......@@ -8,11 +8,9 @@
static int enableShortACC=1;
#ifdef USE_MPIU_INSTR
MPIU_INSTR_DURATION_EXTERN_DECL(rmaqueue_alloc);
MPIU_INSTR_DURATION_EXTERN_DECL(rmaqueue_set);
extern void MPIDI_CH3_RMA_InitInstr(void);
#endif
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_rmaqueue_alloc);
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_rmaqueue_set);
extern void MPIDI_CH3_RMA_Init_Pvars(void);
#define MPIDI_PASSIVE_TARGET_DONE_TAG 348297
#define MPIDI_PASSIVE_TARGET_RMA_TAG 563924
......@@ -177,12 +175,12 @@ int MPIDI_Put(const void *origin_addr, int origin_count, MPI_Datatype
MPIDI_RMA_Op_t *new_ptr = NULL;
/* queue it up */
MPIU_INSTR_DURATION_START(rmaqueue_alloc);
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
MPIU_INSTR_DURATION_END(rmaqueue_alloc);
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_alloc);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
MPIU_INSTR_DURATION_START(rmaqueue_set);
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
/* FIXME: For contig and very short operations, use a streamlined op */
new_ptr->type = MPIDI_RMA_PUT;
/* Cast away const'ness for the origin address, as the
......@@ -195,7 +193,7 @@ int MPIDI_Put(const void *origin_addr, int origin_count, MPI_Datatype
new_ptr->target_disp = target_disp;
new_ptr->target_count = target_count;
new_ptr->target_datatype = target_datatype;
MPIU_INSTR_DURATION_END(rmaqueue_set);
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
/* if source or target datatypes are derived, increment their
reference counts */
......@@ -289,12 +287,12 @@ int MPIDI_Get(void *origin_addr, int origin_count, MPI_Datatype
MPIDI_RMA_Op_t *new_ptr = NULL;
/* queue it up */
MPIU_INSTR_DURATION_START(rmaqueue_alloc);
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
MPIU_INSTR_DURATION_END(rmaqueue_alloc);
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_alloc);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
MPIU_INSTR_DURATION_START(rmaqueue_set);
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
/* FIXME: For contig and very short operations, use a streamlined op */
new_ptr->type = MPIDI_RMA_GET;
new_ptr->origin_addr = origin_addr;
......@@ -304,7 +302,7 @@ int MPIDI_Get(void *origin_addr, int origin_count, MPI_Datatype
new_ptr->target_disp = target_disp;
new_ptr->target_count = target_count;
new_ptr->target_datatype = target_datatype;
MPIU_INSTR_DURATION_END(rmaqueue_set);
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
/* if source or target datatypes are derived, increment their
reference counts */
......@@ -401,15 +399,15 @@ int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
MPIDI_RMA_Op_t *new_ptr = NULL;
/* queue it up */
MPIU_INSTR_DURATION_START(rmaqueue_alloc);
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
MPIU_INSTR_DURATION_END(rmaqueue_alloc);
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_alloc);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
/* If predefined and contiguous, use a simplified element */
if (MPIR_DATATYPE_IS_PREDEFINED(origin_datatype) &&
MPIR_DATATYPE_IS_PREDEFINED(target_datatype) && enableShortACC) {
MPIU_INSTR_DURATION_START(rmaqueue_set);
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
new_ptr->type = MPIDI_RMA_ACC_CONTIG;
/* Only the information needed for the contig/predefined acc */
/* Cast away const'ness for origin_address as
......@@ -422,11 +420,11 @@ int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
new_ptr->target_count = target_count;
new_ptr->target_datatype = target_datatype;
new_ptr->op = op;
MPIU_INSTR_DURATION_END(rmaqueue_set);
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
goto fn_exit;
}
MPIU_INSTR_DURATION_START(rmaqueue_set);
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
new_ptr->type = MPIDI_RMA_ACCUMULATE;
/* Cast away const'ness for origin_address as MPIDI_RMA_Op_t
* contain both PUT and GET like ops */
......@@ -438,7 +436,7 @@ int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
new_ptr->target_count = target_count;
new_ptr->target_datatype = target_datatype;
new_ptr->op = op;
MPIU_INSTR_DURATION_END(rmaqueue_set);
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
/* if source or target datatypes are derived, increment their
reference counts */
......
This diff is collapsed.
......@@ -8,9 +8,7 @@
#include "mpiinfo.h"
#include "mpidrma.h"
#ifdef USE_MPIU_INSTR
MPIU_INSTR_DURATION_EXTERN_DECL(wincreate_allgather);
#endif
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_wincreate_allgather);
#undef FUNCNAME
#define FUNCNAME MPIDI_Win_fns_init
......@@ -56,7 +54,7 @@ int MPIDI_CH3U_Win_create_gather( void *base, MPI_Aint size, int disp_unit,
/* RMA handlers should be set before calling this function */
mpi_errno = (*win_ptr)->RMAFns.Win_set_info(*win_ptr, info);
MPIU_INSTR_DURATION_START(wincreate_allgather);
MPIR_T_PVAR_TIMER_START(RMA, rma_wincreate_allgather);
/* allocate memory for the base addresses, disp_units, and
completion counters of all processes */
MPIU_CHKPMEM_MALLOC((*win_ptr)->base_addrs, void **,
......@@ -94,7 +92,7 @@ int MPIDI_CH3U_Win_create_gather( void *base, MPI_Aint size, int disp_unit,
mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
tmp_buf, 4, MPI_AINT,
(*win_ptr)->comm_ptr, &errflag);
MPIU_INSTR_DURATION_END(wincreate_allgather);
MPIR_T_PVAR_TIMER_END(RMA, rma_wincreate_allgather);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
......
......@@ -9,15 +9,14 @@
MPIU_THREADSAFE_INIT_DECL(initRMAoptions);
#ifdef USE_MPIU_INSTR
MPIU_INSTR_DURATION_DECL(wincreate_allgather);
MPIU_INSTR_DURATION_DECL(winfree_rs);
MPIU_INSTR_DURATION_DECL(winfree_complete);
MPIU_INSTR_DURATION_DECL(rmaqueue_alloc);
MPIU_INSTR_DURATION_DECL(rmaqueue_set);
extern void MPIDI_CH3_RMA_InitInstr(void);
#endif
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_wincreate_allgather);
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_winfree_rs);
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_winfree_complete);
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_rmaqueue_alloc);
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_rmaqueue_set);
extern void MPIDI_CH3_RMA_Init_Pvars(void);
static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
MPID_Comm *comm_ptr, MPID_Win **win_ptr);
......@@ -255,15 +254,8 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
if(initRMAoptions) {
MPIU_THREADSAFE_INIT_BLOCK_BEGIN(initRMAoptions);
#ifdef USE_MPIU_INSTR
/* Define all instrumentation handles used in the CH3 RMA here*/
MPIU_INSTR_DURATION_INIT(wincreate_allgather,0,"WIN_CREATE:Allgather");
MPIU_INSTR_DURATION_INIT(winfree_rs,0,"WIN_FREE:ReduceScatterBlock");
MPIU_INSTR_DURATION_INIT(winfree_complete,0,"WIN_FREE:Complete");
MPIU_INSTR_DURATION_INIT(rmaqueue_alloc,0,"Allocate RMA Queue element");
MPIU_INSTR_DURATION_INIT(rmaqueue_set,0,"Set fields in RMA Queue element");
MPIDI_CH3_RMA_InitInstr();
#endif
MPIDI_CH3_RMA_Init_Pvars();
MPIU_THREADSAFE_INIT_CLEAR(initRMAoptions);
MPIU_THREADSAFE_INIT_BLOCK_END(initRMAoptions);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment