Commit 5510107a authored by James Dinan
Browse files

[svn-r10592] Updated active target to use a shared ops list

This fixes the performance regression that was introduced by concatenation of
per-target lists.

Reviewer: goodell
parent f344bc2e
......@@ -251,7 +251,11 @@ struct MPIDI_Win_target_state {
completed as target */ \
MPI_Aint *sizes; /* array of sizes of all windows */ \
struct MPIDI_Win_info_args info_args; \
struct MPIDI_Win_target_state *targets; \
struct MPIDI_Win_target_state *targets; /* Target state and ops \
lists for passive target \
mode of operation */ \
struct MPIDI_RMA_Op *at_rma_ops_list; /* Ops list for active target \
mode of operation. */ \
enum MPIDI_Win_epoch_states epoch_state; \
int epoch_count; \
int fence_issued; /* Indicates if fence has been called, and if an \
......
......@@ -278,27 +278,28 @@ static inline void MPIDI_CH3I_RMA_Ops_free(MPIDI_RMA_Ops_list_t *list)
}
/* Retrieve the RMA ops list pointer from the window. This routine detects
 * whether we are in an active or passive target epoch and returns the correct
 * ops list; we use a shared list for active target and separate per-target
 * lists for passive target.
 */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_RMA_Get_ops_list
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline MPIDI_RMA_Ops_list_t *MPIDI_CH3I_RMA_Get_ops_list(MPID_Win *win_ptr,
                                                                int target)
{
    /* FENCE and GAT epochs are active target modes: all targets share the
     * single window-attached list (at_rma_ops_list), which avoids the
     * per-target list concatenation that caused a performance regression. */
    if (win_ptr->epoch_state == MPIDI_EPOCH_FENCE ||
        win_ptr->epoch_state == MPIDI_EPOCH_GAT)
    {
        return &win_ptr->at_rma_ops_list;
    }
    else {
        /* Passive target: each target rank keeps its own ops list. */
        return &win_ptr->targets[target].rma_ops_list;
    }
}
#undef FUNCNAME
#undef FCNAME
......
......@@ -170,11 +170,12 @@ int MPIDI_Get_accumulate(const void *origin_addr, int origin_count,
}
}
else {
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
/* Append the operation to the window's RMA ops queue */
MPIU_INSTR_DURATION_START(rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(&win_ptr->targets[target_rank].rma_ops_list, &new_ptr);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
MPIU_INSTR_DURATION_END(rmaqueue_alloc);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
......@@ -270,11 +271,12 @@ int MPIDI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
goto fn_exit;
}
else {
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
/* Append this operation to the RMA ops queue */
MPIU_INSTR_DURATION_START(rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(&win_ptr->targets[target_rank].rma_ops_list, &new_ptr);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
MPIU_INSTR_DURATION_END(rmaqueue_alloc);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
......@@ -352,11 +354,12 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
goto fn_exit;
}
else {
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
/* Append this operation to the RMA ops queue */
MPIU_INSTR_DURATION_START(rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(&win_ptr->targets[target_rank].rma_ops_list, &new_ptr);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
MPIU_INSTR_DURATION_END(rmaqueue_alloc);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
......
......@@ -159,11 +159,12 @@ int MPIDI_Put(const void *origin_addr, int origin_count, MPI_Datatype
}
else
{
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
/* queue it up */
MPIU_INSTR_DURATION_START(rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(&win_ptr->targets[target_rank].rma_ops_list, &new_ptr);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
MPIU_INSTR_DURATION_END(rmaqueue_alloc);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
......@@ -256,11 +257,12 @@ int MPIDI_Get(void *origin_addr, int origin_count, MPI_Datatype
}
else
{
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
/* queue it up */
MPIU_INSTR_DURATION_START(rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(&win_ptr->targets[target_rank].rma_ops_list, &new_ptr);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
MPIU_INSTR_DURATION_END(rmaqueue_alloc);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
......@@ -449,11 +451,12 @@ int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
}
else
{
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
/* queue it up */
MPIU_INSTR_DURATION_START(rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(&win_ptr->targets[target_rank].rma_ops_list, &new_ptr);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
MPIU_INSTR_DURATION_END(rmaqueue_alloc);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
......
......@@ -208,7 +208,7 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
int comm_size;
int *rma_target_proc, *nops_to_proc, i, total_op_count, *curr_ops_cnt;
MPIDI_RMA_Op_t *curr_ptr;
MPIDI_RMA_Ops_list_t ops_list;
MPIDI_RMA_Ops_list_t *ops_list;
MPID_Comm *comm_ptr;
MPI_Win source_win_handle, target_win_handle;
MPID_Progress_state progress_state;
......@@ -291,17 +291,14 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
mpi_errno, "nops_to_proc");
for (i=0; i<comm_size; i++) nops_to_proc[i] = 0;
/* FIXME: This is a temporary hack to keep active target working with
* the new ops list structure. This should be replaced with iteration
* over the targets/ops lists inside of the ops processing code.
* Ideally, we should issue ops breadth-first across targets. */
ops_list = MPIDI_CH3I_RMA_Ops_concat_all(win_ptr);
/* Note, active target uses the following ops list, and passive
target uses win_ptr->targets[..] */
ops_list = &win_ptr->at_rma_ops_list;
/* set rma_target_proc[i] to 1 if rank i is a target of RMA
ops from this process */
total_op_count = 0;
curr_ptr = MPIDI_CH3I_RMA_Ops_head(&ops_list);
curr_ptr = MPIDI_CH3I_RMA_Ops_head(ops_list);
while (curr_ptr != NULL)
{
total_op_count++;
......@@ -339,7 +336,7 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
MPIU_INSTR_DURATION_MAX(winfence_issue,1,total_op_count);
MPIU_INSTR_COUNTER_RESET(winfence_reqs);
i = 0;
curr_ptr = MPIDI_CH3I_RMA_Ops_head(&ops_list);
curr_ptr = MPIDI_CH3I_RMA_Ops_head(ops_list);
while (curr_ptr != NULL)
{
/* The completion counter at the target is decremented only on
......@@ -362,7 +359,7 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
curr_ops_cnt[curr_ptr->target_rank]++;
/* If the request is null, we can remove it immediately */
if (!curr_ptr->request) {
MPIDI_CH3I_RMA_Ops_free_and_next(&ops_list, &curr_ptr);
MPIDI_CH3I_RMA_Ops_free_and_next(ops_list, &curr_ptr);
}
else {
nRequest++;
......@@ -376,7 +373,7 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
if (nRequest > MPIR_PARAM_RMA_NREQUEST_THRESHOLD &&
nRequest - nRequestNew > MPIR_PARAM_RMA_NREQUEST_NEW_THRESHOLD) {
int nDone = 0;
MPIDI_CH3I_RMAListPartialComplete(win_ptr, &ops_list, curr_ptr, &nDone);
MPIDI_CH3I_RMAListPartialComplete(win_ptr, ops_list, curr_ptr, &nDone);
/* if (nDone > 0) printf( "nDone = %d\n", nDone ); */
nRequest -= nDone;
nRequestNew = nRequest;
......@@ -400,10 +397,10 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
{
MPIU_INSTR_STMT(list_complete=MPIU_INSTR_GET_VAR(winfence_complete));
MPIU_INSTR_STMT(list_block=MPIU_INSTR_GET_VAR(winfence_block));
mpi_errno = MPIDI_CH3I_RMAListComplete(win_ptr, &ops_list);
mpi_errno = MPIDI_CH3I_RMAListComplete(win_ptr, ops_list);
}
MPIU_Assert(MPIDI_CH3I_RMA_Ops_isempty(&ops_list));
MPIU_Assert(MPIDI_CH3I_RMA_Ops_isempty(ops_list));
/* wait for all operations from other processes to finish */
if (win_ptr->my_counter)
......@@ -1489,7 +1486,7 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
int comm_size, *nops_to_proc, src, new_total_op_count;
int i, j, dst, total_op_count, *curr_ops_cnt;
MPIDI_RMA_Op_t *curr_ptr;
MPIDI_RMA_Ops_list_t ops_list;
MPIDI_RMA_Ops_list_t *ops_list;
MPID_Comm *comm_ptr;
MPI_Win source_win_handle, target_win_handle;
MPID_Group *win_grp_ptr;
......@@ -1591,19 +1588,16 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
MPIU_INSTR_DURATION_START(wincomplete_issue);
/* FIXME: This is a temporary hack to keep active target working with
* the new ops list structure. This should be replaced with iteration
* over the targets/ops lists inside of the ops processing code.
* Ideally, we should issue ops breadth-first across targets. */
ops_list = MPIDI_CH3I_RMA_Ops_concat_all(win_ptr);
/* Note, active target uses the following ops list, and passive
target uses win_ptr->targets[..] */
ops_list = &win_ptr->at_rma_ops_list;
MPIU_CHKLMEM_MALLOC(nops_to_proc, int *, comm_size*sizeof(int),
mpi_errno, "nops_to_proc");
for (i=0; i<comm_size; i++) nops_to_proc[i] = 0;
total_op_count = 0;
curr_ptr = MPIDI_CH3I_RMA_Ops_head(&ops_list);
curr_ptr = MPIDI_CH3I_RMA_Ops_head(ops_list);
while (curr_ptr != NULL)
{
nops_to_proc[curr_ptr->target_rank]++;
......@@ -1623,7 +1617,7 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
for (i=0; i<comm_size; i++) curr_ops_cnt[i] = 0;
i = 0;
curr_ptr = MPIDI_CH3I_RMA_Ops_head(&ops_list);
curr_ptr = MPIDI_CH3I_RMA_Ops_head(ops_list);
while (curr_ptr != NULL)
{
/* The completion counter at the target is decremented only on
......@@ -1646,7 +1640,7 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
curr_ops_cnt[curr_ptr->target_rank]++;
/* If the request is null, we can remove it immediately */
if (!curr_ptr->request) {
MPIDI_CH3I_RMA_Ops_free_and_next(&ops_list, &curr_ptr);
MPIDI_CH3I_RMA_Ops_free_and_next(ops_list, &curr_ptr);
}
else {
nRequest++;
......@@ -1655,7 +1649,7 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
if (nRequest > MPIR_PARAM_RMA_NREQUEST_THRESHOLD &&
nRequest - nRequestNew > MPIR_PARAM_RMA_NREQUEST_NEW_THRESHOLD) {
int nDone = 0;
MPIDI_CH3I_RMAListPartialComplete(win_ptr, &ops_list, curr_ptr, &nDone);
MPIDI_CH3I_RMAListPartialComplete(win_ptr, ops_list, curr_ptr, &nDone);
nRequest -= nDone;
nRequestNew = nRequest;
}
......@@ -1705,7 +1699,7 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
MPIDI_RMA_Op_t *new_ptr = NULL;
MPIU_INSTR_DURATION_START(rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(&ops_list, &new_ptr);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
MPIU_INSTR_DURATION_END(rmaqueue_alloc);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
......@@ -1722,10 +1716,10 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
{
MPIU_INSTR_STMT(list_complete=MPIU_INSTR_GET_VAR(wincomplete_complete));
MPIU_INSTR_STMT(list_block=MPIU_INSTR_GET_VAR(wincomplete_block));
mpi_errno = MPIDI_CH3I_RMAListComplete(win_ptr, &ops_list);
mpi_errno = MPIDI_CH3I_RMAListComplete(win_ptr, ops_list);
}
MPIU_Assert(MPIDI_CH3I_RMA_Ops_isempty(&ops_list));
MPIU_Assert(MPIDI_CH3I_RMA_Ops_isempty(ops_list));
mpi_errno = MPIR_Group_free_impl(win_grp_ptr);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
......
......@@ -280,6 +280,7 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
(*win_ptr)->my_pt_rma_puts_accs = 0;
(*win_ptr)->epoch_state = MPIDI_EPOCH_NONE;
(*win_ptr)->epoch_count = 0;
(*win_ptr)->at_rma_ops_list = NULL;
/* Initialize the passive target lock state */
MPIU_CHKPMEM_MALLOC((*win_ptr)->targets, struct MPIDI_Win_target_state *,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment