Commit fc7617f2 authored by Xin Zhao, committed by Pavan Balaji
Browse files

Add global / local pools of RMA ops and related APIs.



Instead of allocating / deallocating an RMA operation each time
an RMA op is posted by the user, we allocate fixed-size operation
pools beforehand and take an op element from those pools
when an RMA op is posted.

With only a local (per-window) op pool, the number of ops
allocated can increase arbitrarily if many windows are created.
Alternatively, if we only use a global op pool, other windows
might use up all operations thus starving the window we are
working on.

In this patch we create two pools: a local (per-window) pool and a
global pool.  Every window is guaranteed to have at least the number
of operations in the local pool.  If we run out of these operations,
we check in the global pool to see if we have any operations left.
When an operation is released, it is added back to the same pool it
was allocated from.
Signed-off-by: Pavan Balaji <balaji@anl.gov>
parent b1685139
......@@ -10,6 +10,66 @@
#include "mpl_utlist.h"
#include "mpid_rma_types.h"
extern struct MPIDI_RMA_Op *global_rma_op_pool, *global_rma_op_pool_tail, *global_rma_op_pool_start;
/* MPIDI_CH3I_Win_op_alloc(): take one op element out of an op pool and
 * reset it for reuse.
 *
 * The window's own pool (win_ptr->op_pool) is tried first; only when it
 * is empty is the global pool (global_rma_op_pool) consulted.
 *
 * Returns the detached element with its dataloop and request fields set
 * to NULL, or NULL when both pools are empty. No other field of the
 * element is modified here. */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_Win_op_alloc
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline MPIDI_RMA_Op_t *MPIDI_CH3I_Win_op_alloc(MPID_Win * win_ptr)
{
    MPIDI_RMA_Op_t *op;

    if (win_ptr->op_pool != NULL) {
        /* the per-window pool still has elements: pop its head */
        op = win_ptr->op_pool;
        MPL_LL_DELETE(win_ptr->op_pool, win_ptr->op_pool_tail, op);
    }
    else if (global_rma_op_pool != NULL) {
        /* per-window pool exhausted: fall back to the global pool */
        op = global_rma_op_pool;
        MPL_LL_DELETE(global_rma_op_pool, global_rma_op_pool_tail, op);
    }
    else {
        /* nothing left in either pool */
        return NULL;
    }

    op->dataloop = NULL;
    op->request = NULL;

    return op;
}
/* MPIDI_CH3I_Win_op_free(): hand an op element back to the pool it was
 * taken from.
 *
 * Any dataloop attached to the element is released first. The element
 * is then pushed onto win_ptr's pool when its pool_type is
 * MPIDI_RMA_POOL_WIN, and onto the global pool for any other pool_type.
 * Because a window-pool element is pushed onto win_ptr's pool, the
 * caller has to pass the window the element came from.
 *
 * Always returns MPI_SUCCESS. */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_Win_op_free
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int MPIDI_CH3I_Win_op_free(MPID_Win * win_ptr, MPIDI_RMA_Op_t * e)
{
    /* A dataloop may have been allocated for this op
     * (see send/recv_rma_msg); release it before recycling. */
    if (e->dataloop != NULL) {
        MPIU_Free(e->dataloop);
    }

    /* Elements go back to their own pool so that, at window free time,
     * they do not get mixed up with the global pool or with the pools
     * of other windows. Returned elements are put at the head of the
     * pool so that the most recently used one is handed out next,
     * which is friendlier to the cache. */
    if (e->pool_type != MPIDI_RMA_POOL_WIN) {
        MPL_LL_PREPEND(global_rma_op_pool, global_rma_op_pool_tail, e);
    }
    else {
        MPL_LL_PREPEND(win_ptr->op_pool, win_ptr->op_pool_tail, e);
    }

    return MPI_SUCCESS;
}
/* Return nonzero if the RMA operations list is empty.
*/
#undef FUNCNAME
......@@ -40,9 +100,9 @@ static inline MPIDI_RMA_Op_t *MPIDI_CH3I_RMA_Ops_head(MPIDI_RMA_Ops_list_t * lis
#define FUNCNAME MPIDI_CH3I_RMA_Ops_tail
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline MPIDI_RMA_Op_t *MPIDI_CH3I_RMA_Ops_tail(MPIDI_RMA_Ops_list_t * list)
static inline MPIDI_RMA_Op_t *MPIDI_CH3I_RMA_Ops_tail(MPIDI_RMA_Ops_list_t * list_tail)
{
return (*list) ? (*list)->prev : NULL;
return (*list_tail);
}
......@@ -55,9 +115,10 @@ static inline MPIDI_RMA_Op_t *MPIDI_CH3I_RMA_Ops_tail(MPIDI_RMA_Ops_list_t * lis
#define FUNCNAME MPIDI_CH3I_RMA_Ops_append
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline void MPIDI_CH3I_RMA_Ops_append(MPIDI_RMA_Ops_list_t * list, MPIDI_RMA_Op_t * elem)
static inline void MPIDI_CH3I_RMA_Ops_append(MPIDI_RMA_Ops_list_t * list, MPIDI_RMA_Ops_list_t * list_tail,
MPIDI_RMA_Op_t * elem)
{
MPL_DL_APPEND(*list, elem);
MPL_LL_APPEND(*list, *list_tail, elem);
}
......@@ -71,30 +132,23 @@ static inline void MPIDI_CH3I_RMA_Ops_append(MPIDI_RMA_Ops_list_t * list, MPIDI_
#define FUNCNAME MPIDI_CH3I_RMA_Ops_alloc_tail
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int MPIDI_CH3I_RMA_Ops_alloc_tail(MPIDI_RMA_Ops_list_t * list,
static inline int MPIDI_CH3I_RMA_Ops_alloc_tail(MPID_Win * win_ptr, MPIDI_RMA_Ops_list_t * list,
MPIDI_RMA_Ops_list_t * list_tail,
MPIDI_RMA_Op_t ** new_elem)
{
int mpi_errno = MPI_SUCCESS;
MPIDI_RMA_Op_t *tmp_ptr;
MPIU_CHKPMEM_DECL(1);
/* FIXME: We should use a pool allocator here */
MPIU_CHKPMEM_MALLOC(tmp_ptr, MPIDI_RMA_Op_t *, sizeof(MPIDI_RMA_Op_t),
mpi_errno, "RMA operation entry");
tmp_ptr = MPIDI_CH3I_Win_op_alloc(win_ptr);
MPIU_ERR_CHKANDJUMP(tmp_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem");
tmp_ptr->next = NULL;
tmp_ptr->dataloop = NULL;
tmp_ptr->request = NULL;
MPL_DL_APPEND(*list, tmp_ptr);
MPL_LL_APPEND(*list, *list_tail, tmp_ptr);
*new_elem = tmp_ptr;
fn_exit:
MPIU_CHKPMEM_COMMIT();
return mpi_errno;
fn_fail:
MPIU_CHKPMEM_REAP();
*new_elem = NULL;
goto fn_exit;
}
......@@ -109,9 +163,10 @@ static inline int MPIDI_CH3I_RMA_Ops_alloc_tail(MPIDI_RMA_Ops_list_t * list,
#define FUNCNAME MPIDI_CH3I_RMA_Ops_unlink
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline void MPIDI_CH3I_RMA_Ops_unlink(MPIDI_RMA_Ops_list_t * list, MPIDI_RMA_Op_t * elem)
static inline void MPIDI_CH3I_RMA_Ops_unlink(MPIDI_RMA_Ops_list_t * list, MPIDI_RMA_Ops_list_t *list_tail,
MPIDI_RMA_Op_t * elem)
{
MPL_DL_DELETE(*list, elem);
MPL_LL_DELETE(*list, *list_tail, elem);
}
......@@ -124,19 +179,17 @@ static inline void MPIDI_CH3I_RMA_Ops_unlink(MPIDI_RMA_Ops_list_t * list, MPIDI_
#define FUNCNAME MPIDI_CH3I_RMA_Ops_free_elem
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline void MPIDI_CH3I_RMA_Ops_free_elem(MPIDI_RMA_Ops_list_t * list,
static inline void MPIDI_CH3I_RMA_Ops_free_elem(MPID_Win * win_ptr, MPIDI_RMA_Ops_list_t * list,
MPIDI_RMA_Ops_list_t * list_tail,
MPIDI_RMA_Op_t * curr_ptr)
{
MPIDI_RMA_Op_t *tmp_ptr = curr_ptr;
MPIU_Assert(curr_ptr != NULL);
MPL_DL_DELETE(*list, curr_ptr);
MPL_LL_DELETE(*list, *list_tail, curr_ptr);
/* Check if we allocated a dataloop for this op (see send/recv_rma_msg) */
if (tmp_ptr->dataloop != NULL)
MPIU_Free(tmp_ptr->dataloop);
MPIU_Free(tmp_ptr);
MPIDI_CH3I_Win_op_free(win_ptr, tmp_ptr);
}
......@@ -151,12 +204,13 @@ static inline void MPIDI_CH3I_RMA_Ops_free_elem(MPIDI_RMA_Ops_list_t * list,
#define FUNCNAME MPIDI_CH3I_RMA_Ops_free_and_next
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline void MPIDI_CH3I_RMA_Ops_free_and_next(MPIDI_RMA_Ops_list_t * list,
static inline void MPIDI_CH3I_RMA_Ops_free_and_next(MPID_Win * win_ptr, MPIDI_RMA_Ops_list_t * list,
MPIDI_RMA_Ops_list_t * list_tail,
MPIDI_RMA_Op_t ** curr_ptr)
{
MPIDI_RMA_Op_t *next_ptr = (*curr_ptr)->next;
MPIDI_CH3I_RMA_Ops_free_elem(list, *curr_ptr);
MPIDI_CH3I_RMA_Ops_free_elem(win_ptr, list, list_tail, *curr_ptr);
*curr_ptr = next_ptr;
}
......@@ -167,12 +221,13 @@ static inline void MPIDI_CH3I_RMA_Ops_free_and_next(MPIDI_RMA_Ops_list_t * list,
#define FUNCNAME MPIDI_CH3I_RMA_Ops_free
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline void MPIDI_CH3I_RMA_Ops_free(MPIDI_RMA_Ops_list_t * list)
static inline void MPIDI_CH3I_RMA_Ops_free(MPID_Win * win_ptr, MPIDI_RMA_Ops_list_t * list,
MPIDI_RMA_Ops_list_t * list_tail)
{
MPIDI_RMA_Op_t *curr_ptr, *tmp_ptr;
MPL_DL_FOREACH_SAFE(*list, curr_ptr, tmp_ptr) {
MPIDI_CH3I_RMA_Ops_free_elem(list, curr_ptr);
MPL_LL_FOREACH_SAFE(*list, curr_ptr, tmp_ptr) {
MPIDI_CH3I_RMA_Ops_free_elem(win_ptr, list, list_tail, curr_ptr);
}
}
......@@ -197,4 +252,19 @@ static inline MPIDI_RMA_Ops_list_t *MPIDI_CH3I_RMA_Get_ops_list(MPID_Win * win_p
}
}
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_RMA_Get_ops_list_tail
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/* Return the address of the tail pointer of the RMA ops list that is
 * in use for the window's current epoch.
 *
 * While the window is in a FENCE, START or PSCW epoch, the window-wide
 * tail (win_ptr->at_rma_ops_list_tail) is returned and 'target' is not
 * used. In every other epoch state the per-target tail
 * (win_ptr->targets[target].rma_ops_list_tail) is returned.
 *
 * 'target' is used as an index into win_ptr->targets without any range
 * check, so the caller must pass a valid target index. */
static inline MPIDI_RMA_Ops_list_t *MPIDI_CH3I_RMA_Get_ops_list_tail(MPID_Win * win_ptr, int target)
{
    if (win_ptr->epoch_state == MPIDI_EPOCH_FENCE ||
        win_ptr->epoch_state == MPIDI_EPOCH_START || win_ptr->epoch_state == MPIDI_EPOCH_PSCW) {
        return &win_ptr->at_rma_ops_list_tail;
    }
    else {
        return &win_ptr->targets[target].rma_ops_list_tail;
    }
}
#endif /* MPID_RMA_OPLIST_H_INCLUDED */
......@@ -46,9 +46,13 @@ typedef struct MPIDI_RMA_dtype_info { /* for derived datatypes */
int has_sticky_ub, has_sticky_lb;
} MPIDI_RMA_dtype_info;
/* Tag stored in each RMA op element (its pool_type field) recording
 * which pool the element belongs to, so that it can be returned to the
 * same pool when it is released.
 * NOTE(review): the visible code does not explain why the values start
 * at 6 rather than 0 -- confirm before renumbering. */
typedef enum MPIDI_RMA_Pool_type {
/* element belongs to a window's own (per-window) op pool */
MPIDI_RMA_POOL_WIN = 6,
/* element belongs to the global op pool */
MPIDI_RMA_POOL_GLOBAL = 7
} MPIDI_RMA_Pool_type_t;
/* for keeping track of RMA ops, which will be executed at the next sync call */
typedef struct MPIDI_RMA_Op {
struct MPIDI_RMA_Op *prev; /* pointer to next element in list */
struct MPIDI_RMA_Op *next; /* pointer to next element in list */
void *origin_addr;
......@@ -69,6 +73,7 @@ typedef struct MPIDI_RMA_Op {
int target_rank;
MPIDI_CH3_Pkt_t pkt;
MPIDI_RMA_Pool_type_t pool_type;
} MPIDI_RMA_Op_t;
typedef struct MPIDI_PT_single_op {
......
......@@ -249,6 +249,7 @@ struct MPIDI_RMA_op; /* forward decl from mpidrma.h */
struct MPIDI_Win_target_state {
struct MPIDI_RMA_Op *rma_ops_list;
/* List of outstanding RMA operations */
struct MPIDI_RMA_Op *rma_ops_list_tail;
volatile enum MPIDI_CH3_Lock_states remote_lock_state;
/* Indicates the state of the target
process' "lock" for passive target
......@@ -284,6 +285,7 @@ struct MPIDI_Win_target_state {
mode of operation */ \
struct MPIDI_RMA_Op *at_rma_ops_list; /* Ops list for active target \
mode of operation. */ \
struct MPIDI_RMA_Op *at_rma_ops_list_tail; \
enum MPIDI_Win_epoch_states epoch_state; \
int epoch_count; \
int fence_issued; /* Indicates if fence has been called, and if an \
......@@ -296,6 +298,9 @@ struct MPIDI_Win_target_state {
int start_assert; /* assert passed to MPI_Win_start */ \
int shm_allocated; /* flag: TRUE iff this window has a shared memory \
region associated with it */ \
struct MPIDI_RMA_Op *op_pool_start; /* start pointer used for freeing */\
struct MPIDI_RMA_Op *op_pool; /* pool of operations */ \
struct MPIDI_RMA_Op *op_pool_tail; /* tail pointer to pool of operations. */ \
#ifdef MPIDI_CH3_WIN_DECL
#define MPID_DEV_WIN_DECL \
......@@ -440,4 +445,8 @@ MPID_REQUEST_DECL
/* Tell the RMA code to use a table of RMA functions provided by the
ADI */
#define USE_MPID_RMA_TABLE
int MPIDI_RMA_init(void);
void MPIDI_RMA_finalize(void);
#endif /* !defined(MPICH_MPIDPRE_H_INCLUDED) */
......@@ -73,11 +73,12 @@ int MPIDI_Put(const void *origin_addr, int origin_count, MPI_Datatype
}
else {
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Ops_list_t *ops_list_tail = MPIDI_CH3I_RMA_Get_ops_list_tail(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
MPIDI_CH3_Pkt_put_t *put_pkt = NULL;
/* queue it up */
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(win_ptr, ops_list, ops_list_tail, &new_ptr);
if (mpi_errno) {
MPIU_ERR_POP(mpi_errno);
}
......@@ -184,11 +185,12 @@ int MPIDI_Get(void *origin_addr, int origin_count, MPI_Datatype
}
else {
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Ops_list_t *ops_list_tail = MPIDI_CH3I_RMA_Get_ops_list_tail(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
MPIDI_CH3_Pkt_get_t *get_pkt = NULL;
/* queue it up */
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(win_ptr, ops_list, ops_list_tail, &new_ptr);
if (mpi_errno) {
MPIU_ERR_POP(mpi_errno);
}
......@@ -296,11 +298,12 @@ int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
}
else {
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Ops_list_t *ops_list_tail = MPIDI_CH3I_RMA_Get_ops_list_tail(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
MPIDI_CH3_Pkt_accum_t *accum_pkt = NULL;
/* queue it up */
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(win_ptr, ops_list, ops_list_tail, &new_ptr);
if (mpi_errno) {
MPIU_ERR_POP(mpi_errno);
}
......@@ -441,10 +444,11 @@ int MPIDI_Get_accumulate(const void *origin_addr, int origin_count,
}
else {
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Ops_list_t *ops_list_tail = MPIDI_CH3I_RMA_Get_ops_list_tail(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
/* Append the operation to the window's RMA ops queue */
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(win_ptr, ops_list, ops_list_tail, &new_ptr);
if (mpi_errno) {
MPIU_ERR_POP(mpi_errno);
}
......@@ -575,11 +579,12 @@ int MPIDI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
}
else {
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Ops_list_t *ops_list_tail = MPIDI_CH3I_RMA_Get_ops_list_tail(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
MPIDI_CH3_Pkt_cas_t *cas_pkt = NULL;
/* Append this operation to the RMA ops queue */
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(win_ptr, ops_list, ops_list_tail, &new_ptr);
if (mpi_errno) {
MPIU_ERR_POP(mpi_errno);
}
......@@ -669,11 +674,12 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
}
else {
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Ops_list_t *ops_list_tail = MPIDI_CH3I_RMA_Get_ops_list_tail(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
MPIDI_CH3_Pkt_fop_t *fop_pkt = NULL;
/* Append this operation to the RMA ops queue */
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(win_ptr, ops_list, ops_list_tail, &new_ptr);
if (mpi_errno) {
MPIU_ERR_POP(mpi_errno);
}
......
......@@ -349,9 +349,11 @@ static int do_passive_target_rma(MPID_Win * win_ptr, int target_rank,
int *wait_for_rma_done_pkt, MPIDI_CH3_Pkt_flags_t sync_flags);
static int send_lock_put_or_acc(MPID_Win *, int);
static int send_lock_get(MPID_Win *, int);
static inline int rma_list_complete(MPID_Win * win_ptr, MPIDI_RMA_Ops_list_t * ops_list);
static inline int rma_list_complete(MPID_Win * win_ptr, MPIDI_RMA_Ops_list_t * ops_list,
MPIDI_RMA_Ops_list_t * ops_list_tail);
static inline int rma_list_gc(MPID_Win * win_ptr,
MPIDI_RMA_Ops_list_t * ops_list,
MPIDI_RMA_Ops_list_t * ops_list_tail,
MPIDI_RMA_Op_t * last_elm, int *nDone);
......@@ -366,6 +368,7 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
int *rma_target_proc, *nops_to_proc, i, total_op_count, *curr_ops_cnt;
MPIDI_RMA_Op_t *curr_ptr;
MPIDI_RMA_Ops_list_t *ops_list;
MPIDI_RMA_Ops_list_t *ops_list_tail;
MPID_Comm *comm_ptr;
MPID_Progress_state progress_state;
int errflag = FALSE;
......@@ -432,6 +435,7 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
/* Note, active target uses the following ops list, and passive
* target uses win_ptr->targets[..] */
ops_list = &win_ptr->at_rma_ops_list;
ops_list_tail = &win_ptr->at_rma_ops_list_tail;
/* set rma_target_proc[i] to 1 if rank i is a target of RMA
* ops from this process */
......@@ -494,7 +498,7 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
curr_ops_cnt[curr_ptr->target_rank]++;
/* If the request is null, we can remove it immediately */
if (!curr_ptr->request) {
MPIDI_CH3I_RMA_Ops_free_and_next(ops_list, &curr_ptr);
MPIDI_CH3I_RMA_Ops_free_and_next(win_ptr, ops_list, ops_list_tail, &curr_ptr);
}
else {
nRequest++;
......@@ -511,7 +515,7 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
if (mpi_errno != MPI_SUCCESS)
MPIU_ERR_POP(mpi_errno);
mpi_errno = rma_list_gc(win_ptr, ops_list, curr_ptr, &nDone);
mpi_errno = rma_list_gc(win_ptr, ops_list, ops_list_tail, curr_ptr, &nDone);
if (mpi_errno != MPI_SUCCESS)
MPIU_ERR_POP(mpi_errno);
/* if (nDone > 0) printf("nDone = %d\n", nDone); */
......@@ -533,7 +537,7 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
* needed while looping through the requests.
*/
if (total_op_count) {
mpi_errno = rma_list_complete(win_ptr, ops_list);
mpi_errno = rma_list_complete(win_ptr, ops_list, ops_list_tail);
if (mpi_errno != MPI_SUCCESS)
MPIU_ERR_POP(mpi_errno);
}
......@@ -927,6 +931,7 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
int i, j, dst, total_op_count, *curr_ops_cnt;
MPIDI_RMA_Op_t *curr_ptr;
MPIDI_RMA_Ops_list_t *ops_list;
MPIDI_RMA_Ops_list_t *ops_list_tail;
MPID_Comm *comm_ptr;
int start_grp_size, *ranks_in_win_grp, rank;
int nRequest = 0;
......@@ -986,6 +991,7 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
/* Note, active target uses the following ops list, and passive
* target uses win_ptr->targets[..] */
ops_list = &win_ptr->at_rma_ops_list;
ops_list_tail = &win_ptr->at_rma_ops_list_tail;
MPIU_CHKLMEM_MALLOC(nops_to_proc, int *, comm_size * sizeof(int), mpi_errno, "nops_to_proc");
for (i = 0; i < comm_size; i++)
......@@ -1026,7 +1032,7 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
curr_ops_cnt[curr_ptr->target_rank]++;
/* If the request is null, we can remove it immediately */
if (!curr_ptr->request) {
MPIDI_CH3I_RMA_Ops_free_and_next(ops_list, &curr_ptr);
MPIDI_CH3I_RMA_Ops_free_and_next(win_ptr, ops_list, ops_list_tail, &curr_ptr);
}
else {
nRequest++;
......@@ -1037,7 +1043,7 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
mpi_errno = poke_progress_engine();
if (mpi_errno != MPI_SUCCESS)
MPIU_ERR_POP(mpi_errno);
mpi_errno = rma_list_gc(win_ptr, ops_list, curr_ptr, &nDone);
mpi_errno = rma_list_gc(win_ptr, ops_list, ops_list_tail, curr_ptr, &nDone);
if (mpi_errno != MPI_SUCCESS)
MPIU_ERR_POP(mpi_errno);
nRequest -= nDone;
......@@ -1086,7 +1092,7 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
if (request) {
MPIDI_RMA_Op_t *new_ptr = NULL;
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(win_ptr, ops_list, ops_list_tail, &new_ptr);
if (mpi_errno) {
MPIU_ERR_POP(mpi_errno);
}
......@@ -1099,7 +1105,7 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
}
if (new_total_op_count) {
mpi_errno = rma_list_complete(win_ptr, ops_list);
mpi_errno = rma_list_complete(win_ptr, ops_list, ops_list_tail);
if (mpi_errno != MPI_SUCCESS)
MPIU_ERR_POP(mpi_errno);
}
......@@ -1921,7 +1927,7 @@ static int do_passive_target_rma(MPID_Win * win_ptr, int target_rank,
*wait_for_rma_done_pkt = 0;
}
else {
MPIDI_RMA_Op_t *tail = MPIDI_CH3I_RMA_Ops_tail(&win_ptr->targets[target_rank].rma_ops_list);
MPIDI_RMA_Op_t *tail = MPIDI_CH3I_RMA_Ops_tail(&win_ptr->targets[target_rank].rma_ops_list_tail);
/* Check if we can piggyback the RMA done acknowledgement on the last
* operation in the epoch. */
......@@ -1948,8 +1954,10 @@ static int do_passive_target_rma(MPID_Win * win_ptr, int target_rank,
*wait_for_rma_done_pkt = 0;
MPIDI_CH3I_RMA_Ops_unlink(&win_ptr->targets[target_rank].rma_ops_list,
&win_ptr->targets[target_rank].rma_ops_list_tail,
curr_ptr);
MPIDI_CH3I_RMA_Ops_append(&win_ptr->targets[target_rank].rma_ops_list,
&win_ptr->targets[target_rank].rma_ops_list_tail,
curr_ptr);
break;
}
......@@ -2023,7 +2031,8 @@ static int do_passive_target_rma(MPID_Win * win_ptr, int target_rank,
/* If the request is null, we can remove it immediately */
if (!curr_ptr->request) {
MPIDI_CH3I_RMA_Ops_free_and_next(&win_ptr->targets[target_rank].rma_ops_list,
MPIDI_CH3I_RMA_Ops_free_and_next(win_ptr, &win_ptr->targets[target_rank].rma_ops_list,
&win_ptr->targets[target_rank].rma_ops_list_tail,
&curr_ptr);
}
else {
......@@ -2036,7 +2045,8 @@ static int do_passive_target_rma(MPID_Win * win_ptr, int target_rank,
if (mpi_errno != MPI_SUCCESS)
MPIU_ERR_POP(mpi_errno);
mpi_errno =
rma_list_gc(win_ptr, &win_ptr->targets[target_rank].rma_ops_list, curr_ptr,
rma_list_gc(win_ptr, &win_ptr->targets[target_rank].rma_ops_list,
&win_ptr->targets[target_rank].rma_ops_list_tail, curr_ptr,
&nDone);
if (mpi_errno != MPI_SUCCESS)
MPIU_ERR_POP(mpi_errno);
......@@ -2048,7 +2058,8 @@ static int do_passive_target_rma(MPID_Win * win_ptr, int target_rank,
}
if (nops) {
mpi_errno = rma_list_complete(win_ptr, &win_ptr->targets[target_rank].rma_ops_list);
mpi_errno = rma_list_complete(win_ptr, &win_ptr->targets[target_rank].rma_ops_list,
&win_ptr->targets[target_rank].rma_ops_list_tail);
if (mpi_errno != MPI_SUCCESS)
MPIU_ERR_POP(mpi_errno);
}
......@@ -2337,7 +2348,8 @@ static int send_lock_put_or_acc(MPID_Win * win_ptr, int target_rank)
/* Free MPIDI_RMA_Ops_list - the lock packet should still be in place, so
* we have to free two elements. */
MPIDI_CH3I_RMA_Ops_free(&win_ptr->targets[target_rank].rma_ops_list);
MPIDI_CH3I_RMA_Ops_free(win_ptr, &win_ptr->targets[target_rank].rma_ops_list,
&win_ptr->targets[target_rank].rma_ops_list_tail);
fn_fail:
MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_LOCK_PUT_OR_ACC);
......@@ -2450,7 +2462,8 @@ static int send_lock_get(MPID_Win * win_ptr, int target_rank)
/* Free MPIDI_RMA_Ops_list - the lock packet should still be in place, so
* we have to free two elements. */
MPIDI_CH3I_RMA_Ops_free(&win_ptr->targets[target_rank].rma_ops_list);
MPIDI_CH3I_RMA_Ops_free(win_ptr, &win_ptr->targets[target_rank].rma_ops_list,
&win_ptr->targets[target_rank].rma_ops_list_tail);
fn_fail:
MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_LOCK_GET);
......@@ -2460,7 +2473,8 @@ static int send_lock_get(MPID_Win * win_ptr, int target_rank)
/* ------------------------------------------------------------------------ */
/* list_complete_timer/counter and list_block_timer defined above */
static inline int rma_list_complete(MPID_Win * win_ptr, MPIDI_RMA_Ops_list_t * ops_list)
static inline int rma_list_complete(MPID_Win * win_ptr, MPIDI_RMA_Ops_list_t * ops_list,
MPIDI_RMA_Ops_list_t *ops_list_tail)
{
int ntimes = 0, mpi_errno = 0;
MPIDI_RMA_Op_t *curr_ptr;
......@@ -2470,7 +2484,7 @@ static inline int rma_list_complete(MPID_Win * win_ptr, MPIDI_RMA_Ops_list_t * o
/* Process all operations until they are complete */
while (!MPIDI_CH3I_RMA_Ops_isempty(ops_list)) {
int nDone = 0;
mpi_errno = rma_list_gc(win_ptr, ops_list, NULL, &nDone);
mpi_errno = rma_list_gc(win_ptr, ops_list, ops_list_tail, NULL, &nDone);
if (mpi_errno != MPI_SUCCESS)
MPIU_ERR_POP(mpi_errno);
ntimes++;
......@@ -2510,6 +2524,7 @@ static inline int rma_list_complete(MPID_Win * win_ptr, MPIDI_RMA_Ops_list_t * o
*/
static inline int rma_list_gc(MPID_Win * win_ptr,
MPIDI_RMA_Ops_list_t * ops_list,
MPIDI_RMA_Ops_list_t * ops_list_tail,
MPIDI_RMA_Op_t * last_elm, int *nDone)
{
int mpi_errno = 0;
......@@ -2538,7 +2553,7 @@ static inline int rma_list_gc(MPID_Win * win_ptr,
}
/* --END ERROR HANDLING-- */
MPID_Request_release(curr_ptr->request);
MPIDI_CH3I_RMA_Ops_free_and_next(ops_list, &curr_ptr);
MPIDI_CH3I_RMA_Ops_free_and_next(win_ptr, ops_list, ops_list_tail, &curr_ptr);
nVisit++;
/* MT: avoid processing unissued operations enqueued by other
......
......@@ -144,6 +144,8 @@ int MPID_Finalize(void)
p = pNext;
}
}
MPIDI_RMA_finalize();
MPIU_Free(MPIDI_failed_procs_string);
......
......@@ -338,6 +338,9 @@ int MPID_Init(int *argc, char ***argv, int requested, int *provided,
NULL);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPIDI_RMA_init();
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
fn_exit:
MPIDI_FUNC_EXIT(MPID_STATE_MPID_INIT);
return mpi_errno;
......
......@@ -257,7 +257,7 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
int mpi_errno = MPI_SUCCESS;
int i;
MPID_Comm *win_comm_ptr;
MPIU_CHKPMEM_DECL(1);
MPIU_CHKPMEM_DECL(2);
MPIDI_STATE_DECL(MPID_STATE_WIN_INIT);
MPIDI_FUNC_ENTER(MPID_STATE_WIN_INIT);
......@@ -304,6 +304,7 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
(*win_ptr)->epoch_state = MPIDI_EPOCH_NONE;
(*win_ptr)->epoch_count = 0;
(*win_ptr)->at_rma_ops_list = NULL;
(*win_ptr)->at_rma_ops_list_tail = NULL;
(*win_ptr)->shm_allocated = FALSE;
/* Initialize the passive target lock state */
......@@ -313,6 +314,7 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
for (i = 0; i < MPIR_Comm_size(win_comm_ptr); i++) {
(*win_ptr)->targets[i].rma_ops_list = NULL;
(*win_ptr)->targets[i].rma_ops_list_tail = NULL;
(*win_ptr)->targets[i].remote_lock_state = MPIDI_CH3_WIN_LOCK_NONE;
}
......@@ -325,6 +327,16 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
(*win_ptr)->info_args.alloc_shared_noncontig = 0;
(*win_ptr)->info_args.alloc_shm = FALSE;
MPIU_CHKPMEM_MALLOC((*win_ptr)->op_pool_start, struct MPIDI_RMA_Op *,
sizeof(MPIDI_RMA_Op_t) * MPIR_CVAR_CH3_RMA_OP_WIN_POOL_SIZE, mpi_errno,
"RMA op pool");
(*win_ptr)->op_pool = NULL;
(*win_ptr)->op_pool_tail = NULL;
for (i = 0; i < MPIR_CVAR_CH3_RMA_OP_WIN_POOL_SIZE; i++) {
(*win_ptr)->op_pool_start[i].pool_type = MPIDI_RMA_POOL_WIN;
MPL_LL_APPEND((*win_ptr)->op_pool, (*win_ptr)->op_pool_tail, &((*win_ptr)->op_pool_start[i]));
}