Commit 5dd8a0a4 authored by Xin Zhao's avatar Xin Zhao
Browse files

Add target element and global / local pools and related APIs.



Here we add a data structure to store information about an active target.
The information includes operation lists, passive lock state,
sync state, etc.

The target element is created by the origin on demand, and can
be freed after the remote completion of all previous operations
is detected. After RMA ending synchronization calls, all
target elements should be freed.

Similarly to operation pools, we create two-level target
pools for target elements: one per-window target pool and
one global target pool.
Signed-off-by: Pavan Balaji's avatarPavan Balaji <balaji@anl.gov>
parent f4253c38
......@@ -11,6 +11,7 @@
#include "mpid_rma_types.h"
/* Global RMA operation pool: list head, list tail, and the start of the
 * array that backs the pool elements (the pointer that is freed). */
extern struct MPIDI_RMA_Op *global_rma_op_pool, *global_rma_op_pool_tail, *global_rma_op_pool_start;
/* Global RMA target pool: list head, list tail, and the start of the
 * array that backs the pool elements (the pointer that is freed). */
extern struct MPIDI_RMA_Target *global_rma_target_pool, *global_rma_target_pool_tail, *global_rma_target_pool_start;
/* MPIDI_CH3I_Win_op_alloc(): get a new op element from op pool and
* initialize it. If we cannot get one, return NULL. */
......@@ -70,6 +71,77 @@ static inline int MPIDI_CH3I_Win_op_free(MPID_Win * win_ptr, MPIDI_RMA_Op_t * e)
return mpi_errno;
}
/* MPIDI_CH3I_Win_target_alloc(): get a target element from the target pool
 * and reset it to its initial state.
 *
 * The window-private pool (win_ptr->target_pool) is tried first; if it is
 * empty, an element is taken from the global pool (global_rma_target_pool).
 *
 * Returns the initialized element, or NULL if both pools are empty.
 *
 * The element's pool_type is deliberately left untouched: it records which
 * pool the element belongs to and is read by MPIDI_CH3I_Win_target_free()
 * to return the element to the right pool. */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_Win_target_alloc
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline MPIDI_RMA_Target_t *MPIDI_CH3I_Win_target_alloc(MPID_Win * win_ptr)
{
    MPIDI_RMA_Target_t *e;

    if (win_ptr->target_pool == NULL) {
        /* local pool is empty, try to find something in the global pool */
        if (global_rma_target_pool == NULL)
            return NULL;
        else {
            e = global_rma_target_pool;
            MPL_LL_DELETE(global_rma_target_pool, global_rma_target_pool_tail, e);
        }
    }
    else {
        e = win_ptr->target_pool;
        MPL_LL_DELETE(win_ptr->target_pool, win_ptr->target_pool_tail, e);
    }

    /* a fresh target has no queued operations of any kind */
    e->read_op_list = e->read_op_list_tail = NULL;
    e->write_op_list = e->write_op_list_tail = NULL;
    e->dt_op_list = e->dt_op_list_tail = NULL;
    e->pending_op_list = e->pending_op_list_tail = NULL;
    e->next_op_to_issue = NULL;

    /* -1: not yet bound to a target rank */
    e->target_rank = -1;
    e->lock_type = MPIDI_RMA_LOCK_TYPE_NONE;
    e->lock_mode = 0;
    e->outstanding_lock = 0;

    /* sync_flag is an 'enum MPIDI_RMA_sync_types', so it must be set with
     * that enum's own "none" constant. */
    e->sync.sync_flag = MPIDI_RMA_SYNC_NONE;
    e->sync.outstanding_acks = 0;
    e->sync.have_remote_incomplete_ops = 1;     /* When I create a new target, there must be
                                                 * incomplete ops until a FLUSH/UNLOCK packet
                                                 * is sent. */

    return e;
}
/* MPIDI_CH3I_Win_target_free(): hand a target element back to the pool it
 * was taken from.
 *
 * win_ptr - window whose private target pool receives the element when
 *           e->pool_type is MPIDI_RMA_POOL_WIN
 * e       - element to release; all four of its operation lists must
 *           already be empty (checked with MPIU_Assert)
 *
 * Always returns MPI_SUCCESS. */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_Win_target_free
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int MPIDI_CH3I_Win_target_free(MPID_Win * win_ptr, MPIDI_RMA_Target_t * e)
{
    /* A target may only be released once nothing is queued on it. */
    MPIU_Assert(e->read_op_list == NULL);
    MPIU_Assert(e->write_op_list == NULL);
    MPIU_Assert(e->dt_op_list == NULL);
    MPIU_Assert(e->pending_op_list == NULL);

    /* Each element goes back to the pool recorded in its pool_type, so
     * that freeing a window's pool storage at window free time cannot
     * conflict with the global pool or with other windows.
     *
     * Elements are pushed at the head of the list (PREPEND) so that the
     * most recently released element is the next one handed out, which is
     * intended to improve cache performance. */
    if (e->pool_type != MPIDI_RMA_POOL_WIN) {
        MPL_LL_PREPEND(global_rma_target_pool, global_rma_target_pool_tail, e);
    }
    else {
        MPL_LL_PREPEND(win_ptr->target_pool, win_ptr->target_pool_tail, e);
    }

    return MPI_SUCCESS;
}
/* Return nonzero if the RMA operations list is empty.
*/
#undef FUNCNAME
......
......@@ -24,6 +24,12 @@ enum MPID_Lock_state {
MPID_LOCK_SHARED_ALL
};
/* Lock type recorded for an RMA target element.
 * NOTE(review): the numbering starts at 23 rather than 0, presumably so
 * that an uninitialized or wrongly initialized field is easy to spot
 * while debugging -- confirm. */
enum MPIDI_RMA_Lock_type {
    MPIDI_RMA_LOCK_TYPE_NONE = 23,
    MPIDI_RMA_LOCK_TYPE_SHARED = 24,
    MPIDI_RMA_LOCK_TYPE_EXCLUSIVE = 25
};
/*
* RMA Declarations. We should move these into something separate from
* a Request.
......@@ -76,6 +82,43 @@ typedef struct MPIDI_RMA_Op {
MPIDI_RMA_Pool_type_t pool_type;
} MPIDI_RMA_Op_t;
/* Per-target bookkeeping kept by the origin: the operation lists, the
 * lock state and the synchronization state for one target rank. */
typedef struct MPIDI_RMA_Target {
    /* operation lists, each kept as a head pointer plus a tail pointer */
    struct MPIDI_RMA_Op *read_op_list;
    struct MPIDI_RMA_Op *read_op_list_tail;
    struct MPIDI_RMA_Op *write_op_list;
    struct MPIDI_RMA_Op *write_op_list_tail;
    struct MPIDI_RMA_Op *dt_op_list;
    struct MPIDI_RMA_Op *dt_op_list_tail;
    struct MPIDI_RMA_Op *pending_op_list;
    struct MPIDI_RMA_Op *pending_op_list_tail;
    struct MPIDI_RMA_Op *next_op_to_issue;

    /* link used while the element sits in a target list / pool */
    struct MPIDI_RMA_Target *next;

    int target_rank;
    enum MPIDI_RMA_Lock_type lock_type; /* SHARED, EXCLUSIVE */
    int lock_mode;              /* e.g., MODE_NO_CHECK */
    int outstanding_lock;

    /* The target structure is free to be cleaned up when all of the
     * following conditions hold true:
     *   - No operations are queued up (read_op_list, write_op_list,
     *     dt_op_list and pending_op_list are all NULL)
     *   - There are no outstanding acks (outstanding_acks == 0)
     *   - There are no incomplete ops (have_remote_incomplete_ops == 0)
     *   - There are no sync messages to be sent (sync_flag == NONE)
     */
    struct {
        /* next synchronization flag to be sent to the target (either
         * piggybacked or as a separate packet) */
        enum MPIDI_RMA_sync_types sync_flag;    /* UNLOCK, FLUSH or FLUSH_LOCAL */

        /* packets sent out that we are expecting an ack for */
        int outstanding_acks;

        /* if we sent out any operations, but have not waited for
         * their remote completion, this flag is set.  When the next
         * FLUSH or UNLOCK sync flag is set, we will clear this
         * variable. */
        int have_remote_incomplete_ops; /* have ops that have not completed remotely */
    } sync;

    /* which pool (window-private or global) this element belongs to */
    MPIDI_RMA_Pool_type_t pool_type;
} MPIDI_RMA_Target_t;
typedef struct MPIDI_PT_single_op {
MPIDI_CH3_Pkt_type_t type; /* put, get, or accum. */
void *addr;
......
......@@ -199,6 +199,41 @@ typedef struct MPIDI_VC * MPID_VCR;
# define MPIDI_REQUEST_SEQNUM
#endif
/* Here we add RMA sync types to specify types
* of synchronizations the origin is going to
* perform to the target. */
/* There are four kinds of synchronizations: NONE,
* FLUSH_LOCAL, FLUSH, UNLOCK.
* (1) NONE means there is no special synchronization,
* origin just issues as many operations as it can,
* excluding the last operation which is a piggyback
* candidate;
* (2) FLUSH_LOCAL means origin wants to do a
* FLUSH_LOCAL sync and issues out all pending
* operations including the piggyback candidate;
* (3) FLUSH means origin wants to do a FLUSH sync
* and issues out all pending operations including
* the last op piggybacked with a FLUSH flag to
* detect remote completion;
* (4) UNLOCK means origin issues all pending operations
* including the last op piggybacked with an UNLOCK
* flag to release the lock on target and detect remote
* completion.
* Note that FLUSH_LOCAL is a superset of NONE, FLUSH
* is a superset of FLUSH_LOCAL, and UNLOCK is a superset
* of FLUSH.
*/
/* The numbering starts at an arbitrarily chosen value (42) instead of 0,
 * to help with debugging when a sync type is not initialized or is
 * wrongly initialized. */
enum MPIDI_RMA_sync_types {
    MPIDI_RMA_SYNC_NONE = 42,
    MPIDI_RMA_SYNC_FLUSH_LOCAL = 43,
    MPIDI_RMA_SYNC_FLUSH = 44,
    MPIDI_RMA_SYNC_UNLOCK = 45
};
/* We start with an arbitrarily chosen number (42), to help with
* debugging when a packet type is not initialized or wrongly
* initialized. */
......@@ -301,6 +336,9 @@ struct MPIDI_Win_target_state {
struct MPIDI_RMA_Op *op_pool_start; /* start pointer used for freeing */\
struct MPIDI_RMA_Op *op_pool; /* pool of operations */ \
struct MPIDI_RMA_Op *op_pool_tail; /* tail pointer to pool of operations. */ \
struct MPIDI_RMA_Target *target_pool_start; /* start pointer used for freeing */\
struct MPIDI_RMA_Target *target_pool; /* pool of targets */ \
struct MPIDI_RMA_Target *target_pool_tail; /* tail pointer to pool of targets */\
#ifdef MPIDI_CH3_WIN_DECL
#define MPID_DEV_WIN_DECL \
......
......@@ -257,7 +257,8 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
int mpi_errno = MPI_SUCCESS;
int i;
MPID_Comm *win_comm_ptr;
MPIU_CHKPMEM_DECL(2);
int win_target_pool_size;
MPIU_CHKPMEM_DECL(3);
MPIDI_STATE_DECL(MPID_STATE_WIN_INIT);
MPIDI_FUNC_ENTER(MPID_STATE_WIN_INIT);
......@@ -337,6 +338,17 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
MPL_LL_APPEND((*win_ptr)->op_pool, (*win_ptr)->op_pool_tail, &((*win_ptr)->op_pool_start[i]));
}
win_target_pool_size = MPIR_MIN(MPIR_CVAR_CH3_RMA_TARGET_WIN_POOL_SIZE, MPIR_Comm_size(win_comm_ptr));
MPIU_CHKPMEM_MALLOC((*win_ptr)->target_pool_start, struct MPIDI_RMA_Target *,
sizeof(MPIDI_RMA_Target_t) * win_target_pool_size,
mpi_errno, "RMA target pool");
(*win_ptr)->target_pool = NULL;
(*win_ptr)->target_pool_tail = NULL;
for (i = 0; i < win_target_pool_size; i++) {
(*win_ptr)->target_pool_start[i].pool_type = MPIDI_RMA_POOL_WIN;
MPL_LL_APPEND((*win_ptr)->target_pool, (*win_ptr)->target_pool_tail, &((*win_ptr)->target_pool_start[i]));
}
MPID_WIN_FTABLE_SET_DEFAULTS(win_ptr);
fn_exit:
......
......@@ -35,11 +35,36 @@ cvars:
operations) that stores information about RMA operations that
could not be issued immediately. Requires a positive value.
- name : MPIR_CVAR_CH3_RMA_TARGET_WIN_POOL_SIZE
category : CH3
type : int
default : 256
class : none
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_ALL_EQ
description : >-
Size of the window-private RMA target pool (in number of
targets) that stores information about RMA targets that
could not be issued immediately. Requires a positive value.
- name : MPIR_CVAR_CH3_RMA_TARGET_GLOBAL_POOL_SIZE
category : CH3
type : int
default : 16384
class : none
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_ALL_EQ
description : >-
Size of the Global RMA targets pool (in number of
targets) that stores information about RMA targets that
could not be issued immediately. Requires a positive value.
=== END_MPI_T_CVAR_INFO_BLOCK ===
*/
/* Global RMA operation pool: list head, list tail, and the start of the
 * backing array.  All start out NULL; the array is allocated and linked
 * into the list in MPIDI_RMA_init() and freed in MPIDI_RMA_finalize(). */
struct MPIDI_RMA_Op *global_rma_op_pool = NULL, *global_rma_op_pool_tail = NULL, *global_rma_op_pool_start = NULL;
/* Global RMA target pool: list head, list tail, and the start of the
 * backing array.  All start out NULL; the array is allocated and linked
 * into the list in MPIDI_RMA_init() and freed in MPIDI_RMA_finalize(). */
struct MPIDI_RMA_Target *global_rma_target_pool = NULL, *global_rma_target_pool_tail = NULL, *global_rma_target_pool_start = NULL;
#undef FUNCNAME
#define FUNCNAME MPIDI_RMA_init
......@@ -49,7 +74,7 @@ int MPIDI_RMA_init(void)
{
int mpi_errno = MPI_SUCCESS;
int i;
MPIU_CHKPMEM_DECL(1);
MPIU_CHKPMEM_DECL(2);
MPIDI_STATE_DECL(MPID_STATE_MPIDI_RMA_INIT);
......@@ -63,6 +88,14 @@ int MPIDI_RMA_init(void)
MPL_LL_APPEND(global_rma_op_pool, global_rma_op_pool_tail, &(global_rma_op_pool_start[i]));
}
MPIU_CHKPMEM_MALLOC(global_rma_target_pool_start, struct MPIDI_RMA_Target *,
sizeof(struct MPIDI_RMA_Target) * MPIR_CVAR_CH3_RMA_TARGET_GLOBAL_POOL_SIZE,
mpi_errno, "RMA target pool");
for (i = 0; i < MPIR_CVAR_CH3_RMA_TARGET_GLOBAL_POOL_SIZE; i++) {
global_rma_target_pool_start[i].pool_type = MPIDI_RMA_POOL_GLOBAL;
MPL_LL_APPEND(global_rma_target_pool, global_rma_target_pool_tail, &(global_rma_target_pool_start[i]));
}
fn_exit:
MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_RMA_INIT);
return mpi_errno;
......@@ -84,6 +117,7 @@ void MPIDI_RMA_finalize(void)
MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_RMA_FINALIZE);
MPIU_Free(global_rma_op_pool_start);
MPIU_Free(global_rma_target_pool_start);
MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_RMA_FINALIZE);
}
......@@ -125,6 +159,7 @@ int MPIDI_Win_free(MPID_Win ** win_ptr)
MPIU_Free((*win_ptr)->disp_units);
MPIU_Free((*win_ptr)->all_win_handles);
MPIU_Free((*win_ptr)->op_pool_start);
MPIU_Free((*win_ptr)->target_pool_start);
/* Free the attached buffer for windows created with MPI_Win_allocate() */
if ((*win_ptr)->create_flavor == MPI_WIN_FLAVOR_ALLOCATE ||
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment