Commit f076f3fe authored by Xin Zhao
Browse files

Add new RMA states on window / target and modify state checking.



We define new states to indicate the current situation of
RMA synchronization. The states contain both ACCESS states
and EXPOSURE states, and specify if the synchronization
is initialized (_CALLED), on-going (_ISSUED) and completed
(_GRANTED). For single lock in Passive Target, we use
per-target state whereas the window state is set to PER_TARGET.
Signed-off-by: Pavan Balaji <balaji@anl.gov>
parent 7eac974f
......@@ -103,6 +103,7 @@ static inline MPIDI_RMA_Target_t *MPIDI_CH3I_Win_target_alloc(MPID_Win * win_ptr
e->next_op_to_issue = NULL;
e->target_rank = -1;
e->access_state = MPIDI_RMA_NONE;
e->lock_type = MPIDI_RMA_LOCK_TYPE_NONE;
e->lock_mode = 0;
e->outstanding_lock = 0;
......
......@@ -91,6 +91,7 @@ typedef struct MPIDI_RMA_Target {
struct MPIDI_RMA_Op *next_op_to_issue;
struct MPIDI_RMA_Target *next;
int target_rank;
enum MPIDI_RMA_states access_state;
enum MPIDI_RMA_Lock_type lock_type; /* SHARED, EXCLUSIVE */
int lock_mode; /* e.g., MODE_NO_CHECK */
int outstanding_lock;
......
......@@ -234,6 +234,28 @@ enum MPIDI_RMA_sync_types {
MPIDI_RMA_SYNC_UNLOCK
};
/* RMA synchronization states.  We start with an arbitrarily chosen
 * number (42) to help with debugging when a state is not initialized
 * or wrongly initialized. */
enum MPIDI_RMA_states {
    /* window-wide states */
    MPIDI_RMA_NONE = 42,            /* no epoch open */
    MPIDI_RMA_FENCE_ISSUED,         /* access / exposure */
    MPIDI_RMA_FENCE_GRANTED,        /* access / exposure */
    MPIDI_RMA_PSCW_ISSUED,          /* access */
    MPIDI_RMA_PSCW_GRANTED,         /* access */
    MPIDI_RMA_PSCW_EXPO,            /* exposure */
    MPIDI_RMA_PER_TARGET,           /* access; per-target lock state is
                                     * kept on each target object */
    MPIDI_RMA_LOCK_ALL_CALLED,      /* access */
    MPIDI_RMA_LOCK_ALL_ISSUED,      /* access */
    MPIDI_RMA_LOCK_ALL_GRANTED,     /* access */
    /* target-specific states */
    MPIDI_RMA_LOCK_CALLED,          /* access */
    MPIDI_RMA_LOCK_ISSUED,          /* access */
    MPIDI_RMA_LOCK_GRANTED          /* access; no trailing comma --
                                     * keeps strict C90 compilers happy */
};
/* We start with an arbitrarily chosen number (42), to help with
* debugging when a packet type is not initialized or wrongly
* initialized. */
......@@ -341,6 +363,10 @@ struct MPIDI_Win_target_state {
struct MPIDI_RMA_Target *target_pool_tail; /* tail pointer to pool of targets */\
struct MPIDI_RMA_Slot *slots; \
int num_slots; \
struct { \
enum MPIDI_RMA_states access_state; \
enum MPIDI_RMA_states exposure_state; \
} states; \
#ifdef MPIDI_CH3_WIN_DECL
#define MPID_DEV_WIN_DECL \
......
......@@ -29,17 +29,13 @@ int MPIDI_Put(const void *origin_addr, int origin_count, MPI_Datatype
MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_PUT);
MPIU_ERR_CHKANDJUMP(win_ptr->states.access_state == MPIDI_RMA_NONE,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
if (target_rank == MPI_PROC_NULL) {
goto fn_exit;
}
if (win_ptr->epoch_state == MPIDI_EPOCH_NONE && win_ptr->fence_issued) {
win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
}
MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
MPIDI_Datatype_get_info(origin_count, origin_datatype, dt_contig, data_sz, dtp, dt_true_lb);
if (data_sz == 0) {
......@@ -143,17 +139,13 @@ int MPIDI_Get(void *origin_addr, int origin_count, MPI_Datatype
MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_GET);
MPIU_ERR_CHKANDJUMP(win_ptr->states.access_state == MPIDI_RMA_NONE,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
if (target_rank == MPI_PROC_NULL) {
goto fn_exit;
}
if (win_ptr->epoch_state == MPIDI_EPOCH_NONE && win_ptr->fence_issued) {
win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
}
MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
MPIDI_Datatype_get_info(origin_count, origin_datatype, dt_contig, data_sz, dtp, dt_true_lb);
if (data_sz == 0) {
......@@ -257,17 +249,13 @@ int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_ACCUMULATE);
MPIU_ERR_CHKANDJUMP(win_ptr->states.access_state == MPIDI_RMA_NONE,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
if (target_rank == MPI_PROC_NULL) {
goto fn_exit;
}
if (win_ptr->epoch_state == MPIDI_EPOCH_NONE && win_ptr->fence_issued) {
win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
}
MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
MPIDI_Datatype_get_info(origin_count, origin_datatype, dt_contig, data_sz, dtp, dt_true_lb);
if (data_sz == 0) {
......@@ -409,17 +397,13 @@ int MPIDI_Get_accumulate(const void *origin_addr, int origin_count,
MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_GET_ACCUMULATE);
MPIU_ERR_CHKANDJUMP(win_ptr->states.access_state == MPIDI_RMA_NONE,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
if (target_rank == MPI_PROC_NULL) {
goto fn_exit;
}
if (win_ptr->epoch_state == MPIDI_EPOCH_NONE && win_ptr->fence_issued) {
win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
}
MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
MPIDI_Datatype_get_info(target_count, target_datatype, dt_contig, data_sz, dtp, dt_true_lb);
if (data_sz == 0) {
......@@ -548,19 +532,16 @@ int MPIDI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
MPIDI_STATE_DECL(MPID_STATE_MPIDI_COMPARE_AND_SWAP);
MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_COMPARE_AND_SWAP);
MPIU_ERR_CHKANDJUMP(win_ptr->states.access_state == MPIDI_RMA_NONE,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
if (target_rank == MPI_PROC_NULL) {
goto fn_exit;
}
if (win_ptr->epoch_state == MPIDI_EPOCH_NONE && win_ptr->fence_issued) {
win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
}
MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
rank = win_ptr->comm_ptr->rank;
if (win_ptr->shm_allocated == TRUE && target_rank != rank &&
......@@ -644,19 +625,16 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
MPIDI_STATE_DECL(MPID_STATE_MPIDI_FETCH_AND_OP);
MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_FETCH_AND_OP);
MPIU_ERR_CHKANDJUMP(win_ptr->states.access_state == MPIDI_RMA_NONE,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
if (target_rank == MPI_PROC_NULL) {
goto fn_exit;
}
if (win_ptr->epoch_state == MPIDI_EPOCH_NONE && win_ptr->fence_issued) {
win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
}
MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
rank = win_ptr->comm_ptr->rank;
if (win_ptr->shm_allocated == TRUE && target_rank != rank &&
......
......@@ -34,8 +34,10 @@ static int MPIDI_CH3I_Rma_req_poll(void *state, MPI_Status * status)
* is still active first; the user could complete the request after calling
* unlock. */
/* FIXME: We need per-operation completion to make this more efficient. */
if (req_state->win_ptr->targets[req_state->target_rank].remote_lock_state
!= MPIDI_CH3_WIN_LOCK_NONE) {
if (req_state->win_ptr->states.access_state == MPIDI_RMA_PER_TARGET ||
req_state->win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED ||
req_state->win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED ||
req_state->win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_GRANTED) {
mpi_errno = req_state->win_ptr->RMAFns.Win_flush(req_state->target_rank,
req_state->win_ptr);
}
......@@ -156,9 +158,12 @@ int MPIDI_Rput(const void *origin_addr, int origin_count,
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_RPUT);
MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state != MPIDI_EPOCH_LOCK &&
win_ptr->epoch_state != MPIDI_EPOCH_LOCK_ALL &&
target_rank != MPI_PROC_NULL, mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
/* request-based RMA operations are only valid within a passive epoch */
MPIU_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_CALLED &&
win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_ISSUED &&
win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_GRANTED,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
MPIU_CHKPMEM_MALLOC(req_state, MPIDI_CH3I_Rma_req_state_t *,
sizeof(MPIDI_CH3I_Rma_req_state_t), mpi_errno, "req-based RMA state");
......@@ -243,9 +248,12 @@ int MPIDI_Rget(void *origin_addr, int origin_count,
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_RGET);
MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state != MPIDI_EPOCH_LOCK &&
win_ptr->epoch_state != MPIDI_EPOCH_LOCK_ALL &&
target_rank != MPI_PROC_NULL, mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
/* request-based RMA operations are only valid within a passive epoch */
MPIU_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_CALLED &&
win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_ISSUED &&
win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_GRANTED,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
MPIU_CHKPMEM_MALLOC(req_state, MPIDI_CH3I_Rma_req_state_t *,
sizeof(MPIDI_CH3I_Rma_req_state_t), mpi_errno, "req-based RMA state");
......@@ -330,9 +338,12 @@ int MPIDI_Raccumulate(const void *origin_addr, int origin_count,
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_RACCUMULATE);
MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state != MPIDI_EPOCH_LOCK &&
win_ptr->epoch_state != MPIDI_EPOCH_LOCK_ALL &&
target_rank != MPI_PROC_NULL, mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
/* request-based RMA operations are only valid within a passive epoch */
MPIU_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_CALLED &&
win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_ISSUED &&
win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_GRANTED,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
MPIU_CHKPMEM_MALLOC(req_state, MPIDI_CH3I_Rma_req_state_t *,
sizeof(MPIDI_CH3I_Rma_req_state_t), mpi_errno, "req-based RMA state");
......@@ -418,9 +429,12 @@ int MPIDI_Rget_accumulate(const void *origin_addr, int origin_count,
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_RGET_ACCUMULATE);
MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state != MPIDI_EPOCH_LOCK &&
win_ptr->epoch_state != MPIDI_EPOCH_LOCK_ALL &&
target_rank != MPI_PROC_NULL, mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
/* request-based RMA operations are only valid within a passive epoch */
MPIU_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_CALLED &&
win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_ISSUED &&
win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_GRANTED,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
MPIU_CHKPMEM_MALLOC(req_state, MPIDI_CH3I_Rma_req_state_t *,
sizeof(MPIDI_CH3I_Rma_req_state_t), mpi_errno, "req-based RMA state");
......
......@@ -2102,8 +2102,10 @@ int MPIDI_Win_sync(MPID_Win * win_ptr)
MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_WIN_SYNC);
MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state != MPIDI_EPOCH_LOCK &&
win_ptr->epoch_state != MPIDI_EPOCH_LOCK_ALL,
MPIU_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_CALLED &&
win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_ISSUED &&
win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_GRANTED,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
OPA_read_write_barrier();
......
......@@ -329,6 +329,8 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
(*win_ptr)->at_rma_ops_list = NULL;
(*win_ptr)->at_rma_ops_list_tail = NULL;
(*win_ptr)->shm_allocated = FALSE;
(*win_ptr)->states.access_state = MPIDI_RMA_NONE;
(*win_ptr)->states.exposure_state = MPIDI_RMA_NONE;
/* Initialize the passive target lock state */
MPIU_CHKPMEM_MALLOC((*win_ptr)->targets, struct MPIDI_Win_target_state *,
......
......@@ -138,7 +138,8 @@ int MPIDI_Win_free(MPID_Win ** win_ptr)
MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_WIN_FREE);
MPIU_ERR_CHKANDJUMP((*win_ptr)->epoch_state != MPIDI_EPOCH_NONE,
MPIU_ERR_CHKANDJUMP((*win_ptr)->states.access_state != MPIDI_RMA_NONE ||
(*win_ptr)->states.exposure_state != MPIDI_RMA_NONE,
mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
if (!(*win_ptr)->shm_allocated) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment