Commit 9b1b9241 authored by Xin Zhao, committed by Pavan Balaji
Browse files

Add DELAY_ISSUING_FOR_PIGGYBACKING option for RMA synchronization.



Originally, in RMA synchronization, we always tried to piggyback
LOCK/UNLOCK/FLUSH flags with operations by delaying the issuing of
some of the operations. This is good when the number of operations
is very small, but delaying issuing is not good when the message
size is large or the number of operations is large.

In this patch, we add a CVAR to turn on/off delaying the issuing of
operations for piggybacking LOCK/UNLOCK/FLUSH flags. By default it is
off, which means we only piggyback when there are operations
available, but not at the cost of delaying issuing operations.
Signed-off-by: Pavan Balaji <balaji@anl.gov>
parent 93a25439
......@@ -201,6 +201,7 @@ static inline int check_and_switch_target_state(MPID_Win * win_ptr, MPIDI_RMA_Ta
/* if we reach WIN_UNLOCK and there is still operation existing
* in pending list, this operation must be the only operation
* and it is prepared to piggyback LOCK and UNLOCK. */
MPIU_Assert(MPIR_CVAR_CH3_RMA_DELAY_ISSUING_FOR_PIGGYBACKING);
MPIU_Assert(target->pending_op_list_head->next == NULL);
MPIU_Assert(target->pending_op_list_head->piggyback_lock_candidate);
}
......@@ -209,6 +210,13 @@ static inline int check_and_switch_target_state(MPID_Win * win_ptr, MPIDI_RMA_Ta
case MPIDI_RMA_LOCK_GRANTED:
case MPIDI_RMA_NONE:
if (target->win_complete_flag) {
if (target->pending_op_list_head == NULL) {
mpi_errno = send_decr_at_cnt_msg(target->target_rank, win_ptr);
if (mpi_errno != MPI_SUCCESS)
MPIU_ERR_POP(mpi_errno);
}
}
if (target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH) {
if (target->pending_op_list_head == NULL) {
if (target->target_rank != rank) {
......@@ -309,11 +317,12 @@ static inline int issue_ops_target(MPID_Win * win_ptr, MPIDI_RMA_Target_t * targ
break;
}
if (curr_op->next == NULL &&
if (MPIR_CVAR_CH3_RMA_DELAY_ISSUING_FOR_PIGGYBACKING && curr_op->next == NULL &&
target->sync.sync_flag == MPIDI_RMA_SYNC_NONE && curr_op->ureq == NULL) {
/* Skip the last OP if sync_flag is NONE since we
/* If DELAY_ISSUING_FOR_PIGGYBACKING is turned on,
* skip the last OP if sync_flag is NONE since we
* want to leave it to the ending synchronization
* so that we can piggyback LOCK / FLUSH.
* so that we can piggyback UNLOCK / FLUSH.
* However, if it is a request-based RMA, do not
* skip it (otherwise a wait call before unlock
* will be blocked). */
......
......@@ -230,6 +230,29 @@ cvars:
is smaller than the value, FENCE will use a basic but fast
algorithm which requires an O(P) data structure.
- name : MPIR_CVAR_CH3_RMA_DELAY_ISSUING_FOR_PIGGYBACKING
category : CH3
type : int
default : 0
class : none
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_ALL_EQ
description : >-
Specify whether delaying the issuing of RMA operations for
piggybacking LOCK/UNLOCK/FLUSH is enabled. It can be either 0 or 1.
When it is set to 1, the issuing of the LOCK message is delayed until
the origin process sees the first RMA operation and piggybacks
LOCK with that operation, and the origin process always keeps
the current last operation until the ending synchronization
call in order to piggyback UNLOCK/FLUSH with that operation.
When it is set to 0, in the WIN_LOCK/UNLOCK case, the LOCK message
is sent out as early as possible; in the WIN_LOCK_ALL/UNLOCK_ALL
case, the origin process still tries to piggyback the LOCK message
with the first operation; for the UNLOCK/FLUSH message, the origin
process no longer keeps the current last operation but only
piggybacks UNLOCK/FLUSH if there is an operation available in
the ending synchronization call.
=== END_MPI_T_CVAR_INFO_BLOCK ===
*/
......@@ -432,18 +455,12 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
for (i = 0; i < win_ptr->num_slots; i++) {
curr_target = win_ptr->slots[i].target_list_head;
while (curr_target != NULL) {
if (curr_target->pending_op_list_head != NULL) {
if (curr_target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH_LOCAL) {
curr_target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH_LOCAL;
}
/* flag is set in order to decrement complete counter on target */
curr_target->win_complete_flag = 1;
}
else {
mpi_errno = send_decr_at_cnt_msg(curr_target->target_rank, win_ptr);
if (mpi_errno != MPI_SUCCESS)
MPIU_ERR_POP(mpi_errno);
}
curr_target = curr_target->next;
}
}
......@@ -1046,7 +1063,8 @@ int MPIDI_Win_lock(int lock_type, int dest, int assert, MPID_Win * win_ptr)
/* If Destination is myself or process on SHM, acquire the lock,
* wait until lock is granted. */
if (!(assert & MPI_MODE_NOCHECK) && (dest == rank || shm_target)) {
if (!(assert & MPI_MODE_NOCHECK)) {
if (dest == rank || shm_target) {
mpi_errno = MPIDI_CH3I_RMA_Make_progress_target(win_ptr, dest, &made_progress);
if (mpi_errno != MPI_SUCCESS)
MPIU_ERR_POP(mpi_errno);
......@@ -1057,6 +1075,14 @@ int MPIDI_Win_lock(int lock_type, int dest, int assert, MPID_Win * win_ptr)
MPIU_ERR_POP(mpi_errno);
}
}
else if (!MPIR_CVAR_CH3_RMA_DELAY_ISSUING_FOR_PIGGYBACKING) {
/* if DELAY_ISSUING_FOR_PIGGYBACKING is turned off, send lock request now
* since we do not want to piggyback LOCK with future OP */
mpi_errno = MPIDI_CH3I_RMA_Make_progress_target(win_ptr, dest, &made_progress);
if (mpi_errno != MPI_SUCCESS)
MPIU_ERR_POP(mpi_errno);
}
}
finish_lock:
/* Ensure ordering of load/store operations. */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment