Commit e70c26ae authored by William Gropp's avatar William Gropp
Browse files

[svn-r8069] Detect and report incorrect use of assert and/or rank in Win_lock...

[svn-r8069] Detect and report incorrect use of assert and/or rank in Win_lock and Win_unlock.  These detect an error in the current test suite that manifested itself as a storage leak.  Also corrected some man page information and instrumentation for RMA completion in non-fence modes
parent 858aae27
......@@ -1554,6 +1554,8 @@ typedef struct MPID_Win {
MPID_Comm *comm_ptr; /* Pointer to comm of window (dup) */
int myrank; /* Rank of this process in comm (used to
detect operations on self) */
int lockRank; /* If within an MPI_Win_lock epoch,
the rank that we locked */
#ifdef USE_THREADED_WINDOW_CODE
/* These were causing compilation errors. We need to figure out how to
integrate threads into MPICH2 before including these fields. */
......
......@@ -367,8 +367,23 @@ be in the range 0 to %d
**rmasize %d:Invalid size argument in RMA call (value is %d)
**rmadisp:Invalid displacement argument in RMA call
**assert:Invalid assert argument
**lockassertval:Invalid assert argument passed to MPI_Win_lock
**lockassertval %d: Invalid assert argument (%d) passed to MPI_Win_lock
**winunlockrank:Invalid rank argument
**winunlockrank %d %d:Invalid rank argument %d, should be %d
**mismatchedlockrank:Rank to unlock in MPI_Win_unlock was not locked with \
MPI_Win_lock
**mismatchedlockrank %d %d:Rank to unlock (%d) in MPI_Win_unlock was not \
locked with MPI_Win_lock (rank %d was locked)
**winunlockwithoutlock: Attempt to unlock in MPI_Win_unlock without \
first locking with MPI_Win_lock
**winfreewhilelocked: MPI_Win_free called while holding an MPI_Win_lock on \
a window
**winfreewhilelocked %d: MPI_Win_free called while holding an MPI_Win_lock on \
the process with rank %d
**lockwhilelocked: Attempt to lock a process while holding a lock
**lockwhilelocked %d: Attempt to lock a process while holding a lock on \
process %d
**nomemreq:failure occurred while allocating memory for a request object
**nomemuereq %d:Failed to allocate memory for an unexpected message. %d \
unexpected messages queued.
......@@ -395,6 +410,7 @@ unexpected messages queued.
# Duplicates?
#**argnull:Invalid null parameter
#**argnull %s:Invalid null parameter %s
**commstack:Internal overflow in stack used for MPI_Comm_split
**cond_create:MPIU_Thread_cond_create failed
**cond_create %s:MPIU_Thread_cond_create failed: %s
......
......@@ -47,9 +47,11 @@
.N Errors
.N MPI_SUCCESS
.N MPI_ERR_ARG
.N MPI_ERR_COMM
.N MPI_ERR_INFO
.N MPI_ERR_OTHER
.N MPI_ERR_SIZE
@*/
int MPI_Win_create(void *base, MPI_Aint size, int disp_unit, MPI_Info info,
MPI_Comm comm, MPI_Win *win)
......@@ -119,6 +121,7 @@ int MPI_Win_create(void *base, MPI_Aint size, int disp_unit, MPI_Info info,
/* Initialize a few fields that have specific defaults */
win_ptr->name[0] = 0;
win_ptr->errhandler = 0;
win_ptr->lockRank = -1;
/* return the handle of the window object to the user */
MPIU_OBJ_PUBLISH_HANDLE(*win, win_ptr->handle);
......
......@@ -45,6 +45,7 @@
.N Errors
.N MPI_SUCCESS
.N MPI_ERR_WIN
.N MPI_ERR_OTHER
@*/
int MPI_Win_free(MPI_Win *win)
{
......@@ -83,6 +84,12 @@ int MPI_Win_free(MPI_Win *win)
/* If win_ptr is not valid, it will be reset to null */
if (mpi_errno) goto fn_fail;
/* Check for unterminated lock epoch */
if (win_ptr->lockRank != -1) {
MPIU_ERR_SET1(mpi_errno,MPI_ERR_OTHER,
"**winfreewhilelocked",
"**winfreewhilelocked %d", win_ptr->lockRank);
}
}
MPID_END_ERROR_CHECKS;
}
......
......@@ -52,9 +52,9 @@
be used portably only in such memory.
The 'assert' argument is used to indicate special conditions for the
fence that an implementation may use to optimize the 'MPI_Win_fence'
fence that an implementation may use to optimize the 'MPI_Win_lock'
operation. The value zero is always correct. Other assertion values
may be or''ed together. Assertions that are valid for 'MPI_Win_fence' are\:
may be or''ed together. Assertions that are valid for 'MPI_Win_lock' are\:
. MPI_MODE_NOCHECK - no other process holds, or will attempt to acquire a
conflicting lock, while the caller holds the window lock. This is useful
......@@ -70,6 +70,7 @@
.N MPI_SUCCESS
.N MPI_ERR_RANK
.N MPI_ERR_WIN
.N MPI_ERR_OTHER
@*/
int MPI_Win_lock(int lock_type, int rank, int assert, MPI_Win win)
{
......@@ -110,13 +111,22 @@ int MPI_Win_lock(int lock_type, int rank, int assert, MPI_Win win)
/* If win_ptr is not value, it will be reset to null */
if (mpi_errno) goto fn_fail;
if (lock_type != MPI_LOCK_SHARED && lock_type != MPI_LOCK_EXCLUSIVE)
mpi_errno = MPIR_Err_create_code( MPI_SUCCESS,
MPIR_ERR_RECOVERABLE,
FCNAME, __LINE__,
MPI_ERR_OTHER,
"**locktype", 0 );
if (assert != 0 && assert != MPI_MODE_NOCHECK) {
MPIU_ERR_SET1(mpi_errno,MPI_ERR_ARG,
"**lockassertval",
"**lockassertval %d", assert );
if (mpi_errno) goto fn_fail;
}
if (lock_type != MPI_LOCK_SHARED &&
lock_type != MPI_LOCK_EXCLUSIVE) {
MPIU_ERR_SET(mpi_errno,MPI_ERR_OTHER, "**locktype" );
}
if (win_ptr->lockRank != -1) {
MPIU_ERR_SET1(mpi_errno,MPI_ERR_OTHER,
"**lockwhilelocked",
"**lockwhillocked %d", win_ptr->lockRank );
}
comm_ptr = win_ptr->comm_ptr;
MPIR_ERRTEST_SEND_RANK(comm_ptr, rank, mpi_errno);
......@@ -131,6 +141,8 @@ int MPI_Win_lock(int lock_type, int rank, int assert, MPI_Win win)
mpi_errno = MPIU_RMA_CALL(win_ptr,
Win_lock(lock_type, rank, assert, win_ptr));
if (mpi_errno != MPI_SUCCESS) goto fn_fail;
/* If the lock succeeded, remember which one with locked */
win_ptr->lockRank = rank;
/* ... end of body of routine ... */
......
......@@ -89,8 +89,19 @@ int MPI_Win_unlock(int rank, MPI_Win win)
comm_ptr = win_ptr->comm_ptr;
MPIR_ERRTEST_SEND_RANK(comm_ptr, rank, mpi_errno);
if (mpi_errno) goto fn_fail;
/* Test that the rank we are unlocking is the rank that we locked */
if (win_ptr->lockRank != rank) {
if (win_ptr->lockRank < 0) {
MPIU_ERR_SET(mpi_errno,MPI_ERR_RANK,"**winunlockwithoutlock");
}
else {
MPIU_ERR_SET2(mpi_errno,MPI_ERR_RANK,
"**mismatchedlockrank",
"**mismatchedlockrank %d %d", rank, win_ptr->lockRank );
}
if (mpi_errno) goto fn_fail;
}
}
MPID_END_ERROR_CHECKS;
}
......@@ -100,6 +111,9 @@ int MPI_Win_unlock(int rank, MPI_Win win)
mpi_errno = MPIU_RMA_CALL(win_ptr,Win_unlock(rank, win_ptr));
if (mpi_errno != MPI_SUCCESS) goto fn_fail;
/* Clear the lockRank on success with the unlock */
/* FIXME: Should this always be cleared, even on failure? */
win_ptr->lockRank = -1;
/* ... end of body of routine ... */
......
......@@ -26,11 +26,13 @@ MPIU_INSTR_DURATION_DECL(winstart_clearlock);
MPIU_INSTR_DURATION_DECL(wincomplete_issue);
MPIU_INSTR_DURATION_DECL(wincomplete_complete);
MPIU_INSTR_DURATION_DECL(wincomplete_recvsync);
MPIU_INSTR_DURATION_DECL(wincomplete_block);
MPIU_INSTR_DURATION_DECL(winwait_wait);
MPIU_INSTR_DURATION_DECL(winlock_getlocallock);
MPIU_INSTR_DURATION_DECL(winunlock_getlock);
MPIU_INSTR_DURATION_DECL(winunlock_issue);
MPIU_INSTR_DURATION_DECL(winunlock_complete);
MPIU_INSTR_DURATION_DECL(winunlock_block);
MPIU_INSTR_DURATION_DECL(lockqueue_alloc);
MPIU_INSTR_DURATION_DECL(rmapkt_acc);
MPIU_INSTR_DURATION_DECL(rmapkt_acc_predef);
......@@ -53,11 +55,13 @@ void MPIDI_CH3_RMA_InitInstr(void)
MPIU_INSTR_DURATION_INIT(wincomplete_recvsync,1,"WIN_COMPLETE:Recv sync messages");
MPIU_INSTR_DURATION_INIT(wincomplete_issue,2,"WIN_COMPLETE:Issue RMA ops");
MPIU_INSTR_DURATION_INIT(wincomplete_complete,1,"WIN_COMPLETE:Complete RMA ops");
MPIU_INSTR_DURATION_INIT(wincomplete_block,0,"WIN_COMPLETE:Wait for any progress");
MPIU_INSTR_DURATION_INIT(winwait_wait,1,"WIN_WAIT:Wait for ops from other processes");
MPIU_INSTR_DURATION_INIT(winlock_getlocallock,0,"WIN_LOCK:Get local lock");
MPIU_INSTR_DURATION_INIT(winunlock_issue,2,"WIN_UNLOCK:Issue RMA ops");
MPIU_INSTR_DURATION_INIT(winunlock_complete,1,"WIN_UNLOCK:Complete RMA ops");
MPIU_INSTR_DURATION_INIT(winunlock_getlock,0,"WIN_UNLOCK:Acquire lock");
MPIU_INSTR_DURATION_INIT(winunlock_block,0,"WIN_UNLOCK:Wait for any progress");
MPIU_INSTR_DURATION_INIT(rmapkt_acc,0,"RMA:PKTHANDLER for Accumulate");
MPIU_INSTR_DURATION_INIT(rmapkt_acc_predef,0,"RMA:PKTHANDLER for Accumulate: predef dtype");
MPIU_INSTR_DURATION_INIT(rmapkt_acc_immed,0,"RMA:PKTHANDLER for Accum immed");
......@@ -1508,7 +1512,7 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
there was an incomplete request. */
curr_ptr = win_ptr->rma_ops_list_head;
if (curr_ptr && !MPID_Request_is_complete(curr_ptr->request) ) {
MPIU_INSTR_DURATION_START(winfence_block);
MPIU_INSTR_DURATION_START(wincomplete_block);
mpi_errno = MPID_Progress_wait(&progress_state);
/* --BEGIN ERROR HANDLING-- */
if (mpi_errno != MPI_SUCCESS) {
......@@ -1516,7 +1520,7 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winnoprogress");
}
/* --END ERROR HANDLING-- */
MPIU_INSTR_DURATION_END(winfence_block);
MPIU_INSTR_DURATION_END(wincomplete_block);
}
} /* While list of rma operation is non-empty */
......@@ -1639,7 +1643,7 @@ int MPIDI_Win_lock(int lock_type, int dest, int assert, MPID_Win *win_ptr)
if (dest == MPI_PROC_NULL) goto fn_exit;
comm_ptr = win_ptr->comm_ptr;
if (dest == win_ptr->myrank) {
/* The target is this process itself. We must block until the lock
* is acquired. */
......@@ -1667,7 +1671,6 @@ int MPIDI_Win_lock(int lock_type, int dest, int assert, MPID_Win *win_ptr)
/* local lock acquired. local puts, gets, accumulates will be done
directly without queueing. */
}
else {
/* target is some other process. add the lock request to rma_ops_list */
MPIU_INSTR_DURATION_START(rmaqueue_alloc);
......@@ -1675,6 +1678,7 @@ int MPIDI_Win_lock(int lock_type, int dest, int assert, MPID_Win *win_ptr)
mpi_errno, "RMA operation entry");
MPIU_INSTR_DURATION_END(rmaqueue_alloc);
MPIU_Assert( !win_ptr->rma_ops_list_head );
win_ptr->rma_ops_list_head = new_ptr;
win_ptr->rma_ops_list_tail = new_ptr;
......@@ -1714,17 +1718,18 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
if (dest == MPI_PROC_NULL) goto fn_exit;
comm_ptr = win_ptr->comm_ptr;
if (dest == win_ptr->myrank) {
/* local lock. release the lock on the window, grant the next one
* in the queue, and return. */
MPIU_Assert(!win_ptr->rma_ops_list_head);
mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
if (mpi_errno != MPI_SUCCESS) goto fn_exit;
mpi_errno = MPID_Progress_poke();
goto fn_exit;
}
comm_ptr = win_ptr->comm_ptr;
rma_op = win_ptr->rma_ops_list_head;
/* win_lock was not called. return error */
......@@ -1749,20 +1754,21 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
single_op_opt = 0;
MPIDI_Comm_get_vc_set_active(comm_ptr, dest, &vc);
if (rma_op->next->next == NULL) {
/* Single put, get, or accumulate between the lock and unlock. If it
* is of small size and predefined datatype at the target, we
* do an optimization where the lock and the RMA operation are
* sent in a single packet. Otherwise, we send a separate lock
* request first. */
curr_op = rma_op->next;
MPID_Datatype_get_size_macro(curr_op->origin_datatype, type_size);
MPIDI_CH3I_DATATYPE_IS_PREDEFINED(curr_op->target_datatype, predefined);
/* msg_sz typically = 65480 */
if ( predefined &&
(type_size * curr_op->origin_count <= vc->eager_max_msg_sz) ) {
single_op_opt = 1;
......@@ -1869,6 +1875,7 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
win_ptr->lock_granted = 0;
fn_exit:
MPIU_Assert( !win_ptr->rma_ops_list_head );
MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_UNLOCK);
return mpi_errno;
/* --BEGIN ERROR HANDLING-- */
......@@ -2076,7 +2083,7 @@ static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr,
there was an incomplete request. */
curr_ptr = win_ptr->rma_ops_list_head;
if (curr_ptr && !MPID_Request_is_complete(curr_ptr->request) ) {
MPIU_INSTR_DURATION_START(winfence_block);
MPIU_INSTR_DURATION_START(winunlock_block);
mpi_errno = MPID_Progress_wait(&progress_state);
/* --BEGIN ERROR HANDLING-- */
if (mpi_errno != MPI_SUCCESS) {
......@@ -2084,7 +2091,7 @@ static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr,
MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winnoprogress");
}
/* --END ERROR HANDLING-- */
MPIU_INSTR_DURATION_END(winfence_block);
MPIU_INSTR_DURATION_END(winunlock_block);
}
} /* While list of rma operation is non-empty */
......@@ -2192,6 +2199,7 @@ static int MPIDI_CH3I_Send_lock_put_or_acc(MPID_Win *win_ptr)
iov[0].MPID_IOV_LEN = sizeof(*lock_accum_unlock_pkt);
}
else {
/* FIXME: Error return */
printf( "expected short accumulate...\n" );
/* */
}
......@@ -3016,6 +3024,9 @@ int MPIDI_CH3_PktHandler_Accumulate_Immed( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
}
}
mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
/* Without the following signal_completion call, we
sometimes hang */
MPIDI_CH3_Progress_signal_completion();
}
}
......@@ -3063,6 +3074,7 @@ int MPIDI_CH3_PktHandler_Lock( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
/* queue the lock information */
MPIDI_Win_lock_queue *curr_ptr, *prev_ptr, *new_ptr;
/* Note: This code is reached by the fechandadd rma tests */
/* FIXME: MT: This may need to be done atomically. */
/* FIXME: Since we need to add to the tail of the list,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment