Commit 7a785c84 authored by Wesley Bland, committed by Huiwei Lu
Browse files

Strip out pending ANY_SOURCE request handling



The existing way that we handle non-blocking requests involving wildcard
receive operations is incorrect. We're cancelling request operations and
trying to recreate them later. In the meantime, it's messing with
matching and makes it possible (likely?) that some messages that arrive
will never be matched. A new way of handling this is coming next.
Signed-off-by: Huiwei Lu <huiweilu@mcs.anl.gov>
parent a96ac72e
......@@ -3631,23 +3631,6 @@ void MPID_Request_set_completed(MPID_Request *);
@*/
void MPID_Request_release(MPID_Request *);
/*@
MPID_Request_is_pending_failure - Check if a request is pending because of a process failure
Input Parameter:
request - request to check
Return value:
0 - The request is not pending because of a failure
Non-zero - The request is pending because of a failure
Notes:
This routine checks to see if the communicator used in the request can
participate in MPI_ANY_SOURCE operations and if this request is already
pending due to a process failure.
@*/
int MPID_Request_is_pending_failure(MPID_Request *);
typedef struct MPID_Grequest_class {
MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
MPI_Grequest_query_function *query_fn;
......
......@@ -60,11 +60,7 @@ int MPIR_Test_impl(MPI_Request *request, int *flag, MPI_Status *status)
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
if (MPID_Request_is_pending_failure(request_ptr)) {
*flag = TRUE;
mpi_errno = request_ptr->status.MPI_ERROR;
goto fn_fail;
} else if (MPID_Request_is_complete(request_ptr)) {
if (MPID_Request_is_complete(request_ptr)) {
mpi_errno = MPIR_Request_complete(request, request_ptr, status,
&active_flag);
*flag = TRUE;
......
......@@ -166,7 +166,7 @@ int MPI_Testall(int count, MPI_Request array_of_requests[], int *flag,
&(array_of_statuses[i]));
if (mpi_errno != MPI_SUCCESS) goto fn_fail;
}
if (request_ptrs[i] != NULL && (MPID_Request_is_complete(request_ptrs[i]) || MPID_Request_is_pending_failure(request_ptrs[i])))
/* Only a non-NULL request that reports completion is counted below. */
if (request_ptrs[i] != NULL && MPID_Request_is_complete(request_ptrs[i]))
{
n_completed++;
rc = MPIR_Request_get_error(request_ptrs[i]);
......
......@@ -168,12 +168,7 @@ int MPI_Testany(int count, MPI_Request array_of_requests[], int *indx,
status);
if (mpi_errno != MPI_SUCCESS) goto fn_fail;
}
if (request_ptrs[i] != NULL && MPID_Request_is_pending_failure(request_ptrs[i])) {
mpi_errno = request_ptrs[i]->status.MPI_ERROR;
*flag = TRUE;
*indx = i;
goto fn_fail;
} else if (request_ptrs[i] != NULL && MPID_Request_is_complete(request_ptrs[i]))
if (request_ptrs[i] != NULL && MPID_Request_is_complete(request_ptrs[i]))
{
mpi_errno = MPIR_Request_complete(&array_of_requests[i],
request_ptrs[i],
......
......@@ -170,15 +170,7 @@ int MPI_Testsome(int incount, MPI_Request array_of_requests[], int *outcount,
if (mpi_errno != MPI_SUCCESS) goto fn_fail;
}
status_ptr = (array_of_statuses != MPI_STATUSES_IGNORE) ? &array_of_statuses[n_active] : MPI_STATUS_IGNORE;
if (request_ptrs[i] != NULL && MPID_Request_is_pending_failure(request_ptrs[i])) {
mpi_errno = MPI_ERR_IN_STATUS;
array_of_indices[n_active] = i;
n_active += 1;
rc = request_ptrs[i]->status.MPI_ERROR;
if (status_ptr != MPI_STATUS_IGNORE) {
status_ptr->MPI_ERROR = rc;
}
} else if (request_ptrs[i] != NULL && MPID_Request_is_complete(request_ptrs[i])) {
if (request_ptrs[i] != NULL && MPID_Request_is_complete(request_ptrs[i])) {
rc = MPIR_Request_complete(&array_of_requests[i], request_ptrs[i],
status_ptr, &active_flag);
if (active_flag)
......
......@@ -43,11 +43,6 @@ int MPIR_Wait_impl(MPI_Request *request, MPI_Status *status)
MPID_Request_get_ptr(*request, request_ptr);
if (MPID_Request_is_pending_failure(request_ptr)) {
mpi_errno = request_ptr->status.MPI_ERROR;
goto fn_fail;
}
if (!MPID_Request_is_complete(request_ptr))
{
MPID_Progress_state progress_state;
......
......@@ -154,7 +154,7 @@ int MPIR_Waitall_impl(int count, MPI_Request array_of_requests[],
}
/* wait for ith request to complete */
while (!MPID_Request_is_complete(request_ptrs[i]) && !MPID_Request_is_pending_failure(request_ptrs[i]))
while (!MPID_Request_is_complete(request_ptrs[i]))
{
/* generalized requests should already be finished */
MPIU_Assert(request_ptrs[i]->kind != MPID_UREQUEST);
......@@ -172,11 +172,8 @@ int MPIR_Waitall_impl(int count, MPI_Request array_of_requests[],
/* complete the request and check the status */
status_ptr = (ignoring_statuses) ? MPI_STATUS_IGNORE : &array_of_statuses[i];
rc = MPIR_Request_complete(&array_of_requests[i], request_ptrs[i], status_ptr, &active_flag);
} else {
/* If the request isn't complete, it's because it's pending due
* to a failure so set the rc accordingly. */
rc = request_ptrs[i]->status.MPI_ERROR;
proc_failure = 1;
}
}
if (rc == MPI_SUCCESS)
{
......
......@@ -156,11 +156,6 @@ int MPI_Waitany(int count, MPI_Request array_of_requests[], int *indx,
/* we found at least one non-null request */
found_nonnull_req = TRUE;
if (MPID_Request_is_pending_failure(request_ptrs[i])) {
mpi_errno = request_ptrs[i]->status.MPI_ERROR;
goto fn_progress_end_fail;
}
if (request_ptrs[i]->kind == MPID_UREQUEST && request_ptrs[i]->greq_fns->poll_fn != NULL)
{
/* this is a generalized request; make progress on it */
......
......@@ -221,12 +221,6 @@ int MPI_Waitsome(int incount, MPI_Request array_of_requests[],
request_ptrs[i] = NULL;
n_inactive += 1;
}
} else if ( request_ptrs[i] != NULL && MPID_Request_is_pending_failure(request_ptrs[i])) {
n_active += 1;
mpi_errno = MPI_ERR_IN_STATUS;
if (status_ptr != MPI_STATUS_IGNORE) {
status_ptr->MPI_ERROR = request_ptrs[i]->status.MPI_ERROR;
}
}
}
......
......@@ -1527,7 +1527,6 @@ MPID_Request * MPIDI_CH3U_Recvq_FDP_or_AEU(MPIDI_Message_match * match,
int * found);
int MPIDI_CH3U_Recvq_count_unexp(void);
int MPIDI_CH3U_Complete_posted_with_error(MPIDI_VC_t *vc);
int MPIDI_CH3U_Complete_disabled_anysources(void);
int MPIDI_CH3U_Clean_recvq(MPID_Comm *comm_ptr);
......
......@@ -384,12 +384,6 @@ int MPIDI_CH3I_Comm_handle_failed_procs(MPID_Group *new_failed_procs)
}
}
/* Now that we've marked communicators with disable anysource, we
complete-with-an-error all anysource receives posted on those
communicators */
mpi_errno = MPIDI_CH3U_Complete_disabled_anysources();
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
fn_exit:
MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_COMM_HANDLE_FAILED_PROCS);
return mpi_errno;
......
......@@ -559,12 +559,6 @@ MPID_Request * MPIDI_CH3U_Recvq_FDU_or_AEP(int source, int tag,
if (tag == MPI_ANY_TAG)
match.parts.tag = mask.parts.tag = 0;
if (source == MPI_ANY_SOURCE) {
if (!MPIDI_CH3I_Comm_AS_enabled(comm)) {
/* If MPI_ANY_SOURCE is disabled right now, we should
* just add this request to the posted queue instead and
* return the appropriate error. */
continue;
}
match.parts.rank = mask.parts.rank = 0;
}
do {
......@@ -639,17 +633,8 @@ MPID_Request * MPIDI_CH3U_Recvq_FDU_or_AEP(int source, int tag,
MPIDI_CH3U_Request_complete(rreq);
goto lock_exit;
}
} else if (!MPIDI_CH3I_Comm_AS_enabled(comm)) {
/* If this receive is for MPI_ANY_SOURCE, we will still add the
* request to the queue for now, but we will also set the error
* class to MPIX_ERR_PROC_FAILED_PENDING since the request shouldn't
* be matched as long as there is a failure pending. This will get
* checked again later during the completion function to see if the
* request can be completed at that time. */
MPIU_ERR_SET(mpi_errno, MPIX_ERR_PROC_FAILED_PENDING, "**failure_pending");
rreq->status.MPI_ERROR = mpi_errno;
}
rreq->dev.next = NULL;
if (recvq_posted_tail != NULL) {
recvq_posted_tail->dev.next = rreq;
......@@ -1134,46 +1119,6 @@ int MPIDI_CH3U_Clean_recvq(MPID_Comm *comm_ptr)
return mpi_errno;
}
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3U_Complete_disabled_anysources
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/*
 * MPIDI_CH3U_Complete_disabled_anysources - Walk the posted receive queue and
 * pass every MPI_ANY_SOURCE request whose communicator reports anysource as
 * not enabled to dequeue_and_set_error(), together with an
 * MPIX_ERR_PROC_FAILED_PENDING error code.
 *
 * Return value:
 * mpi_errno. It is initialized to MPI_SUCCESS and no statement visible in
 * this body assigns to it afterwards.
 *
 * Notes:
 * The whole traversal is bracketed by MPIU_THREAD_CS_ENTER/EXIT(MSGQUEUE,).
 */
int MPIDI_CH3U_Complete_disabled_anysources(void)
{
int mpi_errno = MPI_SUCCESS;
MPID_Request *req, *prev_req;
/* Error code handed to dequeue_and_set_error() for each matching request;
 * kept separate from mpi_errno, which is this function's own return value. */
int error = MPI_SUCCESS;
MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMPLETE_DISABLED_ANYSOURCES);
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_COMPLETE_DISABLED_ANYSOURCES);
MPIU_THREAD_CS_ENTER(MSGQUEUE,);
MPIU_ERR_SETSIMPLE(error, MPIX_ERR_PROC_FAILED_PENDING, "**failure_pending");
/* Check each request in the posted queue, and complete-with-error any
anysource requests posted on communicators that have disabled
anysources */
req = recvq_posted_head;
prev_req = NULL;
while (req) {
if (req->dev.match.parts.rank == MPI_ANY_SOURCE && !MPIDI_CH3I_Comm_AS_enabled(req->comm)) {
/* NOTE(review): req is not advanced in this branch, so
 * dequeue_and_set_error() is presumably responsible for moving req to
 * the next queue element through &req -- confirm against its
 * definition. prev_req is deliberately left unchanged here. */
dequeue_and_set_error(&req, prev_req, &recvq_posted_head, &recvq_posted_tail, &error, MPI_PROC_NULL); /* we don't know the rank of the failed proc */
} else {
/* Request stays in the queue: remember it as the predecessor and step
 * to the next element. */
prev_req = req;
req = req->dev.next;
}
}
fn_exit:
MPIU_THREAD_CS_EXIT(MSGQUEUE,);
MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_COMPLETE_DISABLED_ANYSOURCES);
return mpi_errno;
/* No statement visible in this body jumps to fn_fail. */
fn_fail:
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPIDU_Complete_posted_with_error
#undef FCNAME
......
......@@ -148,37 +148,3 @@ int MPID_Comm_failed_bitarray(MPID_Comm *comm_ptr, uint32_t **bitarray, int acke
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPID_Request_is_pending_failure
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/*
 * MPID_Request_is_pending_failure - Report whether an MPI_ANY_SOURCE request
 * must stay pending because its communicator has anysource disabled.
 *
 * Input Parameter:
 * request_ptr - request to inspect; may be NULL
 *
 * Return value:
 * 1 if the request has a communicator, matches on MPI_ANY_SOURCE, and
 * MPIDI_CH3I_Comm_AS_enabled() reports false for that communicator;
 * 0 in every other case (including a NULL request or NULL communicator).
 *
 * Notes:
 * This is not a pure predicate: when the request matches on MPI_ANY_SOURCE
 * and anysource is enabled on its communicator, status.MPI_ERROR of the
 * request is reset to MPI_SUCCESS.
 */
int MPID_Request_is_pending_failure(MPID_Request *request_ptr)
{
    int pending = 0;
    MPIDI_STATE_DECL(MPID_STATE_REQUEST_IS_PENDING_FAILURE);

    MPIDI_FUNC_ENTER(MPID_STATE_REQUEST_IS_PENDING_FAILURE);

    /* Only wildcard-source requests bound to a communicator are candidates. */
    if (request_ptr != NULL && request_ptr->comm != NULL &&
        request_ptr->dev.match.parts.rank == MPI_ANY_SOURCE) {
        if (MPIDI_CH3I_Comm_AS_enabled(request_ptr->comm)) {
            /* Anysource is usable again: clear the recorded error so the
             * request is treated as not pending. */
            request_ptr->status.MPI_ERROR = MPI_SUCCESS;
        } else {
            /* Anysource is still disabled: the request remains pending. */
            pending = 1;
        }
    }

    MPIU_DBG_MSG_S(CH3_OTHER, VERBOSE, "Request is%spending failure", pending ? " " : " not ");
    MPIDI_FUNC_EXIT(MPID_STATE_REQUEST_IS_PENDING_FAILURE);
    return pending;
}
......@@ -49,10 +49,6 @@ int MPID_Irecv(void * buf, int count, MPI_Datatype datatype, int rank, int tag,
MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomemreq");
}
if (MPIR_ERR_GET_CLASS(rreq->status.MPI_ERROR) == MPIX_ERR_PROC_FAILED_PENDING) {
mpi_errno = rreq->status.MPI_ERROR;
}
if (found)
{
MPIDI_VC_t * vc;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment