Commit c2be640e authored by Wesley Bland
Browse files

Check for pending any-source ops



Before calling the progress engine, make sure that none of the operations
should return an MPIX_ERR_PROC_FAILED_PENDING error. Such operations could
cause the progress engine to hang, so we can't enter it in that case.
Instead, set the appropriate error codes and return immediately.
Signed-off-by: Huiwei Lu <huiweilu@mcs.anl.gov>
parent e89e6c66
......@@ -66,6 +66,10 @@ int MPIR_Test_impl(MPI_Request *request, int *flag, MPI_Status *status)
*flag = TRUE;
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
/* Fall through to the exit */
} else if (MPID_Request_is_pending_failure(request_ptr)) {
*flag = TRUE;
mpi_errno = request_ptr->status.MPI_ERROR;
goto fn_fail;
}
fn_exit:
......
......@@ -166,13 +166,14 @@ int MPI_Testall(int count, MPI_Request array_of_requests[], int *flag,
&(array_of_statuses[i]));
if (mpi_errno != MPI_SUCCESS) goto fn_fail;
}
if (request_ptrs[i] != NULL && MPID_Request_is_complete(request_ptrs[i]))
if (request_ptrs[i] != NULL && (MPID_Request_is_complete(request_ptrs[i]) || MPID_Request_is_pending_failure(request_ptrs[i])))
{
n_completed++;
rc = MPIR_Request_get_error(request_ptrs[i]);
if (rc != MPI_SUCCESS)
{
if (MPIX_ERR_PROC_FAILED == MPIR_ERR_GET_CLASS(rc))
if (MPIX_ERR_PROC_FAILED == MPIR_ERR_GET_CLASS(rc) ||
MPIX_ERR_PROC_FAILED_PENDING == MPIR_ERR_GET_CLASS(rc))
proc_failure = 1;
mpi_errno = MPI_ERR_IN_STATUS;
}
......
......@@ -183,7 +183,12 @@ int MPI_Testany(int count, MPI_Request array_of_requests[], int *indx,
{
n_inactive += 1;
}
}
} else if (request_ptrs[i] != NULL && MPID_Request_is_pending_failure(request_ptrs[i])) {
mpi_errno = request_ptrs[i]->status.MPI_ERROR;
*flag = TRUE;
*indx = i;
goto fn_fail;
}
}
if (n_inactive == count)
......
......@@ -197,7 +197,13 @@ int MPI_Testsome(int incount, MPI_Request array_of_requests[], int *outcount,
request_ptrs[i] = NULL;
n_inactive += 1;
}
}
} else if (request_ptrs[i] != NULL && MPID_Request_is_pending_failure(request_ptrs[i])) {
mpi_errno = MPI_ERR_IN_STATUS;
array_of_indices[n_active] = i;
n_active += 1;
rc = request_ptrs[i]->status.MPI_ERROR;
status_ptr->MPI_ERROR = rc;
}
}
if (mpi_errno == MPI_ERR_IN_STATUS)
......
......@@ -43,6 +43,11 @@ int MPIR_Wait_impl(MPI_Request *request, MPI_Status *status)
MPID_Request_get_ptr(*request, request_ptr);
if (MPID_Request_is_pending_failure(request_ptr)) {
mpi_errno = request_ptr->status.MPI_ERROR;
goto fn_fail;
}
if (!MPID_Request_is_complete(request_ptr))
{
MPID_Progress_state progress_state;
......
......@@ -154,7 +154,7 @@ int MPIR_Waitall_impl(int count, MPI_Request array_of_requests[],
}
/* wait for ith request to complete */
while (!MPID_Request_is_complete(request_ptrs[i]))
while (!MPID_Request_is_complete(request_ptrs[i]) && !MPID_Request_is_pending_failure(request_ptrs[i]))
{
/* generalized requests should already be finished */
MPIU_Assert(request_ptrs[i]->kind != MPID_UREQUEST);
......@@ -168,9 +168,16 @@ int MPIR_Waitall_impl(int count, MPI_Request array_of_requests[],
}
}
/* complete the request and check the status */
status_ptr = (ignoring_statuses) ? MPI_STATUS_IGNORE : &array_of_statuses[i];
rc = MPIR_Request_complete(&array_of_requests[i], request_ptrs[i], status_ptr, &active_flag);
if (MPID_Request_is_complete(request_ptrs[i])) {
/* complete the request and check the status */
status_ptr = (ignoring_statuses) ? MPI_STATUS_IGNORE : &array_of_statuses[i];
rc = MPIR_Request_complete(&array_of_requests[i], request_ptrs[i], status_ptr, &active_flag);
} else {
/* If the request isn't complete, it's because it's pending due
* to a failure so set the rc accordingly. */
rc = request_ptrs[i]->status.MPI_ERROR;
proc_failure = 1;
}
if (rc == MPI_SUCCESS)
{
request_ptrs[i] = NULL;
......
......@@ -155,7 +155,12 @@ int MPI_Waitany(int count, MPI_Request array_of_requests[], int *indx,
continue;
/* we found at least one non-null request */
found_nonnull_req = TRUE;
if (MPID_Request_is_pending_failure(request_ptrs[i])) {
mpi_errno = request_ptrs[i]->status.MPI_ERROR;
goto fn_progress_end_fail;
}
if (request_ptrs[i]->kind == MPID_UREQUEST && request_ptrs[i]->greq_fns->poll_fn != NULL)
{
/* this is a generalized request; make progress on it */
......
......@@ -221,7 +221,13 @@ int MPI_Waitsome(int incount, MPI_Request array_of_requests[],
request_ptrs[i] = NULL;
n_inactive += 1;
}
}
} else if ( request_ptrs[i] != NULL && MPID_Request_is_pending_failure(request_ptrs[i])) {
n_active += 1;
mpi_errno = MPI_ERR_IN_STATUS;
if (status_ptr != MPI_STATUS_IGNORE) {
status_ptr->MPI_ERROR = request_ptrs[i]->status.MPI_ERROR;
}
}
}
if (mpi_errno == MPI_ERR_IN_STATUS)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment