Commit 9a1ae5a8 authored by Wesley Bland
Browse files

Fix a bunch of bugs in the scheduler



The previous commits didn't take empty requests into account when
extracting the status. They also introduced a dumb bug in a null
pointer check that didn't get tested first.
Signed-off-by: Junchao Zhang <jczhang@mcs.anl.gov>
parent 15262441
......@@ -4559,7 +4559,8 @@ static inline int MPIR_Request_complete_fastpath(MPI_Request *request, MPID_Requ
#define FCNAME MPIU_QUOTE(FUNCNAME)
static inline void MPIR_Process_status(MPI_Status *status, mpir_errflag_t *errflag)
{
if ((MPIX_ERR_REVOKED == MPIR_ERR_GET_CLASS(status->MPI_ERROR) ||
if (MPI_PROC_NULL != status->MPI_SOURCE &&
(MPIX_ERR_REVOKED == MPIR_ERR_GET_CLASS(status->MPI_ERROR) ||
MPIX_ERR_PROC_FAILED == MPIR_ERR_GET_CLASS(status->MPI_ERROR) ||
MPIR_TAG_CHECK_ERROR_BIT(status->MPI_TAG)) && !*errflag) {
/* If the receive was completed within the MPID_Recv, handle the
......
......@@ -228,10 +228,6 @@ int MPIC_Wait(MPID_Request * request_ptr, mpir_errflag_t *errflag)
MPID_Progress_end(&progress_state);
}
if (request_ptr->kind == MPID_REQUEST_RECV &&
request_ptr->status.MPI_SOURCE == MPI_PROC_NULL)
goto fn_exit;
MPIR_Process_status(&request_ptr->status, errflag);
fn_exit:
......
......@@ -841,37 +841,29 @@ static int MPIDU_Sched_progress_state(struct MPIDU_Sched_state *state, int *made
case MPIDU_SCHED_ENTRY_SEND:
if (e->u.send.sreq != NULL && MPID_Request_is_complete(e->u.send.sreq)) {
MPIU_DBG_MSG_FMT(COMM, VERBOSE, (MPIU_DBG_FDEST, "completed SEND entry %d, sreq=%p\n", (int) i, e->u.send.sreq));
e->status = MPIDU_SCHED_ENTRY_STATUS_COMPLETE;
/* This wait call won't enter the progress engine.
* It's just a convenient way to pull out the error
* information from the tag. */
MPIR_Process_status(&e->u.send.sreq->status, &s->req->dev.errflag);
if (s->req->dev.errflag != MPIR_ERR_NONE)
e->status = MPIDU_SCHED_ENTRY_STATUS_FAILED;
else
e->status = MPIDU_SCHED_ENTRY_STATUS_COMPLETE;
MPID_Request_release(e->u.send.sreq);
e->u.send.sreq = NULL;
MPIR_Comm_release(e->u.send.comm, /*isDisconnect=*/FALSE);
dtype_release_if_not_builtin(e->u.send.datatype);
if (e->u.send.sreq->status.MPI_ERROR)
e->status = MPIDU_SCHED_ENTRY_STATUS_FAILED;
else
e->status = MPIDU_SCHED_ENTRY_STATUS_COMPLETE;
}
break;
case MPIDU_SCHED_ENTRY_RECV:
if (e->u.recv.rreq != NULL && MPID_Request_is_complete(e->u.recv.rreq)) {
MPIU_DBG_MSG_FMT(COMM, VERBOSE, (MPIU_DBG_FDEST, "completed RECV entry %d, rreq=%p\n", (int) i, e->u.recv.rreq));
/* This wait call won't enter the progress engine.
* It's just a convenient way to pull out the error
* information from the tag. */
MPIR_Process_status(&e->u.recv.rreq->status, &s->req->dev.errflag);
MPIR_Request_extract_status(e->u.recv.rreq, e->u.recv.status);
if (s->req->dev.errflag != MPIR_ERR_NONE)
e->status = MPIDU_SCHED_ENTRY_STATUS_FAILED;
else
e->status = MPIDU_SCHED_ENTRY_STATUS_COMPLETE;
MPID_Request_release(e->u.recv.rreq);
e->u.recv.rreq = NULL;
MPIR_Comm_release(e->u.recv.comm, /*isDisconnect=*/FALSE);
dtype_release_if_not_builtin(e->u.recv.datatype);
if (e->u.recv.rreq->status.MPI_ERROR)
e->status = MPIDU_SCHED_ENTRY_STATUS_FAILED;
else
e->status = MPIDU_SCHED_ENTRY_STATUS_COMPLETE;
}
break;
default:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment