Commit 6eb0014e authored by Darius Buntinas
Browse files

[svn-r7819] Added a parameter to enable collective error returns; it is disabled by default.

parent 9edd54f2
......@@ -584,7 +584,7 @@ int MPIC_Send_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
if (*errflag)
if (*errflag && MPIR_PARAM_ENABLE_COLL_FT_RET)
mpi_errno = MPIC_Send(buf, count, datatype, dest, MPIR_ERROR_TAG, comm);
else
mpi_errno = MPIC_Send(buf, count, datatype, dest, tag, comm);
......@@ -611,6 +611,11 @@ int MPIC_Recv_ft(void *buf, int count, MPI_Datatype datatype, int source, int ta
MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
if (!MPIR_PARAM_ENABLE_COLL_FT_RET) {
mpi_errno = MPIC_Recv(buf, count, datatype, source, tag, comm, status);
goto fn_exit;
}
if (status == MPI_STATUS_IGNORE)
status = &mystatus;
......@@ -650,7 +655,7 @@ int MPIC_Ssend_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag
MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
if (*errflag)
if (*errflag && MPIR_PARAM_ENABLE_COLL_FT_RET)
mpi_errno = MPIC_Ssend(buf, count, datatype, dest, MPIR_ERROR_TAG, comm);
else
mpi_errno = MPIC_Ssend(buf, count, datatype, dest, tag, comm);
......@@ -679,6 +684,13 @@ int MPIC_Sendrecv_ft(void *sendbuf, int sendcount, MPI_Datatype sendtype,
MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
if (!MPIR_PARAM_ENABLE_COLL_FT_RET) {
mpi_errno = MPIC_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag,
recvbuf, recvcount, recvtype, source, recvtag,
comm, status);
goto fn_exit;
}
if (status == MPI_STATUS_IGNORE)
status = &mystatus;
......@@ -728,13 +740,21 @@ int MPIC_Sendrecv_replace_ft(void *buf, int count, MPI_Datatype datatype,
MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
if (!MPIR_PARAM_ENABLE_COLL_FT_RET) {
mpi_errno = MPIC_Sendrecv_replace(buf, count, datatype,
dest, sendtag,
source, recvtag,
comm, status);
goto fn_exit;
}
if (status == MPI_STATUS_IGNORE)
status = &mystatus;
if (*errflag) {
mpi_errno = MPIC_Sendrecv_replace(buf, count, datatype,
dest, MPIR_ERROR_TAG,
source, recvtag,
source, MPI_ANY_TAG,
comm, status);
goto fn_exit;
}
......@@ -775,7 +795,7 @@ int MPIC_Isend_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag
MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
if (*errflag)
if (*errflag && MPIR_PARAM_ENABLE_COLL_FT_RET)
mpi_errno = MPIC_Isend(buf, count, datatype, dest, MPIR_ERROR_TAG, comm, request);
else
mpi_errno = MPIC_Isend(buf, count, datatype, dest, tag, comm, request);
......@@ -799,7 +819,10 @@ int MPIC_Irecv_ft(void *buf, int count, MPI_Datatype datatype, int source,
MPIDI_FUNC_ENTER(MPID_STATE_MPIC_IRECV_FT);
mpi_errno = MPIC_Irecv(buf, count, datatype, source, MPI_ANY_TAG, comm, request);
if (MPIR_PARAM_ENABLE_COLL_FT_RET)
mpi_errno = MPIC_Irecv(buf, count, datatype, source, MPI_ANY_TAG, comm, request);
else
mpi_errno = MPIC_Irecv(buf, count, datatype, source, tag, comm, request);
fn_exit:
MPIDI_FUNC_EXIT(MPID_STATE_MPIC_IRECV_FT);
......@@ -828,7 +851,7 @@ int MPIC_Waitall_ft(int numreq, MPI_Request requests[], MPI_Status statuses[], i
mpi_errno = MPIR_Waitall_impl(numreq, requests, statuses);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
if (*errflag)
if (*errflag || !MPIR_PARAM_ENABLE_COLL_FT_RET)
goto fn_exit;
for (i = 0; i < numreq; ++i) {
......
......@@ -32,6 +32,8 @@ categories:
description : parameters relevant to the "MPIR" debugger interface
- name : checkpointing
description : parameters relevant to checkpointing
- name : fault_tolerance
description : parameters that control fault tolerance behavior
- name : threads
description : multi-threading parameters
- name : nemesis
......@@ -275,6 +277,19 @@ parameters:
If true, enables checkpointing support and returns an error if
checkpointing library cannot be initialized.
##############################################################
# fault-tolerance parameters
- category : fault_tolerance
name : ENABLE_COLL_FT_RET
type : boolean
default : false
description : >-
Collectives called on a communicator with a failed process
should not hang; however, the result of the operation may be
invalid even though the function returns MPI_SUCCESS. This
option enables an experimental feature that will return an
error if the result of the collective is invalid.
##############################################################
# memory parameters
- category : memory
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment