Commit 47f62b0c authored by Wesley Bland
Browse files

Correctly handle errflag in MPI collectives



The MPI collectives get and set the errflag used by the collective
helper functions (MPIC_*). The possible values of the errflag changed,
so the collective functions need to appropriately set this value using
either MPIR_ERR_NONE (MPI_SUCCESS), MPIR_ERR_PROC_FAILED
(MPIX_ERR_PROC_FAILED), or MPIR_ERR_OTHER (MPI_ERR_OTHER).

This should now allow collectives to correctly report process failures
when they occur, fixing the FT tests that use collectives (see #1945).
Signed-off-by: Huiwei Lu <huiweilu@mcs.anl.gov>
parent 3850e6bf
......@@ -728,7 +728,9 @@ sub ProcessFile
!($args[$errClassLoc] =~ /^MPI_T_ERR_/) &&
!($args[$errClassLoc] =~ /^MPIDU_SOCK_ERR_/) &&
!($args[$errClassLoc] =~ /^MPIX_ERR_/) &&
!($args[$errClassLoc] =~ /^errclass/) ) {
!($args[$errClassLoc] =~ /^errclass/) &&
!($args[$errClassLoc] =~ /^\*\(errflag_\)/) &&
!($args[$errClassLoc] =~ /^\*errflag/)) {
$bad_syntax_in_file{$filename} = 1;
print STDERR "Invalid argument $args[$errClassLoc] for the MPI Error class in $routineName in $filename\n";
next;
......
......@@ -212,8 +212,8 @@ int MPIR_Allgather_intra (
MPIR_ALLGATHER_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
} else
......@@ -277,8 +277,8 @@ int MPIR_Allgather_intra (
sent now. */
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
......@@ -296,8 +296,8 @@ int MPIR_Allgather_intra (
no. of processes whose data we don't have */
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
} else
......@@ -380,8 +380,8 @@ int MPIR_Allgather_intra (
MPIR_ALLGATHER_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
} else
......@@ -435,8 +435,8 @@ int MPIR_Allgather_intra (
comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
/* last_recv_cnt was set in the previous
......@@ -457,8 +457,8 @@ int MPIR_Allgather_intra (
no. of processes whose data we don't have */
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
} else
......@@ -532,8 +532,8 @@ int MPIR_Allgather_intra (
MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
curr_cnt *= 2;
......@@ -555,8 +555,8 @@ int MPIR_Allgather_intra (
MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
......@@ -617,8 +617,8 @@ int MPIR_Allgather_intra (
MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
j = jnext;
......@@ -632,8 +632,8 @@ int MPIR_Allgather_intra (
MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
else if (*errflag)
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
else if (*errflag != MPIR_ERR_NONE)
MPIU_ERR_SET(mpi_errno, *errflag, "**coll_fail");
return mpi_errno;
......@@ -702,8 +702,8 @@ int MPIR_Allgather_inter (
sendtype, 0, newcomm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
......@@ -718,8 +718,8 @@ int MPIR_Allgather_inter (
sendtype, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
......@@ -731,8 +731,8 @@ int MPIR_Allgather_inter (
recvtype, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
......@@ -745,8 +745,8 @@ int MPIR_Allgather_inter (
recvtype, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
......@@ -758,8 +758,8 @@ int MPIR_Allgather_inter (
sendtype, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
......@@ -769,8 +769,8 @@ int MPIR_Allgather_inter (
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
else if (*errflag)
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
else if (*errflag != MPIR_ERR_NONE)
MPIU_ERR_SET(mpi_errno, *errflag, "**coll_fail");
return mpi_errno;
......
......@@ -217,8 +217,8 @@ int MPIR_Allgatherv_intra (
comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
} else
......@@ -285,8 +285,8 @@ int MPIR_Allgatherv_intra (
MPIR_ALLGATHERV_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
/* last_recv_cnt was set in the previous
......@@ -309,8 +309,8 @@ int MPIR_Allgatherv_intra (
comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
} else
......@@ -418,8 +418,8 @@ int MPIR_Allgatherv_intra (
MPIR_ALLGATHERV_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
} else
......@@ -478,8 +478,8 @@ int MPIR_Allgatherv_intra (
comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
/* last_recv_cnt was set in the previous
......@@ -498,8 +498,8 @@ int MPIR_Allgatherv_intra (
comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
} else
......@@ -580,8 +580,8 @@ int MPIR_Allgatherv_intra (
src, MPIR_ALLGATHERV_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
recv_cnt = 0;
} else
......@@ -610,8 +610,8 @@ int MPIR_Allgatherv_intra (
MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
......@@ -698,8 +698,8 @@ int MPIR_Allgatherv_intra (
mpi_errno = MPIC_Recv(rbuf, recvnow, recvtype, left, MPIR_ALLGATHERV_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
torecv -= recvnow;
......@@ -708,8 +708,8 @@ int MPIR_Allgatherv_intra (
mpi_errno = MPIC_Send(sbuf, sendnow, recvtype, right, MPIR_ALLGATHERV_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
tosend -= sendnow;
......@@ -720,8 +720,8 @@ int MPIR_Allgatherv_intra (
comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
tosend -= sendnow;
......@@ -747,8 +747,8 @@ int MPIR_Allgatherv_intra (
MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
else if (*errflag)
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
else if (*errflag != MPIR_ERR_NONE)
MPIU_ERR_SET(mpi_errno, *errflag, "**coll_fail");
return mpi_errno;
fn_fail:
......@@ -799,8 +799,8 @@ int MPIR_Allgatherv_inter (
comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
/* gatherv to right group */
......@@ -810,8 +810,8 @@ int MPIR_Allgatherv_inter (
comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
......@@ -823,8 +823,8 @@ int MPIR_Allgatherv_inter (
comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
/* gatherv from left group */
......@@ -834,8 +834,8 @@ int MPIR_Allgatherv_inter (
comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
......@@ -860,8 +860,8 @@ int MPIR_Allgatherv_inter (
mpi_errno = MPIR_Bcast_intra(recvbuf, 1, newtype, 0, newcomm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
......@@ -870,7 +870,7 @@ int MPIR_Allgatherv_inter (
fn_exit:
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
else if (*errflag)
else if (*errflag != MPIR_ERR_NONE)
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
......
......@@ -95,8 +95,8 @@ int MPIR_Allreduce_group_intra(void *sendbuf, void *recvbuf, int count,
tag, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
......@@ -113,8 +113,8 @@ int MPIR_Allreduce_group_intra(void *sendbuf, void *recvbuf, int count,
MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
......@@ -162,8 +162,8 @@ int MPIR_Allreduce_group_intra(void *sendbuf, void *recvbuf, int count,
MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
} else {
......@@ -246,8 +246,8 @@ int MPIR_Allreduce_group_intra(void *sendbuf, void *recvbuf, int count,
MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
......@@ -312,8 +312,8 @@ int MPIR_Allreduce_group_intra(void *sendbuf, void *recvbuf, int count,
MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
......@@ -343,8 +343,8 @@ int MPIR_Allreduce_group_intra(void *sendbuf, void *recvbuf, int count,
}
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
......@@ -353,8 +353,8 @@ int MPIR_Allreduce_group_intra(void *sendbuf, void *recvbuf, int count,
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
else if (*errflag)
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
else if (*errflag != MPIR_ERR_NONE)
MPIU_ERR_SET(mpi_errno, *errflag, "**coll_fail");
return (mpi_errno);
fn_fail:
......
......@@ -228,16 +228,16 @@ int MPIR_Allreduce_intra (
mpi_errno = MPIR_Reduce_impl(recvbuf, NULL, count, datatype, op, 0, comm_ptr->node_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
} else {
mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, 0, comm_ptr->node_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
......@@ -255,8 +255,8 @@ int MPIR_Allreduce_intra (
errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
......@@ -266,8 +266,8 @@ int MPIR_Allreduce_intra (
mpi_errno = MPIR_Bcast_impl(recvbuf, count, datatype, 0, comm_ptr->node_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
......@@ -290,16 +290,16 @@ int MPIR_Allreduce_intra (
op, 0, comm_ptr, errflag );
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
mpi_errno = MPIR_Bcast_impl( recvbuf, count, datatype, 0, comm_ptr, errflag );
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
......@@ -352,8 +352,8 @@ int MPIR_Allreduce_intra (
MPIR_ALLREDUCE_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
......@@ -369,8 +369,8 @@ int MPIR_Allreduce_intra (
MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
......@@ -415,8 +415,8 @@ int MPIR_Allreduce_intra (
MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
......@@ -500,8 +500,8 @@ int MPIR_Allreduce_intra (
MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
......@@ -565,8 +565,8 @@ int MPIR_Allreduce_intra (
MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
......@@ -592,8 +592,8 @@ int MPIR_Allreduce_intra (
MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
......@@ -668,8 +668,8 @@ int MPIR_Allreduce_inter (
op, 0, newcomm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
......@@ -680,8 +680,8 @@ int MPIR_Allreduce_inter (
comm, MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
......@@ -691,8 +691,8 @@ int MPIR_Allreduce_inter (
0, newcomm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
*errflag = MPIR_ERR_GET_CLASS(mpi_errno);
MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
......@@ -701,8 +701,8 @@ int MPIR_Allreduce_inter (
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
else if (*errflag)
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
else if (*errflag != MPIR_ERR_NONE)