Commit 077f3c30 authored by Darius Buntinas's avatar Darius Buntinas
Browse files

[svn-r6789] removed NMPI_Allreduce. MPIR_Allreduce_impl should be used instead

parent 69799923
......@@ -3287,8 +3287,12 @@ int MPIR_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
int MPIR_Allgatherv_inter(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int *recvcounts, int *displs,
MPI_Datatype recvtype, MPID_Comm *comm_ptr );
int MPIR_Allreduce_impl(void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
int MPIR_Allreduce(void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
int MPIR_Allreduce_intra(void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
int MPIR_Allreduce_inter(void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
int MPIR_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype,
......
......@@ -34,7 +34,6 @@
#define NMPI_Unpack MPI_Unpack
#define NMPI_Wait MPI_Wait
#define NMPI_Test MPI_Test
#define NMPI_Allreduce MPI_Allreduce
#define NMPI_Comm_get_attr MPI_Comm_get_attr
#define NMPI_Comm_set_attr MPI_Comm_set_attr
#define NMPI_Type_get_attr MPI_Type_get_attr
......@@ -121,7 +120,6 @@
#define NMPI_Unpack PMPI_Unpack
#define NMPI_Wait PMPI_Wait
#define NMPI_Test PMPI_Test
#define NMPI_Allreduce PMPI_Allreduce
#define NMPI_Comm_get_attr PMPI_Comm_get_attr
#define NMPI_Comm_set_attr PMPI_Comm_set_attr
#define NMPI_Type_get_attr PMPI_Type_get_attr
......
......@@ -89,10 +89,11 @@ MPIR_Op_check_dtype_fn *MPIR_Op_check_dtype_table[] = {
/* not declared static because a machine-specific function may call this one
in some cases */
#undef FCNAME
#define FCNAME "MPIR_Allreduce"
int MPIR_Allreduce (
#undef FUNCNAME
#define FUNCNAME MPIR_Allreduce_intra
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Allreduce_intra (
void *sendbuf,
void *recvbuf,
int count,
......@@ -119,7 +120,10 @@ int MPIR_Allreduce (
#endif
MPIU_CHKLMEM_DECL(3);
if (count == 0) return MPI_SUCCESS;
/* check if multiple threads are calling this collective function */
MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
if (count == 0) goto fn_exit;
comm = comm_ptr->handle;
MPIU_THREADPRIV_GET;
......@@ -186,7 +190,7 @@ int MPIR_Allreduce (
/* need to allocate temporary buffer to store incoming data*/
mpi_errno = NMPI_Type_get_true_extent(datatype, &true_lb,
&true_extent);
MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
MPID_Datatype_get_extent_macro(datatype, extent);
MPID_Ensure_Aint_fits_in_pointer(count * MPIR_MAX(extent, true_extent));
......@@ -199,7 +203,7 @@ int MPIR_Allreduce (
if (sendbuf != MPI_IN_PLACE) {
mpi_errno = MPIR_Localcopy(sendbuf, count, datatype, recvbuf,
count, datatype);
MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
MPID_Datatype_get_size_macro(datatype, type_size);
......@@ -217,15 +221,12 @@ int MPIR_Allreduce (
participate in the algorithm until the very end. The
remaining processes form a nice power-of-two. */
/* check if multiple threads are calling this collective function */
MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
if (rank < 2*rem) {
if (rank % 2 == 0) { /* even */
mpi_errno = MPIC_Send(recvbuf, count,
datatype, rank+1,
MPIR_ALLREDUCE_TAG, comm);
MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
/* temporarily set the rank to -1 so that this
process does not participate in recursive
......@@ -237,8 +238,8 @@ int MPIR_Allreduce (
datatype, rank-1,
MPIR_ALLREDUCE_TAG, comm,
MPI_STATUS_IGNORE);
MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
/* do the reduction on received data. since the
ordering is right, it doesn't matter whether
the operation is commutative or not. */
......@@ -286,7 +287,7 @@ int MPIR_Allreduce (
count, datatype, dst,
MPIR_ALLREDUCE_TAG, comm,
MPI_STATUS_IGNORE);
MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
/* tmp_buf contains data received in this step.
recvbuf contains data accumulated so far */
......@@ -320,7 +321,7 @@ int MPIR_Allreduce (
/* copy result back into recvbuf */
mpi_errno = MPIR_Localcopy(tmp_buf, count, datatype,
recvbuf, count, datatype);
MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
mask <<= 1;
}
......@@ -382,7 +383,7 @@ int MPIR_Allreduce (
recv_cnt, datatype, dst,
MPIR_ALLREDUCE_TAG, comm,
MPI_STATUS_IGNORE);
MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
/* tmp_buf contains data received in this step.
recvbuf contains data accumulated so far */
......@@ -442,7 +443,7 @@ int MPIR_Allreduce (
recv_cnt, datatype, dst,
MPIR_ALLREDUCE_TAG, comm,
MPI_STATUS_IGNORE);
MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
if (newrank > newdst) send_idx = recv_idx;
......@@ -463,18 +464,18 @@ int MPIR_Allreduce (
mpi_errno = MPIC_Recv(recvbuf, count,
datatype, rank+1,
MPIR_ALLREDUCE_TAG, comm,
MPI_STATUS_IGNORE);
MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
MPI_STATUS_IGNORE);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
/* check if multiple threads are calling this collective function */
MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
if (MPIU_THREADPRIV_FIELD(op_errno))
mpi_errno = MPIU_THREADPRIV_FIELD(op_errno);
}
fn_exit:
/* check if multiple threads are calling this collective function */
MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
MPIU_CHKLMEM_FREEALL();
MPIR_Nest_decr();
return (mpi_errno);
......@@ -486,8 +487,10 @@ int MPIR_Allreduce (
/* not declared static because a machine-specific function may call this one
in some cases */
#undef FUNCNAME
#define FUNCNAME MPIR_Allreduce_inter
#undef FCNAME
#define FCNAME "MPIR_Allreduce_inter"
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Allreduce_inter (
void *sendbuf,
void *recvbuf,
......@@ -506,11 +509,6 @@ int MPIR_Allreduce_inter (
*/
int rank, mpi_errno, root;
MPID_Comm *newcomm_ptr = NULL;
MPIU_THREADPRIV_DECL;
MPIU_THREADPRIV_GET;
MPIR_Nest_incr();
rank = comm_ptr->rank;
......@@ -521,26 +519,26 @@ int MPIR_Allreduce_inter (
root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype, op,
root, comm_ptr);
MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
/* reduce to rank 0 of right group */
root = 0;
mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype, op,
root, comm_ptr);
MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
else {
/* reduce to rank 0 of left group */
root = 0;
mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype, op,
root, comm_ptr);
MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
/* reduce from right group to rank 0 */
root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype, op,
root, comm_ptr);
MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
/* Get the local intracommunicator */
......@@ -550,16 +548,142 @@ int MPIR_Allreduce_inter (
newcomm_ptr = comm_ptr->local_comm;
mpi_errno = MPIR_Bcast(recvbuf, count, datatype, 0, newcomm_ptr);
MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
fn_exit:
MPIR_Nest_decr();
return mpi_errno;
fn_fail:
goto fn_exit;
}
/* MPIR_Allreduce performs an allreduce using point-to-point messages.
   This is intended to be used by device-specific implementations of
   allreduce.  In all other cases MPIR_Allreduce_impl should be used
   (it consults the coll_fns override table first). */
#undef FUNCNAME
#define FUNCNAME MPIR_Allreduce
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr)
{
int mpi_errno = MPI_SUCCESS;
/* Dispatch on the communicator kind: exactly one of the two
   implementations runs, so a single error check after the branch
   is equivalent to checking inside each arm. */
if (comm_ptr->comm_kind == MPID_INTRACOMM)
mpi_errno = MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm_ptr);
else
mpi_errno = MPIR_Allreduce_inter(sendbuf, recvbuf, count, datatype, op, comm_ptr);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
fn_exit:
return mpi_errno;
fn_fail:
goto fn_exit;
}
/* MPIR_Allreduce_impl should be called by any internal component that
   would otherwise call MPI_Allreduce.  This differs from
   MPIR_Allreduce in that this will call the coll_fns version if it
   exists (i.e., it honors a device-installed collective override). */
#undef FUNCNAME
#define FUNCNAME MPIR_Allreduce_impl
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Allreduce_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr)
{
int mpi_errno = MPI_SUCCESS;
/* A device may install its own allreduce; prefer it when present. */
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Allreduce != NULL)
{
mpi_errno = comm_ptr->coll_fns->Allreduce(sendbuf, recvbuf, count, datatype, op, comm_ptr);
}
else
{
if (comm_ptr->comm_kind == MPID_INTRACOMM) {
/* intracommunicator */
#if defined(USE_SMP_COLLECTIVES)
MPID_Op *op_ptr;
int is_commutative;
/* is the op commutative? We do SMP optimizations only if it is. */
if (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) {
/* All builtin ops are commutative. */
is_commutative = 1;
} else {
MPID_Op_get_ptr(op, op_ptr);
is_commutative = (op_ptr->kind == MPID_OP_USER_NONCOMMUTE) ? 0 : 1;
}
/* Hierarchical (SMP-aware) algorithm: per-node reduce to a local
   root, allreduce among node roots, then intra-node broadcast.
   Requires commutativity because per-node reduction reorders the
   combination of contributions. */
if (MPIR_Comm_is_node_aware(comm_ptr) && is_commutative) {
/* on each node, do a reduce to the local root */
if (comm_ptr->node_comm != NULL)
{
/* take care of the MPI_IN_PLACE case. For reduce,
   MPI_IN_PLACE is specified only on the root;
   for allreduce it is specified on all processes. */
if ((sendbuf == MPI_IN_PLACE) && (comm_ptr->node_comm->rank != 0)) {
/* IN_PLACE and not root of reduce. Data supplied to this
   allreduce is in recvbuf. Pass that as the sendbuf to reduce. */
mpi_errno = MPIR_Reduce_or_coll_fn(recvbuf, NULL, count, datatype, op, 0, comm_ptr->node_comm);
}
else {
mpi_errno = MPIR_Reduce_or_coll_fn(sendbuf, recvbuf, count, datatype, op, 0, comm_ptr->node_comm);
}
if (mpi_errno) goto fn_fail;
}
else {
/* only one process on the node. copy sendbuf to recvbuf */
if (sendbuf != MPI_IN_PLACE) {
mpi_errno = MPIR_Localcopy(sendbuf, count, datatype, recvbuf, count, datatype);
if (mpi_errno) goto fn_fail;
}
}
/* now do an IN_PLACE allreduce among the local roots of all nodes */
if (comm_ptr->node_roots_comm != NULL) {
mpi_errno = MPIR_Allreduce_intra(MPI_IN_PLACE, recvbuf, count, datatype, op, comm_ptr->node_roots_comm);
if (mpi_errno) goto fn_fail;
}
/* now broadcast the result among local processes */
if (comm_ptr->node_comm != NULL) {
/* NOTE(review): MPIR_Bcast_or_coll_fn appears to require the
   nesting level to be raised around it — TODO confirm why the
   bcast (unlike the reduce above) needs Nest_incr/Nest_decr. */
MPIU_THREADPRIV_GET;
MPIR_Nest_incr();
mpi_errno = MPIR_Bcast_or_coll_fn(recvbuf, count, datatype, 0, comm_ptr->node_comm);
MPIR_Nest_decr();
}
}
else {
/* Not node-aware or op is non-commutative: flat algorithm. */
mpi_errno = MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm_ptr);
}
#else
mpi_errno = MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm_ptr);
#endif
}
else {
/* intercommunicator */
mpi_errno = MPIR_Allreduce_inter(sendbuf, recvbuf, count, datatype, op, comm_ptr);
}
}
/* Single error check covers both the coll_fns and the fallback paths. */
if (mpi_errno != MPI_SUCCESS) goto fn_fail;
fn_exit:
return mpi_errno;
fn_fail:
goto fn_exit;
}
#endif
#undef FUNCNAME
......@@ -672,91 +796,8 @@ int MPI_Allreduce ( void *sendbuf, void *recvbuf, int count,
/* ... body of routine ... */
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Allreduce != NULL)
{
mpi_errno = comm_ptr->coll_fns->Allreduce(sendbuf, recvbuf, count,
datatype, op, comm_ptr);
}
else
{
if (comm_ptr->comm_kind == MPID_INTRACOMM) {
/* intracommunicator */
#if defined(USE_SMP_COLLECTIVES)
MPID_Op *op_ptr;
int is_commutative;
/* is the op commutative? We do SMP optimizations only if it is. */
if (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN)
is_commutative = 1;
else {
MPID_Op_get_ptr(op, op_ptr);
is_commutative = (op_ptr->kind == MPID_OP_USER_NONCOMMUTE) ? 0 : 1;
}
if (MPIR_Comm_is_node_aware(comm_ptr) && is_commutative) {
/* on each node, do a reduce to the local root */
if (comm_ptr->node_comm != NULL)
{
/* take care of the MPI_IN_PLACE case. For reduce,
MPI_IN_PLACE is specified only on the root;
for allreduce it is specified on all processes. */
if ((sendbuf == MPI_IN_PLACE) && (comm_ptr->node_comm->rank != 0)) {
/* IN_PLACE and not root of reduce. Data supplied to this
allreduce is in recvbuf. Pass that as the sendbuf to reduce. */
mpi_errno = MPIR_Reduce_or_coll_fn(recvbuf, NULL, count, datatype,
op, 0, comm_ptr->node_comm);
}
else {
mpi_errno = MPIR_Reduce_or_coll_fn(sendbuf, recvbuf, count, datatype,
op, 0, comm_ptr->node_comm);
}
if (mpi_errno) goto fn_fail;
}
else {
/* only one process on the node. copy sendbuf to recvbuf */
if (sendbuf != MPI_IN_PLACE) {
mpi_errno = MPIR_Localcopy(sendbuf, count, datatype,
recvbuf, count, datatype);
if (mpi_errno) goto fn_fail;
}
}
/* now do an IN_PLACE allreduce among the local roots of all nodes */
if (comm_ptr->node_roots_comm != NULL) {
mpi_errno = MPIR_Allreduce(MPI_IN_PLACE, recvbuf, count, datatype,
op, comm_ptr->node_roots_comm);
if (mpi_errno) goto fn_fail;
}
/* now broadcast the result among local processes */
if (comm_ptr->node_comm != NULL) {
MPIU_THREADPRIV_GET;
MPIR_Nest_incr();
mpi_errno = MPIR_Bcast_or_coll_fn(recvbuf, count, datatype,
0, comm_ptr->node_comm);
MPIR_Nest_decr();
}
}
else {
mpi_errno = MPIR_Allreduce(sendbuf, recvbuf, count, datatype,
op, comm_ptr);
}
#else
mpi_errno = MPIR_Allreduce(sendbuf, recvbuf, count, datatype,
op, comm_ptr);
#endif
}
else {
/* intercommunicator */
mpi_errno = MPIR_Allreduce_inter(sendbuf, recvbuf, count,
datatype, op, comm_ptr);
}
}
if (mpi_errno != MPI_SUCCESS) goto fn_fail;
mpi_errno = MPIR_Allreduce_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr);
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
......
......@@ -536,12 +536,9 @@ int MPIR_Get_contextid( MPID_Comm *comm_ptr, MPIR_Context_id_t *context_id )
MPIU_Memcpy( local_mask, context_mask, MPIR_MAX_CONTEXT_MASK * sizeof(int) );
/* Note that this is the unthreaded version */
MPIU_THREADPRIV_GET;
MPIR_Nest_incr();
/* Comm must be an intracommunicator */
mpi_errno = NMPI_Allreduce( MPI_IN_PLACE, local_mask, MPIR_MAX_CONTEXT_MASK,
MPI_INT, MPI_BAND, comm_ptr->handle );
MPIR_Nest_decr();
mpi_errno = MPIR_Allreduce_impl( MPI_IN_PLACE, local_mask, MPIR_MAX_CONTEXT_MASK,
MPI_INT, MPI_BAND, comm_ptr );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
*context_id = MPIR_Find_and_allocate_context_id(local_mask);
......@@ -577,19 +574,12 @@ int MPIR_Get_contextid( MPID_Comm *comm_ptr, MPIR_Context_id_t *context_id )
int testCount = 10; /* if you change this value, you need to also change
it below where it is reinitialized */
MPIU_THREADPRIV_DECL;
MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_CONTEXTID);
MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_CONTEXTID);
MPIU_THREADPRIV_GET;
*context_id = 0;
/* We increment the nest level now because we need to know that we're
within another MPI routine before calling the CS_ENTER macro */
MPIR_Nest_incr();
/* We lock only around access to the mask. If another thread is
using the mask, we take a mask of zero */
MPIU_DBG_MSG_FMT(COMM, VERBOSE, (MPIU_DBG_FDEST,
......@@ -637,8 +627,8 @@ int MPIR_Get_contextid( MPID_Comm *comm_ptr, MPIR_Context_id_t *context_id )
release that global lock when it needs to wait. That will allow
other processes to enter the global or brief global critical section.
*/
mpi_errno = NMPI_Allreduce( MPI_IN_PLACE, local_mask, MPIR_MAX_CONTEXT_MASK,
MPI_INT, MPI_BAND, comm_ptr->handle );
mpi_errno = MPIR_Allreduce_impl( MPI_IN_PLACE, local_mask, MPIR_MAX_CONTEXT_MASK,
MPI_INT, MPI_BAND, comm_ptr );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
if (own_mask) {
......@@ -704,8 +694,8 @@ int MPIR_Get_contextid( MPID_Comm *comm_ptr, MPIR_Context_id_t *context_id )
* In the GLOBAL case, we actually are holding a lock here, so it
* doesn't trigger helgrind/DRD warnings. */
hasNoId = MPIR_Locate_context_bit(context_mask) == 0;
mpi_errno = NMPI_Allreduce( &hasNoId, &totalHasNoId, 1, MPI_INT,
MPI_MAX, comm_ptr->handle );
mpi_errno = MPIR_Allreduce_impl( &hasNoId, &totalHasNoId, 1, MPI_INT,
MPI_MAX, comm_ptr );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
if (totalHasNoId == 1) {
/* Release the mask for use by other threads */
......@@ -723,7 +713,6 @@ int MPIR_Get_contextid( MPID_Comm *comm_ptr, MPIR_Context_id_t *context_id )
fn_exit:
MPIU_DBG_MSG_S(COMM,VERBOSE,"Context mask = %s",MPIR_ContextMaskToStr());
MPIR_Nest_decr();
MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GET_CONTEXTID);
return mpi_errno;
fn_fail:
......
......@@ -136,10 +136,8 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintracomm)
The Intel test suite checks for this; it is also an easy
error to make */
acthigh = high ? 1 : 0; /* Clamp high into 1 or 0 */
MPIR_Nest_incr();
mpi_errno = NMPI_Allreduce( MPI_IN_PLACE, &acthigh, 1, MPI_INT,
MPI_SUM, comm_ptr->local_comm->handle );
MPIR_Nest_decr();
mpi_errno = MPIR_Allreduce_impl( MPI_IN_PLACE, &acthigh, 1, MPI_INT,
MPI_SUM, comm_ptr->local_comm );
if (mpi_errno) goto fn_fail;
/* acthigh must either == 0 or the size of the local comm */
if (acthigh != 0 && acthigh != comm_ptr->local_size) {
......
......@@ -537,6 +537,7 @@ int MPID_PG_ForwardPGInfo( MPID_Comm *peer_ptr, MPID_Comm *comm_ptr,
int nPGids, const int gpids[],
int root )
{
int mpi_errno = MPI_SUCCESS;
int i, allfound = 1, pgid, pgidWorld;
MPIDI_PG_t *pg = 0;
MPIDI_PG_iterator iter;
......@@ -567,8 +568,8 @@ int MPID_PG_ForwardPGInfo( MPID_Comm *peer_ptr, MPID_Comm *comm_ptr,
}
/* See if everyone is happy */
NMPI_Allreduce( MPI_IN_PLACE, &allfound, 1, MPI_INT, MPI_LAND,
comm_ptr->handle );
mpi_errno = MPIR_Allreduce_impl( MPI_IN_PLACE, &allfound, 1, MPI_INT, MPI_LAND, comm_ptr );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
if (allfound) return MPI_SUCCESS;
......@@ -584,7 +585,10 @@ int MPID_PG_ForwardPGInfo( MPID_Comm *peer_ptr, MPID_Comm *comm_ptr,
from ch3u_port.c */
MPID_PG_BCast( peer_ptr, comm_ptr, root );
#endif
fn_exit:
return MPI_SUCCESS;
fn_fail:
goto fn_exit;
}
#undef FUNCNAME
......
......@@ -46,7 +46,7 @@ MPIDO_Allreduce(void * sendbuf,
(op_type_support == MPIDO_NOT_SUPPORTED))
{
comm->dcmf.last_algorithm = MPIDO_USE_MPICH_ALLREDUCE;
return MPIR_Allreduce(sendbuf, recvbuf, count, datatype, op, comm);
return MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm);
}
/* Type_get_extent should return the proper value */
PMPI_Type_get_extent(datatype, &data_true_lb, &data_true_extent);
......@@ -226,7 +226,7 @@ MPIDO_Allreduce(void * sendbuf,
*/
else
{
rc = MPIR_Allreduce(sendbuf, recvbuf, count, datatype, op, comm);
rc = MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm);
comm->dcmf.last_algorithm = MPIDO_USE_MPICH_ALLREDUCE;
}
}
......@@ -262,7 +262,7 @@ MPIDO_Allreduce(void * sendbuf,
if (rc == STAR_FAILURE)
{
rc = MPIR_Allreduce(sendbuf, recvbuf, count, datatype, op, comm);
rc = MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm);
}
MPIU_Free(tb_ptr);
......
......@@ -674,8 +674,8 @@ void MPIDI_Coll_Comm_create (MPID_Comm *comm)
&my_coords[3]);
/* find if the communicator is a rectangle */
MPIR_Allreduce(my_coords, min_coords,4, MPI_UNSIGNED, MPI_MIN, comm);
MPIR_Allreduce(my_coords, max_coords,4, MPI_UNSIGNED, MPI_MAX, comm);
MPIR_Allreduce_intra(my_coords, min_coords,4, MPI_UNSIGNED, MPI_MIN, comm);
MPIR_Allreduce_intra(my_coords, max_coords,4, MPI_UNSIGNED, MPI_MAX, comm);
t_size = (unsigned) (max_coords[3] - min_coords[3] + 1);
z_size = (unsigned) (max_coords[2] - min_coords[2] + 1);
......
......@@ -65,7 +65,7 @@ DCMF_Protocol_t bg1s_ct_proto;
* - \e w3 = 0
*
* \note Epoch End includes MPID_MSGTYPE_UNLOCK and MPID_MSGTYPE_COMPLETE
* (MPID_Win_fence() uses NMPI_Allreduce()).
* (MPID_Win_fence() uses MPIR_Allreduce_impl()).
*/
/** \brief global for our lpid */
......
......@@ -45,7 +45,7 @@
* a \e FENCE epoch is currently in affect.
* - If MPI_MODE_NOPRECEDE is asserted, fail with MPI_ERR_RMA_SYNC.
* - \e MPID_assert_debug that the local window is not locked.
* - Call NMPI_Allreduce on the window communicator to sum