Commit 4f977797 authored by Pavan Balaji
Browse files

Make SMP-awareness in MPI_Barrier consistent.



The current implementation of SMP-awareness in MPI_Barrier was in
MPIR_Barrier_impl.  This makes the default implementation of barrier
SMP-aware.  However, if a device overrides barrier and then calls back
the default implementation through MPIR_Barrier, it can no longer take
advantage of SMP-awareness.  This patch moves the SMP-aware
implementation to MPIR_Barrier_intra, which is called by
MPIR_Barrier_impl through MPIR_Barrier, in the default implementation.
See #1957.
Signed-off-by: Michael Blocksome <blocksom@us.ibm.com>
parent 0743c396
......@@ -33,8 +33,6 @@ cvars:
#endif
/* -- End Profiling Symbol Block */
PMPI_LOCAL int MPIR_Barrier_or_coll_fn(MPID_Comm *comm_ptr, int *errflag );
/* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build
the MPI routines */
#ifndef MPICH_MPI_FROM_PMPI
......@@ -63,6 +61,66 @@ PMPI_LOCAL int MPIR_Barrier_or_coll_fn(MPID_Comm *comm_ptr, int *errflag );
This is an intracommunicator barrier only!
*/
#undef FUNCNAME
#define FUNCNAME barrier_smp_intra
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/* SMP-aware intracommunicator barrier.
 *
 * Runs in three phases, each skipped when the corresponding sub-communicator
 * pointer on comm_ptr is NULL:
 *   1. barrier over comm_ptr->node_comm,
 *   2. barrier over comm_ptr->node_roots_comm,
 *   3. 1-byte broadcast from rank 0 over comm_ptr->node_comm.
 *
 * A failure in any phase does not abort the sequence: *errflag is set to
 * TRUE, the error is folded into an accumulated return code, and the next
 * phase still runs.
 *
 * The caller must already have established that SMP collectives and the SMP
 * barrier are enabled and that comm_ptr is node-aware; this is asserted.
 *
 * Returns the accumulated error code if any phase failed; otherwise a
 * "**coll_fail" error if *errflag is set; otherwise the last phase's result.
 */
static int barrier_smp_intra(MPID_Comm *comm_ptr, int *errflag)
{
    int mpi_errno_ret = MPI_SUCCESS;
    int mpi_errno = MPI_SUCCESS;

    MPIU_Assert(MPIR_CVAR_ENABLE_SMP_COLLECTIVES &&
                MPIR_CVAR_ENABLE_SMP_BARRIER &&
                MPIR_Comm_is_node_aware(comm_ptr));

    /* Phase 1: intranode barrier on every node. */
    if (comm_ptr->node_comm) {
        mpi_errno = MPIR_Barrier_impl(comm_ptr->node_comm, errflag);
        if (mpi_errno != MPI_SUCCESS) {
            /* communication error: remember it and keep going */
            *errflag = TRUE;
            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
            MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
        }
    }

    /* Phase 2: barrier across the roots of all nodes. */
    if (comm_ptr->node_roots_comm) {
        mpi_errno = MPIR_Barrier_impl(comm_ptr->node_roots_comm, errflag);
        if (mpi_errno != MPI_SUCCESS) {
            /* communication error: remember it and keep going */
            *errflag = TRUE;
            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
            MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
        }
    }

    /* Phase 3: release the local processes on each node.  The payload is
     * one byte because a 0-byte broadcast just returns without doing
     * anything. */
    if (comm_ptr->node_comm) {
        int release_token = 0;

        mpi_errno = MPIR_Bcast_impl(&release_token, 1, MPI_BYTE, 0,
                                    comm_ptr->node_comm, errflag);
        if (mpi_errno != MPI_SUCCESS) {
            /* communication error: remember it and keep going */
            *errflag = TRUE;
            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
            MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
        }
    }

 fn_exit:
    if (mpi_errno_ret) {
        mpi_errno = mpi_errno_ret;
    }
    else if (*errflag) {
        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
    }
    return mpi_errno;

 fn_fail:
    goto fn_exit;
}
/* not declared static because it is called in ch3_comm_connect/accept */
#undef FUNCNAME
#define FUNCNAME MPIR_Barrier_intra
......@@ -82,6 +140,18 @@ int MPIR_Barrier_intra( MPID_Comm *comm_ptr, int *errflag )
/* Trivial barriers return immediately */
if (size == 1) goto fn_exit;
if (MPIR_CVAR_ENABLE_SMP_COLLECTIVES && MPIR_CVAR_ENABLE_SMP_BARRIER &&
MPIR_Comm_is_node_aware(comm_ptr)) {
mpi_errno = barrier_smp_intra(comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
goto fn_exit;
}
rank = comm_ptr->rank;
comm = comm_ptr->handle;
......@@ -113,37 +183,6 @@ int MPIR_Barrier_intra( MPID_Comm *comm_ptr, int *errflag )
goto fn_exit;
}
/* A simple utility function that calls the comm_ptr->coll_fns->Barrier
override if it exists; otherwise it calls MPIR_Barrier_intra with the same arguments. */
/* Note that this function must *not* be inline - if weak symbols are not
available, this function must be a global symbol. */
#undef FUNCNAME
#define FUNCNAME MPIR_Barrier_or_coll_fn
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
PMPI_LOCAL int MPIR_Barrier_or_coll_fn(MPID_Comm *comm_ptr, int *errflag )
{
    /* Dispatch a barrier on comm_ptr: use the Barrier entry in
     * comm_ptr->coll_fns when both the table and the entry are non-NULL,
     * and MPIR_Barrier_intra otherwise.  Either callee's error code is
     * pushed onto the error stack and returned. */
    int mpi_errno = MPI_SUCCESS;

    if (comm_ptr->coll_fns == NULL || comm_ptr->coll_fns->Barrier == NULL) {
        /* no override installed: default intracommunicator barrier */
        mpi_errno = MPIR_Barrier_intra(comm_ptr, errflag);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }
    else {
        /* --BEGIN USEREXTENSION-- */
        mpi_errno = comm_ptr->coll_fns->Barrier(comm_ptr, errflag);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        /* --END USEREXTENSION-- */
    }

 fn_exit:
    return mpi_errno;

 fn_fail:
    goto fn_exit;
}
/* not declared static because a machine-specific function may call this one
in some cases */
#undef FUNCNAME
......@@ -285,58 +324,9 @@ int MPIR_Barrier_impl(MPID_Comm *comm_ptr, int *errflag)
}
else
{
if (comm_ptr->comm_kind == MPID_INTRACOMM) {
if (MPIR_CVAR_ENABLE_SMP_COLLECTIVES && MPIR_CVAR_ENABLE_SMP_BARRIER &&
MPIR_Comm_is_node_aware(comm_ptr)) {
/* do the intranode barrier on all nodes */
if (comm_ptr->node_comm != NULL)
{
mpi_errno = MPIR_Barrier_or_coll_fn(comm_ptr->node_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
/* do the barrier across roots of all nodes */
if (comm_ptr->node_roots_comm != NULL) {
mpi_errno = MPIR_Barrier_or_coll_fn(comm_ptr->node_roots_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
/* release the local processes on each node with a 1-byte broadcast
(0-byte broadcast just returns without doing anything) */
if (comm_ptr->node_comm != NULL)
{
int i=0;
mpi_errno = MPIR_Bcast_impl(&i, 1, MPI_BYTE, 0, comm_ptr->node_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
*errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
}
else {
mpi_errno = MPIR_Barrier_intra( comm_ptr, errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
}
else {
/* intercommunicator */
mpi_errno = MPIR_Barrier_inter( comm_ptr, errflag );
mpi_errno = MPIR_Barrier(comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
}
fn_exit:
if (mpi_errno_ret)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment