Commit ac193bce authored by David Goodell

[svn-r5276] Add MPI_IN_PLACE to MPI_Alltoall{,v,w} (MPI Forum ticket #31).

All three all-to-all scatter/gather routines (MPI_Alltoall,
MPI_Alltoallv, and MPI_Alltoallw) now accept MPI_IN_PLACE as a valid
sendbuf argument.
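
For illustration only (not part of the commit message): a minimal
caller-side sketch of the new in-place mode. Per MPI-2.2, when
MPI_IN_PLACE is passed as sendbuf the send count and datatype are
ignored, and each rank's outgoing data is taken from, and overwritten
in, recvbuf.

#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, size, i;
    int *buf;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    buf = malloc(size * sizeof(int));
    for (i = 0; i < size; i++)
        buf[i] = rank * size + i;   /* block destined for rank i */

    /* block i of buf is sent to rank i and replaced by the block
     * received from rank i; the send arguments are ignored */
    MPI_Alltoall(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                 buf, 1, MPI_INT, MPI_COMM_WORLD);

    free(buf);
    MPI_Finalize();
    return 0;
}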

This is an implementation of MPI Forum ticket #31 for MPI-2.2
compliance.  See the following URL for more information:

https://svn.mpi-forum.org/trac/mpi-forum-web/ticket/31

No reviewer.
parent 70f4742e
@@ -3614,6 +3614,10 @@ int MPIC_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
int dest, int sendtag, void *recvbuf, int recvcount,
MPI_Datatype recvtype, int source, int recvtag,
MPI_Comm comm, MPI_Status *status);
int MPIC_Sendrecv_replace(void *buf, int count, MPI_Datatype type,
int dest, int sendtag,
int source, int recvtag,
MPI_Comm comm, MPI_Status *status);
int MPIR_Localcopy(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype);
int MPIC_Irecv(void *buf, int count, MPI_Datatype datatype, int
@@ -96,23 +96,57 @@ int MPIR_Alltoall(
int last_recv_cnt, mask, tmp_mask, tree_root, nprocs_completed;
#endif
if (recvcount == 0) return MPI_SUCCESS;
comm = comm_ptr->handle;
comm_size = comm_ptr->local_size;
rank = comm_ptr->rank;
/* Get extent of send and recv types */
MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
MPID_Datatype_get_extent_macro(sendtype, sendtype_extent);
MPID_Datatype_get_size_macro(sendtype, sendtype_size);
nbytes = sendtype_size * sendcount;
/* check if multiple threads are calling this collective function */
MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
if (sendbuf == MPI_IN_PLACE) {
/* We use pair-wise sendrecv_replace in order to conserve memory usage,
* which is in keeping with the spirit of the MPI-2.2 Standard. But
* because of this approach, all processes must agree on the global
* schedule of sendrecv_replace operations to avoid deadlock.
*
* Note that this is not an especially time-efficient algorithm: it
* performs repeated malloc/free cycles rather than maintaining a
* single buffer across the whole loop. Something like MADRE is
* probably the best solution for the MPI_IN_PLACE scenario. */
for (i = 0; i < comm_size; ++i) {
/* start inner loop at i to avoid re-exchanging data */
for (j = i; j < comm_size; ++j) {
if (rank == i) {
/* also covers the (rank == i && rank == j) case */
mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + j*recvcount*recvtype_extent),
recvcount, recvtype,
j, MPIR_ALLTOALL_TAG,
j, MPIR_ALLTOALL_TAG,
comm, &status);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
else if (rank == j) {
/* same as above with i/j args reversed */
mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + i*recvcount*recvtype_extent),
recvcount, recvtype,
i, MPIR_ALLTOALL_TAG,
i, MPIR_ALLTOALL_TAG,
comm, &status);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
}
}
}
else if ((nbytes <= MPIR_ALLTOALL_SHORT_MSG) && (comm_size >= 8)) {
/* use the indexing algorithm by Jehoshua Bruck et al,
* IEEE TPDS, Nov. 97 */
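
An editorial aside (not part of the commit): the deadlock-freedom
argument in the comment above rests on every rank walking the same
(i,j) pair list. This standalone sketch prints the schedule that the
MPI_IN_PLACE branch follows for a hypothetical comm_size of 3:

#include <stdio.h>

int main(void)
{
    int comm_size = 3, i, j;

    /* same loop structure as the MPI_IN_PLACE branch above: each pair
     * (i,j) is visited exactly once, in the same order on every rank */
    for (i = 0; i < comm_size; ++i)
        for (j = i; j < comm_size; ++j)
            printf("ranks %d and %d exchange blocks\n", i, j);
    return 0;
}

Both partners of a pair reach it at the same point in the global order,
so their MPIC_Sendrecv_replace calls always match up, while ranks not
named in the current pair simply skip it.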
@@ -544,7 +578,7 @@ int MPIR_Alltoall_inter(
/* check if multiple threads are calling this collective function */
MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
/* Do the pairwise exchanges */
max_size = MPIR_MAX(local_size, remote_size);
MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
@@ -669,8 +703,10 @@ int MPI_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype,
MPID_Datatype_valid_ptr( recvtype_ptr, mpi_errno );
MPID_Datatype_committed_ptr( recvtype_ptr, mpi_errno );
}
if (comm_ptr->comm_kind == MPID_INTERCOMM) {
MPIR_ERRTEST_SENDBUF_INPLACE(sendbuf, sendcount, mpi_errno);
}
MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, recvcount, mpi_errno);
MPIR_ERRTEST_USERBUFFER(sendbuf,sendcount,sendtype,mpi_errno);
MPIR_ERRTEST_USERBUFFER(recvbuf,recvcount,recvtype,mpi_errno);
@@ -40,6 +40,10 @@
*** Modification: We post only a small number of isends and irecvs
at a time and wait on them as suggested by Tony Ladd. ***
For MPI_IN_PLACE we use a completely different algorithm: we perform
pair-wise exchanges among all processes using sendrecv_replace. This
conserves memory at the expense of running time.
Possible improvements:
End Algorithm: MPI_Alltoallv
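
For illustration (again, not part of the commit): a hypothetical
in-place call, where recvbuf, recvcnts, rdispls, recvtype, and comm
stand for the arguments of a regular MPI_Alltoallv call. Block j of
recvbuf, described by recvcnts[j] at offset rdispls[j], is sent to
rank j and overwritten with the block received from it; the send
arguments are ignored.

MPI_Alltoallv(MPI_IN_PLACE, NULL, NULL, MPI_DATATYPE_NULL,
              recvbuf, recvcnts, rdispls, recvtype, comm);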
@@ -59,10 +63,11 @@ int MPIR_Alltoallv (
MPID_Comm *comm_ptr )
{
static const char FCNAME[] = "MPIR_Alltoallv";
int comm_size, i, j;
MPI_Aint send_extent, recv_extent;
int mpi_errno = MPI_SUCCESS;
MPI_Status *starray;
MPI_Status status;
MPI_Request *reqarray;
int dst, rank, req_cnt;
MPI_Comm comm;
@@ -81,56 +86,92 @@ int MPIR_Alltoallv (
/* check if multiple threads are calling this collective function */
MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
if (sendbuf == MPI_IN_PLACE) {
/* We use pair-wise sendrecv_replace in order to conserve memory usage,
* which is in keeping with the spirit of the MPI-2.2 Standard. But
* because of this approach, all processes must agree on the global
* schedule of sendrecv_replace operations to avoid deadlock.
*
* Note that this is not an especially time-efficient algorithm: it
* performs repeated malloc/free cycles rather than maintaining a
* single buffer across the whole loop. Something like MADRE is
* probably the best solution for the MPI_IN_PLACE scenario. */
for (i = 0; i < comm_size; ++i) {
/* start inner loop at i to avoid re-exchanging data */
for (j = i; j < comm_size; ++j) {
if (rank == i) {
/* also covers the (rank == i && rank == j) case */
mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + rdispls[j]*recv_extent),
recvcnts[j], recvtype,
j, MPIR_ALLTOALL_TAG,
j, MPIR_ALLTOALL_TAG,
comm, &status);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
else if (rank == j) {
/* same as above with i/j args reversed */
mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + rdispls[i]*recv_extent),
recvcnts[i], recvtype,
i, MPIR_ALLTOALL_TAG,
i, MPIR_ALLTOALL_TAG,
comm, &status);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
}
}
}
else {
bblock = MPIR_ALLTOALL_THROTTLE;
if (bblock == 0) bblock = comm_size;
MPIU_CHKLMEM_MALLOC(starray, MPI_Status*, 2*bblock*sizeof(MPI_Status), mpi_errno, "starray");
MPIU_CHKLMEM_MALLOC(reqarray, MPI_Request*, 2*bblock*sizeof(MPI_Request), mpi_errno, "reqarray");
/* post only bblock isends/irecvs at a time as suggested by Tony Ladd */
for (ii=0; ii<comm_size; ii+=bblock) {
req_cnt = 0;
ss = comm_size-ii < bblock ? comm_size-ii : bblock;
/* do the communication -- post ss sends and receives: */
for ( i=0; i<ss; i++ ) {
dst = (rank+i+ii) % comm_size;
if (recvcnts[dst]) {
MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
rdispls[dst]*recv_extent);
mpi_errno = MPIC_Irecv((char *)recvbuf+rdispls[dst]*recv_extent,
recvcnts[dst], recvtype, dst,
MPIR_ALLTOALLV_TAG, comm,
&reqarray[req_cnt]);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
req_cnt++;
}
}
for ( i=0; i<ss; i++ ) {
dst = (rank-i-ii+comm_size) % comm_size;
if (sendcnts[dst]) {
MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
sdispls[dst]*send_extent);
mpi_errno = MPIC_Isend((char *)sendbuf+sdispls[dst]*send_extent,
sendcnts[dst], sendtype, dst,
MPIR_ALLTOALLV_TAG, comm,
&reqarray[req_cnt]);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
req_cnt++;
}
}
}
mpi_errno = NMPI_Waitall(req_cnt, reqarray, starray);
/* --BEGIN ERROR HANDLING-- */
if (mpi_errno == MPI_ERR_IN_STATUS) {
for (i=0; i<req_cnt; i++) {
if (starray[i].MPI_ERROR != MPI_SUCCESS)
mpi_errno = starray[i].MPI_ERROR;
}
}
/* --END ERROR HANDLING-- */
}
}
fn_exit:
@@ -314,22 +355,29 @@ int MPI_Alltoallv(void *sendbuf, int *sendcnts, int *sdispls,
{
MPID_Datatype *sendtype_ptr=NULL, *recvtype_ptr=NULL;
int i, comm_size;
int check_send = (comm_ptr->comm_kind == MPID_INTRACOMM && sendbuf != MPI_IN_PLACE);
MPID_Comm_valid_ptr( comm_ptr, mpi_errno );
if (mpi_errno != MPI_SUCCESS) goto fn_fail;
if (comm_ptr->comm_kind == MPID_INTRACOMM)
comm_size = comm_ptr->local_size;
else
comm_size = comm_ptr->remote_size;
if (comm_ptr->comm_kind == MPID_INTERCOMM && sendbuf == MPI_IN_PLACE) {
MPIU_ERR_SETANDJUMP(mpi_errno, MPIR_ERR_RECOVERABLE, "**sendbuf_inplace");
}
for (i=0; i<comm_size; i++) {
if (check_send) {
MPIR_ERRTEST_COUNT(sendcnts[i], mpi_errno);
MPIR_ERRTEST_DATATYPE(sendtype, "sendtype", mpi_errno);
}
MPIR_ERRTEST_COUNT(recvcnts[i], mpi_errno);
MPIR_ERRTEST_DATATYPE(recvtype, "recvtype", mpi_errno);
}
if (check_send && HANDLE_GET_KIND(sendtype) != HANDLE_KIND_BUILTIN) {
MPID_Datatype_get_ptr(sendtype, sendtype_ptr);
MPID_Datatype_valid_ptr( sendtype_ptr, mpi_errno );
MPID_Datatype_committed_ptr( sendtype_ptr, mpi_errno );
@@ -340,12 +388,9 @@ int MPI_Alltoallv(void *sendbuf, int *sendcnts, int *sdispls,
MPID_Datatype_committed_ptr( recvtype_ptr, mpi_errno );
}
for (i=0; i<comm_size && check_send; i++) {
if (sendcnts[i] > 0) {
MPIR_ERRTEST_USERBUFFER(sendbuf,sendcnts[i],sendtype,mpi_errno);
}
}
for (i=0; i<comm_size; i++) {
@@ -58,8 +58,9 @@ int MPIR_Alltoallw (
MPID_Comm *comm_ptr )
{
static const char FCNAME[] = "MPIR_Alltoallw";
int comm_size, i, j;
int mpi_errno = MPI_SUCCESS;
MPI_Status status;
MPI_Status *starray;
MPI_Request *reqarray;
int dst, rank;
@@ -76,91 +77,127 @@ int MPIR_Alltoallw (
/* check if multiple threads are calling this collective function */
MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
if (sendbuf == MPI_IN_PLACE) {
/* We use pair-wise sendrecv_replace in order to conserve memory usage,
* which is in keeping with the spirit of the MPI-2.2 Standard. But
* because of this approach, all processes must agree on the global
* schedule of sendrecv_replace operations to avoid deadlock.
*
* Note that this is not an especially time-efficient algorithm: it
* performs repeated malloc/free cycles rather than maintaining a
* single buffer across the whole loop. Something like MADRE is
* probably the best solution for the MPI_IN_PLACE scenario. */
for (i = 0; i < comm_size; ++i) {
/* start inner loop at i to avoid re-exchanging data */
for (j = i; j < comm_size; ++j) {
if (rank == i) {
/* also covers the (rank == i && rank == j) case */
mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + rdispls[j]),
recvcnts[j], recvtypes[j],
j, MPIR_ALLTOALL_TAG,
j, MPIR_ALLTOALL_TAG,
comm, &status);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
else if (rank == j) {
/* same as above with i/j args reversed */
mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + rdispls[i]),
recvcnts[i], recvtypes[i],
i, MPIR_ALLTOALL_TAG,
i, MPIR_ALLTOALL_TAG,
comm, &status);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
}
}
}
else {
bblock = MPIR_ALLTOALL_THROTTLE;
if (bblock == 0) bblock = comm_size;
MPIU_CHKLMEM_MALLOC(starray, MPI_Status*, 2*bblock*sizeof(MPI_Status), mpi_errno, "starray");
MPIU_CHKLMEM_MALLOC(reqarray, MPI_Request*, 2*bblock*sizeof(MPI_Request), mpi_errno, "reqarray");
/* post only bblock isends/irecvs at a time as suggested by Tony Ladd */
for (ii=0; ii<comm_size; ii+=bblock) {
outstanding_requests = 0;
ss = comm_size-ii < bblock ? comm_size-ii : bblock;
/* do the communication -- post ss sends and receives: */
for ( i=0; i<ss; i++ ) {
dst = (rank+i+ii) % comm_size;
if (recvcnts[dst]) {
mpi_errno = MPIC_Irecv((char *)recvbuf+rdispls[dst],
recvcnts[dst], recvtypes[dst], dst,
MPIR_ALLTOALLW_TAG, comm,
&reqarray[outstanding_requests]);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
outstanding_requests++;
}
}
for ( i=0; i<ss; i++ ) {
dst = (rank-i-ii+comm_size) % comm_size;
if (sendcnts[dst]) {
mpi_errno = MPIC_Isend((char *)sendbuf+sdispls[dst],
sendcnts[dst], sendtypes[dst], dst,
MPIR_ALLTOALLW_TAG, comm,
&reqarray[outstanding_requests]);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
outstanding_requests++;
}
}
}
mpi_errno = NMPI_Waitall(outstanding_requests, reqarray, starray);
/* --BEGIN ERROR HANDLING-- */
if (mpi_errno == MPI_ERR_IN_STATUS) {
for (i=0; i<outstanding_requests; i++) {
if (starray[i].MPI_ERROR != MPI_SUCCESS)
mpi_errno = starray[i].MPI_ERROR;
}
}
/* --END ERROR HANDLING-- */
}
#ifdef FOO
/* Use pairwise exchange algorithm. */
/* Make local copy first */
mpi_errno = MPIR_Localcopy(((char *)sendbuf+sdispls[rank]),
sendcnts[rank], sendtypes[rank],
((char *)recvbuf+rdispls[rank]),
recvcnts[rank], recvtypes[rank]);
/* --BEGIN ERROR HANDLING-- */
if (mpi_errno)
{
mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
goto fn_fail;
}
/* --END ERROR HANDLING-- */
/* Do the pairwise exchange. */
for (i=1; i<comm_size; i++) {
src = (rank - i + comm_size) % comm_size;
dst = (rank + i) % comm_size;
mpi_errno = MPIC_Sendrecv(((char *)sendbuf+sdispls[dst]),
sendcnts[dst], sendtypes[dst], dst,
MPIR_ALLTOALLW_TAG,
((char *)recvbuf+rdispls[src]),
recvcnts[src], recvtypes[src], src,
MPIR_ALLTOALLW_TAG, comm, &status);
/* --BEGIN ERROR HANDLING-- */
if (mpi_errno)
{
mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
goto fn_fail;
}
/* --END ERROR HANDLING-- */
}
#endif
}
/* check if multiple threads are calling this collective function */
fn_exit:
MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
@@ -336,30 +373,39 @@ int MPI_Alltoallw(void *sendbuf, int *sendcnts, int *sdispls,
{
MPID_Datatype *sendtype_ptr=NULL, *recvtype_ptr=NULL;
int i, comm_size;
int check_send;
MPID_Comm_valid_ptr( comm_ptr, mpi_errno );
if (mpi_errno != MPI_SUCCESS) goto fn_fail;
check_send = (comm_ptr->comm_kind == MPID_INTRACOMM && sendbuf != MPI_IN_PLACE);
if (comm_ptr->comm_kind == MPID_INTERCOMM && sendbuf == MPI_IN_PLACE) {
MPIU_ERR_SETANDJUMP(mpi_errno, MPIR_ERR_RECOVERABLE, "**sendbuf_inplace");
}
if (comm_ptr->comm_kind == MPID_INTRACOMM)
comm_size = comm_ptr->local_size;
else
comm_size = comm_ptr->remote_size;
for (i=0; i<comm_size; i++) {
if (check_send) {
MPIR_ERRTEST_COUNT(sendcnts[i], mpi_errno);
if (sendcnts[i] > 0) {
MPIR_ERRTEST_DATATYPE(sendtypes[i], "sendtype[i]", mpi_errno);
}
if ((sendcnts[i] > 0) && (HANDLE_GET_KIND(sendtypes[i]) != HANDLE_KIND_BUILTIN)) {
MPID_Datatype_get_ptr(sendtypes[i], sendtype_ptr);
MPID_Datatype_valid_ptr( sendtype_ptr, mpi_errno );
MPID_Datatype_committed_ptr( sendtype_ptr, mpi_errno );
}
}
MPIR_ERRTEST_COUNT(recvcnts[i], mpi_errno);
if (recvcnts[i] > 0) {
MPIR_ERRTEST_DATATYPE(recvtypes[i], "recvtype[i]", mpi_errno);
}
if ((recvcnts[i] > 0) && (HANDLE_GET_KIND(recvtypes[i]) != HANDLE_KIND_BUILTIN)) {
MPID_Datatype_get_ptr(recvtypes[i], recvtype_ptr);
MPID_Datatype_valid_ptr( recvtype_ptr, mpi_errno );
@@ -367,9 +413,8 @@ int MPI_Alltoallw(void *sendbuf, int *sendcnts, int *sdispls,
}
}
for (i=0; i<comm_size && check_send; i++) {
if (sendcnts[i] > 0) {
MPIR_ERRTEST_USERBUFFER(sendbuf,sendcnts[i],sendtypes[i],mpi_errno);
break;
}
@@ -173,6 +173,110 @@ int MPIC_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
return mpi_errno;
}
/* NOTE: for regular collectives (as opposed to irregular collectives),
* calling this function repeatedly will almost always be slower than
* performing the equivalent operations inline, because of the overhead
* of the repeated malloc/free cycles. */
#undef FUNCNAME
#define FUNCNAME MPIC_Sendrecv_replace
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Sendrecv_replace(void *buf, int count, MPI_Datatype datatype,
int dest, int sendtag,
int source, int recvtag,
MPI_Comm comm, MPI_Status *status)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Context_id_t context_id_offset;
MPID_Request *sreq;
MPID_Request *rreq;
void *tmpbuf = NULL;