Commit 7627ba59 authored by Pavan Balaji


[svn-r3271] Committing the 64-bit optimization code for queue searching. This should make queue searches for the posted queue and the unexpected queue considerably faster for large queue lengths. Reviewed by
buntinas.
parent 6bab6254
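
The change packs the (tag, rank, context_id) triple into a single 64-bit word so a queue search can compare one word per element instead of three fields. A rough standalone sketch of that idea (the field layout mirrors the new MPIDI_Message_match union in the diff below; the stand-in types and the find_exact helper are illustrative, not MPICH code):

/* Illustrative sketch only -- not part of this commit.  Shows how a
 * tag/rank/context_id triple packed into one 64-bit word lets the
 * queue walk use a single compare per element. */
#include <stdint.h>
#include <stdio.h>

typedef union {
    struct {
        int32_t  tag;
        int16_t  rank;
        uint16_t context_id;   /* stand-in for MPIR_Context_id_t */
    } parts;
    uint64_t whole;            /* stand-in for MPIR_Upint */
} match_t;

typedef struct req {
    match_t     match;
    struct req *next;
} req_t;

/* Exact (no-wildcard) search: one 64-bit compare per queue element. */
static req_t *find_exact(req_t *head, match_t key)
{
    for (req_t *r = head; r != NULL; r = r->next)
        if (r->match.whole == key.whole)
            return r;
    return NULL;
}

int main(void)
{
    req_t a = { .match = { .parts = { .tag = 7, .rank = 1, .context_id = 0 } } };
    req_t b = { .match = { .parts = { .tag = 9, .rank = 2, .context_id = 0 } } };
    a.next = &b;

    match_t key = { .parts = { .tag = 9, .rank = 2, .context_id = 0 } };
    printf("found b: %d\n", find_exact(&a, key) == &b);   /* prints 1 */
    return 0;
}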
......@@ -219,9 +219,9 @@ void MPID_nem_newtcp_module_vc_dbg_print_sendq(FILE *stream, MPIDI_VC_t *vc)
while (sreq)
{
fprintf(stream, "....[%d] sreq=%p ctx=%#x rank=%d tag=%d\n", i, sreq,
sreq->dev.match.context_id,
sreq->dev.match.rank,
sreq->dev.match.tag);
sreq->dev.match.parts.context_id,
sreq->dev.match.parts.rank,
sreq->dev.match.parts.tag);
++i;
sreq = sreq->dev.next;
}
......
......@@ -66,9 +66,9 @@ void MPID_nem_dbg_print_vc_sendq(FILE *stream, MPIDI_VC_t *vc)
sreq = MPIDI_CH3I_active_send[CH3_NORMAL_QUEUE];
if (sreq) {
fprintf(stream, ".... sreq=%p ctx=%#x rank=%d tag=%d\n", sreq,
sreq->dev.match.context_id,
sreq->dev.match.rank,
sreq->dev.match.tag);
sreq->dev.match.parts.context_id,
sreq->dev.match.parts.rank,
sreq->dev.match.parts.tag);
}
fprintf(stream, "....CH3_NORMAL_QUEUE queue (head-to-tail)\n");
......@@ -76,9 +76,9 @@ void MPID_nem_dbg_print_vc_sendq(FILE *stream, MPIDI_VC_t *vc)
i = 0;
while (sreq != NULL) {
fprintf(stream, "....[%d] sreq=%p ctx=%#x rank=%d tag=%d\n", i, sreq,
sreq->dev.match.context_id,
sreq->dev.match.rank,
sreq->dev.match.tag);
sreq->dev.match.parts.context_id,
sreq->dev.match.parts.rank,
sreq->dev.match.parts.tag);
++i;
sreq = sreq->dev.next;
}
......
......@@ -8,8 +8,8 @@
#define set_request_info(rreq_, pkt_, msg_type_) \
{ \
(rreq_)->status.MPI_SOURCE = (pkt_)->match.rank; \
(rreq_)->status.MPI_TAG = (pkt_)->match.tag; \
(rreq_)->status.MPI_SOURCE = (pkt_)->match.parts.rank; \
(rreq_)->status.MPI_TAG = (pkt_)->match.parts.tag; \
(rreq_)->status.count = (pkt_)->data_sz; \
(rreq_)->dev.sender_req_id = (pkt_)->sender_req_id; \
(rreq_)->dev.recv_data_sz = (pkt_)->data_sz; \
......@@ -89,9 +89,9 @@ int MPID_nem_lmt_RndvSend(MPID_Request **sreq_p, const void * buf, int count, MP
sreq->ch.lmt_tmp_cookie.MPID_IOV_LEN = 0;
MPIDI_Pkt_init(rts_pkt, MPIDI_NEM_PKT_LMT_RTS);
rts_pkt->match.rank = comm->rank;
rts_pkt->match.tag = tag;
rts_pkt->match.context_id = comm->context_id + context_offset;
rts_pkt->match.parts.rank = comm->rank;
rts_pkt->match.parts.tag = tag;
rts_pkt->match.parts.context_id = comm->context_id + context_offset;
rts_pkt->sender_req_id = sreq->handle;
rts_pkt->data_sz = data_sz;
......
......@@ -727,15 +727,15 @@ int MPIDI_CH3I_Posted_recv_enqueued (MPID_Request *rreq)
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_POSTED_RECV_ENQUEUED);
/* don't enqueue for anysource */
if (rreq->dev.match.rank < 0)
if (rreq->dev.match.parts.rank < 0)
goto fn_exit;
/* don't enqueue a fastbox for yourself */
MPIU_Assert(rreq->comm != NULL);
if (rreq->dev.match.rank == rreq->comm->rank)
if (rreq->dev.match.parts.rank == rreq->comm->rank)
goto fn_exit;
/* don't enqueue non-local processes */
MPIDI_Comm_get_vc(rreq->comm, rreq->dev.match.rank, &vc);
MPIDI_Comm_get_vc(rreq->comm, rreq->dev.match.parts.rank, &vc);
MPIU_Assert(vc != NULL);
if (!((MPIDI_CH3I_VC *)vc->channel_private)->is_local)
goto fn_exit;
......@@ -767,14 +767,14 @@ int MPIDI_CH3I_Posted_recv_dequeued (MPID_Request *rreq)
MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_POSTED_RECV_DEQUEUED);
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_POSTED_RECV_DEQUEUED);
if (rreq->dev.match.rank < 0)
if (rreq->dev.match.parts.rank < 0)
goto fn_exit;
if (rreq->dev.match.rank == rreq->comm->rank)
if (rreq->dev.match.parts.rank == rreq->comm->rank)
goto fn_exit;
/* don't use MPID_NEM_IS_LOCAL, it doesn't handle dynamic processes */
MPIDI_Comm_get_vc(rreq->comm, rreq->dev.match.rank, &vc);
MPIDI_Comm_get_vc(rreq->comm, rreq->dev.match.parts.rank, &vc);
MPIU_Assert(vc != NULL);
if (!((MPIDI_CH3I_VC *)vc->channel_private)->is_local)
goto fn_exit;
......
......@@ -282,9 +282,9 @@ extern MPIDI_Process_t MPIDI_Process;
(sreq_)->status.cancelled = FALSE; \
(sreq_)->dev.state = 0; \
(sreq_)->dev.cancel_pending = FALSE; \
(sreq_)->dev.match.rank = rank; \
(sreq_)->dev.match.tag = tag; \
(sreq_)->dev.match.context_id = comm->context_id + context_offset; \
(sreq_)->dev.match.parts.rank = rank; \
(sreq_)->dev.match.parts.tag = tag; \
(sreq_)->dev.match.parts.context_id = comm->context_id + context_offset; \
(sreq_)->dev.user_buf = (void *) buf; \
(sreq_)->dev.user_count = count; \
(sreq_)->dev.datatype = datatype; \
......
......@@ -24,9 +24,6 @@ struct MPID_Request;
typedef MPIR_Pint MPIDI_msg_sz_t;
#include "mpid_dataloop.h"
#if 0
struct MPID_Datatype;
#endif
/* Include definitions from the channel which must exist before items in this
file (mpidpre.h) or the file it includes (mpiimpl.h) can be defined. */
......@@ -50,10 +47,15 @@ typedef unsigned long MPID_Seqnum_t;
#include "mpichconf.h"
/* For the typical communication system for which the ch3 channel is
appropriate, 16 bits is sufficient for the rank. By also using 16 bits
for the context, we can reduce the size of the match information, which
is beneficial for slower communication links.
/* For the typical communication system for which the ch3 channel is
appropriate, 16 bits is sufficient for the rank. By also using 16
bits for the context, we can reduce the size of the match
information, which is beneficial for slower communication
links. Further, this allows the total structure size to be 64 bits
and the search operations can be optimized on 64-bit platforms. We
use a union of the actual required structure with a MPIR_Upint, so
in this optimized case, the "whole" field can be used for
comparisons.
Note that the MPICH2 code (in src/mpi) uses int for rank (and usually for
contextids, though some work is needed there).
......@@ -63,13 +65,14 @@ typedef unsigned long MPID_Seqnum_t;
create (that may make use of dynamically created processes) that the
size of the communicator is within range.
*/
typedef struct MPIDI_Message_match
{
int32_t tag;
int16_t rank;
MPIR_Context_id_t context_id;
}
MPIDI_Message_match;
typedef union {
struct {
int32_t tag;
int16_t rank;
MPIR_Context_id_t context_id;
} parts;
MPIR_Upint whole;
} MPIDI_Message_match;
#define MPIDI_TAG_UB (0x7fffffff)
/* Packet types are defined in mpidpkt.h . The intent is to remove the
......@@ -178,6 +181,7 @@ MPIDI_DEV_WIN_DECL
typedef struct MPIDI_Request {
MPIDI_Message_match match;
MPIDI_Message_match mask;
/* user_buf, user_count, and datatype needed to process
rendezvous messages. */
......@@ -201,14 +205,6 @@ typedef struct MPIDI_Request {
int iov_count;
int iov_offset;
#if 0
/* FIXME: RDMA values are specific to some channels? */
/* FIXME: Remove these (obsolete)? */
MPID_IOV rdma_iov[MPID_IOV_LIMIT];
int rdma_iov_count;
int rdma_iov_offset;
MPI_Request rdma_request;
#endif
/* OnDataAvail is the action to take when data is now available.
For example, when an operation described by an iov has
completed. This replaces the MPIDI_CA_t (completion action)
......
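
The comment in the hunk above ties the fast path to the match structure fitting in a pointer-sized word, which is what the sizeof(MPIDI_Message_match) == SIZEOF_VOID_P test in the queue code further down checks. A small, hedged sanity check of that layout assumption on a given platform (stand-in types, not MPICH code):

/* Illustrative sketch only: verifies that the packed tag/rank/context_id
 * layout occupies exactly one 64-bit word, so the "whole" member can
 * stand in for all three fields in a comparison. */
#include <stdint.h>
#include <assert.h>
#include <stdio.h>

typedef union {
    struct {
        int32_t  tag;
        int16_t  rank;
        uint16_t context_id;   /* stand-in for MPIR_Context_id_t */
    } parts;
    uint64_t whole;            /* stand-in for MPIR_Upint */
} match_t;

int main(void)
{
    /* If either assertion fails, the single-word fast path does not apply
     * and the field-by-field comparison must be used instead. */
    assert(sizeof(match_t) == sizeof(uint64_t));
    assert(sizeof(((match_t *)0)->parts) == sizeof(uint64_t));
    printf("match fits in a 64-bit word\n");
    return 0;
}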
......@@ -101,9 +101,9 @@ int MPIDI_CH3_EagerNoncontigSend( MPID_Request **sreq_p,
sreq->dev.OnFinal = 0;
MPIDI_Pkt_init(eager_pkt, reqtype);
eager_pkt->match.rank = comm->rank;
eager_pkt->match.tag = tag;
eager_pkt->match.context_id = comm->context_id + context_offset;
eager_pkt->match.parts.rank = comm->rank;
eager_pkt->match.parts.tag = tag;
eager_pkt->match.parts.context_id = comm->context_id + context_offset;
eager_pkt->sender_req_id = MPI_REQUEST_NULL;
eager_pkt->data_sz = data_sz;
......@@ -157,9 +157,9 @@ int MPIDI_CH3_EagerContigSend( MPID_Request **sreq_p,
MPID_IOV iov[2];
MPIDI_Pkt_init(eager_pkt, reqtype);
eager_pkt->match.rank = comm->rank;
eager_pkt->match.tag = tag;
eager_pkt->match.context_id = comm->context_id + context_offset;
eager_pkt->match.parts.rank = comm->rank;
eager_pkt->match.parts.tag = tag;
eager_pkt->match.parts.context_id = comm->context_id + context_offset;
eager_pkt->sender_req_id = MPI_REQUEST_NULL;
eager_pkt->data_sz = data_sz;
......@@ -223,9 +223,9 @@ int MPIDI_CH3_EagerContigShortSend( MPID_Request **sreq_p,
/* printf( "Sending short eager\n"); fflush(stdout); */
MPIDI_Pkt_init(eagershort_pkt, reqtype);
eagershort_pkt->match.rank = comm->rank;
eagershort_pkt->match.tag = tag;
eagershort_pkt->match.context_id = comm->context_id + context_offset;
eagershort_pkt->match.parts.rank = comm->rank;
eagershort_pkt->match.parts.tag = tag;
eagershort_pkt->match.parts.context_id = comm->context_id + context_offset;
eagershort_pkt->data_sz = data_sz;
MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
......@@ -297,8 +297,8 @@ int MPIDI_CH3_PktHandler_EagerShortSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**nomemreq");
}
(rreq)->status.MPI_SOURCE = (eagershort_pkt)->match.rank;
(rreq)->status.MPI_TAG = (eagershort_pkt)->match.tag;
(rreq)->status.MPI_SOURCE = (eagershort_pkt)->match.parts.rank;
(rreq)->status.MPI_TAG = (eagershort_pkt)->match.parts.tag;
(rreq)->status.count = (eagershort_pkt)->data_sz;
(rreq)->dev.recv_data_sz = (eagershort_pkt)->data_sz;
MPIDI_Request_set_seqnum((rreq), (eagershort_pkt)->seqnum);
......@@ -495,9 +495,9 @@ int MPIDI_CH3_EagerContigIsend( MPID_Request **sreq_p,
sreq->dev.OnDataAvail = 0;
MPIDI_Pkt_init(eager_pkt, reqtype);
eager_pkt->match.rank = comm->rank;
eager_pkt->match.tag = tag;
eager_pkt->match.context_id = comm->context_id + context_offset;
eager_pkt->match.parts.rank = comm->rank;
eager_pkt->match.parts.tag = tag;
eager_pkt->match.parts.context_id = comm->context_id + context_offset;
eager_pkt->sender_req_id = sreq->handle;
eager_pkt->data_sz = data_sz;
......@@ -538,8 +538,8 @@ int MPIDI_CH3_EagerContigIsend( MPID_Request **sreq_p,
#define set_request_info(rreq_, pkt_, msg_type_) \
{ \
(rreq_)->status.MPI_SOURCE = (pkt_)->match.rank; \
(rreq_)->status.MPI_TAG = (pkt_)->match.tag; \
(rreq_)->status.MPI_SOURCE = (pkt_)->match.parts.rank; \
(rreq_)->status.MPI_TAG = (pkt_)->match.parts.tag; \
(rreq_)->status.count = (pkt_)->data_sz; \
(rreq_)->dev.sender_req_id = (pkt_)->sender_req_id; \
(rreq_)->dev.recv_data_sz = (pkt_)->data_sz; \
......@@ -696,8 +696,8 @@ int MPIDI_CH3_PktHandler_ReadySend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
MPIR_ERR_RECOVERABLE, FCNAME, __LINE__,
MPI_ERR_OTHER, "**rsendnomatch",
"**rsendnomatch %d %d",
ready_pkt->match.rank,
ready_pkt->match.tag);
ready_pkt->match.parts.rank,
ready_pkt->match.parts.tag);
rreq->status.count = 0;
if (rreq->dev.recv_data_sz > 0)
{
......
......@@ -44,9 +44,9 @@ int MPIDI_CH3_EagerSyncNoncontigSend( MPID_Request **sreq_p,
sreq->dev.OnFinal = 0;
MPIDI_Pkt_init(es_pkt, MPIDI_CH3_PKT_EAGER_SYNC_SEND);
es_pkt->match.rank = comm->rank;
es_pkt->match.tag = tag;
es_pkt->match.context_id = comm->context_id + context_offset;
es_pkt->match.parts.rank = comm->rank;
es_pkt->match.parts.tag = tag;
es_pkt->match.parts.context_id = comm->context_id + context_offset;
es_pkt->sender_req_id = sreq->handle;
es_pkt->data_sz = data_sz;
......@@ -125,9 +125,9 @@ int MPIDI_CH3_EagerSyncZero(MPID_Request **sreq_p, int rank, int tag,
sreq->dev.OnDataAvail = 0;
MPIDI_Pkt_init(es_pkt, MPIDI_CH3_PKT_EAGER_SYNC_SEND);
es_pkt->match.rank = comm->rank;
es_pkt->match.tag = tag;
es_pkt->match.context_id = comm->context_id + context_offset;
es_pkt->match.parts.rank = comm->rank;
es_pkt->match.parts.tag = tag;
es_pkt->match.parts.context_id = comm->context_id + context_offset;
es_pkt->sender_req_id = sreq->handle;
es_pkt->data_sz = 0;
......@@ -187,8 +187,8 @@ int MPIDI_CH3_EagerSyncAck( MPIDI_VC_t *vc, MPID_Request *rreq )
#define set_request_info(rreq_, pkt_, msg_type_) \
{ \
(rreq_)->status.MPI_SOURCE = (pkt_)->match.rank; \
(rreq_)->status.MPI_TAG = (pkt_)->match.tag; \
(rreq_)->status.MPI_SOURCE = (pkt_)->match.parts.rank; \
(rreq_)->status.MPI_TAG = (pkt_)->match.parts.tag; \
(rreq_)->status.count = (pkt_)->data_sz; \
(rreq_)->dev.sender_req_id = (pkt_)->sender_req_id; \
(rreq_)->dev.recv_data_sz = (pkt_)->data_sz; \
......
......@@ -22,8 +22,8 @@
#define set_request_info(rreq_, pkt_, msg_type_) \
{ \
(rreq_)->status.MPI_SOURCE = (pkt_)->match.rank; \
(rreq_)->status.MPI_TAG = (pkt_)->match.tag; \
(rreq_)->status.MPI_SOURCE = (pkt_)->match.parts.rank; \
(rreq_)->status.MPI_TAG = (pkt_)->match.parts.tag; \
(rreq_)->status.count = (pkt_)->data_sz; \
(rreq_)->dev.sender_req_id = (pkt_)->sender_req_id; \
(rreq_)->dev.recv_data_sz = (pkt_)->data_sz; \
......
......@@ -49,6 +49,37 @@ MPID_Request ** const MPID_Recvq_posted_head_ptr = &recvq_posted_head;
MPID_Request ** const MPID_Recvq_unexpected_head_ptr = &recvq_unexpected_head;
#endif
/* If the MPIDI_Message_match structure fits into a pointer size, we
* can directly work on it */
/* MATCH_WITH_NO_MASK compares the match values without masking
* them. This is useful for the case where there are no ANY_TAG or
* ANY_SOURCE wild cards. */
#define MATCH_WITH_NO_MASK(match1, match2) \
((sizeof(MPIDI_Message_match) == SIZEOF_VOID_P && 0) ? ((match1).whole == (match2).whole) : \
(((match1).parts.rank == (match2).parts.rank) && \
((match1).parts.tag == (match2).parts.tag) && \
((match1).parts.context_id == (match2).parts.context_id)))
/* MATCH_WITH_LEFT_MASK compares the match values after masking only
* the left field. This is useful for the case where the right match
* is a part of the unexpected queue and has no ANY_TAG or ANY_SOURCE
* wild cards, but the left match might have them. */
#define MATCH_WITH_LEFT_MASK(match1, match2, mask) \
((sizeof(MPIDI_Message_match) == SIZEOF_VOID_P && 0) ? (((match1).whole & (mask).whole) == (match2).whole) : \
((((match1).parts.rank & (mask).parts.rank) == (match2).parts.rank) && \
(((match1).parts.tag & (mask).parts.tag) == (match2).parts.tag) && \
((match1).parts.context_id == (match2).parts.context_id)))
/* This is the most general case where both matches have to be
* masked. Both matches are masked with the same value. There doesn't
* seem to be a need for two different masks at this time. */
#define MATCH_WITH_LEFT_RIGHT_MASK(match1, match2, mask) \
((sizeof(MPIDI_Message_match) == SIZEOF_VOID_P && 0) ? (((match1).whole & (mask).whole) == ((match2).whole & (mask).whole)) : \
((((match1).parts.rank & (mask).parts.rank) == ((match2).parts.rank & (mask).parts.rank)) && \
(((match1).parts.tag & (mask).parts.tag) == ((match2).parts.tag & (mask).parts.tag)) && \
((match1).parts.context_id == (match2).parts.context_id)))
/* FIXME: If this routine is only used by probe/iprobe, then we don't need
to set the cancelled field in status (only set for nonblocking requests) */
/*
......@@ -68,64 +99,34 @@ int MPIDI_CH3U_Recvq_FU(int source, int tag, int context_id, MPI_Status *s)
{
MPID_Request * rreq;
int found = 0;
MPIDI_Message_match match, mask;
MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FU);
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FU);
if (tag != MPI_ANY_TAG && source != MPI_ANY_SOURCE)
{
rreq = recvq_unexpected_head;
/* FIXME: If the match data fits in an int64_t, we should try
to use a single test here */
while(rreq != NULL)
{
if (rreq->dev.match.context_id == context_id &&
rreq->dev.match.rank == source && rreq->dev.match.tag == tag)
{
rreq = recvq_unexpected_head;
match.parts.context_id = context_id;
match.parts.tag = tag;
match.parts.rank = source;
if (tag != MPI_ANY_TAG && source != MPI_ANY_SOURCE) {
while (rreq != NULL) {
if (MATCH_WITH_NO_MASK(rreq->dev.match, match))
break;
}
rreq = rreq->dev.next;
}
}
else
{
MPIDI_Message_match match;
MPIDI_Message_match mask;
match.context_id = context_id;
mask.context_id = ~0;
else {
mask.parts.context_id = mask.parts.rank = mask.parts.tag = ~0;
if (tag == MPI_ANY_TAG)
{
match.tag = 0;
mask.tag = 0;
}
else
{
match.tag = tag;
mask.tag = ~0;
}
match.parts.tag = mask.parts.tag = 0;
if (source == MPI_ANY_SOURCE)
{
match.rank = 0;
mask.rank = 0;
}
else
{
match.rank = source;
mask.rank = ~0;
}
rreq = recvq_unexpected_head;
while (rreq != NULL)
{
if (rreq->dev.match.context_id == match.context_id &&
(rreq->dev.match.rank & mask.rank) == match.rank &&
(rreq->dev.match.tag & mask.tag) == match.tag)
{
match.parts.rank = mask.parts.rank = 0;
while (rreq != NULL) {
if (MATCH_WITH_LEFT_MASK(rreq->dev.match, match, mask))
break;
}
rreq = rreq->dev.next;
}
}
......@@ -177,20 +178,16 @@ MPID_Request * MPIDI_CH3U_Recvq_FDU(MPI_Request sreq_id,
/* Note that since this routine is used only in the case of send_cancel,
there can be only one match if at all. */
cur_rreq = recvq_unexpected_head;
while(cur_rreq != NULL) {
if (cur_rreq->dev.sender_req_id == sreq_id &&
cur_rreq->dev.match.context_id == match->context_id &&
cur_rreq->dev.match.rank == match->rank &&
cur_rreq->dev.match.tag == match->tag)
{
matching_prev_rreq = prev_rreq;
matching_cur_rreq = cur_rreq;
}
while (cur_rreq != NULL) {
if (cur_rreq->dev.sender_req_id == sreq_id &&
(MATCH_WITH_NO_MASK(cur_rreq->dev.match, *match))) {
matching_prev_rreq = prev_rreq;
matching_cur_rreq = cur_rreq;
}
prev_rreq = cur_rreq;
cur_rreq = cur_rreq->dev.next;
}
if (matching_cur_rreq != NULL) {
if (matching_prev_rreq != NULL) {
matching_prev_rreq->dev.next = matching_cur_rreq->dev.next;
......@@ -230,6 +227,8 @@ MPID_Request * MPIDI_CH3U_Recvq_FDU_or_AEP(int source, int tag,
{
int found;
MPID_Request *rreq, *prev_rreq;
MPIDI_Message_match match;
MPIDI_Message_match mask;
MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);
......@@ -238,60 +237,46 @@ MPID_Request * MPIDI_CH3U_Recvq_FDU_or_AEP(int source, int tag,
rreq = recvq_unexpected_head;
if (rreq) {
prev_rreq = NULL;
match.parts.context_id = context_id;
match.parts.tag = tag;
match.parts.rank = source;
if (tag != MPI_ANY_TAG && source != MPI_ANY_SOURCE) {
do {
if (rreq->dev.match.context_id == context_id &&
rreq->dev.match.rank == source &&
rreq->dev.match.tag == tag) {
do {
if (MATCH_WITH_NO_MASK(rreq->dev.match, match)) {
if (prev_rreq != NULL) {
prev_rreq->dev.next = rreq->dev.next;
}
else {
recvq_unexpected_head = rreq->dev.next;
}
if (rreq->dev.next == NULL) {
recvq_unexpected_tail = prev_rreq;
}
rreq->comm = comm;
MPIR_Comm_add_ref(comm);
rreq->dev.user_buf = user_buf;
rreq->dev.user_count = user_count;
rreq->dev.datatype = datatype;
rreq->comm = comm;
MPIR_Comm_add_ref(comm);
rreq->dev.user_buf = user_buf;
rreq->dev.user_count = user_count;
rreq->dev.datatype = datatype;
found = TRUE;
goto lock_exit;
}
}
prev_rreq = rreq;
rreq = rreq->dev.next;
} while (rreq);
}
else {
MPIDI_Message_match match;
MPIDI_Message_match mask;
match.context_id = context_id;
mask.context_id = ~0;
if (tag == MPI_ANY_TAG) {
match.tag = 0;
mask.tag = 0;
}
else {
match.tag = tag;
mask.tag = ~0;
}
if (source == MPI_ANY_SOURCE) {
match.rank = 0;
mask.rank = 0;
}
else {
match.rank = source;
mask.rank = ~0;
}
mask.parts.context_id = mask.parts.rank = mask.parts.tag = ~0;
if (tag == MPI_ANY_TAG)
match.parts.tag = mask.parts.tag = 0;
if (source == MPI_ANY_SOURCE)
match.parts.rank = mask.parts.rank = 0;
do {
if (rreq->dev.match.context_id == match.context_id &&
(rreq->dev.match.rank & mask.rank) == match.rank &&
(rreq->dev.match.tag & mask.tag) == match.tag) {
if (MATCH_WITH_LEFT_MASK(rreq->dev.match, match, mask)) {
if (prev_rreq != NULL) {
prev_rreq->dev.next = rreq->dev.next;
}
......@@ -301,15 +286,14 @@ MPID_Request * MPIDI_CH3U_Recvq_FDU_or_AEP(int source, int tag,
if (rreq->dev.next == NULL) {
recvq_unexpected_tail = prev_rreq;
}
rreq->comm = comm;
MPIR_Comm_add_ref(comm);
rreq->dev.user_buf = user_buf;
rreq->dev.user_count = user_count;
rreq->dev.datatype = datatype;
rreq->comm = comm;
MPIR_Comm_add_ref(comm);
rreq->dev.user_buf = user_buf;
rreq->dev.user_count = user_count;
rreq->dev.datatype = datatype;
found = TRUE;
goto lock_exit;
}
prev_rreq = rreq;
rreq = rreq->dev.next;
} while (rreq);
......@@ -318,14 +302,26 @@ MPID_Request * MPIDI_CH3U_Recvq_FDU_or_AEP(int source, int tag,
/* A matching request was not found in the unexpected queue, so we
need to allocate a new request and add it to the posted queue */
{
int mpi_errno=0;
MPIDI_Request_create_rreq( rreq, mpi_errno,
found = FALSE;goto lock_exit );
rreq->dev.match.tag = tag;
rreq->dev.match.rank = source;
rreq->dev.match.context_id = context_id;
rreq->dev.match.parts.tag = tag;
rreq->dev.match.parts.rank = source;
rreq->dev.match.parts.context_id = context_id;
/* Added a mask for faster search on 64-bit capable
* platforms */
rreq->dev.mask.parts.context_id = ~0;
if (rreq->dev.match.parts.rank == MPI_ANY_SOURCE)
rreq->dev.mask.parts.rank = 0;
else
rreq->dev.mask.parts.rank = ~0;
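
The mask initialized above is what lets wildcard receives share the optimized comparison: wildcard positions are zeroed in both the posted key and the mask, so an incoming match value can be ANDed with the mask and compared in one step (the MATCH_WITH_LEFT_MASK case). A hedged sketch of that pattern with stand-in types and names, not MPICH code:

/* Illustrative sketch only: a posted receive stores a key with wildcard
 * positions zeroed plus a mask; an incoming (wildcard-free) match is
 * ANDed with the mask and compared against the key in one step. */
#include <stdint.h>
#include <stdio.h>

typedef union {
    struct {
        int32_t  tag;
        int16_t  rank;
        uint16_t context_id;
    } parts;
    uint64_t whole;
} match_t;

/* Negative tag/rank stand in for MPI_ANY_TAG / MPI_ANY_SOURCE here. */
static void build_posted(int32_t tag, int16_t rank, uint16_t context_id,
                         match_t *key, match_t *mask)
{
    key->parts.tag          = (tag  < 0) ? 0 : tag;
    key->parts.rank         = (rank < 0) ? 0 : rank;
    key->parts.context_id   = context_id;
    mask->parts.tag         = (tag  < 0) ? 0 : (int32_t)~0;
    mask->parts.rank        = (rank < 0) ? 0 : (int16_t)~0;
    mask->parts.context_id  = (uint16_t)~0;
}

/* Left-masked compare: the incoming match has no wildcards, so masking
 * it and comparing to the already-zeroed-where-wild key suffices. */
static int matches(match_t incoming, match_t key, match_t mask)
{
    return (incoming.whole & mask.whole) == key.whole;
}

int main(void)
{
    match_t key, mask;
    build_posted(/*tag=*/-1, /*rank=*/3, /*context_id=*/42, &key, &mask);

    match_t incoming = { .parts = { .tag = 1234, .rank = 3, .context_id = 42 } };
    printf("%d\n", matches(incoming, key, mask));   /* 1: tag is wild, rank and context match */
    return 0;
}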