Commit f86004a5 authored by Huiwei Lu's avatar Huiwei Lu Committed by Wesley Bland
Browse files

Fixes one missing case in MPIDI_CH3U_Clean_recvq



Revoke will call MPIDI_CH3U_Clean_recvq to dequeue all requests with
revoked communicators. There is one missing case: when there's hierarchy
communicators that use a different context id. This patch adds a case to
check the hierarchy communicators.
Signed-off-by: default avatarWesley Bland <wbland@anl.gov>
parent c668ce8c
......@@ -958,7 +958,8 @@ int MPIDI_CH3U_Clean_recvq(MPID_Comm *comm_ptr)
match.parts.context_id = comm_ptr->recvcontext_id + MPID_CONTEXT_INTRA_COLL;
if (MATCH_WITH_LEFT_RIGHT_MASK(rreq->dev.match, match, mask)) {
if (rreq->dev.match.parts.tag != MPIR_AGREE_TAG && rreq->dev.match.parts.tag != MPIR_SHRINK_TAG) {
if (MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_AGREE_TAG &&
MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_SHRINK_TAG) {
MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
"cleaning up unexpected collective pkt rank=%d tag=%d contextid=%d",
rreq->dev.match.parts.rank, rreq->dev.match.parts.tag, rreq->dev.match.parts.context_id));
......@@ -967,12 +968,56 @@ int MPIDI_CH3U_Clean_recvq(MPID_Comm *comm_ptr)
}
}
if (MPIR_CVAR_ENABLE_SMP_COLLECTIVES && MPIR_Comm_is_node_aware(comm_ptr)) {
int offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ? MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
if (MPIR_Comm_is_node_aware(comm_ptr)) {
int offset;
offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ? MPID_CONTEXT_INTRA_PT2PT : MPID_CONTEXT_INTER_PT2PT;
match.parts.context_id = comm_ptr->recvcontext_id + MPID_CONTEXT_INTRANODE_OFFSET + offset;
if (MATCH_WITH_LEFT_RIGHT_MASK(rreq->dev.match, match, mask)) {
if (rreq->dev.match.parts.tag != MPIR_AGREE_TAG && rreq->dev.match.parts.tag != MPIR_SHRINK_TAG) {
if (MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_AGREE_TAG &&
MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_SHRINK_TAG) {
MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
"cleaning up unexpected pt2pt pkt rank=%d tag=%d contextid=%d",
rreq->dev.match.parts.rank, rreq->dev.match.parts.tag, rreq->dev.match.parts.context_id));
dequeue_and_set_error(&rreq, prev_rreq, &recvq_unexpected_head, &recvq_unexpected_tail, &error, MPI_PROC_NULL);
continue;
}
}
offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ? MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
match.parts.context_id = comm_ptr->recvcontext_id + MPID_CONTEXT_INTRANODE_OFFSET + offset;
if (MATCH_WITH_LEFT_RIGHT_MASK(rreq->dev.match, match, mask)) {
if (MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_AGREE_TAG &&
MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_SHRINK_TAG) {
MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
"cleaning up unexpected collective pkt rank=%d tag=%d contextid=%d",
rreq->dev.match.parts.rank, rreq->dev.match.parts.tag, rreq->dev.match.parts.context_id));
dequeue_and_set_error(&rreq, prev_rreq, &recvq_unexpected_head, &recvq_unexpected_tail, &error, MPI_PROC_NULL);
continue;
}
}
offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ? MPID_CONTEXT_INTRA_PT2PT : MPID_CONTEXT_INTER_PT2PT;
match.parts.context_id = comm_ptr->recvcontext_id + MPID_CONTEXT_INTERNODE_OFFSET + offset;
if (MATCH_WITH_LEFT_RIGHT_MASK(rreq->dev.match, match, mask)) {
if (MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_AGREE_TAG &&
MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_SHRINK_TAG) {
MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
"cleaning up unexpected pt2pt pkt rank=%d tag=%d contextid=%d",
rreq->dev.match.parts.rank, rreq->dev.match.parts.tag, rreq->dev.match.parts.context_id));
dequeue_and_set_error(&rreq, prev_rreq, &recvq_unexpected_head, &recvq_unexpected_tail, &error, MPI_PROC_NULL);
continue;
}
}
offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ? MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
match.parts.context_id = comm_ptr->recvcontext_id + MPID_CONTEXT_INTERNODE_OFFSET + offset;
if (MATCH_WITH_LEFT_RIGHT_MASK(rreq->dev.match, match, mask)) {
if (MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_AGREE_TAG &&
MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_SHRINK_TAG) {
MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
"cleaning up unexpected collective pkt rank=%d tag=%d contextid=%d",
rreq->dev.match.parts.rank, rreq->dev.match.parts.tag, rreq->dev.match.parts.context_id));
......@@ -1005,7 +1050,8 @@ int MPIDI_CH3U_Clean_recvq(MPID_Comm *comm_ptr)
match.parts.context_id = comm_ptr->recvcontext_id + MPID_CONTEXT_INTRA_COLL;
if (MATCH_WITH_LEFT_RIGHT_MASK(rreq->dev.match, match, mask)) {
if (rreq->dev.match.parts.tag != MPIR_AGREE_TAG && rreq->dev.match.parts.tag != MPIR_SHRINK_TAG) {
if (MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_AGREE_TAG &&
MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_SHRINK_TAG) {
MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
"cleaning up posted collective pkt rank=%d tag=%d contextid=%d",
rreq->dev.match.parts.rank, rreq->dev.match.parts.tag, rreq->dev.match.parts.context_id));
......@@ -1014,12 +1060,54 @@ int MPIDI_CH3U_Clean_recvq(MPID_Comm *comm_ptr)
}
}
if (MPIR_CVAR_ENABLE_SMP_COLLECTIVES && MPIR_Comm_is_node_aware(comm_ptr)) {
int offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ? MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
if (MPIR_Comm_is_node_aware(comm_ptr)) {
int offset;
offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ? MPID_CONTEXT_INTRA_PT2PT : MPID_CONTEXT_INTER_PT2PT;
match.parts.context_id = comm_ptr->recvcontext_id + MPID_CONTEXT_INTRANODE_OFFSET + offset;
if (MATCH_WITH_LEFT_RIGHT_MASK(rreq->dev.match, match, mask)) {
if (rreq->dev.match.parts.tag != MPIR_AGREE_TAG && rreq->dev.match.parts.tag != MPIR_SHRINK_TAG) {
if (MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_AGREE_TAG &&
MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_SHRINK_TAG) {
MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
"cleaning up posted pt2pt pkt rank=%d tag=%d contextid=%d",
rreq->dev.match.parts.rank, rreq->dev.match.parts.tag, rreq->dev.match.parts.context_id));
dequeue_and_set_error(&rreq, prev_rreq, &recvq_posted_head, &recvq_posted_tail, &error, MPI_PROC_NULL);
continue;
}
}
offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ? MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
if (MATCH_WITH_LEFT_RIGHT_MASK(rreq->dev.match, match, mask)) {
if (MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_AGREE_TAG &&
MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_SHRINK_TAG) {
MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
"cleaning up posted collective pkt rank=%d tag=%d contextid=%d",
rreq->dev.match.parts.rank, rreq->dev.match.parts.tag, rreq->dev.match.parts.context_id));
dequeue_and_set_error(&rreq, prev_rreq, &recvq_posted_head, &recvq_posted_tail, &error, MPI_PROC_NULL);
continue;
}
}
offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ? MPID_CONTEXT_INTRA_PT2PT : MPID_CONTEXT_INTER_PT2PT;
match.parts.context_id = comm_ptr->recvcontext_id + MPID_CONTEXT_INTERNODE_OFFSET + offset;
if (MATCH_WITH_LEFT_RIGHT_MASK(rreq->dev.match, match, mask)) {
if (MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_AGREE_TAG &&
MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_SHRINK_TAG) {
MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
"cleaning up posted pt2pt pkt rank=%d tag=%d contextid=%d",
rreq->dev.match.parts.rank, rreq->dev.match.parts.tag, rreq->dev.match.parts.context_id));
dequeue_and_set_error(&rreq, prev_rreq, &recvq_posted_head, &recvq_posted_tail, &error, MPI_PROC_NULL);
continue;
}
}
offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ? MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
if (MATCH_WITH_LEFT_RIGHT_MASK(rreq->dev.match, match, mask)) {
if (MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_AGREE_TAG &&
MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_SHRINK_TAG) {
MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
"cleaning up posted collective pkt rank=%d tag=%d contextid=%d",
rreq->dev.match.parts.rank, rreq->dev.match.parts.tag, rreq->dev.match.parts.context_id));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment