Commit 2069c15e authored by Valentin Petrov, committed by Charles J Archer
Browse files

OFI: Bug fix for RTS/CTS/DATA protocol.



MPID_nem_ofi_data_callback used to check sreq->cc in order to track the progress
of the RTS/CTS/DATA protocol. There was an implicit assumption that the fi_tsend
carrying the RTS completes first. However, this would cause a hang if the
fi_trecv completed earlier.
The fix: do not rely on the cc, but check the protocol bits of the tag explicitly.
Note that the RTS/CTS/DATA bits are no longer accumulated (i.e., no more
"wc->tag | CTS/DATA").
Signed-off-by: Charles J Archer <charles.j.archer@intel.com>
parent 34e57aa8
...@@ -233,7 +233,7 @@ static inline int MPID_nem_ofi_preposted_callback(cq_tagged_entry_t * wc, MPID_R ...@@ -233,7 +233,7 @@ static inline int MPID_nem_ofi_preposted_callback(cq_tagged_entry_t * wc, MPID_R
REQ_OFI(new_rreq)->pack_buffer_size, REQ_OFI(new_rreq)->pack_buffer_size,
gl_data.mr, gl_data.mr,
VC_OFI(vc)->direct_addr, VC_OFI(vc)->direct_addr,
wc->tag | MPID_MSG_DATA, 0, &(REQ_OFI(new_rreq)->ofi_context)), trecv); MPID_MSG_DATA, 0, &(REQ_OFI(new_rreq)->ofi_context)), trecv);
MPID_nem_ofi_create_req(&sreq, 1); MPID_nem_ofi_create_req(&sreq, 1);
sreq->dev.OnDataAvail = NULL; sreq->dev.OnDataAvail = NULL;
...@@ -245,7 +245,7 @@ static inline int MPID_nem_ofi_preposted_callback(cq_tagged_entry_t * wc, MPID_R ...@@ -245,7 +245,7 @@ static inline int MPID_nem_ofi_preposted_callback(cq_tagged_entry_t * wc, MPID_R
0, 0,
gl_data.mr, gl_data.mr,
VC_OFI(vc)->direct_addr, VC_OFI(vc)->direct_addr,
wc->tag | MPID_MSG_CTS, &(REQ_OFI(sreq)->ofi_context)), tsend); MPID_MSG_CTS, &(REQ_OFI(sreq)->ofi_context)), tsend);
MPIU_Assert(gl_data.persistent_req == rreq); MPIU_Assert(gl_data.persistent_req == rreq);
rreq->dev.user_count = 0; rreq->dev.user_count = 0;
......
...@@ -73,18 +73,18 @@ ...@@ -73,18 +73,18 @@
REQ_OFI(sreq)->vc = vc; \ REQ_OFI(sreq)->vc = vc; \
REQ_OFI(sreq)->tag = match_bits; \ REQ_OFI(sreq)->tag = match_bits; \
\ \
MPID_nem_ofi_create_req(&cts_req, 1); \ MPID_nem_ofi_create_req(&cts_req, 1); \
cts_req->dev.OnDataAvail = NULL; \ cts_req->dev.OnDataAvail = NULL; \
cts_req->dev.next = NULL; \ cts_req->dev.next = NULL; \
REQ_OFI(cts_req)->event_callback = MPID_nem_ofi_cts_recv_callback; \ REQ_OFI(cts_req)->event_callback = MPID_nem_ofi_cts_recv_callback; \
REQ_OFI(cts_req)->parent = sreq; \ REQ_OFI(cts_req)->parent = sreq; \
\ \
FI_RC(fi_trecv(gl_data.endpoint, \ FI_RC(fi_trecv(gl_data.endpoint, \
NULL, \ NULL, \
0, \ 0, \
gl_data.mr, \ gl_data.mr, \
VC_OFI(vc)->direct_addr, \ VC_OFI(vc)->direct_addr, \
match_bits | MPID_MSG_CTS, \ MPID_MSG_CTS, \
0, /* Exact tag match, no ignore bits */ \ 0, /* Exact tag match, no ignore bits */ \
&(REQ_OFI(cts_req)->ofi_context)),trecv); \ &(REQ_OFI(cts_req)->ofi_context)),trecv); \
if (gl_data.api_set == API_SET_1){ \ if (gl_data.api_set == API_SET_1){ \
...@@ -122,15 +122,16 @@ static int MPID_nem_ofi_data_callback(cq_tagged_entry_t * wc, MPID_Request * sre ...@@ -122,15 +122,16 @@ static int MPID_nem_ofi_data_callback(cq_tagged_entry_t * wc, MPID_Request * sre
req_fn reqFn; req_fn reqFn;
uint64_t tag = 0; uint64_t tag = 0;
BEGIN_FUNC(FCNAME); BEGIN_FUNC(FCNAME);
if (MPID_cc_get(sreq->cc) == 2) { switch (wc->tag & MPID_PROTOCOL_MASK) {
case MPID_MSG_CTS:
vc = REQ_OFI(sreq)->vc; vc = REQ_OFI(sreq)->vc;
REQ_OFI(sreq)->tag = tag | MPID_MSG_DATA;
FI_RC(fi_tsend(gl_data.endpoint, FI_RC(fi_tsend(gl_data.endpoint,
REQ_OFI(sreq)->pack_buffer, REQ_OFI(sreq)->pack_buffer,
REQ_OFI(sreq)->pack_buffer_size, REQ_OFI(sreq)->pack_buffer_size,
gl_data.mr, gl_data.mr,
VC_OFI(vc)->direct_addr, VC_OFI(vc)->direct_addr,
MPID_MSG_DATA, (void *) &(REQ_OFI(sreq)->ofi_context)), tsend); MPID_MSG_DATA, (void *) &(REQ_OFI(sreq)->ofi_context)), tsend);
MPIDI_CH3U_Request_complete(sreq);
break; break;
case MPID_MSG_DATA: case MPID_MSG_DATA:
if (REQ_OFI(sreq)->pack_buffer) if (REQ_OFI(sreq)->pack_buffer)
...@@ -148,6 +149,7 @@ static int MPID_nem_ofi_data_callback(cq_tagged_entry_t * wc, MPID_Request * sre ...@@ -148,6 +149,7 @@ static int MPID_nem_ofi_data_callback(cq_tagged_entry_t * wc, MPID_Request * sre
break; break;
case MPID_MSG_RTS: case MPID_MSG_RTS:
MPIDI_CH3U_Request_complete(sreq); MPIDI_CH3U_Request_complete(sreq);
break;
} }
END_FUNC_RC(FCNAME); END_FUNC_RC(FCNAME);
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment