Commit 5132e070, authored by Xin Zhao and committed by Pavan Balaji
Browse files

Correct the usage of req's segment_first and segment_size in sendNonContig



The implementations of SendNoncontig for intra-node communication in
Nemesis and for inter-node communication in the network modules (except
TCP and SCIF) assume that req->dev.segment_first is zero and that
req->dev.segment_size is the size of the data, which is not always true.
If we stream an RMA operation and issue only part of the derived data,
req->dev.segment_first specifies the current starting location of the data
and req->dev.segment_size specifies the current ending location of the data.
The data size is therefore (req->dev.segment_size - req->dev.segment_first),
not req->dev.segment_size.
This patch corrects this issue in Nemesis and the network modules.
Signed-off-by: Pavan Balaji <balaji@anl.gov>
parent 7c890ab2
...@@ -431,7 +431,6 @@ MPID_nem_mpich_send_seg_header (MPID_Segment *segment, MPIDI_msg_sz_t *segment_f ...@@ -431,7 +431,6 @@ MPID_nem_mpich_send_seg_header (MPID_Segment *segment, MPIDI_msg_sz_t *segment_f
MPIU_Assert(vc_ch->is_local); /* netmods will have their own implementation */ MPIU_Assert(vc_ch->is_local); /* netmods will have their own implementation */
MPIU_Assert(header_sz <= sizeof(MPIDI_CH3_Pkt_t)); MPIU_Assert(header_sz <= sizeof(MPIDI_CH3_Pkt_t));
MPIU_Assert(*segment_first == 0); /* this routine is only called for new messages */
DO_PAPI (PAPI_reset (PAPI_EventSet)); DO_PAPI (PAPI_reset (PAPI_EventSet));
......
...@@ -772,10 +772,9 @@ static int MPID_nem_ib_SendNoncontig_core(MPIDI_VC_t * vc, MPID_Request * sreq, ...@@ -772,10 +772,9 @@ static int MPID_nem_ib_SendNoncontig_core(MPIDI_VC_t * vc, MPID_Request * sreq,
MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_IB_SENDNONCONTIG_CORE); MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_IB_SENDNONCONTIG_CORE);
MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_IB_SENDNONCONTIG_CORE); MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_IB_SENDNONCONTIG_CORE);
MPIU_Assert(sreq->dev.segment_first == 0);
last = sreq->dev.segment_size; /* segment_size is byte offset */ last = sreq->dev.segment_size; /* segment_size is byte offset */
if (last > 0) { if (last > 0) {
REQ_FIELD(sreq, lmt_pack_buf) = MPIU_Malloc((size_t) sreq->dev.segment_size); REQ_FIELD(sreq, lmt_pack_buf) = MPIU_Malloc((size_t) (sreq->dev.segment_size - sreq->dev.segment_first));
MPIU_ERR_CHKANDJUMP(!REQ_FIELD(sreq, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER, MPIU_ERR_CHKANDJUMP(!REQ_FIELD(sreq, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER,
"**outofmemory"); "**outofmemory");
MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last,
...@@ -792,7 +791,7 @@ static int MPID_nem_ib_SendNoncontig_core(MPIDI_VC_t * vc, MPID_Request * sreq, ...@@ -792,7 +791,7 @@ static int MPID_nem_ib_SendNoncontig_core(MPIDI_VC_t * vc, MPID_Request * sreq,
|| (((MPIDI_CH3_Pkt_t *) hdr)->type == MPIDI_CH3_PKT_ACCUMULATE))) { || (((MPIDI_CH3_Pkt_t *) hdr)->type == MPIDI_CH3_PKT_ACCUMULATE))) {
/* If request length is too long, create LMT packet */ /* If request length is too long, create LMT packet */
if ( MPID_NEM_IB_NETMOD_HDR_SIZEOF(vc_ib->ibcom->local_ringbuf_type) if ( MPID_NEM_IB_NETMOD_HDR_SIZEOF(vc_ib->ibcom->local_ringbuf_type)
+ sizeof(MPIDI_CH3_Pkt_t) + sreq->dev.segment_size + sizeof(MPIDI_CH3_Pkt_t) + sreq->dev.segment_size - sreq->dev.segment_first
> MPID_NEM_IB_COM_RDMABUF_SZSEG - sizeof(MPID_nem_ib_netmod_trailer_t)) { > MPID_NEM_IB_COM_RDMABUF_SZSEG - sizeof(MPID_nem_ib_netmod_trailer_t)) {
pkt_netmod.type = MPIDI_NEM_PKT_NETMOD; pkt_netmod.type = MPIDI_NEM_PKT_NETMOD;
......
...@@ -168,7 +168,7 @@ int MPID_nem_mxm_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr, ...@@ -168,7 +168,7 @@ int MPID_nem_mxm_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
_dbg_mxm_output(5, _dbg_mxm_output(5,
"SendNoncontig ========> Sending ADI msg (to=%d type=%d) for req %p (data_size %d, %d) \n", "SendNoncontig ========> Sending ADI msg (to=%d type=%d) for req %p (data_size %d, %d) \n",
vc->pg_rank, sreq->dev.pending_pkt.type, sreq, sizeof(MPIDI_CH3_Pkt_t), vc->pg_rank, sreq->dev.pending_pkt.type, sreq, sizeof(MPIDI_CH3_Pkt_t),
sreq->dev.segment_size); sreq->dev.segment_size-sreq->dev.segment_first);
vc_area = VC_BASE(vc); vc_area = VC_BASE(vc);
req_area = REQ_BASE(sreq); req_area = REQ_BASE(sreq);
...@@ -179,17 +179,16 @@ int MPID_nem_mxm_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr, ...@@ -179,17 +179,16 @@ int MPID_nem_mxm_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
req_area->iov_buf[0].ptr = (void *) &(sreq->dev.pending_pkt); req_area->iov_buf[0].ptr = (void *) &(sreq->dev.pending_pkt);
req_area->iov_buf[0].length = sizeof(MPIDI_CH3_Pkt_t); req_area->iov_buf[0].length = sizeof(MPIDI_CH3_Pkt_t);
MPIU_Assert(sreq->dev.segment_first == 0);
last = sreq->dev.segment_size; last = sreq->dev.segment_size;
if (last > 0) { if (last > 0) {
sreq->dev.tmpbuf = MPIU_Malloc((size_t) sreq->dev.segment_size); sreq->dev.tmpbuf = MPIU_Malloc((size_t) (sreq->dev.segment_size - sreq->dev.segment_first));
MPIU_Assert(sreq->dev.tmpbuf); MPIU_Assert(sreq->dev.tmpbuf);
MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.tmpbuf); MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.tmpbuf);
MPIU_Assert(last == sreq->dev.segment_size); MPIU_Assert(last == sreq->dev.segment_size);
req_area->iov_count = 2; req_area->iov_count = 2;
req_area->iov_buf[1].ptr = sreq->dev.tmpbuf; req_area->iov_buf[1].ptr = sreq->dev.tmpbuf;
req_area->iov_buf[1].length = last; req_area->iov_buf[1].length = last - sreq->dev.segment_first;
} }
vc_area->pending_sends += 1; vc_area->pending_sends += 1;
......
...@@ -127,6 +127,7 @@ int MPID_nem_newmad_SendNoncontig(MPIDI_VC_t *vc, MPID_Request *sreq, void *head ...@@ -127,6 +127,7 @@ int MPID_nem_newmad_SendNoncontig(MPIDI_VC_t *vc, MPID_Request *sreq, void *head
struct iovec newmad_iov[2]; struct iovec newmad_iov[2];
int num_iov = 1; int num_iov = 1;
MPIDI_msg_sz_t last; MPIDI_msg_sz_t last;
MPIDI_msg_sz_t data_sz;
/* /*
struct iovec *newmad_iov = (struct iovec *)MPIU_Malloc(NMAD_IOV_MAX_DEPTH*sizeof(struct iovec)); struct iovec *newmad_iov = (struct iovec *)MPIU_Malloc(NMAD_IOV_MAX_DEPTH*sizeof(struct iovec));
...@@ -147,11 +148,11 @@ int MPID_nem_newmad_SendNoncontig(MPIDI_VC_t *vc, MPID_Request *sreq, void *head ...@@ -147,11 +148,11 @@ int MPID_nem_newmad_SendNoncontig(MPIDI_VC_t *vc, MPID_Request *sreq, void *head
newmad_iov[0].iov_base = (char *)&(sreq->dev.pending_pkt); newmad_iov[0].iov_base = (char *)&(sreq->dev.pending_pkt);
newmad_iov[0].iov_len = sizeof(MPIDI_CH3_Pkt_t); newmad_iov[0].iov_len = sizeof(MPIDI_CH3_Pkt_t);
MPIU_Assert(sreq->dev.segment_first == 0); data_sz = sreq->dev.segment_size - sreq->dev.segment_first;
last = sreq->dev.segment_size; last = sreq->dev.segment_size;
if (last > 0) if (data_sz > 0)
{ {
sreq->dev.tmpbuf = MPIU_Malloc((size_t)sreq->dev.segment_size); sreq->dev.tmpbuf = MPIU_Malloc((size_t) data_sz);
REQ_FIELD(sreq,deltmpbuf) = TMP_DEL_VALUE; REQ_FIELD(sreq,deltmpbuf) = TMP_DEL_VALUE;
MPID_Segment_pack(sreq->dev.segment_ptr,sreq->dev.segment_first, &last,(char *)(sreq->dev.tmpbuf)); MPID_Segment_pack(sreq->dev.segment_ptr,sreq->dev.segment_first, &last,(char *)(sreq->dev.tmpbuf));
MPIU_Assert(last == sreq->dev.segment_size); MPIU_Assert(last == sreq->dev.segment_size);
......
...@@ -190,17 +190,19 @@ int MPID_nem_ofi_SendNoncontig(MPIDI_VC_t * vc, ...@@ -190,17 +190,19 @@ int MPID_nem_ofi_SendNoncontig(MPIDI_VC_t * vc,
MPI_Aint data_sz; MPI_Aint data_sz;
uint64_t match_bits; uint64_t match_bits;
MPID_Request *cts_req; MPID_Request *cts_req;
MPIDI_msg_sz_t first, last;
BEGIN_FUNC(FCNAME); BEGIN_FUNC(FCNAME);
MPIU_Assert(hdr_sz <= (MPIDI_msg_sz_t) sizeof(MPIDI_CH3_Pkt_t)); MPIU_Assert(hdr_sz <= (MPIDI_msg_sz_t) sizeof(MPIDI_CH3_Pkt_t));
MPIU_Assert(sreq->dev.segment_first == 0);
data_sz = sreq->dev.segment_size; first = sreq->dev.segment_first;
last = sreq->dev.segment_size;
data_sz = sreq->dev.segment_size - sreq->dev.segment_first;
pkt_len = sizeof(MPIDI_CH3_Pkt_t) + data_sz; pkt_len = sizeof(MPIDI_CH3_Pkt_t) + data_sz;
pack_buffer = MPIU_Malloc(pkt_len); pack_buffer = MPIU_Malloc(pkt_len);
MPIU_Assert(pack_buffer); MPIU_Assert(pack_buffer);
MPIU_Memcpy(pack_buffer, hdr, hdr_sz); MPIU_Memcpy(pack_buffer, hdr, hdr_sz);
MPID_Segment_pack(sreq->dev.segment_ptr, 0, &data_sz, pack_buffer + sizeof(MPIDI_CH3_Pkt_t)); MPID_Segment_pack(sreq->dev.segment_ptr, first, &last, pack_buffer + sizeof(MPIDI_CH3_Pkt_t));
START_COMM(); START_COMM();
MPID_nem_ofi_poll(MPID_NONBLOCKING_POLL); MPID_nem_ofi_poll(MPID_NONBLOCKING_POLL);
END_FUNC_RC(FCNAME); END_FUNC_RC(FCNAME);
......
...@@ -205,15 +205,14 @@ static int send_noncontig_pkt(MPIDI_VC_t *vc, MPID_Request *sreq, void *hdr_p) ...@@ -205,15 +205,14 @@ static int send_noncontig_pkt(MPIDI_VC_t *vc, MPID_Request *sreq, void *hdr_p)
MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc); MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
int ret; int ret;
char *sendbuf; char *sendbuf;
const size_t sent_sz = sreq->dev.segment_size < PAYLOAD_SIZE ? sreq->dev.segment_size : PAYLOAD_SIZE; const size_t data_sz = sreq->dev.segment_size - sreq->dev.segment_first;
const size_t sent_sz = data_sz < PAYLOAD_SIZE ? data_sz : PAYLOAD_SIZE;
const size_t sendbuf_sz = SENDBUF_SIZE(sent_sz); const size_t sendbuf_sz = SENDBUF_SIZE(sent_sz);
const size_t remaining = sreq->dev.segment_size - sent_sz; const size_t remaining = data_sz - sent_sz;
ptl_match_bits_t match_bits = NPTL_MATCH(CTL_TAG, 0, MPIDI_Process.my_pg_rank); ptl_match_bits_t match_bits = NPTL_MATCH(CTL_TAG, 0, MPIDI_Process.my_pg_rank);
MPIDI_STATE_DECL(MPID_STATE_SEND_NONCONTIG_PKT); MPIDI_STATE_DECL(MPID_STATE_SEND_NONCONTIG_PKT);
MPIDI_FUNC_ENTER(MPID_STATE_SEND_NONCONTIG_PKT); MPIDI_FUNC_ENTER(MPID_STATE_SEND_NONCONTIG_PKT);
MPIU_Assert(sreq->dev.segment_first == 0);
sendbuf = MPIU_Malloc(sendbuf_sz); sendbuf = MPIU_Malloc(sendbuf_sz);
MPIU_Assert(sendbuf != NULL); MPIU_Assert(sendbuf != NULL);
MPIU_Memcpy(sendbuf, hdr_p, sizeof(MPIDI_CH3_Pkt_t)); MPIU_Memcpy(sendbuf, hdr_p, sizeof(MPIDI_CH3_Pkt_t));
...@@ -221,15 +220,16 @@ static int send_noncontig_pkt(MPIDI_VC_t *vc, MPID_Request *sreq, void *hdr_p) ...@@ -221,15 +220,16 @@ static int send_noncontig_pkt(MPIDI_VC_t *vc, MPID_Request *sreq, void *hdr_p)
REQ_PTL(sreq)->num_gets = 0; REQ_PTL(sreq)->num_gets = 0;
REQ_PTL(sreq)->put_done = 0; REQ_PTL(sreq)->put_done = 0;
if (sreq->dev.segment_size) { if (data_sz) {
MPIDI_msg_sz_t last = sent_sz; MPIDI_msg_sz_t first = sreq->dev.segment_first;
MPID_Segment_pack(sreq->dev.segment_ptr, 0, &last, sendbuf + sizeof(MPIDI_CH3_Pkt_t)); MPIDI_msg_sz_t last = sreq->dev.segment_first + sent_sz;
MPID_Segment_pack(sreq->dev.segment_ptr, first, &last, sendbuf + sizeof(MPIDI_CH3_Pkt_t));
if (remaining) { /* Post MEs for the remote gets */ if (remaining) { /* Post MEs for the remote gets */
TMPBUF(sreq) = MPIU_Malloc(remaining); TMPBUF(sreq) = MPIU_Malloc(remaining);
sreq->dev.segment_first = last; first = last;
last = sreq->dev.segment_size; last = sreq->dev.segment_size;
MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, TMPBUF(sreq)); MPID_Segment_pack(sreq->dev.segment_ptr, first, &last, TMPBUF(sreq));
MPIU_Assert(last == sreq->dev.segment_size); MPIU_Assert(last == sreq->dev.segment_size);
mpi_errno = meappend_large(vc_ptl->id, sreq, NPTL_MATCH(GET_TAG, 0, MPIDI_Process.my_pg_rank), TMPBUF(sreq), remaining); mpi_errno = meappend_large(vc_ptl->id, sreq, NPTL_MATCH(GET_TAG, 0, MPIDI_Process.my_pg_rank), TMPBUF(sreq), remaining);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment