Commit d459c025 authored by Kenneth Raffenetti, committed by Antonio J. Pena
Browse files

portals4: handle PTL_NO_SPACE



PtlMEAppend can return a PTL_NO_SPACE error, meaning there are too many
outstanding operations already active. To avoid an abort, we simply retry
after processing the events that have queued up locally.
Signed-off-by: Antonio J. Pena <apenya@mcs.anl.gov>
parent b28801ec
...@@ -134,8 +134,15 @@ static inline int meappend_large(ptl_process_t id, MPID_Request *req, ptl_match_ ...@@ -134,8 +134,15 @@ static inline int meappend_large(ptl_process_t id, MPID_Request *req, ptl_match_
++REQ_PTL(req)->num_gets; ++REQ_PTL(req)->num_gets;
ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_control_pt, &me, PTL_PRIORITY_LIST, req, /* if there is no space to append the entry, process outstanding events and try again */
&foo_me_handle); while (1) {
ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_control_pt, &me, PTL_PRIORITY_LIST, req,
&foo_me_handle);
if (ret != PTL_NO_SPACE)
break;
MPID_nem_ptl_poll(1);
}
MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s",
MPID_nem_ptl_strerror(ret)); MPID_nem_ptl_strerror(ret));
MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "PtlMEAppend(req=%p tag=%#lx)", req, tag)); MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "PtlMEAppend(req=%p tag=%#lx)", req, tag));
...@@ -443,8 +450,14 @@ int MPID_nem_ptl_nm_ctl_event_handler(const ptl_event_t *e) ...@@ -443,8 +450,14 @@ int MPID_nem_ptl_nm_ctl_event_handler(const ptl_event_t *e)
} }
/* Repost the recv buffer */ /* Repost the recv buffer */
ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_control_pt, &mes[buf_idx], /* if there is no space to append the entry, process outstanding events and try again */
PTL_PRIORITY_LIST, e->user_ptr /* buf_idx */, &me_handles[buf_idx]); while (1) {
ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_control_pt, &mes[buf_idx],
PTL_PRIORITY_LIST, e->user_ptr /* buf_idx */, &me_handles[buf_idx]);
if (ret != PTL_NO_SPACE)
break;
MPID_nem_ptl_poll(1);
}
MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend",
"**ptlmeappend %s", MPID_nem_ptl_strerror(ret)); "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));
} }
......
...@@ -105,8 +105,14 @@ static int append_overflow(int i) ...@@ -105,8 +105,14 @@ static int append_overflow(int i)
me.ignore_bits = ~((ptl_match_bits_t)0); me.ignore_bits = ~((ptl_match_bits_t)0);
me.min_free = PTL_MAX_EAGER; me.min_free = PTL_MAX_EAGER;
ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_OVERFLOW_LIST, (void *)(size_t)i, /* if there is no space to append the entry, process outstanding events and try again */
&overflow_me_handle[i]); while (1) {
ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_OVERFLOW_LIST, (void *)(size_t)i,
&overflow_me_handle[i]);
if (ret != PTL_NO_SPACE)
break;
MPID_nem_ptl_poll(1);
}
MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret)); MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));
fn_exit: fn_exit:
......
...@@ -543,7 +543,13 @@ int MPID_nem_ptl_recv_posted(MPIDI_VC_t *vc, MPID_Request *rreq) ...@@ -543,7 +543,13 @@ int MPID_nem_ptl_recv_posted(MPIDI_VC_t *vc, MPID_Request *rreq)
} }
ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_PRIORITY_LIST, rreq, &REQ_PTL(rreq)->put_me); /* if there is no space to append the entry, process outstanding events and try again */
while (1) {
ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_PRIORITY_LIST, rreq, &REQ_PTL(rreq)->put_me);
if (ret != PTL_NO_SPACE)
break;
MPID_nem_ptl_poll(1);
}
MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret)); MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));
DBG_MSG_MEAPPEND("REG", vc ? vc->pg_rank : MPI_ANY_SOURCE, me, rreq); DBG_MSG_MEAPPEND("REG", vc ? vc->pg_rank : MPI_ANY_SOURCE, me, rreq);
MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, " buf=%p", me.start); MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, " buf=%p", me.start);
......
...@@ -41,7 +41,13 @@ static void big_meappend(void *buf, ptl_size_t left_to_send, MPIDI_VC_t *vc, ptl ...@@ -41,7 +41,13 @@ static void big_meappend(void *buf, ptl_size_t left_to_send, MPIDI_VC_t *vc, ptl
else else
me.length = left_to_send; me.length = left_to_send;
ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq, &REQ_PTL(sreq)->get_me_p[i]); /* if there is no space to append the entry, process outstanding events and try again */
while (1) {
ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq, &REQ_PTL(sreq)->get_me_p[i]);
if (ret != PTL_NO_SPACE)
break;
MPID_nem_ptl_poll(1);
}
DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq); DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);
MPIU_Assert(ret == 0); MPIU_Assert(ret == 0);
...@@ -381,8 +387,14 @@ static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void * ...@@ -381,8 +387,14 @@ static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *
MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->get_me_p, ptl_handle_me_t *, sizeof(ptl_handle_me_t), mpi_errno, "get_me_p"); MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->get_me_p, ptl_handle_me_t *, sizeof(ptl_handle_me_t), mpi_errno, "get_me_p");
REQ_PTL(sreq)->num_gets = 1; REQ_PTL(sreq)->num_gets = 1;
ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq, /* if there is no space to append the entry, process outstanding events and try again */
&REQ_PTL(sreq)->get_me_p[0]); while (1) {
ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq,
&REQ_PTL(sreq)->get_me_p[0]);
if (ret != PTL_NO_SPACE)
break;
MPID_nem_ptl_poll(1);
}
MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret)); MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));
DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq); DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);
......
...@@ -35,7 +35,11 @@ int rptli_post_control_buffer(ptl_handle_ni_t ni_handle, ptl_pt_index_t pt, ...@@ -35,7 +35,11 @@ int rptli_post_control_buffer(ptl_handle_ni_t ni_handle, ptl_pt_index_t pt,
me.ignore_bits = 0; me.ignore_bits = 0;
me.min_free = 0; me.min_free = 0;
ret = PtlMEAppend(ni_handle, pt, &me, PTL_PRIORITY_LIST, NULL, me_handle); while (1) {
ret = PtlMEAppend(ni_handle, pt, &me, PTL_PRIORITY_LIST, NULL, me_handle);
if (ret != PTL_NO_SPACE)
break;
}
RPTLU_ERR_POP(ret, "Error appending empty buffer to priority list\n"); RPTLU_ERR_POP(ret, "Error appending empty buffer to priority list\n");
fn_exit: fn_exit:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment