Commit 32e24c1d authored by Kenneth Raffenetti
Browse files

portals4: improvements to VC close



Use the VC private area to track outstanding send operations. This way,
when a VC close packet comes in, we wait until all remaining operations
are complete before closing locally. This allows for a simpler netmod
finalize function where we are sure the network is safe to shut down.
Signed-off-by: Antonio J. Pena <apenya@mcs.anl.gov>
parent 05fe55b2
......@@ -19,7 +19,6 @@
static char recvbufs[NUM_RECV_BUFS * PTL_MAX_EAGER];
static ptl_me_t mes[NUM_RECV_BUFS];
static ptl_handle_me_t me_handles[NUM_RECV_BUFS];
static unsigned long long put_cnt = 0; /* required to not finalizing too early */
#undef FUNCNAME
#define FUNCNAME MPID_nem_ptl_nm_init
......@@ -76,8 +75,6 @@ int MPID_nem_ptl_nm_finalize(void)
MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_NM_FINALIZE);
while (put_cnt) MPID_nem_ptl_poll(1); /* Wait for puts to finish */
for (i = 0; i < NUM_RECV_BUFS; ++i) {
ret = PtlMEUnlink(me_handles[i]);
MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeunlink", "**ptlmeunlink %s",
......@@ -176,6 +173,7 @@ static inline int send_pkt(MPIDI_VC_t *vc, void *hdr_p, void *data_p, MPIDI_msg_
}
SENDBUF(sreq) = sendbuf;
sreq->ch.vc = vc;
REQ_PTL(sreq)->event_handler = MPID_nem_ptl_nm_ctl_event_handler;
ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)sendbuf, sendbuf_sz, PTL_NO_ACK_REQ,
......@@ -185,7 +183,8 @@ static inline int send_pkt(MPIDI_VC_t *vc, void *hdr_p, void *data_p, MPIDI_msg_
MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "PtlPut(size=%lu id=(%#x,%#x) pt=%#x)",
sendbuf_sz, vc_ptl->id.phys.nid,
vc_ptl->id.phys.pid, vc_ptl->ptc));
++put_cnt;
vc_ptl->num_queued_sends++;
fn_exit:
MPIDI_FUNC_EXIT(MPID_STATE_SEND_PKT);
......@@ -238,6 +237,7 @@ static int send_noncontig_pkt(MPIDI_VC_t *vc, MPID_Request *sreq, void *hdr_p)
}
SENDBUF(sreq) = sendbuf;
sreq->ch.vc = vc;
REQ_PTL(sreq)->event_handler = MPID_nem_ptl_nm_ctl_event_handler;
ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)sendbuf, sendbuf_sz, PTL_NO_ACK_REQ,
......@@ -247,7 +247,8 @@ static int send_noncontig_pkt(MPIDI_VC_t *vc, MPID_Request *sreq, void *hdr_p)
MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "PtlPut(size=%lu id=(%#x,%#x) pt=%#x)",
sendbuf_sz, vc_ptl->id.phys.nid,
vc_ptl->id.phys.pid, vc_ptl->ptc));
++put_cnt;
vc_ptl->num_queued_sends++;
fn_exit:
MPIDI_FUNC_EXIT(MPID_STATE_SEND_NONCONTIG_PKT);
......@@ -340,6 +341,9 @@ int MPID_nem_ptl_iSendContig(MPIDI_VC_t *vc, MPID_Request *sreq, void *hdr, MPID
static inline void on_data_avail(MPID_Request * req)
{
int (*reqFn) (MPIDI_VC_t *, MPID_Request *, int *);
MPIDI_VC_t *vc = req->ch.vc;
MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
MPIDI_STATE_DECL(MPID_STATE_ON_DATA_AVAIL);
MPIDI_FUNC_ENTER(MPID_STATE_ON_DATA_AVAIL);
......@@ -351,12 +355,14 @@ static inline void on_data_avail(MPID_Request * req)
}
else {
int complete;
MPIDI_VC_t *vc = req->ch.vc;
reqFn(vc, req, &complete);
MPIU_Assert(complete == TRUE);
}
--put_cnt;
vc_ptl->num_queued_sends--;
if (vc->state == MPIDI_VC_STATE_CLOSED && vc_ptl->num_queued_sends == 0)
MPID_nem_ptl_vc_terminated(vc);
MPIDI_FUNC_EXIT(MPID_STATE_ON_DATA_AVAIL);
}
......
......@@ -64,6 +64,9 @@ static int handler_send(const ptl_event_t *e)
{
int mpi_errno = MPI_SUCCESS;
MPID_Request *const sreq = e->user_ptr;
MPIDI_VC_t *vc = sreq->ch.vc;
MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
int i, ret, incomplete;
MPIDI_STATE_DECL(MPID_STATE_HANDLER_SEND);
......@@ -87,6 +90,10 @@ static int handler_send(const ptl_event_t *e)
MPIU_Free(REQ_PTL(sreq)->get_me_p);
MPIDI_CH3U_Request_complete(sreq);
vc_ptl->num_queued_sends--;
if (vc->state == MPIDI_VC_STATE_CLOSED && vc_ptl->num_queued_sends == 0)
MPID_nem_ptl_vc_terminated(vc);
} else {
MPIDI_CH3U_Request_decrement_cc(sreq, &incomplete);
}
......@@ -127,6 +134,8 @@ static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *
sreq->dev.match.parts.rank = dest;
sreq->dev.match.parts.tag = tag;
sreq->dev.match.parts.context_id = comm->context_id + context_offset;
sreq->ch.vc = vc;
vc_ptl->num_queued_sends++;
if (!vc_ptl->id_initialized) {
mpi_errno = MPID_nem_ptl_init_id(vc);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment