/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2014 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "ptl_impl.h"
Antonio J. Pena's avatar
Antonio J. Pena committed
8
#include "stddef.h"  /* C99; for offsetof */
9
#include <mpl_utlist.h>
10
#include "rptl.h"
11

Antonio J. Pena's avatar
Antonio J. Pena committed
12
13
/* Number of receive buffers (and match entries) kept posted on the control portal. */
#define NUM_RECV_BUFS 50
/* Tag placed in the match bits of the initial put that carries header + eager data. */
#define CTL_TAG 0
/* Tag placed in the match bits of the gets that fetch the non-eager remainder. */
#define GET_TAG 1
Antonio J. Pena's avatar
Antonio J. Pena committed
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/* Largest number of data bytes that travel with the header in the initial put:
 * PTL_MAX_EAGER minus the buf_t bytes preceding 'packet' and minus the packet header. */
#define PAYLOAD_SIZE  (PTL_MAX_EAGER - offsetof(buf_t, packet) - sizeof(MPIDI_CH3_Pkt_t))
/* Bytes of a buf_t needed (allocated and put) when sent_sz_ data bytes follow the header. */
#define SENDBUF_SIZE(sent_sz_) (offsetof(buf_t, packet) + sizeof(MPIDI_CH3_Pkt_t) + (sent_sz_))
/* Request slot that holds the buf_t handed to the put; freed on PTL_EVENT_SEND. */
#define SENDBUF(req_) REQ_PTL(req_)->chunk_buffer[0]
/* Request slot that holds a temporary data buffer (set to NULL by the send paths when unused). */
#define TMPBUF(req_) REQ_PTL(req_)->chunk_buffer[1]

/* Wire/receive buffer layout: a byte count followed by the packet bytes. */
typedef struct {
    size_t remaining;               /* data bytes NOT included in 'packet'; the receiver fetches them with gets */
    char packet[PTL_MAX_EAGER];     /* MPIDI_CH3_Pkt_t header followed by the eager part of the data */
} buf_t;

static buf_t recvbufs[NUM_RECV_BUFS];              /* landing areas for incoming control puts */
static ptl_me_t mes[NUM_RECV_BUFS];                /* match entry descriptions, one per recvbufs[] element */
static ptl_handle_me_t me_handles[NUM_RECV_BUFS];  /* handles of the appended match entries */
static unsigned long long put_cnt = 0;  /* puts issued but not yet completed; finalize polls until this reaches zero */
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48

#undef FUNCNAME
#define FUNCNAME MPID_nem_ptl_nm_init
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPID_nem_ptl_nm_init(void)
{
    int mpi_errno = MPI_SUCCESS;
    int i;
    int ret;
    ptl_process_t id_any;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_NM_INIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_NM_INIT);

    /* init recv */
    id_any.phys.pid = PTL_PID_ANY;
    id_any.phys.nid = PTL_NID_ANY;
    
    for (i = 0; i < NUM_RECV_BUFS; ++i) {
Antonio J. Pena's avatar
Antonio J. Pena committed
49
50
51
52
53
54
55
        mes[i].start = &recvbufs[i];
        mes[i].length = sizeof(buf_t);
        mes[i].ct_handle = PTL_CT_NONE;
        mes[i].uid = PTL_UID_ANY;
        mes[i].options = (PTL_ME_OP_PUT | PTL_ME_USE_ONCE | PTL_ME_EVENT_UNLINK_DISABLE |
                         PTL_ME_EVENT_LINK_DISABLE | PTL_ME_IS_ACCESSIBLE);
        mes[i].match_id = id_any;
56
57
        mes[i].match_bits = NPTL_MATCH(CTL_TAG, 0, MPI_ANY_SOURCE);
        mes[i].ignore_bits = NPTL_MATCH_IGNORE;
Antonio J. Pena's avatar
Antonio J. Pena committed
58

59
60
        ret = MPID_nem_ptl_me_append(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_control_pt, &mes[i],
                                     PTL_PRIORITY_LIST, (void *)(uint64_t)i, &me_handles[i]);
Antonio J. Pena's avatar
Antonio J. Pena committed
61
62
        MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s",
                             MPID_nem_ptl_strerror(ret));
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_NM_INIT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPID_nem_ptl_nm_finalize
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPID_nem_ptl_nm_finalize(void)
{
    int mpi_errno = MPI_SUCCESS;
    int ret;
    int i;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_NM_FINALIZE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_NM_FINALIZE);

Antonio J. Pena's avatar
Antonio J. Pena committed
85
86
    while (put_cnt) MPID_nem_ptl_poll(1);  /* Wait for puts to finish */

87
    for (i = 0; i < NUM_RECV_BUFS; ++i) {
Antonio J. Pena's avatar
Antonio J. Pena committed
88
89
90
        ret = PtlMEUnlink(me_handles[i]);
        MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeunlink", "**ptlmeunlink %s",
                             MPID_nem_ptl_strerror(ret));
91
92
93
94
95
96
97
98
99
100
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_NM_FINALIZE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
Antonio J. Pena's avatar
Antonio J. Pena committed
101
#define FUNCNAME meappend_large
102
103
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
Antonio J. Pena's avatar
Antonio J. Pena committed
104
static inline int meappend_large(ptl_process_t id, MPID_Request *req, ptl_match_bits_t tag, void *buf, size_t remaining)
105
{
Antonio J. Pena's avatar
Antonio J. Pena committed
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
    int mpi_errno = MPI_SUCCESS;
    int ret;
    ptl_me_t me;
    MPIDI_STATE_DECL(MPID_STATE_MEAPPEND_LARGE);

    MPIDI_FUNC_ENTER(MPID_STATE_MEAPPEND_LARGE);

    me.start = buf;
    me.length = remaining < MPIDI_nem_ptl_ni_limits.max_msg_size ?
                    remaining : MPIDI_nem_ptl_ni_limits.max_msg_size;
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = ( PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE |
                   PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE );
    me.match_id = id;
121
    me.match_bits = tag;
122
    me.ignore_bits = NPTL_MATCH_IGNORE;
Antonio J. Pena's avatar
Antonio J. Pena committed
123
124
125
126
127
    me.min_free = 0;

    while (remaining) {
        ptl_handle_me_t foo_me_handle;

128
        ++REQ_PTL(req)->num_gets;
Antonio J. Pena's avatar
Antonio J. Pena committed
129

130
131
        ret = MPID_nem_ptl_me_append(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_control_pt, &me, PTL_PRIORITY_LIST, req,
                                     &foo_me_handle);
Antonio J. Pena's avatar
Antonio J. Pena committed
132
133
        MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s",
                             MPID_nem_ptl_strerror(ret));
134
        MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "PtlMEAppend(req=%p tag=%#lx)", req, tag));
Antonio J. Pena's avatar
Antonio J. Pena committed
135
136
137
138
139

        me.start = (char *)me.start + me.length;
        remaining -= me.length;
        if (remaining < MPIDI_nem_ptl_ni_limits.max_msg_size)
            me.length = remaining;
140
    }
Pavan Balaji's avatar
Pavan Balaji committed
141

Antonio J. Pena's avatar
Antonio J. Pena committed
142
143
144
145
146
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MEAPPEND_LARGE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
147
148
149
150
151
152
}

#undef FUNCNAME
#define FUNCNAME send_pkt
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
Antonio J. Pena's avatar
Antonio J. Pena committed
153
154
static inline int send_pkt(MPIDI_VC_t *vc, void *hdr_p, void *data_p, MPIDI_msg_sz_t data_sz,
                           MPID_Request *sreq)
155
156
157
158
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    int ret;
Antonio J. Pena's avatar
Antonio J. Pena committed
159
160
161
    buf_t *sendbuf;
    const size_t sent_sz = data_sz < PAYLOAD_SIZE ? data_sz : PAYLOAD_SIZE;
    const size_t sendbuf_sz = SENDBUF_SIZE(sent_sz);
162
    ptl_match_bits_t match_bits = NPTL_MATCH(CTL_TAG, 0, MPIDI_Process.my_pg_rank);
163
164
165
166
    MPIDI_STATE_DECL(MPID_STATE_SEND_PKT);

    MPIDI_FUNC_ENTER(MPID_STATE_SEND_PKT);
    
Antonio J. Pena's avatar
Antonio J. Pena committed
167
168
169
170
171
    sendbuf = MPIU_Malloc(sendbuf_sz);
    MPIU_Assert(sendbuf != NULL);
    MPIU_Memcpy(sendbuf->packet, hdr_p, sizeof(MPIDI_CH3_Pkt_t));
    sendbuf->remaining = data_sz - sent_sz;
    TMPBUF(sreq) = NULL;
172
    REQ_PTL(sreq)->num_gets = 0;
173
    REQ_PTL(sreq)->put_done = 0;
174

Antonio J. Pena's avatar
Antonio J. Pena committed
175
176
177
    if (data_sz) {
        MPIU_Memcpy(sendbuf->packet + sizeof(MPIDI_CH3_Pkt_t), data_p, sent_sz);
        if (sendbuf->remaining)  /* Post MEs for the remote gets */
178
179
            mpi_errno = meappend_large(vc_ptl->id, sreq, NPTL_MATCH(GET_TAG, 0, MPIDI_Process.my_pg_rank),
                                       (char *)data_p + sent_sz, sendbuf->remaining);
Antonio J. Pena's avatar
Antonio J. Pena committed
180
181
            if (mpi_errno)
                goto fn_fail;
182
183
    }

Antonio J. Pena's avatar
Antonio J. Pena committed
184
185
186
187
    SENDBUF(sreq) = sendbuf;
    REQ_PTL(sreq)->event_handler = MPID_nem_ptl_nm_ctl_event_handler;

    ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)sendbuf, sendbuf_sz, PTL_NO_ACK_REQ,
188
                                vc_ptl->id, vc_ptl->ptc, match_bits, 0, sreq, 0);
Antonio J. Pena's avatar
Antonio J. Pena committed
189
190
191
192
193
    MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s",
                         MPID_nem_ptl_strerror(ret));
    MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "PtlPut(size=%lu id=(%#x,%#x) pt=%#x)",
                                            sendbuf_sz, vc_ptl->id.phys.nid,
                                            vc_ptl->id.phys.pid, vc_ptl->ptc));
194
195
    ++put_cnt;

196
197
198
199
200
201
202
203
204
205
206
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_SEND_PKT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME send_noncontig_pkt
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
Antonio J. Pena's avatar
Antonio J. Pena committed
207
static int send_noncontig_pkt(MPIDI_VC_t *vc, MPID_Request *sreq, void *hdr_p)
208
209
210
211
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    int ret;
Antonio J. Pena's avatar
Antonio J. Pena committed
212
213
214
    buf_t *sendbuf;
    const size_t sent_sz = sreq->dev.segment_size < PAYLOAD_SIZE ? sreq->dev.segment_size : PAYLOAD_SIZE;
    size_t sendbuf_sz = SENDBUF_SIZE(sent_sz);
215
    ptl_match_bits_t match_bits = NPTL_MATCH(CTL_TAG, 0, MPIDI_Process.my_pg_rank);
216
217
218
    MPIDI_STATE_DECL(MPID_STATE_SEND_NONCONTIG_PKT);
    MPIDI_FUNC_ENTER(MPID_STATE_SEND_NONCONTIG_PKT);

Antonio J. Pena's avatar
Antonio J. Pena committed
219
    MPIU_Assert(sreq->dev.segment_first == 0);
220

Antonio J. Pena's avatar
Antonio J. Pena committed
221
222
223
224
225
    sendbuf = MPIU_Malloc(sendbuf_sz);
    MPIU_Assert(sendbuf != NULL);
    MPIU_Memcpy(sendbuf->packet, hdr_p, sizeof(MPIDI_CH3_Pkt_t));
    sendbuf->remaining = sreq->dev.segment_size - sent_sz;
    TMPBUF(sreq) = NULL;
226
    REQ_PTL(sreq)->num_gets = 0;
227
    REQ_PTL(sreq)->put_done = 0;
228

Antonio J. Pena's avatar
Antonio J. Pena committed
229
230
231
    if (sreq->dev.segment_size) {
        MPIDI_msg_sz_t last = sent_sz;
        MPID_Segment_pack(sreq->dev.segment_ptr, 0, &last, sendbuf->packet + sizeof(MPIDI_CH3_Pkt_t));
232

Antonio J. Pena's avatar
Antonio J. Pena committed
233
234
235
        if (sendbuf->remaining) {  /* Post MEs for the remote gets */
            TMPBUF(sreq) = MPIU_Malloc(sendbuf->remaining);
            sreq->dev.segment_first = last;
236
            last = sreq->dev.segment_size;
Antonio J. Pena's avatar
Antonio J. Pena committed
237
238
            MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, TMPBUF(sreq));
            MPIU_Assert(last == sreq->dev.segment_size);
239

240
            mpi_errno = meappend_large(vc_ptl->id, sreq, NPTL_MATCH(GET_TAG, 0, MPIDI_Process.my_pg_rank), TMPBUF(sreq), sendbuf->remaining);
Antonio J. Pena's avatar
Antonio J. Pena committed
241
242
            if (mpi_errno)
                goto fn_fail;
243
244
245
        }
    }

Antonio J. Pena's avatar
Antonio J. Pena committed
246
247
    SENDBUF(sreq) = sendbuf;
    REQ_PTL(sreq)->event_handler = MPID_nem_ptl_nm_ctl_event_handler;
248

Antonio J. Pena's avatar
Antonio J. Pena committed
249
    ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)sendbuf, sendbuf_sz, PTL_NO_ACK_REQ,
250
                                vc_ptl->id, vc_ptl->ptc, match_bits, 0, sreq, 0);
Antonio J. Pena's avatar
Antonio J. Pena committed
251
252
253
254
255
    MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s",
                         MPID_nem_ptl_strerror(ret));
    MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "PtlPut(size=%lu id=(%#x,%#x) pt=%#x)",
                                            sendbuf_sz, vc_ptl->id.phys.nid,
                                            vc_ptl->id.phys.pid, vc_ptl->ptc));
256
    ++put_cnt;
257
258

 fn_exit:
Antonio J. Pena's avatar
Antonio J. Pena committed
259
    MPIDI_FUNC_EXIT(MPID_STATE_SEND_NONCONTIG_PKT);
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


#undef FUNCNAME
#define FUNCNAME MPID_nem_ptl_SendNoncontig
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/*
 * Entry point for sending the non-contiguous data attached to 'sreq'.
 * Asserts that the header fits in an MPIDI_CH3_Pkt_t and forwards to
 * send_noncontig_pkt(); hdr_sz is used for that assertion only.
 *
 * Returns MPI_SUCCESS or the error reported by send_noncontig_pkt().
 */
int MPID_nem_ptl_SendNoncontig(MPIDI_VC_t *vc, MPID_Request *sreq, void *hdr, MPIDI_msg_sz_t hdr_sz)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_SENDNONCONTIG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_SENDNONCONTIG);

    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));

    mpi_errno = send_noncontig_pkt(vc, sreq, hdr);
    if (mpi_errno) {
        MPIU_ERR_POP(mpi_errno);
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_SENDNONCONTIG);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPID_nem_ptl_iStartContigMsg
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
Antonio J. Pena's avatar
Antonio J. Pena committed
292
293
int MPID_nem_ptl_iStartContigMsg(MPIDI_VC_t *vc, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data,
                                 MPIDI_msg_sz_t data_sz, MPID_Request **sreq_ptr)
294
295
296
297
298
299
300
301
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ISTARTCONTIGMSG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_ISTARTCONTIGMSG);
    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));

    /* create a request */
Antonio J. Pena's avatar
Antonio J. Pena committed
302
303
304
305
306
307
308
309
    *sreq_ptr = MPID_Request_create();
    MPIU_Assert(*sreq_ptr != NULL);
    MPIU_Object_set_ref(*sreq_ptr, 2);
    (*sreq_ptr)->kind = MPID_REQUEST_SEND;
    (*sreq_ptr)->dev.OnDataAvail = NULL;
    (*sreq_ptr)->dev.user_buf = NULL;

    mpi_errno = send_pkt(vc, hdr, data, data_sz, *sreq_ptr);
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ISTARTCONTIGMSG);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPID_nem_ptl_iSendContig
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/*
 * Send a header plus contiguous data using the caller-supplied request.
 * Asserts that the header fits in an MPIDI_CH3_Pkt_t and forwards to
 * send_pkt(); hdr_sz is used for that assertion only.
 *
 * Returns MPI_SUCCESS or the error reported by send_pkt().
 */
int MPID_nem_ptl_iSendContig(MPIDI_VC_t *vc, MPID_Request *sreq, void *hdr, MPIDI_msg_sz_t hdr_sz,
                               void *data, MPIDI_msg_sz_t data_sz)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ISENDCONTIG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_ISENDCONTIG);
    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));

    mpi_errno = send_pkt(vc, hdr, data, data_sz, sreq);
    if (mpi_errno) {
        MPIU_ERR_POP(mpi_errno);
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ISENDCONTIG);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
Antonio J. Pena's avatar
Antonio J. Pena committed
343
#define FUNCNAME on_data_avail
344
345
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
Antonio J. Pena's avatar
Antonio J. Pena committed
346
static inline void on_data_avail(MPID_Request * req)
347
{
348
    int (*reqFn) (MPIDI_VC_t *, MPID_Request *, int *);
Antonio J. Pena's avatar
Antonio J. Pena committed
349
    MPIDI_STATE_DECL(MPID_STATE_ON_DATA_AVAIL);
350

Antonio J. Pena's avatar
Antonio J. Pena committed
351
    MPIDI_FUNC_ENTER(MPID_STATE_ON_DATA_AVAIL);
352

Antonio J. Pena's avatar
Antonio J. Pena committed
353
354
355
    reqFn = req->dev.OnDataAvail;
    if (!reqFn) {
        MPIDI_CH3U_Request_complete(req);
356
357
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
    }
Antonio J. Pena's avatar
Antonio J. Pena committed
358
359
360
361
362
    else {
        int complete;
        MPIDI_VC_t *vc = req->ch.vc;
        reqFn(vc, req, &complete);
        MPIU_Assert(complete == TRUE);
363
    }
364
365
366

    --put_cnt;

Antonio J. Pena's avatar
Antonio J. Pena committed
367
    MPIDI_FUNC_EXIT(MPID_STATE_ON_DATA_AVAIL);
368
369
370
}

#undef FUNCNAME
Antonio J. Pena's avatar
Antonio J. Pena committed
371
#define FUNCNAME MPID_nem_ptl_nm_ctl_event_handler
372
373
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
Antonio J. Pena's avatar
Antonio J. Pena committed
374
int MPID_nem_ptl_nm_ctl_event_handler(const ptl_event_t *e)
375
376
{
    int mpi_errno = MPI_SUCCESS;
Antonio J. Pena's avatar
Antonio J. Pena committed
377
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_NM_CTL_EVENT_HANDLER);
378

Antonio J. Pena's avatar
Antonio J. Pena committed
379
380
381
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_NM_CTL_EVENT_HANDLER);

    switch(e->type) {
382
383

    case PTL_EVENT_PUT:
384
        {
Antonio J. Pena's avatar
Antonio J. Pena committed
385
386
387
388
389
390
391
392
            int ret;
            const uint64_t buf_idx = (uint64_t) e->user_ptr;
            const size_t packet_sz = e->mlength - offsetof(buf_t, packet);
            MPIDI_VC_t *vc;
            MPID_nem_ptl_vc_area * vc_ptl;

            MPIU_Assert(e->start == &recvbufs[buf_idx]);

393
            MPIDI_PG_Get_vc(MPIDI_Process.my_pg, NPTL_MATCH_GET_RANK(e->match_bits), &vc);
Antonio J. Pena's avatar
Antonio J. Pena committed
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
            vc_ptl = VC_PTL(vc);

            if (recvbufs[buf_idx].remaining == 0) {
                mpi_errno = MPID_nem_handle_pkt(vc, recvbufs[buf_idx].packet, packet_sz);
                if (mpi_errno)
                    MPIU_ERR_POP(mpi_errno);
            }
            else {
                int incomplete;
                size_t size;
                char *buf_ptr;

                MPID_Request *req = MPID_Request_create();
                MPIU_Assert(req != NULL);
                MPIDI_CH3U_Request_decrement_cc(req, &incomplete);  /* We'll increment it below */
                REQ_PTL(req)->event_handler = MPID_nem_ptl_nm_ctl_event_handler;
                REQ_PTL(req)->bytes_put = packet_sz + recvbufs[buf_idx].remaining;
                TMPBUF(req) = MPIU_Malloc(REQ_PTL(req)->bytes_put);
                MPIU_Assert(TMPBUF(req) != NULL);
                MPIU_Memcpy(TMPBUF(req), recvbufs[buf_idx].packet, packet_sz);

                req->ch.vc = vc;

                size = recvbufs[buf_idx].remaining < MPIDI_nem_ptl_ni_limits.max_msg_size ?
                           recvbufs[buf_idx].remaining : MPIDI_nem_ptl_ni_limits.max_msg_size;
                buf_ptr = (char *)TMPBUF(req) + packet_sz;
                while (recvbufs[buf_idx].remaining) {
                    MPIDI_CH3U_Request_increment_cc(req, &incomplete);  /* Will be decremented - and eventually freed in REPLY */
                    ret = MPID_nem_ptl_rptl_get(MPIDI_nem_ptl_global_md, (ptl_size_t)buf_ptr,
423
                                                size, vc_ptl->id, vc_ptl->ptc, NPTL_MATCH(GET_TAG, 0, MPIDI_Process.my_pg_rank), 0, req);
Antonio J. Pena's avatar
Antonio J. Pena committed
424
425
426
                    MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlget", "**ptlget %s",
                                         MPID_nem_ptl_strerror(ret));
                    MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST,
427
                                                            "PtlGet(size=%lu id=(%#x,%#x) pt=%#x tag=%d)", size,
Antonio J. Pena's avatar
Antonio J. Pena committed
428
                                                            vc_ptl->id.phys.nid,
429
                                                            vc_ptl->id.phys.pid, vc_ptl->ptc, GET_TAG));
Antonio J. Pena's avatar
Antonio J. Pena committed
430
431
432
433
434
435
436
437
                    buf_ptr += size;
                    recvbufs[buf_idx].remaining -= size;
                    if (recvbufs[buf_idx].remaining < MPIDI_nem_ptl_ni_limits.max_msg_size)
                        size = recvbufs[buf_idx].remaining;
                }
            }

            /* Repost the recv buffer */
438
439
            ret = MPID_nem_ptl_me_append(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_control_pt, &mes[buf_idx],
                                         PTL_PRIORITY_LIST, e->user_ptr /* buf_idx */, &me_handles[buf_idx]);
Antonio J. Pena's avatar
Antonio J. Pena committed
440
441
442
            MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend",
                                 "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));
        }
443
        break;
Antonio J. Pena's avatar
Antonio J. Pena committed
444

445
446
447
448
449
450
    case PTL_EVENT_GET:
        {
            MPID_Request *const req = e->user_ptr;

            if (--REQ_PTL(req)->num_gets == 0) {
                MPIU_Free(TMPBUF(req));
451
                if (REQ_PTL(req)->put_done)
452
453
454
455
456
457
458
459
460
461
                    on_data_avail(req);  /* Otherwise we'll do it on the SEND */
            }
        }
        break;

    case PTL_EVENT_SEND:
        {
            MPID_Request *const req = e->user_ptr;

            MPIU_Free(SENDBUF(req));
462
            REQ_PTL(req)->put_done = 1;
463
464
465
466
467
            if (REQ_PTL(req)->num_gets == 0)  /* Otherwise GET will do it */
                on_data_avail(req);
        }
        break;

Antonio J. Pena's avatar
Antonio J. Pena committed
468
469
470
    case PTL_EVENT_REPLY:
        {
            int incomplete;
471
            MPID_Request *const req = e->user_ptr;
Antonio J. Pena's avatar
Antonio J. Pena committed
472

473
            MPIDI_CH3U_Request_decrement_cc(req, &incomplete);
Antonio J. Pena's avatar
Antonio J. Pena committed
474
            if (!incomplete) {
475
                mpi_errno = MPID_nem_handle_pkt(req->ch.vc, TMPBUF(req), REQ_PTL(req)->bytes_put);
Antonio J. Pena's avatar
Antonio J. Pena committed
476
477
478
479
                if (mpi_errno)
                    MPIU_ERR_POP(mpi_errno);

                /* Free resources */
480
481
                MPIU_Free(TMPBUF(req));
                MPID_Request_release(req);
Antonio J. Pena's avatar
Antonio J. Pena committed
482
483
            }
        }
484
        break;
Antonio J. Pena's avatar
Antonio J. Pena committed
485

486
487
488
489
490
491
492
    default:
        MPIU_Error_printf("Received unexpected event type: %d %s", e->type, MPID_nem_ptl_strevent(e));
        MPIU_ERR_INTERNALANDJUMP(mpi_errno, "Unexpected event type");
        break;
    }

 fn_exit:
Antonio J. Pena's avatar
Antonio J. Pena committed
493
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_NM_CTL_EVENT_HANDLER);
494
495
496
497
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}