/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2012 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "ptl_impl.h"
#include "rptl.h"

#undef FUNCNAME
#define FUNCNAME big_meappend
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/*
 * Append match-list entries describing left_to_send bytes starting at buf.
 *
 * The region is split into pieces of at most
 * MPIDI_nem_ptl_ni_limits.max_msg_size bytes.  One use-once ME per piece is
 * appended to the priority list of MPIDI_nem_ptl_get_pt, matching only the
 * peer identified by vc and the given match_bits.  The ME handles are stored
 * in a freshly allocated REQ_PTL(sreq)->get_me_p array and
 * REQ_PTL(sreq)->num_gets is incremented once per appended entry.
 *
 * The function returns void, so failures cannot be reported to the caller;
 * allocation and PtlMEAppend failures are caught by assertions only.
 */
static void big_meappend(void *buf, ptl_size_t left_to_send, MPIDI_VC_t *vc, ptl_match_bits_t match_bits, MPID_Request *sreq)
{
    int i, ret;
    MPID_nem_ptl_vc_area *vc_ptl;
    ptl_me_t me;

    vc_ptl = VC_PTL(vc);

    me.start = buf;
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = ( PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE |
                   PTL_ME_EVENT_UNLINK_DISABLE );
    me.match_id = vc_ptl->id;
    me.match_bits = match_bits;
    me.ignore_bits = 0;
    me.min_free = 0;

    /* allocate enough handles to cover all get operations */
    REQ_PTL(sreq)->get_me_p = MPIU_Malloc(sizeof(ptl_handle_me_t) *
                                        ((left_to_send / MPIDI_nem_ptl_ni_limits.max_msg_size) + 1));
    /* the handle array is written by PtlMEAppend below; never proceed with a NULL array */
    MPIU_Assert(REQ_PTL(sreq)->get_me_p != NULL);

    /* queue up as many entries as necessary to describe the entire message */
    for (i = 0; left_to_send > 0; i++) {
        /* send up to the maximum allowed by the portals interface */
        if (left_to_send > MPIDI_nem_ptl_ni_limits.max_msg_size)
            me.length = MPIDI_nem_ptl_ni_limits.max_msg_size;
        else
            me.length = left_to_send;

        ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq, &REQ_PTL(sreq)->get_me_p[i]);
        DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);
        MPIU_Assert(ret == 0);

        /* account for what has been sent */
        me.start = (char *)me.start + me.length;
        left_to_send -= me.length;
        REQ_PTL(sreq)->num_gets++;
    }
}

#undef FUNCNAME
#define FUNCNAME handler_send_complete
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
static int handler_send_complete(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const sreq = e->user_ptr;
    int ret;
64
    int i;
65
66
67
68
    MPIDI_STATE_DECL(MPID_STATE_HANDLER_SEND_COMPLETE);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_SEND_COMPLETE);

69
    MPIU_Assert(e->type == PTL_EVENT_SEND || e->type == PTL_EVENT_GET);
70

71
72
    if (REQ_PTL(sreq)->md != PTL_INVALID_HANDLE) {
        ret = PtlMDRelease(REQ_PTL(sreq)->md);
73
        MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdrelease", "**ptlmdrelease %s", MPID_nem_ptl_strerror(ret));
74
75
76
    }

    for (i = 0; i < MPID_NEM_PTL_NUM_CHUNK_BUFFERS; ++i)
77
78
        if (REQ_PTL(sreq)->chunk_buffer[i])
            MPIU_Free(REQ_PTL(sreq)->chunk_buffer[i]);
79
80
81

    if (REQ_PTL(sreq)->get_me_p)
        MPIU_Free(REQ_PTL(sreq)->get_me_p);
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
    
    MPIDI_CH3U_Request_complete(sreq);

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_SEND_COMPLETE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME handler_large
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
static int handler_large(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const sreq = e->user_ptr;
    MPIDI_STATE_DECL(MPID_STATE_HANDLER_LARGE);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_LARGE);

104
    MPIU_Assert(e->type == PTL_EVENT_SEND || e->type == PTL_EVENT_GET);
105

106
    if (e->type == PTL_EVENT_SEND) {
107
        REQ_PTL(sreq)->put_acked = 1;
108
109
    } else if (e->type == PTL_EVENT_GET) {
        /* decrement the remaining get operations */
110
        REQ_PTL(sreq)->num_gets--;
111
112
    }

113
114
115
    if (REQ_PTL(sreq)->num_gets == 0 && REQ_PTL(sreq)->put_acked)
        mpi_errno = handler_send_complete(e);

116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_LARGE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#if 0

#undef FUNCNAME
#define FUNCNAME handler_pack_chunk
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
static int handler_pack_chunk(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const sreq = e->user_ptr;
    MPIDI_STATE_DECL(MPID_STATE_HANDLER_PACK_CHUNK);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_PACK_CHUNK);

137
138
139
140
141
142
143
144
    MPIU_Assert(e->type == PTL_EVENT_GET || e->type == PTL_EVENT_PUT);

    if (e->type == PTL_EVENT_PUT) {
        mpi_errno = handler_send_complete(e);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        goto fn_exit;
    }

145
    /* pack next chunk */
146
    MPI_nem_ptl_pack_byte(sreq->dev.segment_ptr, sreq->dev.segment_first, sreq->dev.segment_first + PTL_LARGE_THRESHOLD,
147
148
149
150
              REQ_PTL(sreq_)->chunk_buffer[1], &REQ_PTL(sreq)->overflow[1]);
    sreq->dev.segment_first += PTL_LARGE_THRESHOLD;

    /* notify receiver */
151
    ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, 0, 0, PTL_ACK_REQ, vc_ptl->id,
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
                 vc_ptl->pt, ?????, 0, sreq,
                 NPTL_HEADER(?????, MPIDI_Process.my_pg_rank, me.match_bits));


 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_PACK_CHUNK);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME handler_multi_put
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/*
 * Placeholder event handler (compiled out by the enclosing "#if 0").
 * It looks up the request from e->user_ptr but performs no work and
 * always returns MPI_SUCCESS.
 */
static int handler_multi_put(const ptl_event_t *e)
{
    MPID_Request *const sreq = e->user_ptr;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_HANDLER_MULTI_PUT);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_MULTI_PUT);

    /* no event processing has been written for this handler */

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_MULTI_PUT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


#undef FUNCNAME
#define FUNCNAME handler_large_multi
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
static int handler_large_multi(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const sreq = e->user_ptr;
    MPIDI_STATE_DECL(MPID_STATE_HANDLER_LARGE_MULTI);

195
196
    MPIU_Assert(e->type == PTL_EVENT_ACK);

197
198
199
200
201
202
    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_LARGE_MULTI);
    if (e->mlength < PTL_LARGE_THRESHOLD) {
        /* truncated message */
        mpi_errno = handler_send_complete(e);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    } else {
203
        REQ_PTL(sreq)->event_handler = handler_pack_chunk;
204
205
206
207
    }
    
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_LARGE_MULTI);
208
209
210
211
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#endif


/*
 * Send message for either isend or issend.
 *
 * ssend_flag is folded into the header data of the put (0 for isend,
 * NPTL_SSEND for issend).  A send request is created, stored in *request and
 * driven by one of four strategies:
 *
 *   - small (data_sz <= PTL_LARGE_THRESHOLD), contiguous:
 *       a single put straight from the user buffer;
 *   - small, noncontiguous:
 *       a put through an IOVEC memory descriptor if the IOV can describe the
 *       whole message, otherwise the data is packed into a bounce buffer;
 *   - large, contiguous:
 *       the first PTL_LARGE_THRESHOLD bytes are put; the remainder is exposed
 *       through match-list entries so the receiver can issue GETs;
 *   - large, noncontiguous:
 *       as above using IOVECs when the whole message fits into one IOV (and
 *       the remainder does not exceed max_msg_size), otherwise the entire
 *       message is packed into a temporary buffer first.
 *
 * Returns MPI_SUCCESS or an MPI error code.  On failure the request is
 * released and *request is set to NULL.
 */
#undef FUNCNAME
#define FUNCNAME send_msg
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *buf, int count, MPI_Datatype datatype, int dest,
                    int tag, MPID_Comm *comm, int context_offset, struct MPID_Request **request)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    int ret;
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype *dt_ptr;
    MPID_Request *sreq = NULL;
    ptl_me_t me;
    int initial_iov_count, remaining_iov_count;
    ptl_md_t md;
    MPI_Aint last;
    MPIU_CHKPMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_SEND_MSG);

    MPIDI_FUNC_ENTER(MPID_STATE_SEND_MSG);

    MPID_nem_ptl_request_create_sreq(sreq, mpi_errno, comm);
    sreq->dev.match.parts.rank = dest;
    sreq->dev.match.parts.tag = tag;
    sreq->dev.match.parts.context_id = comm->context_id + context_offset;

    if (!vc_ptl->id_initialized) {
        mpi_errno = MPID_nem_ptl_init_id(vc);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
    /* cast so the argument matches the %lu conversion whatever MPIDI_msg_sz_t is */
    MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "count=%d datatype=%#x contig=%d data_sz=%lu", count, datatype, dt_contig, (unsigned long)data_sz));

    if (data_sz <= PTL_LARGE_THRESHOLD) {
        /* Small message.  Send all data eagerly */
        if (dt_contig) {
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small contig message");
            REQ_PTL(sreq)->event_handler = handler_send_complete;
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "&REQ_PTL(sreq)->event_handler = %p", &(REQ_PTL(sreq)->event_handler));
            ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)((char *)buf + dt_true_lb), data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                        NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                        NPTL_HEADER(ssend_flag, data_sz));
            MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
            DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.nid = %#x", vc_ptl->id.phys.nid);
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.pid = %#x", vc_ptl->id.phys.pid);
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "sreq = %p", sreq);
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "vc_ptl->pt = %d", vc_ptl->pt);
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "REQ_PTL(sreq)->event_handler = %p", REQ_PTL(sreq)->event_handler);
            goto fn_exit;
        }

        /* noncontig data */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small noncontig message");
        sreq->dev.segment_ptr = MPID_Segment_alloc();
        MPIU_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
        MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
        sreq->dev.segment_first = 0;
        sreq->dev.segment_size = data_sz;

        last = sreq->dev.segment_size;
        sreq->dev.iov_count = MPID_IOV_LIMIT;
        MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count);

        if (last == sreq->dev.segment_size) {
            /* IOV is able to describe entire message */
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    entire message fits in IOV");
            md.start = sreq->dev.iov;
            md.length = sreq->dev.iov_count;
            md.options = PTL_IOVEC;
            md.eq_handle = MPIDI_nem_ptl_origin_eq;
            md.ct_handle = PTL_CT_NONE;
            ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md);
            MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));

            REQ_PTL(sreq)->event_handler = handler_send_complete;
            ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                        NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                        NPTL_HEADER(ssend_flag, data_sz));
            MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
            DBG_MSG_PUT("sreq", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
            goto fn_exit;
        }

        /* IOV is not long enough to describe entire message */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    IOV too short: using bounce buffer");
        MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "chunk_buffer");
        /* restart the segment from the beginning before packing into the bounce buffer */
        MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
        sreq->dev.segment_first = 0;
        last = data_sz;
        MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, REQ_PTL(sreq)->chunk_buffer[0]);
        MPIU_Assert(last == sreq->dev.segment_size);
        REQ_PTL(sreq)->event_handler = handler_send_complete;
        ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)REQ_PTL(sreq)->chunk_buffer[0], data_sz, PTL_NO_ACK_REQ,
                                    vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                    NPTL_HEADER(ssend_flag, data_sz));
        MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
        DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
        goto fn_exit;
    }

    /* Large message.  Send first chunk of data and let receiver get the rest */
    if (dt_contig) {
        /* create ME for buffer so receiver can issue a GET for the data */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large contig message");
        big_meappend((char *)buf + dt_true_lb + PTL_LARGE_THRESHOLD, data_sz - PTL_LARGE_THRESHOLD, vc,
                     NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), sreq);
        REQ_PTL(sreq)->large = TRUE;

        REQ_PTL(sreq)->event_handler = handler_large;
        ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)((char *)buf + dt_true_lb), PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                    NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                    NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
        MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
        DBG_MSG_PUT("global", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
        goto fn_exit;
    }

    /* Large noncontig data */
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large noncontig message");
    sreq->dev.segment_ptr = MPID_Segment_alloc();
    MPIU_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
    MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
    sreq->dev.segment_first = 0;
    sreq->dev.segment_size = data_sz;

    /* try to describe the first chunk with the front of the IOV */
    last = PTL_LARGE_THRESHOLD;
    sreq->dev.iov_count = MPID_IOV_LIMIT;
    MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count);

    initial_iov_count = sreq->dev.iov_count;
    sreq->dev.segment_first = last;

    if (last == PTL_LARGE_THRESHOLD) {
        /* first chunk of message fits into IOV */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    first chunk fits in IOV");
        if (initial_iov_count < MPID_IOV_LIMIT) {
            /* There may be space for the rest of the message in this IOV */
            sreq->dev.iov_count = MPID_IOV_LIMIT - sreq->dev.iov_count;
            last = sreq->dev.segment_size;

            MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last,
                                     &sreq->dev.iov[initial_iov_count], &sreq->dev.iov_count);
            remaining_iov_count = sreq->dev.iov_count;

            if (last == sreq->dev.segment_size && last <= MPIDI_nem_ptl_ni_limits.max_msg_size + PTL_LARGE_THRESHOLD) {
                /* Entire message fit in one IOV */
                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    rest of message fits in one IOV");
                /* Create ME for remaining data */
                me.start = &sreq->dev.iov[initial_iov_count];
                me.length = remaining_iov_count;
                me.ct_handle = PTL_CT_NONE;
                me.uid = PTL_UID_ANY;
                me.options = ( PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE |
                               PTL_ME_EVENT_UNLINK_DISABLE | PTL_IOVEC );
                me.match_id = vc_ptl->id;
                me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank);
                me.ignore_bits = 0;
                me.min_free = 0;

                MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->get_me_p, ptl_handle_me_t *, sizeof(ptl_handle_me_t), mpi_errno, "get_me_p");

                REQ_PTL(sreq)->num_gets = 1;
                ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq,
                                  &REQ_PTL(sreq)->get_me_p[0]);
                MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));
                DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);

                /* Create MD for first chunk */
                md.start = sreq->dev.iov;
                md.length = initial_iov_count;
                md.options = PTL_IOVEC;
                md.eq_handle = MPIDI_nem_ptl_origin_eq;
                md.ct_handle = PTL_CT_NONE;
                ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md);
                MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));

                REQ_PTL(sreq)->large = TRUE;

                REQ_PTL(sreq)->event_handler = handler_large;
                ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                            NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                            NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
                MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
                DBG_MSG_PUT("req", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
                goto fn_exit;
            }
        }
        /* First chunk of message fits, but the rest doesn't */
        /* Don't handle this case separately */
    }

    /* allocate a temporary buffer and copy all the data to send */
    MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "tmpbuf");

    last = data_sz;
    MPID_Segment_pack(sreq->dev.segment_ptr, 0, &last, REQ_PTL(sreq)->chunk_buffer[0]);
    MPIU_Assert(last == data_sz);

    big_meappend((char *)REQ_PTL(sreq)->chunk_buffer[0] + PTL_LARGE_THRESHOLD, data_sz - PTL_LARGE_THRESHOLD, vc,
                 NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), sreq);
    REQ_PTL(sreq)->large = TRUE;

    REQ_PTL(sreq)->event_handler = handler_large;
    ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)REQ_PTL(sreq)->chunk_buffer[0], PTL_LARGE_THRESHOLD,
                                PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank),
                                0, sreq, NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
    MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
    DBG_MSG_PUT("global", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));

 fn_exit:
    *request = sreq;
    MPIU_CHKPMEM_COMMIT();
    MPIDI_FUNC_EXIT(MPID_STATE_SEND_MSG);
    return mpi_errno;
 fn_fail:
    if (sreq) {
        MPID_Request_release(sreq);
        sreq = NULL;
    }
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPID_nem_ptl_isend
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/*
 * Nonblocking standard-mode send: forwards every argument to send_msg()
 * with a zero ssend flag and returns send_msg()'s result unchanged.
 */
int MPID_nem_ptl_isend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
                       MPID_Comm *comm, int context_offset, struct MPID_Request **request)
{
    const ptl_hdr_data_t no_ssend = 0;
    int rc;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ISEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_ISEND);
    rc = send_msg(no_ssend, vc, buf, count, datatype, dest, tag, comm, context_offset, request);
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ISEND);

    return rc;
}


#undef FUNCNAME
#define FUNCNAME MPID_nem_ptl_issend
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/*
 * Nonblocking synchronous-mode send: forwards every argument to send_msg()
 * with the NPTL_SSEND flag and returns send_msg()'s result unchanged.
 */
int MPID_nem_ptl_issend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
                        MPID_Comm *comm, int context_offset, struct MPID_Request **request)
{
    const ptl_hdr_data_t ssend = NPTL_SSEND;
    int rc;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ISSEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_ISSEND);
    rc = send_msg(ssend, vc, buf, count, datatype, dest, tag, comm, context_offset, request);
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ISSEND);

    return rc;
}

#undef FUNCNAME
#define FUNCNAME MPID_nem_ptl_cancel_send
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPID_nem_ptl_cancel_send(struct MPIDI_VC *vc,  struct MPID_Request *sreq)
{
    int mpi_errno = MPI_SUCCESS;
489
490
491
492
    MPID_PKT_DECL_CAST(upkt, MPIDI_nem_ptl_pkt_cancel_send_req_t, csr_pkt);
    MPID_Request *csr_sreq;
    int was_incomplete;

493
494
495
496
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_CANCEL_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_CANCEL_SEND);

497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
    /* The completion counter and reference count are incremented to keep
       the request around long enough to receive a
       response regardless of what the user does (free the request before
       waiting, etc.). */
    MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete);
    if (!was_incomplete) {
        /* The reference count is incremented only if the request was
           complete before the increment. */
        MPIR_Request_add_ref(sreq);
    }

    csr_pkt->type = MPIDI_NEM_PKT_NETMOD;
    csr_pkt->subtype = MPIDI_NEM_PTL_PKT_CANCEL_SEND_REQ;
    csr_pkt->match.parts.rank = sreq->dev.match.parts.rank;
    csr_pkt->match.parts.tag = sreq->dev.match.parts.tag;
    csr_pkt->match.parts.context_id = sreq->dev.match.parts.context_id;
    csr_pkt->sender_req_id = sreq->handle;

    MPID_nem_ptl_iStartContigMsg(vc, csr_pkt, sizeof(*csr_pkt), NULL,
                                 0, &csr_sreq);

    if (csr_sreq != NULL)
        MPID_Request_release(csr_sreq);
520
521
522
523
524
525
526

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_CANCEL_SEND);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}