/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2012 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "ptl_impl.h"

#undef FUNCNAME
#define FUNCNAME handler_send_complete
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
static int handler_send_complete(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const sreq = e->user_ptr;
    int ret;
18
    int i;
19
20
21
22
    MPIDI_STATE_DECL(MPID_STATE_HANDLER_SEND_COMPLETE);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_SEND_COMPLETE);

23
24
    MPIU_Assert(e->type == PTL_EVENT_ACK || e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_GET);

25
26
    if (REQ_PTL(sreq)->md != PTL_INVALID_HANDLE) {
        ret = PtlMDRelease(REQ_PTL(sreq)->md);
27
        MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdrelease", "**ptlmdrelease %s", MPID_nem_ptl_strerror(ret));
28
29
30
    }

    for (i = 0; i < MPID_NEM_PTL_NUM_CHUNK_BUFFERS; ++i)
31
32
        if (REQ_PTL(sreq)->chunk_buffer[i])
            MPIU_Free(REQ_PTL(sreq)->chunk_buffer[i]);
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
    
    MPIDI_CH3U_Request_complete(sreq);

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_SEND_COMPLETE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME handler_large
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
static int handler_large(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const sreq = e->user_ptr;
    MPIDI_STATE_DECL(MPID_STATE_HANDLER_LARGE);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_LARGE);

55
56
57
58
59
60
61
    if (e->type != PTL_EVENT_ACK && e->type != PTL_EVENT_GET)
        MPIU_Error_printf("ACK event expected, received %s ni_fail=%s list=%s user_ptr=%p hdr_data=%#lx\n",
                          MPID_nem_ptl_strevent(e), MPID_nem_ptl_strnifail(e->ni_fail_type),
                          MPID_nem_ptl_strlist(e->ptl_list), e->user_ptr, e->hdr_data);
    MPIU_Assert(e->type == PTL_EVENT_ACK || e->type != PTL_EVENT_GET);
    
    if (e->type == PTL_EVENT_ACK && e->mlength < PTL_LARGE_THRESHOLD) {
62
63
64
65
        /* truncated message */
        mpi_errno = handler_send_complete(e);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    } else {
66
        REQ_PTL(sreq)->event_handler = handler_send_complete;
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_LARGE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#if 0

#undef FUNCNAME
#define FUNCNAME handler_pack_chunk
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
static int handler_pack_chunk(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const sreq = e->user_ptr;
    MPIDI_STATE_DECL(MPID_STATE_HANDLER_PACK_CHUNK);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_PACK_CHUNK);

90
91
92
93
94
95
96
97
    MPIU_Assert(e->type == PTL_EVENT_GET || e->type == PTL_EVENT_PUT);

    if (e->type == PTL_EVENT_PUT) {
        mpi_errno = handler_send_complete(e);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        goto fn_exit;
    }

98
    /* pack next chunk */
99
    MPI_nem_ptl_pack_byte(sreq->dev.segment_ptr, sreq->dev.segment_first, sreq->dev.segment_first + PTL_LARGE_THRESHOLD,
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
              REQ_PTL(sreq_)->chunk_buffer[1], &REQ_PTL(sreq)->overflow[1]);
    sreq->dev.segment_first += PTL_LARGE_THRESHOLD;

    /* notify receiver */
    ret = PtlPut(MPIDI_nem_ptl_global_md, 0, 0, PTL_ACK_REQ, vc_ptl->id,
                 vc_ptl->pt, ?????, 0, sreq,
                 NPTL_HEADER(?????, MPIDI_Process.my_pg_rank, me.match_bits));


 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_PACK_CHUNK);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME handler_multi_put
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/*
 * Placeholder handler: the body between function entry and exit is empty, so
 * it only records entry/exit and returns MPI_SUCCESS.
 */
static int handler_multi_put(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const sreq = e->user_ptr;
    MPIDI_STATE_DECL(MPID_STATE_HANDLER_MULTI_PUT);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_MULTI_PUT);

    /* nothing implemented yet */

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_MULTI_PUT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


#undef FUNCNAME
#define FUNCNAME handler_large_multi
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
static int handler_large_multi(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const sreq = e->user_ptr;
    MPIDI_STATE_DECL(MPID_STATE_HANDLER_LARGE_MULTI);

148
149
    MPIU_Assert(e->type == PTL_EVENT_ACK);

150
151
152
153
154
155
    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_LARGE_MULTI);
    if (e->mlength < PTL_LARGE_THRESHOLD) {
        /* truncated message */
        mpi_errno = handler_send_complete(e);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    } else {
156
        REQ_PTL(sreq)->event_handler = handler_pack_chunk;
157
158
159
160
    }
    
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_LARGE_MULTI);
161
162
163
164
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
165

166
#endif
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184


/* Send message for either isend or issend */
#undef FUNCNAME
#define FUNCNAME send_msg
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/*
 * Creates a send request and starts the transfer, choosing a path from the
 * message size and the datatype layout:
 *
 *   data_sz < PTL_LARGE_THRESHOLD (everything is put eagerly)
 *     - contiguous:     put straight from the user buffer via the global MD
 *     - noncontiguous:  put through an IOV-based MD bound to the request when
 *                       one IOV describes the whole message, otherwise pack
 *                       into chunk_buffer[0] and put from there
 *
 *   data_sz >= PTL_LARGE_THRESHOLD (first PTL_LARGE_THRESHOLD bytes are put
 *   with NPTL_LARGE set; the bytes after them are exposed through an ME on
 *   MPIDI_nem_ptl_get_pt)
 *     - contiguous:     ME covers the user buffer past the first chunk
 *     - noncontiguous:  ME and MD are IOV based when one IOV describes the
 *                       whole message, otherwise the whole message is packed
 *                       into chunk_buffer[0] and the ME covers that buffer
 *                       past the first chunk
 *
 * ssend_flag is OR-ed into the header built with NPTL_HEADER().
 *
 * On return *request holds the new request, or NULL if an error occurred (the
 * request is released and memory obtained through MPIU_CHKPMEM_MALLOC is
 * reaped).  Returns MPI_SUCCESS or an MPI error code.
 */
static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *buf, int count, MPI_Datatype datatype, int dest,
                    int tag, MPID_Comm *comm, int context_offset, struct MPID_Request **request)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    int ret;
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype *dt_ptr;
    MPID_Request *sreq = NULL;
    ptl_me_t me;
    int initial_iov_count, remaining_iov_count;
    ptl_md_t md;
    MPI_Aint last;
    MPIU_CHKPMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_SEND_MSG);

    MPIDI_FUNC_ENTER(MPID_STATE_SEND_MSG);

    MPID_nem_ptl_request_create_sreq(sreq, mpi_errno, comm);

    if (!vc_ptl->id_initialized) {
        mpi_errno = MPID_nem_ptl_init_id(vc);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
    MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "count=%d datatype=%#x contig=%d data_sz=%lu", count, datatype, dt_contig, data_sz));

    if (data_sz < PTL_LARGE_THRESHOLD) {
        /* Small message.  Send all data eagerly */
        if (dt_contig) {
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small contig message");
            REQ_PTL(sreq)->event_handler = handler_send_complete;
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "&REQ_PTL(sreq)->event_handler = %p", &(REQ_PTL(sreq)->event_handler));
            /* the data of a contiguous datatype starts dt_true_lb bytes into buf */
            ret = PtlPut(MPIDI_nem_ptl_global_md, (ptl_size_t)((char *)buf + dt_true_lb), data_sz, PTL_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                         NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                         NPTL_HEADER(ssend_flag, data_sz));
            MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
            DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.nid = %#x", vc_ptl->id.phys.nid);
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.pid = %#x", vc_ptl->id.phys.pid);
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "sreq = %p", sreq);
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "vc_ptl->pt = %d", vc_ptl->pt);
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "REQ_PTL(sreq)->event_handler = %p", REQ_PTL(sreq)->event_handler);
            goto fn_exit;
        }

        /* noncontig data */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small noncontig message");
        sreq->dev.segment_ptr = MPID_Segment_alloc();
        MPIU_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
        MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
        sreq->dev.segment_first = 0;
        sreq->dev.segment_size = data_sz;

        last = sreq->dev.segment_size;
        sreq->dev.iov_count = MPID_IOV_LIMIT;
        MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count);

        if (last == sreq->dev.segment_size) {
            /* IOV is able to describe entire message */
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    entire message fits in IOV");
            md.start = sreq->dev.iov;
            md.length = sreq->dev.iov_count;
            md.options = PTL_IOVEC;
            md.eq_handle = MPIDI_nem_ptl_eq;
            md.ct_handle = PTL_CT_NONE;
            ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md);
            MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));

            REQ_PTL(sreq)->event_handler = handler_send_complete;
            ret = PtlPut(REQ_PTL(sreq)->md, 0, data_sz, PTL_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                         NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                         NPTL_HEADER(ssend_flag, data_sz));
            MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
            DBG_MSG_PUT("sreq", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
            goto fn_exit;
        }

        /* IOV is not long enough to describe entire message */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    IOV too long: using bounce buffer");
        MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "chunk_buffer");
        sreq->dev.segment_first = 0;
        last = data_sz;
        MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, REQ_PTL(sreq)->chunk_buffer[0]);
        MPIU_Assert(last == sreq->dev.segment_size);
        REQ_PTL(sreq)->event_handler = handler_send_complete;
        ret = PtlPut(MPIDI_nem_ptl_global_md, (ptl_size_t)REQ_PTL(sreq)->chunk_buffer[0], data_sz, PTL_ACK_REQ,
                     vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                     NPTL_HEADER(ssend_flag, data_sz));
        MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
        DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
        goto fn_exit;
    }

    /* Large message.  Send first chunk of data and let receiver get the rest */
    if (dt_contig) {
        /* create ME for buffer so receiver can issue a GET for the data */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large contig message");
        /* the data of a contiguous datatype starts dt_true_lb bytes into buf */
        me.start = (char *)buf + dt_true_lb + PTL_LARGE_THRESHOLD;
        me.length = data_sz - PTL_LARGE_THRESHOLD;
        me.ct_handle = PTL_CT_NONE;
        me.uid = PTL_UID_ANY;
        me.options = ( PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE |
                       PTL_ME_EVENT_UNLINK_DISABLE );
        me.match_id = vc_ptl->id;
        me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank);
        me.ignore_bits = 0;
        me.min_free = 0;

        ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq, &REQ_PTL(sreq)->me);
        MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));
        DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);

        REQ_PTL(sreq)->large = TRUE;

        REQ_PTL(sreq)->event_handler = handler_large;
        ret = PtlPut(MPIDI_nem_ptl_global_md, (ptl_size_t)((char *)buf + dt_true_lb), PTL_LARGE_THRESHOLD, PTL_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                     NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                     NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
        MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
        DBG_MSG_PUT("global", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
        goto fn_exit;
    }

    /* Large noncontig data */
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large noncontig message");
    sreq->dev.segment_ptr = MPID_Segment_alloc();
    MPIU_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
    MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
    sreq->dev.segment_first = 0;
    sreq->dev.segment_size = data_sz;

    last = PTL_LARGE_THRESHOLD;
    sreq->dev.iov_count = MPID_IOV_LIMIT;
    MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count);

    initial_iov_count = sreq->dev.iov_count;
    sreq->dev.segment_first = last;

    if (last == PTL_LARGE_THRESHOLD) {
        /* first chunk of message fits into IOV */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    first chunk fits in IOV");
        if (initial_iov_count < MPID_IOV_LIMIT) {
            /* There may be space for the rest of the message in this IOV */
            sreq->dev.iov_count = MPID_IOV_LIMIT - sreq->dev.iov_count;
            last = sreq->dev.segment_size;

            MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last,
                                     &sreq->dev.iov[initial_iov_count], &sreq->dev.iov_count);
            remaining_iov_count = sreq->dev.iov_count;

            if (last == sreq->dev.segment_size) {
                /* Entire message fit in one IOV */
                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    rest of message fits in one IOV");
                /* Create ME for remaining data */
                me.start = &sreq->dev.iov[initial_iov_count];
                me.length = remaining_iov_count;
                me.ct_handle = PTL_CT_NONE;
                me.uid = PTL_UID_ANY;
                me.options = ( PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE |
                               PTL_ME_EVENT_UNLINK_DISABLE | PTL_IOVEC );
                me.match_id = vc_ptl->id;
                me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank);
                me.ignore_bits = 0;
                me.min_free = 0;

                ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq,
                                  &REQ_PTL(sreq)->me);
                MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));
                DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);

                /* Create MD for first chunk */
                md.start = sreq->dev.iov;
                md.length = initial_iov_count;
                md.options = PTL_IOVEC;
                md.eq_handle = MPIDI_nem_ptl_eq;
                md.ct_handle = PTL_CT_NONE;
                ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md);
                MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));

                REQ_PTL(sreq)->large = TRUE;

                REQ_PTL(sreq)->event_handler = handler_large;
                ret = PtlPut(REQ_PTL(sreq)->md, 0, PTL_LARGE_THRESHOLD, PTL_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                             NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                             NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
                MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
                DBG_MSG_PUT("req", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
                goto fn_exit;
            }
        }
        /* First chunk of message fits, but the rest doesn't */
        /* Don't handle this case separately */
    }

    /* Message doesn't fit in IOV, pack into buffers */
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    Message doesn't fit in IOV: use bounce buffer");

    /* FIXME: For now, allocate a single large buffer to hold entire message */
    MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "chunk_buffer");
    MPI_nem_ptl_pack_byte(sreq->dev.segment_ptr, 0, data_sz, REQ_PTL(sreq)->chunk_buffer[0], &REQ_PTL(sreq)->overflow[0]);

    /* create ME for buffer so receiver can issue a GET for the data.  As in
     * the contiguous and IOV paths above, only the bytes that follow the
     * eagerly-sent first chunk are exposed, so the receiver sees the same
     * layout whichever path the sender took. */
    me.start = (char *)REQ_PTL(sreq)->chunk_buffer[0] + PTL_LARGE_THRESHOLD;
    me.length = data_sz - PTL_LARGE_THRESHOLD;
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = ( PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE |
                   PTL_ME_EVENT_UNLINK_DISABLE );
    me.match_id = vc_ptl->id;
    me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank);
    me.ignore_bits = 0;
    me.min_free = 0;

    DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);
    ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq, &REQ_PTL(sreq)->me);
    MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));

    REQ_PTL(sreq)->large = TRUE;

    REQ_PTL(sreq)->event_handler = handler_large;
    ret = PtlPut(MPIDI_nem_ptl_global_md, (ptl_size_t)REQ_PTL(sreq)->chunk_buffer[0], PTL_LARGE_THRESHOLD, PTL_ACK_REQ,
                 vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                 NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
    MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
    DBG_MSG_PUT("global", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
    goto fn_exit;

    /* Disabled sketch of a two-buffer, multi-chunk protocol.  It sits after
     * an unconditional goto and uses names (req_PTL, sreq_) that are not
     * declared in this function. */
#if 0
    sreq->dev.segment_first = 0;

    /* Pack first chunk of message */
    MPIU_CHKPMEM_MALLOC(req_PTL(sreq_)->chunk_buffer, void *, PTL_LARGE_THRESHOLD, mpi_errno, "chunk_buffer");
    MPI_nem_ptl_pack_byte(sreq->dev.segment_ptr, 0, PTL_LARGE_THRESHOLD, REQ_PTL(sreq_)->chunk_buffer[0],
              &REQ_PTL(sreq)->overflow[0]);
    sreq->dev.segment_first = PTL_LARGE_THRESHOLD;

    /* Pack second chunk of message */
    MPIU_CHKPMEM_MALLOC(req_PTL(sreq_)->chunk_buffer, void *, PTL_LARGE_THRESHOLD, mpi_errno, "chunk_buffer");
    MPI_nem_ptl_pack_byte(sreq->dev.segment_ptr, sreq->dev.segment_first, sreq->dev.segment_first + PTL_LARGE_THRESHOLD,
              REQ_PTL(sreq_)->chunk_buffer[1], &REQ_PTL(sreq)->overflow[1]);
    sreq->dev.segment_first += PTL_LARGE_THRESHOLD;

    /* create ME for second chunk */
    me.start = REQ_PTL(sreq_)->chunk_buffer[1];
    me.length = PTL_LARGE_THRESHOLD;
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = ( PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE |
                   PTL_ME_EVENT_UNLINK_DISABLE );
    me.match_id = vc_ptl->id;
    me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank);
    me.ignore_bits = 0;
    me.min_free = 0;

    ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq, &REQ_PTL(sreq)->me);
    MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));


    REQ_PTL(sreq)->large = TRUE;

    REQ_PTL(sreq)->event_handler = handler_large_multi;
    ret = PtlPut(MPIDI_nem_ptl_global_md, (ptl_size_t)REQ_PTL(sreq_)->chunk_buffer[0], PTL_LARGE_THRESHOLD, PTL_ACK_REQ, vc_ptl->id,
                 vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                 NPTL_HEADER(ssend_flag | NPTL_LARGE | NPTL_MULTIPLE, data_sz));
    MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
#endif

 fn_exit:
    *request = sreq;
    MPIU_CHKPMEM_COMMIT();
    MPIDI_FUNC_EXIT(MPID_STATE_SEND_MSG);
    return mpi_errno;
 fn_fail:
    if (sreq) {
        MPID_Request_release(sreq);
        /* make sure the caller receives NULL through *request */
        sreq = NULL;
    }
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPID_nem_ptl_isend
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/*
 * Starts a send by handing every argument to send_msg() together with a
 * header flag value of 0.  The result of send_msg() is returned unchanged.
 */
int MPID_nem_ptl_isend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
                       MPID_Comm *comm, int context_offset, struct MPID_Request **request)
{
    int mpi_errno = MPI_SUCCESS;
    const ptl_hdr_data_t no_flags = 0;   /* no extra header flags for this send */
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ISEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_ISEND);
    mpi_errno = send_msg(no_flags, vc, buf, count, datatype, dest, tag, comm, context_offset, request);
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ISEND);

    return mpi_errno;
}


#undef FUNCNAME
#define FUNCNAME MPID_nem_ptl_issend
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/*
 * Starts a send by handing every argument to send_msg() together with the
 * NPTL_SSEND header flag.  The result of send_msg() is returned unchanged.
 */
int MPID_nem_ptl_issend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
                        MPID_Comm *comm, int context_offset, struct MPID_Request **request)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ISSEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_ISSEND);

    mpi_errno = send_msg(NPTL_SSEND, vc, buf, count, datatype, dest, tag, comm, context_offset, request);

    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ISSEND);
    return mpi_errno;
}

#undef FUNCNAME
#define FUNCNAME MPID_nem_ptl_cancel_send
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/*
 * Cancel hook for sends.  Cancellation is not implemented: neither vc nor
 * sreq is used, and the function always records a fatal MPI_ERR_OTHER
 * ("**notimpl") in mpi_errno and returns it.
 */
int MPID_nem_ptl_cancel_send(struct MPIDI_VC *vc,  struct MPID_Request *sreq)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_CANCEL_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_CANCEL_SEND);

    /* portals4 has no way of cancelling a send */
    MPIU_ERR_SETFATAL(mpi_errno, MPI_ERR_OTHER, "**notimpl");

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_CANCEL_SEND);
    return mpi_errno;
    /* NOTE(review): no visible statement jumps to fn_fail; presumably kept
     * for the file's usual fn_exit/fn_fail layout */
 fn_fail:
    goto fn_exit;
}