ch3u_handle_recv_req.c 69.3 KB
Newer Older
1
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpidimpl.h"
#include "mpidrma.h"

static int create_derived_datatype(MPID_Request * rreq, MPID_Datatype ** dtp);

#undef FUNCNAME
#define FUNCNAME MPIDI_CH3U_Handle_recv_req
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
16
int MPIDI_CH3U_Handle_recv_req(MPIDI_VC_t * vc, MPID_Request * rreq, int *complete)
{
    /* Reentrancy guard: this routine must never be entered recursively
     * (checked only via the assertion below). */
    static int in_routine ATTRIBUTE((unused)) = FALSE;
    int mpi_errno = MPI_SUCCESS;
    int (*handler) (MPIDI_VC_t *, MPID_Request *, int *);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_RECV_REQ);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_RECV_REQ);

    MPIU_Assert(in_routine == FALSE);
    in_routine = TRUE;

    handler = rreq->dev.OnDataAvail;
    if (handler != NULL) {
        /* The request carries a data-available action; delegate to it.
         * The handler sets *complete and reports any error. */
        mpi_errno = handler(vc, rreq, complete);
    }
    else {
        /* No action registered: this must be a plain receive, so just
         * finish the transfer and decrement the completion counter. */
        MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_RECV);
        MPIDI_CH3U_Request_complete(rreq);
        *complete = TRUE;
    }

    in_routine = FALSE;
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_RECV_REQ);
    return mpi_errno;
}

/* ----------------------------------------------------------------------- */
44
/* Here are the functions that implement the actions that are taken when
45
46
47
48
49
 * data is available for a receive request (or other completion operations)
 * These include "receive" requests that are part of the RMA implementation.
 *
 * The convention for the names of routines that are called when data is
 * available is
50
51
 *    MPIDI_CH3_ReqHandler_<type>(MPIDI_VC_t *, MPID_Request *, int *)
 * as in
52
53
 *    MPIDI_CH3_ReqHandler_...
 *
54
 * ToDo:
55
56
 *    We need a way for each of these functions to describe what they are,
 *    so that given a pointer to one of these functions, we can retrieve
57
 *    a description of the routine.  We may want to use a static string
58
59
60
61
 *    and require the user to maintain thread-safety, at least while
 *    accessing the string.
 */
/* ----------------------------------------------------------------------- */
62
63
int MPIDI_CH3_ReqHandler_RecvComplete(MPIDI_VC_t * vc ATTRIBUTE((unused)),
                                      MPID_Request * rreq, int *complete)
{
    /* Plain receive with nothing left to do: completing the request
     * decrements its completion counter (CC) and signals the caller. */
    MPIDI_CH3U_Request_complete(rreq);
    *complete = TRUE;
    return MPI_SUCCESS;
}

#undef FUNCNAME
72
#define FUNCNAME MPIDI_CH3_ReqHandler_PutRecvComplete
73
74
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
75
int MPIDI_CH3_ReqHandler_PutRecvComplete(MPIDI_VC_t * vc, MPID_Request * rreq, int *complete)
76
77
78
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Win *win_ptr;
79
80
    MPI_Win source_win_handle = rreq->dev.source_win_handle;
    MPIDI_CH3_Pkt_flags_t flags = rreq->dev.flags;
81
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTRECVCOMPLETE);
82

83
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTRECVCOMPLETE);
84

85
    /* NOTE: It is possible that this request is already completed before
86
87
88
89
90
91
92
93
94
95
96
97
     * entering this handler. This happens when this req handler is called
     * within the same req handler on the same request.
     * Consider this case: req is queued up in SHM queue with ref count of 2:
     * one is for completing the request and another is for dequeueing from
     * the queue. The first called req handler on this request completed
     * this request and decrement ref counter to 1. Request is still in the
     * queue. Within this handler, we call the req handler on the same request
     * for the second time (for example when making progress on SHM queue),
     * and the second called handler also tries to complete this request,
     * which leads to wrong execution.
     * Here we check if req is already completed to prevent processing the
     * same request twice. */
98
99
100
101
102
    if (MPID_Request_is_complete(rreq)) {
        *complete = FALSE;
        goto fn_exit;
    }

103
104
    MPID_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);

105
106
107
108
    /* mark data transfer as complete and decrement CC */
    MPIDI_CH3U_Request_complete(rreq);

    /* NOTE: finish_op_on_target() must be called after we complete this request,
109
110
111
112
     * because inside finish_op_on_target() we may call this request handler
     * on the same request again (in release_lock()). Marking this request as
     * completed will prevent us from processing the same request twice. */
    mpi_errno = finish_op_on_target(win_ptr, vc, FALSE /* has no response data */ ,
113
                                    flags, source_win_handle);
114
115
    if (mpi_errno != MPI_SUCCESS)
        MPIU_ERR_POP(mpi_errno);
116

117
    *complete = TRUE;
118

119
  fn_exit:
120
121
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTRECVCOMPLETE);
    return MPI_SUCCESS;
122

123
    /* --BEGIN ERROR HANDLING-- */
124
  fn_fail:
125
126
127
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
128
129


130
131
132
133
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_ReqHandler_AccumRecvComplete
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
134
int MPIDI_CH3_ReqHandler_AccumRecvComplete(MPIDI_VC_t * vc, MPID_Request * rreq, int *complete)
135
136
137
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Win *win_ptr;
138
139
    MPI_Win source_win_handle = rreq->dev.source_win_handle;
    MPIDI_CH3_Pkt_flags_t flags = rreq->dev.flags;
140
141
    MPI_Datatype predef_datatype;
    MPI_Aint predef_count, predef_dtp_size;
142
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMRECVCOMPLETE);
143

144
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMRECVCOMPLETE);
145

146
    /* NOTE: It is possible that this request is already completed before
147
148
149
150
151
152
153
154
155
156
157
158
     * entering this handler. This happens when this req handler is called
     * within the same req handler on the same request.
     * Consider this case: req is queued up in SHM queue with ref count of 2:
     * one is for completing the request and another is for dequeueing from
     * the queue. The first called req handler on this request completed
     * this request and decrement ref counter to 1. Request is still in the
     * queue. Within this handler, we call the req handler on the same request
     * for the second time (for example when making progress on SHM queue),
     * and the second called handler also tries to complete this request,
     * which leads to wrong execution.
     * Here we check if req is already completed to prevent processing the
     * same request twice. */
159
160
161
162
163
    if (MPID_Request_is_complete(rreq)) {
        *complete = FALSE;
        goto fn_exit;
    }

164
    MPID_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);
165

166
    MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_ACCUM_RECV);
167

168
169
170
    if (MPIR_DATATYPE_IS_PREDEFINED(rreq->dev.datatype))
        predef_datatype = rreq->dev.datatype;
    else {
171
        predef_datatype = rreq->dev.datatype_ptr->basic_type;
172
173
174
175
176
177
178
    }
    MPIU_Assert(predef_datatype != MPI_DATATYPE_NULL);

    MPID_Datatype_get_size_macro(predef_datatype, predef_dtp_size);
    predef_count = rreq->dev.recv_data_sz / predef_dtp_size;
    MPIU_Assert(predef_count > 0);

179
180
181
    if (win_ptr->shm_allocated == TRUE)
        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
    /* accumulate data from tmp_buf into user_buf */
182
183
184
    mpi_errno = do_accumulate_op(rreq->dev.user_buf, predef_count, predef_datatype,
                                 rreq->dev.real_user_buf, rreq->dev.user_count, rreq->dev.datatype,
                                 rreq->dev.stream_offset, rreq->dev.op);
185
186
187
188
189
190
    if (win_ptr->shm_allocated == TRUE)
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
    if (mpi_errno) {
        MPIU_ERR_POP(mpi_errno);
    }

191
    /* free the temporary buffer */
192
    MPIDI_CH3U_SRBuf_free(rreq);
193

194
195
196
197
    /* mark data transfer as complete and decrement CC */
    MPIDI_CH3U_Request_complete(rreq);

    /* NOTE: finish_op_on_target() must be called after we complete this request,
198
199
200
201
     * because inside finish_op_on_target() we may call this request handler
     * on the same request again (in release_lock()). Marking this request as
     * completed will prevent us from processing the same request twice. */
    mpi_errno = finish_op_on_target(win_ptr, vc, FALSE /* has no response data */ ,
202
                                    flags, source_win_handle);
203
204
    if (mpi_errno != MPI_SUCCESS)
        MPIU_ERR_POP(mpi_errno);
205

206
    *complete = TRUE;
207

208
  fn_exit:
209
210
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMRECVCOMPLETE);
    return MPI_SUCCESS;
211

212
    /* --BEGIN ERROR HANDLING-- */
213
  fn_fail:
214
215
216
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
217
218


219
220
221
222
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_ReqHandler_GaccumRecvComplete
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
223
int MPIDI_CH3_ReqHandler_GaccumRecvComplete(MPIDI_VC_t * vc, MPID_Request * rreq, int *complete)
224
225
226
227
228
229
230
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Win *win_ptr;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_get_accum_resp_t *get_accum_resp_pkt = &upkt.get_accum_resp;
    MPID_Request *resp_req;
    MPID_IOV iov[MPID_IOV_LIMIT];
231
    int iovcnt;
232
    int is_contig;
233
234
    MPI_Datatype predef_datatype;
    MPI_Aint predef_count, predef_dtp_size;
235
236
237
238
239
240
241
    MPIU_CHKPMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMRECVCOMPLETE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMRECVCOMPLETE);

    MPID_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);

242
243
244
    if (MPIR_DATATYPE_IS_PREDEFINED(rreq->dev.datatype))
        predef_datatype = rreq->dev.datatype;
    else {
245
        predef_datatype = rreq->dev.datatype_ptr->basic_type;
246
247
248
249
250
251
252
    }
    MPIU_Assert(predef_datatype != MPI_DATATYPE_NULL);

    MPID_Datatype_get_size_macro(predef_datatype, predef_dtp_size);
    predef_count = rreq->dev.recv_data_sz / predef_dtp_size;
    MPIU_Assert(predef_count > 0);

253
    MPIDI_Pkt_init(get_accum_resp_pkt, MPIDI_CH3_PKT_GET_ACCUM_RESP);
254
255
    get_accum_resp_pkt->request_handle = rreq->dev.resp_request_handle;
    get_accum_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
256
    get_accum_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
257
258
    if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
        rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE)
259
        get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
260
261
    if ((rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
        (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
262
        get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
263

264
265
    MPID_Datatype_is_contig(rreq->dev.datatype, &is_contig);

266
267
268
269
    /* Copy data into a temporary buffer */
    resp_req = MPID_Request_create();
    MPIU_ERR_CHKANDJUMP(resp_req == NULL, mpi_errno, MPI_ERR_OTHER, "**nomemreq");
    MPIU_Object_set_ref(resp_req, 1);
270
    MPIDI_Request_set_type(resp_req, MPIDI_REQUEST_TYPE_GET_ACCUM_RESP);
271

272
    MPIU_CHKPMEM_MALLOC(resp_req->dev.user_buf, void *, rreq->dev.recv_data_sz,
273
                        mpi_errno, "GACC resp. buffer");
274

275
276
277
    if (win_ptr->shm_allocated == TRUE)
        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);

278
    if (is_contig) {
279
280
281
        MPIU_Memcpy(resp_req->dev.user_buf,
                    (void *) ((char *) rreq->dev.real_user_buf + rreq->dev.stream_offset),
                    rreq->dev.recv_data_sz);
282
283
    }
    else {
284
        MPID_Segment *seg = MPID_Segment_alloc();
285
286
        MPI_Aint first = rreq->dev.stream_offset;
        MPI_Aint last = first + rreq->dev.recv_data_sz;
287

288
289
290
        if (seg == NULL) {
            if (win_ptr->shm_allocated == TRUE)
                MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
291
        }
292
293
294
295
        MPIU_ERR_CHKANDJUMP1(seg == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s",
                             "MPID_Segment");
        MPID_Segment_init(rreq->dev.real_user_buf, rreq->dev.user_count, rreq->dev.datatype, seg,
                          0);
296
        MPID_Segment_pack(seg, first, &last, resp_req->dev.user_buf);
297
        MPID_Segment_free(seg);
298
    }
299

300
    /* accumulate data from tmp_buf into user_buf */
301
302
303
    mpi_errno = do_accumulate_op(rreq->dev.user_buf, predef_count, predef_datatype,
                                 rreq->dev.real_user_buf, rreq->dev.user_count, rreq->dev.datatype,
                                 rreq->dev.stream_offset, rreq->dev.op);
304
305
306
307

    if (win_ptr->shm_allocated == TRUE)
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);

308
309
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);
310

Xin Zhao's avatar
Xin Zhao committed
311
312
    resp_req->dev.OnFinal = MPIDI_CH3_ReqHandler_GaccumSendComplete;
    resp_req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_GaccumSendComplete;
313
314
    resp_req->dev.target_win_handle = rreq->dev.target_win_handle;
    resp_req->dev.flags = rreq->dev.flags;
315

316
    /* here we increment the Active Target counter to guarantee the GET-like
317
     * operation are completed when counter reaches zero. */
318
    win_ptr->at_completion_counter++;
319

320
321
322
    iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_accum_resp_pkt;
    iov[0].MPID_IOV_LEN = sizeof(*get_accum_resp_pkt);
    iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) ((char *) resp_req->dev.user_buf);
323
    iov[1].MPID_IOV_LEN = rreq->dev.recv_data_sz;
324
    iovcnt = 2;
325

326
    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
327
    mpi_errno = MPIDI_CH3_iSendv(vc, resp_req, iov, iovcnt);
328
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
329
330
331
332
333
334

    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");

    /* Mark get portion as handled */
    rreq->dev.resp_request_handle = MPI_REQUEST_NULL;

335
    MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_GET_ACCUM_RECV);
336

337
    /* free the temporary buffer */
338
    MPIDI_CH3U_SRBuf_free(rreq);
339

340
341
342
    /* mark data transfer as complete and decrement CC */
    MPIDI_CH3U_Request_complete(rreq);
    *complete = TRUE;
343
  fn_exit:
344
    MPIU_CHKPMEM_COMMIT();
345
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMRECVCOMPLETE);
346
    return MPI_SUCCESS;
347
348

    /* --BEGIN ERROR HANDLING-- */
349
  fn_fail:
350
351
352
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
    /* --END ERROR HANDLING-- */
353
354
}

355
356
357
358
359

#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_ReqHandler_FOPRecvComplete
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
360
int MPIDI_CH3_ReqHandler_FOPRecvComplete(MPIDI_VC_t * vc, MPID_Request * rreq, int *complete)
361
362
363
364
365
366
367
368
369
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Win *win_ptr = NULL;
    MPI_Aint type_size;
    MPID_Request *resp_req = NULL;
    MPID_IOV iov[MPID_IOV_LIMIT];
    int iovcnt;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_fop_resp_t *fop_resp_pkt = &upkt.fop_resp;
370
    int is_contig;
371
372
373
374
375
    MPIU_CHKPMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPRECVCOMPLETE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPRECVCOMPLETE);

Xin Zhao's avatar
Xin Zhao committed
376
377
    MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_FOP_RECV);

378
379
380
381
    MPID_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);

    MPID_Datatype_get_size_macro(rreq->dev.datatype, type_size);

382
383
    MPID_Datatype_is_contig(rreq->dev.datatype, &is_contig);

384
385
386
    /* Create response request */
    resp_req = MPID_Request_create();
    MPIU_ERR_CHKANDJUMP(resp_req == NULL, mpi_errno, MPI_ERR_OTHER, "**nomemreq");
Xin Zhao's avatar
Xin Zhao committed
387
    MPIDI_Request_set_type(resp_req, MPIDI_REQUEST_TYPE_FOP_RESP);
388
389
390
391
392
393
    MPIU_Object_set_ref(resp_req, 1);
    resp_req->dev.OnFinal = MPIDI_CH3_ReqHandler_FOPSendComplete;
    resp_req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_FOPSendComplete;
    resp_req->dev.target_win_handle = rreq->dev.target_win_handle;
    resp_req->dev.flags = rreq->dev.flags;

394
    MPIU_CHKPMEM_MALLOC(resp_req->dev.user_buf, void *, type_size, mpi_errno, "FOP resp. buffer");
395
396

    /* here we increment the Active Target counter to guarantee the GET-like
397
     * operation are completed when counter reaches zero. */
398
399
400
401
402
403
    win_ptr->at_completion_counter++;

    if (win_ptr->shm_allocated == TRUE)
        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);

    /* Copy data into a temporary buffer in response request */
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
    if (is_contig) {
        MPIU_Memcpy(resp_req->dev.user_buf, rreq->dev.real_user_buf, type_size);
    }
    else {
        MPID_Segment *seg = MPID_Segment_alloc();
        MPI_Aint last = type_size;

        if (seg == NULL) {
            if (win_ptr->shm_allocated == TRUE)
                MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
        }
        MPIU_ERR_CHKANDJUMP1(seg == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s",
                             "MPID_Segment");
        MPID_Segment_init(rreq->dev.real_user_buf, 1, rreq->dev.datatype, seg, 0);
        MPID_Segment_pack(seg, 0, &last, resp_req->dev.user_buf);
        MPID_Segment_free(seg);
    }
421
422
423

    /* Perform accumulate computation */
    if (rreq->dev.op != MPI_NO_OP) {
424
425
426
        mpi_errno = do_accumulate_op(rreq->dev.user_buf, 1, rreq->dev.datatype,
                                     rreq->dev.real_user_buf, 1, rreq->dev.datatype, 0,
                                     rreq->dev.op);
427
428
429
430
431
    }

    if (win_ptr->shm_allocated == TRUE)
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);

432
433
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);
434

435
436
437
438
439
440
441
442
443
444
445
446
    /* Send back data */
    MPIDI_Pkt_init(fop_resp_pkt, MPIDI_CH3_PKT_FOP_RESP);
    fop_resp_pkt->request_handle = rreq->dev.resp_request_handle;
    fop_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
    fop_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
    if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
        rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE)
        fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
    if ((rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
        (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
        fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;

447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
    iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) fop_resp_pkt;
    iov[0].MPID_IOV_LEN = sizeof(*fop_resp_pkt);
    iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) ((char *) resp_req->dev.user_buf);
    iov[1].MPID_IOV_LEN = type_size;
    iovcnt = 2;

    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iSendv(vc, resp_req, iov, iovcnt);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);

    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");

    /* free the temporary buffer */
    MPIU_Free((char *) rreq->dev.user_buf);

    /* mark data transfer as complete and decrement CC */
    MPIDI_CH3U_Request_complete(rreq);
    *complete = TRUE;

  fn_exit:
    MPIU_CHKPMEM_COMMIT();
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPRECVCOMPLETE);
    return MPI_SUCCESS;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
    /* --END ERROR HANDLING-- */
475
476
477
}


478
#undef FUNCNAME
479
#define FUNCNAME MPIDI_CH3_ReqHandler_PutDerivedDTRecvComplete
480
481
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
482
483
int MPIDI_CH3_ReqHandler_PutDerivedDTRecvComplete(MPIDI_VC_t * vc ATTRIBUTE((unused)),
                                                  MPID_Request * rreq, int *complete)
484
485
{
    int mpi_errno = MPI_SUCCESS;
486
    MPID_Datatype *new_dtp = NULL;
487
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTDERIVEDDTRECVCOMPLETE);
488

489
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTDERIVEDDTRECVCOMPLETE);
490

491
492
    /* create derived datatype */
    create_derived_datatype(rreq, &new_dtp);
493

494
    /* update request to get the data */
495
    MPIDI_Request_set_type(rreq, MPIDI_REQUEST_TYPE_PUT_RECV);
496
    rreq->dev.datatype = new_dtp->handle;
497
498
    rreq->dev.recv_data_sz = new_dtp->size * rreq->dev.user_count;

499
500
    rreq->dev.datatype_ptr = new_dtp;
    /* this will cause the datatype to be freed when the
501
     * request is freed. free dtype_info here. */
502
    MPIU_Free(rreq->dev.dtype_info);
503
504
505
506

    rreq->dev.segment_ptr = MPID_Segment_alloc();
    MPIU_ERR_CHKANDJUMP1((rreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**nomem",
                         "**nomem %s", "MPID_Segment_alloc");
507

508
    MPID_Segment_init(rreq->dev.user_buf,
509
                      rreq->dev.user_count, rreq->dev.datatype, rreq->dev.segment_ptr, 0);
510
511
    rreq->dev.segment_first = 0;
    rreq->dev.segment_size = rreq->dev.recv_data_sz;
512

513
514
    mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
    if (mpi_errno != MPI_SUCCESS) {
515
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|loadrecviov");
516
    }
517
518
519
    if (!rreq->dev.OnDataAvail)
        rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_PutRecvComplete;

520
    *complete = FALSE;
521
  fn_fail:
522
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTDERIVEDDTRECVCOMPLETE);
523
524
525
526
    return mpi_errno;
}

#undef FUNCNAME
527
#define FUNCNAME MPIDI_CH3_ReqHandler_AccumDerivedDTRecvComplete
528
529
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
530
531
int MPIDI_CH3_ReqHandler_AccumDerivedDTRecvComplete(MPIDI_VC_t * vc ATTRIBUTE((unused)),
                                                    MPID_Request * rreq, int *complete)
532
533
{
    int mpi_errno = MPI_SUCCESS;
534
    MPID_Datatype *new_dtp = NULL;
535
536
    MPI_Aint predef_type_extent, predef_type_size;
    MPI_Aint total_len, rest_len, stream_elem_count;
537
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMDERIVEDDTRECVCOMPLETE);
538

539
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMDERIVEDDTRECVCOMPLETE);
540

541
542
    /* create derived datatype */
    create_derived_datatype(rreq, &new_dtp);
543

544
    /* update new request to get the data */
545
    MPIDI_Request_set_type(rreq, MPIDI_REQUEST_TYPE_ACCUM_RECV);
546

547
548
    MPID_Datatype_get_size_macro(new_dtp->basic_type, predef_type_size);
    MPID_Datatype_get_extent_macro(new_dtp->basic_type, predef_type_extent);
549

550
551
552
553
554
555
556
557
558
559
560
    MPIU_Assert(!MPIDI_Request_get_srbuf_flag(rreq));
    /* allocate a SRBuf for receiving stream unit */
    MPIDI_CH3U_SRBuf_alloc(rreq, MPIDI_CH3U_SRBuf_size);
    /* --BEGIN ERROR HANDLING-- */
    if (rreq->dev.tmpbuf_sz == 0) {
        MPIU_DBG_MSG(CH3_CHANNEL, TYPICAL, "SRBuf allocation failure");
        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
                                         FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem",
                                         "**nomem %d", MPIDI_CH3U_SRBuf_size);
        rreq->status.MPI_ERROR = mpi_errno;
        goto fn_fail;
561
    }
562
    /* --END ERROR HANDLING-- */
563

564
    rreq->dev.user_buf = rreq->dev.tmpbuf;
565
    rreq->dev.datatype = new_dtp->handle;
566
567
568
569
570
571

    total_len = new_dtp->size * rreq->dev.user_count;
    rest_len = total_len - rreq->dev.stream_offset;
    stream_elem_count = MPIDI_CH3U_SRBuf_size / predef_type_extent;

    rreq->dev.recv_data_sz = MPIR_MIN(rest_len, stream_elem_count * predef_type_size);
572
573
    rreq->dev.datatype_ptr = new_dtp;
    /* this will cause the datatype to be freed when the
574
     * request is freed. free dtype_info here. */
575
    MPIU_Free(rreq->dev.dtype_info);
576
577
578
579

    rreq->dev.segment_ptr = MPID_Segment_alloc();
    MPIU_ERR_CHKANDJUMP1((rreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**nomem",
                         "**nomem %s", "MPID_Segment_alloc");
580

581
    MPID_Segment_init(rreq->dev.user_buf,
582
                      (rreq->dev.recv_data_sz / predef_type_size),
583
                      new_dtp->basic_type, rreq->dev.segment_ptr, 0);
584
585
    rreq->dev.segment_first = 0;
    rreq->dev.segment_size = rreq->dev.recv_data_sz;
586

587
588
    mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
    if (mpi_errno != MPI_SUCCESS) {
589
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|loadrecviov");
590
591
    }
    if (!rreq->dev.OnDataAvail)
592
593
        rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_AccumRecvComplete;

594
    *complete = FALSE;
595
  fn_fail:
596
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMDERIVEDDTRECVCOMPLETE);
597
598
599
    return mpi_errno;
}

600
601

#undef FUNCNAME
602
#define FUNCNAME MPIDI_CH3_ReqHandler_GaccumDerivedDTRecvComplete
603
604
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
605
606
int MPIDI_CH3_ReqHandler_GaccumDerivedDTRecvComplete(MPIDI_VC_t * vc ATTRIBUTE((unused)),
                                                     MPID_Request * rreq, int *complete)
607
608
{
    int mpi_errno = MPI_SUCCESS;
609
    MPID_Datatype *new_dtp = NULL;
610
611
    MPI_Aint predef_type_extent, predef_type_size;
    MPI_Aint total_len, rest_len, stream_elem_count;
612
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMDERIVEDDTRECVCOMPLETE);
613

614
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMDERIVEDDTRECVCOMPLETE);
615

616
617
    /* create derived datatype */
    create_derived_datatype(rreq, &new_dtp);
618

619
    /* update new request to get the data */
620
    MPIDI_Request_set_type(rreq, MPIDI_REQUEST_TYPE_GET_ACCUM_RECV);
621

622
623
    MPID_Datatype_get_size_macro(new_dtp->basic_type, predef_type_size);
    MPID_Datatype_get_extent_macro(new_dtp->basic_type, predef_type_extent);
624

625
626
627
628
629
630
631
632
633
634
635
    MPIU_Assert(!MPIDI_Request_get_srbuf_flag(rreq));
    /* allocate a SRBuf for receiving stream unit */
    MPIDI_CH3U_SRBuf_alloc(rreq, MPIDI_CH3U_SRBuf_size);
    /* --BEGIN ERROR HANDLING-- */
    if (rreq->dev.tmpbuf_sz == 0) {
        MPIU_DBG_MSG(CH3_CHANNEL, TYPICAL, "SRBuf allocation failure");
        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
                                         FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem",
                                         "**nomem %d", MPIDI_CH3U_SRBuf_size);
        rreq->status.MPI_ERROR = mpi_errno;
        goto fn_fail;
636
    }
637
    /* --END ERROR HANDLING-- */
638

639
    rreq->dev.user_buf = rreq->dev.tmpbuf;
640
    rreq->dev.datatype = new_dtp->handle;
641
642
643
644
645
646

    total_len = new_dtp->size * rreq->dev.user_count;
    rest_len = total_len - rreq->dev.stream_offset;
    stream_elem_count = MPIDI_CH3U_SRBuf_size / predef_type_extent;

    rreq->dev.recv_data_sz = MPIR_MIN(rest_len, stream_elem_count * predef_type_size);
647
648
    rreq->dev.datatype_ptr = new_dtp;
    /* this will cause the datatype to be freed when the
649
     * request is freed. free dtype_info here. */
650
651
    MPIU_Free(rreq->dev.dtype_info);

652
653
654
    rreq->dev.segment_ptr = MPID_Segment_alloc();
    MPIU_ERR_CHKANDJUMP1((rreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**nomem",
                         "**nomem %s", "MPID_Segment_alloc");
655
656

    MPID_Segment_init(rreq->dev.user_buf,
657
                      (rreq->dev.recv_data_sz / predef_type_size),
658
                      new_dtp->basic_type, rreq->dev.segment_ptr, 0);
659
660
661
662
663
    rreq->dev.segment_first = 0;
    rreq->dev.segment_size = rreq->dev.recv_data_sz;

    mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
    if (mpi_errno != MPI_SUCCESS) {
664
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|loadrecviov");
665
666
    }
    if (!rreq->dev.OnDataAvail)
667
        rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_GaccumRecvComplete;
668
669

    *complete = FALSE;
670
  fn_fail:
671
672
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMDERIVEDDTRECVCOMPLETE);
    return mpi_errno;
673
674
675
}


Xin Zhao's avatar
Xin Zhao committed
676

677
#undef FUNCNAME
678
#define FUNCNAME MPIDI_CH3_ReqHandler_GetDerivedDTRecvComplete
679
680
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
681
682
int MPIDI_CH3_ReqHandler_GetDerivedDTRecvComplete(MPIDI_VC_t * vc,
                                                  MPID_Request * rreq, int *complete)
683
684
{
    int mpi_errno = MPI_SUCCESS;
685
    MPID_Datatype *new_dtp = NULL;
686
    MPIDI_CH3_Pkt_t upkt;
687
688
    MPIDI_CH3_Pkt_get_resp_t *get_resp_pkt = &upkt.get_resp;
    MPID_Request *sreq;
689
    MPID_Win *win_ptr;
690
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_GETDERIVEDDTRECVCOMPLETE);
691

692
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_GETDERIVEDDTRECVCOMPLETE);
693

694
695
    MPID_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);

696
697
    MPIU_Assert(!(rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP));

698
699
700
    /* create derived datatype */
    create_derived_datatype(rreq, &new_dtp);
    MPIU_Free(rreq->dev.dtype_info);
701

702
703
    /* create request for sending data */
    sreq = MPID_Request_create();
704
705
    MPIU_ERR_CHKANDJUMP(sreq == NULL, mpi_errno, MPI_ERR_OTHER, "**nomemreq");

706
707
    sreq->kind = MPID_REQUEST_SEND;
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_GET_RESP);
708
    sreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_GetSendComplete;
709
    sreq->dev.OnFinal = MPIDI_CH3_ReqHandler_GetSendComplete;
710
711
712
713
714
    sreq->dev.user_buf = rreq->dev.user_buf;
    sreq->dev.user_count = rreq->dev.user_count;
    sreq->dev.datatype = new_dtp->handle;
    sreq->dev.datatype_ptr = new_dtp;
    sreq->dev.target_win_handle = rreq->dev.target_win_handle;
715
    sreq->dev.flags = rreq->dev.flags;
716

717
    MPIDI_Pkt_init(get_resp_pkt, MPIDI_CH3_PKT_GET_RESP);
718
    get_resp_pkt->request_handle = rreq->dev.request_handle;
719
720
    get_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
    get_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
721
722
    if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
        rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE)
723
        get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
724
725
    if ((rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
        (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
726
        get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
727
728
729
730

    sreq->dev.segment_ptr = MPID_Segment_alloc();
    MPIU_ERR_CHKANDJUMP1((sreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**nomem",
                         "**nomem %s", "MPID_Segment_alloc");
731

732
    MPID_Segment_init(sreq->dev.user_buf,
733
                      sreq->dev.user_count, sreq->dev.datatype, sreq->dev.segment_ptr, 0);
734
735
    sreq->dev.segment_first = 0;
    sreq->dev.segment_size = new_dtp->size * sreq->dev.user_count;
736

737
    /* Because this is in a packet handler, it is already within a critical section */
738
    /* MPIU_THREAD_CS_ENTER(CH3COMM,vc); */
739
    mpi_errno = vc->sendNoncontig_fn(vc, sreq, get_resp_pkt, sizeof(*get_resp_pkt));
740
    /* MPIU_THREAD_CS_EXIT(CH3COMM,vc); */
741
    /* --BEGIN ERROR HANDLING-- */
742
    if (mpi_errno != MPI_SUCCESS) {
743
        MPID_Request_release(sreq);
744
        sreq = NULL;
745
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
746
    }
747
    /* --END ERROR HANDLING-- */
748
749
750

    /* mark receive data transfer as complete and decrement CC in receive
     * request */
751
752
    MPIDI_CH3U_Request_complete(rreq);
    *complete = TRUE;
753
754

  fn_fail:
755
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_GETDERIVEDDTRECVCOMPLETE);
756
757
758
    return mpi_errno;
}

759

760
761
762
763
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_ReqHandler_UnpackUEBufComplete
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
764
765
int MPIDI_CH3_ReqHandler_UnpackUEBufComplete(MPIDI_VC_t * vc ATTRIBUTE((unused)),
                                             MPID_Request * rreq, int *complete)
766
767
768
{
    int recv_pending;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKUEBUFCOMPLETE);
769

770
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKUEBUFCOMPLETE);
771

772
773
    MPIDI_Request_decr_pending(rreq);
    MPIDI_Request_check_pending(rreq, &recv_pending);
774
775
776
777
778
779
780
781
782
783
784
    if (!recv_pending) {
        if (rreq->dev.recv_data_sz > 0) {
            MPIDI_CH3U_Request_unpack_uebuf(rreq);
            MPIU_Free(rreq->dev.tmpbuf);
        }
    }
    else {
        /* The receive has not been posted yet.  MPID_{Recv/Irecv}()
         * is responsible for unpacking the buffer. */
    }

785
786
787
    /* mark data transfer as complete and decrement CC */
    MPIDI_CH3U_Request_complete(rreq);
    *complete = TRUE;
788

789
790
791
792
793
794
795
796
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKUEBUFCOMPLETE);
    return MPI_SUCCESS;
}

#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_ReqHandler_UnpackSRBufComplete
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
797
int MPIDI_CH3_ReqHandler_UnpackSRBufComplete(MPIDI_VC_t * vc, MPID_Request * rreq, int *complete)
798
799
800
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFCOMPLETE);
801

802
803
804
805
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFCOMPLETE);

    MPIDI_CH3U_Request_unpack_srbuf(rreq);

806
    if (MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_PUT_RECV) {
807
        mpi_errno = MPIDI_CH3_ReqHandler_PutRecvComplete(vc, rreq, complete);
808
    }
809
    else if (MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_ACCUM_RECV) {
810
        mpi_errno = MPIDI_CH3_ReqHandler_AccumRecvComplete(vc, rreq, complete);
811
    }
812
    else if (MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_GET_ACCUM_RECV) {
813
        mpi_errno = MPIDI_CH3_ReqHandler_GaccumRecvComplete(vc, rreq, complete);
814
    }
Xin Zhao's avatar
Xin Zhao committed
815
816
817
    else if (MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_FOP_RECV) {
        mpi_errno = MPIDI_CH3_ReqHandler_FOPRecvComplete(vc, rreq, complete);
    }
818
    else {
819
820
821
        /* mark data transfer as complete and decrement CC */
        MPIDI_CH3U_Request_complete(rreq);
        *complete = TRUE;
822
823
824
825
826
827
828
829
830
831
    }

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFCOMPLETE);
    return mpi_errno;
}

#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_ReqHandler_UnpackSRBufReloadIOV
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
832
833
int MPIDI_CH3_ReqHandler_UnpackSRBufReloadIOV(MPIDI_VC_t * vc ATTRIBUTE((unused)),
                                              MPID_Request * rreq, int *complete)
834
835
836
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFRELOADIOV);
837

838
839
840
841
842
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFRELOADIOV);

    MPIDI_CH3U_Request_unpack_srbuf(rreq);
    mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
    if (mpi_errno != MPI_SUCCESS) {
843
        MPIU_ERR_SETFATALANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|loadrecviov");
844
845
    }
    *complete = FALSE;
846
  fn_fail:
847
848
849
850
851
852
853
854
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFRELOADIOV);
    return mpi_errno;
}

#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_ReqHandler_ReloadIOV
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
855
856
int MPIDI_CH3_ReqHandler_ReloadIOV(MPIDI_VC_t * vc ATTRIBUTE((unused)),
                                   MPID_Request * rreq, int *complete)
857
858
859
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_RELOADIOV);
860

861
862
863
864
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_RELOADIOV);

    mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
    if (mpi_errno != MPI_SUCCESS) {
865
        MPIU_ERR_SETFATALANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|loadrecviov");
866
867
    }
    *complete = FALSE;
868
  fn_fail:
869
870
871
872
873
874
875
876
877
878
879
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_RELOADIOV);
    return mpi_errno;
}

/* ----------------------------------------------------------------------- */
/* ----------------------------------------------------------------------- */

#undef FUNCNAME
#define FUNCNAME create_derived_datatype
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
880
static int create_derived_datatype(MPID_Request * req, MPID_Datatype ** dtp)
881
882
883
{
    MPIDI_RMA_dtype_info *dtype_info;
    MPID_Datatype *new_dtp;
884
    int mpi_errno = MPI_SUCCESS;
885
886
    MPI_Aint ptrdiff;
    MPIDI_STATE_DECL(MPID_STATE_CREATE_DERIVED_DATATYPE);
887

888
889
890
891
892
893
894
    MPIDI_FUNC_ENTER(MPID_STATE_CREATE_DERIVED_DATATYPE);

    dtype_info = req->dev.dtype_info;

    /* allocate new datatype object and handle */
    new_dtp = (MPID_Datatype *) MPIU_Handle_obj_alloc(&MPID_Datatype_mem);
    if (!new_dtp) {
895
896
        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s",
                             "MPID_Datatype_mem");
897
898
899
    }

    *dtp = new_dtp;
900

901
902
903
904
    /* Note: handle is filled in by MPIU_Handle_obj_alloc() */
    MPIU_Object_set_ref(new_dtp, 1);
    new_dtp->is_permanent = 0;
    new_dtp->is_committed = 1;
905
906
907
    new_dtp->attributes = 0;
    new_dtp->cache_id = 0;
    new_dtp->name[0] = 0;
908
    new_dtp->is_contig = dtype_info->is_contig;
909
    new_dtp->max_contig_blocks = dtype_info->max_contig_blocks;
910
911
912
    new_dtp->size = dtype_info->size;
    new_dtp->extent = dtype_info->extent;
    new_dtp->dataloop_size = dtype_info->dataloop_size;
913
    new_dtp->dataloop_depth = dtype_info->dataloop_depth;
914
    new_dtp->basic_type = dtype_info->basic_type;
915
916
    /* set dataloop pointer */
    new_dtp->dataloop = req->dev.dataloop;
917

918
919
920
921
922
923
924
    new_dtp->ub = dtype_info->ub;
    new_dtp->lb = dtype_info->lb;
    new_dtp->true_ub = dtype_info->true_ub;
    new_dtp->true_lb = dtype_info->true_lb;
    new_dtp->has_sticky_ub = dtype_info->has_sticky_ub;
    new_dtp->has_sticky_lb = dtype_info->has_sticky_lb;
    /* update pointers in dataloop */
925
926
927
    ptrdiff = (MPI_Aint) ((char *) (new_dtp->dataloop) - (char *)
                          (dtype_info->dataloop));

928
929
930
931
932
933
934
    /* FIXME: Temp to avoid SEGV when memory tracing */
    new_dtp->hetero_dloop = 0;

    MPID_Dataloop_update(new_dtp->dataloop, ptrdiff);

    new_dtp->contents = NULL;

935
  fn_fail:
936
937
938
939
940
    MPIDI_FUNC_EXIT(MPID_STATE_CREATE_DERIVED_DATATYPE);

    return mpi_errno;
}

941

942
static inline int perform_put_in_lock_queue(MPID_Win * win_ptr, MPIDI_RMA_Lock_entry_t * lock_entry)
943
944
945
946
{
    MPIDI_CH3_Pkt_put_t *put_pkt = &((lock_entry->pkt).put);
    int mpi_errno = MPI_SUCCESS;

Xin Zhao's avatar
Xin Zhao committed
947
948
949
950
951
952
    /* Piggyback candidate should have basic datatype for target datatype. */
    MPIU_Assert(MPIR_DATATYPE_IS_PREDEFINED(put_pkt->datatype));

    /* Make sure that all data is received for this op. */
    MPIU_Assert(lock_entry->all_data_recved == 1);

953
    if (put_pkt->type == MPIDI_CH3_PKT_PUT_IMMED) {
954
        /* all data fits in packet header */
955
        mpi_errno = MPIR_Localcopy(put_pkt->info.data, put_pkt->count, put_pkt->datatype,
956
                                   put_pkt->addr, put_pkt->count, put_pkt->datatype);
957
958
        if (mpi_errno != MPI_SUCCESS)
            MPIU_ERR_POP(mpi_errno);
959
960
    }
    else {
961
962
        MPIU_Assert(put_pkt->type == MPIDI_CH3_PKT_PUT);

963
964
        mpi_errno = MPIR_Localcopy(lock_entry->data, put_pkt->count, put_pkt->datatype,
                                   put_pkt->addr, put_pkt->count, put_pkt->datatype);
965
966
        if (mpi_errno != MPI_SUCCESS)
            MPIU_ERR_POP(mpi_errno);
967
968
969
    }

    /* do final action */
970
    mpi_errno = finish_op_on_target(win_ptr, lock_entry->vc, FALSE /* has no response data */ ,
971
                                    put_pkt->flags, put_pkt->source_win_handle);