ch3u_handle_recv_req.c 69.5 KB
Newer Older
1
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpidimpl.h"
#include "mpidrma.h"

static int create_derived_datatype(MPID_Request * rreq, MPID_Datatype ** dtp);

#undef FUNCNAME
#define FUNCNAME MPIDI_CH3U_Handle_recv_req
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
16
int MPIDI_CH3U_Handle_recv_req(MPIDI_VC_t * vc, MPID_Request * rreq, int *complete)
17
{
18
    static int in_routine ATTRIBUTE((unused)) = FALSE;
19
    int mpi_errno = MPI_SUCCESS;
20
    int (*reqFn) (MPIDI_VC_t *, MPID_Request *, int *);
21
22
23
24
25
26
27
28
29
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_RECV_REQ);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_RECV_REQ);

    MPIU_Assert(in_routine == FALSE);
    in_routine = TRUE;

    reqFn = rreq->dev.OnDataAvail;
    if (!reqFn) {
30
31
32
        MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_RECV);
        MPIDI_CH3U_Request_complete(rreq);
        *complete = TRUE;
33
34
    }
    else {
35
        mpi_errno = reqFn(vc, rreq, complete);
36
37
38
39
40
41
42
43
    }

    in_routine = FALSE;
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_RECV_REQ);
    return mpi_errno;
}

/* ----------------------------------------------------------------------- */
/* Here are the functions that implement the actions that are taken when
 * data is available for a receive request (or other completion operations)
 * These include "receive" requests that are part of the RMA implementation.
 *
 * The convention for the names of routines that are called when data is
 * available is
 *    MPIDI_CH3_ReqHandler_<type>(MPIDI_VC_t *, MPID_Request *, int *)
 * as in
 *    MPIDI_CH3_ReqHandler_...
 *
 * ToDo:
 *    We need a way for each of these functions to describe what they are,
 *    so that given a pointer to one of these functions, we can retrieve
 *    a description of the routine.  We may want to use a static string
 *    and require the user to maintain thread-safety, at least while
 *    accessing the string.
 */
/* ----------------------------------------------------------------------- */
62
63
/* Completion handler for a simple receive: all data has arrived, so mark
 * the transfer complete and decrement the request's completion counter.
 * Always succeeds. */
int MPIDI_CH3_ReqHandler_RecvComplete(MPIDI_VC_t * vc ATTRIBUTE((unused)),
                                      MPID_Request * rreq, int *complete)
{
    /* mark data transfer as complete and decrement CC */
    MPIDI_CH3U_Request_complete(rreq);
    *complete = TRUE;
    return MPI_SUCCESS;
}

#undef FUNCNAME
72
#define FUNCNAME MPIDI_CH3_ReqHandler_PutRecvComplete
73
74
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
75
int MPIDI_CH3_ReqHandler_PutRecvComplete(MPIDI_VC_t * vc, MPID_Request * rreq, int *complete)
76
77
78
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Win *win_ptr;
79
80
    MPI_Win source_win_handle = rreq->dev.source_win_handle;
    MPIDI_CH3_Pkt_flags_t flags = rreq->dev.flags;
81
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTRECVCOMPLETE);
82

83
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTRECVCOMPLETE);
84

85
    /* NOTE: It is possible that this request is already completed before
86
87
88
89
90
91
92
93
94
95
96
97
     * entering this handler. This happens when this req handler is called
     * within the same req handler on the same request.
     * Consider this case: req is queued up in SHM queue with ref count of 2:
     * one is for completing the request and another is for dequeueing from
     * the queue. The first called req handler on this request completed
     * this request and decrement ref counter to 1. Request is still in the
     * queue. Within this handler, we call the req handler on the same request
     * for the second time (for example when making progress on SHM queue),
     * and the second called handler also tries to complete this request,
     * which leads to wrong execution.
     * Here we check if req is already completed to prevent processing the
     * same request twice. */
98
99
100
101
102
    if (MPID_Request_is_complete(rreq)) {
        *complete = FALSE;
        goto fn_exit;
    }

103
104
    MPID_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);

105
106
107
108
    /* mark data transfer as complete and decrement CC */
    MPIDI_CH3U_Request_complete(rreq);

    /* NOTE: finish_op_on_target() must be called after we complete this request,
109
110
111
112
     * because inside finish_op_on_target() we may call this request handler
     * on the same request again (in release_lock()). Marking this request as
     * completed will prevent us from processing the same request twice. */
    mpi_errno = finish_op_on_target(win_ptr, vc, FALSE /* has no response data */ ,
113
                                    flags, source_win_handle);
114
115
    if (mpi_errno != MPI_SUCCESS)
        MPIU_ERR_POP(mpi_errno);
116

117
    *complete = TRUE;
118

119
  fn_exit:
120
121
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTRECVCOMPLETE);
    return MPI_SUCCESS;
122

123
    /* --BEGIN ERROR HANDLING-- */
124
  fn_fail:
125
126
127
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
128
129


130
131
132
133
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_ReqHandler_AccumRecvComplete
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
134
int MPIDI_CH3_ReqHandler_AccumRecvComplete(MPIDI_VC_t * vc, MPID_Request * rreq, int *complete)
135
136
137
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Win *win_ptr;
138
139
    MPI_Win source_win_handle = rreq->dev.source_win_handle;
    MPIDI_CH3_Pkt_flags_t flags = rreq->dev.flags;
140
    MPI_Datatype basic_type;
141
    MPI_Aint predef_count, predef_dtp_size;
142
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMRECVCOMPLETE);
143

144
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMRECVCOMPLETE);
145

146
    /* NOTE: It is possible that this request is already completed before
147
148
149
150
151
152
153
154
155
156
157
158
     * entering this handler. This happens when this req handler is called
     * within the same req handler on the same request.
     * Consider this case: req is queued up in SHM queue with ref count of 2:
     * one is for completing the request and another is for dequeueing from
     * the queue. The first called req handler on this request completed
     * this request and decrement ref counter to 1. Request is still in the
     * queue. Within this handler, we call the req handler on the same request
     * for the second time (for example when making progress on SHM queue),
     * and the second called handler also tries to complete this request,
     * which leads to wrong execution.
     * Here we check if req is already completed to prevent processing the
     * same request twice. */
159
160
161
162
163
    if (MPID_Request_is_complete(rreq)) {
        *complete = FALSE;
        goto fn_exit;
    }

164
    MPID_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);
165

166
    MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_ACCUM_RECV);
167

168
    if (MPIR_DATATYPE_IS_PREDEFINED(rreq->dev.datatype))
169
        basic_type = rreq->dev.datatype;
170
    else {
171
        basic_type = rreq->dev.datatype_ptr->basic_type;
172
    }
173
    MPIU_Assert(basic_type != MPI_DATATYPE_NULL);
174

175
    MPID_Datatype_get_size_macro(basic_type, predef_dtp_size);
176
177
178
    predef_count = rreq->dev.recv_data_sz / predef_dtp_size;
    MPIU_Assert(predef_count > 0);

179
180
181
    if (win_ptr->shm_allocated == TRUE)
        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
    /* accumulate data from tmp_buf into user_buf */
182
    mpi_errno = do_accumulate_op(rreq->dev.user_buf, predef_count, basic_type,
183
184
                                 rreq->dev.real_user_buf, rreq->dev.user_count, rreq->dev.datatype,
                                 rreq->dev.stream_offset, rreq->dev.op);
185
186
187
188
189
190
    if (win_ptr->shm_allocated == TRUE)
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
    if (mpi_errno) {
        MPIU_ERR_POP(mpi_errno);
    }

191
    /* free the temporary buffer */
192
    MPIDI_CH3U_SRBuf_free(rreq);
193

194
195
196
197
    /* mark data transfer as complete and decrement CC */
    MPIDI_CH3U_Request_complete(rreq);

    /* NOTE: finish_op_on_target() must be called after we complete this request,
198
199
200
201
     * because inside finish_op_on_target() we may call this request handler
     * on the same request again (in release_lock()). Marking this request as
     * completed will prevent us from processing the same request twice. */
    mpi_errno = finish_op_on_target(win_ptr, vc, FALSE /* has no response data */ ,
202
                                    flags, source_win_handle);
203
204
    if (mpi_errno != MPI_SUCCESS)
        MPIU_ERR_POP(mpi_errno);
205

206
    *complete = TRUE;
207

208
  fn_exit:
209
210
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMRECVCOMPLETE);
    return MPI_SUCCESS;
211

212
    /* --BEGIN ERROR HANDLING-- */
213
  fn_fail:
214
215
216
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
217
218


219
220
221
222
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_ReqHandler_GaccumRecvComplete
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
223
int MPIDI_CH3_ReqHandler_GaccumRecvComplete(MPIDI_VC_t * vc, MPID_Request * rreq, int *complete)
224
225
226
227
228
229
230
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Win *win_ptr;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_get_accum_resp_t *get_accum_resp_pkt = &upkt.get_accum_resp;
    MPID_Request *resp_req;
    MPID_IOV iov[MPID_IOV_LIMIT];
231
    int iovcnt;
232
    int is_contig;
233
    MPI_Datatype basic_type;
234
    MPI_Aint predef_count, predef_dtp_size;
235
236
237
238
239
240
241
    MPIU_CHKPMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMRECVCOMPLETE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMRECVCOMPLETE);

    MPID_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);

242
    if (MPIR_DATATYPE_IS_PREDEFINED(rreq->dev.datatype))
243
        basic_type = rreq->dev.datatype;
244
    else {
245
        basic_type = rreq->dev.datatype_ptr->basic_type;
246
    }
247
    MPIU_Assert(basic_type != MPI_DATATYPE_NULL);
248

249
    MPID_Datatype_get_size_macro(basic_type, predef_dtp_size);
250
251
252
    predef_count = rreq->dev.recv_data_sz / predef_dtp_size;
    MPIU_Assert(predef_count > 0);

253
    MPIDI_Pkt_init(get_accum_resp_pkt, MPIDI_CH3_PKT_GET_ACCUM_RESP);
254
255
    get_accum_resp_pkt->request_handle = rreq->dev.resp_request_handle;
    get_accum_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
256
    get_accum_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
257
258
    if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
        rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE)
259
        get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
260
261
    if ((rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
        (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
262
        get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
263

264
265
    MPID_Datatype_is_contig(rreq->dev.datatype, &is_contig);

266
267
268
    resp_req = MPID_Request_create();
    MPIU_ERR_CHKANDJUMP(resp_req == NULL, mpi_errno, MPI_ERR_OTHER, "**nomemreq");
    MPIU_Object_set_ref(resp_req, 1);
269
    MPIDI_Request_set_type(resp_req, MPIDI_REQUEST_TYPE_GET_ACCUM_RESP);
270

271
    MPIU_CHKPMEM_MALLOC(resp_req->dev.user_buf, void *, rreq->dev.recv_data_sz,
272
                        mpi_errno, "GACC resp. buffer");
273

274
275
    /* NOTE: 'copy data + ACC' needs to be atomic */

276
277
278
    if (win_ptr->shm_allocated == TRUE)
        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);

279
280
    /* Copy data from target window to temporary buffer */

281
    if (is_contig) {
282
283
284
        MPIU_Memcpy(resp_req->dev.user_buf,
                    (void *) ((char *) rreq->dev.real_user_buf + rreq->dev.stream_offset),
                    rreq->dev.recv_data_sz);
285
286
    }
    else {
287
        MPID_Segment *seg = MPID_Segment_alloc();
288
289
        MPI_Aint first = rreq->dev.stream_offset;
        MPI_Aint last = first + rreq->dev.recv_data_sz;
290

291
292
293
        if (seg == NULL) {
            if (win_ptr->shm_allocated == TRUE)
                MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
294
        }
295
296
297
298
        MPIU_ERR_CHKANDJUMP1(seg == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s",
                             "MPID_Segment");
        MPID_Segment_init(rreq->dev.real_user_buf, rreq->dev.user_count, rreq->dev.datatype, seg,
                          0);
299
        MPID_Segment_pack(seg, first, &last, resp_req->dev.user_buf);
300
        MPID_Segment_free(seg);
301
    }
302

303
    /* accumulate data from tmp_buf into user_buf */
304
    mpi_errno = do_accumulate_op(rreq->dev.user_buf, predef_count, basic_type,
305
306
                                 rreq->dev.real_user_buf, rreq->dev.user_count, rreq->dev.datatype,
                                 rreq->dev.stream_offset, rreq->dev.op);
307
308
309
310

    if (win_ptr->shm_allocated == TRUE)
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);

311
312
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);
313

Xin Zhao's avatar
Xin Zhao committed
314
315
    resp_req->dev.OnFinal = MPIDI_CH3_ReqHandler_GaccumSendComplete;
    resp_req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_GaccumSendComplete;
316
317
    resp_req->dev.target_win_handle = rreq->dev.target_win_handle;
    resp_req->dev.flags = rreq->dev.flags;
318

319
    /* here we increment the Active Target counter to guarantee the GET-like
320
     * operation are completed when counter reaches zero. */
321
    win_ptr->at_completion_counter++;
322

323
324
325
    iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_accum_resp_pkt;
    iov[0].MPID_IOV_LEN = sizeof(*get_accum_resp_pkt);
    iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) ((char *) resp_req->dev.user_buf);
326
    iov[1].MPID_IOV_LEN = rreq->dev.recv_data_sz;
327
    iovcnt = 2;
328

329
    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
330
    mpi_errno = MPIDI_CH3_iSendv(vc, resp_req, iov, iovcnt);
331
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
332
333
334
335
336
337

    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");

    /* Mark get portion as handled */
    rreq->dev.resp_request_handle = MPI_REQUEST_NULL;

338
    MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_GET_ACCUM_RECV);
339

340
    /* free the temporary buffer */
341
    MPIDI_CH3U_SRBuf_free(rreq);
342

343
344
345
    /* mark data transfer as complete and decrement CC */
    MPIDI_CH3U_Request_complete(rreq);
    *complete = TRUE;
346
  fn_exit:
347
    MPIU_CHKPMEM_COMMIT();
348
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMRECVCOMPLETE);
349
    return MPI_SUCCESS;
350
351

    /* --BEGIN ERROR HANDLING-- */
352
  fn_fail:
353
354
355
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
    /* --END ERROR HANDLING-- */
356
357
}

358
359
360
361
362

#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_ReqHandler_FOPRecvComplete
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
363
int MPIDI_CH3_ReqHandler_FOPRecvComplete(MPIDI_VC_t * vc, MPID_Request * rreq, int *complete)
364
365
366
367
368
369
370
371
372
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Win *win_ptr = NULL;
    MPI_Aint type_size;
    MPID_Request *resp_req = NULL;
    MPID_IOV iov[MPID_IOV_LIMIT];
    int iovcnt;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_fop_resp_t *fop_resp_pkt = &upkt.fop_resp;
373
    int is_contig;
374
375
376
377
378
    MPIU_CHKPMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPRECVCOMPLETE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPRECVCOMPLETE);

Xin Zhao's avatar
Xin Zhao committed
379
380
    MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_FOP_RECV);

381
382
383
384
    MPID_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);

    MPID_Datatype_get_size_macro(rreq->dev.datatype, type_size);

385
386
    MPID_Datatype_is_contig(rreq->dev.datatype, &is_contig);

387
388
389
    /* Create response request */
    resp_req = MPID_Request_create();
    MPIU_ERR_CHKANDJUMP(resp_req == NULL, mpi_errno, MPI_ERR_OTHER, "**nomemreq");
Xin Zhao's avatar
Xin Zhao committed
390
    MPIDI_Request_set_type(resp_req, MPIDI_REQUEST_TYPE_FOP_RESP);
391
392
393
394
395
396
    MPIU_Object_set_ref(resp_req, 1);
    resp_req->dev.OnFinal = MPIDI_CH3_ReqHandler_FOPSendComplete;
    resp_req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_FOPSendComplete;
    resp_req->dev.target_win_handle = rreq->dev.target_win_handle;
    resp_req->dev.flags = rreq->dev.flags;

397
    MPIU_CHKPMEM_MALLOC(resp_req->dev.user_buf, void *, type_size, mpi_errno, "FOP resp. buffer");
398
399

    /* here we increment the Active Target counter to guarantee the GET-like
400
     * operation are completed when counter reaches zero. */
401
402
    win_ptr->at_completion_counter++;

403
404
    /* NOTE: 'copy data + ACC' needs to be atomic */

405
406
407
408
    if (win_ptr->shm_allocated == TRUE)
        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);

    /* Copy data into a temporary buffer in response request */
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
    if (is_contig) {
        MPIU_Memcpy(resp_req->dev.user_buf, rreq->dev.real_user_buf, type_size);
    }
    else {
        MPID_Segment *seg = MPID_Segment_alloc();
        MPI_Aint last = type_size;

        if (seg == NULL) {
            if (win_ptr->shm_allocated == TRUE)
                MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
        }
        MPIU_ERR_CHKANDJUMP1(seg == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s",
                             "MPID_Segment");
        MPID_Segment_init(rreq->dev.real_user_buf, 1, rreq->dev.datatype, seg, 0);
        MPID_Segment_pack(seg, 0, &last, resp_req->dev.user_buf);
        MPID_Segment_free(seg);
    }
426
427
428

    /* Perform accumulate computation */
    if (rreq->dev.op != MPI_NO_OP) {
429
430
431
        mpi_errno = do_accumulate_op(rreq->dev.user_buf, 1, rreq->dev.datatype,
                                     rreq->dev.real_user_buf, 1, rreq->dev.datatype, 0,
                                     rreq->dev.op);
432
433
434
435
436
    }

    if (win_ptr->shm_allocated == TRUE)
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);

437
438
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);
439

440
441
442
443
444
445
446
447
448
449
450
451
    /* Send back data */
    MPIDI_Pkt_init(fop_resp_pkt, MPIDI_CH3_PKT_FOP_RESP);
    fop_resp_pkt->request_handle = rreq->dev.resp_request_handle;
    fop_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
    fop_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
    if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
        rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE)
        fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
    if ((rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
        (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
        fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;

452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
    iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) fop_resp_pkt;
    iov[0].MPID_IOV_LEN = sizeof(*fop_resp_pkt);
    iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) ((char *) resp_req->dev.user_buf);
    iov[1].MPID_IOV_LEN = type_size;
    iovcnt = 2;

    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iSendv(vc, resp_req, iov, iovcnt);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);

    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");

    /* free the temporary buffer */
    MPIU_Free((char *) rreq->dev.user_buf);

    /* mark data transfer as complete and decrement CC */
    MPIDI_CH3U_Request_complete(rreq);
    *complete = TRUE;

  fn_exit:
    MPIU_CHKPMEM_COMMIT();
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPRECVCOMPLETE);
    return MPI_SUCCESS;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
    /* --END ERROR HANDLING-- */
480
481
482
}


483
#undef FUNCNAME
484
#define FUNCNAME MPIDI_CH3_ReqHandler_PutDerivedDTRecvComplete
485
486
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
487
488
int MPIDI_CH3_ReqHandler_PutDerivedDTRecvComplete(MPIDI_VC_t * vc ATTRIBUTE((unused)),
                                                  MPID_Request * rreq, int *complete)
489
490
{
    int mpi_errno = MPI_SUCCESS;
491
    MPID_Datatype *new_dtp = NULL;
492
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTDERIVEDDTRECVCOMPLETE);
493

494
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTDERIVEDDTRECVCOMPLETE);
495

496
497
    /* create derived datatype */
    create_derived_datatype(rreq, &new_dtp);
498

499
    /* update request to get the data */
500
    MPIDI_Request_set_type(rreq, MPIDI_REQUEST_TYPE_PUT_RECV);
501
    rreq->dev.datatype = new_dtp->handle;
502
503
    rreq->dev.recv_data_sz = new_dtp->size * rreq->dev.user_count;

504
505
    rreq->dev.datatype_ptr = new_dtp;
    /* this will cause the datatype to be freed when the
506
     * request is freed. free dtype_info here. */
507
    MPIU_Free(rreq->dev.dtype_info);
508
509
510
511

    rreq->dev.segment_ptr = MPID_Segment_alloc();
    MPIU_ERR_CHKANDJUMP1((rreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**nomem",
                         "**nomem %s", "MPID_Segment_alloc");
512

513
    MPID_Segment_init(rreq->dev.user_buf,
514
                      rreq->dev.user_count, rreq->dev.datatype, rreq->dev.segment_ptr, 0);
515
516
    rreq->dev.segment_first = 0;
    rreq->dev.segment_size = rreq->dev.recv_data_sz;
517

518
519
    mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
    if (mpi_errno != MPI_SUCCESS) {
520
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|loadrecviov");
521
    }
522
523
524
    if (!rreq->dev.OnDataAvail)
        rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_PutRecvComplete;

525
    *complete = FALSE;
526
  fn_fail:
527
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTDERIVEDDTRECVCOMPLETE);
528
529
530
531
    return mpi_errno;
}

#undef FUNCNAME
532
#define FUNCNAME MPIDI_CH3_ReqHandler_AccumDerivedDTRecvComplete
533
534
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
535
536
int MPIDI_CH3_ReqHandler_AccumDerivedDTRecvComplete(MPIDI_VC_t * vc ATTRIBUTE((unused)),
                                                    MPID_Request * rreq, int *complete)
537
538
{
    int mpi_errno = MPI_SUCCESS;
539
    MPID_Datatype *new_dtp = NULL;
540
    MPI_Aint basic_type_extent, basic_type_size;
541
    MPI_Aint total_len, rest_len, stream_elem_count;
542
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMDERIVEDDTRECVCOMPLETE);
543

544
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMDERIVEDDTRECVCOMPLETE);
545

546
547
    /* create derived datatype */
    create_derived_datatype(rreq, &new_dtp);
548

549
    /* update new request to get the data */
550
    MPIDI_Request_set_type(rreq, MPIDI_REQUEST_TYPE_ACCUM_RECV);
551

552
553
    MPID_Datatype_get_size_macro(new_dtp->basic_type, basic_type_size);
    MPID_Datatype_get_extent_macro(new_dtp->basic_type, basic_type_extent);
554

555
556
557
558
559
560
561
562
563
564
565
    MPIU_Assert(!MPIDI_Request_get_srbuf_flag(rreq));
    /* allocate a SRBuf for receiving stream unit */
    MPIDI_CH3U_SRBuf_alloc(rreq, MPIDI_CH3U_SRBuf_size);
    /* --BEGIN ERROR HANDLING-- */
    if (rreq->dev.tmpbuf_sz == 0) {
        MPIU_DBG_MSG(CH3_CHANNEL, TYPICAL, "SRBuf allocation failure");
        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
                                         FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem",
                                         "**nomem %d", MPIDI_CH3U_SRBuf_size);
        rreq->status.MPI_ERROR = mpi_errno;
        goto fn_fail;
566
    }
567
    /* --END ERROR HANDLING-- */
568

569
    rreq->dev.user_buf = rreq->dev.tmpbuf;
570
    rreq->dev.datatype = new_dtp->handle;
571
572
573

    total_len = new_dtp->size * rreq->dev.user_count;
    rest_len = total_len - rreq->dev.stream_offset;
574
    stream_elem_count = MPIDI_CH3U_SRBuf_size / basic_type_extent;
575

576
    rreq->dev.recv_data_sz = MPIR_MIN(rest_len, stream_elem_count * basic_type_size);
577
578
    rreq->dev.datatype_ptr = new_dtp;
    /* this will cause the datatype to be freed when the
579
     * request is freed. free dtype_info here. */
580
    MPIU_Free(rreq->dev.dtype_info);
581
582
583
584

    rreq->dev.segment_ptr = MPID_Segment_alloc();
    MPIU_ERR_CHKANDJUMP1((rreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**nomem",
                         "**nomem %s", "MPID_Segment_alloc");
585

586
    MPID_Segment_init(rreq->dev.user_buf,
587
                      (rreq->dev.recv_data_sz / basic_type_size),
588
                      new_dtp->basic_type, rreq->dev.segment_ptr, 0);
589
590
    rreq->dev.segment_first = 0;
    rreq->dev.segment_size = rreq->dev.recv_data_sz;
591

592
593
    mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
    if (mpi_errno != MPI_SUCCESS) {
594
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|loadrecviov");
595
596
    }
    if (!rreq->dev.OnDataAvail)
597
598
        rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_AccumRecvComplete;

599
    *complete = FALSE;
600
  fn_fail:
601
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMDERIVEDDTRECVCOMPLETE);
602
603
604
    return mpi_errno;
}

605
606

#undef FUNCNAME
607
#define FUNCNAME MPIDI_CH3_ReqHandler_GaccumDerivedDTRecvComplete
608
609
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
610
611
int MPIDI_CH3_ReqHandler_GaccumDerivedDTRecvComplete(MPIDI_VC_t * vc ATTRIBUTE((unused)),
                                                     MPID_Request * rreq, int *complete)
612
613
{
    int mpi_errno = MPI_SUCCESS;
614
    MPID_Datatype *new_dtp = NULL;
615
    MPI_Aint basic_type_extent, basic_type_size;
616
    MPI_Aint total_len, rest_len, stream_elem_count;
617
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMDERIVEDDTRECVCOMPLETE);
618

619
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMDERIVEDDTRECVCOMPLETE);
620

621
622
    /* create derived datatype */
    create_derived_datatype(rreq, &new_dtp);
623

624
    /* update new request to get the data */
625
    MPIDI_Request_set_type(rreq, MPIDI_REQUEST_TYPE_GET_ACCUM_RECV);
626

627
628
    MPID_Datatype_get_size_macro(new_dtp->basic_type, basic_type_size);
    MPID_Datatype_get_extent_macro(new_dtp->basic_type, basic_type_extent);
629

630
631
632
633
634
635
636
637
638
639
640
    MPIU_Assert(!MPIDI_Request_get_srbuf_flag(rreq));
    /* allocate a SRBuf for receiving stream unit */
    MPIDI_CH3U_SRBuf_alloc(rreq, MPIDI_CH3U_SRBuf_size);
    /* --BEGIN ERROR HANDLING-- */
    if (rreq->dev.tmpbuf_sz == 0) {
        MPIU_DBG_MSG(CH3_CHANNEL, TYPICAL, "SRBuf allocation failure");
        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
                                         FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem",
                                         "**nomem %d", MPIDI_CH3U_SRBuf_size);
        rreq->status.MPI_ERROR = mpi_errno;
        goto fn_fail;
641
    }
642
    /* --END ERROR HANDLING-- */
643

644
    rreq->dev.user_buf = rreq->dev.tmpbuf;
645
    rreq->dev.datatype = new_dtp->handle;
646
647
648

    total_len = new_dtp->size * rreq->dev.user_count;
    rest_len = total_len - rreq->dev.stream_offset;
649
    stream_elem_count = MPIDI_CH3U_SRBuf_size / basic_type_extent;
650

651
    rreq->dev.recv_data_sz = MPIR_MIN(rest_len, stream_elem_count * basic_type_size);
652
653
    rreq->dev.datatype_ptr = new_dtp;
    /* this will cause the datatype to be freed when the
654
     * request is freed. free dtype_info here. */
655
656
    MPIU_Free(rreq->dev.dtype_info);

657
658
659
    rreq->dev.segment_ptr = MPID_Segment_alloc();
    MPIU_ERR_CHKANDJUMP1((rreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**nomem",
                         "**nomem %s", "MPID_Segment_alloc");
660
661

    MPID_Segment_init(rreq->dev.user_buf,
662
                      (rreq->dev.recv_data_sz / basic_type_size),
663
                      new_dtp->basic_type, rreq->dev.segment_ptr, 0);
664
665
666
667
668
    rreq->dev.segment_first = 0;
    rreq->dev.segment_size = rreq->dev.recv_data_sz;

    mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
    if (mpi_errno != MPI_SUCCESS) {
669
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|loadrecviov");
670
671
    }
    if (!rreq->dev.OnDataAvail)
672
        rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_GaccumRecvComplete;
673
674

    *complete = FALSE;
675
  fn_fail:
676
677
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMDERIVEDDTRECVCOMPLETE);
    return mpi_errno;
678
679
680
}


Xin Zhao's avatar
Xin Zhao committed
681

682
#undef FUNCNAME
683
#define FUNCNAME MPIDI_CH3_ReqHandler_GetDerivedDTRecvComplete
684
685
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
686
687
int MPIDI_CH3_ReqHandler_GetDerivedDTRecvComplete(MPIDI_VC_t * vc,
                                                  MPID_Request * rreq, int *complete)
688
689
{
    int mpi_errno = MPI_SUCCESS;
690
    MPID_Datatype *new_dtp = NULL;
691
    MPIDI_CH3_Pkt_t upkt;
692
693
    MPIDI_CH3_Pkt_get_resp_t *get_resp_pkt = &upkt.get_resp;
    MPID_Request *sreq;
694
    MPID_Win *win_ptr;
695
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_GETDERIVEDDTRECVCOMPLETE);
696

697
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_GETDERIVEDDTRECVCOMPLETE);
698

699
700
    MPID_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);

701
702
    MPIU_Assert(!(rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP));

703
704
705
    /* create derived datatype */
    create_derived_datatype(rreq, &new_dtp);
    MPIU_Free(rreq->dev.dtype_info);
706

707
708
    /* create request for sending data */
    sreq = MPID_Request_create();
709
710
    MPIU_ERR_CHKANDJUMP(sreq == NULL, mpi_errno, MPI_ERR_OTHER, "**nomemreq");

711
712
    sreq->kind = MPID_REQUEST_SEND;
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_GET_RESP);
713
    sreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_GetSendComplete;
714
    sreq->dev.OnFinal = MPIDI_CH3_ReqHandler_GetSendComplete;
715
716
717
718
719
    sreq->dev.user_buf = rreq->dev.user_buf;
    sreq->dev.user_count = rreq->dev.user_count;
    sreq->dev.datatype = new_dtp->handle;
    sreq->dev.datatype_ptr = new_dtp;
    sreq->dev.target_win_handle = rreq->dev.target_win_handle;
720
    sreq->dev.flags = rreq->dev.flags;
721

722
    MPIDI_Pkt_init(get_resp_pkt, MPIDI_CH3_PKT_GET_RESP);
723
    get_resp_pkt->request_handle = rreq->dev.request_handle;
724
725
    get_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
    get_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
726
727
    if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
        rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE)
728
        get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
729
730
    if ((rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
        (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
731
        get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
732
733
734
735

    sreq->dev.segment_ptr = MPID_Segment_alloc();
    MPIU_ERR_CHKANDJUMP1((sreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**nomem",
                         "**nomem %s", "MPID_Segment_alloc");
736

737
    MPID_Segment_init(sreq->dev.user_buf,
738
                      sreq->dev.user_count, sreq->dev.datatype, sreq->dev.segment_ptr, 0);
739
740
    sreq->dev.segment_first = 0;
    sreq->dev.segment_size = new_dtp->size * sreq->dev.user_count;
741

742
    /* Because this is in a packet handler, it is already within a critical section */
743
    /* MPIU_THREAD_CS_ENTER(CH3COMM,vc); */
744
    mpi_errno = vc->sendNoncontig_fn(vc, sreq, get_resp_pkt, sizeof(*get_resp_pkt));
745
    /* MPIU_THREAD_CS_EXIT(CH3COMM,vc); */
746
    /* --BEGIN ERROR HANDLING-- */
747
    if (mpi_errno != MPI_SUCCESS) {
748
        MPID_Request_release(sreq);
749
        sreq = NULL;
750
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
751
    }
752
    /* --END ERROR HANDLING-- */
753
754
755

    /* mark receive data transfer as complete and decrement CC in receive
     * request */
756
757
    MPIDI_CH3U_Request_complete(rreq);
    *complete = TRUE;
758
759

  fn_fail:
760
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_GETDERIVEDDTRECVCOMPLETE);
761
762
763
    return mpi_errno;
}

764

765
766
767
768
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_ReqHandler_UnpackUEBufComplete
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
769
770
int MPIDI_CH3_ReqHandler_UnpackUEBufComplete(MPIDI_VC_t * vc ATTRIBUTE((unused)),
                                             MPID_Request * rreq, int *complete)
771
772
773
{
    int recv_pending;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKUEBUFCOMPLETE);
774

775
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKUEBUFCOMPLETE);
776

777
778
    MPIDI_Request_decr_pending(rreq);
    MPIDI_Request_check_pending(rreq, &recv_pending);
779
780
781
782
783
784
785
786
787
788
789
    if (!recv_pending) {
        if (rreq->dev.recv_data_sz > 0) {
            MPIDI_CH3U_Request_unpack_uebuf(rreq);
            MPIU_Free(rreq->dev.tmpbuf);
        }
    }
    else {
        /* The receive has not been posted yet.  MPID_{Recv/Irecv}()
         * is responsible for unpacking the buffer. */
    }

790
791
792
    /* mark data transfer as complete and decrement CC */
    MPIDI_CH3U_Request_complete(rreq);
    *complete = TRUE;
793

794
795
796
797
798
799
800
801
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKUEBUFCOMPLETE);
    return MPI_SUCCESS;
}

#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_ReqHandler_UnpackSRBufComplete
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
802
int MPIDI_CH3_ReqHandler_UnpackSRBufComplete(MPIDI_VC_t * vc, MPID_Request * rreq, int *complete)
803
804
805
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFCOMPLETE);
806

807
808
809
810
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFCOMPLETE);

    MPIDI_CH3U_Request_unpack_srbuf(rreq);

811
    if (MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_PUT_RECV) {
812
        mpi_errno = MPIDI_CH3_ReqHandler_PutRecvComplete(vc, rreq, complete);
813
    }
814
    else if (MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_ACCUM_RECV) {
815
        mpi_errno = MPIDI_CH3_ReqHandler_AccumRecvComplete(vc, rreq, complete);
816
    }
817
    else if (MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_GET_ACCUM_RECV) {
818
        mpi_errno = MPIDI_CH3_ReqHandler_GaccumRecvComplete(vc, rreq, complete);
819
    }
Xin Zhao's avatar
Xin Zhao committed
820
821
822
    else if (MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_FOP_RECV) {
        mpi_errno = MPIDI_CH3_ReqHandler_FOPRecvComplete(vc, rreq, complete);
    }
823
    else {
824
825
826
        /* mark data transfer as complete and decrement CC */
        MPIDI_CH3U_Request_complete(rreq);
        *complete = TRUE;
827
828
829
830
831
832
833
834
835
836
    }

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFCOMPLETE);
    return mpi_errno;
}

#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_ReqHandler_UnpackSRBufReloadIOV
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
837
838
int MPIDI_CH3_ReqHandler_UnpackSRBufReloadIOV(MPIDI_VC_t * vc ATTRIBUTE((unused)),
                                              MPID_Request * rreq, int *complete)
839
840
841
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFRELOADIOV);
842

843
844
845
846
847
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFRELOADIOV);

    MPIDI_CH3U_Request_unpack_srbuf(rreq);
    mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
    if (mpi_errno != MPI_SUCCESS) {
848
        MPIU_ERR_SETFATALANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|loadrecviov");
849
850
    }
    *complete = FALSE;
851
  fn_fail:
852
853
854
855
856
857
858
859
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFRELOADIOV);
    return mpi_errno;
}

#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_ReqHandler_ReloadIOV
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
860
861
int MPIDI_CH3_ReqHandler_ReloadIOV(MPIDI_VC_t * vc ATTRIBUTE((unused)),
                                   MPID_Request * rreq, int *complete)
862
863
864
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_RELOADIOV);
865

866
867
868
869
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_RELOADIOV);

    mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
    if (mpi_errno != MPI_SUCCESS) {
870
        MPIU_ERR_SETFATALANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|loadrecviov");
871
872
    }
    *complete = FALSE;
873
  fn_fail:
874
875
876
877
878
879
880
881
882
883
884
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_RELOADIOV);
    return mpi_errno;
}

/* ----------------------------------------------------------------------- */
/* ----------------------------------------------------------------------- */

#undef FUNCNAME
#define FUNCNAME create_derived_datatype
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
885
static int create_derived_datatype(MPID_Request * req, MPID_Datatype ** dtp)
886
887
888
{
    MPIDI_RMA_dtype_info *dtype_info;
    MPID_Datatype *new_dtp;
889
    int mpi_errno = MPI_SUCCESS;
890
891
    MPI_Aint ptrdiff;
    MPIDI_STATE_DECL(MPID_STATE_CREATE_DERIVED_DATATYPE);
892

893
894
895
896
897
898
899
    MPIDI_FUNC_ENTER(MPID_STATE_CREATE_DERIVED_DATATYPE);

    dtype_info = req->dev.dtype_info;

    /* allocate new datatype object and handle */
    new_dtp = (MPID_Datatype *) MPIU_Handle_obj_alloc(&MPID_Datatype_mem);
    if (!new_dtp) {
900
901
        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s",
                             "MPID_Datatype_mem");
902
903
904
    }

    *dtp = new_dtp;
905

906
907
908
909
    /* Note: handle is filled in by MPIU_Handle_obj_alloc() */
    MPIU_Object_set_ref(new_dtp, 1);
    new_dtp->is_permanent = 0;
    new_dtp->is_committed = 1;
910
911
912
    new_dtp->attributes = 0;
    new_dtp->cache_id = 0;
    new_dtp->name[0] = 0;
913
    new_dtp->is_contig = dtype_info->is_contig;
914
    new_dtp->max_contig_blocks = dtype_info->max_contig_blocks;
915
916
917
    new_dtp->size = dtype_info->size;
    new_dtp->extent = dtype_info->extent;
    new_dtp->dataloop_size = dtype_info->dataloop_size;
918
    new_dtp->dataloop_depth = dtype_info->dataloop_depth;
919
    new_dtp->basic_type = dtype_info->basic_type;
920
921
    /* set dataloop pointer */
    new_dtp->dataloop = req->dev.dataloop;
922

923
924
925
926
927
928
929
    new_dtp->ub = dtype_info->ub;
    new_dtp->lb = dtype_info->lb;
    new_dtp->true_ub = dtype_info->true_ub;
    new_dtp->true_lb = dtype_info->true_lb;
    new_dtp->has_sticky_ub = dtype_info->has_sticky_ub;
    new_dtp->has_sticky_lb = dtype_info->has_sticky_lb;
    /* update pointers in dataloop */
930
931
932
    ptrdiff = (MPI_Aint) ((char *) (new_dtp->dataloop) - (char *)
                          (dtype_info->dataloop));

933