ch3u_rma_pkthandler.c 74.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpidimpl.h"
#include "mpidrma.h"

Xin Zhao's avatar
Xin Zhao committed
10
11
12
13
14
15
16
17
18
19
20
MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_put);
MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_get);
MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_acc);
MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_get_accum);
MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_cas);
MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_fop);
MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_get_resp);
MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_get_accum_resp);
MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_cas_resp);
MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_fop_resp);
MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_lock);
21
MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_lock_ack);
Xin Zhao's avatar
Xin Zhao committed
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_unlock);
MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_flush);
MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_flush_ack);
MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_decr_at_cnt);

void MPIDI_CH3_RMA_Init_pkthandler_pvars(void)
{
    /* rma_rmapkt_put */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_rmapkt_put,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "RMA:PKTHANDLER for Put (in seconds)");

    /* rma_rmapkt_get */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_rmapkt_get,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "RMA:PKTHANDLER for Get (in seconds)");

    /* rma_rmapkt_acc */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_rmapkt_acc,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "RMA:PKTHANDLER for Accumulate (in seconds)");

    /* rma_rmapkt_get_accum */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_rmapkt_get_accum,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "RMA:PKTHANDLER for Get-Accumulate (in seconds)");

    /* rma_rmapkt_cas */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_rmapkt_cas,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "RMA:PKTHANDLER for Compare-and-swap (in seconds)");

    /* rma_rmapkt_fop */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_rmapkt_fop,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "RMA:PKTHANDLER for Fetch-and-op (in seconds)");

    /* rma_rmapkt_get_resp */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_rmapkt_get_resp,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "RMA:PKTHANDLER for Get response (in seconds)");

    /* rma_rmapkt_get_accum_resp */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_rmapkt_get_accum_resp,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "RMA:PKTHANDLER for Get-Accumulate response (in seconds)");

    /* rma_rmapkt_cas_resp */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_rmapkt_cas_resp,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "RMA:PKTHANDLER for Compare-and-Swap response (in seconds)");

    /* rma_rmapkt_fop_resp */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_rmapkt_fop_resp,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "RMA:PKTHANDLER for Fetch-and-op response (in seconds)");

    /* rma_rmapkt_lock */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_rmapkt_lock,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "RMA:PKTHANDLER for Lock (in seconds)");

    /* rma_rmapkt_lock_granted */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
131
                                      rma_rmapkt_lock_ack,
Xin Zhao's avatar
Xin Zhao committed
132
133
134
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
135
                                      "RMA", "RMA:PKTHANDLER for Lock-Ack (in seconds)");
Xin Zhao's avatar
Xin Zhao committed
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172

    /* rma_rmapkt_unlock */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_rmapkt_unlock,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "RMA:PKTHANDLER for Unlock (in seconds)");

    /* rma_rmapkt_flush */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_rmapkt_flush,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "RMA:PKTHANDLER for Flush (in seconds)");

    /* rma_rmapkt_flush_ack */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_rmapkt_flush_ack,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "RMA:PKTHANDLER for Flush-Ack (in seconds)");

    /* rma_rmapkt_decr_at_cnt */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_rmapkt_decr_at_cnt,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "RMA:PKTHANDLER for Decr-At-Cnt (in seconds)");
}
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194

/* ------------------------------------------------------------------------ */
/*
 * The following routines are the packet handlers for the packet types
 * used above in the implementation of the RMA operations in terms
 * of messages.
 */
/* ------------------------------------------------------------------------ */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_PktHandler_Put
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPIDI_CH3_PktHandler_Put(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
                             MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp)
{
    MPIDI_CH3_Pkt_put_t *put_pkt = &pkt->put;
    MPID_Request *req = NULL;
    MPI_Aint type_size;
    int complete = 0;
    char *data_buf = NULL;
    MPIDI_msg_sz_t data_len;
    MPID_Win *win_ptr;
195
    int acquire_lock_fail = 0;
196
197
198
199
200
201
202
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_PUT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_PUT);

    MPIU_DBG_MSG(CH3_OTHER, VERBOSE, "received put pkt");

Xin Zhao's avatar
Xin Zhao committed
203
204
    MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_put);

205
206
207
    MPIU_Assert(put_pkt->target_win_handle != MPI_WIN_NULL);
    MPID_Win_get_ptr(put_pkt->target_win_handle, win_ptr);

208
209
    mpi_errno = check_piggyback_lock(win_ptr, vc, pkt, buflen,
                                     &acquire_lock_fail, &req);
210
211
212
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

    if (acquire_lock_fail) {
213
        (*rreqp) = req;
214
215
216
        goto fn_exit;
    }

217
218
219
220
221
222
223
224
225
226
227
    MPID_Datatype_get_size_macro(put_pkt->datatype, type_size);

    if (pkt->type == MPIDI_CH3_PKT_PUT_IMMED) {

        /* Immed packet type is used when target datatype is predefined datatype. */
        MPIU_Assert(MPIR_DATATYPE_IS_PREDEFINED(put_pkt->datatype));

        /* copy data from packet header to target buffer */
        MPIU_Memcpy(put_pkt->addr, put_pkt->info.data, put_pkt->count*type_size);

        /* trigger final action */
228
        mpi_errno = finish_op_on_target(win_ptr, vc, FALSE /* has no response data */,
229
230
231
232
233
234
235
236
237
                                        put_pkt->flags, put_pkt->source_win_handle);
        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

        *buflen = sizeof(MPIDI_CH3_Pkt_t);
        *rreqp = NULL;
    }
    else {
    MPIU_Assert(pkt->type == MPIDI_CH3_PKT_PUT);

238
239
240
241
    /* get start location of data and length of data */
    data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
    data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);

242
243
244
245
246
247
248
249
    req = MPID_Request_create();
    MPIU_Object_set_ref(req, 1);

    req->dev.user_buf = put_pkt->addr;
    req->dev.user_count = put_pkt->count;
    req->dev.target_win_handle = put_pkt->target_win_handle;
    req->dev.source_win_handle = put_pkt->source_win_handle;
    req->dev.flags = put_pkt->flags;
250
    req->dev.OnFinal = MPIDI_CH3_ReqHandler_PutRecvComplete;
251
252
253
254
255
256

    if (MPIR_DATATYPE_IS_PREDEFINED(put_pkt->datatype)) {
        MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_PUT_RESP);
        req->dev.datatype = put_pkt->datatype;

        req->dev.recv_data_sz = type_size * put_pkt->count;
257
        MPIU_Assert(req->dev.recv_data_sz > 0);
Xin Zhao's avatar
Xin Zhao committed
258

259
260
261
262
263
264
265
266
        mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
        MPIU_ERR_CHKANDJUMP1(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
                             "**ch3|postrecv %s", "MPIDI_CH3_PKT_PUT");

        /* return the number of bytes processed in this function */
        *buflen = sizeof(MPIDI_CH3_Pkt_t) + data_len;

        if (complete) {
267
            mpi_errno = MPIDI_CH3_ReqHandler_PutRecvComplete(vc, req, &complete);
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
            if (mpi_errno)
                MPIU_ERR_POP(mpi_errno);
            if (complete) {
                *rreqp = NULL;
                goto fn_exit;
            }
        }
    }
    else {
        /* derived datatype */
        MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_PUT_RESP_DERIVED_DT);
        req->dev.datatype = MPI_DATATYPE_NULL;

        req->dev.dtype_info = (MPIDI_RMA_dtype_info *)
            MPIU_Malloc(sizeof(MPIDI_RMA_dtype_info));
        if (!req->dev.dtype_info) {
            MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s",
                                 "MPIDI_RMA_dtype_info");
        }

288
        req->dev.dataloop = MPIU_Malloc(put_pkt->info.dataloop_size);
289
290
        if (!req->dev.dataloop) {
            MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %d",
291
                                 put_pkt->info.dataloop_size);
292
293
294
295
296
        }

        /* if we received all of the dtype_info and dataloop, copy it
         * now and call the handler, otherwise set the iov and let the
         * channel copy it */
297
        if (data_len >= sizeof(MPIDI_RMA_dtype_info) + put_pkt->info.dataloop_size) {
298
299
300
            /* copy all of dtype_info and dataloop */
            MPIU_Memcpy(req->dev.dtype_info, data_buf, sizeof(MPIDI_RMA_dtype_info));
            MPIU_Memcpy(req->dev.dataloop, data_buf + sizeof(MPIDI_RMA_dtype_info),
301
                        put_pkt->info.dataloop_size);
302
303

            *buflen =
304
                sizeof(MPIDI_CH3_Pkt_t) + sizeof(MPIDI_RMA_dtype_info) + put_pkt->info.dataloop_size;
305
306

            /* All dtype data has been received, call req handler */
307
            mpi_errno = MPIDI_CH3_ReqHandler_PutDerivedDTRecvComplete(vc, req, &complete);
308
309
310
311
312
313
314
315
316
317
318
            MPIU_ERR_CHKANDJUMP1(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
                                 "**ch3|postrecv %s", "MPIDI_CH3_PKT_PUT");
            if (complete) {
                *rreqp = NULL;
                goto fn_exit;
            }
        }
        else {
            req->dev.iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) ((char *) req->dev.dtype_info);
            req->dev.iov[0].MPID_IOV_LEN = sizeof(MPIDI_RMA_dtype_info);
            req->dev.iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) req->dev.dataloop;
319
            req->dev.iov[1].MPID_IOV_LEN = put_pkt->info.dataloop_size;
320
321
322
323
            req->dev.iov_count = 2;

            *buflen = sizeof(MPIDI_CH3_Pkt_t);

324
            req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_PutDerivedDTRecvComplete;
325
326
327
328
329
330
331
332
333
334
        }

    }

    *rreqp = req;

    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
                      "**ch3|postrecv %s", "MPIDI_CH3_PKT_PUT");
    }
335
    }
336
337
338


  fn_exit:
Xin Zhao's avatar
Xin Zhao committed
339
    MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_put);
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_PUT);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_PktHandler_Get
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPIDI_CH3_PktHandler_Get(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
                             MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp)
{
    MPIDI_CH3_Pkt_get_t *get_pkt = &pkt->get;
    MPID_Request *req = NULL;
    MPID_IOV iov[MPID_IOV_LIMIT];
356
    int complete = 0;
357
358
359
360
361
    char *data_buf = NULL;
    MPIDI_msg_sz_t data_len;
    MPID_Win *win_ptr;
    int mpi_errno = MPI_SUCCESS;
    MPI_Aint type_size;
362
    int acquire_lock_fail = 0;
363
364
365
366
367
368
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_GET);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_GET);

    MPIU_DBG_MSG(CH3_OTHER, VERBOSE, "received get pkt");

Xin Zhao's avatar
Xin Zhao committed
369
370
    MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_get);

371
372
373
    MPIU_Assert(get_pkt->target_win_handle != MPI_WIN_NULL);
    MPID_Win_get_ptr(get_pkt->target_win_handle, win_ptr);

374
375
    mpi_errno = check_piggyback_lock(win_ptr, vc, pkt,
                                     buflen, &acquire_lock_fail, &req);
376
377
378
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

    if (acquire_lock_fail) {
379
        (*rreqp) = req;
380
381
382
        goto fn_exit;
    }

383
384
385
386
387
    req = MPID_Request_create();
    req->dev.target_win_handle = get_pkt->target_win_handle;
    req->dev.source_win_handle = get_pkt->source_win_handle;
    req->dev.flags = get_pkt->flags;

388
389
390
391
    /* get start location of data and length of data */
    data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
    data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);

392
393
394
395
    /* here we increment the Active Target counter to guarantee the GET-like
       operation are completed when counter reaches zero. */
    win_ptr->at_completion_counter++;

396
397
398
399
    if (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP) {
        MPIU_Assert(MPIR_DATATYPE_IS_PREDEFINED(get_pkt->datatype));
    }

400
401
402
403
    if (MPIR_DATATYPE_IS_PREDEFINED(get_pkt->datatype)) {
        /* basic datatype. send the data. */
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_get_resp_t *get_resp_pkt = &upkt.get_resp;
404
405
        size_t len;
        int iovcnt;
406
407

        MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_GET_RESP);
408
409
        req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_GetSendComplete;
        req->dev.OnFinal = MPIDI_CH3_ReqHandler_GetSendComplete;
410
411
        req->kind = MPID_REQUEST_SEND;

412
413
414
415
416
417
        if (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP) {
            MPIDI_Pkt_init(get_resp_pkt, MPIDI_CH3_PKT_GET_RESP_IMMED);
        }
        else {
            MPIDI_Pkt_init(get_resp_pkt, MPIDI_CH3_PKT_GET_RESP);
        }
418
        get_resp_pkt->request_handle = get_pkt->request_handle;
419
        get_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
420
421
        if (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
            get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE)
422
            get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
423
424
        if ((get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
            (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
425
426
427
            get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
        get_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
        get_resp_pkt->source_win_handle = get_pkt->source_win_handle;
428

429
        /* length of target data */
430
        MPID_Datatype_get_size_macro(get_pkt->datatype, type_size);
431
432

        if (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP) {
433
434
435
436
            MPIU_Assign_trunc(len, get_pkt->count * type_size, size_t);
            void *src = (void*)(get_pkt->addr), *dest = (void*)(get_resp_pkt->info.data);
            mpi_errno = immed_copy(src, dest, len);
            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
437
438
439
440
441
442
443
444

            iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_resp_pkt;
            iov[0].MPID_IOV_LEN = sizeof(*get_resp_pkt);
            iovcnt = 1;
        }
        else {
            iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_resp_pkt;
            iov[0].MPID_IOV_LEN = sizeof(*get_resp_pkt);
445
446
            iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) ((char *)get_pkt->addr);
            iov[1].MPID_IOV_LEN = get_pkt->count * type_size;
447
448
            iovcnt = 2;
        }
449
450

        MPIU_THREAD_CS_ENTER(CH3COMM, vc);
451
        mpi_errno = MPIDI_CH3_iSendv(vc, req, iov, iovcnt);
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
        MPIU_THREAD_CS_EXIT(CH3COMM, vc);
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno != MPI_SUCCESS) {
            MPIU_Object_set_ref(req, 0);
            MPIDI_CH3_Request_destroy(req);
            MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
        }
        /* --END ERROR HANDLING-- */

        *buflen = sizeof(MPIDI_CH3_Pkt_t);
        *rreqp = NULL;
    }
    else {
        /* derived datatype. first get the dtype_info and dataloop. */

        MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_GET_RESP_DERIVED_DT);
468
        req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_GetDerivedDTRecvComplete;
469
470
471
472
473
474
475
476
477
478
479
480
481
        req->dev.OnFinal = 0;
        req->dev.user_buf = get_pkt->addr;
        req->dev.user_count = get_pkt->count;
        req->dev.datatype = MPI_DATATYPE_NULL;
        req->dev.request_handle = get_pkt->request_handle;

        req->dev.dtype_info = (MPIDI_RMA_dtype_info *)
            MPIU_Malloc(sizeof(MPIDI_RMA_dtype_info));
        if (!req->dev.dtype_info) {
            MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s",
                                 "MPIDI_RMA_dtype_info");
        }

482
        req->dev.dataloop = MPIU_Malloc(get_pkt->info.dataloop_size);
483
484
        if (!req->dev.dataloop) {
            MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %d",
485
                                 get_pkt->info.dataloop_size);
486
487
488
489
490
        }

        /* if we received all of the dtype_info and dataloop, copy it
         * now and call the handler, otherwise set the iov and let the
         * channel copy it */
491
        if (data_len >= sizeof(MPIDI_RMA_dtype_info) + get_pkt->info.dataloop_size) {
492
493
494
            /* copy all of dtype_info and dataloop */
            MPIU_Memcpy(req->dev.dtype_info, data_buf, sizeof(MPIDI_RMA_dtype_info));
            MPIU_Memcpy(req->dev.dataloop, data_buf + sizeof(MPIDI_RMA_dtype_info),
495
                        get_pkt->info.dataloop_size);
496
497

            *buflen =
498
                sizeof(MPIDI_CH3_Pkt_t) + sizeof(MPIDI_RMA_dtype_info) + get_pkt->info.dataloop_size;
499
500

            /* All dtype data has been received, call req handler */
501
            mpi_errno = MPIDI_CH3_ReqHandler_GetDerivedDTRecvComplete(vc, req, &complete);
502
503
504
505
506
507
508
509
510
            MPIU_ERR_CHKANDJUMP1(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
                                 "**ch3|postrecv %s", "MPIDI_CH3_PKT_GET");
            if (complete)
                *rreqp = NULL;
        }
        else {
            req->dev.iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) req->dev.dtype_info;
            req->dev.iov[0].MPID_IOV_LEN = sizeof(MPIDI_RMA_dtype_info);
            req->dev.iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) req->dev.dataloop;
511
            req->dev.iov[1].MPID_IOV_LEN = get_pkt->info.dataloop_size;
512
513
514
515
516
517
518
            req->dev.iov_count = 2;

            *buflen = sizeof(MPIDI_CH3_Pkt_t);
            *rreqp = req;
        }

    }
519
  fn_exit:
Xin Zhao's avatar
Xin Zhao committed
520
    MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_get);
521
522
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_GET);
    return mpi_errno;
523
524
  fn_fail:
    goto fn_exit;
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
}

#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_PktHandler_Accumulate
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPIDI_CH3_PktHandler_Accumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
                                    MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp)
{
    MPIDI_CH3_Pkt_accum_t *accum_pkt = &pkt->accum;
    MPID_Request *req = NULL;
    MPI_Aint true_lb, true_extent, extent;
    void *tmp_buf = NULL;
    int complete = 0;
    char *data_buf = NULL;
    MPIDI_msg_sz_t data_len;
    MPID_Win *win_ptr;
542
    int acquire_lock_fail = 0;
543
544
545
546
547
548
549
550
    int mpi_errno = MPI_SUCCESS;
    MPI_Aint type_size;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACCUMULATE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACCUMULATE);

    MPIU_DBG_MSG(CH3_OTHER, VERBOSE, "received accumulate pkt");

Xin Zhao's avatar
Xin Zhao committed
551
552
    MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_acc);

553
554
555
    MPIU_Assert(accum_pkt->target_win_handle != MPI_WIN_NULL);
    MPID_Win_get_ptr(accum_pkt->target_win_handle, win_ptr);

556
557
    mpi_errno = check_piggyback_lock(win_ptr, vc, pkt, buflen,
                                     &acquire_lock_fail, &req);
558
559
560
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

    if (acquire_lock_fail) {
561
        (*rreqp) = req;
562
563
564
        goto fn_exit;
    }

565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
    if (pkt->type == MPIDI_CH3_PKT_ACCUMULATE_IMMED) {
        /* Immed packet type is used when target datatype is predefined datatype. */
        MPIU_Assert(MPIR_DATATYPE_IS_PREDEFINED(accum_pkt->datatype));

        if (win_ptr->shm_allocated == TRUE)
            MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
        mpi_errno = do_accumulate_op(accum_pkt->info.data, accum_pkt->addr,
                                     accum_pkt->count, accum_pkt->datatype, accum_pkt->op);
        if (win_ptr->shm_allocated == TRUE)
            MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }

        /* trigger final action */
580
        mpi_errno = finish_op_on_target(win_ptr, vc, FALSE /* has no response data */,
581
582
583
584
585
586
587
588
589
                                        accum_pkt->flags, accum_pkt->source_win_handle);
        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

        *buflen = sizeof(MPIDI_CH3_Pkt_t);
        *rreqp = NULL;
    }
    else {
    MPIU_Assert(pkt->type == MPIDI_CH3_PKT_ACCUMULATE);

590
591
592
593
594
595
596
597
598
599
600
    req = MPID_Request_create();
    MPIU_Object_set_ref(req, 1);
    *rreqp = req;

    req->dev.user_count = accum_pkt->count;
    req->dev.op = accum_pkt->op;
    req->dev.real_user_buf = accum_pkt->addr;
    req->dev.target_win_handle = accum_pkt->target_win_handle;
    req->dev.source_win_handle = accum_pkt->source_win_handle;
    req->dev.flags = accum_pkt->flags;

601
    req->dev.resp_request_handle = MPI_REQUEST_NULL;
602
    req->dev.OnFinal = MPIDI_CH3_ReqHandler_AccumRecvComplete;
603

604
605
606
607
    /* get start location of data and length of data */
    data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
    data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);

608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
    if (MPIR_DATATYPE_IS_PREDEFINED(accum_pkt->datatype)) {
        MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_ACCUM_RESP);
        req->dev.datatype = accum_pkt->datatype;

        MPIR_Type_get_true_extent_impl(accum_pkt->datatype, &true_lb, &true_extent);
        MPID_Datatype_get_extent_macro(accum_pkt->datatype, extent);

        /* Predefined types should always have zero lb */
        MPIU_Assert(true_lb == 0);

        tmp_buf = MPIU_Malloc(accum_pkt->count * (MPIR_MAX(extent, true_extent)));
        if (!tmp_buf) {
            MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %d",
                                 accum_pkt->count * MPIR_MAX(extent, true_extent));
        }

        req->dev.user_buf = tmp_buf;

        MPID_Datatype_get_size_macro(accum_pkt->datatype, type_size);
        req->dev.recv_data_sz = type_size * accum_pkt->count;
628
        MPIU_Assert(req->dev.recv_data_sz > 0);
Xin Zhao's avatar
Xin Zhao committed
629

630
631
632
        mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
        MPIU_ERR_CHKANDJUMP1(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
                             "**ch3|postrecv %s", "MPIDI_CH3_PKT_ACCUMULATE");
633

634
635
636
637
        /* return the number of bytes processed in this function */
        *buflen = data_len + sizeof(MPIDI_CH3_Pkt_t);

        if (complete) {
638
            mpi_errno = MPIDI_CH3_ReqHandler_AccumRecvComplete(vc, req, &complete);
639
640
641
642
643
644
645
646
647
648
            if (mpi_errno)
                MPIU_ERR_POP(mpi_errno);
            if (complete) {
                *rreqp = NULL;
                goto fn_exit;
            }
        }
    }
    else {
        MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_ACCUM_RESP_DERIVED_DT);
649
        req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_AccumDerivedDTRecvComplete;
650
651
652
653
654
655
656
657
658
        req->dev.datatype = MPI_DATATYPE_NULL;

        req->dev.dtype_info = (MPIDI_RMA_dtype_info *)
            MPIU_Malloc(sizeof(MPIDI_RMA_dtype_info));
        if (!req->dev.dtype_info) {
            MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s",
                                 "MPIDI_RMA_dtype_info");
        }

659
        req->dev.dataloop = MPIU_Malloc(accum_pkt->info.dataloop_size);
660
661
        if (!req->dev.dataloop) {
            MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %d",
662
                                 accum_pkt->info.dataloop_size);
663
664
        }

665
        if (data_len >= sizeof(MPIDI_RMA_dtype_info) + accum_pkt->info.dataloop_size) {
666
667
668
            /* copy all of dtype_info and dataloop */
            MPIU_Memcpy(req->dev.dtype_info, data_buf, sizeof(MPIDI_RMA_dtype_info));
            MPIU_Memcpy(req->dev.dataloop, data_buf + sizeof(MPIDI_RMA_dtype_info),
669
                        accum_pkt->info.dataloop_size);
670
671

            *buflen =
672
                sizeof(MPIDI_CH3_Pkt_t) + sizeof(MPIDI_RMA_dtype_info) + accum_pkt->info.dataloop_size;
673
674

            /* All dtype data has been received, call req handler */
675
            mpi_errno = MPIDI_CH3_ReqHandler_AccumDerivedDTRecvComplete(vc, req, &complete);
676
677
678
679
680
681
682
683
684
685
686
            MPIU_ERR_CHKANDJUMP1(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
                                 "**ch3|postrecv %s", "MPIDI_CH3_ACCUMULATE");
            if (complete) {
                *rreqp = NULL;
                goto fn_exit;
            }
        }
        else {
            req->dev.iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) req->dev.dtype_info;
            req->dev.iov[0].MPID_IOV_LEN = sizeof(MPIDI_RMA_dtype_info);
            req->dev.iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) req->dev.dataloop;
687
            req->dev.iov[1].MPID_IOV_LEN = accum_pkt->info.dataloop_size;
688
689
690
691
692
            req->dev.iov_count = 2;
            *buflen = sizeof(MPIDI_CH3_Pkt_t);
        }

    }
693
    }
694
695
696
697
698
699
700

    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
                             "**ch3|postrecv %s", "MPIDI_CH3_PKT_ACCUMULATE");
    }

  fn_exit:
Xin Zhao's avatar
Xin Zhao committed
701
    MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_acc);
702
703
704
705
706
707
708
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACCUMULATE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;

}

709
710
711
712
713
714
715
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_PktHandler_GetAccumulate
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPIDI_CH3_PktHandler_GetAccumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
                                       MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp)
{
Xin Zhao's avatar
Xin Zhao committed
716
    MPIDI_CH3_Pkt_get_accum_t *get_accum_pkt = &pkt->get_accum;
717
718
719
720
721
722
723
    MPID_Request *req = NULL;
    MPI_Aint true_lb, true_extent, extent;
    void *tmp_buf = NULL;
    int complete = 0;
    char *data_buf = NULL;
    MPIDI_msg_sz_t data_len;
    MPID_Win *win_ptr;
724
    int acquire_lock_fail = 0;
725
726
727
728
729
730
731
732
    int mpi_errno = MPI_SUCCESS;
    MPI_Aint type_size;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_GETACCUMULATE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_GETACCUMULATE);

    MPIU_DBG_MSG(CH3_OTHER, VERBOSE, "received accumulate pkt");

Xin Zhao's avatar
Xin Zhao committed
733
734
    MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_get_accum);

Xin Zhao's avatar
Xin Zhao committed
735
736
    MPIU_Assert(get_accum_pkt->target_win_handle != MPI_WIN_NULL);
    MPID_Win_get_ptr(get_accum_pkt->target_win_handle, win_ptr);
737

738
739
740
    mpi_errno = check_piggyback_lock(win_ptr, vc, pkt,
                                     buflen,
                                     &acquire_lock_fail, &req);
741
742
743
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

    if (acquire_lock_fail) {
744
        (*rreqp) = req;
745
746
747
        goto fn_exit;
    }

748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
    MPID_Datatype_get_size_macro(get_accum_pkt->datatype, type_size);

    if (pkt->type == MPIDI_CH3_PKT_GET_ACCUM_IMMED) {
        size_t len;
        void *src = NULL, *dest = NULL;
        MPID_Request *resp_req = NULL;
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_get_accum_resp_t *get_accum_resp_pkt = &upkt.get_accum_resp;
        MPID_IOV iov[MPID_IOV_LIMIT];
        int iovcnt;

        *buflen = sizeof(MPIDI_CH3_Pkt_t);
        *rreqp = NULL;

        /* Immed packet type is used when target datatype is predefined datatype. */
        MPIU_Assert(MPIR_DATATYPE_IS_PREDEFINED(get_accum_pkt->datatype));

        resp_req = MPID_Request_create();
        resp_req->dev.target_win_handle = get_accum_pkt->target_win_handle;
        resp_req->dev.source_win_handle = get_accum_pkt->source_win_handle;
        resp_req->dev.flags = get_accum_pkt->flags;

        MPIDI_Request_set_type(resp_req, MPIDI_REQUEST_TYPE_GET_ACCUM_RESP);
        resp_req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_GaccumSendComplete;
        resp_req->dev.OnFinal = MPIDI_CH3_ReqHandler_GaccumSendComplete;
        resp_req->kind = MPID_REQUEST_SEND;
        if (!(get_accum_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP)) {
            tmp_buf = MPIU_Malloc(get_accum_pkt->count * type_size);
            if (!tmp_buf) {
                MPIU_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER,"**nomem","**nomem %d",
                                     get_accum_pkt->count * type_size);
            }
            resp_req->dev.user_buf = tmp_buf;
        }

        /* here we increment the Active Target counter to guarantee the GET-like
           operation are completed when counter reaches zero. */
        win_ptr->at_completion_counter++;

        /* Calculate the length of reponse data, ensure that it fits into immed packet. */
        MPIU_Assign_trunc(len, get_accum_pkt->count * type_size, size_t);

        if (get_accum_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP) {
            MPIDI_Pkt_init(get_accum_resp_pkt, MPIDI_CH3_PKT_GET_ACCUM_RESP_IMMED);
        }
        else {
            MPIDI_Pkt_init(get_accum_resp_pkt, MPIDI_CH3_PKT_GET_ACCUM_RESP);
        }

        get_accum_resp_pkt->request_handle = get_accum_pkt->request_handle;
        get_accum_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
        get_accum_resp_pkt->source_win_handle = get_accum_pkt->source_win_handle;
        get_accum_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
        if (get_accum_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
            get_accum_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE)
            get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
        if ((get_accum_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
            (get_accum_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
            get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;

        if (win_ptr->shm_allocated == TRUE)
            MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);

        if (get_accum_resp_pkt->type == MPIDI_CH3_PKT_GET_ACCUM_RESP_IMMED) {
            /* copy data from target buffer to response packet header */
            src = (void *) (get_accum_pkt->addr), dest = (void *) (get_accum_resp_pkt->info.data);
            mpi_errno = immed_copy(src, dest, len);
815
816
817
818
819
            if (mpi_errno != MPI_SUCCESS) {
                if (win_ptr->shm_allocated == TRUE)
                    MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
                MPIU_ERR_POP(mpi_errno);
            }
820
821
822
823
824
825
826
827
828
829
830
831
832
833
        }
        else {
            MPIU_Memcpy(resp_req->dev.user_buf, get_accum_pkt->addr,
                        get_accum_pkt->count * type_size);
        }

        /* perform accumulate operation. */
        mpi_errno = do_accumulate_op(get_accum_pkt->info.data, get_accum_pkt->addr,
                                     get_accum_pkt->count, get_accum_pkt->datatype,
                                     get_accum_pkt->op);

        if (win_ptr->shm_allocated == TRUE)
            MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);

834
835
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
        if (get_accum_resp_pkt->type == MPIDI_CH3_PKT_GET_ACCUM_RESP_IMMED) {
            iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_accum_resp_pkt;
            iov[0].MPID_IOV_LEN = sizeof(*get_accum_resp_pkt);
            iovcnt = 1;
        }
        else {
            iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_accum_resp_pkt;
            iov[0].MPID_IOV_LEN = sizeof(*get_accum_resp_pkt);
            iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) ((char *)resp_req->dev.user_buf);
            iov[1].MPID_IOV_LEN = get_accum_pkt->count * type_size;
            iovcnt = 2;
        }

        MPIU_THREAD_CS_ENTER(CH3COMM, vc);
        mpi_errno = MPIDI_CH3_iSendv(vc, resp_req, iov, iovcnt);
        MPIU_THREAD_CS_EXIT(CH3COMM, vc);
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno != MPI_SUCCESS) {
            MPIU_Object_set_ref(resp_req, 0);
            MPIDI_CH3_Request_destroy(resp_req);
            MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
        }
        /* --END ERROR HANDLING-- */
    }
    else {
    MPIU_Assert(pkt->type == MPIDI_CH3_PKT_GET_ACCUM);

863
864
865
866
    req = MPID_Request_create();
    MPIU_Object_set_ref(req, 1);
    *rreqp = req;

Xin Zhao's avatar
Xin Zhao committed
867
868
869
870
871
872
    req->dev.user_count = get_accum_pkt->count;
    req->dev.op = get_accum_pkt->op;
    req->dev.real_user_buf = get_accum_pkt->addr;
    req->dev.target_win_handle = get_accum_pkt->target_win_handle;
    req->dev.source_win_handle = get_accum_pkt->source_win_handle;
    req->dev.flags = get_accum_pkt->flags;
873

Xin Zhao's avatar
Xin Zhao committed
874
    req->dev.resp_request_handle = get_accum_pkt->request_handle;
875
    req->dev.OnFinal = MPIDI_CH3_ReqHandler_GaccumRecvComplete;
876

877
878
879
880
    /* get start location of data and length of data */
    data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
    data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);

Xin Zhao's avatar
Xin Zhao committed
881
    if (MPIR_DATATYPE_IS_PREDEFINED(get_accum_pkt->datatype)) {
882
        MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_GET_ACCUM_RESP);
Xin Zhao's avatar
Xin Zhao committed
883
        req->dev.datatype = get_accum_pkt->datatype;
884

Xin Zhao's avatar
Xin Zhao committed
885
886
        MPIR_Type_get_true_extent_impl(get_accum_pkt->datatype, &true_lb, &true_extent);
        MPID_Datatype_get_extent_macro(get_accum_pkt->datatype, extent);
887
888
889
890

        /* Predefined types should always have zero lb */
        MPIU_Assert(true_lb == 0);

Xin Zhao's avatar
Xin Zhao committed
891
        tmp_buf = MPIU_Malloc(get_accum_pkt->count * (MPIR_MAX(extent, true_extent)));
892
893
        if (!tmp_buf) {
            MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %d",
Xin Zhao's avatar
Xin Zhao committed
894
                                 get_accum_pkt->count * MPIR_MAX(extent, true_extent));
895
896
897
898
        }

        req->dev.user_buf = tmp_buf;

Xin Zhao's avatar
Xin Zhao committed
899
        req->dev.recv_data_sz = type_size * get_accum_pkt->count;
900
        MPIU_Assert(req->dev.recv_data_sz > 0);
Xin Zhao's avatar
Xin Zhao committed
901

902
903
904
        mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
        MPIU_ERR_CHKANDJUMP1(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
                             "**ch3|postrecv %s", "MPIDI_CH3_PKT_ACCUMULATE");
905

906
907
908
909
        /* return the number of bytes processed in this function */
        *buflen = data_len + sizeof(MPIDI_CH3_Pkt_t);

        if (complete) {
910
            mpi_errno = MPIDI_CH3_ReqHandler_GaccumRecvComplete(vc, req, &complete);
911
912
913
914
915
916
917
918
919
920
            if (mpi_errno)
                MPIU_ERR_POP(mpi_errno);
            if (complete) {
                *rreqp = NULL;
                goto fn_exit;
            }
        }
    }
    else {
        MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_GET_ACCUM_RESP_DERIVED_DT);
921
        req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_GaccumDerivedDTRecvComplete;
922
923
924
925
926
927
928
929
930
        req->dev.datatype = MPI_DATATYPE_NULL;

        req->dev.dtype_info = (MPIDI_RMA_dtype_info *)
            MPIU_Malloc(sizeof(MPIDI_RMA_dtype_info));
        if (!req->dev.dtype_info) {
            MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s",
                                 "MPIDI_RMA_dtype_info");
        }

931
        req->dev.dataloop = MPIU_Malloc(get_accum_pkt->info.dataloop_size);
932
933
        if (!req->dev.dataloop) {
            MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %d",
934
                                 get_accum_pkt->info.dataloop_size);
935
936
        }

937
        if (data_len >= sizeof(MPIDI_RMA_dtype_info) + get_accum_pkt->info.dataloop_size) {
938
939
940
            /* copy all of dtype_info and dataloop */
            MPIU_Memcpy(req->dev.dtype_info, data_buf, sizeof(MPIDI_RMA_dtype_info));
            MPIU_Memcpy(req->dev.dataloop, data_buf + sizeof(MPIDI_RMA_dtype_info),
941
                        get_accum_pkt->info.dataloop_size);
942
943

            *buflen =
944
                sizeof(MPIDI_CH3_Pkt_t) + sizeof(MPIDI_RMA_dtype_info) + get_accum_pkt->info.dataloop_size;
945
946

            /* All dtype data has been received, call req handler */
947
            mpi_errno = MPIDI_CH3_ReqHandler_GaccumDerivedDTRecvComplete(vc, req, &complete);
948
949
950
951
952
953
954
955
956
957
958
            MPIU_ERR_CHKANDJUMP1(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
                                 "**ch3|postrecv %s", "MPIDI_CH3_ACCUMULATE");
            if (complete) {
                *rreqp = NULL;
                goto fn_exit;
            }
        }
        else {
            req->dev.iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) req->dev.dtype_info;
            req->dev.iov[0].MPID_IOV_LEN = sizeof(MPIDI_RMA_dtype_info);
            req->dev.iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) req->dev.dataloop;
959
            req->dev.iov[1].MPID_IOV_LEN = get_accum_pkt->info.dataloop_size;
960
961
962
963
964
965
966
967
968
969
            req->dev.iov_count = 2;
            *buflen = sizeof(MPIDI_CH3_Pkt_t);
        }

    }

    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
                             "**ch3|postrecv %s", "MPIDI_CH3_PKT_ACCUMULATE");
    }
970
    }
971
972

  fn_exit:
Xin Zhao's avatar
Xin Zhao committed
973
    MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_get_accum);
974
975
976
977
978
979
980
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_GETACCUMULATE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;

}

981
982
983
984
985
986
987
988
989
990
991
992
993
994

#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_PktHandler_CAS
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPIDI_CH3_PktHandler_CAS(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
                             MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_cas_resp_t *cas_resp_pkt = &upkt.cas_resp;
    MPIDI_CH3_Pkt_cas_t *cas_pkt = &pkt->cas;
    MPID_Win *win_ptr;
    MPID_Request *req;
995
    MPID_Request *rreq = NULL;
996
    MPI_Aint len;
997
    int acquire_lock_fail = 0;
998
999
1000
1001
1002
1003
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_CAS);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_CAS);

    MPIU_DBG_MSG(CH3_OTHER, VERBOSE, "received CAS pkt");

Xin Zhao's avatar
Xin Zhao committed
1004
1005
    MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_cas);

1006
1007
1008
    MPIU_Assert(cas_pkt->target_win_handle != MPI_WIN_NULL);
    MPID_Win_get_ptr(cas_pkt->target_win_handle, win_ptr);

1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
    mpi_errno = check_piggyback_lock(win_ptr, vc, pkt, buflen,
                                     &acquire_lock_fail, &rreq);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
    MPIU_Assert(rreq == NULL); /* CAS should not have request because all data
                                  can fit in packet header */

    if (acquire_lock_fail) {
        (*rreqp) = rreq;
        goto fn_exit;
    }

1020
1021
1022
1023
1024
    /* return the number of bytes processed in this function */
    /* data_len == 0 (all within packet) */
    *buflen = sizeof(MPIDI_CH3_Pkt_t);
    *rreqp = NULL;

1025
    MPIDI_Pkt_init(cas_resp_pkt, MPIDI_CH3_PKT_CAS_RESP_IMMED);
1026
    cas_resp_pkt->request_handle = cas_pkt->request_handle;
1027
1028
1029
    cas_resp_pkt->source_win_handle = cas_pkt->source_win_handle;
    cas_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
    cas_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
1030
1031
    if (cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
        cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE)
1032
        cas_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
1033
1034
    if ((cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
        (cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
1035
        cas_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
1036
1037
1038
1039
1040
1041
1042
1043

    /* Copy old value into the response packet */
    MPID_Datatype_get_size_macro(cas_pkt->datatype, len);
    MPIU_Assert(len <= sizeof(MPIDI_CH3_CAS_Immed_u));

    if (win_ptr->shm_allocated == TRUE)
        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);

1044
    MPIU_Memcpy((void *) &cas_resp_pkt->info.data, cas_pkt->addr, len);
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066

    /* Compare and replace if equal */
    if (MPIR_Compare_equal(&cas_pkt->compare_data, cas_pkt->addr, cas_pkt->datatype)) {
        MPIU_Memcpy(cas_pkt->addr, &cas_pkt->origin_data, len);
    }

    if (win_ptr->shm_allocated == TRUE)
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);

    /* Send the response packet */
    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, cas_resp_pkt, sizeof(*cas_resp_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);

    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");

    if (req != NULL) {
        if (!MPID_Request_is_complete(req)) {
            /* sending process is not completed, set proper OnDataAvail
               (it is initialized to NULL by lower layer) */
            req->dev.target_win_handle = cas_pkt->target_win_handle;
            req->dev.flags = cas_pkt->flags;
Xin Zhao's avatar
Xin Zhao committed
1067
            req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_CASSendComplete;
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079

            /* here we increment the Active Target counter to guarantee the GET-like
               operation are completed when counter reaches zero. */
            win_ptr->at_completion_counter++;

            MPID_Request_release(req);
            goto fn_exit;
        }
        else
            MPID_Request_release(req);
    }

1080
    mpi_errno = finish_op_on_target(win_ptr, vc, TRUE /* has response data */,
1081
1082
                                    cas_pkt->flags, cas_pkt->source_win_handle);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
1083
1084

  fn_exit:
Xin Zhao's avatar
Xin Zhao committed
1085
    MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_cas);
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_CAS);
    return mpi_errno;
  fn_fail:
    goto fn_exit;

}


#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_PktHandler_CASResp
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPIDI_CH3_PktHandler_CASResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
                                 MPIDI_CH3_Pkt_t * pkt,
                                 MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_cas_resp_t *cas_resp_pkt = &pkt->cas_resp;
    MPID_Request *req;
    MPI_Aint len;
1106
    MPID_Win *win_ptr;
1107
    int target_rank = cas_resp_pkt->target_rank;
1108
1109
1110
1111
1112
1113
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_CASRESP);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_CASRESP);

    MPIU_DBG_MSG(CH3_OTHER, VERBOSE, "received CAS response pkt");

Xin Zhao's avatar
Xin Zhao committed
1114
1115
    MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_cas_resp);

1116
1117
1118
    MPID_Win_get_ptr(cas_resp_pkt->source_win_handle, win_ptr);

    /* decrement ack_counter on this target */
1119
    if (cas_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
1120
        mpi_errno = handle_lock_ack(win_ptr, target_rank,
1121
1122
1123
1124
1125
                                          cas_resp_pkt->flags);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        mpi_errno = adjust_op_piggybacked_with_lock(win_ptr, target_rank,
                                                    cas_resp_pkt->flags);
1126
1127
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }
1128
    if (cas_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK) {
1129
1130
1131
        mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
    }
1132

1133
1134
1135
    MPID_Request_get_ptr(cas_resp_pkt->request_handle, req);
    MPID_Datatype_get_size_macro(req->dev.datatype, len);

1136
    MPIU_Memcpy(req->dev.user_buf, (void *) &cas_resp_pkt->info.data, len);
1137
1138
1139
1140
1141
1142

    MPIDI_CH3U_Request_complete(req);
    *buflen = sizeof(MPIDI_CH3_Pkt_t);
    *rreqp = NULL;

  fn_exit:
Xin Zhao's avatar
Xin Zhao committed
1143
    MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_cas_resp);
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_CASRESP);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}


#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_PktHandler_FOP
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPIDI_CH3_PktHandler_FOP(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
                             MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_fop_t *fop_pkt = &pkt->fop;
1160
1161
1162
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_fop_resp_t *fop_resp_pkt = &upkt.fop_resp;
    MPID_Request *resp_req = NULL;
1163
    MPID_Request *rreq = NULL;
1164
    int acquire_lock_fail = 0;
1165
    MPID_Win *win_ptr = NULL;
1166
    MPI_Aint type_size;
1167
1168
1169
1170
1171
1172
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOP);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOP);

    MPIU_DBG_MSG(CH3_OTHER, VERBOSE, "received FOP pkt");

Xin Zhao's avatar
Xin Zhao committed
1173
1174
    MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_fop);

1175
1176
    MPID_Win_get_ptr(fop_pkt->target_win_handle, win_ptr);

1177
1178
    mpi_errno = check_piggyback_lock(win_ptr, vc, pkt, buflen,
                                     &acquire_lock_fail, &rreq);
1179
1180
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

1181
1182
    MPIU_Assert(rreq == NULL); /* FOP should not have request because all data
                                  can fit in packet header */
1183
    if (acquire_lock_fail) {
1184
        (*rreqp) = rreq;
1185
1186
1187
        goto fn_exit;
    }

1188
1189
1190
    (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
    (*rreqp) = NULL;

1191
1192
1193
1194
1195
1196
1197
    MPID_Datatype_get_size_macro(fop_pkt->datatype, type_size);

    if (pkt->type == MPIDI_CH3_PKT_FOP_IMMED) {

    MPIU_Assert(fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP);

    MPIDI_Pkt_init(fop_resp_pkt, MPIDI_CH3_PKT_FOP_RESP_IMMED);
1198
1199
1200
1201
    fop_resp_pkt->request_handle = fop_pkt->request_handle;
    fop_resp_pkt->source_win_handle = fop_pkt->source_win_handle;
    fop_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
    fop_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
1202
1203
    if (fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
        fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE)
1204
        fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
1205
1206
    if ((fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
        (fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
1207
1208
        fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;

1209
1210
1211
    if (win_ptr->shm_allocated == TRUE)
        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);

1212
    /* copy data to resp pkt header */
1213
1214
    void *src = fop_pkt->addr, *dest = fop_resp_pkt->info.data;
    mpi_errno = immed_copy(src, dest, type_size);
1215
1216
1217
1218
1219
    if (mpi_errno != MPI_SUCCESS) {
        if (win_ptr->shm_allocated == TRUE)
            MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
        MPIU_ERR_POP(mpi_errno);
    }
1220
1221
1222

    /* Apply the op */
    if (fop_pkt->op != MPI_NO_OP) {
1223
1224
        mpi_errno = do_accumulate_op(fop_pkt->info.data, fop_pkt->addr,
                                     1, fop_pkt->datatype, fop_pkt->op);
1225
1226
    }

1227
1228
1229
    if (win_ptr->shm_allocated == TRUE)
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);

1230
1231
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

1232
1233
1234
1235
1236
    /* send back the original data */
    MPIU_THREAD_CS_ENTER(CH3COMM,vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, fop_resp_pkt, sizeof(*fop_resp_pkt), &resp_req);
    MPIU_THREAD_CS_EXIT(CH3COMM,vc);
    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
1237

1238
1239
1240
1241
1242
1243
    if (resp_req != NULL) {
        if (!MPID_Request_is_complete(resp_req)) {
            /* sending process is not completed, set proper OnDataAvail
               (it is initialized to NULL by lower layer) */
            resp_req->dev.target_win_handle = fop_pkt->target_win_handle;
            resp_req->dev.flags = fop_pkt->flags;
Xin Zhao's avatar
Xin Zhao committed
1244
            resp_req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_FOPSendComplete;
1245

1246
1247
1248
            /* here we increment the Active Target counter to guarantee the GET-like
               operation are completed when counter reaches zero. */
            win_ptr->at_completion_counter++;
1249

1250
1251
            MPID_Request_release(resp_req);
            goto fn_exit;
1252
        }
1253
1254
1255
1256
        else {
            MPID_Request_release(resp_req);
        }
    }
1257

1258
    mpi_errno = finish_op_on_target(win_ptr, vc, TRUE /* has response data */,
1259
1260
                                    fop_pkt->flags, fop_pkt->source_win_handle);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
    }
    else {
        MPIU_Assert(pkt->type == MPIDI_CH3_PKT_FOP);

        MPIU_Assert(!(fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP));

        MPID_Request *req = NULL;
        char *data_buf = NULL;
        MPIDI_msg_sz_t data_len;
        MPI_Aint extent;
        int complete = 0;

        req = MPID_Request_create();
        MPIU_Object_set_ref(req, 1);
        *rreqp = req;

        req->dev.op = fop_pkt->op;
        req->dev.real_user_buf = fop_pkt->addr;
        req->dev.target_win_handle = fop_pkt->target_win_handle;
        req->dev.source_win_handle = fop_pkt->source_win_handle;
        req->dev.flags = fop_pkt->flags;
        req->dev.resp_request_handle = fop_pkt->request_handle;
        req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_FOPRecvComplete;
        req->dev.OnFinal = MPIDI_CH3_ReqHandler_FOPRecvComplete;
        req->dev.datatype = fop_pkt->datatype;
        req->dev.user_count = 1;

        /* get start location of data and length of data */
        data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
        data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);

        MPID_Datatype_get_extent_macro(fop_pkt->datatype, extent);

        req->dev.user_buf = MPIU_Malloc(extent);
        if (!req->dev.user_buf) {
            MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %d",
                                 extent);
        }

        req->dev.recv_data_sz = type_size;
        MPIU_Assert(req->dev.recv_data_sz > 0);

        mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
        MPIU_ERR_CHKANDJUMP1(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
                             "**ch3|postrecv %s", "MPIDI_CH3_PKT_ACCUMULATE");

        /* return the number of bytes processed in this function */
        *buflen = data_len + sizeof(MPIDI_CH3_Pkt_t);

        if (complete) {
            mpi_errno = MPIDI_CH3_ReqHandler_FOPRecvComplete(vc, req, &complete);
            if (mpi_errno)
                MPIU_ERR_POP(mpi_errno);
            if (complete) {
                *rreqp = NULL;
                goto fn_exit;
            }
        }
    }
1320
1321

  fn_exit:
Xin Zhao's avatar
Xin Zhao committed
1322
    MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_fop);
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_PktHandler_FOPResp
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPIDI_CH3_PktHandler_FOPResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
                                 MPIDI_CH3_Pkt_t * pkt,
                                 MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_fop_resp_t *fop_resp_pkt = &pkt->fop_resp;
1342
1343
    MPID_Request *req = NULL;
    MPID_Win *win_ptr = NULL;
1344
1345
1346
1347
    MPI_Aint type_size;
    MPIDI_msg_sz_t data_len;
    char *data_buf = NULL;
    int complete = 0;
1348
    int target_rank = fop_resp_pkt->target_rank;
1349
1350
1351
1352
1353
1354
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOPRESP);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOPRESP);

    MPIU_DBG_MSG(CH3_OTHER, VERBOSE, "received FOP response pkt");

Xin Zhao's avatar
Xin Zhao committed
1355
1356
    MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_fop_resp);

1357
1358
    MPID_Win_get_ptr(fop_resp_pkt->source_win_handle, win_ptr);

1359
1360
    MPID_Request_get_ptr(fop_resp_pkt->request_handle, req);

1361
    /* decrement ack_counter */
1362
    if (fop_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
1363
        mpi_errno = handle_lock_ack(win_ptr, target_rank,
1364
1365
1366
1367
1368
                                          fop_resp_pkt->flags);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        mpi_errno = adjust_op_piggybacked_with_lock(win_ptr, target_rank,
                                                    fop_resp_pkt->flags);
1369
1370
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }
1371
    if (fop_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK) {
1372
1373
1374
        mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }
1375

1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
    data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
    data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);

    MPID_Datatype_get_size_macro(req->dev.datatype, type_size);
    req->dev.recv_data_sz = type_size;
    req->dev.user_count = 1;

    *rreqp = req;

    if (fop_resp_pkt->type == MPIDI_CH3_PKT_FOP_RESP_IMMED) {
        MPIU_Memcpy(req->dev.user_buf, fop_resp_pkt->info.data, req->dev.recv_data_sz);

        /* return the number of bytes processed in this function */
        *buflen = sizeof(MPIDI_CH3_Pkt_t);
        complete = 1;
    }
    else {
        MPIU_Assert(fop_resp_pkt->type == MPIDI_CH3_PKT_FOP_RESP);

        mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
        MPIU_ERR_CHKANDJUMP1(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv", "**ch3|postrecv %s",
                             "MPIDI_CH3_PKT_FOP_RESP");

        /* return the number of bytes processed in this function */
        *buflen = data_len + sizeof(MPIDI_CH3_Pkt_t);
    }

    if (complete) {
1404
1405
    MPIDI_CH3U_Request_complete(req);
    *rreqp = NULL;
1406
    }
1407
1408

  fn_exit:
Xin Zhao's avatar
Xin Zhao committed
1409
    MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_fop_resp);
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOPRESP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_PktHandler_Get_AccumResp
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPIDI_CH3_PktHandler_Get_AccumResp(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
                                       MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp)
{
    MPIDI_CH3_Pkt_get_accum_resp_t *get_accum_resp_pkt = &pkt->get_accum_resp;
    MPID_Request *req;
1428
    int complete = 0;
1429
1430
1431
1432
    char *data_buf = NULL;
    MPIDI_msg_sz_t data_len;
    int mpi_errno = MPI_SUCCESS;
    MPI_Aint type_size;
1433
    MPID_Win *win_ptr;
1434
    int target_rank