mpidrma.h 35.7 KB
Newer Older
1
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2 3 4 5 6
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

7 8
#if !defined(MPID_RMA_H_INCLUDED)
#define MPID_RMA_H_INCLUDED
9

10 11 12
#include "mpid_rma_types.h"
#include "mpid_rma_oplist.h"
#include "mpid_rma_shm.h"
13
#include "mpid_rma_issue.h"
14
#include "mpid_rma_lockqueue.h"
15

16
#undef FUNCNAME
17
#define FUNCNAME send_lock_msg
18 19
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
20
static inline int send_lock_msg(int dest, int lock_type, MPID_Win * win_ptr)
21
{
22 23 24 25 26 27 28
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_lock_t *lock_pkt = &upkt.lock;
    MPID_Request *req = NULL;
    MPIDI_VC_t *vc;
    MPIDI_STATE_DECL(MPID_STATE_SEND_LOCK_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_LOCK_MSG);
29

30
    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
31

32 33 34
    MPIDI_Pkt_init(lock_pkt, MPIDI_CH3_PKT_LOCK);
    lock_pkt->target_win_handle = win_ptr->all_win_handles[dest];
    lock_pkt->source_win_handle = win_ptr->handle;
35 36 37 38 39 40 41
    lock_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
    if (lock_type == MPI_LOCK_SHARED)
        lock_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED;
    else {
        MPIU_Assert(lock_type == MPI_LOCK_EXCLUSIVE);
        lock_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE;
    }
42

43 44 45 46
    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_pkt, sizeof(*lock_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
47

48 49 50 51
    /* release the request returned by iStartMsg */
    if (req != NULL) {
        MPID_Request_release(req);
    }
52

53
  fn_exit:
54
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_LOCK_MSG);
55
    return mpi_errno;
56
    /* --BEGIN ERROR HANDLING-- */
57
  fn_fail:
58
    goto fn_exit;
59
    /* --END ERROR HANDLING-- */
60 61
}

62
#undef FUNCNAME
63
#define FUNCNAME send_unlock_msg
64 65
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
66 67
static inline int send_unlock_msg(int dest, MPID_Win * win_ptr,
                                  MPIDI_CH3_Pkt_flags_t flags)
68 69
{
    int mpi_errno = MPI_SUCCESS;
70 71 72 73 74 75
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_unlock_t *unlock_pkt = &upkt.unlock;
    MPID_Request *req = NULL;
    MPIDI_VC_t *vc;
    MPIDI_STATE_DECL(MPID_STATE_SEND_UNLOCK_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_UNLOCK_MSG);
76

77
    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
78

79 80
    /* Send a lock packet over to the target. wait for the lock_granted
     * reply. Then do all the RMA ops. */
81

82 83
    MPIDI_Pkt_init(unlock_pkt, MPIDI_CH3_PKT_UNLOCK);
    unlock_pkt->target_win_handle = win_ptr->all_win_handles[dest];
84
    unlock_pkt->source_win_handle = win_ptr->handle;
85
    unlock_pkt->flags = flags;
86

87 88 89 90
    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, unlock_pkt, sizeof(*unlock_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
91

92 93 94
    /* Release the request returned by iStartMsg */
    if (req != NULL) {
        MPID_Request_release(req);
95
    }
96

97
  fn_exit:
98
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_UNLOCK_MSG);
99 100
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
101
  fn_fail:
102 103 104 105
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}

106

107
#undef FUNCNAME
108
#define FUNCNAME MPIDI_CH3I_Send_lock_ack_pkt
109 110
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
111 112 113
static inline int MPIDI_CH3I_Send_lock_ack_pkt(MPIDI_VC_t * vc, MPID_Win * win_ptr,
                                               MPIDI_CH3_Pkt_flags_t flags,
                                               MPI_Win source_win_handle)
114
{
115
    MPIDI_CH3_Pkt_t upkt;
116
    MPIDI_CH3_Pkt_lock_ack_t *lock_ack_pkt = &upkt.lock_ack;
117 118
    MPID_Request *req = NULL;
    int mpi_errno;
119
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
120

121
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
122

123 124 125 126
    /* send lock ack packet */
    MPIDI_Pkt_init(lock_ack_pkt, MPIDI_CH3_PKT_LOCK_ACK);
    lock_ack_pkt->source_win_handle = source_win_handle;
    lock_ack_pkt->target_rank = win_ptr->comm_ptr->rank;
127
    lock_ack_pkt->flags = flags;
128 129

    MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE,
130 131
                     (MPIU_DBG_FDEST, "sending lock ack pkt on vc=%p, source_win_handle=%#08x",
                      vc, lock_ack_pkt->source_win_handle));
132 133

    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
134
    mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_ack_pkt, sizeof(*lock_ack_pkt), &req);
135 136 137
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    if (mpi_errno) {
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
138 139
    }

140 141
    if (req != NULL) {
        MPID_Request_release(req);
142
    }
143

144
  fn_fail:
145
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
146 147

    return mpi_errno;
148 149
}

150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_Send_lock_op_ack_pkt
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int MPIDI_CH3I_Send_lock_op_ack_pkt(MPIDI_VC_t * vc, MPID_Win * win_ptr,
                                                  MPIDI_CH3_Pkt_flags_t flags,
                                                  MPI_Win source_win_handle)
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_lock_op_ack_t *lock_op_ack_pkt = &upkt.lock_op_ack;
    MPID_Request *req = NULL;
    int mpi_errno;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_LOCK_OP_ACK_PKT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_LOCK_OP_ACK_PKT);

    /* send lock ack packet */
    MPIDI_Pkt_init(lock_op_ack_pkt, MPIDI_CH3_PKT_LOCK_OP_ACK);
    lock_op_ack_pkt->source_win_handle = source_win_handle;
    lock_op_ack_pkt->target_rank = win_ptr->comm_ptr->rank;
    lock_op_ack_pkt->flags = flags;

    MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE,
                     (MPIU_DBG_FDEST, "sending lock op ack pkt on vc=%p, source_win_handle=%#08x",
                      vc, lock_op_ack_pkt->source_win_handle));

    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_op_ack_pkt, sizeof(*lock_op_ack_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    if (mpi_errno) {
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
    }

    if (req != NULL) {
        MPID_Request_release(req);
    }

  fn_fail:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_LOCK_OP_ACK_PKT);
    return mpi_errno;
}

192 193

#undef FUNCNAME
Xin Zhao's avatar
Xin Zhao committed
194
#define FUNCNAME MPIDI_CH3I_Send_flush_ack_pkt
195 196
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
Xin Zhao's avatar
Xin Zhao committed
197
static inline int MPIDI_CH3I_Send_flush_ack_pkt(MPIDI_VC_t *vc, MPID_Win *win_ptr,
198
                                    MPI_Win source_win_handle)
199
{
200
    MPIDI_CH3_Pkt_t upkt;
Xin Zhao's avatar
Xin Zhao committed
201
    MPIDI_CH3_Pkt_flush_ack_t *flush_ack_pkt = &upkt.flush_ack;
202 203
    MPID_Request *req;
    int mpi_errno=MPI_SUCCESS;
Xin Zhao's avatar
Xin Zhao committed
204
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_FLUSH_ACK_PKT);
205

Xin Zhao's avatar
Xin Zhao committed
206
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_FLUSH_ACK_PKT);
207

Xin Zhao's avatar
Xin Zhao committed
208 209 210
    MPIDI_Pkt_init(flush_ack_pkt, MPIDI_CH3_PKT_FLUSH_ACK);
    flush_ack_pkt->source_win_handle = source_win_handle;
    flush_ack_pkt->target_rank = win_ptr->comm_ptr->rank;
211

212 213
    /* Because this is in a packet handler, it is already within a critical section */	
    /* MPIU_THREAD_CS_ENTER(CH3COMM,vc); */
Xin Zhao's avatar
Xin Zhao committed
214
    mpi_errno = MPIDI_CH3_iStartMsg(vc, flush_ack_pkt, sizeof(*flush_ack_pkt), &req);
215 216 217
    /* MPIU_THREAD_CS_EXIT(CH3COMM,vc); */
    if (mpi_errno != MPI_SUCCESS) {
	MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rmamsg");
218 219
    }

220 221 222
    if (req != NULL)
    {
        MPID_Request_release(req);
223 224
    }

225
 fn_fail:
Xin Zhao's avatar
Xin Zhao committed
226
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_FLUSH_ACK_PKT);
227 228
    return mpi_errno;
}
229

230

231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
#undef FUNCNAME
#define FUNCNAME send_decr_at_cnt_msg
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int send_decr_at_cnt_msg(int dst, MPID_Win * win_ptr)
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_decr_at_counter_t *decr_at_cnt_pkt = &upkt.decr_at_cnt;
    MPIDI_VC_t * vc;
    MPID_Request *request = NULL;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_SEND_DECR_AT_CNT_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_DECR_AT_CNT_MSG);

    MPIDI_Pkt_init(decr_at_cnt_pkt, MPIDI_CH3_PKT_DECR_AT_COUNTER);
    decr_at_cnt_pkt->target_win_handle = win_ptr->all_win_handles[dst];

    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dst, &vc);

    MPIU_THREAD_CS_ENTER(CH3COMM,vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, decr_at_cnt_pkt,
                                    sizeof(*decr_at_cnt_pkt), &request);
    MPIU_THREAD_CS_EXIT(CH3COMM,vc);
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rmamsg" );
    }

    if (request != NULL) {
        MPID_Request_release(request);
    }

  fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_DECR_AT_CNT_MSG);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}

271

272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
#undef FUNCNAME
#define FUNCNAME send_flush_msg
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int send_flush_msg(int dest, MPID_Win * win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_flush_t *flush_pkt = &upkt.flush;
    MPID_Request *req = NULL;
    MPIDI_VC_t *vc;
    MPIDI_STATE_DECL(MPID_STATE_SEND_FLUSH_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_FLUSH_MSG);

    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);

    MPIDI_Pkt_init(flush_pkt, MPIDI_CH3_PKT_FLUSH);
    flush_pkt->target_win_handle = win_ptr->all_win_handles[dest];
    flush_pkt->source_win_handle = win_ptr->handle;

    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, flush_pkt, sizeof(*flush_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");

    /* Release the request returned by iStartMsg */
    if (req != NULL) {
        MPID_Request_release(req);
    }

  fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_FLUSH_MSG);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}

311 312

/* enqueue an unsatisfied origin in passive target at target side. */
313 314 315 316
static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
                                      MPIDI_CH3_Pkt_t *pkt,
                                      MPIDI_msg_sz_t *buflen,
                                      MPID_Request **reqp)
317
{
318
    MPIDI_RMA_Lock_entry_t *new_ptr = NULL;
319 320 321
    MPIDI_CH3_Pkt_flags_t flag;
    MPI_Win source_win_handle;
    int lock_discarded = 0, data_discarded = 0;
322 323
    int mpi_errno = MPI_SUCCESS;

324
    (*reqp) = NULL;
325

326
    new_ptr = MPIDI_CH3I_Win_lock_entry_alloc(win_ptr, pkt);
327 328
    if (new_ptr != NULL) {
        MPL_LL_APPEND(win_ptr->lock_queue, win_ptr->lock_queue_tail, new_ptr);
329
        new_ptr->vc = vc;
330 331 332
    }
    else {
        lock_discarded = 1;
333
    }
334

335 336 337 338
    if (pkt->type == MPIDI_CH3_PKT_LOCK ||
        pkt->type == MPIDI_CH3_PKT_GET ||
        pkt->type == MPIDI_CH3_PKT_FOP ||
        pkt->type == MPIDI_CH3_PKT_CAS) {
339

340 341
        /* return bytes of data processed in this pkt handler */
        (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
342 343 344 345 346

        if (new_ptr != NULL)
            new_ptr->all_data_recved = 1;

        goto issue_ack;
347 348 349 350 351 352 353
    }
    else {
        MPI_Aint type_size = 0;
        MPIDI_msg_sz_t recv_data_sz = 0;
        MPID_Request *req = NULL;
        MPI_Datatype target_dtp;
        int target_count;
354
        int immed_len = 0;
355 356 357 358 359 360 361 362 363 364 365 366 367 368
        void *immed_data = NULL;
        int complete = 0;
        MPIDI_msg_sz_t data_len;
        char *data_buf = NULL;

        /* This is PUT, ACC, GACC */

        MPIDI_CH3_PKT_RMA_GET_TARGET_DATATYPE((*pkt), target_dtp, mpi_errno);
        MPIDI_CH3_PKT_RMA_GET_TARGET_COUNT((*pkt), target_count, mpi_errno);

        MPID_Datatype_get_size_macro(target_dtp, type_size);
        recv_data_sz = type_size * target_count;

        if (recv_data_sz <= MPIDI_RMA_IMMED_BYTES) {
369

370 371
            /* return bytes of data processed in this pkt handler */
            (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
372 373 374 375 376

            if (new_ptr != NULL)
                new_ptr->all_data_recved = 1;

            goto issue_ack;
377 378
        }

379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
        if (new_ptr != NULL) {
            if (win_ptr->current_lock_data_bytes + recv_data_sz
                < MPIR_CVAR_CH3_RMA_LOCK_DATA_BYTES) {
                new_ptr->data = MPIU_Malloc(recv_data_sz);
            }

            if (new_ptr->data == NULL) {
                /* Note that there are two possible reasons to make new_ptr->data to be NULL:
                 * (1) win_ptr->current_lock_data_bytes + recv_data_sz >= MPIR_CVAR_CH3_RMA_LOCK_DATA_BYTES;
                 * (2) MPIU_Malloc(recv_data_sz) failed.
                 * In such cases, we cannot allocate memory for lock data, so we give up
                 * buffering lock data, however, we still buffer lock request.
                 */
                MPIDI_CH3_Pkt_t new_pkt;
                MPIDI_CH3_Pkt_lock_t *lock_pkt = &new_pkt.lock;
                MPI_Win target_win_handle;
395
                MPIDI_CH3_Pkt_flags_t flags;
396 397 398

                MPIDI_CH3_PKT_RMA_GET_TARGET_WIN_HANDLE((*pkt), target_win_handle, mpi_errno);
                MPIDI_CH3_PKT_RMA_GET_SOURCE_WIN_HANDLE((*pkt), source_win_handle, mpi_errno);
399
                MPIDI_CH3_PKT_RMA_GET_FLAGS((*pkt), flags, mpi_errno);
400 401 402 403

                MPIDI_Pkt_init(lock_pkt, MPIDI_CH3_PKT_LOCK);
                lock_pkt->target_win_handle = target_win_handle;
                lock_pkt->source_win_handle = source_win_handle;
404
                lock_pkt->flags = flags;
405 406 407 408 409 410 411 412 413 414 415

                /* replace original pkt with lock pkt */
                new_ptr->pkt = new_pkt;
                new_ptr->all_data_recved = 1;

                data_discarded = 1;
            }
            else {
                win_ptr->current_lock_data_bytes += recv_data_sz;
                new_ptr->data_size = recv_data_sz;
            }
416 417 418 419 420 421 422
        }

        /* create request to receive upcoming requests */
        req = MPID_Request_create();
        MPIU_Object_set_ref(req, 1);

        /* fill in area in req that will be used in Receive_data_found() */
423
        if (lock_discarded || data_discarded) {
424
            req->dev.drop_data = TRUE;
425 426 427 428 429 430
            req->dev.user_buf = NULL;
            req->dev.user_count = target_count;
            req->dev.datatype = target_dtp;
            req->dev.recv_data_sz = recv_data_sz;
            req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
            req->dev.OnFinal = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
431
            req->dev.lock_queue_entry = new_ptr;
432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461

            MPIDI_CH3_PKT_RMA_GET_IMMED_LEN((*pkt), immed_len, mpi_errno);
            if (immed_len > 0) {
                req->dev.recv_data_sz -= immed_len;
            }
            data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
            data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
            MPIU_Assert(req->dev.recv_data_sz > 0);
        }
        else {
            req->dev.user_buf = new_ptr->data;
            req->dev.user_count = target_count;
            req->dev.datatype = target_dtp;
            req->dev.recv_data_sz = recv_data_sz;
            req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
            req->dev.OnFinal = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
            req->dev.lock_queue_entry = new_ptr;

            MPIDI_CH3_PKT_RMA_GET_IMMED_LEN((*pkt), immed_len, mpi_errno);
            MPIDI_CH3_PKT_RMA_GET_IMMED_DATA_PTR((*pkt), immed_data, mpi_errno);

            if (immed_len > 0) {
                /* see if we can receive some data from packet header */
                MPIU_Memcpy(req->dev.user_buf, immed_data, (size_t)immed_len);
                req->dev.user_buf = (void*)((char*)req->dev.user_buf + immed_len);
                req->dev.recv_data_sz -= immed_len;
            }
            data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
            data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
            MPIU_Assert(req->dev.recv_data_sz > 0);
462 463 464 465 466 467 468 469 470 471 472 473
        }

        mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

        /* return bytes of data processed in this pkt handler */
        (*buflen) = sizeof(MPIDI_CH3_Pkt_t) + data_len;

        if (complete) {
            mpi_errno = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete(vc, req, &complete);
            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
            if (complete) {
474
                goto issue_ack;
475 476 477 478 479 480
            }
        }

        (*reqp) = req;
    }

481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498
 issue_ack:
    MPIDI_CH3_PKT_RMA_GET_SOURCE_WIN_HANDLE((*pkt), source_win_handle, mpi_errno);
    if (pkt->type == MPIDI_CH3_PKT_LOCK) {
        if (lock_discarded) flag = MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED;
        else flag = MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_QUEUED;

        mpi_errno = MPIDI_CH3I_Send_lock_ack_pkt(vc, win_ptr, flag, source_win_handle);
        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
    }
    else {
        if (lock_discarded) flag = MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED;
        else if (data_discarded) flag = MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_DISCARDED;
        else flag = MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_QUEUED;

        mpi_errno = MPIDI_CH3I_Send_lock_op_ack_pkt(vc, win_ptr, flag, source_win_handle);
        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
    }

499 500 501 502 503 504 505
 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


506
static inline int handle_lock_ack(MPID_Win *win_ptr, int target_rank,
507
                                        MPIDI_CH3_Pkt_flags_t flags)
508
{
509
    MPIDI_RMA_Target_t *t = NULL;
510 511
    int mpi_errno = MPI_SUCCESS;

512 513 514 515 516 517 518 519 520 521
    MPIU_Assert(win_ptr->states.access_state == MPIDI_RMA_PER_TARGET ||
                win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED ||
                win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED);

    if (win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED) {
        MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, win_ptr->comm_ptr->rank, &orig_vc);
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
        if (win_ptr->comm_ptr->rank == target_rank ||
            (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id)) {
522 523 524 525 526 527 528 529 530
            if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
                win_ptr->outstanding_locks--;
                MPIU_Assert(win_ptr->outstanding_locks >= 0);
            }
            else if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED) {
                /* re-send lock request message. */
                mpi_errno = send_lock_msg(target_rank, MPI_LOCK_SHARED, win_ptr);
                if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
            }
531 532 533 534
            goto fn_exit;
        }
    }
    else if (win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED) {
535 536 537 538 539 540 541 542 543
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
            win_ptr->outstanding_locks--;
            MPIU_Assert(win_ptr->outstanding_locks >= 0);
        }
        else if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED) {
            /* re-send lock request message. */
            mpi_errno = send_lock_msg(target_rank, MPI_LOCK_SHARED, win_ptr);
            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
        }
544
        goto fn_exit;
545 546
    }

547 548 549
    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &t);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
    MPIU_Assert(t != NULL);
550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577

    if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED)
        t->access_state = MPIDI_RMA_LOCK_GRANTED;

    if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED)
        t->access_state = MPIDI_RMA_LOCK_CALLED;

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

static inline int adjust_op_piggybacked_with_lock (MPID_Win *win_ptr,
                                                   int target_rank,
                                                   MPIDI_CH3_Pkt_flags_t flags) {
    MPIDI_RMA_Target_t *target = NULL;
    MPIDI_RMA_Op_t *op = NULL;
    MPIDI_CH3_Pkt_flags_t op_flags = MPIDI_CH3_PKT_FLAG_NONE;
    int mpi_errno = MPI_SUCCESS;

    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &target);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
    MPIU_Assert(target != NULL);

    op = target->pending_op_list;
    if (op != NULL) MPIDI_CH3_PKT_RMA_GET_FLAGS(op->pkt, op_flags, mpi_errno);

578 579
    if (op_flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
        op_flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE) {
580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED ||
            flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_QUEUED) {
            if (!op->request) {
                if (op->ureq) {
                    /* Complete user request and release the ch3 ref */
                    MPID_Request_set_completed(op->ureq);
                    MPID_Request_release(op->ureq);
                }

                MPIDI_CH3I_RMA_Ops_free_elem(win_ptr, &(target->pending_op_list),
                                             &(target->pending_op_list_tail), op);
            }
            else {
                MPIDI_CH3I_RMA_Ops_unlink(&(target->pending_op_list),
                                          &(target->pending_op_list_tail), op);
                if (op->is_dt) {
                    MPIDI_CH3I_RMA_Ops_append(&(target->dt_op_list),
                                              &(target->dt_op_list_tail), op);
                }
                else if (op->pkt.type == MPIDI_CH3_PKT_PUT ||
                         op->pkt.type == MPIDI_CH3_PKT_ACCUMULATE) {
                    MPIDI_CH3I_RMA_Ops_append(&(target->write_op_list),
                                              &(target->write_op_list_tail), op);
                }
                else {
                    MPIDI_CH3I_RMA_Ops_append(&(target->read_op_list),
                                              &(target->read_op_list_tail), op);
                }

                if (op->ureq) {
                    if (MPID_Request_is_complete(op->request)) {
                        /* Complete user request, let cleanup function to release
                           ch3 ref */
                        MPID_Request_set_completed(op->ureq);
                    }
                    else {
                        /* Increase ref for completion handler */
                        MPIU_Object_add_ref(op->ureq);
                        op->request->dev.request_handle = op->ureq->handle;
                        if (op->request->dev.OnDataAvail == NULL) {
                            op->request->dev.OnDataAvail = MPIDI_CH3_ReqHandler_ReqOpsComplete;
                        }
                        op->request->dev.OnFinal = MPIDI_CH3_ReqHandler_ReqOpsComplete;
                    }
                }
            }
        }
        else if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_DISCARDED ||
                 flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED) {
            /* We need to re-transmit this operation, so we destroy
               the internal request and erase all flags in current
               operation. */
            if (op->request) {
                MPIDI_CH3_Request_destroy(op->request);
                op->request = NULL;
                win_ptr->active_req_cnt--;
            }
            MPIDI_CH3_PKT_RMA_ERASE_FLAGS(op->pkt, mpi_errno);

            target->next_op_to_issue = op;
        }
    }
642 643 644 645 646 647 648 649

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


650 651 652 653
#undef FUNCNAME
#define FUNCNAME acquire_local_lock
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
654
static inline int acquire_local_lock(MPID_Win * win_ptr, int lock_type)
655 656 657 658 659
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_ACQUIRE_LOCAL_LOCK);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_ACQUIRE_LOCAL_LOCK);

Xin Zhao's avatar
Xin Zhao committed
660 661
    MPIR_T_PVAR_TIMER_START(RMA, rma_winlock_getlocallock);

662
    if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_type) == 1) {
663
        mpi_errno = handle_lock_ack(win_ptr, win_ptr->comm_ptr->rank,
664
                                          MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED);
665 666 667 668 669 670
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }
    else {
        /* Queue the lock information. */
        MPIDI_CH3_Pkt_t pkt;
        MPIDI_CH3_Pkt_lock_t *lock_pkt = &pkt.lock;
671
        MPIDI_RMA_Lock_entry_t *new_ptr = NULL;
672
        MPIDI_VC_t *my_vc;
673 674

        MPIDI_Pkt_init(lock_pkt, MPIDI_CH3_PKT_LOCK);
675 676 677 678 679 680 681
        lock_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
        if (lock_type == MPI_LOCK_SHARED)
            lock_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED;
        else {
            MPIU_Assert(lock_type == MPI_LOCK_EXCLUSIVE);
            lock_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE;
        }
682

683 684
        new_ptr = MPIDI_CH3I_Win_lock_entry_alloc(win_ptr, &pkt);
        if (new_ptr == NULL) {
685
            mpi_errno = handle_lock_ack(win_ptr, win_ptr->comm_ptr->rank,
686 687 688
                                              MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED);
            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
            goto fn_exit;
689
        }
690
        MPL_LL_APPEND(win_ptr->lock_queue, win_ptr->lock_queue_tail, new_ptr);
691 692
        MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, win_ptr->comm_ptr->rank, &my_vc);
        new_ptr->vc = my_vc;
693 694

        new_ptr->all_data_recved = 1;
695 696
    }

697
  fn_exit:
Xin Zhao's avatar
Xin Zhao committed
698
    MPIR_T_PVAR_TIMER_END(RMA, rma_winlock_getlocallock);
699
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_ACQUIRE_LOCAL_LOCK);
700 701
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
702
  fn_fail:
703 704 705 706 707
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


708 709 710 711 712 713 714 715 716 717 718 719
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_RMA_Handle_flush_ack
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int MPIDI_CH3I_RMA_Handle_flush_ack(MPID_Win * win_ptr, int target_rank)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_RMA_Target_t *t;

    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &t);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

720 721 722 723
    t->sync.outstanding_acks--;
    MPIU_Assert(t->sync.outstanding_acks >= 0);

    t->put_acc_issued = 0; /* reset PUT_ACC_FLAG after FLUSH is completed */
724 725 726 727 728 729 730 731

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


732
#undef FUNCNAME
733
#define FUNCNAME do_accumulate_op
734 735
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
736 737
static inline int do_accumulate_op(void *source_buf, void *target_buf,
                                   int acc_count, MPI_Datatype acc_dtp, MPI_Op acc_op)
738 739
{
    int mpi_errno = MPI_SUCCESS;
740 741 742 743 744
    MPI_User_function *uop;
    MPIDI_STATE_DECL(MPID_STATE_DO_ACCUMULATE_OP);

    MPIDI_FUNC_ENTER(MPID_STATE_DO_ACCUMULATE_OP);

745
    if (acc_op == MPI_REPLACE)
746 747
    {
        /* simply copy the data */
748 749
        mpi_errno = MPIR_Localcopy(source_buf, acc_count, acc_dtp,
                                   target_buf, acc_count, acc_dtp);
750
        if (mpi_errno) {
751 752
	    MPIU_ERR_POP(mpi_errno);
	}
753 754 755
        goto fn_exit;
    }

756
    if (HANDLE_GET_KIND(acc_op) == HANDLE_KIND_BUILTIN)
757 758
    {
        /* get the function by indexing into the op table */
759
        uop = MPIR_OP_HDL_TO_FN(acc_op);
760 761 762 763
    }
    else
    {
	/* --BEGIN ERROR HANDLING-- */
764
        mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OP, "**opnotpredefined", "**opnotpredefined %d", acc_op );
765 766
        return mpi_errno;
	/* --END ERROR HANDLING-- */
767 768
    }

769
    if (MPIR_DATATYPE_IS_PREDEFINED(acc_dtp))
770
    {
771
        (*uop)(source_buf, target_buf, &acc_count, &acc_dtp);
772 773 774 775
    }
    else
    {
	/* derived datatype */
776 777 778
        MPID_Segment *segp;
        DLOOP_VECTOR *dloop_vec;
        MPI_Aint first, last;
779 780
        int vec_len, i, count;
        MPI_Aint type_size;
781
        MPI_Datatype type;
782 783 784 785 786 787 788 789 790
        MPID_Datatype *dtp;

        segp = MPID_Segment_alloc();
	/* --BEGIN ERROR HANDLING-- */
        if (!segp)
	{
            mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
	    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
            return mpi_errno;
791
        }
792
	/* --END ERROR HANDLING-- */
793 794
        MPID_Segment_init(NULL, acc_count,
			  acc_dtp, segp, 0);
795 796 797
        first = 0;
        last  = SEGMENT_IGNORE_LAST;

798 799
        MPID_Datatype_get_ptr(acc_dtp, dtp);
        vec_len = dtp->max_contig_blocks * acc_count + 1;
800 801 802 803 804 805 806 807 808
        /* +1 needed because Rob says so */
        dloop_vec = (DLOOP_VECTOR *)
            MPIU_Malloc(vec_len * sizeof(DLOOP_VECTOR));
	/* --BEGIN ERROR HANDLING-- */
        if (!dloop_vec)
	{
            mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
	    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
            return mpi_errno;
809
        }
810 811 812 813 814 815 816 817 818
	/* --END ERROR HANDLING-- */

        MPID_Segment_pack_vector(segp, first, &last, dloop_vec, &vec_len);

        type = dtp->eltype;
        MPID_Datatype_get_size_macro(type, type_size);
        for (i=0; i<vec_len; i++)
	{
            MPIU_Assign_trunc(count, (dloop_vec[i].DLOOP_VECTOR_LEN)/type_size, int);
819 820
            (*uop)((char *)source_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
                   (char *)target_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
821
                   &count, &type);
822 823
        }

824 825
        MPID_Segment_free(segp);
        MPIU_Free(dloop_vec);
826 827
    }

828 829
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
830 831

    return mpi_errno;
832
 fn_fail:
833 834 835
    goto fn_exit;
}

836

837 838 839 840 841
static inline int check_piggyback_lock(MPID_Win *win_ptr, MPIDI_VC_t *vc,
                                       MPIDI_CH3_Pkt_t *pkt,
                                       MPIDI_msg_sz_t *buflen,
                                       int *acquire_lock_fail,
                                       MPID_Request **reqp) {
842 843 844 845 846
    int lock_type;
    MPIDI_CH3_Pkt_flags_t flags;
    int mpi_errno = MPI_SUCCESS;

    (*acquire_lock_fail) = 0;
847
    (*reqp) = NULL;
848 849

    MPIDI_CH3_PKT_RMA_GET_FLAGS((*pkt), flags, mpi_errno);
850 851 852 853 854 855 856 857 858
    if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
        flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE) {

        if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED)
            lock_type = MPI_LOCK_SHARED;
        else {
            MPIU_Assert(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE);
            lock_type = MPI_LOCK_EXCLUSIVE;
        }
859 860 861

        if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_type) == 0) {
            /* cannot acquire the lock, queue up this operation. */
862
            mpi_errno = enqueue_lock_origin(win_ptr, vc, pkt, buflen, reqp);
863 864 865 866 867 868 869 870 871 872 873
            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
            (*acquire_lock_fail) = 1;
        }
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

874 875 876 877 878 879 880 881
static inline int finish_op_on_target(MPID_Win *win_ptr, MPIDI_VC_t *vc,
                                      MPIDI_CH3_Pkt_type_t type,
                                      MPIDI_CH3_Pkt_flags_t flags,
                                      MPI_Win source_win_handle) {
    int mpi_errno = MPI_SUCCESS;

    if (type == MPIDI_CH3_PKT_PUT || type == MPIDI_CH3_PKT_ACCUMULATE) {
        /* This is PUT or ACC */
882 883
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
            flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE) {
884
            MPIDI_CH3_Pkt_flags_t pkt_flags = MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
885 886
            if ((flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
                (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
887
                pkt_flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
888 889 890
            mpi_errno = MPIDI_CH3I_Send_lock_op_ack_pkt(vc, win_ptr,
                                                        pkt_flags,
                                                        source_win_handle);
891 892
            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
            MPIDI_CH3_Progress_signal_completion();
893 894
        }
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) {
895 896
            if (!(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
                  flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE)) {
897 898 899 900 901 902
                /* If op is piggybacked with both LOCK and FLUSH,
                   we only send LOCK ACK back, do not send FLUSH ACK. */
                mpi_errno = MPIDI_CH3I_Send_flush_ack_pkt(vc, win_ptr,
                                                          source_win_handle);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            }
903 904 905 906 907 908 909 910 911 912
            MPIDI_CH3_Progress_signal_completion();
        }
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_DECR_AT_COUNTER) {
            win_ptr->at_completion_counter--;
            MPIU_Assert(win_ptr->at_completion_counter >= 0);
            /* Signal the local process when the op counter reaches 0. */
            if (win_ptr->at_completion_counter == 0)
                MPIDI_CH3_Progress_signal_completion();
        }
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK) {
913 914
            if (!(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
                  flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE)) {
915 916 917 918 919 920
                /* If op is piggybacked with both LOCK and UNLOCK,
                   we only send LOCK ACK back, do not send FLUSH (UNLOCK) ACK. */
                mpi_errno = MPIDI_CH3I_Send_flush_ack_pkt(vc, win_ptr,
                                                          source_win_handle);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            }
921 922
            mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948
            MPIDI_CH3_Progress_signal_completion();
        }
    }
    else {
        /* This is GACC / GET / CAS / FOP */

        if (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK) {
            mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPIDI_CH3_Progress_signal_completion();
        }

        if (flags & MPIDI_CH3_PKT_FLAG_RMA_DECR_AT_COUNTER) {
            win_ptr->at_completion_counter--;
            MPIU_Assert(win_ptr->at_completion_counter >= 0);
            /* Signal the local process when the op counter reaches 0. */
            if (win_ptr->at_completion_counter == 0)
                MPIDI_CH3_Progress_signal_completion();
        }
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
949

950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984

static inline int fill_ranks_in_win_grp(MPID_Win *win_ptr, MPID_Group *group_ptr,
                                        int *ranks_in_win_grp)
{
    int mpi_errno = MPI_SUCCESS;
    int i, *ranks_in_grp;
    MPID_Group *win_grp_ptr;
    MPIU_CHKLMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_FILL_RANKS_IN_WIN_GRP);

    MPIDI_RMA_FUNC_ENTER(MPID_STATE_FILL_RANKS_IN_WIN_GRP);

    MPIU_CHKLMEM_MALLOC(ranks_in_grp, int *, group_ptr->size * sizeof(int),
                        mpi_errno, "ranks_in_grp");
    for (i = 0; i < group_ptr->size; i++) ranks_in_grp[i] = i;

    mpi_errno = MPIR_Comm_group_impl(win_ptr->comm_ptr, &win_grp_ptr);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPIR_Group_translate_ranks_impl(group_ptr, group_ptr->size,
                                                ranks_in_grp, win_grp_ptr, ranks_in_win_grp);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPIR_Group_free_impl(win_grp_ptr);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

  fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_FILL_RANKS_IN_WIN_GRP);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


985
static inline int wait_progress_engine(void)
986 987
{
    int mpi_errno = MPI_SUCCESS;
988
    MPID_Progress_state progress_state;
989

990 991 992 993 994 995
    MPID_Progress_start(&progress_state);
    mpi_errno = MPID_Progress_wait(&progress_state);
    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS) {
        MPID_Progress_end(&progress_state);
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**winnoprogress");
996
    }
997 998
    /* --END ERROR HANDLING-- */
    MPID_Progress_end(&progress_state);
999

1000
  fn_exit:
1001
    return mpi_errno;
1002
  fn_fail:
1003 1004 1005
    goto fn_exit;
}

1006
static inline int poke_progress_engine(void)
1007 1008
{
    int mpi_errno = MPI_SUCCESS;
1009
    MPID_Progress_state progress_state;
1010

1011 1012 1013 1014 1015
    MPID_Progress_start(&progress_state);
    mpi_errno = MPID_Progress_poke();
    if (mpi_errno != MPI_SUCCESS)
        MPIU_ERR_POP(mpi_errno);
    MPID_Progress_end(&progress_state);
1016

1017
  fn_exit:
1018
    return mpi_errno;
1019
  fn_fail:
1020 1021 1022
    goto fn_exit;
}

1023
#endif /* MPID_RMA_H_INCLUDED */