mpidrma.h 36.6 KB
Newer Older
1
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2 3 4 5 6
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

7 8
#if !defined(MPID_RMA_H_INCLUDED)
#define MPID_RMA_H_INCLUDED
9

10 11 12
#include "mpid_rma_types.h"
#include "mpid_rma_oplist.h"
#include "mpid_rma_shm.h"
13
#include "mpid_rma_issue.h"
14
#include "mpid_rma_lockqueue.h"
15

16
#undef FUNCNAME
17
#define FUNCNAME send_lock_msg
18 19
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
20
static inline int send_lock_msg(int dest, int lock_type, MPID_Win * win_ptr)
21
{
22 23 24 25 26 27 28
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_lock_t *lock_pkt = &upkt.lock;
    MPID_Request *req = NULL;
    MPIDI_VC_t *vc;
    MPIDI_STATE_DECL(MPID_STATE_SEND_LOCK_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_LOCK_MSG);
29

30
    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
31

32 33 34
    MPIDI_Pkt_init(lock_pkt, MPIDI_CH3_PKT_LOCK);
    lock_pkt->target_win_handle = win_ptr->all_win_handles[dest];
    lock_pkt->source_win_handle = win_ptr->handle;
35
    lock_pkt->request_handle = MPI_REQUEST_NULL;
36 37 38 39 40 41 42
    lock_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
    if (lock_type == MPI_LOCK_SHARED)
        lock_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED;
    else {
        MPIU_Assert(lock_type == MPI_LOCK_EXCLUSIVE);
        lock_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE;
    }
43

44 45 46 47
    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_pkt, sizeof(*lock_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
48

49 50 51 52
    /* release the request returned by iStartMsg */
    if (req != NULL) {
        MPID_Request_release(req);
    }
53

54
  fn_exit:
55
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_LOCK_MSG);
56
    return mpi_errno;
57
    /* --BEGIN ERROR HANDLING-- */
58
  fn_fail:
59
    goto fn_exit;
60
    /* --END ERROR HANDLING-- */
61 62
}

63
#undef FUNCNAME
64
#define FUNCNAME send_unlock_msg
65 66
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
67 68
static inline int send_unlock_msg(int dest, MPID_Win * win_ptr,
                                  MPIDI_CH3_Pkt_flags_t flags)
69 70
{
    int mpi_errno = MPI_SUCCESS;
71 72 73 74 75 76
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_unlock_t *unlock_pkt = &upkt.unlock;
    MPID_Request *req = NULL;
    MPIDI_VC_t *vc;
    MPIDI_STATE_DECL(MPID_STATE_SEND_UNLOCK_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_UNLOCK_MSG);
77

78
    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
79

80 81
    /* Send a lock packet over to the target. wait for the lock_granted
     * reply. Then do all the RMA ops. */
82

83 84
    MPIDI_Pkt_init(unlock_pkt, MPIDI_CH3_PKT_UNLOCK);
    unlock_pkt->target_win_handle = win_ptr->all_win_handles[dest];
85
    unlock_pkt->source_win_handle = win_ptr->handle;
86
    unlock_pkt->flags = flags;
87

88 89 90 91
    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, unlock_pkt, sizeof(*unlock_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
92

93 94 95
    /* Release the request returned by iStartMsg */
    if (req != NULL) {
        MPID_Request_release(req);
96
    }
97

98
  fn_exit:
99
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_UNLOCK_MSG);
100 101
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
102
  fn_fail:
103 104 105 106
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}

107

108
#undef FUNCNAME
109
#define FUNCNAME MPIDI_CH3I_Send_lock_ack_pkt
110 111
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
112 113
static inline int MPIDI_CH3I_Send_lock_ack_pkt(MPIDI_VC_t * vc, MPID_Win * win_ptr,
                                               MPIDI_CH3_Pkt_flags_t flags,
114 115
                                               MPI_Win source_win_handle,
                                               MPI_Request request_handle)
116
{
117
    MPIDI_CH3_Pkt_t upkt;
118
    MPIDI_CH3_Pkt_lock_ack_t *lock_ack_pkt = &upkt.lock_ack;
119 120
    MPID_Request *req = NULL;
    int mpi_errno;
121
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
122

123
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
124

125 126
    MPIU_Assert(!(source_win_handle != MPI_WIN_NULL && request_handle != MPI_REQUEST_NULL));

127 128 129
    /* send lock ack packet */
    MPIDI_Pkt_init(lock_ack_pkt, MPIDI_CH3_PKT_LOCK_ACK);
    lock_ack_pkt->source_win_handle = source_win_handle;
130
    lock_ack_pkt->request_handle = request_handle;
131
    lock_ack_pkt->target_rank = win_ptr->comm_ptr->rank;
132
    lock_ack_pkt->flags = flags;
133 134

    MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE,
135 136
                     (MPIU_DBG_FDEST, "sending lock ack pkt on vc=%p, source_win_handle=%#08x",
                      vc, lock_ack_pkt->source_win_handle));
137 138

    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
139
    mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_ack_pkt, sizeof(*lock_ack_pkt), &req);
140 141 142
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    if (mpi_errno) {
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
143 144
    }

145 146
    if (req != NULL) {
        MPID_Request_release(req);
147
    }
148

149
  fn_fail:
150
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
151 152

    return mpi_errno;
153 154
}

155 156 157 158 159 160
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_Send_lock_op_ack_pkt
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int MPIDI_CH3I_Send_lock_op_ack_pkt(MPIDI_VC_t * vc, MPID_Win * win_ptr,
                                                  MPIDI_CH3_Pkt_flags_t flags,
161 162
                                                  MPI_Win source_win_handle,
                                                  MPI_Request request_handle)
163 164 165 166 167 168 169 170 171
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_lock_op_ack_t *lock_op_ack_pkt = &upkt.lock_op_ack;
    MPID_Request *req = NULL;
    int mpi_errno;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_LOCK_OP_ACK_PKT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_LOCK_OP_ACK_PKT);

172 173
    MPIU_Assert(!(source_win_handle != MPI_WIN_NULL && request_handle != MPI_REQUEST_NULL));

174 175 176
    /* send lock ack packet */
    MPIDI_Pkt_init(lock_op_ack_pkt, MPIDI_CH3_PKT_LOCK_OP_ACK);
    lock_op_ack_pkt->source_win_handle = source_win_handle;
177
    lock_op_ack_pkt->request_handle = request_handle;
178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
    lock_op_ack_pkt->target_rank = win_ptr->comm_ptr->rank;
    lock_op_ack_pkt->flags = flags;

    MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE,
                     (MPIU_DBG_FDEST, "sending lock op ack pkt on vc=%p, source_win_handle=%#08x",
                      vc, lock_op_ack_pkt->source_win_handle));

    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_op_ack_pkt, sizeof(*lock_op_ack_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    if (mpi_errno) {
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
    }

    if (req != NULL) {
        MPID_Request_release(req);
    }

  fn_fail:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_LOCK_OP_ACK_PKT);
    return mpi_errno;
}

201 202

#undef FUNCNAME
Xin Zhao's avatar
Xin Zhao committed
203
#define FUNCNAME MPIDI_CH3I_Send_flush_ack_pkt
204 205
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
Xin Zhao's avatar
Xin Zhao committed
206
static inline int MPIDI_CH3I_Send_flush_ack_pkt(MPIDI_VC_t *vc, MPID_Win *win_ptr,
207
                                    MPI_Win source_win_handle)
208
{
209
    MPIDI_CH3_Pkt_t upkt;
Xin Zhao's avatar
Xin Zhao committed
210
    MPIDI_CH3_Pkt_flush_ack_t *flush_ack_pkt = &upkt.flush_ack;
211 212
    MPID_Request *req;
    int mpi_errno=MPI_SUCCESS;
Xin Zhao's avatar
Xin Zhao committed
213
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_FLUSH_ACK_PKT);
214

Xin Zhao's avatar
Xin Zhao committed
215
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_FLUSH_ACK_PKT);
216

Xin Zhao's avatar
Xin Zhao committed
217 218 219
    MPIDI_Pkt_init(flush_ack_pkt, MPIDI_CH3_PKT_FLUSH_ACK);
    flush_ack_pkt->source_win_handle = source_win_handle;
    flush_ack_pkt->target_rank = win_ptr->comm_ptr->rank;
220

221 222
    /* Because this is in a packet handler, it is already within a critical section */	
    /* MPIU_THREAD_CS_ENTER(CH3COMM,vc); */
Xin Zhao's avatar
Xin Zhao committed
223
    mpi_errno = MPIDI_CH3_iStartMsg(vc, flush_ack_pkt, sizeof(*flush_ack_pkt), &req);
224 225 226
    /* MPIU_THREAD_CS_EXIT(CH3COMM,vc); */
    if (mpi_errno != MPI_SUCCESS) {
	MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rmamsg");
227 228
    }

229 230 231
    if (req != NULL)
    {
        MPID_Request_release(req);
232 233
    }

234
 fn_fail:
Xin Zhao's avatar
Xin Zhao committed
235
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_FLUSH_ACK_PKT);
236 237
    return mpi_errno;
}
238

239

240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
#undef FUNCNAME
#define FUNCNAME send_decr_at_cnt_msg
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int send_decr_at_cnt_msg(int dst, MPID_Win * win_ptr)
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_decr_at_counter_t *decr_at_cnt_pkt = &upkt.decr_at_cnt;
    MPIDI_VC_t * vc;
    MPID_Request *request = NULL;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_SEND_DECR_AT_CNT_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_DECR_AT_CNT_MSG);

    MPIDI_Pkt_init(decr_at_cnt_pkt, MPIDI_CH3_PKT_DECR_AT_COUNTER);
    decr_at_cnt_pkt->target_win_handle = win_ptr->all_win_handles[dst];

    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dst, &vc);

    MPIU_THREAD_CS_ENTER(CH3COMM,vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, decr_at_cnt_pkt,
                                    sizeof(*decr_at_cnt_pkt), &request);
    MPIU_THREAD_CS_EXIT(CH3COMM,vc);
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rmamsg" );
    }

    if (request != NULL) {
        MPID_Request_release(request);
    }

  fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_DECR_AT_CNT_MSG);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}

280

281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
#undef FUNCNAME
#define FUNCNAME send_flush_msg
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int send_flush_msg(int dest, MPID_Win * win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_flush_t *flush_pkt = &upkt.flush;
    MPID_Request *req = NULL;
    MPIDI_VC_t *vc;
    MPIDI_STATE_DECL(MPID_STATE_SEND_FLUSH_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_FLUSH_MSG);

    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);

    MPIDI_Pkt_init(flush_pkt, MPIDI_CH3_PKT_FLUSH);
    flush_pkt->target_win_handle = win_ptr->all_win_handles[dest];
    flush_pkt->source_win_handle = win_ptr->handle;

    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, flush_pkt, sizeof(*flush_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");

    /* Release the request returned by iStartMsg */
    if (req != NULL) {
        MPID_Request_release(req);
    }

  fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_FLUSH_MSG);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}

320 321

/* enqueue an unsatisfied origin in passive target at target side. */
322 323 324 325
static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
                                      MPIDI_CH3_Pkt_t *pkt,
                                      MPIDI_msg_sz_t *buflen,
                                      MPID_Request **reqp)
326
{
327
    MPIDI_RMA_Lock_entry_t *new_ptr = NULL;
328 329
    MPIDI_CH3_Pkt_flags_t flag;
    MPI_Win source_win_handle;
330
    MPI_Request request_handle;
331
    int lock_discarded = 0, data_discarded = 0;
332 333
    int mpi_errno = MPI_SUCCESS;

334
    (*reqp) = NULL;
335

336
    new_ptr = MPIDI_CH3I_Win_lock_entry_alloc(win_ptr, pkt);
337 338
    if (new_ptr != NULL) {
        MPL_LL_APPEND(win_ptr->lock_queue, win_ptr->lock_queue_tail, new_ptr);
339
        new_ptr->vc = vc;
340 341 342
    }
    else {
        lock_discarded = 1;
343
    }
344

345
    if (pkt->type == MPIDI_CH3_PKT_LOCK ||
346 347
        pkt->type == MPIDI_CH3_PKT_PUT_IMMED ||
        pkt->type == MPIDI_CH3_PKT_ACCUMULATE_IMMED ||
348
        pkt->type == MPIDI_CH3_PKT_GET ||
349 350 351
        pkt->type == MPIDI_CH3_PKT_GET_ACCUM_IMMED ||
        pkt->type == MPIDI_CH3_PKT_FOP_IMMED ||
        pkt->type == MPIDI_CH3_PKT_CAS_IMMED) {
352

353 354
        /* return bytes of data processed in this pkt handler */
        (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
355 356 357 358 359

        if (new_ptr != NULL)
            new_ptr->all_data_recved = 1;

        goto issue_ack;
360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
    }
    else {
        MPI_Aint type_size = 0;
        MPIDI_msg_sz_t recv_data_sz = 0;
        MPID_Request *req = NULL;
        MPI_Datatype target_dtp;
        int target_count;
        int complete = 0;
        MPIDI_msg_sz_t data_len;
        char *data_buf = NULL;

        /* This is PUT, ACC, GACC */

        MPIDI_CH3_PKT_RMA_GET_TARGET_DATATYPE((*pkt), target_dtp, mpi_errno);
        MPIDI_CH3_PKT_RMA_GET_TARGET_COUNT((*pkt), target_count, mpi_errno);

        MPID_Datatype_get_size_macro(target_dtp, type_size);
        recv_data_sz = type_size * target_count;

379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
        if (new_ptr != NULL) {
            if (win_ptr->current_lock_data_bytes + recv_data_sz
                < MPIR_CVAR_CH3_RMA_LOCK_DATA_BYTES) {
                new_ptr->data = MPIU_Malloc(recv_data_sz);
            }

            if (new_ptr->data == NULL) {
                /* Note that there are two possible reasons to make new_ptr->data to be NULL:
                 * (1) win_ptr->current_lock_data_bytes + recv_data_sz >= MPIR_CVAR_CH3_RMA_LOCK_DATA_BYTES;
                 * (2) MPIU_Malloc(recv_data_sz) failed.
                 * In such cases, we cannot allocate memory for lock data, so we give up
                 * buffering lock data, however, we still buffer lock request.
                 */
                MPIDI_CH3_Pkt_t new_pkt;
                MPIDI_CH3_Pkt_lock_t *lock_pkt = &new_pkt.lock;
                MPI_Win target_win_handle;
395
                MPIDI_CH3_Pkt_flags_t flags;
396 397

                MPIDI_CH3_PKT_RMA_GET_TARGET_WIN_HANDLE((*pkt), target_win_handle, mpi_errno);
398
                MPIDI_CH3_PKT_RMA_GET_FLAGS((*pkt), flags, mpi_errno);
399

400 401 402 403 404 405 406 407 408
                if (pkt->type == MPIDI_CH3_PKT_PUT || pkt->type == MPIDI_CH3_PKT_ACCUMULATE) {
                    MPIDI_CH3_PKT_RMA_GET_SOURCE_WIN_HANDLE((*pkt), source_win_handle, mpi_errno);
                    request_handle = MPI_REQUEST_NULL;
                }
                else {
                    source_win_handle = MPI_WIN_NULL;
                    MPIDI_CH3_PKT_RMA_GET_REQUEST_HANDLE((*pkt), request_handle, mpi_errno);
                }

409 410 411
                MPIDI_Pkt_init(lock_pkt, MPIDI_CH3_PKT_LOCK);
                lock_pkt->target_win_handle = target_win_handle;
                lock_pkt->source_win_handle = source_win_handle;
412
                lock_pkt->request_handle = request_handle;
413
                lock_pkt->flags = flags;
414 415 416 417 418 419 420 421 422 423 424

                /* replace original pkt with lock pkt */
                new_ptr->pkt = new_pkt;
                new_ptr->all_data_recved = 1;

                data_discarded = 1;
            }
            else {
                win_ptr->current_lock_data_bytes += recv_data_sz;
                new_ptr->data_size = recv_data_sz;
            }
425 426 427 428 429 430 431
        }

        /* create request to receive upcoming requests */
        req = MPID_Request_create();
        MPIU_Object_set_ref(req, 1);

        /* fill in area in req that will be used in Receive_data_found() */
432
        if (lock_discarded || data_discarded) {
433
            req->dev.drop_data = TRUE;
434 435 436 437 438 439
            req->dev.user_buf = NULL;
            req->dev.user_count = target_count;
            req->dev.datatype = target_dtp;
            req->dev.recv_data_sz = recv_data_sz;
            req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
            req->dev.OnFinal = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
440
            req->dev.lock_queue_entry = new_ptr;
441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457

            data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
            data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
            MPIU_Assert(req->dev.recv_data_sz > 0);
        }
        else {
            req->dev.user_buf = new_ptr->data;
            req->dev.user_count = target_count;
            req->dev.datatype = target_dtp;
            req->dev.recv_data_sz = recv_data_sz;
            req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
            req->dev.OnFinal = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
            req->dev.lock_queue_entry = new_ptr;

            data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
            data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
            MPIU_Assert(req->dev.recv_data_sz > 0);
458 459 460 461 462 463 464 465 466 467 468 469
        }

        mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

        /* return bytes of data processed in this pkt handler */
        (*buflen) = sizeof(MPIDI_CH3_Pkt_t) + data_len;

        if (complete) {
            mpi_errno = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete(vc, req, &complete);
            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
            if (complete) {
470
                goto issue_ack;
471 472 473 474 475 476
            }
        }

        (*reqp) = req;
    }

477 478 479 480 481
 issue_ack:
    if (pkt->type == MPIDI_CH3_PKT_LOCK) {
        if (lock_discarded) flag = MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED;
        else flag = MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_QUEUED;

482 483 484 485
        MPIDI_CH3_PKT_RMA_GET_SOURCE_WIN_HANDLE((*pkt), source_win_handle, mpi_errno);
        MPIDI_CH3_PKT_RMA_GET_REQUEST_HANDLE((*pkt), request_handle, mpi_errno);

        mpi_errno = MPIDI_CH3I_Send_lock_ack_pkt(vc, win_ptr, flag, source_win_handle, request_handle);
486 487 488 489 490 491 492
        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
    }
    else {
        if (lock_discarded) flag = MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED;
        else if (data_discarded) flag = MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_DISCARDED;
        else flag = MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_QUEUED;

493 494 495 496 497 498 499 500 501 502 503
        if (pkt->type == MPIDI_CH3_PKT_PUT || pkt->type == MPIDI_CH3_PKT_PUT_IMMED ||
            pkt->type == MPIDI_CH3_PKT_ACCUMULATE || pkt->type == MPIDI_CH3_PKT_ACCUMULATE_IMMED) {
            MPIDI_CH3_PKT_RMA_GET_SOURCE_WIN_HANDLE((*pkt), source_win_handle, mpi_errno);
            request_handle = MPI_REQUEST_NULL;
        }
        else {
            source_win_handle = MPI_WIN_NULL;
            MPIDI_CH3_PKT_RMA_GET_REQUEST_HANDLE((*pkt), request_handle, mpi_errno);
        }

        mpi_errno = MPIDI_CH3I_Send_lock_op_ack_pkt(vc, win_ptr, flag, source_win_handle, request_handle);
504 505 506
        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
    }

507 508 509 510 511 512 513
 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


514
static inline int handle_lock_ack(MPID_Win *win_ptr, int target_rank,
515
                                        MPIDI_CH3_Pkt_flags_t flags)
516
{
517
    MPIDI_RMA_Target_t *t = NULL;
518 519
    int mpi_errno = MPI_SUCCESS;

520 521 522 523 524 525 526 527 528 529
    MPIU_Assert(win_ptr->states.access_state == MPIDI_RMA_PER_TARGET ||
                win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED ||
                win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED);

    if (win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED) {
        MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, win_ptr->comm_ptr->rank, &orig_vc);
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
        if (win_ptr->comm_ptr->rank == target_rank ||
            (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id)) {
530 531 532 533 534 535 536 537 538
            if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
                win_ptr->outstanding_locks--;
                MPIU_Assert(win_ptr->outstanding_locks >= 0);
            }
            else if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED) {
                /* re-send lock request message. */
                mpi_errno = send_lock_msg(target_rank, MPI_LOCK_SHARED, win_ptr);
                if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
            }
539 540 541 542
            goto fn_exit;
        }
    }
    else if (win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED) {
543 544 545 546 547 548 549 550 551
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
            win_ptr->outstanding_locks--;
            MPIU_Assert(win_ptr->outstanding_locks >= 0);
        }
        else if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED) {
            /* re-send lock request message. */
            mpi_errno = send_lock_msg(target_rank, MPI_LOCK_SHARED, win_ptr);
            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
        }
552
        goto fn_exit;
553 554
    }

555 556 557
    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &t);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
    MPIU_Assert(t != NULL);
558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585

    if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED)
        t->access_state = MPIDI_RMA_LOCK_GRANTED;

    if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED)
        t->access_state = MPIDI_RMA_LOCK_CALLED;

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

static inline int adjust_op_piggybacked_with_lock (MPID_Win *win_ptr,
                                                   int target_rank,
                                                   MPIDI_CH3_Pkt_flags_t flags) {
    MPIDI_RMA_Target_t *target = NULL;
    MPIDI_RMA_Op_t *op = NULL;
    MPIDI_CH3_Pkt_flags_t op_flags = MPIDI_CH3_PKT_FLAG_NONE;
    int mpi_errno = MPI_SUCCESS;

    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &target);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
    MPIU_Assert(target != NULL);

    op = target->pending_op_list;
    if (op != NULL) MPIDI_CH3_PKT_RMA_GET_FLAGS(op->pkt, op_flags, mpi_errno);

586 587
    if (op_flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
        op_flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE) {
588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED ||
            flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_QUEUED) {
            if (!op->request) {
                if (op->ureq) {
                    /* Complete user request and release the ch3 ref */
                    MPID_Request_set_completed(op->ureq);
                    MPID_Request_release(op->ureq);
                }

                MPIDI_CH3I_RMA_Ops_free_elem(win_ptr, &(target->pending_op_list),
                                             &(target->pending_op_list_tail), op);
            }
            else {
                MPIDI_CH3I_RMA_Ops_unlink(&(target->pending_op_list),
                                          &(target->pending_op_list_tail), op);
                if (op->is_dt) {
                    MPIDI_CH3I_RMA_Ops_append(&(target->dt_op_list),
                                              &(target->dt_op_list_tail), op);
                }
                else if (op->pkt.type == MPIDI_CH3_PKT_PUT ||
608 609 610
                         op->pkt.type == MPIDI_CH3_PKT_PUT_IMMED ||
                         op->pkt.type == MPIDI_CH3_PKT_ACCUMULATE ||
                         op->pkt.type == MPIDI_CH3_PKT_ACCUMULATE_IMMED) {
611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651
                    MPIDI_CH3I_RMA_Ops_append(&(target->write_op_list),
                                              &(target->write_op_list_tail), op);
                }
                else {
                    MPIDI_CH3I_RMA_Ops_append(&(target->read_op_list),
                                              &(target->read_op_list_tail), op);
                }

                if (op->ureq) {
                    if (MPID_Request_is_complete(op->request)) {
                        /* Complete user request, let cleanup function to release
                           ch3 ref */
                        MPID_Request_set_completed(op->ureq);
                    }
                    else {
                        /* Increase ref for completion handler */
                        MPIU_Object_add_ref(op->ureq);
                        op->request->dev.request_handle = op->ureq->handle;
                        if (op->request->dev.OnDataAvail == NULL) {
                            op->request->dev.OnDataAvail = MPIDI_CH3_ReqHandler_ReqOpsComplete;
                        }
                        op->request->dev.OnFinal = MPIDI_CH3_ReqHandler_ReqOpsComplete;
                    }
                }
            }
        }
        else if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_DISCARDED ||
                 flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED) {
            /* We need to re-transmit this operation, so we destroy
               the internal request and erase all flags in current
               operation. */
            if (op->request) {
                MPIDI_CH3_Request_destroy(op->request);
                op->request = NULL;
                win_ptr->active_req_cnt--;
            }
            MPIDI_CH3_PKT_RMA_ERASE_FLAGS(op->pkt, mpi_errno);

            target->next_op_to_issue = op;
        }
    }
652 653 654 655 656 657 658 659

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


660 661 662 663
#undef FUNCNAME
#define FUNCNAME acquire_local_lock
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
664
static inline int acquire_local_lock(MPID_Win * win_ptr, int lock_type)
665 666 667 668 669
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_ACQUIRE_LOCAL_LOCK);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_ACQUIRE_LOCAL_LOCK);

Xin Zhao's avatar
Xin Zhao committed
670 671
    MPIR_T_PVAR_TIMER_START(RMA, rma_winlock_getlocallock);

672
    if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_type) == 1) {
673
        mpi_errno = handle_lock_ack(win_ptr, win_ptr->comm_ptr->rank,
674
                                          MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED);
675 676 677 678 679 680
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }
    else {
        /* Queue the lock information. */
        MPIDI_CH3_Pkt_t pkt;
        MPIDI_CH3_Pkt_lock_t *lock_pkt = &pkt.lock;
681
        MPIDI_RMA_Lock_entry_t *new_ptr = NULL;
682
        MPIDI_VC_t *my_vc;
683 684

        MPIDI_Pkt_init(lock_pkt, MPIDI_CH3_PKT_LOCK);
685 686 687 688 689 690 691
        lock_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
        if (lock_type == MPI_LOCK_SHARED)
            lock_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED;
        else {
            MPIU_Assert(lock_type == MPI_LOCK_EXCLUSIVE);
            lock_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE;
        }
692

693 694
        new_ptr = MPIDI_CH3I_Win_lock_entry_alloc(win_ptr, &pkt);
        if (new_ptr == NULL) {
695
            mpi_errno = handle_lock_ack(win_ptr, win_ptr->comm_ptr->rank,
696 697 698
                                              MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED);
            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
            goto fn_exit;
699
        }
700
        MPL_LL_APPEND(win_ptr->lock_queue, win_ptr->lock_queue_tail, new_ptr);
701 702
        MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, win_ptr->comm_ptr->rank, &my_vc);
        new_ptr->vc = my_vc;
703 704

        new_ptr->all_data_recved = 1;
705 706
    }

707
  fn_exit:
Xin Zhao's avatar
Xin Zhao committed
708
    MPIR_T_PVAR_TIMER_END(RMA, rma_winlock_getlocallock);
709
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_ACQUIRE_LOCAL_LOCK);
710 711
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
712
  fn_fail:
713 714 715 716 717
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


718 719 720 721 722 723 724 725 726 727 728 729
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_RMA_Handle_flush_ack
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int MPIDI_CH3I_RMA_Handle_flush_ack(MPID_Win * win_ptr, int target_rank)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_RMA_Target_t *t;

    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &t);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

730 731 732 733
    t->sync.outstanding_acks--;
    MPIU_Assert(t->sync.outstanding_acks >= 0);

    t->put_acc_issued = 0; /* reset PUT_ACC_FLAG after FLUSH is completed */
734 735 736 737 738 739 740 741

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


742
#undef FUNCNAME
743
#define FUNCNAME do_accumulate_op
744 745
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
746 747
static inline int do_accumulate_op(void *source_buf, void *target_buf,
                                   int acc_count, MPI_Datatype acc_dtp, MPI_Op acc_op)
748 749
{
    int mpi_errno = MPI_SUCCESS;
750 751 752 753 754
    MPI_User_function *uop;
    MPIDI_STATE_DECL(MPID_STATE_DO_ACCUMULATE_OP);

    MPIDI_FUNC_ENTER(MPID_STATE_DO_ACCUMULATE_OP);

755
    if (acc_op == MPI_REPLACE)
756 757
    {
        /* simply copy the data */
758 759
        mpi_errno = MPIR_Localcopy(source_buf, acc_count, acc_dtp,
                                   target_buf, acc_count, acc_dtp);
760
        if (mpi_errno) {
761 762
	    MPIU_ERR_POP(mpi_errno);
	}
763 764 765
        goto fn_exit;
    }

766
    if (HANDLE_GET_KIND(acc_op) == HANDLE_KIND_BUILTIN)
767 768
    {
        /* get the function by indexing into the op table */
769
        uop = MPIR_OP_HDL_TO_FN(acc_op);
770 771 772 773
    }
    else
    {
	/* --BEGIN ERROR HANDLING-- */
774
        mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OP, "**opnotpredefined", "**opnotpredefined %d", acc_op );
775 776
        return mpi_errno;
	/* --END ERROR HANDLING-- */
777 778
    }

779
    if (MPIR_DATATYPE_IS_PREDEFINED(acc_dtp))
780
    {
781
        (*uop)(source_buf, target_buf, &acc_count, &acc_dtp);
782 783 784 785
    }
    else
    {
	/* derived datatype */
786 787 788
        MPID_Segment *segp;
        DLOOP_VECTOR *dloop_vec;
        MPI_Aint first, last;
789 790
        int vec_len, i, count;
        MPI_Aint type_size;
791
        MPI_Datatype type;
792 793 794 795 796 797 798 799 800
        MPID_Datatype *dtp;

        segp = MPID_Segment_alloc();
	/* --BEGIN ERROR HANDLING-- */
        if (!segp)
	{
            mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
	    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
            return mpi_errno;
801
        }
802
	/* --END ERROR HANDLING-- */
803 804
        MPID_Segment_init(NULL, acc_count,
			  acc_dtp, segp, 0);
805 806 807
        first = 0;
        last  = SEGMENT_IGNORE_LAST;

808 809
        MPID_Datatype_get_ptr(acc_dtp, dtp);
        vec_len = dtp->max_contig_blocks * acc_count + 1;
810 811 812 813 814 815 816 817 818
        /* +1 needed because Rob says so */
        dloop_vec = (DLOOP_VECTOR *)
            MPIU_Malloc(vec_len * sizeof(DLOOP_VECTOR));
	/* --BEGIN ERROR HANDLING-- */
        if (!dloop_vec)
	{
            mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
	    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
            return mpi_errno;
819
        }
820 821 822 823 824 825 826 827 828
	/* --END ERROR HANDLING-- */

        MPID_Segment_pack_vector(segp, first, &last, dloop_vec, &vec_len);

        type = dtp->eltype;
        MPID_Datatype_get_size_macro(type, type_size);
        for (i=0; i<vec_len; i++)
	{
            MPIU_Assign_trunc(count, (dloop_vec[i].DLOOP_VECTOR_LEN)/type_size, int);
829 830
            (*uop)((char *)source_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
                   (char *)target_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
831
                   &count, &type);
832 833
        }

834 835
        MPID_Segment_free(segp);
        MPIU_Free(dloop_vec);
836 837
    }

838 839
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
840 841

    return mpi_errno;
842
 fn_fail:
843 844 845
    goto fn_exit;
}

846

847 848 849 850 851
static inline int check_piggyback_lock(MPID_Win *win_ptr, MPIDI_VC_t *vc,
                                       MPIDI_CH3_Pkt_t *pkt,
                                       MPIDI_msg_sz_t *buflen,
                                       int *acquire_lock_fail,
                                       MPID_Request **reqp) {
852 853 854 855 856
    int lock_type;
    MPIDI_CH3_Pkt_flags_t flags;
    int mpi_errno = MPI_SUCCESS;

    (*acquire_lock_fail) = 0;
857
    (*reqp) = NULL;
858 859

    MPIDI_CH3_PKT_RMA_GET_FLAGS((*pkt), flags, mpi_errno);
860 861 862 863 864 865 866 867 868
    if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
        flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE) {

        if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED)
            lock_type = MPI_LOCK_SHARED;
        else {
            MPIU_Assert(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE);
            lock_type = MPI_LOCK_EXCLUSIVE;
        }
869 870 871

        if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_type) == 0) {
            /* cannot acquire the lock, queue up this operation. */
872
            mpi_errno = enqueue_lock_origin(win_ptr, vc, pkt, buflen, reqp);
873 874 875 876 877 878 879 880 881 882 883
            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
            (*acquire_lock_fail) = 1;
        }
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

884
static inline int finish_op_on_target(MPID_Win *win_ptr, MPIDI_VC_t *vc,
885
                                      int has_response_data,
886 887 888 889
                                      MPIDI_CH3_Pkt_flags_t flags,
                                      MPI_Win source_win_handle) {
    int mpi_errno = MPI_SUCCESS;

890
    if (!has_response_data) {
891
        /* This is PUT or ACC */
892 893
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
            flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE) {
894
            MPIDI_CH3_Pkt_flags_t pkt_flags = MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
895 896
            if ((flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
                (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
897
                pkt_flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
898
            MPIU_Assert(source_win_handle != MPI_WIN_NULL);
899 900
            mpi_errno = MPIDI_CH3I_Send_lock_op_ack_pkt(vc, win_ptr,
                                                        pkt_flags,
901 902
                                                        source_win_handle,
                                                        MPI_REQUEST_NULL);
903 904
            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
            MPIDI_CH3_Progress_signal_completion();
905 906
        }
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) {
907 908
            if (!(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
                  flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE)) {
909 910 911 912 913 914
                /* If op is piggybacked with both LOCK and FLUSH,
                   we only send LOCK ACK back, do not send FLUSH ACK. */
                mpi_errno = MPIDI_CH3I_Send_flush_ack_pkt(vc, win_ptr,
                                                          source_win_handle);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            }
915 916 917 918 919 920 921 922 923 924
            MPIDI_CH3_Progress_signal_completion();
        }
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_DECR_AT_COUNTER) {
            win_ptr->at_completion_counter--;
            MPIU_Assert(win_ptr->at_completion_counter >= 0);
            /* Signal the local process when the op counter reaches 0. */
            if (win_ptr->at_completion_counter == 0)
                MPIDI_CH3_Progress_signal_completion();
        }
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK) {
925 926
            if (!(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
                  flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE)) {
927 928 929 930 931 932
                /* If op is piggybacked with both LOCK and UNLOCK,
                   we only send LOCK ACK back, do not send FLUSH (UNLOCK) ACK. */
                mpi_errno = MPIDI_CH3I_Send_flush_ack_pkt(vc, win_ptr,
                                                          source_win_handle);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            }
933 934
            mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960
            MPIDI_CH3_Progress_signal_completion();
        }
    }
    else {
        /* This is GACC / GET / CAS / FOP */

        if (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK) {
            mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPIDI_CH3_Progress_signal_completion();
        }

        if (flags & MPIDI_CH3_PKT_FLAG_RMA_DECR_AT_COUNTER) {
            win_ptr->at_completion_counter--;
            MPIU_Assert(win_ptr->at_completion_counter >= 0);
            /* Signal the local process when the op counter reaches 0. */
            if (win_ptr->at_completion_counter == 0)
                MPIDI_CH3_Progress_signal_completion();
        }
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
961

962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996

static inline int fill_ranks_in_win_grp(MPID_Win *win_ptr, MPID_Group *group_ptr,
                                        int *ranks_in_win_grp)
{
    int mpi_errno = MPI_SUCCESS;
    int i, *ranks_in_grp;
    MPID_Group *win_grp_ptr;
    MPIU_CHKLMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_FILL_RANKS_IN_WIN_GRP);

    MPIDI_RMA_FUNC_ENTER(MPID_STATE_FILL_RANKS_IN_WIN_GRP);

    MPIU_CHKLMEM_MALLOC(ranks_in_grp, int *, group_ptr->size * sizeof(int),
                        mpi_errno, "ranks_in_grp");
    for (i = 0; i < group_ptr->size; i++) ranks_in_grp[i] = i;

    mpi_errno = MPIR_Comm_group_impl(win_ptr->comm_ptr, &win_grp_ptr);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPIR_Group_translate_ranks_impl(group_ptr, group_ptr->size,
                                                ranks_in_grp, win_grp_ptr, ranks_in_win_grp);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPIR_Group_free_impl(win_grp_ptr);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

  fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_FILL_RANKS_IN_WIN_GRP);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


997
static inline int wait_progress_engine(void)
998 999
{
    int mpi_errno = MPI_SUCCESS;
1000
    MPID_Progress_state progress_state;
1001

1002 1003 1004 1005 1006 1007
    MPID_Progress_start(&progress_state);
    mpi_errno = MPID_Progress_wait(&progress_state);
    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS) {
        MPID_Progress_end(&progress_state);
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**winnoprogress");
1008
    }
1009 1010
    /* --END ERROR HANDLING-- */
    MPID_Progress_end(&progress_state);
1011

1012
  fn_exit:
1013
    return mpi_errno;
1014
  fn_fail:
1015 1016 1017
    goto fn_exit;
}

1018
static inline int poke_progress_engine(void)
1019 1020
{
    int mpi_errno = MPI_SUCCESS;
1021
    MPID_Progress_state progress_state;
1022

1023 1024 1025 1026 1027
    MPID_Progress_start(&progress_state);
    mpi_errno = MPID_Progress_poke();
    if (mpi_errno != MPI_SUCCESS)
        MPIU_ERR_POP(mpi_errno);
    MPID_Progress_end(&progress_state);
1028

1029
  fn_exit:
1030
    return mpi_errno;
1031
  fn_fail:
1032 1033 1034
    goto fn_exit;
}

1035
#endif /* MPID_RMA_H_INCLUDED */