mpidrma.h 24.9 KB
Newer Older
1
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2 3 4 5 6
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

7 8
#if !defined(MPID_RMA_H_INCLUDED)
#define MPID_RMA_H_INCLUDED
9

10 11 12
#include "mpid_rma_types.h"
#include "mpid_rma_oplist.h"
#include "mpid_rma_shm.h"
13
#include "mpid_rma_issue.h"
14
#include "mpid_rma_lockqueue.h"
15

16
#undef FUNCNAME
17
#define FUNCNAME send_lock_msg
18 19
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
20
static inline int send_lock_msg(int dest, int lock_type, MPID_Win * win_ptr)
21
{
22 23 24 25 26 27 28
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_lock_t *lock_pkt = &upkt.lock;
    MPID_Request *req = NULL;
    MPIDI_VC_t *vc;
    MPIDI_STATE_DECL(MPID_STATE_SEND_LOCK_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_LOCK_MSG);
29

30
    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
31

32 33 34 35
    MPIDI_Pkt_init(lock_pkt, MPIDI_CH3_PKT_LOCK);
    lock_pkt->target_win_handle = win_ptr->all_win_handles[dest];
    lock_pkt->source_win_handle = win_ptr->handle;
    lock_pkt->lock_type = lock_type;
36
    lock_pkt->origin_rank = win_ptr->comm_ptr->rank;
37

38 39 40 41
    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_pkt, sizeof(*lock_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
42

43 44 45 46
    /* release the request returned by iStartMsg */
    if (req != NULL) {
        MPID_Request_release(req);
    }
47

48
  fn_exit:
49
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_LOCK_MSG);
50
    return mpi_errno;
51
    /* --BEGIN ERROR HANDLING-- */
52
  fn_fail:
53
    goto fn_exit;
54
    /* --END ERROR HANDLING-- */
55 56
}

57
#undef FUNCNAME
58
#define FUNCNAME send_unlock_msg
59 60
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
61 62
static inline int send_unlock_msg(int dest, MPID_Win * win_ptr,
                                  MPIDI_CH3_Pkt_flags_t flags)
63 64
{
    int mpi_errno = MPI_SUCCESS;
65 66 67 68 69 70
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_unlock_t *unlock_pkt = &upkt.unlock;
    MPID_Request *req = NULL;
    MPIDI_VC_t *vc;
    MPIDI_STATE_DECL(MPID_STATE_SEND_UNLOCK_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_UNLOCK_MSG);
71

72
    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
73

74 75
    /* Send a lock packet over to the target. wait for the lock_granted
     * reply. Then do all the RMA ops. */
76

77 78
    MPIDI_Pkt_init(unlock_pkt, MPIDI_CH3_PKT_UNLOCK);
    unlock_pkt->target_win_handle = win_ptr->all_win_handles[dest];
79
    unlock_pkt->source_win_handle = win_ptr->handle;
80
    unlock_pkt->flags = flags;
81

82 83 84 85
    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, unlock_pkt, sizeof(*unlock_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
86

87 88 89
    /* Release the request returned by iStartMsg */
    if (req != NULL) {
        MPID_Request_release(req);
90
    }
91

92
  fn_exit:
93
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_UNLOCK_MSG);
94 95
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
96
  fn_fail:
97 98 99 100
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}

101

102
#undef FUNCNAME
103
#define FUNCNAME MPIDI_CH3I_Send_lock_ack_pkt
104 105
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
106 107 108
static inline int MPIDI_CH3I_Send_lock_ack_pkt(MPIDI_VC_t * vc, MPID_Win * win_ptr,
                                               MPIDI_CH3_Pkt_flags_t flags,
                                               MPI_Win source_win_handle)
109
{
110
    MPIDI_CH3_Pkt_t upkt;
111
    MPIDI_CH3_Pkt_lock_ack_t *lock_ack_pkt = &upkt.lock_ack;
112 113
    MPID_Request *req = NULL;
    int mpi_errno;
114
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
115

116
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
117

118 119 120 121
    /* send lock ack packet */
    MPIDI_Pkt_init(lock_ack_pkt, MPIDI_CH3_PKT_LOCK_ACK);
    lock_ack_pkt->source_win_handle = source_win_handle;
    lock_ack_pkt->target_rank = win_ptr->comm_ptr->rank;
122
    lock_ack_pkt->flags = flags;
123 124

    MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE,
125 126
                     (MPIU_DBG_FDEST, "sending lock ack pkt on vc=%p, source_win_handle=%#08x",
                      vc, lock_ack_pkt->source_win_handle));
127 128

    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
129
    mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_ack_pkt, sizeof(*lock_ack_pkt), &req);
130 131 132
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    if (mpi_errno) {
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
133 134
    }

135 136
    if (req != NULL) {
        MPID_Request_release(req);
137
    }
138

139
  fn_fail:
140
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
141 142

    return mpi_errno;
143 144 145 146
}


#undef FUNCNAME
Xin Zhao's avatar
Xin Zhao committed
147
#define FUNCNAME MPIDI_CH3I_Send_flush_ack_pkt
148 149
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
Xin Zhao's avatar
Xin Zhao committed
150
static inline int MPIDI_CH3I_Send_flush_ack_pkt(MPIDI_VC_t *vc, MPID_Win *win_ptr,
151
                                                MPIDI_CH3_Pkt_flags_t flags,
152
                                    MPI_Win source_win_handle)
153
{
154
    MPIDI_CH3_Pkt_t upkt;
Xin Zhao's avatar
Xin Zhao committed
155
    MPIDI_CH3_Pkt_flush_ack_t *flush_ack_pkt = &upkt.flush_ack;
156 157
    MPID_Request *req;
    int mpi_errno=MPI_SUCCESS;
Xin Zhao's avatar
Xin Zhao committed
158
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_FLUSH_ACK_PKT);
159

Xin Zhao's avatar
Xin Zhao committed
160
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_FLUSH_ACK_PKT);
161

Xin Zhao's avatar
Xin Zhao committed
162 163 164
    MPIDI_Pkt_init(flush_ack_pkt, MPIDI_CH3_PKT_FLUSH_ACK);
    flush_ack_pkt->source_win_handle = source_win_handle;
    flush_ack_pkt->target_rank = win_ptr->comm_ptr->rank;
165 166 167
    flush_ack_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
    if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
        flush_ack_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
168

169 170
    /* Because this is in a packet handler, it is already within a critical section */	
    /* MPIU_THREAD_CS_ENTER(CH3COMM,vc); */
Xin Zhao's avatar
Xin Zhao committed
171
    mpi_errno = MPIDI_CH3_iStartMsg(vc, flush_ack_pkt, sizeof(*flush_ack_pkt), &req);
172 173 174
    /* MPIU_THREAD_CS_EXIT(CH3COMM,vc); */
    if (mpi_errno != MPI_SUCCESS) {
	MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rmamsg");
175 176
    }

177 178 179
    if (req != NULL)
    {
        MPID_Request_release(req);
180 181
    }

182
 fn_fail:
Xin Zhao's avatar
Xin Zhao committed
183
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_FLUSH_ACK_PKT);
184 185
    return mpi_errno;
}
186

187

188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
#undef FUNCNAME
#define FUNCNAME send_decr_at_cnt_msg
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int send_decr_at_cnt_msg(int dst, MPID_Win * win_ptr)
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_decr_at_counter_t *decr_at_cnt_pkt = &upkt.decr_at_cnt;
    MPIDI_VC_t * vc;
    MPID_Request *request = NULL;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_SEND_DECR_AT_CNT_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_DECR_AT_CNT_MSG);

    MPIDI_Pkt_init(decr_at_cnt_pkt, MPIDI_CH3_PKT_DECR_AT_COUNTER);
    decr_at_cnt_pkt->target_win_handle = win_ptr->all_win_handles[dst];

    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dst, &vc);

    MPIU_THREAD_CS_ENTER(CH3COMM,vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, decr_at_cnt_pkt,
                                    sizeof(*decr_at_cnt_pkt), &request);
    MPIU_THREAD_CS_EXIT(CH3COMM,vc);
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rmamsg" );
    }

    if (request != NULL) {
        MPID_Request_release(request);
    }

  fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_DECR_AT_CNT_MSG);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}

228

229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267
#undef FUNCNAME
#define FUNCNAME send_flush_msg
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int send_flush_msg(int dest, MPID_Win * win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_flush_t *flush_pkt = &upkt.flush;
    MPID_Request *req = NULL;
    MPIDI_VC_t *vc;
    MPIDI_STATE_DECL(MPID_STATE_SEND_FLUSH_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_FLUSH_MSG);

    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);

    MPIDI_Pkt_init(flush_pkt, MPIDI_CH3_PKT_FLUSH);
    flush_pkt->target_win_handle = win_ptr->all_win_handles[dest];
    flush_pkt->source_win_handle = win_ptr->handle;

    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, flush_pkt, sizeof(*flush_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");

    /* Release the request returned by iStartMsg */
    if (req != NULL) {
        MPID_Request_release(req);
    }

  fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_FLUSH_MSG);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}

268 269

/* enqueue an unsatisfied origin in passive target at target side. */
270 271 272 273
static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
                                      MPIDI_CH3_Pkt_t *pkt,
                                      MPIDI_msg_sz_t *buflen,
                                      MPID_Request **reqp)
274
{
275
    MPIDI_RMA_Lock_entry_t *new_ptr = NULL;
276 277
    int mpi_errno = MPI_SUCCESS;

278
    (*reqp) = NULL;
279

280 281 282 283 284
    new_ptr = MPIDI_CH3I_Win_lock_entry_alloc(win_ptr, pkt);
    if (new_ptr == NULL) {
        /* FIXME: we run out of resources of lock requests, needs to
           send LOCK DISCARDED packet back to origin */
    }
285 286
    MPL_LL_APPEND(win_ptr->lock_queue, win_ptr->lock_queue_tail, new_ptr);

287 288 289 290 291 292 293 294 295 296 297 298 299 300 301
    if (pkt->type == MPIDI_CH3_PKT_LOCK ||
        pkt->type == MPIDI_CH3_PKT_GET ||
        pkt->type == MPIDI_CH3_PKT_FOP ||
        pkt->type == MPIDI_CH3_PKT_CAS) {
        new_ptr->all_data_recved = 1;
        /* return bytes of data processed in this pkt handler */
        (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
        goto fn_exit;
    }
    else {
        MPI_Aint type_size = 0;
        MPIDI_msg_sz_t recv_data_sz = 0;
        MPID_Request *req = NULL;
        MPI_Datatype target_dtp;
        int target_count;
302
        int immed_len = 0;
303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348
        void *immed_data = NULL;
        int complete = 0;
        MPIDI_msg_sz_t data_len;
        char *data_buf = NULL;

        /* This is PUT, ACC, GACC */

        MPIDI_CH3_PKT_RMA_GET_TARGET_DATATYPE((*pkt), target_dtp, mpi_errno);
        MPIDI_CH3_PKT_RMA_GET_TARGET_COUNT((*pkt), target_count, mpi_errno);

        MPID_Datatype_get_size_macro(target_dtp, type_size);
        recv_data_sz = type_size * target_count;

        if (recv_data_sz <= MPIDI_RMA_IMMED_BYTES) {
            /* all data fits in packet header */
            new_ptr->all_data_recved = 1;
            /* return bytes of data processed in this pkt handler */
            (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
            goto fn_exit;
        }

        /* allocate tmp buffer to recieve data. */
        new_ptr->data = MPIU_Malloc(recv_data_sz);
        if (new_ptr->data == NULL) {
            MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %d",
                                 recv_data_sz);
        }

        /* create request to receive upcoming requests */
        req = MPID_Request_create();
        MPIU_Object_set_ref(req, 1);

        /* fill in area in req that will be used in Receive_data_found() */
        req->dev.user_buf = new_ptr->data;
        req->dev.user_count = target_count;
        req->dev.datatype = target_dtp;
        req->dev.recv_data_sz = recv_data_sz;
        req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
        req->dev.OnFinal = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
        req->dev.lock_queue_entry = new_ptr;

        MPIDI_CH3_PKT_RMA_GET_IMMED_LEN((*pkt), immed_len, mpi_errno);
        MPIDI_CH3_PKT_RMA_GET_IMMED_DATA_PTR((*pkt), immed_data, mpi_errno);

        if (immed_len > 0) {
            /* see if we can receive some data from packet header */
349
            MPIU_Memcpy(req->dev.user_buf, immed_data, (size_t)immed_len);
350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
            req->dev.user_buf = (void*)((char*)req->dev.user_buf + immed_len);
            req->dev.recv_data_sz -= immed_len;
        }

        data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
        data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
        MPIU_Assert(req->dev.recv_data_sz > 0);

        mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

        /* return bytes of data processed in this pkt handler */
        (*buflen) = sizeof(MPIDI_CH3_Pkt_t) + data_len;

        if (complete) {
            mpi_errno = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete(vc, req, &complete);
            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
            if (complete) {
                goto fn_exit;
            }
        }

        (*reqp) = req;
    }

375 376 377 378 379 380 381 382 383
 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


static inline int set_lock_sync_counter(MPID_Win *win_ptr, int target_rank)
{
384
    MPIDI_RMA_Target_t *t = NULL;
385 386
    int mpi_errno = MPI_SUCCESS;

387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402
    MPIU_Assert(win_ptr->states.access_state == MPIDI_RMA_PER_TARGET ||
                win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED ||
                win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED);

    if (win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED) {
        MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, win_ptr->comm_ptr->rank, &orig_vc);
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
        if (win_ptr->comm_ptr->rank == target_rank ||
            (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id)) {
            win_ptr->outstanding_locks--;
            MPIU_Assert(win_ptr->outstanding_locks >= 0);
            goto fn_exit;
        }
    }
    else if (win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED) {
403 404
        win_ptr->outstanding_locks--;
        MPIU_Assert(win_ptr->outstanding_locks >= 0);
405
        goto fn_exit;
406 407
    }

408 409 410 411
    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &t);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
    MPIU_Assert(t != NULL);
    t->access_state = MPIDI_RMA_LOCK_GRANTED;
412 413 414 415 416 417 418 419

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


420 421 422 423
#undef FUNCNAME
#define FUNCNAME acquire_local_lock
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
424
static inline int acquire_local_lock(MPID_Win * win_ptr, int lock_type)
425 426 427 428 429
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_ACQUIRE_LOCAL_LOCK);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_ACQUIRE_LOCAL_LOCK);

Xin Zhao's avatar
Xin Zhao committed
430 431
    MPIR_T_PVAR_TIMER_START(RMA, rma_winlock_getlocallock);

432 433 434 435 436 437 438 439
    if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_type) == 1) {
        mpi_errno = set_lock_sync_counter(win_ptr, win_ptr->comm_ptr->rank);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }
    else {
        /* Queue the lock information. */
        MPIDI_CH3_Pkt_t pkt;
        MPIDI_CH3_Pkt_lock_t *lock_pkt = &pkt.lock;
440
        MPIDI_RMA_Lock_entry_t *new_ptr = NULL;
441 442 443 444 445

        MPIDI_Pkt_init(lock_pkt, MPIDI_CH3_PKT_LOCK);
        lock_pkt->lock_type = lock_type;
        lock_pkt->origin_rank = win_ptr->comm_ptr->rank;

446 447 448 449 450
        new_ptr = MPIDI_CH3I_Win_lock_entry_alloc(win_ptr, &pkt);
        if (new_ptr == NULL) {
            /* FIXME: we run out of resources of lock requests, needs to
               send LOCK DISCARDED packet back to origin */
        }
451 452 453
        MPL_LL_APPEND(win_ptr->lock_queue, win_ptr->lock_queue_tail, new_ptr);

        new_ptr->all_data_recved = 1;
454 455
    }

456
  fn_exit:
Xin Zhao's avatar
Xin Zhao committed
457
    MPIR_T_PVAR_TIMER_END(RMA, rma_winlock_getlocallock);
458
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_ACQUIRE_LOCAL_LOCK);
459 460
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
461
  fn_fail:
462 463 464 465 466
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


467 468 469 470 471 472 473 474 475 476 477 478
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_RMA_Handle_flush_ack
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int MPIDI_CH3I_RMA_Handle_flush_ack(MPID_Win * win_ptr, int target_rank)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_RMA_Target_t *t;

    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &t);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

479 480 481 482
    t->sync.outstanding_acks--;
    MPIU_Assert(t->sync.outstanding_acks >= 0);

    t->put_acc_issued = 0; /* reset PUT_ACC_FLAG after FLUSH is completed */
483 484 485 486 487 488 489 490

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


491
#undef FUNCNAME
492
#define FUNCNAME do_accumulate_op
493 494
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
495 496
static inline int do_accumulate_op(void *source_buf, void *target_buf,
                                   int acc_count, MPI_Datatype acc_dtp, MPI_Op acc_op)
497 498
{
    int mpi_errno = MPI_SUCCESS;
499 500 501 502 503
    MPI_User_function *uop;
    MPIDI_STATE_DECL(MPID_STATE_DO_ACCUMULATE_OP);

    MPIDI_FUNC_ENTER(MPID_STATE_DO_ACCUMULATE_OP);

504
    if (acc_op == MPI_REPLACE)
505 506
    {
        /* simply copy the data */
507 508
        mpi_errno = MPIR_Localcopy(source_buf, acc_count, acc_dtp,
                                   target_buf, acc_count, acc_dtp);
509
        if (mpi_errno) {
510 511
	    MPIU_ERR_POP(mpi_errno);
	}
512 513 514
        goto fn_exit;
    }

515
    if (HANDLE_GET_KIND(acc_op) == HANDLE_KIND_BUILTIN)
516 517
    {
        /* get the function by indexing into the op table */
518
        uop = MPIR_OP_HDL_TO_FN(acc_op);
519 520 521 522
    }
    else
    {
	/* --BEGIN ERROR HANDLING-- */
523
        mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OP, "**opnotpredefined", "**opnotpredefined %d", acc_op );
524 525
        return mpi_errno;
	/* --END ERROR HANDLING-- */
526 527
    }

528
    if (MPIR_DATATYPE_IS_PREDEFINED(acc_dtp))
529
    {
530
        (*uop)(source_buf, target_buf, &acc_count, &acc_dtp);
531 532 533 534
    }
    else
    {
	/* derived datatype */
535 536 537
        MPID_Segment *segp;
        DLOOP_VECTOR *dloop_vec;
        MPI_Aint first, last;
538 539
        int vec_len, i, count;
        MPI_Aint type_size;
540
        MPI_Datatype type;
541 542 543 544 545 546 547 548 549
        MPID_Datatype *dtp;

        segp = MPID_Segment_alloc();
	/* --BEGIN ERROR HANDLING-- */
        if (!segp)
	{
            mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
	    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
            return mpi_errno;
550
        }
551
	/* --END ERROR HANDLING-- */
552 553
        MPID_Segment_init(NULL, acc_count,
			  acc_dtp, segp, 0);
554 555 556
        first = 0;
        last  = SEGMENT_IGNORE_LAST;

557 558
        MPID_Datatype_get_ptr(acc_dtp, dtp);
        vec_len = dtp->max_contig_blocks * acc_count + 1;
559 560 561 562 563 564 565 566 567
        /* +1 needed because Rob says so */
        dloop_vec = (DLOOP_VECTOR *)
            MPIU_Malloc(vec_len * sizeof(DLOOP_VECTOR));
	/* --BEGIN ERROR HANDLING-- */
        if (!dloop_vec)
	{
            mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
	    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
            return mpi_errno;
568
        }
569 570 571 572 573 574 575 576 577
	/* --END ERROR HANDLING-- */

        MPID_Segment_pack_vector(segp, first, &last, dloop_vec, &vec_len);

        type = dtp->eltype;
        MPID_Datatype_get_size_macro(type, type_size);
        for (i=0; i<vec_len; i++)
	{
            MPIU_Assign_trunc(count, (dloop_vec[i].DLOOP_VECTOR_LEN)/type_size, int);
578 579
            (*uop)((char *)source_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
                   (char *)target_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
580
                   &count, &type);
581 582
        }

583 584
        MPID_Segment_free(segp);
        MPIU_Free(dloop_vec);
585 586
    }

587 588
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
589 590

    return mpi_errno;
591
 fn_fail:
592 593 594
    goto fn_exit;
}

595

596 597 598 599 600
static inline int check_piggyback_lock(MPID_Win *win_ptr, MPIDI_VC_t *vc,
                                       MPIDI_CH3_Pkt_t *pkt,
                                       MPIDI_msg_sz_t *buflen,
                                       int *acquire_lock_fail,
                                       MPID_Request **reqp) {
601 602 603 604 605
    int lock_type;
    MPIDI_CH3_Pkt_flags_t flags;
    int mpi_errno = MPI_SUCCESS;

    (*acquire_lock_fail) = 0;
606
    (*reqp) = NULL;
607 608 609 610 611 612 613

    MPIDI_CH3_PKT_RMA_GET_FLAGS((*pkt), flags, mpi_errno);
    MPIDI_CH3_PKT_RMA_GET_LOCK_TYPE((*pkt), lock_type, mpi_errno);

    if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK) {
        if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_type) == 0) {
            /* cannot acquire the lock, queue up this operation. */
614
            mpi_errno = enqueue_lock_origin(win_ptr, vc, pkt, buflen, reqp);
615 616 617 618 619 620 621 622 623 624 625 626
            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

            (*acquire_lock_fail) = 1;
        }
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

627 628 629 630 631 632 633 634
static inline int finish_op_on_target(MPID_Win *win_ptr, MPIDI_VC_t *vc,
                                      MPIDI_CH3_Pkt_type_t type,
                                      MPIDI_CH3_Pkt_flags_t flags,
                                      MPI_Win source_win_handle) {
    int mpi_errno = MPI_SUCCESS;

    if (type == MPIDI_CH3_PKT_PUT || type == MPIDI_CH3_PKT_ACCUMULATE) {
        /* This is PUT or ACC */
635
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK) {
636 637
            if (!(flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) &&
                !(flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)) {
638 639 640
                mpi_errno = MPIDI_CH3I_Send_lock_ack_pkt(vc, win_ptr,
                                                         MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED,
                                                         source_win_handle);
641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661
                if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
                MPIDI_CH3_Progress_signal_completion();
            }
        }
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) {
            mpi_errno = MPIDI_CH3I_Send_flush_ack_pkt(vc, win_ptr, flags,
                                                      source_win_handle);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPIDI_CH3_Progress_signal_completion();
        }
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_DECR_AT_COUNTER) {
            win_ptr->at_completion_counter--;
            MPIU_Assert(win_ptr->at_completion_counter >= 0);
            /* Signal the local process when the op counter reaches 0. */
            if (win_ptr->at_completion_counter == 0)
                MPIDI_CH3_Progress_signal_completion();
        }
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK) {
            mpi_errno = MPIDI_CH3I_Send_flush_ack_pkt(vc, win_ptr, flags,
                                                      source_win_handle);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
662 663
            mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689
            MPIDI_CH3_Progress_signal_completion();
        }
    }
    else {
        /* This is GACC / GET / CAS / FOP */

        if (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK) {
            mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPIDI_CH3_Progress_signal_completion();
        }

        if (flags & MPIDI_CH3_PKT_FLAG_RMA_DECR_AT_COUNTER) {
            win_ptr->at_completion_counter--;
            MPIU_Assert(win_ptr->at_completion_counter >= 0);
            /* Signal the local process when the op counter reaches 0. */
            if (win_ptr->at_completion_counter == 0)
                MPIDI_CH3_Progress_signal_completion();
        }
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
690

691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725

static inline int fill_ranks_in_win_grp(MPID_Win *win_ptr, MPID_Group *group_ptr,
                                        int *ranks_in_win_grp)
{
    int mpi_errno = MPI_SUCCESS;
    int i, *ranks_in_grp;
    MPID_Group *win_grp_ptr;
    MPIU_CHKLMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_FILL_RANKS_IN_WIN_GRP);

    MPIDI_RMA_FUNC_ENTER(MPID_STATE_FILL_RANKS_IN_WIN_GRP);

    MPIU_CHKLMEM_MALLOC(ranks_in_grp, int *, group_ptr->size * sizeof(int),
                        mpi_errno, "ranks_in_grp");
    for (i = 0; i < group_ptr->size; i++) ranks_in_grp[i] = i;

    mpi_errno = MPIR_Comm_group_impl(win_ptr->comm_ptr, &win_grp_ptr);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPIR_Group_translate_ranks_impl(group_ptr, group_ptr->size,
                                                ranks_in_grp, win_grp_ptr, ranks_in_win_grp);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPIR_Group_free_impl(win_grp_ptr);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

  fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_FILL_RANKS_IN_WIN_GRP);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


726
static inline int wait_progress_engine(void)
727 728
{
    int mpi_errno = MPI_SUCCESS;
729
    MPID_Progress_state progress_state;
730

731 732 733 734 735 736
    MPID_Progress_start(&progress_state);
    mpi_errno = MPID_Progress_wait(&progress_state);
    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS) {
        MPID_Progress_end(&progress_state);
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**winnoprogress");
737
    }
738 739
    /* --END ERROR HANDLING-- */
    MPID_Progress_end(&progress_state);
740

741
  fn_exit:
742
    return mpi_errno;
743
  fn_fail:
744 745 746
    goto fn_exit;
}

747
static inline int poke_progress_engine(void)
748 749
{
    int mpi_errno = MPI_SUCCESS;
750
    MPID_Progress_state progress_state;
751

752 753 754 755 756
    MPID_Progress_start(&progress_state);
    mpi_errno = MPID_Progress_poke();
    if (mpi_errno != MPI_SUCCESS)
        MPIU_ERR_POP(mpi_errno);
    MPID_Progress_end(&progress_state);
757

758
  fn_exit:
759
    return mpi_errno;
760
  fn_fail:
761 762 763
    goto fn_exit;
}

764
#endif /* MPID_RMA_H_INCLUDED */