mpidrma.h 23.4 KB
Newer Older
1
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2 3 4 5 6
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

7 8
#if !defined(MPID_RMA_H_INCLUDED)
#define MPID_RMA_H_INCLUDED
9

10 11 12
#include "mpid_rma_types.h"
#include "mpid_rma_oplist.h"
#include "mpid_rma_shm.h"
13
#include "mpid_rma_issue.h"
14
#include "mpid_rma_lockqueue.h"
15

16
#undef FUNCNAME
17
#define FUNCNAME send_lock_msg
18 19
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
20
static inline int send_lock_msg(int dest, int lock_type, MPID_Win * win_ptr)
21
{
22 23 24 25 26 27 28
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_lock_t *lock_pkt = &upkt.lock;
    MPID_Request *req = NULL;
    MPIDI_VC_t *vc;
    MPIDI_STATE_DECL(MPID_STATE_SEND_LOCK_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_LOCK_MSG);
29

30
    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
31

32 33 34 35
    MPIDI_Pkt_init(lock_pkt, MPIDI_CH3_PKT_LOCK);
    lock_pkt->target_win_handle = win_ptr->all_win_handles[dest];
    lock_pkt->source_win_handle = win_ptr->handle;
    lock_pkt->lock_type = lock_type;
36
    lock_pkt->origin_rank = win_ptr->comm_ptr->rank;
37

38 39 40 41
    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_pkt, sizeof(*lock_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
42

43 44 45 46
    /* release the request returned by iStartMsg */
    if (req != NULL) {
        MPID_Request_release(req);
    }
47

48
  fn_exit:
49
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_LOCK_MSG);
50
    return mpi_errno;
51
    /* --BEGIN ERROR HANDLING-- */
52
  fn_fail:
53
    goto fn_exit;
54
    /* --END ERROR HANDLING-- */
55 56
}

57
#undef FUNCNAME
58
#define FUNCNAME send_unlock_msg
59 60
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
61 62
static inline int send_unlock_msg(int dest, MPID_Win * win_ptr,
                                  MPIDI_CH3_Pkt_flags_t flags)
63 64
{
    int mpi_errno = MPI_SUCCESS;
65 66 67 68 69 70
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_unlock_t *unlock_pkt = &upkt.unlock;
    MPID_Request *req = NULL;
    MPIDI_VC_t *vc;
    MPIDI_STATE_DECL(MPID_STATE_SEND_UNLOCK_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_UNLOCK_MSG);
71

72
    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
73

74 75
    /* Send a lock packet over to the target. wait for the lock_granted
     * reply. Then do all the RMA ops. */
76

77 78
    MPIDI_Pkt_init(unlock_pkt, MPIDI_CH3_PKT_UNLOCK);
    unlock_pkt->target_win_handle = win_ptr->all_win_handles[dest];
79
    unlock_pkt->source_win_handle = win_ptr->handle;
80
    unlock_pkt->flags = flags;
81

82 83 84 85
    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, unlock_pkt, sizeof(*unlock_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
86

87 88 89
    /* Release the request returned by iStartMsg */
    if (req != NULL) {
        MPID_Request_release(req);
90
    }
91

92
  fn_exit:
93
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_UNLOCK_MSG);
94 95
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
96
  fn_fail:
97 98 99 100
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}

101

102
#undef FUNCNAME
103
#define FUNCNAME MPIDI_CH3I_Send_lock_granted_pkt
104 105
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
106
static inline int MPIDI_CH3I_Send_lock_granted_pkt(MPIDI_VC_t * vc, MPID_Win * win_ptr, MPI_Win source_win_handle)
107
{
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_lock_granted_t *lock_granted_pkt = &upkt.lock_granted;
    MPID_Request *req = NULL;
    int mpi_errno;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_LOCK_GRANTED_PKT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_LOCK_GRANTED_PKT);

    /* send lock granted packet */
    MPIDI_Pkt_init(lock_granted_pkt, MPIDI_CH3_PKT_LOCK_GRANTED);
    lock_granted_pkt->source_win_handle = source_win_handle;
    lock_granted_pkt->target_rank = win_ptr->comm_ptr->rank;

    MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE,
                     (MPIU_DBG_FDEST, "sending lock granted pkt on vc=%p, source_win_handle=%#08x",
                      vc, lock_granted_pkt->source_win_handle));

    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_granted_pkt, sizeof(*lock_granted_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    if (mpi_errno) {
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
130 131
    }

132 133
    if (req != NULL) {
        MPID_Request_release(req);
134
    }
135

136
  fn_fail:
137 138 139
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_LOCK_GRANTED_PKT);

    return mpi_errno;
140 141 142 143
}


#undef FUNCNAME
Xin Zhao's avatar
Xin Zhao committed
144
#define FUNCNAME MPIDI_CH3I_Send_flush_ack_pkt
145 146
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
Xin Zhao's avatar
Xin Zhao committed
147
static inline int MPIDI_CH3I_Send_flush_ack_pkt(MPIDI_VC_t *vc, MPID_Win *win_ptr,
148
                                                MPIDI_CH3_Pkt_flags_t flags,
149
                                    MPI_Win source_win_handle)
150
{
151
    MPIDI_CH3_Pkt_t upkt;
Xin Zhao's avatar
Xin Zhao committed
152
    MPIDI_CH3_Pkt_flush_ack_t *flush_ack_pkt = &upkt.flush_ack;
153 154
    MPID_Request *req;
    int mpi_errno=MPI_SUCCESS;
Xin Zhao's avatar
Xin Zhao committed
155
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_FLUSH_ACK_PKT);
156

Xin Zhao's avatar
Xin Zhao committed
157
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_FLUSH_ACK_PKT);
158

Xin Zhao's avatar
Xin Zhao committed
159 160 161
    MPIDI_Pkt_init(flush_ack_pkt, MPIDI_CH3_PKT_FLUSH_ACK);
    flush_ack_pkt->source_win_handle = source_win_handle;
    flush_ack_pkt->target_rank = win_ptr->comm_ptr->rank;
162 163 164
    flush_ack_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
    if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
        flush_ack_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
165

166 167
    /* Because this is in a packet handler, it is already within a critical section */	
    /* MPIU_THREAD_CS_ENTER(CH3COMM,vc); */
Xin Zhao's avatar
Xin Zhao committed
168
    mpi_errno = MPIDI_CH3_iStartMsg(vc, flush_ack_pkt, sizeof(*flush_ack_pkt), &req);
169 170 171
    /* MPIU_THREAD_CS_EXIT(CH3COMM,vc); */
    if (mpi_errno != MPI_SUCCESS) {
	MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rmamsg");
172 173
    }

174 175 176
    if (req != NULL)
    {
        MPID_Request_release(req);
177 178
    }

179
 fn_fail:
Xin Zhao's avatar
Xin Zhao committed
180
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_FLUSH_ACK_PKT);
181 182
    return mpi_errno;
}
183

184

185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
#undef FUNCNAME
#define FUNCNAME send_decr_at_cnt_msg
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int send_decr_at_cnt_msg(int dst, MPID_Win * win_ptr)
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_decr_at_counter_t *decr_at_cnt_pkt = &upkt.decr_at_cnt;
    MPIDI_VC_t * vc;
    MPID_Request *request = NULL;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_SEND_DECR_AT_CNT_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_DECR_AT_CNT_MSG);

    MPIDI_Pkt_init(decr_at_cnt_pkt, MPIDI_CH3_PKT_DECR_AT_COUNTER);
    decr_at_cnt_pkt->target_win_handle = win_ptr->all_win_handles[dst];

    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dst, &vc);

    MPIU_THREAD_CS_ENTER(CH3COMM,vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, decr_at_cnt_pkt,
                                    sizeof(*decr_at_cnt_pkt), &request);
    MPIU_THREAD_CS_EXIT(CH3COMM,vc);
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rmamsg" );
    }

    if (request != NULL) {
        MPID_Request_release(request);
    }

  fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_DECR_AT_CNT_MSG);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}

225 226 227


/* enqueue an unsatisfied origin in passive target at target side. */
228 229 230 231
static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
                                      MPIDI_CH3_Pkt_t *pkt,
                                      MPIDI_msg_sz_t *buflen,
                                      MPID_Request **reqp)
232
{
233
    MPIDI_RMA_Lock_entry_t *new_ptr = NULL;
234 235
    int mpi_errno = MPI_SUCCESS;

236
    (*reqp) = NULL;
237

238 239 240 241 242
    new_ptr = MPIDI_CH3I_Win_lock_entry_alloc(win_ptr, pkt);
    if (new_ptr == NULL) {
        /* FIXME: we run out of resources of lock requests, needs to
           send LOCK DISCARDED packet back to origin */
    }
243 244
    MPL_LL_APPEND(win_ptr->lock_queue, win_ptr->lock_queue_tail, new_ptr);

245 246 247 248 249 250 251 252 253 254 255 256 257 258 259
    if (pkt->type == MPIDI_CH3_PKT_LOCK ||
        pkt->type == MPIDI_CH3_PKT_GET ||
        pkt->type == MPIDI_CH3_PKT_FOP ||
        pkt->type == MPIDI_CH3_PKT_CAS) {
        new_ptr->all_data_recved = 1;
        /* return bytes of data processed in this pkt handler */
        (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
        goto fn_exit;
    }
    else {
        MPI_Aint type_size = 0;
        MPIDI_msg_sz_t recv_data_sz = 0;
        MPID_Request *req = NULL;
        MPI_Datatype target_dtp;
        int target_count;
260
        int immed_len = 0;
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
        void *immed_data = NULL;
        int complete = 0;
        MPIDI_msg_sz_t data_len;
        char *data_buf = NULL;

        /* This is PUT, ACC, GACC */

        MPIDI_CH3_PKT_RMA_GET_TARGET_DATATYPE((*pkt), target_dtp, mpi_errno);
        MPIDI_CH3_PKT_RMA_GET_TARGET_COUNT((*pkt), target_count, mpi_errno);

        MPID_Datatype_get_size_macro(target_dtp, type_size);
        recv_data_sz = type_size * target_count;

        if (recv_data_sz <= MPIDI_RMA_IMMED_BYTES) {
            /* all data fits in packet header */
            new_ptr->all_data_recved = 1;
            /* return bytes of data processed in this pkt handler */
            (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
            goto fn_exit;
        }

        /* allocate tmp buffer to recieve data. */
        new_ptr->data = MPIU_Malloc(recv_data_sz);
        if (new_ptr->data == NULL) {
            MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %d",
                                 recv_data_sz);
        }

        /* create request to receive upcoming requests */
        req = MPID_Request_create();
        MPIU_Object_set_ref(req, 1);

        /* fill in area in req that will be used in Receive_data_found() */
        req->dev.user_buf = new_ptr->data;
        req->dev.user_count = target_count;
        req->dev.datatype = target_dtp;
        req->dev.recv_data_sz = recv_data_sz;
        req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
        req->dev.OnFinal = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
        req->dev.lock_queue_entry = new_ptr;

        MPIDI_CH3_PKT_RMA_GET_IMMED_LEN((*pkt), immed_len, mpi_errno);
        MPIDI_CH3_PKT_RMA_GET_IMMED_DATA_PTR((*pkt), immed_data, mpi_errno);

        if (immed_len > 0) {
            /* see if we can receive some data from packet header */
307
            MPIU_Memcpy(req->dev.user_buf, immed_data, (size_t)immed_len);
308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332
            req->dev.user_buf = (void*)((char*)req->dev.user_buf + immed_len);
            req->dev.recv_data_sz -= immed_len;
        }

        data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
        data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
        MPIU_Assert(req->dev.recv_data_sz > 0);

        mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

        /* return bytes of data processed in this pkt handler */
        (*buflen) = sizeof(MPIDI_CH3_Pkt_t) + data_len;

        if (complete) {
            mpi_errno = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete(vc, req, &complete);
            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
            if (complete) {
                goto fn_exit;
            }
        }

        (*reqp) = req;
    }

333 334 335 336 337 338 339 340 341
 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


static inline int set_lock_sync_counter(MPID_Win *win_ptr, int target_rank)
{
342
    MPIDI_RMA_Target_t *t = NULL;
343 344
    int mpi_errno = MPI_SUCCESS;

345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
    MPIU_Assert(win_ptr->states.access_state == MPIDI_RMA_PER_TARGET ||
                win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED ||
                win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED);

    if (win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED) {
        MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, win_ptr->comm_ptr->rank, &orig_vc);
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
        if (win_ptr->comm_ptr->rank == target_rank ||
            (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id)) {
            win_ptr->outstanding_locks--;
            MPIU_Assert(win_ptr->outstanding_locks >= 0);
            goto fn_exit;
        }
    }
    else if (win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED) {
361 362
        win_ptr->outstanding_locks--;
        MPIU_Assert(win_ptr->outstanding_locks >= 0);
363
        goto fn_exit;
364 365
    }

366 367 368 369
    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &t);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
    MPIU_Assert(t != NULL);
    t->access_state = MPIDI_RMA_LOCK_GRANTED;
370 371 372 373 374 375 376 377

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


378 379 380 381
#undef FUNCNAME
#define FUNCNAME acquire_local_lock
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
382
static inline int acquire_local_lock(MPID_Win * win_ptr, int lock_type)
383 384 385 386 387
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_ACQUIRE_LOCAL_LOCK);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_ACQUIRE_LOCAL_LOCK);

Xin Zhao's avatar
Xin Zhao committed
388 389
    MPIR_T_PVAR_TIMER_START(RMA, rma_winlock_getlocallock);

390 391 392 393 394 395 396 397
    if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_type) == 1) {
        mpi_errno = set_lock_sync_counter(win_ptr, win_ptr->comm_ptr->rank);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }
    else {
        /* Queue the lock information. */
        MPIDI_CH3_Pkt_t pkt;
        MPIDI_CH3_Pkt_lock_t *lock_pkt = &pkt.lock;
398
        MPIDI_RMA_Lock_entry_t *new_ptr = NULL;
399 400 401 402 403

        MPIDI_Pkt_init(lock_pkt, MPIDI_CH3_PKT_LOCK);
        lock_pkt->lock_type = lock_type;
        lock_pkt->origin_rank = win_ptr->comm_ptr->rank;

404 405 406 407 408
        new_ptr = MPIDI_CH3I_Win_lock_entry_alloc(win_ptr, &pkt);
        if (new_ptr == NULL) {
            /* FIXME: we run out of resources of lock requests, needs to
               send LOCK DISCARDED packet back to origin */
        }
409 410 411
        MPL_LL_APPEND(win_ptr->lock_queue, win_ptr->lock_queue_tail, new_ptr);

        new_ptr->all_data_recved = 1;
412 413
    }

414
  fn_exit:
Xin Zhao's avatar
Xin Zhao committed
415
    MPIR_T_PVAR_TIMER_END(RMA, rma_winlock_getlocallock);
416
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_ACQUIRE_LOCAL_LOCK);
417 418
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
419
  fn_fail:
420 421 422 423 424
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


425 426 427 428 429 430 431 432 433 434 435 436
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_RMA_Handle_flush_ack
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int MPIDI_CH3I_RMA_Handle_flush_ack(MPID_Win * win_ptr, int target_rank)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_RMA_Target_t *t;

    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &t);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

437 438 439 440
    t->sync.outstanding_acks--;
    MPIU_Assert(t->sync.outstanding_acks >= 0);

    t->put_acc_issued = 0; /* reset PUT_ACC_FLAG after FLUSH is completed */
441 442 443 444 445 446 447 448

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


449
#undef FUNCNAME
450
#define FUNCNAME do_accumulate_op
451 452
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
453 454
static inline int do_accumulate_op(void *source_buf, void *target_buf,
                                   int acc_count, MPI_Datatype acc_dtp, MPI_Op acc_op)
455 456
{
    int mpi_errno = MPI_SUCCESS;
457 458 459 460 461
    MPI_User_function *uop;
    MPIDI_STATE_DECL(MPID_STATE_DO_ACCUMULATE_OP);

    MPIDI_FUNC_ENTER(MPID_STATE_DO_ACCUMULATE_OP);

462
    if (acc_op == MPI_REPLACE)
463 464
    {
        /* simply copy the data */
465 466
        mpi_errno = MPIR_Localcopy(source_buf, acc_count, acc_dtp,
                                   target_buf, acc_count, acc_dtp);
467
        if (mpi_errno) {
468 469
	    MPIU_ERR_POP(mpi_errno);
	}
470 471 472
        goto fn_exit;
    }

473
    if (HANDLE_GET_KIND(acc_op) == HANDLE_KIND_BUILTIN)
474 475
    {
        /* get the function by indexing into the op table */
476
        uop = MPIR_OP_HDL_TO_FN(acc_op);
477 478 479 480
    }
    else
    {
	/* --BEGIN ERROR HANDLING-- */
481
        mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OP, "**opnotpredefined", "**opnotpredefined %d", acc_op );
482 483
        return mpi_errno;
	/* --END ERROR HANDLING-- */
484 485
    }

486
    if (MPIR_DATATYPE_IS_PREDEFINED(acc_dtp))
487
    {
488
        (*uop)(source_buf, target_buf, &acc_count, &acc_dtp);
489 490 491 492
    }
    else
    {
	/* derived datatype */
493 494 495
        MPID_Segment *segp;
        DLOOP_VECTOR *dloop_vec;
        MPI_Aint first, last;
496 497
        int vec_len, i, count;
        MPI_Aint type_size;
498
        MPI_Datatype type;
499 500 501 502 503 504 505 506 507
        MPID_Datatype *dtp;

        segp = MPID_Segment_alloc();
	/* --BEGIN ERROR HANDLING-- */
        if (!segp)
	{
            mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
	    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
            return mpi_errno;
508
        }
509
	/* --END ERROR HANDLING-- */
510 511
        MPID_Segment_init(NULL, acc_count,
			  acc_dtp, segp, 0);
512 513 514
        first = 0;
        last  = SEGMENT_IGNORE_LAST;

515 516
        MPID_Datatype_get_ptr(acc_dtp, dtp);
        vec_len = dtp->max_contig_blocks * acc_count + 1;
517 518 519 520 521 522 523 524 525
        /* +1 needed because Rob says so */
        dloop_vec = (DLOOP_VECTOR *)
            MPIU_Malloc(vec_len * sizeof(DLOOP_VECTOR));
	/* --BEGIN ERROR HANDLING-- */
        if (!dloop_vec)
	{
            mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
	    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
            return mpi_errno;
526
        }
527 528 529 530 531 532 533 534 535
	/* --END ERROR HANDLING-- */

        MPID_Segment_pack_vector(segp, first, &last, dloop_vec, &vec_len);

        type = dtp->eltype;
        MPID_Datatype_get_size_macro(type, type_size);
        for (i=0; i<vec_len; i++)
	{
            MPIU_Assign_trunc(count, (dloop_vec[i].DLOOP_VECTOR_LEN)/type_size, int);
536 537
            (*uop)((char *)source_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
                   (char *)target_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
538
                   &count, &type);
539 540
        }

541 542
        MPID_Segment_free(segp);
        MPIU_Free(dloop_vec);
543 544
    }

545 546
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
547 548

    return mpi_errno;
549
 fn_fail:
550 551 552
    goto fn_exit;
}

553

554 555 556 557 558
static inline int check_piggyback_lock(MPID_Win *win_ptr, MPIDI_VC_t *vc,
                                       MPIDI_CH3_Pkt_t *pkt,
                                       MPIDI_msg_sz_t *buflen,
                                       int *acquire_lock_fail,
                                       MPID_Request **reqp) {
559 560 561 562 563
    int lock_type;
    MPIDI_CH3_Pkt_flags_t flags;
    int mpi_errno = MPI_SUCCESS;

    (*acquire_lock_fail) = 0;
564
    (*reqp) = NULL;
565 566 567 568 569 570 571

    MPIDI_CH3_PKT_RMA_GET_FLAGS((*pkt), flags, mpi_errno);
    MPIDI_CH3_PKT_RMA_GET_LOCK_TYPE((*pkt), lock_type, mpi_errno);

    if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK) {
        if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_type) == 0) {
            /* cannot acquire the lock, queue up this operation. */
572
            mpi_errno = enqueue_lock_origin(win_ptr, vc, pkt, buflen, reqp);
573 574 575 576 577 578 579 580 581 582 583 584
            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

            (*acquire_lock_fail) = 1;
        }
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

585 586 587 588 589 590 591 592
static inline int finish_op_on_target(MPID_Win *win_ptr, MPIDI_VC_t *vc,
                                      MPIDI_CH3_Pkt_type_t type,
                                      MPIDI_CH3_Pkt_flags_t flags,
                                      MPI_Win source_win_handle) {
    int mpi_errno = MPI_SUCCESS;

    if (type == MPIDI_CH3_PKT_PUT || type == MPIDI_CH3_PKT_ACCUMULATE) {
        /* This is PUT or ACC */
593
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK) {
594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617
            if (!(flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) &&
                !(flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)) {
                mpi_errno = MPIDI_CH3I_Send_lock_granted_pkt(vc, win_ptr, source_win_handle);
                if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
                MPIDI_CH3_Progress_signal_completion();
            }
        }
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) {
            mpi_errno = MPIDI_CH3I_Send_flush_ack_pkt(vc, win_ptr, flags,
                                                      source_win_handle);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPIDI_CH3_Progress_signal_completion();
        }
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_DECR_AT_COUNTER) {
            win_ptr->at_completion_counter--;
            MPIU_Assert(win_ptr->at_completion_counter >= 0);
            /* Signal the local process when the op counter reaches 0. */
            if (win_ptr->at_completion_counter == 0)
                MPIDI_CH3_Progress_signal_completion();
        }
        if (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK) {
            mpi_errno = MPIDI_CH3I_Send_flush_ack_pkt(vc, win_ptr, flags,
                                                      source_win_handle);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
618 619
            mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645
            MPIDI_CH3_Progress_signal_completion();
        }
    }
    else {
        /* This is GACC / GET / CAS / FOP */

        if (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK) {
            mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPIDI_CH3_Progress_signal_completion();
        }

        if (flags & MPIDI_CH3_PKT_FLAG_RMA_DECR_AT_COUNTER) {
            win_ptr->at_completion_counter--;
            MPIU_Assert(win_ptr->at_completion_counter >= 0);
            /* Signal the local process when the op counter reaches 0. */
            if (win_ptr->at_completion_counter == 0)
                MPIDI_CH3_Progress_signal_completion();
        }
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
646

647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681

static inline int fill_ranks_in_win_grp(MPID_Win *win_ptr, MPID_Group *group_ptr,
                                        int *ranks_in_win_grp)
{
    int mpi_errno = MPI_SUCCESS;
    int i, *ranks_in_grp;
    MPID_Group *win_grp_ptr;
    MPIU_CHKLMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_FILL_RANKS_IN_WIN_GRP);

    MPIDI_RMA_FUNC_ENTER(MPID_STATE_FILL_RANKS_IN_WIN_GRP);

    MPIU_CHKLMEM_MALLOC(ranks_in_grp, int *, group_ptr->size * sizeof(int),
                        mpi_errno, "ranks_in_grp");
    for (i = 0; i < group_ptr->size; i++) ranks_in_grp[i] = i;

    mpi_errno = MPIR_Comm_group_impl(win_ptr->comm_ptr, &win_grp_ptr);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPIR_Group_translate_ranks_impl(group_ptr, group_ptr->size,
                                                ranks_in_grp, win_grp_ptr, ranks_in_win_grp);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPIR_Group_free_impl(win_grp_ptr);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

  fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_FILL_RANKS_IN_WIN_GRP);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


682
static inline int wait_progress_engine(void)
683 684
{
    int mpi_errno = MPI_SUCCESS;
685
    MPID_Progress_state progress_state;
686

687 688 689 690 691 692
    MPID_Progress_start(&progress_state);
    mpi_errno = MPID_Progress_wait(&progress_state);
    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS) {
        MPID_Progress_end(&progress_state);
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**winnoprogress");
693
    }
694 695
    /* --END ERROR HANDLING-- */
    MPID_Progress_end(&progress_state);
696

697
  fn_exit:
698
    return mpi_errno;
699
  fn_fail:
700 701 702
    goto fn_exit;
}

703
static inline int poke_progress_engine(void)
704 705
{
    int mpi_errno = MPI_SUCCESS;
706
    MPID_Progress_state progress_state;
707

708 709 710 711 712
    MPID_Progress_start(&progress_state);
    mpi_errno = MPID_Progress_poke();
    if (mpi_errno != MPI_SUCCESS)
        MPIU_ERR_POP(mpi_errno);
    MPID_Progress_end(&progress_state);
713

714
  fn_exit:
715
    return mpi_errno;
716
  fn_fail:
717 718 719
    goto fn_exit;
}

720
#endif /* MPID_RMA_H_INCLUDED */