/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpidi_ch3_impl.h"
#include "mpidrma.h"

/* When non-zero, MPIDI_Accumulate queues remote accumulates whose origin
 * and target datatypes are both predefined as MPIDI_RMA_ACC_CONTIG
 * elements instead of full MPIDI_RMA_ACCUMULATE elements. */
static int enableShortACC=1;

#ifdef USE_MPIU_INSTR
/* Duration counters used around RMA queue element allocation and setup */
MPIU_INSTR_DURATION_EXTERN_DECL(rmaqueue_alloc);
MPIU_INSTR_DURATION_EXTERN_DECL(rmaqueue_set);
extern void MPIDI_CH3_RMA_InitInstr(void);
#endif

/* NOTE(review): message tags, presumably for passive-target RMA traffic;
 * they are not referenced in this part of the file -- confirm usage. */
#define MPIDI_PASSIVE_TARGET_DONE_TAG  348297
#define MPIDI_PASSIVE_TARGET_RMA_TAG 563924

/* 
 * TODO:
 * Explore use of alternate allocation mechanisms for the RMA queue elements
 * (Because profiling has shown that queue element allocation/deallocation
 * can take a significant amount of time in the RMA operations).
 *    1: Current approach (uses perm memory malloc/free)
 *    2: Preallocate and maintain list (use perm memory malloc, but
 *       free onto window; use first; free on window free)
 *    3: Preallocate and maintain list (use separate memory, but free to
 *       thread/process; free in Finalize handler.  Option to use for
 *       single-threaded to avoid thread overheads)
 * Possible interface
 *    int MPIDI_RMAListAlloc(MPIDI_RMA_Op_t **a,MPID_Win *win)
 *    int MPIDI_RMAListFree(MPIDI_RMA_Op_t *a, MPID_Win *win)
 *    return value is error code (e.g., allocation failure).
 */

#undef FUNCNAME
#define FUNCNAME MPIDI_Win_free
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_Win_free - release a window object and the arrays it owns.
 *
 * win_ptr - address of the pointer to the window being freed.
 *
 * Returns MPI_SUCCESS, or an MPI error code:
 *   - MPI_ERR_RMA_SYNC ("**rmasync") if the window's epoch_state is not
 *     MPIDI_EPOCH_NONE;
 *   - whatever MPIDI_CH3I_Wait_for_pt_ops_finish or MPIR_Comm_free_impl
 *     reported.
 * Every error exit is taken before any of the window's arrays or the
 * window object itself has been freed.
 */
int MPIDI_Win_free(MPID_Win **win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    int in_use;
    MPID_Comm *comm_ptr;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_FREE);

    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_WIN_FREE);

    /* Freeing a window while an access/exposure epoch is open is an error */
    MPIU_ERR_CHKANDJUMP((*win_ptr)->epoch_state != MPIDI_EPOCH_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

    mpi_errno = MPIDI_CH3I_Wait_for_pt_ops_finish(*win_ptr);
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

    comm_ptr = (*win_ptr)->comm_ptr;
    mpi_errno = MPIR_Comm_free_impl(comm_ptr);
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

    MPIU_Free((*win_ptr)->targets);
    MPIU_Free((*win_ptr)->base_addrs);
    MPIU_Free((*win_ptr)->sizes);
    MPIU_Free((*win_ptr)->disp_units);
    MPIU_Free((*win_ptr)->all_win_handles);
    MPIU_Free((*win_ptr)->pt_rma_puts_accs);

    /* Free the attached buffer for windows created with MPI_Win_allocate() */
    if ((*win_ptr)->create_flavor == MPI_WIN_FLAVOR_ALLOCATE && (*win_ptr)->size > 0) {
        MPIU_Free((*win_ptr)->base);
    }

    MPIU_Object_release_ref(*win_ptr, &in_use);
    /* MPI windows don't have reference count semantics, so this should always be true */
    MPIU_Assert(!in_use);
    MPIU_Handle_obj_free( &MPID_Win_mem, *win_ptr );

 fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_FREE);
    return mpi_errno;

 fn_fail:
    goto fn_exit;
}


#undef FUNCNAME
#define FUNCNAME MPIDI_SHM_Win_free
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_SHM_Win_free - free a window created by the default shared
 * memory window implementation.
 *
 * win_ptr - address of the pointer to the window being freed.
 *
 * Returns MPI_SUCCESS or the error code reported by MPIDI_Win_free.  When
 * MPIDI_Win_free fails, the window's base and shm_base_addrs buffers are
 * left untouched so the still-existing window does not hold dangling
 * pointers.
 */
int MPIDI_SHM_Win_free(MPID_Win **win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    void *base;
    void *shm_base_addrs;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_SHM_WIN_FREE);

    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_SHM_WIN_FREE);

    /* Remember the buffers owned by this implementation: the window
       object itself is released by MPIDI_Win_free, so they cannot be read
       from it afterwards. */
    base           = (*win_ptr)->base;
    shm_base_addrs = (*win_ptr)->shm_base_addrs;

    /* MPIDI_Win_free validates the epoch state and waits for outstanding
       passive-target operations; only release the window memory once that
       has succeeded. */
    mpi_errno = MPIDI_Win_free(win_ptr);
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

    /* Free memory allocated by the default shared memory window
       implementation.  Note that this implementation works only for
       MPI_COMM_SELF and does not map a shared segment. */
    MPIU_Free(base);
    MPIU_Free(shm_base_addrs);

 fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_SHM_WIN_FREE);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
 fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


#undef FUNCNAME
#define FUNCNAME MPIDI_Win_shared_query
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_Win_shared_query - report base address, size and displacement
 * unit of a shared window.
 *
 * win_ptr     - window to query
 * target_rank - not consulted by this implementation; entry 0 of
 *               shm_base_addrs is always reported
 * size        - [out] receives win_ptr->size
 * disp_unit   - [out] receives win_ptr->disp_unit
 * baseptr     - [out] treated as a (void **); receives the base address
 *
 * Always returns MPI_SUCCESS.
 */
int MPIDI_Win_shared_query(MPID_Win *win_ptr, int target_rank, MPI_Aint *size,
                           int *disp_unit, void *baseptr)
{
    int mpi_errno = MPI_SUCCESS;
    void **base_out = (void **) baseptr;

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_SHARED_QUERY);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_WIN_SHARED_QUERY);

    *base_out = win_ptr->shm_base_addrs[0];
    *size = win_ptr->size;
    *disp_unit = win_ptr->disp_unit;

 fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_SHARED_QUERY);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
 fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


#undef FUNCNAME
#define FUNCNAME MPIDI_Put
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
149
int MPIDI_Put(const void *origin_addr, int origin_count, MPI_Datatype
150 151 152 153
            origin_datatype, int target_rank, MPI_Aint target_disp,
            int target_count, MPI_Datatype target_datatype, MPID_Win *win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
Pavan Balaji's avatar
Pavan Balaji committed
154
    int dt_contig ATTRIBUTE((unused)), rank, predefined;
155
    MPID_Datatype *dtp;
Pavan Balaji's avatar
Pavan Balaji committed
156
    MPI_Aint dt_true_lb ATTRIBUTE((unused));
157 158 159 160 161
    MPIDI_msg_sz_t data_sz;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PUT);
        
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_PUT);

162 163 164 165
    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

166
    if (win_ptr->epoch_state == MPIDI_EPOCH_NONE && win_ptr->fence_issued) {
James Dinan's avatar
James Dinan committed
167 168 169 170 171 172
        win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
    }

    MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

173 174 175
    MPIDI_Datatype_get_info(origin_count, origin_datatype,
			    dt_contig, data_sz, dtp,dt_true_lb); 
    
176
    if (data_sz == 0) {
177 178 179
	goto fn_exit;
    }

180
    rank = win_ptr->comm_ptr->rank;
181 182
    
    /* If the put is a local operation, do it here */
183
    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED)
184
    {
185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
        void *base;
        int disp_unit;

        if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
            base = win_ptr->shm_base_addrs[target_rank];
            disp_unit = win_ptr->disp_units[target_rank];
        }
        else {
            base = win_ptr->base;
            disp_unit = win_ptr->disp_unit;
        }

        mpi_errno = MPIR_Localcopy(origin_addr, origin_count, origin_datatype,
                                   (char *) base + disp_unit * target_disp,
                                   target_count, target_datatype);
200
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
201 202 203
    }
    else
    {
204
        MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
205
        MPIDI_RMA_Op_t *new_ptr = NULL;
206

207
	/* queue it up */
208
        MPIU_INSTR_DURATION_START(rmaqueue_alloc);
209
        mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
210 211
        MPIU_INSTR_DURATION_END(rmaqueue_alloc);
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
212

213
	MPIU_INSTR_DURATION_START(rmaqueue_set);
214
	/* FIXME: For contig and very short operations, use a streamlined op */
215
	new_ptr->type = MPIDI_RMA_PUT;
216
        /* Cast away const'ness for the origin address, as the
217
         * MPIDI_RMA_Op_t structure is used for both PUT and GET like
218 219
         * operations */
	new_ptr->origin_addr = (void *) origin_addr;
220 221 222 223 224 225
	new_ptr->origin_count = origin_count;
	new_ptr->origin_datatype = origin_datatype;
	new_ptr->target_rank = target_rank;
	new_ptr->target_disp = target_disp;
	new_ptr->target_count = target_count;
	new_ptr->target_datatype = target_datatype;
226 227
	MPIU_INSTR_DURATION_END(rmaqueue_set);

228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
	/* if source or target datatypes are derived, increment their
	   reference counts */ 
	MPIDI_CH3I_DATATYPE_IS_PREDEFINED(origin_datatype, predefined);
	if (!predefined)
	{
	    MPID_Datatype_get_ptr(origin_datatype, dtp);
	    MPID_Datatype_add_ref(dtp);
	}
	MPIDI_CH3I_DATATYPE_IS_PREDEFINED(target_datatype, predefined);
	if (!predefined)
	{
	    MPID_Datatype_get_ptr(target_datatype, dtp);
	    MPID_Datatype_add_ref(dtp);
	}
    }

  fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_PUT);    
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}



#undef FUNCNAME
#define FUNCNAME MPIDI_Get
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_Get - copy data from a target window into the origin buffer.
 *
 * A get from MPI_PROC_NULL or with zero bytes of origin data returns
 * immediately.  A get whose target is the calling rank, or any get on an
 * MPI_WIN_FLAVOR_SHARED window, is carried out right away with
 * MPIR_Localcopy.  Every other get is recorded in an MPIDI_RMA_Op_t
 * element obtained from MPIDI_CH3I_RMA_Ops_alloc_tail; derived origin /
 * target datatypes get an extra reference while the element is queued.
 *
 * Returns MPI_SUCCESS or an MPI error code (MPI_ERR_RMA_SYNC when no
 * epoch is open on the window).
 */
int MPIDI_Get(void *origin_addr, int origin_count,
              MPI_Datatype origin_datatype, int target_rank,
              MPI_Aint target_disp, int target_count,
              MPI_Datatype target_datatype, MPID_Win *win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    int my_rank, is_builtin, shared_win;
    int dt_contig ATTRIBUTE((unused));
    MPI_Aint dt_true_lb ATTRIBUTE((unused));
    MPIDI_msg_sz_t data_sz;
    MPID_Datatype *dtp;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_GET);

    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_GET);

    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

    /* A fence was issued and no epoch is open yet: this operation
     * starts the fence epoch. */
    if (win_ptr->fence_issued && win_ptr->epoch_state == MPIDI_EPOCH_NONE) {
        win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
    }

    MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

    MPIDI_Datatype_get_info(origin_count, origin_datatype,
                            dt_contig, data_sz, dtp, dt_true_lb);

    if (data_sz == 0) {
        goto fn_exit;
    }

    my_rank = win_ptr->comm_ptr->rank;
    shared_win = (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED);

    if (target_rank != my_rank && !shared_win)
    {
        /* Remote target: queue the operation up */
        MPIDI_RMA_Op_t *op_elem = NULL;
        MPIDI_RMA_Ops_list_t *queue = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);

        MPIU_INSTR_DURATION_START(rmaqueue_alloc);
        mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(queue, &op_elem);
        MPIU_INSTR_DURATION_END(rmaqueue_alloc);
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

        MPIU_INSTR_DURATION_START(rmaqueue_set);
        /* FIXME: For contig and very short operations, use a streamlined op */
        op_elem->type = MPIDI_RMA_GET;
        op_elem->origin_addr = origin_addr;
        op_elem->origin_count = origin_count;
        op_elem->origin_datatype = origin_datatype;
        op_elem->target_rank = target_rank;
        op_elem->target_disp = target_disp;
        op_elem->target_count = target_count;
        op_elem->target_datatype = target_datatype;
        MPIU_INSTR_DURATION_END(rmaqueue_set);

        /* Derived datatypes are kept alive by an extra reference */
        MPIDI_CH3I_DATATYPE_IS_PREDEFINED(origin_datatype, is_builtin);
        if (!is_builtin)
        {
            MPID_Datatype_get_ptr(origin_datatype, dtp);
            MPID_Datatype_add_ref(dtp);
        }
        MPIDI_CH3I_DATATYPE_IS_PREDEFINED(target_datatype, is_builtin);
        if (!is_builtin)
        {
            MPID_Datatype_get_ptr(target_datatype, dtp);
            MPID_Datatype_add_ref(dtp);
        }
    }
    else
    {
        /* Local (or shared-memory) target: perform the copy here */
        void *base = win_ptr->base;
        int disp_unit = win_ptr->disp_unit;

        if (shared_win) {
            base = win_ptr->shm_base_addrs[target_rank];
            disp_unit = win_ptr->disp_units[target_rank];
        }

        mpi_errno = MPIR_Localcopy((char *) base + disp_unit * target_disp,
                                   target_count, target_datatype, origin_addr,
                                   origin_count, origin_datatype);
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
    }

  fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_GET);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}



#undef FUNCNAME
#define FUNCNAME MPIDI_Accumulate
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
368
int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
369 370 371 372 373 374
                    origin_datatype, int target_rank, MPI_Aint target_disp,
                    int target_count, MPI_Datatype target_datatype, MPI_Op op,
                    MPID_Win *win_ptr)
{
    int mpi_errno=MPI_SUCCESS;
    MPIDI_msg_sz_t data_sz;
Pavan Balaji's avatar
Pavan Balaji committed
375 376
    int dt_contig ATTRIBUTE((unused)), rank, origin_predefined, target_predefined;
    MPI_Aint dt_true_lb ATTRIBUTE((unused));
377 378 379 380 381 382
    MPID_Datatype *dtp;
    MPIU_CHKLMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_ACCUMULATE);
    
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_ACCUMULATE);

383 384 385 386
    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

387
    if (win_ptr->epoch_state == MPIDI_EPOCH_NONE && win_ptr->fence_issued) {
James Dinan's avatar
James Dinan committed
388 389 390 391 392 393
        win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
    }

    MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

394 395 396
    MPIDI_Datatype_get_info(origin_count, origin_datatype,
			    dt_contig, data_sz, dtp, dt_true_lb);  
    
397
    if (data_sz == 0) {
398 399
	goto fn_exit;
    }
400

401
    rank = win_ptr->comm_ptr->rank;
402 403 404 405
    
    MPIDI_CH3I_DATATYPE_IS_PREDEFINED(origin_datatype, origin_predefined);
    MPIDI_CH3I_DATATYPE_IS_PREDEFINED(target_datatype, target_predefined);

406
    /* Do =! rank first (most likely branch?) */
407
    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED)
408
    {
409 410 411 412 413 414 415 416 417 418 419 420 421
        MPI_User_function *uop;
        void *base;
        int disp_unit, shm_op = 0;

        if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
            shm_op = 1;
            base = win_ptr->shm_base_addrs[target_rank];
            disp_unit = win_ptr->disp_units[target_rank];
        }
        else {
            base = win_ptr->base;
            disp_unit = win_ptr->disp_unit;
        }
422 423 424
	
	if (op == MPI_REPLACE)
	{
425 426 427 428 429 430
            if (shm_op) MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
            mpi_errno = MPIR_Localcopy(origin_addr, origin_count,
                                       origin_datatype,
                                       (char *) base + disp_unit * target_disp,
                                       target_count, target_datatype);
            if (shm_op) MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
431
            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
432 433 434 435 436 437 438 439
	    goto fn_exit;
	}
	
	MPIU_ERR_CHKANDJUMP1((HANDLE_GET_KIND(op) != HANDLE_KIND_BUILTIN), 
			     mpi_errno, MPI_ERR_OP, "**opnotpredefined",
			     "**opnotpredefined %d", op );
	
	/* get the function by indexing into the op table */
440
	uop = MPIR_OP_HDL_TO_FN(op);
441 442 443
	
	if (origin_predefined && target_predefined)
	{    
444 445
            /* Cast away const'ness for origin_address in order to
             * avoid changing the prototype for MPI_User_function */
446 447 448 449
            if (shm_op) MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
            (*uop)((void *) origin_addr, (char *) base + disp_unit*target_disp,
                   &target_count, &target_datatype);
            if (shm_op) MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
450 451 452 453 454 455 456 457 458 459 460
	}
	else
	{
	    /* derived datatype */
	    
	    MPID_Segment *segp;
	    DLOOP_VECTOR *dloop_vec;
	    MPI_Aint first, last;
	    int vec_len, i, type_size, count;
	    MPI_Datatype type;
	    MPI_Aint true_lb, true_extent, extent;
461 462
	    void *tmp_buf=NULL, *target_buf;
            const void *source_buf;
463 464 465 466 467 468 469
	    
	    if (origin_datatype != target_datatype)
	    {
		/* first copy the data into a temporary buffer with
		   the same datatype as the target. Then do the
		   accumulate operation. */
		
470
		MPIR_Type_get_true_extent_impl(target_datatype, &true_lb, &true_extent);
471 472 473 474 475 476 477 478 479 480 481 482 483
		MPID_Datatype_get_extent_macro(target_datatype, extent); 
		
		MPIU_CHKLMEM_MALLOC(tmp_buf, void *, 
			target_count * (MPIR_MAX(extent,true_extent)), 
			mpi_errno, "temporary buffer");
		/* adjust for potential negative lower bound in datatype */
		tmp_buf = (void *)((char*)tmp_buf - true_lb);
		
		mpi_errno = MPIR_Localcopy(origin_addr, origin_count,
					   origin_datatype, tmp_buf,
					   target_count, target_datatype);  
		if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
	    }
484 485 486 487

	    if (target_predefined) { 
		/* target predefined type, origin derived datatype */

488 489 490 491
                if (shm_op) MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
                (*uop)(tmp_buf, (char *) base + disp_unit * target_disp,
                       &target_count, &target_datatype);
                if (shm_op) MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
492
	    }
493
	    else {
494
	    
495
		segp = MPID_Segment_alloc();
496 497
		MPIU_ERR_CHKANDJUMP1((!segp), mpi_errno, MPI_ERR_OTHER, 
				    "**nomem","**nomem %s","MPID_Segment_alloc"); 
498 499 500 501 502
		MPID_Segment_init(NULL, target_count, target_datatype, segp, 0);
		first = 0;
		last  = SEGMENT_IGNORE_LAST;
		
		MPID_Datatype_get_ptr(target_datatype, dtp);
503
		vec_len = dtp->max_contig_blocks * target_count + 1; 
504 505 506 507 508 509 510 511
		/* +1 needed because Rob says so */
		MPIU_CHKLMEM_MALLOC(dloop_vec, DLOOP_VECTOR *, 
				    vec_len * sizeof(DLOOP_VECTOR), 
				    mpi_errno, "dloop vector");
		
		MPID_Segment_pack_vector(segp, first, &last, dloop_vec, &vec_len);
		
		source_buf = (tmp_buf != NULL) ? tmp_buf : origin_addr;
512
		target_buf = (char *) base + disp_unit * target_disp;
513 514
		type = dtp->eltype;
		type_size = MPID_Datatype_get_basic_size(type);
515
                if (shm_op) MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
516 517 518 519 520 521 522
		for (i=0; i<vec_len; i++)
		{
		    count = (dloop_vec[i].DLOOP_VECTOR_LEN)/type_size;
		    (*uop)((char *)source_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
			   (char *)target_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
			   &count, &type);
		}
523
                if (shm_op) MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
524 525 526
		
		MPID_Segment_free(segp);
	    }
527 528 529 530
	}
    }
    else
    {
531
        MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
532
        MPIDI_RMA_Op_t *new_ptr = NULL;
533

534
	/* queue it up */
535
        MPIU_INSTR_DURATION_START(rmaqueue_alloc);
536
        mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
537 538
        MPIU_INSTR_DURATION_END(rmaqueue_alloc);
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
539 540 541

	/* If predefined and contiguous, use a simplified element */
	if (origin_predefined && target_predefined && enableShortACC) {
542
	    MPIU_INSTR_DURATION_START(rmaqueue_set);
543 544
	    new_ptr->type = MPIDI_RMA_ACC_CONTIG;
	    /* Only the information needed for the contig/predefined acc */
545
            /* Cast away const'ness for origin_address as
546
             * MPIDI_RMA_Op_t contain both PUT and GET like ops */
547
	    new_ptr->origin_addr = (void *) origin_addr;
548 549 550 551 552 553 554
	    new_ptr->origin_count = origin_count;
	    new_ptr->origin_datatype = origin_datatype;
	    new_ptr->target_rank = target_rank;
	    new_ptr->target_disp = target_disp;
	    new_ptr->target_count = target_count;
	    new_ptr->target_datatype = target_datatype;
	    new_ptr->op = op;
555
	    MPIU_INSTR_DURATION_END(rmaqueue_set);
556
	    goto fn_exit;
557
	}
558

559
	MPIU_INSTR_DURATION_START(rmaqueue_set);
560
	new_ptr->type = MPIDI_RMA_ACCUMULATE;
561
        /* Cast away const'ness for origin_address as MPIDI_RMA_Op_t
562 563
         * contain both PUT and GET like ops */
	new_ptr->origin_addr = (void *) origin_addr;
564 565 566 567 568 569 570
	new_ptr->origin_count = origin_count;
	new_ptr->origin_datatype = origin_datatype;
	new_ptr->target_rank = target_rank;
	new_ptr->target_disp = target_disp;
	new_ptr->target_count = target_count;
	new_ptr->target_datatype = target_datatype;
	new_ptr->op = op;
571
	MPIU_INSTR_DURATION_END(rmaqueue_set);
572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632
	
	/* if source or target datatypes are derived, increment their
	   reference counts */ 
	if (!origin_predefined)
	{
	    MPID_Datatype_get_ptr(origin_datatype, dtp);
	    MPID_Datatype_add_ref(dtp);
	}
	if (!target_predefined)
	{
	    MPID_Datatype_get_ptr(target_datatype, dtp);
	    MPID_Datatype_add_ref(dtp);
	}
    }

 fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_ACCUMULATE);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


#undef FUNCNAME
#define FUNCNAME MPIDI_Alloc_mem
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_Alloc_mem - allocate size bytes with MPIU_Malloc.
 *
 * size     - number of bytes requested
 * info_ptr - not consulted by this implementation
 *
 * Returns whatever MPIU_Malloc returned.
 */
void *MPIDI_Alloc_mem( size_t size, MPID_Info *info_ptr )
{
    void *mem;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_ALLOC_MEM);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_ALLOC_MEM);
    mem = MPIU_Malloc(size);
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_ALLOC_MEM);

    return mem;
}


#undef FUNCNAME
#define FUNCNAME MPIDI_Free_mem
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_Free_mem - release memory with MPIU_Free.
 *
 * ptr - pointer handed to MPIU_Free
 *
 * Always returns MPI_SUCCESS.
 */
int MPIDI_Free_mem( void *ptr )
{
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_FREE_MEM);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_FREE_MEM);
    MPIU_Free(ptr);
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_FREE_MEM);

    return MPI_SUCCESS;
}