/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpidi_ch3_impl.h"
#include "mpidrma.h"

/* When non-zero, MPIDI_Accumulate queues accumulates whose origin and target
 * datatypes are both predefined as the compact MPIDI_RMA_ACC_CONTIG op. */
static int enableShortACC=1;

#ifdef USE_MPIU_INSTR
/* External declarations of the instrumentation duration counters. */
MPIU_INSTR_DURATION_EXTERN_DECL(wincreate_allgather);
MPIU_INSTR_DURATION_EXTERN_DECL(winfree_rs);
MPIU_INSTR_DURATION_EXTERN_DECL(winfree_complete);
MPIU_INSTR_DURATION_EXTERN_DECL(rmaqueue_alloc);
MPIU_INSTR_DURATION_EXTERN_DECL(rmaqueue_set);
extern void MPIDI_CH3_RMA_InitInstr(void);
#endif

/* Message tags; presumably reserved for passive-target RMA traffic (not
 * referenced in the visible part of this file -- confirm against callers). */
#define MPIDI_PASSIVE_TARGET_DONE_TAG  348297
#define MPIDI_PASSIVE_TARGET_RMA_TAG 563924

/*
 * TODO:
 * Explore the use of alternate allocation mechanisms for the RMA queue
 * elements (profiling has shown that queue element allocation/deallocation
 * can take a significant amount of time in the RMA operations).
 *    1: Current approach (uses perm memory malloc/free)
 *    2: Preallocate and maintain list (use perm memory malloc, but
 *       free onto window; use first; free on window free)
 *    3: Preallocate and maintain list (use separate memory, but free to
 *       thread/process; free in Finalize handler.  Option to use for
 *       single-threaded to avoid thread overheads)
 * Possible interface:
 *    int MPIDI_RMAListAlloc(MPIDI_RMA_Op_t **a,MPID_Win *win)
 *    int MPIDI_RMAListFree(MPIDI_RMA_Op_t *a, MPID_Win *win)
 *    The return value is an error code (e.g., allocation failure).
 */
40 41 42 43 44 45 46

#undef FUNCNAME
#define FUNCNAME MPIDI_Win_free
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPIDI_Win_free(MPID_Win **win_ptr)
{
47
    int mpi_errno=MPI_SUCCESS, total_pt_rma_puts_accs;
48
    int in_use;
49
    MPID_Comm *comm_ptr;
50
    int errflag = FALSE;
51 52 53
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_FREE);
        
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_WIN_FREE);
James Dinan's avatar
James Dinan committed
54 55 56 57

    MPIU_ERR_CHKANDJUMP((*win_ptr)->epoch_state != MPIDI_EPOCH_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

58 59
    comm_ptr = (*win_ptr)->comm_ptr;
    MPIU_INSTR_DURATION_START(winfree_rs);
60 61
    mpi_errno = MPIR_Reduce_scatter_block_impl((*win_ptr)->pt_rma_puts_accs, 
                                               &total_pt_rma_puts_accs, 1, 
62
                                               MPI_INT, MPI_SUM, comm_ptr, &errflag);
63
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
64
    MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
65
    MPIU_INSTR_DURATION_END(winfree_rs);
66 67 68 69 70 71

    if (total_pt_rma_puts_accs != (*win_ptr)->my_pt_rma_puts_accs)
    {
	MPID_Progress_state progress_state;
            
	/* poke the progress engine until the two are equal */
72
	MPIU_INSTR_DURATION_START(winfree_complete);
73 74 75 76 77 78 79 80
	MPID_Progress_start(&progress_state);
	while (total_pt_rma_puts_accs != (*win_ptr)->my_pt_rma_puts_accs)
	{
	    mpi_errno = MPID_Progress_wait(&progress_state);
	    /* --BEGIN ERROR HANDLING-- */
	    if (mpi_errno != MPI_SUCCESS)
	    {
		MPID_Progress_end(&progress_state);
81
		MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winnoprogress");
82 83 84 85
	    }
	    /* --END ERROR HANDLING-- */
	}
	MPID_Progress_end(&progress_state);
86
	MPIU_INSTR_DURATION_END(winfree_complete);
87 88
    }

89 90
    mpi_errno = MPIR_Comm_free_impl(comm_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
91

92
    MPIU_Free((*win_ptr)->targets);
93
    MPIU_Free((*win_ptr)->base_addrs);
94
    MPIU_Free((*win_ptr)->sizes);
95 96 97
    MPIU_Free((*win_ptr)->disp_units);
    MPIU_Free((*win_ptr)->all_win_handles);
    MPIU_Free((*win_ptr)->pt_rma_puts_accs);
98

99
    /* Free the attached buffer for windows created with MPI_Win_allocate() */
100
    if ((*win_ptr)->create_flavor == MPI_WIN_FLAVOR_ALLOCATE && (*win_ptr)->size > 0) {
101
      MPIU_Free((*win_ptr)->base);
102 103
    }

104 105 106
    MPIU_Object_release_ref(*win_ptr, &in_use);
    /* MPI windows don't have reference count semantics, so this should always be true */
    MPIU_Assert(!in_use);
107
    MPIU_Handle_obj_free( &MPID_Win_mem, *win_ptr );
108

109 110 111
 fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_FREE);
    return mpi_errno;
112

113
 fn_fail:
114 115
    goto fn_exit;
}
116 117


118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
#undef FUNCNAME
#define FUNCNAME MPIDI_SHM_Win_free
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPIDI_SHM_Win_free(MPID_Win **win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_SHM_WIN_FREE);

    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_SHM_WIN_FREE);

    /* Free memory allocated by the default shared memory window
       implementation.  Note that this implementation works only for
       MPI_COMM_SELF and does not map a shared segment. */

    MPIU_Free((*win_ptr)->base);
    MPIU_Free((*win_ptr)->shm_base_addrs);

    mpi_errno = MPIDI_Win_free(win_ptr);
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

 fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_SHM_WIN_FREE);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
 fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


#undef FUNCNAME
#define FUNCNAME MPIDI_Win_shared_query
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_Win_shared_query - report the base address, size and displacement
 * unit of a shared-memory window.
 *
 * win_ptr     - window being queried
 * target_rank - not consulted; entry 0 of shm_base_addrs is always reported
 * size        - [out] receives win_ptr->size
 * disp_unit   - [out] receives win_ptr->disp_unit
 * baseptr     - [out] address of a pointer; receives shm_base_addrs[0]
 *
 * Always returns MPI_SUCCESS.
 */
int MPIDI_Win_shared_query(MPID_Win *win_ptr, int target_rank, MPI_Aint *size,
                           int *disp_unit, void *baseptr)
{
    int mpi_errno = MPI_SUCCESS;
    /* baseptr is really the address of the caller's pointer variable */
    void **base_out = (void **) baseptr;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_SHARED_QUERY);

    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_WIN_SHARED_QUERY);

    *base_out  = win_ptr->shm_base_addrs[0];
    *size      = win_ptr->size;
    *disp_unit = win_ptr->disp_unit;

 fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_SHARED_QUERY);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
 fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


175 176 177 178
#undef FUNCNAME
#define FUNCNAME MPIDI_Put
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
179
int MPIDI_Put(const void *origin_addr, int origin_count, MPI_Datatype
180 181 182 183
            origin_datatype, int target_rank, MPI_Aint target_disp,
            int target_count, MPI_Datatype target_datatype, MPID_Win *win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
Pavan Balaji's avatar
Pavan Balaji committed
184
    int dt_contig ATTRIBUTE((unused)), rank, predefined;
185
    MPID_Datatype *dtp;
Pavan Balaji's avatar
Pavan Balaji committed
186
    MPI_Aint dt_true_lb ATTRIBUTE((unused));
187 188 189 190 191
    MPIDI_msg_sz_t data_sz;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PUT);
        
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_PUT);

192 193 194 195
    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

196
    if (win_ptr->epoch_state == MPIDI_EPOCH_NONE && win_ptr->fence_issued) {
James Dinan's avatar
James Dinan committed
197 198 199 200 201 202
        win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
    }

    MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

203 204 205
    MPIDI_Datatype_get_info(origin_count, origin_datatype,
			    dt_contig, data_sz, dtp,dt_true_lb); 
    
206
    if (data_sz == 0) {
207 208 209
	goto fn_exit;
    }

210
    rank = win_ptr->myrank;
211 212
    
    /* If the put is a local operation, do it here */
213
    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED)
214
    {
215 216 217 218 219 220 221 222 223 224 225 226 227 228 229
        void *base;
        int disp_unit;

        if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
            base = win_ptr->shm_base_addrs[target_rank];
            disp_unit = win_ptr->disp_units[target_rank];
        }
        else {
            base = win_ptr->base;
            disp_unit = win_ptr->disp_unit;
        }

        mpi_errno = MPIR_Localcopy(origin_addr, origin_count, origin_datatype,
                                   (char *) base + disp_unit * target_disp,
                                   target_count, target_datatype);
230
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
231 232 233
    }
    else
    {
234
        MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
235
        MPIDI_RMA_Op_t *new_ptr = NULL;
236

237
	/* queue it up */
238
        MPIU_INSTR_DURATION_START(rmaqueue_alloc);
239
        mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
240 241
        MPIU_INSTR_DURATION_END(rmaqueue_alloc);
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
242

243
	MPIU_INSTR_DURATION_START(rmaqueue_set);
244
	/* FIXME: For contig and very short operations, use a streamlined op */
245
	new_ptr->type = MPIDI_RMA_PUT;
246
        /* Cast away const'ness for the origin address, as the
247
         * MPIDI_RMA_Op_t structure is used for both PUT and GET like
248 249
         * operations */
	new_ptr->origin_addr = (void *) origin_addr;
250 251 252 253 254 255
	new_ptr->origin_count = origin_count;
	new_ptr->origin_datatype = origin_datatype;
	new_ptr->target_rank = target_rank;
	new_ptr->target_disp = target_disp;
	new_ptr->target_count = target_count;
	new_ptr->target_datatype = target_datatype;
256 257
	MPIU_INSTR_DURATION_END(rmaqueue_set);

258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
	/* if source or target datatypes are derived, increment their
	   reference counts */ 
	MPIDI_CH3I_DATATYPE_IS_PREDEFINED(origin_datatype, predefined);
	if (!predefined)
	{
	    MPID_Datatype_get_ptr(origin_datatype, dtp);
	    MPID_Datatype_add_ref(dtp);
	}
	MPIDI_CH3I_DATATYPE_IS_PREDEFINED(target_datatype, predefined);
	if (!predefined)
	{
	    MPID_Datatype_get_ptr(target_datatype, dtp);
	    MPID_Datatype_add_ref(dtp);
	}
    }

  fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_PUT);    
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}



#undef FUNCNAME
#define FUNCNAME MPIDI_Get
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_Get - transfer data from a target window into the origin buffer.
 *
 * origin_addr, origin_count, origin_datatype - description of the buffer that
 *                   receives the data
 * target_rank     - rank of the target; MPI_PROC_NULL makes the call a no-op
 * target_disp     - displacement into the target window, scaled by the
 *                   target's displacement unit
 * target_count, target_datatype - layout of the data at the target
 * win_ptr         - window the operation is issued on
 *
 * A get of zero bytes returns immediately.  If the target is the calling
 * process, or the window has flavor MPI_WIN_FLAVOR_SHARED, the data is copied
 * right away with MPIR_Localcopy.  Otherwise an MPIDI_RMA_GET element is
 * appended to the operation list of the target and a reference is added to
 * every non-predefined datatype it records.
 *
 * Returns MPI_SUCCESS, or MPI_ERR_RMA_SYNC ("**rmasync") when no epoch is
 * active, or the error code of a failing helper.
 */
int MPIDI_Get(void *origin_addr, int origin_count, MPI_Datatype
            origin_datatype, int target_rank, MPI_Aint target_disp,
            int target_count, MPI_Datatype target_datatype, MPID_Win *win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_msg_sz_t data_sz;
    int dt_contig ATTRIBUTE((unused)), rank, predefined;
    MPI_Aint dt_true_lb ATTRIBUTE((unused));
    MPID_Datatype *dtp;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_GET);

    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_GET);

    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

    /* The first operation after a fence is what opens the fence epoch. */
    if (win_ptr->epoch_state == MPIDI_EPOCH_NONE && win_ptr->fence_issued) {
        win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
    }

    MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

    MPIDI_Datatype_get_info(origin_count, origin_datatype,
                            dt_contig, data_sz, dtp, dt_true_lb);

    if (data_sz == 0) {
        goto fn_exit;
    }

    rank = win_ptr->myrank;

    /* If the get is a local operation, do it here */
    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED)
    {
        void *base;
        int disp_unit;

        if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
            base = win_ptr->shm_base_addrs[target_rank];
            disp_unit = win_ptr->disp_units[target_rank];
        }
        else {
            base = win_ptr->base;
            disp_unit = win_ptr->disp_unit;
        }

        mpi_errno = MPIR_Localcopy((char *) base + disp_unit * target_disp,
                                   target_count, target_datatype, origin_addr,
                                   origin_count, origin_datatype);
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
    }
    else
    {
        MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
        MPIDI_RMA_Op_t *new_ptr = NULL;

        /* queue it up */
        MPIU_INSTR_DURATION_START(rmaqueue_alloc);
        mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
        MPIU_INSTR_DURATION_END(rmaqueue_alloc);
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

        MPIU_INSTR_DURATION_START(rmaqueue_set);
        /* FIXME: For contig and very short operations, use a streamlined op */
        new_ptr->type = MPIDI_RMA_GET;
        new_ptr->origin_addr = origin_addr;
        new_ptr->origin_count = origin_count;
        new_ptr->origin_datatype = origin_datatype;
        new_ptr->target_rank = target_rank;
        new_ptr->target_disp = target_disp;
        new_ptr->target_count = target_count;
        new_ptr->target_datatype = target_datatype;
        MPIU_INSTR_DURATION_END(rmaqueue_set);

        /* if source or target datatypes are derived, increment their
           reference counts */
        MPIDI_CH3I_DATATYPE_IS_PREDEFINED(origin_datatype, predefined);
        if (!predefined)
        {
            MPID_Datatype_get_ptr(origin_datatype, dtp);
            MPID_Datatype_add_ref(dtp);
        }
        MPIDI_CH3I_DATATYPE_IS_PREDEFINED(target_datatype, predefined);
        if (!predefined)
        {
            MPID_Datatype_get_ptr(target_datatype, dtp);
            MPID_Datatype_add_ref(dtp);
        }
    }

  fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_GET);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}



#undef FUNCNAME
#define FUNCNAME MPIDI_Accumulate
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
398
int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
399 400 401 402 403 404
                    origin_datatype, int target_rank, MPI_Aint target_disp,
                    int target_count, MPI_Datatype target_datatype, MPI_Op op,
                    MPID_Win *win_ptr)
{
    int mpi_errno=MPI_SUCCESS;
    MPIDI_msg_sz_t data_sz;
Pavan Balaji's avatar
Pavan Balaji committed
405 406
    int dt_contig ATTRIBUTE((unused)), rank, origin_predefined, target_predefined;
    MPI_Aint dt_true_lb ATTRIBUTE((unused));
407 408 409 410 411 412
    MPID_Datatype *dtp;
    MPIU_CHKLMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_ACCUMULATE);
    
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_ACCUMULATE);

413 414 415 416
    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

417
    if (win_ptr->epoch_state == MPIDI_EPOCH_NONE && win_ptr->fence_issued) {
James Dinan's avatar
James Dinan committed
418 419 420 421 422 423
        win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
    }

    MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

424 425 426
    MPIDI_Datatype_get_info(origin_count, origin_datatype,
			    dt_contig, data_sz, dtp, dt_true_lb);  
    
427
    if (data_sz == 0) {
428 429
	goto fn_exit;
    }
430 431

    rank = win_ptr->myrank;
432 433 434 435
    
    MPIDI_CH3I_DATATYPE_IS_PREDEFINED(origin_datatype, origin_predefined);
    MPIDI_CH3I_DATATYPE_IS_PREDEFINED(target_datatype, target_predefined);

436
    /* Do =! rank first (most likely branch?) */
437
    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED)
438
    {
439 440 441 442 443 444 445 446 447 448 449 450 451
        MPI_User_function *uop;
        void *base;
        int disp_unit, shm_op = 0;

        if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
            shm_op = 1;
            base = win_ptr->shm_base_addrs[target_rank];
            disp_unit = win_ptr->disp_units[target_rank];
        }
        else {
            base = win_ptr->base;
            disp_unit = win_ptr->disp_unit;
        }
452 453 454
	
	if (op == MPI_REPLACE)
	{
455 456 457 458 459 460
            if (shm_op) MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
            mpi_errno = MPIR_Localcopy(origin_addr, origin_count,
                                       origin_datatype,
                                       (char *) base + disp_unit * target_disp,
                                       target_count, target_datatype);
            if (shm_op) MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
461
            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
462 463 464 465 466 467 468 469
	    goto fn_exit;
	}
	
	MPIU_ERR_CHKANDJUMP1((HANDLE_GET_KIND(op) != HANDLE_KIND_BUILTIN), 
			     mpi_errno, MPI_ERR_OP, "**opnotpredefined",
			     "**opnotpredefined %d", op );
	
	/* get the function by indexing into the op table */
470
	uop = MPIR_OP_HDL_TO_FN(op);
471 472 473
	
	if (origin_predefined && target_predefined)
	{    
474 475
            /* Cast away const'ness for origin_address in order to
             * avoid changing the prototype for MPI_User_function */
476 477 478 479
            if (shm_op) MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
            (*uop)((void *) origin_addr, (char *) base + disp_unit*target_disp,
                   &target_count, &target_datatype);
            if (shm_op) MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
480 481 482 483 484 485 486 487 488 489 490
	}
	else
	{
	    /* derived datatype */
	    
	    MPID_Segment *segp;
	    DLOOP_VECTOR *dloop_vec;
	    MPI_Aint first, last;
	    int vec_len, i, type_size, count;
	    MPI_Datatype type;
	    MPI_Aint true_lb, true_extent, extent;
491 492
	    void *tmp_buf=NULL, *target_buf;
            const void *source_buf;
493 494 495 496 497 498 499
	    
	    if (origin_datatype != target_datatype)
	    {
		/* first copy the data into a temporary buffer with
		   the same datatype as the target. Then do the
		   accumulate operation. */
		
500
		MPIR_Type_get_true_extent_impl(target_datatype, &true_lb, &true_extent);
501 502 503 504 505 506 507 508 509 510 511 512 513
		MPID_Datatype_get_extent_macro(target_datatype, extent); 
		
		MPIU_CHKLMEM_MALLOC(tmp_buf, void *, 
			target_count * (MPIR_MAX(extent,true_extent)), 
			mpi_errno, "temporary buffer");
		/* adjust for potential negative lower bound in datatype */
		tmp_buf = (void *)((char*)tmp_buf - true_lb);
		
		mpi_errno = MPIR_Localcopy(origin_addr, origin_count,
					   origin_datatype, tmp_buf,
					   target_count, target_datatype);  
		if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
	    }
514 515 516 517

	    if (target_predefined) { 
		/* target predefined type, origin derived datatype */

518 519 520 521
                if (shm_op) MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
                (*uop)(tmp_buf, (char *) base + disp_unit * target_disp,
                       &target_count, &target_datatype);
                if (shm_op) MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
522
	    }
523
	    else {
524
	    
525
		segp = MPID_Segment_alloc();
526 527
		MPIU_ERR_CHKANDJUMP1((!segp), mpi_errno, MPI_ERR_OTHER, 
				    "**nomem","**nomem %s","MPID_Segment_alloc"); 
528 529 530 531 532
		MPID_Segment_init(NULL, target_count, target_datatype, segp, 0);
		first = 0;
		last  = SEGMENT_IGNORE_LAST;
		
		MPID_Datatype_get_ptr(target_datatype, dtp);
533
		vec_len = dtp->max_contig_blocks * target_count + 1; 
534 535 536 537 538 539 540 541
		/* +1 needed because Rob says so */
		MPIU_CHKLMEM_MALLOC(dloop_vec, DLOOP_VECTOR *, 
				    vec_len * sizeof(DLOOP_VECTOR), 
				    mpi_errno, "dloop vector");
		
		MPID_Segment_pack_vector(segp, first, &last, dloop_vec, &vec_len);
		
		source_buf = (tmp_buf != NULL) ? tmp_buf : origin_addr;
542
		target_buf = (char *) base + disp_unit * target_disp;
543 544
		type = dtp->eltype;
		type_size = MPID_Datatype_get_basic_size(type);
545
                if (shm_op) MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
546 547 548 549 550 551 552
		for (i=0; i<vec_len; i++)
		{
		    count = (dloop_vec[i].DLOOP_VECTOR_LEN)/type_size;
		    (*uop)((char *)source_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
			   (char *)target_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
			   &count, &type);
		}
553
                if (shm_op) MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
554 555 556
		
		MPID_Segment_free(segp);
	    }
557 558 559 560
	}
    }
    else
    {
561
        MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
562
        MPIDI_RMA_Op_t *new_ptr = NULL;
563

564
	/* queue it up */
565
        MPIU_INSTR_DURATION_START(rmaqueue_alloc);
566
        mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
567 568
        MPIU_INSTR_DURATION_END(rmaqueue_alloc);
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
569 570 571

	/* If predefined and contiguous, use a simplified element */
	if (origin_predefined && target_predefined && enableShortACC) {
572
	    MPIU_INSTR_DURATION_START(rmaqueue_set);
573 574
	    new_ptr->type = MPIDI_RMA_ACC_CONTIG;
	    /* Only the information needed for the contig/predefined acc */
575
            /* Cast away const'ness for origin_address as
576
             * MPIDI_RMA_Op_t contain both PUT and GET like ops */
577
	    new_ptr->origin_addr = (void *) origin_addr;
578 579 580 581 582 583 584
	    new_ptr->origin_count = origin_count;
	    new_ptr->origin_datatype = origin_datatype;
	    new_ptr->target_rank = target_rank;
	    new_ptr->target_disp = target_disp;
	    new_ptr->target_count = target_count;
	    new_ptr->target_datatype = target_datatype;
	    new_ptr->op = op;
585
	    MPIU_INSTR_DURATION_END(rmaqueue_set);
586
	    goto fn_exit;
587
	}
588

589
	MPIU_INSTR_DURATION_START(rmaqueue_set);
590
	new_ptr->type = MPIDI_RMA_ACCUMULATE;
591
        /* Cast away const'ness for origin_address as MPIDI_RMA_Op_t
592 593
         * contain both PUT and GET like ops */
	new_ptr->origin_addr = (void *) origin_addr;
594 595 596 597 598 599 600
	new_ptr->origin_count = origin_count;
	new_ptr->origin_datatype = origin_datatype;
	new_ptr->target_rank = target_rank;
	new_ptr->target_disp = target_disp;
	new_ptr->target_count = target_count;
	new_ptr->target_datatype = target_datatype;
	new_ptr->op = op;
601
	MPIU_INSTR_DURATION_END(rmaqueue_set);
602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662
	
	/* if source or target datatypes are derived, increment their
	   reference counts */ 
	if (!origin_predefined)
	{
	    MPID_Datatype_get_ptr(origin_datatype, dtp);
	    MPID_Datatype_add_ref(dtp);
	}
	if (!target_predefined)
	{
	    MPID_Datatype_get_ptr(target_datatype, dtp);
	    MPID_Datatype_add_ref(dtp);
	}
    }

 fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_ACCUMULATE);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


#undef FUNCNAME
#define FUNCNAME MPIDI_Alloc_mem
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_Alloc_mem - allocate size bytes with MPIU_Malloc.
 *
 * size     - number of bytes requested
 * info_ptr - not consulted by this implementation
 *
 * Returns whatever MPIU_Malloc returns.
 */
void *MPIDI_Alloc_mem( size_t size, MPID_Info *info_ptr )
{
    void *mem;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_ALLOC_MEM);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_ALLOC_MEM);
    mem = MPIU_Malloc(size);
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_ALLOC_MEM);

    return mem;
}


#undef FUNCNAME
#define FUNCNAME MPIDI_Free_mem
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_Free_mem - hand ptr to MPIU_Free.
 *
 * ptr - memory to release
 *
 * Always returns MPI_SUCCESS.
 */
int MPIDI_Free_mem( void *ptr )
{
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_FREE_MEM);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_FREE_MEM);
    MPIU_Free(ptr);
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_FREE_MEM);

    /* Nothing in this routine can fail. */
    return MPI_SUCCESS;
}