Commit b1685139 authored by Xin Zhao, committed by Pavan Balaji
Browse files

Embedding packet structure into RMA operation structure.



We were duplicating information in the operation structure and in the
packet structure when the message is actually issued.  Since most of
the information is the same anyway, this patch just embeds a packet
structure into the operation structure, so that we eliminate the unnecessary
copy.
Signed-off-by: Pavan Balaji <balaji@anl.gov>
parent ba1a400c
......@@ -189,6 +189,9 @@ MPI_TYPECLASS_INTEGER, or MPI_TYPECLASS_COMPLEX
**memcpyalias:memcpy arguments alias each other
**memcpyalias %p %p %L:memcpy arguments alias each other, dst=%p src=%p len=%L
**invalidpkt: Invalid packet type
**invalidpkt %d: Invalid packet type (%d)
**rmatypenotatomic: Datatype not permitted for atomic operations
**rmatypenotatomic %D: Datatype (%D) not permitted for atomic operations
**winflavor: Window flavor is not compatible with the given operation
......
......@@ -9,17 +9,6 @@
#include "mpidi_ch3_impl.h"
/* Kinds of RMA operation, as recorded in the `type` field of
 * MPIDI_RMA_Op_t.  The numeric values are explicit; 26 is left unused
 * (see the REMOVED note below). */
typedef enum MPIDI_RMA_Op_type {
MPIDI_RMA_PUT = 23,
MPIDI_RMA_GET = 24,
MPIDI_RMA_ACCUMULATE = 25,
/* REMOVED: MPIDI_RMA_LOCK = 26, */
MPIDI_RMA_ACC_CONTIG = 27,
MPIDI_RMA_GET_ACCUMULATE = 28,
MPIDI_RMA_COMPARE_AND_SWAP = 29,
MPIDI_RMA_FETCH_AND_OP = 30
} MPIDI_RMA_Op_type_t;
/* Special case RMA operations */
enum MPIDI_RMA_Datatype {
......@@ -61,33 +50,29 @@ typedef struct MPIDI_RMA_dtype_info { /* for derived datatypes */
/* A queued RMA operation.  Elements are linked through prev/next into an
 * operations list (the list for a given target is obtained with
 * MPIDI_CH3I_RMA_Get_ops_list()). */
typedef struct MPIDI_RMA_Op {
struct MPIDI_RMA_Op *prev; /* pointer to previous element in list */
struct MPIDI_RMA_Op *next; /* pointer to next element in list */
/* FIXME: It would be better to setup the packet that will be sent, at
 * least in most cases (if, as a result of the sync/ops/sync sequence,
 * a different packet type is needed, it can be extracted from the
 * information otherwise stored). */
MPIDI_RMA_Op_type_t type;
void *origin_addr;
int origin_count;
MPI_Datatype origin_datatype;
int target_rank;
MPI_Aint target_disp;
int target_count;
MPI_Datatype target_datatype;
MPI_Op op; /* for accumulate */
/* Used to complete operations */
struct MPID_Request *request;
MPIDI_RMA_dtype_info dtype_info;
void *dataloop;
void *compare_addr;
MPI_Datatype compare_datatype;
void *result_addr;
int result_count;
MPI_Datatype result_datatype;
void *compare_addr;
int compare_count;
MPI_Datatype compare_datatype;
struct MPID_Request *request;
MPIDI_RMA_dtype_info dtype_info;
void *dataloop;
int target_rank;
MPIDI_CH3_Pkt_t pkt; /* packet for this operation; initialized (MPIDI_Pkt_init)
                      * and filled in when the operation is queued */
} MPIDI_RMA_Op_t;
typedef struct MPIDI_PT_single_op {
int type; /* put, get, or accum. */
MPIDI_CH3_Pkt_type_t type; /* put, get, or accum. */
void *addr;
int count;
MPI_Datatype datatype;
......
......@@ -193,6 +193,43 @@ MPIDI_CH3_PKT_DEFS
#endif
/* *INDENT-ON* */
/* Fetch the target datatype carried by an RMA operation packet.
 *
 *   pkt_      - the packet: any expression yielding an object with a
 *               `.type` member and the per-kind sub-structures used below
 *               (.put, .get, .accum, .cas, .fop, .lock_put_unlock,
 *               .lock_get_unlock, .lock_accum_unlock, .accum_immed)
 *   datatype_ - lvalue that receives the `datatype` field of the
 *               sub-structure selected by pkt_.type; left untouched when
 *               the packet type is not one of the cases below
 *   err_      - lvalue; set to MPI_SUCCESS first.  For an unrecognized
 *               packet type it is passed to MPIU_ERR_SETANDJUMP1 together
 *               with MPI_ERR_OTHER and the "**invalidpkt" message
 *               (NOTE(review): presumably that macro jumps to the caller's
 *               error label -- confirm against its definition).
 *
 * ACCUMULATE and GET_ACCUM packets both read the .accum sub-structure.
 * Every use of pkt_, datatype_ and err_ is parenthesized so that an
 * argument such as `*pkt_ptr` expands with the intended precedence.
 * The expansion is a brace block: invoke it as a complete statement. */
#define MPIDI_CH3_PKT_RMA_GET_TARGET_DATATYPE(pkt_, datatype_, err_) \
{ \
    (err_) = MPI_SUCCESS; \
    switch ((pkt_).type) { \
    case (MPIDI_CH3_PKT_PUT): \
        (datatype_) = (pkt_).put.datatype; \
        break; \
    case (MPIDI_CH3_PKT_GET): \
        (datatype_) = (pkt_).get.datatype; \
        break; \
    case (MPIDI_CH3_PKT_ACCUMULATE): \
    case (MPIDI_CH3_PKT_GET_ACCUM): \
        (datatype_) = (pkt_).accum.datatype; \
        break; \
    case (MPIDI_CH3_PKT_CAS): \
        (datatype_) = (pkt_).cas.datatype; \
        break; \
    case (MPIDI_CH3_PKT_FOP): \
        (datatype_) = (pkt_).fop.datatype; \
        break; \
    case (MPIDI_CH3_PKT_LOCK_PUT_UNLOCK): \
        (datatype_) = (pkt_).lock_put_unlock.datatype; \
        break; \
    case (MPIDI_CH3_PKT_LOCK_GET_UNLOCK): \
        (datatype_) = (pkt_).lock_get_unlock.datatype; \
        break; \
    case (MPIDI_CH3_PKT_LOCK_ACCUM_UNLOCK): \
        (datatype_) = (pkt_).lock_accum_unlock.datatype; \
        break; \
    case (MPIDI_CH3_PKT_ACCUM_IMMED): \
        (datatype_) = (pkt_).accum_immed.datatype; \
        break; \
    default: \
        MPIU_ERR_SETANDJUMP1(err_, MPI_ERR_OTHER, "**invalidpkt", "**invalidpkt %d", (pkt_).type); \
    } \
}
typedef struct MPIDI_CH3_Pkt_put {
MPIDI_CH3_Pkt_type_t type;
MPIDI_CH3_Pkt_flags_t flags;
......@@ -279,6 +316,7 @@ typedef struct MPIDI_CH3_Pkt_cas {
MPI_Datatype datatype;
void *addr;
MPI_Request request_handle;
MPI_Win source_win_handle;
MPI_Win target_win_handle; /* Used in the last RMA operation in each
* epoch for decrementing rma op counter in
* active target rma and for unlocking window
......@@ -300,6 +338,7 @@ typedef struct MPIDI_CH3_Pkt_fop {
void *addr;
MPI_Op op;
MPI_Request request_handle;
MPI_Win source_win_handle;
MPI_Win target_win_handle; /* Used in the last RMA operation in each
* epoch for decrementing rma op counter in
* active target rma and for unlocking window
......
......@@ -12,8 +12,7 @@
#include "mpid_rma_shm.h"
int MPIDI_CH3I_Issue_rma_op(MPIDI_RMA_Op_t * op_ptr, MPID_Win * win_ptr,
MPIDI_CH3_Pkt_flags_t flags, MPI_Win source_win_handle,
MPI_Win target_win_handle);
MPIDI_CH3_Pkt_flags_t flags);
#undef FUNCNAME
#define FUNCNAME send_lock_msg
......
......@@ -905,7 +905,7 @@ int MPIDI_CH3I_Release_lock(MPID_Win *win_ptr)
MPIDI_PT_single_op * single_op;
single_op = lock_queue->pt_single_op;
if (single_op->type == MPIDI_RMA_PUT) {
if (single_op->type == MPIDI_CH3_PKT_LOCK_PUT_UNLOCK) {
mpi_errno = MPIR_Localcopy(single_op->data,
single_op->count,
single_op->datatype,
......@@ -913,21 +913,21 @@ int MPIDI_CH3I_Release_lock(MPID_Win *win_ptr)
single_op->count,
single_op->datatype);
}
else if (single_op->type == MPIDI_RMA_ACCUMULATE) {
else if (single_op->type == MPIDI_CH3_PKT_LOCK_ACCUM_UNLOCK) {
if (win_ptr->shm_allocated == TRUE)
MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
mpi_errno = do_simple_accumulate(single_op);
if (win_ptr->shm_allocated == TRUE)
MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
}
else if (single_op->type == MPIDI_RMA_GET) {
else if (single_op->type == MPIDI_CH3_PKT_LOCK_GET_UNLOCK) {
mpi_errno = do_simple_get(win_ptr, lock_queue);
}
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
/* if put or accumulate, send rma done packet and release lock. */
if (single_op->type != MPIDI_RMA_GET) {
if (single_op->type != MPIDI_CH3_PKT_LOCK_GET_UNLOCK) {
/* NOTE: Only *queued* single_op operations are completed here.
Lock-op-unlock/single_op RMA ops can also be completed as
they arrive within various packet/request handlers via
......
This diff is collapsed.
......@@ -74,6 +74,7 @@ int MPIDI_Put(const void *origin_addr, int origin_count, MPI_Datatype
else {
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
MPIDI_CH3_Pkt_put_t *put_pkt = NULL;
/* queue it up */
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
......@@ -81,18 +82,21 @@ int MPIDI_Put(const void *origin_addr, int origin_count, MPI_Datatype
MPIU_ERR_POP(mpi_errno);
}
put_pkt = &(new_ptr->pkt.put);
MPIDI_Pkt_init(put_pkt, MPIDI_CH3_PKT_PUT);
put_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
win_ptr->disp_units[target_rank] * target_disp;
put_pkt->count = target_count;
put_pkt->datatype = target_datatype;
put_pkt->dataloop_size = 0;
put_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
put_pkt->source_win_handle = win_ptr->handle;
/* FIXME: For contig and very short operations, use a streamlined op */
new_ptr->type = MPIDI_RMA_PUT;
/* Cast away const'ness for the origin address, as the
* MPIDI_RMA_Op_t structure is used for both PUT and GET like
* operations */
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = origin_count;
new_ptr->origin_datatype = origin_datatype;
new_ptr->target_rank = target_rank;
new_ptr->target_disp = target_disp;
new_ptr->target_count = target_count;
new_ptr->target_datatype = target_datatype;
/* if source or target datatypes are derived, increment their
* reference counts */
......@@ -181,6 +185,7 @@ int MPIDI_Get(void *origin_addr, int origin_count, MPI_Datatype
else {
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
MPIDI_CH3_Pkt_get_t *get_pkt = NULL;
/* queue it up */
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
......@@ -188,15 +193,21 @@ int MPIDI_Get(void *origin_addr, int origin_count, MPI_Datatype
MPIU_ERR_POP(mpi_errno);
}
get_pkt = &(new_ptr->pkt.get);
MPIDI_Pkt_init(get_pkt, MPIDI_CH3_PKT_GET);
get_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
win_ptr->disp_units[target_rank] * target_disp;
get_pkt->count = target_count;
get_pkt->datatype = target_datatype;
get_pkt->dataloop_size = 0;
get_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
get_pkt->source_win_handle = win_ptr->handle;
/* FIXME: For contig and very short operations, use a streamlined op */
new_ptr->type = MPIDI_RMA_GET;
new_ptr->origin_addr = origin_addr;
new_ptr->origin_count = origin_count;
new_ptr->origin_datatype = origin_datatype;
new_ptr->target_rank = target_rank;
new_ptr->target_disp = target_disp;
new_ptr->target_count = target_count;
new_ptr->target_datatype = target_datatype;
/* if source or target datatypes are derived, increment their
* reference counts */
......@@ -286,6 +297,7 @@ int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
else {
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
MPIDI_CH3_Pkt_accum_t *accum_pkt = NULL;
/* queue it up */
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
......@@ -296,32 +308,48 @@ int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
/* If predefined and contiguous, use a simplified element */
if (MPIR_DATATYPE_IS_PREDEFINED(origin_datatype) &&
MPIR_DATATYPE_IS_PREDEFINED(target_datatype) && enableShortACC) {
new_ptr->type = MPIDI_RMA_ACC_CONTIG;
/* Only the information needed for the contig/predefined acc */
/* Cast away const'ness for origin_address as
* MPIDI_RMA_Op_t contain both PUT and GET like ops */
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = origin_count;
new_ptr->origin_datatype = origin_datatype;
new_ptr->target_rank = target_rank;
new_ptr->target_disp = target_disp;
new_ptr->target_count = target_count;
new_ptr->target_datatype = target_datatype;
new_ptr->op = op;
goto fn_exit;
MPI_Aint origin_type_size;
size_t len;
MPID_Datatype_get_size_macro(origin_datatype, origin_type_size);
MPIU_Assign_trunc(len, origin_count * origin_type_size, size_t);
if (MPIR_CVAR_CH3_RMA_ACC_IMMED && len <= MPIDI_RMA_IMMED_INTS * sizeof(int)) {
MPIDI_CH3_Pkt_accum_immed_t *accumi_pkt;
accumi_pkt = &(new_ptr->pkt.accum_immed);
MPIDI_Pkt_init(accumi_pkt, MPIDI_CH3_PKT_ACCUM_IMMED);
accumi_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
win_ptr->disp_units[target_rank] * target_disp;
accumi_pkt->count = target_count;
accumi_pkt->datatype = target_datatype;
accumi_pkt->op = op;
accumi_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
accumi_pkt->source_win_handle = win_ptr->handle;
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = origin_count;
new_ptr->origin_datatype = origin_datatype;
new_ptr->target_rank = target_rank;
goto fn_exit;
}
}
new_ptr->type = MPIDI_RMA_ACCUMULATE;
/* Cast away const'ness for origin_address as MPIDI_RMA_Op_t
* contain both PUT and GET like ops */
accum_pkt = &(new_ptr->pkt.accum);
MPIDI_Pkt_init(accum_pkt, MPIDI_CH3_PKT_ACCUMULATE);
accum_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
win_ptr->disp_units[target_rank] * target_disp;
accum_pkt->count = target_count;
accum_pkt->datatype = target_datatype;
accum_pkt->dataloop_size = 0;
accum_pkt->op = op;
accum_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
accum_pkt->source_win_handle = win_ptr->handle;
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = origin_count;
new_ptr->origin_datatype = origin_datatype;
new_ptr->target_rank = target_rank;
new_ptr->target_disp = target_disp;
new_ptr->target_count = target_count;
new_ptr->target_datatype = target_datatype;
new_ptr->op = op;
/* if source or target datatypes are derived, increment their
* reference counts */
......@@ -423,20 +451,44 @@ int MPIDI_Get_accumulate(const void *origin_addr, int origin_count,
/* TODO: Can we use the MPIDI_RMA_ACC_CONTIG optimization? */
new_ptr->type = MPIDI_RMA_GET_ACCUMULATE;
/* Cast away const'ness for origin_address as MPIDI_RMA_Op_t
* contain both PUT and GET like ops */
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = origin_count;
new_ptr->origin_datatype = origin_datatype;
new_ptr->result_addr = result_addr;
new_ptr->result_count = result_count;
new_ptr->result_datatype = result_datatype;
new_ptr->target_rank = target_rank;
new_ptr->target_disp = target_disp;
new_ptr->target_count = target_count;
new_ptr->target_datatype = target_datatype;
new_ptr->op = op;
if (op == MPI_NO_OP) {
/* Convert GAcc to a Get */
MPIDI_CH3_Pkt_get_t *get_pkt = &(new_ptr->pkt.get);
MPIDI_Pkt_init(get_pkt, MPIDI_CH3_PKT_GET);
get_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
win_ptr->disp_units[target_rank] * target_disp;
get_pkt->count = target_count;
get_pkt->datatype = target_datatype;
get_pkt->dataloop_size = 0;
get_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
get_pkt->source_win_handle = win_ptr->handle;
new_ptr->origin_addr = result_addr;
new_ptr->origin_count = result_count;
new_ptr->origin_datatype = result_datatype;
new_ptr->target_rank = target_rank;
}
else {
MPIDI_CH3_Pkt_accum_t *accum_pkt = &(new_ptr->pkt.accum);
MPIDI_Pkt_init(accum_pkt, MPIDI_CH3_PKT_GET_ACCUM);
accum_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
win_ptr->disp_units[target_rank] * target_disp;
accum_pkt->count = target_count;
accum_pkt->datatype = target_datatype;
accum_pkt->dataloop_size = 0;
accum_pkt->op = op;
accum_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
accum_pkt->source_win_handle = win_ptr->handle;
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = origin_count;
new_ptr->origin_datatype = origin_datatype;
new_ptr->result_addr = result_addr;
new_ptr->result_count = result_count;
new_ptr->result_datatype = result_datatype;
new_ptr->target_rank = target_rank;
}
/* if source or target datatypes are derived, increment their
* reference counts */
......@@ -524,6 +576,7 @@ int MPIDI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
else {
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
MPIDI_CH3_Pkt_cas_t *cas_pkt = NULL;
/* Append this operation to the RMA ops queue */
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
......@@ -531,20 +584,22 @@ int MPIDI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
MPIU_ERR_POP(mpi_errno);
}
new_ptr->type = MPIDI_RMA_COMPARE_AND_SWAP;
cas_pkt = &(new_ptr->pkt.cas);
MPIDI_Pkt_init(cas_pkt, MPIDI_CH3_PKT_CAS);
cas_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
win_ptr->disp_units[target_rank] * target_disp;
cas_pkt->datatype = datatype;
cas_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
cas_pkt->source_win_handle = win_ptr->handle;
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = 1;
new_ptr->origin_datatype = datatype;
new_ptr->target_rank = target_rank;
new_ptr->target_disp = target_disp;
new_ptr->target_count = 1;
new_ptr->target_datatype = datatype;
new_ptr->result_addr = result_addr;
new_ptr->result_count = 1;
new_ptr->result_datatype = datatype;
new_ptr->compare_addr = (void *) compare_addr;
new_ptr->compare_count = 1;
new_ptr->compare_datatype = datatype;
new_ptr->target_rank = target_rank;
}
fn_exit:
......@@ -615,6 +670,7 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
else {
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
MPIDI_CH3_Pkt_fop_t *fop_pkt = NULL;
/* Append this operation to the RMA ops queue */
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
......@@ -622,18 +678,21 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
MPIU_ERR_POP(mpi_errno);
}
new_ptr->type = MPIDI_RMA_FETCH_AND_OP;
fop_pkt = &(new_ptr->pkt.fop);
MPIDI_Pkt_init(fop_pkt, MPIDI_CH3_PKT_FOP);
fop_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
win_ptr->disp_units[target_rank] * target_disp;
fop_pkt->datatype = datatype;
fop_pkt->op = op;
fop_pkt->source_win_handle = win_ptr->handle;
fop_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = 1;
new_ptr->origin_datatype = datatype;
new_ptr->target_rank = target_rank;
new_ptr->target_disp = target_disp;
new_ptr->target_count = 1;
new_ptr->target_datatype = datatype;
new_ptr->result_addr = result_addr;
new_ptr->result_count = 1;
new_ptr->result_datatype = datatype;
new_ptr->op = op;
new_ptr->target_rank = target_rank;
}
fn_exit:
......
......@@ -1003,7 +1003,7 @@ int MPIDI_CH3_PktHandler_LockPutUnlock(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
new_ptr->source_win_handle = lock_put_unlock_pkt->source_win_handle;
new_ptr->origin_rank = lock_put_unlock_pkt->origin_rank;
new_ptr->pt_single_op->type = MPIDI_RMA_PUT;
new_ptr->pt_single_op->type = MPIDI_CH3_PKT_LOCK_PUT_UNLOCK;
new_ptr->pt_single_op->flags = lock_put_unlock_pkt->flags;
new_ptr->pt_single_op->addr = lock_put_unlock_pkt->addr;
new_ptr->pt_single_op->count = lock_put_unlock_pkt->count;
......@@ -1151,7 +1151,7 @@ int MPIDI_CH3_PktHandler_LockGetUnlock(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
new_ptr->source_win_handle = lock_get_unlock_pkt->source_win_handle;
new_ptr->origin_rank = lock_get_unlock_pkt->origin_rank;
new_ptr->pt_single_op->type = MPIDI_RMA_GET;
new_ptr->pt_single_op->type = MPIDI_CH3_PKT_LOCK_GET_UNLOCK;
new_ptr->pt_single_op->flags = lock_get_unlock_pkt->flags;
new_ptr->pt_single_op->addr = lock_get_unlock_pkt->addr;
new_ptr->pt_single_op->count = lock_get_unlock_pkt->count;
......@@ -1242,7 +1242,7 @@ int MPIDI_CH3_PktHandler_LockAccumUnlock(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
new_ptr->source_win_handle = lock_accum_unlock_pkt->source_win_handle;
new_ptr->origin_rank = lock_accum_unlock_pkt->origin_rank;
new_ptr->pt_single_op->type = MPIDI_RMA_ACCUMULATE;
new_ptr->pt_single_op->type = MPIDI_CH3_PKT_LOCK_ACCUM_UNLOCK;
new_ptr->pt_single_op->flags = lock_accum_unlock_pkt->flags;
new_ptr->pt_single_op->addr = lock_accum_unlock_pkt->addr;
new_ptr->pt_single_op->count = lock_accum_unlock_pkt->count;
......
......@@ -367,7 +367,6 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
MPIDI_RMA_Op_t *curr_ptr;
MPIDI_RMA_Ops_list_t *ops_list;
MPID_Comm *comm_ptr;
MPI_Win source_win_handle, target_win_handle;
MPID_Progress_state progress_state;
int errflag = FALSE;
MPIU_CHKLMEM_DECL(3);
......@@ -487,11 +486,7 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
flags = MPIDI_CH3_PKT_FLAG_RMA_AT_COMPLETE;
}
source_win_handle = win_ptr->handle;
target_win_handle = win_ptr->all_win_handles[curr_ptr->target_rank];
mpi_errno = MPIDI_CH3I_Issue_rma_op(curr_ptr, win_ptr, flags,
source_win_handle, target_win_handle);
mpi_errno = MPIDI_CH3I_Issue_rma_op(curr_ptr, win_ptr, flags);
if (mpi_errno)
MPIU_ERR_POP(mpi_errno);
......@@ -933,7 +928,6 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
MPIDI_RMA_Op_t *curr_ptr;
MPIDI_RMA_Ops_list_t *ops_list;
MPID_Comm *comm_ptr;
MPI_Win source_win_handle, target_win_handle;
int start_grp_size, *ranks_in_win_grp, rank;
int nRequest = 0;
int nRequestNew = 0;
......@@ -1024,11 +1018,7 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
flags = MPIDI_CH3_PKT_FLAG_RMA_AT_COMPLETE;
}
source_win_handle = win_ptr->handle;
target_win_handle = win_ptr->all_win_handles[curr_ptr->target_rank];
mpi_errno = MPIDI_CH3I_Issue_rma_op(curr_ptr, win_ptr, flags,
source_win_handle, target_win_handle);
mpi_errno = MPIDI_CH3I_Issue_rma_op(curr_ptr, win_ptr, flags);
if (mpi_errno)
MPIU_ERR_POP(mpi_errno);
......@@ -1410,8 +1400,8 @@ int MPIDI_Win_unlock(int dest, MPID_Win * win_ptr)
if (MPIR_CVAR_CH3_RMA_MERGE_LOCK_OP_UNLOCK &&
win_ptr->targets[dest].remote_lock_state == MPIDI_CH3_WIN_LOCK_CALLED &&
rma_op && rma_op->next == NULL /* There is only one op */ &&
rma_op->type != MPIDI_RMA_COMPARE_AND_SWAP &&
rma_op->type != MPIDI_RMA_FETCH_AND_OP && rma_op->type != MPIDI_RMA_GET_ACCUMULATE) {
rma_op->pkt.type != MPIDI_CH3_PKT_CAS &&
rma_op->pkt.type != MPIDI_CH3_PKT_FOP && rma_op->pkt.type != MPIDI_CH3_PKT_GET_ACCUM) {
/* Single put, get, or accumulate between the lock and unlock. If it
* is of small size and predefined datatype at the target, we
* do an optimization where the lock and the RMA operation are
......@@ -1420,18 +1410,20 @@ int MPIDI_Win_unlock(int dest, MPID_Win * win_ptr)
MPI_Aint type_size;
MPIDI_VC_t *vc;
MPIDI_RMA_Op_t *curr_op = rma_op;
MPI_Datatype target_datatype;
MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
MPID_Datatype_get_size_macro(curr_op->origin_datatype, type_size);
/* msg_sz typically = 65480 */
if (MPIR_DATATYPE_IS_PREDEFINED(curr_op->target_datatype) &&
MPIDI_CH3_PKT_RMA_GET_TARGET_DATATYPE(curr_op->pkt, target_datatype, mpi_errno);
if (MPIR_DATATYPE_IS_PREDEFINED(target_datatype) &&
(type_size * curr_op->origin_count <= vc->eager_max_msg_sz)) {
single_op_opt = 1;
/* Set the lock granted flag to 1 */
win_ptr->targets[dest].remote_lock_state = MPIDI_CH3_WIN_LOCK_GRANTED;
if (curr_op->type == MPIDI_RMA_GET) {
if (curr_op->pkt.type == MPIDI_CH3_PKT_GET) {
mpi_errno = send_lock_get(win_ptr, dest);
wait_for_rma_done_pkt = 0;
}
......@@ -1912,7 +1904,6 @@ static int do_passive_target_rma(MPID_Win * win_ptr, int target_rank,
{
int mpi_errno = MPI_SUCCESS, nops;
MPIDI_RMA_Op_t *curr_ptr;
MPI_Win source_win_handle = MPI_WIN_NULL, target_win_handle = MPI_WIN_NULL;
int nRequest = 0, nRequestNew = 0;
MPIDI_STATE_DECL(MPID_STATE_DO_PASSIVE_TARGET_RMA);
......@@ -1935,9 +1926,9 @@ static int do_passive_target_rma(MPID_Win * win_ptr, int target_rank,
/* Check if we can piggyback the RMA done acknowledgement on the last
* operation in the epoch. */
if (tail->type == MPIDI_RMA_GET ||
tail->type == MPIDI_RMA_COMPARE_AND_SWAP ||
tail->type == MPIDI_RMA_FETCH_AND_OP || tail->type == MPIDI_RMA_GET_ACCUMULATE) {
if (tail->pkt.type == MPIDI_CH3_PKT_GET ||
tail->pkt.type == MPIDI_CH3_PKT_CAS ||
tail->pkt.type == MPIDI_CH3_PKT_FOP || tail->pkt.type == MPIDI_CH3_PKT_GET_ACCUM) {
/* last operation sends a response message. no need to wait
* for an additional rma done pkt */
*wait_for_rma_done_pkt = 0;
......@@ -1952,7 +1943,7 @@ static int do_passive_target_rma(MPID_Win * win_ptr, int target_rank,
curr_ptr = MPIDI_CH3I_RMA_Ops_head(&win_ptr->targets[target_rank].rma_ops_list);
while (curr_ptr != NULL) {
if (curr_ptr->type == MPIDI_RMA_GET) {
if (curr_ptr->pkt.type == MPIDI_CH3_PKT_GET) {
/* Found a GET, move it to the end */
*wait_for_rma_done_pkt = 0;
......@@ -1979,10 +1970,6 @@ static int do_passive_target_rma(MPID_Win * win_ptr, int target_rank,
curr_ptr = MPIDI_CH3I_RMA_Ops_head(&win_ptr->targets[target_rank].rma_ops_list);
if (curr_ptr != NULL) {
target_win_handle = win_ptr->all_win_handles[curr_ptr->target_rank];
}
while (curr_ptr != NULL) {
MPIDI_CH3_Pkt_flags_t flags = MPIDI_CH3_PKT_FLAG_NONE;
......@@ -2028,12 +2015,9 @@ static int do_passive_target_rma(MPID_Win * win_ptr, int target_rank,
if (*wait_for_rma_done_pkt) {
flags |= MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK;
}
source_win_handle = win_ptr->handle;
}