Commit 274a5a70 authored by Pavan Balaji
Browse files

Simplified RMA_Op structure.



We were duplicating information: once in the operation structure and
again in the packet structure built when the message is actually issued.
Since most of the information is the same anyway, this patch just
embeds a packet structure into the operation structure.
Signed-off-by: Xin Zhao <xinzhao3@illinois.edu>
parent 006a54bd
......@@ -62,7 +62,7 @@ typedef union {
such as different RMA types. */
enum MPIDI_CH3_Pkt_types
{
MPIDI_CH3_PKT_EAGER_SEND = 0,
MPIDI_CH3_PKT_EAGER_SEND = 53,
#if defined(USE_EAGER_SHORT)
MPIDI_CH3_PKT_EAGERSHORT_SEND,
#endif /* defined(USE_EAGER_SHORT) */
......@@ -197,6 +197,43 @@ MPIDI_CH3_Pkt_cancel_send_resp_t;
MPIDI_CH3_PKT_DEFS
#endif
/*
 * MPIDI_CH3_PKT_RMA_GET_TARGET_DATATYPE(pkt_, datatype_)
 *
 * Store the target datatype carried by an RMA operation packet in datatype_.
 *
 *   pkt_      - packet union expression; its `type` member selects which
 *               variant (put / get / accum / cas / fop) is read.
 *   datatype_ - lvalue that receives that variant's `datatype` member.
 *
 * Handled packet types: PUT, GET, ACCUMULATE, GET_ACCUM, CAS and FOP.
 * For any other packet type datatype_ is left unmodified, so callers
 * should give it a defined value before invoking the macro.
 *
 * Notes:
 *  - pkt_ is parenthesized at every use, so arguments such as `*ptr`
 *    expand correctly.
 *  - The member is read in place; no by-value copy of the packet variant
 *    is made and no helper locals are declared, so nothing in the
 *    expansion can collide with identifiers at the call site.
 *  - pkt_ is expanded twice (once for `type`, once for the selected
 *    variant), so it must not have side effects.
 *  - The expansion is a plain brace block, as before, so existing call
 *    sites keep compiling with or without a trailing semicolon.
 */
#define MPIDI_CH3_PKT_RMA_GET_TARGET_DATATYPE(pkt_, datatype_) \
{ \
    switch ((pkt_).type) { \
    case (MPIDI_CH3_PKT_PUT): \
        (datatype_) = (pkt_).put.datatype; \
        break; \
    case (MPIDI_CH3_PKT_GET): \
        (datatype_) = (pkt_).get.datatype; \
        break; \
    case (MPIDI_CH3_PKT_ACCUMULATE): \
    case (MPIDI_CH3_PKT_GET_ACCUM): \
        (datatype_) = (pkt_).accum.datatype; \
        break; \
    case (MPIDI_CH3_PKT_CAS): \
        (datatype_) = (pkt_).cas.datatype; \
        break; \
    case (MPIDI_CH3_PKT_FOP): \
        (datatype_) = (pkt_).fop.datatype; \
        break; \
    default: \
        break; \
    } \
}
typedef struct MPIDI_CH3_Pkt_put
{
MPIDI_CH3_Pkt_type_t type;
......@@ -296,6 +333,7 @@ typedef struct MPIDI_CH3_Pkt_cas
MPI_Datatype datatype;
void *addr;
MPI_Request request_handle;
MPI_Win source_win_handle;
MPI_Win target_win_handle; /* Used in the last RMA operation in each
* epoch for decrementing rma op counter in
* active target rma and for unlocking window
......@@ -321,6 +359,7 @@ typedef struct MPIDI_CH3_Pkt_fop
void *addr;
MPI_Op op;
MPI_Request request_handle;
MPI_Win source_win_handle;
MPI_Win target_win_handle; /* Used in the last RMA operation in each
* epoch for decrementing rma op counter in
* active target rma and for unlocking window
......
......@@ -13,17 +13,6 @@ MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_wincreate_allgather);
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_winfree_rs);
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_winfree_complete);
/* Kinds of RMA operation; a value of this type is stored in the `type`
 * member of MPIDI_RMA_Op_t.  The numbering is explicit, starts at 23 and
 * skips 26, which belonged to the MPIDI_RMA_LOCK entry that is commented
 * out below. */
typedef enum MPIDI_RMA_Op_type {
MPIDI_RMA_PUT = 23,
MPIDI_RMA_GET = 24,
MPIDI_RMA_ACCUMULATE = 25,
/* REMOVED: MPIDI_RMA_LOCK = 26, */
MPIDI_RMA_ACC_CONTIG = 27,
MPIDI_RMA_GET_ACCUMULATE = 28,
MPIDI_RMA_COMPARE_AND_SWAP = 29,
MPIDI_RMA_FETCH_AND_OP = 30
} MPIDI_RMA_Op_type_t;
/* Special case RMA operations */
enum MPIDI_RMA_Datatype {
......@@ -62,33 +51,29 @@ typedef struct MPIDI_RMA_dtype_info { /* for derived datatypes */
/*
 * One queued RMA operation: a list node linked through `prev` and `next`
 * that records the origin/result/compare buffers of the operation, the
 * target rank, and the packet (`pkt`) describing the operation.
 *
 * NOTE(review): this listing comes from a diff view whose +/- markers
 * were stripped, so removed and added members are interleaved and several
 * (compare_addr, compare_datatype, request, dtype_info, dataloop,
 * target_rank) appear twice.  It presumably does not match either
 * revision of the real header verbatim -- confirm the actual member list
 * against the repository before relying on it.
 */
typedef struct MPIDI_RMA_Op {
struct MPIDI_RMA_Op *prev; /* pointer to previous element in list */
struct MPIDI_RMA_Op *next; /* pointer to next element in list */
/* FIXME: It would be better to setup the packet that will be sent, at
least in most cases (if, as a result of the sync/ops/sync sequence,
a different packet type is needed, it can be extracted from the
information otherwise stored). */
MPIDI_RMA_Op_type_t type;
/* origin-side buffer description */
void *origin_addr;
int origin_count;
MPI_Datatype origin_datatype;
int target_rank;
/* target-side description kept directly in the op */
MPI_Aint target_disp;
int target_count;
MPI_Datatype target_datatype;
MPI_Op op; /* for accumulate */
/* Used to complete operations */
struct MPID_Request *request;
MPIDI_RMA_dtype_info dtype_info;
void *dataloop;
void *compare_addr;
MPI_Datatype compare_datatype;
/* buffer description for operations that return a result */
void *result_addr;
int result_count;
MPI_Datatype result_datatype;
void *compare_addr;
int compare_count;
MPI_Datatype compare_datatype;
struct MPID_Request *request;
MPIDI_RMA_dtype_info dtype_info;
void *dataloop;
int target_rank;
MPIDI_CH3_Pkt_t pkt; /* packet for this operation, embedded in the op */
} MPIDI_RMA_Op_t;
typedef struct MPIDI_PT_single_op {
int type; /* put, get, or accum. */
enum MPIDI_CH3_Pkt_types type; /* put, get, or accum. */
void *addr;
int count;
MPI_Datatype datatype;
......
......@@ -961,7 +961,7 @@ int MPIDI_CH3I_Release_lock(MPID_Win *win_ptr)
MPIDI_PT_single_op * single_op;
single_op = lock_queue->pt_single_op;
if (single_op->type == MPIDI_RMA_PUT) {
if (single_op->type == MPIDI_CH3_PKT_LOCK_PUT_UNLOCK) {
mpi_errno = MPIR_Localcopy(single_op->data,
single_op->count,
single_op->datatype,
......@@ -969,21 +969,21 @@ int MPIDI_CH3I_Release_lock(MPID_Win *win_ptr)
single_op->count,
single_op->datatype);
}
else if (single_op->type == MPIDI_RMA_ACCUMULATE) {
else if (single_op->type == MPIDI_CH3_PKT_LOCK_ACCUM_UNLOCK) {
if (win_ptr->shm_allocated == TRUE)
MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
mpi_errno = do_simple_accumulate(single_op);
if (win_ptr->shm_allocated == TRUE)
MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
}
else if (single_op->type == MPIDI_RMA_GET) {
else if (single_op->type == MPIDI_CH3_PKT_LOCK_GET_UNLOCK) {
mpi_errno = do_simple_get(win_ptr, lock_queue);
}
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
/* if put or accumulate, send rma done packet and release lock. */
if (single_op->type != MPIDI_RMA_GET) {
if (single_op->type != MPIDI_CH3_PKT_LOCK_GET_UNLOCK) {
/* NOTE: Only *queued* single_op operations are completed here.
Lock-op-unlock/single_op RMA ops can also be completed as
they arrive within various packet/request handlers via
......
......@@ -86,20 +86,46 @@ int MPIDI_Get_accumulate(const void *origin_addr, int origin_count,
/* TODO: Can we use the MPIDI_RMA_ACC_CONTIG optimization? */
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
new_ptr->type = MPIDI_RMA_GET_ACCUMULATE;
/* Cast away const'ness for origin_address as MPIDI_RMA_Op_t
* contain both PUT and GET like ops */
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = origin_count;
new_ptr->origin_datatype = origin_datatype;
new_ptr->result_addr = result_addr;
new_ptr->result_count = result_count;
new_ptr->result_datatype = result_datatype;
new_ptr->target_rank = target_rank;
new_ptr->target_disp = target_disp;
new_ptr->target_count = target_count;
new_ptr->target_datatype = target_datatype;
new_ptr->op = op;
if (op == MPI_NO_OP) {
/* Convert GAcc to a Get */
MPIDI_CH3_Pkt_get_t *get_pkt = &(new_ptr->pkt.get);
MPIDI_Pkt_init(get_pkt, MPIDI_CH3_PKT_GET);
get_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
win_ptr->disp_units[target_rank] * target_disp;
get_pkt->count = target_count;
get_pkt->datatype = target_datatype;
get_pkt->dataloop_size = 0;
get_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
get_pkt->source_win_handle = win_ptr->handle;
new_ptr->origin_addr = result_addr;
new_ptr->origin_count = result_count;
new_ptr->origin_datatype = result_datatype;
new_ptr->target_rank = target_rank;
}
else {
MPIDI_CH3_Pkt_accum_t *accum_pkt = &(new_ptr->pkt.accum);
MPIDI_Pkt_init(accum_pkt, MPIDI_CH3_PKT_GET_ACCUM);
accum_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
win_ptr->disp_units[target_rank] * target_disp;
accum_pkt->count = target_count;
accum_pkt->datatype = target_datatype;
accum_pkt->dataloop_size = 0;
accum_pkt->op = op;
accum_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
accum_pkt->source_win_handle = win_ptr->handle;
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = origin_count;
new_ptr->origin_datatype = origin_datatype;
new_ptr->result_addr = result_addr;
new_ptr->result_count = result_count;
new_ptr->result_datatype = result_datatype;
new_ptr->target_rank = target_rank;
}
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
/* if source or target datatypes are derived, increment their
......@@ -188,6 +214,8 @@ int MPIDI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
MPIDI_CH3_Pkt_cas_t *cas_pkt = NULL;
/* Append this operation to the RMA ops queue */
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
......@@ -195,20 +223,23 @@ int MPIDI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
new_ptr->type = MPIDI_RMA_COMPARE_AND_SWAP;
cas_pkt = &(new_ptr->pkt.cas);
MPIDI_Pkt_init(cas_pkt, MPIDI_CH3_PKT_CAS);
cas_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
win_ptr->disp_units[target_rank] * target_disp;
cas_pkt->datatype = datatype;
cas_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
cas_pkt->source_win_handle = win_ptr->handle;
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = 1;
new_ptr->origin_datatype = datatype;
new_ptr->target_rank = target_rank;
new_ptr->target_disp = target_disp;
new_ptr->target_count = 1;
new_ptr->target_datatype = datatype;
new_ptr->result_addr = result_addr;
new_ptr->result_count = 1;
new_ptr->result_datatype = datatype;
new_ptr->compare_addr = (void *) compare_addr;
new_ptr->compare_count = 1;
new_ptr->compare_datatype = datatype;
new_ptr->target_rank = target_rank;
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
}
......@@ -280,6 +311,8 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
MPIDI_CH3_Pkt_fop_t *fop_pkt = NULL;
/* Append this operation to the RMA ops queue */
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
......@@ -287,18 +320,21 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
new_ptr->type = MPIDI_RMA_FETCH_AND_OP;
fop_pkt = &(new_ptr->pkt.fop);
MPIDI_Pkt_init(fop_pkt, MPIDI_CH3_PKT_FOP);
fop_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
win_ptr->disp_units[target_rank] * target_disp;
fop_pkt->datatype = datatype;
fop_pkt->op = op;
fop_pkt->source_win_handle = win_ptr->handle;
fop_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = 1;
new_ptr->origin_datatype = datatype;
new_ptr->target_rank = target_rank;
new_ptr->target_disp = target_disp;
new_ptr->target_count = 1;
new_ptr->target_datatype = datatype;
new_ptr->result_addr = result_addr;
new_ptr->result_count = 1;
new_ptr->result_datatype = datatype;
new_ptr->op = op;
new_ptr->target_rank = target_rank;
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
}
......
......@@ -173,6 +173,8 @@ int MPIDI_Put(const void *origin_addr, int origin_count, MPI_Datatype
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
MPIDI_CH3_Pkt_put_t *put_pkt = NULL;
/* queue it up */
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
......@@ -180,18 +182,22 @@ int MPIDI_Put(const void *origin_addr, int origin_count, MPI_Datatype
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
put_pkt = &(new_ptr->pkt.put);
MPIDI_Pkt_init(put_pkt, MPIDI_CH3_PKT_PUT);
put_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
win_ptr->disp_units[target_rank] * target_disp;
put_pkt->count = target_count;
put_pkt->datatype = target_datatype;
put_pkt->dataloop_size = 0;
put_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
put_pkt->source_win_handle = win_ptr->handle;
/* FIXME: For contig and very short operations, use a streamlined op */
new_ptr->type = MPIDI_RMA_PUT;
/* Cast away const'ness for the origin address, as the
* MPIDI_RMA_Op_t structure is used for both PUT and GET like
* operations */
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = origin_count;
new_ptr->origin_datatype = origin_datatype;
new_ptr->target_rank = target_rank;
new_ptr->target_disp = target_disp;
new_ptr->target_count = target_count;
new_ptr->target_datatype = target_datatype;
new_ptr->target_rank = target_rank;
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
/* if source or target datatypes are derived, increment their
......@@ -285,6 +291,8 @@ int MPIDI_Get(void *origin_addr, int origin_count, MPI_Datatype
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
MPIDI_CH3_Pkt_get_t *get_pkt = NULL;
/* queue it up */
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
......@@ -292,15 +300,22 @@ int MPIDI_Get(void *origin_addr, int origin_count, MPI_Datatype
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
get_pkt = &(new_ptr->pkt.get);
MPIDI_Pkt_init(get_pkt, MPIDI_CH3_PKT_GET);
get_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
win_ptr->disp_units[target_rank] * target_disp;
get_pkt->count = target_count;
get_pkt->datatype = target_datatype;
get_pkt->dataloop_size = 0;
get_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
get_pkt->source_win_handle = win_ptr->handle;
/* FIXME: For contig and very short operations, use a streamlined op */
new_ptr->type = MPIDI_RMA_GET;
new_ptr->origin_addr = origin_addr;
new_ptr->origin_count = origin_count;
new_ptr->origin_datatype = origin_datatype;
new_ptr->target_rank = target_rank;
new_ptr->target_disp = target_disp;
new_ptr->target_count = target_count;
new_ptr->target_datatype = target_datatype;
new_ptr->target_rank = target_rank;
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
/* if source or target datatypes are derived, increment their
......@@ -396,6 +411,8 @@ int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
MPIDI_RMA_Op_t *new_ptr = NULL;
MPIDI_CH3_Pkt_accum_t *accum_pkt = NULL;
/* queue it up */
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
......@@ -405,35 +422,52 @@ int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
/* If predefined and contiguous, use a simplified element */
if (MPIR_DATATYPE_IS_PREDEFINED(origin_datatype) &&
MPIR_DATATYPE_IS_PREDEFINED(target_datatype) && enableShortACC) {
MPI_Aint origin_type_size;
size_t len;
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
new_ptr->type = MPIDI_RMA_ACC_CONTIG;
/* Only the information needed for the contig/predefined acc */
/* Cast away const'ness for origin_address as
* MPIDI_RMA_Op_t contain both PUT and GET like ops */
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = origin_count;
new_ptr->origin_datatype = origin_datatype;
new_ptr->target_rank = target_rank;
new_ptr->target_disp = target_disp;
new_ptr->target_count = target_count;
new_ptr->target_datatype = target_datatype;
new_ptr->op = op;
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
goto fn_exit;
MPID_Datatype_get_size_macro(origin_datatype, origin_type_size);
MPIU_Assign_trunc(len, origin_count * origin_type_size, size_t);
if (MPIR_CVAR_CH3_RMA_ACC_IMMED && len <= MPIDI_RMA_IMMED_INTS*sizeof(int)) {
MPIDI_CH3_Pkt_accum_immed_t *accumi_pkt = &(new_ptr->pkt.accum_immed);
MPIDI_Pkt_init(accumi_pkt, MPIDI_CH3_PKT_ACCUM_IMMED);
accumi_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
win_ptr->disp_units[target_rank] * target_disp;
accumi_pkt->count = target_count;
accumi_pkt->datatype = target_datatype;
accumi_pkt->op = op;
accumi_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
accumi_pkt->source_win_handle = win_ptr->handle;
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = origin_count;
new_ptr->origin_datatype = origin_datatype;
new_ptr->target_rank = target_rank;
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
goto fn_exit;
}
}
MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
new_ptr->type = MPIDI_RMA_ACCUMULATE;
/* Cast away const'ness for origin_address as MPIDI_RMA_Op_t
* contain both PUT and GET like ops */
accum_pkt = &(new_ptr->pkt.accum);
MPIDI_Pkt_init(accum_pkt, MPIDI_CH3_PKT_ACCUMULATE);
accum_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
win_ptr->disp_units[target_rank] * target_disp;
accum_pkt->count = target_count;
accum_pkt->datatype = target_datatype;
accum_pkt->dataloop_size = 0;
accum_pkt->op = op;
accum_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
accum_pkt->source_win_handle = win_ptr->handle;
new_ptr->origin_addr = (void *) origin_addr;
new_ptr->origin_count = origin_count;
new_ptr->origin_datatype = origin_datatype;
new_ptr->target_rank = target_rank;
new_ptr->target_disp = target_disp;
new_ptr->target_count = target_count;
new_ptr->target_datatype = target_datatype;
new_ptr->op = op;
new_ptr->target_rank = target_rank;
MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
/* if source or target datatypes are derived, increment their
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment