Commit 7473a3b3 authored by Su Huang's avatar Su Huang Committed by Michael Blocksome
Browse files

PAMId: MPI_Win_set_info, MPI_Win_get_info and the framework for window creation



The following changes apply only to the PAMId layer
 - provided MPID_Win_set_info and MPID_Win_get_info functions
 - redesigned the window creation. The change is to support the new window
   creation functions introduced in MPI 3.0.
 - updated pscw, MPIDI_WinCtrlSend etc. to reflect the change made in
   MPI_Group_incl().

The following changes were made after a code review
 - added a comment for zero_req in mpidi_onesided.h
 - replaced "pami_task_t peer" with "int rank" in MPIDI_WinCtrlSend()

(ibm) F189041
Signed-off-by: Michael Blocksome <blocksom@us.ibm.com>
parent 4a220990
......@@ -370,10 +370,28 @@ struct MPID_Win;
struct MPID_Group;
/**
* \brief Collective information related to a window
*
* This structure is used to share information about a local window with
* all nodes in the window communicator. Part of that information includes
* statistics about RMA operations during access/exposure epochs.
*
* The structure is allocated as an array sized for the window communicator.
* Each entry in the array corresponds directly to the node of the same rank.
*/
/**
 * \brief Discriminates the two kinds of lock request stored in a
 *        struct MPIDI_Win_lock entry (see its \c mtype field).
 */
typedef enum
{
  MPIDI_REQUEST_LOCK    = 0, /**< NOTE(review): presumably a single-target lock request - confirm */
  MPIDI_REQUEST_LOCKALL = 1  /**< NOTE(review): presumably a lock-all request - confirm */
} MPIDI_LOCK_TYPE_t;
/**
 * \brief One lock request, chained into a singly linked list
 *
 * Entries are linked through \c next; struct MPIDI_Win_queue keeps the
 * head and tail pointers of such a list.
 */
struct MPIDI_Win_lock
{
  struct MPIDI_Win_lock *next;  /**< Following entry in the list */
  unsigned               rank;  /**< NOTE(review): presumably the rank of the requesting peer - confirm */
  MPIDI_LOCK_TYPE_t      mtype; /**< MPIDI_REQUEST_LOCK or MPIDI_REQUEST_LOCKALL */
  int                    type;  /**< NOTE(review): presumably the MPI lock type (shared/exclusive) - confirm */
};
struct MPIDI_Win_queue
......@@ -381,32 +399,56 @@ struct MPIDI_Win_queue
struct MPIDI_Win_lock *head;
struct MPIDI_Win_lock *tail;
};
/**
* \brief Collective information related to a window
*
* This structure is used to share information about a local window with
* all nodes in the window communicator. Part of that information includes
* statistics about RMA operations during access/exposure epochs.
*
* The structure is allocated as an array sized for the window communicator.
* Each entry in the array corresponds directly to the node of the same rank.
*/
struct MPIDI_Win_info
/**
 * \brief Bit flags for the "accumulate_ordering" window info hint
 *
 * The flags are OR-ed together into a mask; each bit corresponds to one of
 * the "rar", "raw", "war" and "waw" tokens of the hint value.
 */
typedef enum {
  MPIDI_ACCU_ORDER_RAR = 1 << 0, /**< "rar" */
  MPIDI_ACCU_ORDER_RAW = 1 << 1, /**< "raw" */
  MPIDI_ACCU_ORDER_WAR = 1 << 2, /**< "war" */
  MPIDI_ACCU_ORDER_WAW = 1 << 3  /**< "waw" */
} MPIDI_Win_info_accumulate_ordering;
/**
 * \brief Values of the "accumulate_ops" window info hint
 *
 * The window code (MPIDI_Win_init, MPIDI_Win_get_info) refers to these
 * values by their MPIDI_ACCU_OPS_* names, so those are the primary
 * enumerators.  The shorter MPIDI_ACCU_* spellings are kept as aliases with
 * identical values so that any existing user of the old names keeps
 * compiling.
 */
typedef enum {
  MPIDI_ACCU_OPS_SAME_OP,        /**< hint value "same_op" */
  MPIDI_ACCU_OPS_SAME_OP_NO_OP,  /**< hint value "same_op_no_op" */
  MPIDI_ACCU_SAME_OP       = MPIDI_ACCU_OPS_SAME_OP,       /**< alias of MPIDI_ACCU_OPS_SAME_OP */
  MPIDI_ACCU_SAME_OP_NO_OP = MPIDI_ACCU_OPS_SAME_OP_NO_OP  /**< alias of MPIDI_ACCU_OPS_SAME_OP_NO_OP */
} MPIDI_Win_info_accumulate_ops;
/**
 * \brief Per-window storage for the window info hints
 *
 * Written by MPIDI_Win_set_info() / MPIDI_Win_init() and turned back into
 * key/value strings by MPIDI_Win_get_info().
 */
typedef struct MPIDI_Win_info_args {
  int                                no_locks;               /**< non-zero when the "no_locks" hint is "true" */
  MPIDI_Win_info_accumulate_ordering accumulate_ordering;    /**< OR-ed MPIDI_ACCU_ORDER_* flags */
  MPIDI_Win_info_accumulate_ops      accumulate_ops;         /**< default is same_op_no_op */
  int                                same_size;              /**< reported as "same_size" for MPI_WIN_FLAVOR_ALLOCATE windows */
  int                                alloc_shared_noncontig; /**< reported as "alloc_shared_noncontig" for MPI_WIN_FLAVOR_SHARED windows */
} MPIDI_Win_info_args;
/**
 * \brief Pair of RMA operation counters
 *
 * The window keeps an array of these (\c origin) that is indexed by target
 * rank.
 */
typedef struct {
  int nStarted;    /**< NOTE(review): presumably the number of operations issued - confirm */
  int nCompleted;  /**< Bumped by MPIDI_Win_DoneCB() once a request has fully completed */
} RMA_nOps_t;
/**
 * \brief Opaque work queue handle plus an element counter
 */
typedef struct workQ_t {
  void *msgQ;   /**< NOTE(review): untyped pointer to the queued messages - element type not visible here */
  int   count;  /**< NOTE(review): presumably the number of queued entries - confirm */
} workQ_t;
typedef struct MPIDI_Win_info
{
void * base_addr; /**< Node's exposure window base address */
struct MPID_Win * win;
uint32_t disp_unit; /**< Node's exposure window displacement units */
pami_memregion_t memregion; /**< Memory region descriptor for each node */
#ifdef RDMA_FAILOVER
uint32_t memregion_used;
#endif
};
} MPIDI_Win_info;
/**
* \brief Structure of PAMI extensions to MPID_Win structure
*/
struct MPIDI_Win
{
struct MPIDI_Win_info * info; /**< allocated array of collective info */
MPIDI_Win_info_args info_args;
void ** shm_base_addrs; /* base address shared by all process in comm */
workQ_t work;
RMA_nOps_t *origin;
struct MPIDI_Win_sync
{
#if 0
......@@ -432,6 +474,7 @@ struct MPIDI_Win
struct
{
volatile unsigned locked;
volatile unsigned allLocked;
} remote;
struct
{
......
......@@ -35,9 +35,9 @@ lib_lib@MPILIBNAME@_la_SOURCES += \
src/mpid/pamid/src/onesided/mpid_win_lock.c \
src/mpid/pamid/src/onesided/mpid_win_pscw.c \
src/mpid/pamid/src/onesided/mpid_win_put.c \
src/mpid/pamid/src/onesided/mpidi_win_control.c \
src/mpid/pamid/src/onesided/mpid_win_get_info.c \
src/mpid/pamid/src/onesided/mpid_win_set_info.c \
src/mpid/pamid/src/onesided/mpidi_win_control.c
src/mpid/pamid/src/onesided/mpid_win_set_info.c
endif BUILD_PAMID
......
......@@ -27,7 +27,9 @@ MPIDI_Win_DoneCB(pami_context_t context,
void * cookie,
pami_result_t result)
{
int target_rank;
MPIDI_Win_request *req = (MPIDI_Win_request*)cookie;
target_rank = req->target.rank;
++req->win->mpid.sync.complete;
++req->origin.completed;
......@@ -45,6 +47,7 @@ MPIDI_Win_DoneCB(pami_context_t context,
MPID_assert(mpi_errno == MPI_SUCCESS);
MPID_Datatype_release(req->origin.dt.pointer);
MPIU_Free(req->buffer);
MPIU_Free(req->user_buffer);
req->buffer_free = 0;
}
}
......@@ -52,8 +55,12 @@ MPIDI_Win_DoneCB(pami_context_t context,
//if (req->win->mpid.sync.total == req->win->mpid.sync.complete)
if (req->origin.completed == req->target.dt.num_contig)
{
if (req->buffer_free)
MPIU_Free(req->buffer);
req->win->mpid.origin[target_rank].nCompleted++;
if (req->buffer_free) {
MPIU_Free(req->buffer);
MPIU_Free(req->user_buffer);
req->buffer_free = 0;
}
if (req->accum_headers)
MPIU_Free(req->accum_headers);
MPIU_Free(req);
......
......@@ -21,6 +21,145 @@
*/
#include "mpidi_onesided.h"
/**
 * \brief Allocate an MPID_Win object and fill in everything but its base address
 *
 * Allocates the window handle and the per-rank MPIDI_Win_info array, takes a
 * reference on the communicator, records size / displacement unit / flavor /
 * model, and applies the info hints.  \c win->base is left NULL; the caller
 * is expected to set it.
 *
 * \param[in]  length        Window size, stored in \c win->size
 * \param[in]  disp_unit     Displacement unit, stored in the window and in this
 *                           rank's MPIDI_Win_info entry
 * \param[out] win_ptr       Receives the newly allocated window
 * \param[in]  info          Info hints; may be NULL or MPI_INFO_NULL
 * \param[in]  comm_ptr      Communicator of the window (a reference is added)
 * \param[in]  create_flavor Stored in \c win->create_flavor
 * \param[in]  model         Stored in \c win->model
 * \return MPI_SUCCESS, or the error code produced when the window handle
 *         cannot be allocated
 */
int
MPIDI_Win_init( MPI_Aint length,
                int disp_unit,
                MPID_Win **win_ptr,
                MPID_Info *info,
                MPID_Comm *comm_ptr,
                int create_flavor,
                int model)
{
  int mpi_errno = MPI_SUCCESS;
  size_t rank, size;
  MPIDI_Win_info *winfo;
  static char FCNAME[] = "MPIDI_Win_init";

  /* ----------------------------------------- */
  /*  Setup the common sections of the window  */
  /* ----------------------------------------- */
  MPID_Win *win = (MPID_Win*)MPIU_Handle_obj_alloc(&MPID_Win_mem);
  MPIU_ERR_CHKANDSTMT(win == NULL, mpi_errno, MPI_ERR_NO_MEM,
                      return mpi_errno, "**nomem");
  *win_ptr = win;

  memset(&win->mpid, 0, sizeof(struct MPIDI_Win));
  win->comm_ptr = comm_ptr;
  MPIR_Comm_add_ref(comm_ptr);
  size = comm_ptr->local_size;
  rank = comm_ptr->rank;

  /* One zero-initialised MPIDI_Win_info entry per rank of the communicator. */
  win->mpid.info = MPIU_Malloc(size * sizeof(struct MPIDI_Win_info));
  MPID_assert(win->mpid.info != NULL);
  memset((void *) win->mpid.info, 0, (size * sizeof(struct MPIDI_Win_info)));
  winfo = &win->mpid.info[rank];

  win->errhandler       = NULL;
  win->base             = NULL;
  win->size             = length;
  win->disp_unit        = disp_unit;
  win->create_flavor    = create_flavor;
  win->model            = model;
  win->copyCreateFlavor = 0;
  win->copyModel        = 0;
  win->attributes       = NULL;
  win->copyDispUnit     = 0;
  win->copySize         = 0;

  /* Establish the default info (hint) flags BEFORE applying the caller's
   * info object.  Doing it the other way round would overwrite every hint
   * that MPIDI_Win_set_info() has just stored. */
  win->mpid.info_args.no_locks               = 0;
  win->mpid.info_args.accumulate_ordering    =
      (MPIDI_ACCU_ORDER_RAR | MPIDI_ACCU_ORDER_RAW | MPIDI_ACCU_ORDER_WAR | MPIDI_ACCU_ORDER_WAW);
  win->mpid.info_args.accumulate_ops         = MPIDI_ACCU_OPS_SAME_OP_NO_OP; /* default */
  win->mpid.info_args.same_size              = 0;
  win->mpid.info_args.alloc_shared_noncontig = 0;

  if ((info != NULL) && ((int *)info != (int *) MPI_INFO_NULL)) {
      mpi_errno = MPIDI_Win_set_info(win, info);
  }
  MPID_assert(mpi_errno == 0);

  /* Per-target-rank operation counters. */
  win->mpid.origin = MPIU_Calloc0(size, RMA_nOps_t);

#ifdef RDMA_FAILOVER
  /* The field only exists when RDMA_FAILOVER is defined. */
  winfo->memregion_used = 0;
#endif
  winfo->disp_unit      = disp_unit;

  return mpi_errno;
}
/**
 * \brief Register the local window memory with PAMI and exchange window info
 *
 * Creates a PAMI memory region for the local window when applicable, then
 * runs an in-place allgather over the window communicator so that every rank
 * ends up with the MPIDI_Win_info entry of every other rank.
 *
 * \param[in]     base    Not read by this function; the region that gets
 *                        registered is \c win->base / \c win->size
 * \param[in]     size    Registration is skipped when this is 0
 * \param[in,out] win_ptr Window whose \c mpid.info array is filled in
 * \return MPI_SUCCESS or the error code of the failing step
 */
int
MPIDI_Win_allgather(void *base, MPI_Aint size, MPID_Win **win_ptr )
{
  int mpi_errno = MPI_SUCCESS;
  MPID_Win *win;
  int rank;
  MPID_Comm *comm_ptr;
  size_t length_out = 0;
  pami_result_t rc;
  MPIDI_Win_info *winfo;
  static char FCNAME[] = "MPIDI_Win_allgather";

  win      = *win_ptr;
  comm_ptr = win->comm_ptr;
  rank     = comm_ptr->rank;
  winfo    = &win->mpid.info[rank];

  /* --------------------------------------- */
  /*  Setup the PAMI sections of the window  */
  /* --------------------------------------- */
#ifdef USE_PAMI_RDMA
  if (size != 0)
    {
      rc = PAMI_Memregion_create(MPIDI_Context[0], win->base, win->size, &length_out, &winfo->memregion);
      MPIU_ERR_CHKANDJUMP((rc != PAMI_SUCCESS), mpi_errno, MPI_ERR_OTHER, "**nomem");
      MPIU_ERR_CHKANDJUMP((win->size < length_out), mpi_errno, MPI_ERR_OTHER, "**nomem");
    }
#else
  if ( (!MPIDI_Process.mp_s_use_pami_get) && (size != 0) )
    {
      rc = PAMI_Memregion_create(MPIDI_Context[0], win->base, win->size, &length_out, &winfo->memregion);
      /* A failed registration is not an error here; the entry simply keeps
       * memregion_used == 0. */
      if (rc == PAMI_SUCCESS)
        {
          winfo->memregion_used = 1;
          MPID_assert(win->size == length_out);
        }
    }
#endif

  /* In-place allgather: each rank contributes its own slot of the array.
   * mpi_errno is also handed in as the error-flag argument; the return
   * value assigned afterwards is what the caller sees. */
  mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE,
                                  0,
                                  MPI_DATATYPE_NULL,
                                  win->mpid.info,
                                  sizeof(struct MPIDI_Win_info),
                                  MPI_BYTE,
                                  comm_ptr,
                                  &mpi_errno);

fn_fail:
  return mpi_errno;
}
/**
* \brief MPI-PAMI glue for MPI_Win_create function
......@@ -45,84 +184,36 @@
*/
int
MPID_Win_create(void * base,
MPI_Aint length,
MPI_Aint size,
int disp_unit,
MPID_Info * info,
MPID_Comm * comm_ptr,
MPID_Win ** win_ptr)
{
int mpi_errno = MPI_SUCCESS;
int rc = MPI_SUCCESS,i;
static char FCNAME[] = "MPID_Win_create";
MPID_Win *win;
MPID_Win *sub_win;
size_t rank,rk;
pami_task_t taskid;
MPIDI_Win_info *winfo;
/* ----------------------------------------- */
/* Setup the common sections of the window */
/* ----------------------------------------- */
MPID_Win *win = (MPID_Win*)MPIU_Handle_obj_alloc(&MPID_Win_mem);
if (win == NULL)
return mpi_errno;
*win_ptr = win;
rc=MPIDI_Win_init(size,disp_unit,win_ptr, info, comm_ptr, MPI_WIN_FLAVOR_CREATE, MPI_WIN_SEPARATE);
win = *win_ptr;
win->base = base;
win->size = length;
win->disp_unit = disp_unit;
/* --------------------------------------- */
/* Setup the PAMI sections of the window */
/* --------------------------------------- */
memset(&win->mpid, 0, sizeof(struct MPIDI_Win));
win->comm_ptr = comm_ptr; MPIR_Comm_add_ref(comm_ptr);
rank = comm_ptr->rank;
winfo = &win->mpid.info[rank];
winfo->base_addr = base;
winfo->win = win;
winfo->disp_unit = disp_unit;
size_t size = comm_ptr->local_size;
size_t rank = comm_ptr->rank;
rc= MPIDI_Win_allgather(base,size,win_ptr);
if (rc != MPI_SUCCESS)
return rc;
win->mpid.info = MPIU_Calloc0(size, struct MPIDI_Win_info);
struct MPIDI_Win_info *winfo = &win->mpid.info[rank];
MPID_assert((base != NULL) || (length == 0));
#ifdef USE_PAMI_RDMA
if (length != 0)
{
size_t length_out = 0;
pami_result_t rc;
rc = PAMI_Memregion_create(MPIDI_Context[0], base, length, &length_out, &winfo->memregion);
MPID_assert(rc == PAMI_SUCCESS);
MPID_assert(length == length_out);
}
#else
if ( (!MPIDI_Process.mp_s_use_pami_get) && (length != 0) )
{
size_t length_out = 0;
pami_result_t rc;
rc = PAMI_Memregion_create(MPIDI_Context[0], base, length, &length_out, &winfo->memregion);
if(rc == PAMI_SUCCESS)
{
winfo->memregion_used = 1;
MPID_assert(length == length_out);
}
}
#endif
winfo->base_addr = base;
/* winfo->win_handle = win->handle; */
winfo->win = win;
winfo->disp_unit = disp_unit;
mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE,
0,
MPI_DATATYPE_NULL,
win->mpid.info,
sizeof(struct MPIDI_Win_info),
MPI_BYTE,
comm_ptr,
&mpi_errno);
if (mpi_errno != MPI_SUCCESS)
return mpi_errno;
mpi_errno = MPIR_Barrier_impl(comm_ptr, &mpi_errno);
if (mpi_errno != MPI_SUCCESS)
return mpi_errno;
return mpi_errno;
}
......@@ -70,6 +70,7 @@ MPID_Win_free(MPID_Win **win_ptr)
#endif
MPIU_Free(win->mpid.info);
MPIU_Free(win->mpid.origin);
MPIR_Comm_release(win->comm_ptr, 0);
......
......@@ -17,7 +17,8 @@
/* (C)Copyright IBM Corp. 2007, 2011 */
/**
* \file src/onesided/mpid_win_get_info.c
* \brief ???
* \brief returns a new info object containing the hints of the window
* associated with win.
*/
#include "mpidi_onesided.h"
......@@ -28,23 +29,75 @@
* \param[in] info_p_p Info hint
* \return MPI_SUCCESS
*/
#undef FUNCNAME
#define FUNCNAME MPID_Win_get_info
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/**
 * \brief Store the window's current hints into an existing info object
 *
 * Sets "no_locks", "accumulate_ordering" and "accumulate_ops" for every
 * window, plus "alloc_shared_noncontig" for MPI_WIN_FLAVOR_SHARED windows
 * or "same_size" for MPI_WIN_FLAVOR_ALLOCATE windows.
 *
 * \param[in]     win       Window whose \c mpid.info_args are reported
 * \param[in,out] info_used Already allocated info object to populate
 * \return Result of the last MPIR_Info_set_impl() call (each call is also
 *         checked with MPID_assert)
 */
int MPIDI_Win_get_info(MPID_Win *win, MPID_Info **info_used)
{
    int mpi_errno = MPI_SUCCESS;

    /* Populate the predefined info keys */
    mpi_errno = MPIR_Info_set_impl(*info_used, "no_locks",
                                   win->mpid.info_args.no_locks ? "true" : "false");
    MPID_assert(mpi_errno == MPI_SUCCESS);

    {
        /* Longest possible value is "rar,raw,war,waw" (15 characters). */
        char buf[32];
        int c = 0;

        if (win->mpid.info_args.accumulate_ordering & MPIDI_ACCU_ORDER_RAR)
            c += snprintf(buf+c, sizeof(buf)-(size_t)c, "%srar", (c > 0) ? "," : "");
        if (win->mpid.info_args.accumulate_ordering & MPIDI_ACCU_ORDER_RAW)
            c += snprintf(buf+c, sizeof(buf)-(size_t)c, "%sraw", (c > 0) ? "," : "");
        if (win->mpid.info_args.accumulate_ordering & MPIDI_ACCU_ORDER_WAR)
            c += snprintf(buf+c, sizeof(buf)-(size_t)c, "%swar", (c > 0) ? "," : "");
        if (win->mpid.info_args.accumulate_ordering & MPIDI_ACCU_ORDER_WAW)
            c += snprintf(buf+c, sizeof(buf)-(size_t)c, "%swaw", (c > 0) ? "," : "");

        if (c == 0) {
            /* Bounded copy sized by the destination; a fixed-length memcpy
             * would read past the end of the string literal. */
            snprintf(buf, sizeof(buf), "%s", "not set ");
        }

        /* Capture the result so the assertion checks this call rather than
         * the previous one. */
        mpi_errno = MPIR_Info_set_impl(*info_used, "accumulate_ordering", buf);
        MPID_assert(mpi_errno == MPI_SUCCESS);
    }

    if (win->mpid.info_args.accumulate_ops == MPIDI_ACCU_OPS_SAME_OP)
        mpi_errno = MPIR_Info_set_impl(*info_used, "accumulate_ops", "same_op");
    else
        mpi_errno = MPIR_Info_set_impl(*info_used, "accumulate_ops", "same_op_no_op");
    MPID_assert(mpi_errno == MPI_SUCCESS);

    if (win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
        mpi_errno = MPIR_Info_set_impl(*info_used, "alloc_shared_noncontig",
                                       win->mpid.info_args.alloc_shared_noncontig ? "true" : "false");
        MPID_assert(mpi_errno == MPI_SUCCESS);
    }
    else if (win->create_flavor == MPI_WIN_FLAVOR_ALLOCATE) {
        mpi_errno = MPIR_Info_set_impl(*info_used, "same_size",
                                       win->mpid.info_args.same_size ? "true" : "false");
        MPID_assert(mpi_errno == MPI_SUCCESS);
    }

    return mpi_errno;
}
int
MPID_Win_get_info(MPID_Win *win,
MPID_Info **info_p_p)
MPID_Info **info_p)
{
int mpi_errno = MPI_SUCCESS;
/* Allocate an empty info object */
mpi_errno = MPIU_Info_alloc(info_p_p);
if (mpi_errno != MPI_SUCCESS)
goto fn_fail;
fn_exit:
return mpi_errno;
fn_fail:
goto fn_exit;
mpi_errno = MPIU_Info_alloc(info_p);
MPID_assert(mpi_errno == MPI_SUCCESS);
mpi_errno = MPIDI_Win_get_info(win, info_p);
MPID_assert(mpi_errno == MPI_SUCCESS);
return MPI_SUCCESS;
}
......@@ -21,25 +21,13 @@
*/
#include "mpidi_onesided.h"
typedef struct
{
unsigned peer;
int lock_type;
MPID_Win * win;
volatile unsigned done;
pami_work_t work;
} MPIDI_WinLock_info;
static inline void
void
MPIDI_WinLockAck_post(pami_context_t context,
unsigned peer,
MPID_Win * win);
static inline void
void
MPIDI_WinLockAdvance(pami_context_t context,
MPID_Win * win)
{
......@@ -112,7 +100,7 @@ MPIDI_WinLockReq_proc(pami_context_t context,
}
static inline void
void
MPIDI_WinLockAck_post(pami_context_t context,
unsigned peer,
MPID_Win * win)
......
......@@ -44,7 +44,7 @@ MPIDI_WinPost_post(pami_context_t context,
};
for (index=0; index < group->size; ++index) {
peer = group->lrank_to_lpid[index].lpid;
peer = group->lrank_to_lpid[index].lrank;
MPIDI_WinCtrlSend(context, &msg, peer, info->win);
}
......@@ -75,7 +75,7 @@ MPIDI_WinComplete_post(pami_context_t context,
};
for (index=0; index < group->size; ++index) {
peer = group->lrank_to_lpid[index].lpid;
peer = group->lrank_to_lpid[index].lrank;
MPIDI_WinCtrlSend(context, &msg, peer, info->win);
}
......
......@@ -17,7 +17,8 @@
/* (C)Copyright IBM Corp. 2007, 2011 */
/**
* \file src/onesided/mpid_win_set_info.c
* \brief ???
* \brief sets new values for the hints of the window
* associated with win.
*/
#include "mpidi_onesided.h"
......@@ -28,13 +29,76 @@
* \param[in] info Info hint
* \return MPI_SUCCESS
*/
#undef FUNCNAME
#define FUNCNAME MPID_Win_set_info
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIDI_Win_set_info(MPID_Win *win, MPID_Info *info)
{
int mpi_errno = MPI_SUCCESS;
MPID_Info *curr_ptr, *prev_ptr;
MPID_Info *c_ptr, *p_ptr;
char *value, *token;
char *savePtr;
prev_ptr = info;
curr_ptr = info->next;
uint save_ordering;
while (curr_ptr) {
if (!strcmp(curr_ptr->key,"no_locks")) {
if (!strcmp(curr_ptr->value,"true")) {
win->mpid.info_args.no_locks=1;
} else
win->mpid.info_args.no_locks=0;
} else if (!strcmp(curr_ptr->key,"accumulate_ordering"))
{
save_ordering=(uint) win->mpid.info_args.accumulate_ordering;
win->mpid.info_args.accumulate_ordering=0;