Commit 2f73f9b6 authored by Michael Blocksome
Browse files

PAMId: added some mpi 3.0 RMA function support



The following functions are added:
- MPI_Win_allocate
- MPI_Win_create_dynamic
- MPI_Win_attach
- MPI_Win_detach
- MPI_Win_lock_all
- MPI_Win_unlock_all
- MPI_Win_sync
- MPI_Win_flush

(ibm) F189033
Signed-off-by: Michael Blocksome <blocksom@us.ibm.com>
parent 7473a3b3
......@@ -93,6 +93,7 @@ MPID_EPOTYPE_START = 2, /**< MPI_Win_start access epoch */
MPID_EPOTYPE_POST = 3, /**< MPI_Win_post exposure epoch */
MPID_EPOTYPE_FENCE = 4, /**< MPI_Win_fence access/exposure epoch */
MPID_EPOTYPE_REFENCE = 5, /**< MPI_Win_fence possible access/exposure epoch */
MPID_EPOTYPE_LOCK_ALL = 6, /**< MPI_Win_lock_all access epoch */
};
enum
......
......@@ -91,11 +91,6 @@ int MPID_Comm_group_failed(MPID_Comm *comm_ptr, MPID_Group **failed_group_ptr)
return 0;
}
int MPID_Win_attach(MPID_Win *win, void *base, MPI_Aint size)
{
MPID_abort();
return 0;
}
int MPID_Win_allocate_shared(MPI_Aint size, int disp_unit, MPID_Info *info_ptr, MPID_Comm *comm_ptr,
void **base_ptr, MPID_Win **win_ptr)
......@@ -113,17 +108,6 @@ int MPID_Rput(const void *origin_addr, int origin_count,
return 0;
}
int MPID_Win_flush_local(int rank, MPID_Win *win)
{
MPID_abort();
return 0;
}
int MPID_Win_detach(MPID_Win *win, const void *base)
{
MPID_abort();
return 0;
}
int MPID_Compare_and_swap(const void *origin_addr, const void *compare_addr,
void *result_addr, MPI_Datatype datatype, int target_rank,
......@@ -167,36 +151,6 @@ int MPID_Win_shared_query(MPID_Win *win, int rank, MPI_Aint *size, int *disp_uni
return 0;
}
int MPID_Win_allocate(MPI_Aint size, int disp_unit, MPID_Info *info,
MPID_Comm *comm, void *baseptr, MPID_Win **win)
{
MPID_abort();
return 0;
}
int MPID_Win_flush(int rank, MPID_Win *win)
{
MPID_abort();
return 0;
}
int MPID_Win_flush_local_all(MPID_Win *win)
{
MPID_abort();
return 0;
}
int MPID_Win_unlock_all(MPID_Win *win)
{
MPID_abort();
return 0;
}
int MPID_Win_create_dynamic(MPID_Info *info, MPID_Comm *comm, MPID_Win **win)
{
MPID_abort();
return 0;
}
int MPID_Rget(void *origin_addr, int origin_count,
MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp,
......@@ -207,18 +161,6 @@ int MPID_Rget(void *origin_addr, int origin_count,
return 0;
}
int MPID_Win_sync(MPID_Win *win)
{
MPID_abort();
return 0;
}
int MPID_Win_flush_all(MPID_Win *win)
{
MPID_abort();
return 0;
}
int MPID_Get_accumulate(const void *origin_addr, int origin_count,
MPI_Datatype origin_datatype, void *result_addr, int result_count,
MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
......@@ -228,8 +170,3 @@ int MPID_Get_accumulate(const void *origin_addr, int origin_count,
return 0;
}
int MPID_Win_lock_all(int assert, MPID_Win *win)
{
MPID_abort();
return 0;
}
......@@ -33,11 +33,18 @@ lib_lib@MPILIBNAME@_la_SOURCES += \
src/mpid/pamid/src/onesided/mpid_win_free.c \
src/mpid/pamid/src/onesided/mpid_win_get.c \
src/mpid/pamid/src/onesided/mpid_win_lock.c \
src/mpid/pamid/src/onesided/mpid_win_lock_all.c \
src/mpid/pamid/src/onesided/mpid_win_pscw.c \
src/mpid/pamid/src/onesided/mpid_win_put.c \
src/mpid/pamid/src/onesided/mpidi_win_control.c \
src/mpid/pamid/src/onesided/mpid_win_allocate.c \
src/mpid/pamid/src/onesided/mpid_win_create_dynamic.c \
src/mpid/pamid/src/onesided/mpid_win_flush.c \
src/mpid/pamid/src/onesided/mpid_win_attach.c \
src/mpid/pamid/src/onesided/mpid_win_detach.c \
src/mpid/pamid/src/onesided/mpid_win_sync.c \
src/mpid/pamid/src/onesided/mpid_win_get_info.c \
src/mpid/pamid/src/onesided/mpid_win_set_info.c
src/mpid/pamid/src/onesided/mpid_win_set_info.c \
src/mpid/pamid/src/onesided/mpidi_win_control.c
endif BUILD_PAMID
......
......@@ -45,6 +45,7 @@ MPIDI_Win_DoneCB(pami_context_t context,
req->origin.count,
req->origin.datatype);
MPID_assert(mpi_errno == MPI_SUCCESS);
MPIDI_Win_datatype_unmap(&req->target.dt);
MPID_Datatype_release(req->origin.dt.pointer);
MPIU_Free(req->buffer);
MPIU_Free(req->user_buffer);
......@@ -52,7 +53,6 @@ MPIDI_Win_DoneCB(pami_context_t context,
}
}
//if (req->win->mpid.sync.total == req->win->mpid.sync.complete)
if (req->origin.completed == req->target.dt.num_contig)
{
req->win->mpid.origin[target_rank].nCompleted++;
......
......@@ -37,6 +37,9 @@ MPIDI_WinAccumCB(pami_context_t context,
MPID_assert(msginfo_size == sizeof(MPIDI_Win_MsgInfo));
MPID_assert(_msginfo != NULL);
const MPIDI_Win_MsgInfo * msginfo = (const MPIDI_Win_MsgInfo*)_msginfo;
const MPIDI_Win_request * req = (const MPIDI_Win_request*)(msginfo->req);
char *tmpbuf;
int mpi_errno, rc;
int null=0;
pami_type_t pami_type;
......@@ -51,6 +54,8 @@ MPIDI_WinAccumCB(pami_context_t context,
TRACE_ERR(" PAMI: type=%p op=%p\n", pami_type, pami_op);
#endif
MPID_assert(recv != NULL);
*recv = zero_recv_parms;
recv->cookie = NULL;
recv->local_fn = NULL;
recv->addr = msginfo->addr;
......@@ -68,20 +73,14 @@ MPIDI_Accumulate(pami_context_t context,
MPIDI_Win_request *req = (MPIDI_Win_request*)_req;
pami_result_t rc;
void *map;
pami_send_t params;
pami_send_t params = {
.send = {
.header = {
.iov_len = sizeof(MPIDI_Win_MsgInfo),
},
.dispatch = MPIDI_Protocols_WinAccum,
.dest = req->dest,
},
.events = {
.cookie = req,
.remote_fn = MPIDI_Win_DoneCB,
},
};
params = zero_send_parms;
params.send.header.iov_len = sizeof(MPIDI_Win_MsgInfo);
params.send.dispatch = MPIDI_Protocols_WinAccum;
params.send.dest = req->dest;
params.events.cookie = req;
params.events.remote_fn = MPIDI_Win_DoneCB;
struct MPIDI_Win_sync* sync = &req->win->mpid.sync;
TRACE_ERR("Start index=%u/%d l-addr=%p r-base=%p r-offset=%zu (sync->started=%u sync->complete=%u)\n",
......@@ -107,12 +106,10 @@ MPIDI_Accumulate(pami_context_t context,
TRACE_ERR(" Sub index=%u bytes=%zu l-offset=%zu r-addr=%p l-buf=%p *(int*)buf=0x%08x *(double*)buf=%g\n",
req->state.index, params.send.data.iov_len, req->state.local_offset, req->accum_headers[req->state.index].addr, buf, *ibuf, *dbuf);
#endif
/** sync->total will be updated with every RMA and the complete
will not change till that RMA has completed. In the meanwhile
the rest of the RMAs will have memory leaks */
if (req->target.dt.num_contig - req->state.index == 1) {
//if (sync->total - sync->complete == 1) {
map=NULL;
if (req->target.dt.map != &req->target.dt.__map) {
map=(void *) req->target.dt.map;
......@@ -174,6 +171,7 @@ MPID_Accumulate(void *origin_addr,
{
int mpi_errno = MPI_SUCCESS;
MPIDI_Win_request *req = MPIU_Calloc0(1, MPIDI_Win_request);
*req = zero_req;
req->win = win;
req->type = MPIDI_WIN_REQUEST_ACCUMULATE;
......@@ -190,6 +188,7 @@ MPID_Accumulate(void *origin_addr,
}
req->offset = target_disp * win->mpid.info[target_rank].disp_unit;
win->mpid.origin[target_rank].nStarted++;
if (origin_datatype == MPI_DOUBLE_INT)
{
......@@ -242,6 +241,7 @@ MPID_Accumulate(void *origin_addr,
if ( (req->origin.dt.size == 0) ||
(target_rank == MPI_PROC_NULL))
{
win->mpid.origin[target_rank].nCompleted++;
MPIU_Free(req);
return MPI_SUCCESS;
}
......@@ -259,7 +259,7 @@ MPID_Accumulate(void *origin_addr,
req->buffer_free = 1;
req->buffer = MPIU_Malloc(req->origin.dt.size);
MPID_assert(req->buffer != NULL);
MPID_Datatype_add_ref(req->origin.dt.pointer);
int mpi_errno = 0;
mpi_errno = MPIR_Localcopy(origin_addr,
origin_count,
......
/* begin_generated_IBM_copyright_prolog */
/* */
/* This is an automatically generated copyright prolog. */
/* After initializing, DO NOT MODIFY OR MOVE */
/* --------------------------------------------------------------- */
/* Licensed Materials - Property of IBM */
/* Blue Gene/Q 5765-PER 5765-PRP */
/* */
/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved */
/* US Government Users Restricted Rights - */
/* Use, duplication, or disclosure restricted */
/* by GSA ADP Schedule Contract with IBM Corp. */
/* */
/* --------------------------------------------------------------- */
/* */
/* end_generated_IBM_copyright_prolog */
/* (C)Copyright IBM Corp. 2007, 2011 */
/**
 * \file src/onesided/mpid_win_allocate.c
 * \brief Allocates local memory and creates an RMA window over it
 *        (device side of MPI_Win_allocate).
 */
#include "mpidi_onesided.h"
/**
 * \brief MPI-PAMI glue for MPI_Win_allocate function
 *
 * Creates a window whose memory is allocated by the library rather than
 * supplied by the caller.  The steps performed here are:
 *   1. reject a negative \a size;
 *   2. call MPIDI_Win_init() to set up the window object with the
 *      MPI_WIN_FLAVOR_ALLOCATE flavor;
 *   3. allocate \a size bytes with MPIU_Malloc() (no allocation when
 *      \a size is 0, in which case the window base is NULL);
 *   4. record the base address, window pointer and displacement unit in
 *      this rank's entry of the window info array;
 *   5. call MPIDI_Win_allgather() to exchange the per-rank entries;
 *   6. store the base address through \a base_ptr and run a barrier on
 *      the communicator.
 *
 * The size argument may differ between processes and size = 0 is valid.
 *
 * \param[in]  size      size of window in bytes (nonnegative integer)
 * \param[in]  disp_unit local unit size for displacements, in bytes (positive integer)
 * \param[in]  info      info argument (handle)
 * \param[in]  comm_ptr  Communicator (handle)
 * \param[out] base_ptr  address of a pointer that receives the base address
 *                       of the window in local memory (treated as void **)
 * \param[out] win_ptr   window object returned by the call (handle)
 * \return MPI_SUCCESS on success; an MPI_ERR_SIZE error when \a size is
 *         negative; an MPI_ERR_BUFFER error when the allocation fails and
 *         MPIDI_NO_ASSERT is defined; otherwise the non-success code
 *         returned by MPIDI_Win_init(), MPIDI_Win_allgather() or
 *         MPIR_Barrier_impl().
 */
int
MPID_Win_allocate(MPI_Aint     size,
                  int          disp_unit,
                  MPID_Info  * info,
                  MPID_Comm  * comm_ptr,
                  void       * base_ptr,
                  MPID_Win  ** win_ptr)
{
  int mpi_errno = MPI_SUCCESS;
  int rc = MPI_SUCCESS;
  void *baseP = NULL;
  static char FCNAME[] = "MPID_Win_allocate";
  MPIDI_Win_info *winfo;
  MPID_Win *win;
  int rank;

  /* The check macro fires when its condition is TRUE, so test for the
   * invalid case itself.  Doing this first means no window state has been
   * created yet when a negative size is rejected. */
  MPIU_ERR_CHKANDSTMT((size < 0), mpi_errno, MPI_ERR_SIZE,
                      return mpi_errno, "**rmasize");

  rc = MPIDI_Win_init(size, disp_unit, win_ptr, info, comm_ptr,
                      MPI_WIN_FLAVOR_ALLOCATE, MPI_WIN_SEPARATE);
  /* Do not touch *win_ptr if initialization reported a failure. */
  if (rc != MPI_SUCCESS)
    return rc;
  win = *win_ptr;

  /* size == 0 is legal: the window then has a NULL base (baseP's initial value). */
  if (size > 0) {
    baseP = MPIU_Malloc(size);
#ifndef MPIDI_NO_ASSERT
    MPID_assert(baseP != NULL);
#else
    MPIU_ERR_CHKANDJUMP((baseP == NULL), mpi_errno, MPI_ERR_BUFFER, "**bufnull");
#endif
  }

  /* Publish the local buffer in the window and in this rank's info entry
   * before the entries are exchanged. */
  win->base = baseP;
  rank = comm_ptr->rank;
  winfo = &win->mpid.info[rank];
  winfo->base_addr = baseP;
  winfo->win = win;
  winfo->disp_unit = disp_unit;

  rc = MPIDI_Win_allgather(baseP, size, win_ptr);
  if (rc != MPI_SUCCESS)
    return rc;

  /* base_ptr is declared void* but carries the address of the caller's
   * pointer variable. */
  *(void**) base_ptr = (void *) win->base;

  /* NOTE(review): mpi_errno doubles as the barrier's error-flag argument
   * and is then overwritten by the return value -- kept as in the
   * original; confirm that is intended. */
  mpi_errno = MPIR_Barrier_impl(comm_ptr, &mpi_errno);

fn_fail:
  return mpi_errno;
}
/* begin_generated_IBM_copyright_prolog */
/* */
/* This is an automatically generated copyright prolog. */
/* After initializing, DO NOT MODIFY OR MOVE */
/* --------------------------------------------------------------- */
/* Licensed Materials - Property of IBM */
/* Blue Gene/Q 5765-PER 5765-PRP */
/* */
/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved */
/* US Government Users Restricted Rights - */
/* Use, duplication, or disclosure restricted */
/* by GSA ADP Schedule Contract with IBM Corp. */
/* */
/* --------------------------------------------------------------- */
/* */
/* end_generated_IBM_copyright_prolog */
/* (C)Copyright IBM Corp. 2007, 2011 */
/**
* \file src/onesided/mpid_win_attach.c
* \brief attaches a local memory region beginning at base for remote
* access within the given window.
*/
#include "mpidi_onesided.h"
/**
 * \brief MPI-PAMI glue for MPI_Win_attach function
 *
 * Validates that \a win was created with the dynamic flavor and otherwise
 * does nothing: this device performs no per-region registration here.
 * The caller is responsible for making sure the attach at the target has
 * returned before any process targets that memory with an RMA call.
 *
 * \param[in] win  window object; must have create_flavor MPI_WIN_FLAVOR_DYNAMIC
 * \param[in] base initial address of the memory to be attached (not used)
 * \param[in] size length of the memory to be attached, in bytes (not used)
 * \return MPI_SUCCESS, or an MPI_ERR_RMA_FLAVOR error when the window is
 *         not a dynamic window
 */
int
MPID_Win_attach(MPID_Win *win, void *base, MPI_Aint size)
{
  static char FCNAME[] = "MPID_Win_attach";
  int mpi_errno = MPI_SUCCESS;

  /* Nothing is registered for the region, so neither argument is read. */
  (void) base;
  (void) size;

  /* Only dynamic windows accept attach; bail out with a flavor error
   * for every other kind of window. */
  MPIU_ERR_CHKANDSTMT(!(win->create_flavor == MPI_WIN_FLAVOR_DYNAMIC),
                      mpi_errno, MPI_ERR_RMA_FLAVOR,
                      return mpi_errno, "**rmaflavor");

  /* No further work: all memory is treated as already exposed. */
  return mpi_errno;
}
/* begin_generated_IBM_copyright_prolog */
/* */
/* This is an automatically generated copyright prolog. */
/* After initializing, DO NOT MODIFY OR MOVE */
/* --------------------------------------------------------------- */
/* Licensed Materials - Property of IBM */
/* Blue Gene/Q 5765-PER 5765-PRP */
/* */
/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved */
/* US Government Users Restricted Rights - */
/* Use, duplication, or disclosure restricted */
/* by GSA ADP Schedule Contract with IBM Corp. */
/* */
/* --------------------------------------------------------------- */
/* */
/* end_generated_IBM_copyright_prolog */
/* (C)Copyright IBM Corp. 2007, 2011 */
/**
 * \file src/onesided/mpid_win_create_dynamic.c
 * \brief Creates an RMA window with no memory attached
 *        (device side of MPI_Win_create_dynamic).
 */
#include "mpidi_onesided.h"
/**
 * \brief MPI-PAMI glue for MPI_Win_create_dynamic function
 *
 * Creates a window without memory attached.  The steps performed here are:
 *   1. call MPIDI_Win_init() with size 0, displacement unit 1 and the
 *      MPI_WIN_FLAVOR_DYNAMIC flavor;
 *   2. set the window base to MPI_BOTTOM;
 *   3. fill this rank's entry of the window info array;
 *   4. call MPIDI_Win_allgather() to exchange the per-rank entries;
 *   5. run a barrier on the communicator.
 *
 * \param[in]  info      info argument
 * \param[in]  comm_ptr  intra-Communicator (handle)
 * \param[out] win_ptr   window object returned by the call (handle)
 * \return MPI_SUCCESS on success; otherwise the non-success code returned
 *         by MPIDI_Win_init(), MPIDI_Win_allgather() or MPIR_Barrier_impl().
 */
int
MPID_Win_create_dynamic( MPID_Info  * info,
                         MPID_Comm  * comm_ptr,
                         MPID_Win  ** win_ptr)
{
  int mpi_errno = MPI_SUCCESS;
  int rc = MPI_SUCCESS;
  /* Must carry this function's real name: the MPIU_ERR_* macros in this
   * code base take the reporting function from FCNAME. */
  static char FCNAME[] = "MPID_Win_create_dynamic";
  MPIDI_Win_info *winfo;
  MPID_Win *win;
  int rank;

  rc = MPIDI_Win_init(0, 1, win_ptr, info, comm_ptr,
                      MPI_WIN_FLAVOR_DYNAMIC, MPI_WIN_SEPARATE);
  /* Do not touch *win_ptr if initialization reported a failure. */
  if (rc != MPI_SUCCESS)
    return rc;
  win = *win_ptr;

  /* A dynamic window has no memory of its own; displacements are
   * expressed relative to MPI_BOTTOM. */
  win->base = MPI_BOTTOM;
  rank = comm_ptr->rank;
  winfo = &win->mpid.info[rank];
  winfo->win = win;
  /* Mirror MPID_Win_allocate, which fills the whole local entry before the
   * exchange.  The values are the ones already handed to MPIDI_Win_init
   * (disp_unit 1) and MPIDI_Win_allgather (MPI_BOTTOM) in this function. */
  winfo->base_addr = MPI_BOTTOM;
  winfo->disp_unit = 1;

  rc = MPIDI_Win_allgather(MPI_BOTTOM, 0, win_ptr);
  if (rc != MPI_SUCCESS)
    return rc;

  /* NOTE(review): mpi_errno doubles as the barrier's error-flag argument
   * and is then overwritten by the return value -- kept as in the
   * original; confirm that is intended. */
  mpi_errno = MPIR_Barrier_impl(comm_ptr, &mpi_errno);

  return mpi_errno;
}
/* begin_generated_IBM_copyright_prolog */
/* */
/* This is an automatically generated copyright prolog. */
/* After initializing, DO NOT MODIFY OR MOVE */
/* --------------------------------------------------------------- */
/* Licensed Materials - Property of IBM */
/* Blue Gene/Q 5765-PER 5765-PRP */
/* */
/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved */
/* US Government Users Restricted Rights - */
/* Use, duplication, or disclosure restricted */
/* by GSA ADP Schedule Contract with IBM Corp. */
/* */
/* --------------------------------------------------------------- */
/* */
/* end_generated_IBM_copyright_prolog */
/* (C)Copyright IBM Corp. 2007, 2011 */
/**
* \file src/onesided/mpid_win_detach.c
* \brief detaches a previously attached memory region beginning at
* base
*/
#include "mpidi_onesided.h"
/**
 * \brief MPI-PAMI glue for MPI_Win_detach function
 *
 * Validates that \a win was created with the dynamic flavor and otherwise
 * does nothing: no per-region state is kept by this function, so there is
 * nothing to undo.  The arguments are expected to match those passed to an
 * earlier MPI_Win_attach call, but that is not checked here.
 *
 * \param[in] win  window object; must have create_flavor MPI_WIN_FLAVOR_DYNAMIC
 * \param[in] base initial address of the memory to be detached (not used)
 * \return MPI_SUCCESS, or an MPI_ERR_RMA_FLAVOR error when the window is
 *         not a dynamic window
 */
int
MPID_Win_detach(MPID_Win *win, const void *base)
{
  static char FCNAME[] = "MPID_Win_detach";
  int mpi_errno = MPI_SUCCESS;

  /* The region address is not consulted. */
  (void) base;

  /* Detach is only meaningful on dynamic windows; anything else is a
   * flavor error. */
  MPIU_ERR_CHKANDSTMT(!(win->create_flavor == MPI_WIN_FLAVOR_DYNAMIC),
                      mpi_errno, MPI_ERR_RMA_FLAVOR,
                      return mpi_errno, "**rmaflavor");

  /* No further work: all memory is treated as exposed. */
  return mpi_errno;
}
/* begin_generated_IBM_copyright_prolog */
/* */
/* This is an automatically generated copyright prolog. */
/* After initializing, DO NOT MODIFY OR MOVE */
/* --------------------------------------------------------------- */
/* Licensed Materials - Property of IBM */
/* Blue Gene/Q 5765-PER 5765-PRP */
/* */
/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved */
/* US Government Users Restricted Rights - */
/* Use, duplication, or disclosure restricted */
/* by GSA ADP Schedule Contract with IBM Corp. */
/* */
/* --------------------------------------------------------------- */
/* */
/* end_generated_IBM_copyright_prolog */
/* (C)Copyright IBM Corp. 2007, 2011 */
/**
 * \file src/onesided/mpid_win_flush.c
 * \brief completes outstanding RMA operations issued by the calling
 *        process on a window (MPI_Win_flush and MPI_Win_flush_all).
 */
#include "mpidi_onesided.h"
/**
 * \brief MPI-PAMI glue for MPI_Win_flush function
 *
 * The function may be called only inside a passive target epoch, i.e.
 * between MPI_Win_lock and MPI_Win_unlock or between MPI_Win_lock_all and
 * MPI_Win_unlock_all.  Any other origin epoch type is rejected.
 *
 * It drives progress until the number of operations completed for
 * \a rank equals the number started for \a rank on this window, then
 * resets both per-target counters to zero.
 *
 * \param[in] rank rank of target window; used directly as an index into
 *                 the window's per-target counter array
 * \param[in] win  window object
 * \return MPI_SUCCESS, or an MPI_ERR_RMA_SYNC error when the window is not
 *         in a lock or lock_all access epoch
 */
int
MPID_Win_flush(int       rank,
               MPID_Win *win)
{
  int mpi_errno = MPI_SUCCESS;
  static char FCNAME[] = "MPID_Win_flush";

  /* Flush is only legal inside a passive-target access epoch. */
  if((win->mpid.sync.origin_epoch_type != MPID_EPOTYPE_LOCK) &&
     (win->mpid.sync.origin_epoch_type != MPID_EPOTYPE_LOCK_ALL))
    {
      MPIU_ERR_SETANDSTMT(mpi_errno, MPI_ERR_RMA_SYNC,
                          return mpi_errno, "**rmasync");
    }

  /* NOTE(review): rank is not range-checked here; confirm the caller never
   * forwards MPI_PROC_NULL or another out-of-range value. */
  MPID_PROGRESS_WAIT_WHILE(win->mpid.origin[rank].nStarted != win->mpid.origin[rank].nCompleted);

  /* Everything issued to this target has completed; start counting afresh
   * for the remainder of the epoch. */
  win->mpid.origin[rank].nStarted = 0;
  win->mpid.origin[rank].nCompleted = 0;

  return mpi_errno;
}
/**
 * \brief MPI-PAMI glue for MPI_Win_flush_all function
 *
 * The function can be called only within passive target epochs, i.e.
 * between MPI_Win_lock and MPI_Win_unlock or between MPI_Win_lock_all
 * and MPI_Win_unlock_all.
 *
 * All RMA operations issued by the calling process to any target on the
 * given window prior to this call will have completed both at the origin
 * and at the target when the call returns.
 *
 * \param[in] win window object
 * \return MPI_SUCCESS, MPI_ERR_OTHER
 */
int
MPID_Win_flush_all(MPID_Win *win)
{
int mpi_errno = MPI_SUCCESS;
int nTasks,i;
struct MPIDI_Win_sync* sync;
static char FCNAME[] = "MPID_Win_flush_all";
if((win->mpid.sync.origin_epoch_type != MPID_EPOTYPE_LOCK) &&
(win->mpid.sync.origin_epoch_type != MPID_EPOTYPE_LOCK_ALL))
{
MPIU_ERR_SETANDSTMT(mpi_errno, MPI_ERR_RMA_SYNC,
return mpi_errno, "**rmasync");
}
sync = &win->mpid.sync;
MPID_PROGRESS_WAIT_WHILE(sync->total != sync->complete);
sync->total = 0;
sync->started = 0;
sync->complete = 0;