Commit bd9e1576 authored by Darius Buntinas
Browse files

[svn-r2957] added fixes for nemesis rma asserts

parent a23cd223
......@@ -471,9 +471,7 @@ int MPID_nem_newtcp_SendNoncontig(MPIDI_VC_t *vc, MPID_Request *sreq, void *head
iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_PktGeneric_t);
iov_n = MPID_IOV_LIMIT - 1;
/* On the initial load of a send iov req, set the OnFinal action (null
for point-to-point) */
sreq->dev.OnFinal = 0;
mpi_errno = MPIDI_CH3U_Request_load_send_iov(sreq, &iov[1], &iov_n);
MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|loadsendiov");
......
......@@ -28,9 +28,6 @@ int MPIDI_CH3I_SendNoncontig( MPIDI_VC_t *vc, MPID_Request *sreq, void *header,
MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)header);
sreq->dev.OnFinal = 0;
sreq->dev.OnDataAvail = 0;
if (!MPIDI_CH3I_SendQ_empty(CH3_NORMAL_QUEUE)) /* MT */
{
/* send queue is not empty, just enqueue this request */
......
......@@ -33,9 +33,7 @@ int MPIDI_CH3_SendNoncontig_iov( MPIDI_VC_t *vc, MPID_Request *sreq,
iov[0].MPID_IOV_LEN = hdr_sz;
iov_n = MPID_IOV_LIMIT - 1;
/* On the initial load of a send iov req, set the OnFinal action (null
for point-to-point) */
sreq->dev.OnFinal = 0;
mpi_errno = MPIDI_CH3U_Request_load_send_iov(sreq, &iov[1], &iov_n);
if (mpi_errno == MPI_SUCCESS)
{
......@@ -99,6 +97,9 @@ int MPIDI_CH3_EagerNoncontigSend( MPID_Request **sreq_p,
MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
"sending non-contiguous eager message, data_sz=" MPIDI_MSG_SZ_FMT,
data_sz));
sreq->dev.OnDataAvail = 0;
sreq->dev.OnFinal = 0;
MPIDI_Pkt_init(eager_pkt, reqtype);
eager_pkt->match.rank = comm->rank;
eager_pkt->match.tag = tag;
......
......@@ -41,7 +41,8 @@ int MPIDI_CH3_EagerSyncNoncontigSend( MPID_Request **sreq_p,
sreq->cc = 2;
sreq->dev.OnDataAvail = 0;
sreq->dev.OnFinal = 0;
MPIDI_Pkt_init(es_pkt, MPIDI_CH3_PKT_EAGER_SYNC_SEND);
es_pkt->match.rank = comm->rank;
es_pkt->match.tag = tag;
......
......@@ -276,9 +276,7 @@ int MPIDI_CH3_ReqHandler_GetRespDerivedDTComplete( MPIDI_VC_t *vc,
MPID_Datatype *new_dtp;
MPIDI_CH3_Pkt_t upkt;
MPIDI_CH3_Pkt_get_resp_t * get_resp_pkt = &upkt.get_resp;
MPID_IOV iov[MPID_IOV_LIMIT];
MPID_Request * sreq;
int iov_n;
MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_GETRESPDERIVEDDTCOMPLETE);
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_GETRESPDERIVEDDTCOMPLETE);
......@@ -289,9 +287,8 @@ int MPIDI_CH3_ReqHandler_GetRespDerivedDTComplete( MPIDI_VC_t *vc,
/* create request for sending data */
sreq = MPID_Request_create();
if (sreq == NULL) {
MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
}
MPIU_ERR_CHKANDJUMP(sreq == NULL, mpi_errno,MPI_ERR_OTHER,"**nomem");
sreq->kind = MPID_REQUEST_SEND;
MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_GET_RESP);
sreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_GetSendRespComplete;
......@@ -304,10 +301,7 @@ int MPIDI_CH3_ReqHandler_GetRespDerivedDTComplete( MPIDI_VC_t *vc,
sreq->dev.source_win_handle = rreq->dev.source_win_handle;
MPIDI_Pkt_init(get_resp_pkt, MPIDI_CH3_PKT_GET_RESP);
get_resp_pkt->request_handle = rreq->dev.request_handle;
iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_resp_pkt;
iov[0].MPID_IOV_LEN = sizeof(*get_resp_pkt);
get_resp_pkt->request_handle = rreq->dev.request_handle;
sreq->dev.segment_ptr = MPID_Segment_alloc( );
MPIU_ERR_CHKANDJUMP1((sreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
......@@ -318,25 +312,17 @@ int MPIDI_CH3_ReqHandler_GetRespDerivedDTComplete( MPIDI_VC_t *vc,
sreq->dev.segment_ptr, 0);
sreq->dev.segment_first = 0;
sreq->dev.segment_size = new_dtp->size * sreq->dev.user_count;
iov_n = MPID_IOV_LIMIT - 1;
/* Note that the OnFinal handler was set above */
mpi_errno = MPIDI_CH3U_Request_load_send_iov(sreq, &iov[1], &iov_n);
if (mpi_errno == MPI_SUCCESS)
mpi_errno = vc->sendNoncontig_fn(vc, sreq, get_resp_pkt, sizeof(*get_resp_pkt));
/* --BEGIN ERROR HANDLING-- */
if (mpi_errno != MPI_SUCCESS)
{
iov_n += 1;
mpi_errno = MPIU_CALL(MPIDI_CH3,iSendv(vc, sreq, iov, iov_n));
/* --BEGIN ERROR HANDLING-- */
if (mpi_errno != MPI_SUCCESS)
{
MPIU_Object_set_ref(sreq, 0);
MPIDI_CH3_Request_destroy(sreq);
sreq = NULL;
MPIU_ERR_SETFATALANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rmamsg");
}
/* --END ERROR HANDLING-- */
MPIU_Object_set_ref(sreq, 0);
MPIDI_CH3_Request_destroy(sreq);
sreq = NULL;
MPIU_ERR_SETFATALANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rmamsg");
}
/* --END ERROR HANDLING-- */
/* mark receive data transfer as complete and decrement CC in receive
request */
......
......@@ -34,6 +34,10 @@ static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr,
static int MPIDI_CH3I_Send_lock_put_or_acc(MPID_Win *win_ptr);
static int MPIDI_CH3I_Send_lock_get(MPID_Win *win_ptr);
static int create_datatype(const MPIDI_RMA_dtype_info *dtype_info,
const void *dataloop, MPI_Aint dataloop_sz,
const void *o_addr, int o_count, MPI_Datatype o_datatype,
MPID_Datatype **combined_dtp);
#undef FUNCNAME
#define FUNCNAME MPIDI_Win_fence
......@@ -326,6 +330,87 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
/* --END ERROR HANDLING-- */
}
/* create_datatype() creates a new struct datatype for the dtype_info
and the dataloop of the target datatype together with the user data */
#undef FUNCNAME
#define FUNCNAME create_datatype
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * Build a three-block struct datatype, addressed with absolute
 * displacements, that describes in order:
 *   block 0: the dtype_info structure, as sizeof(*dtype_info) MPI_BYTEs
 *   block 1: the dataloop buffer, as dataloop_sz MPI_BYTEs
 *   block 2: the origin data, as o_count elements of o_datatype
 *
 * On success *combined_dtp points to the new datatype object, with its
 * contents recorded as MPI_COMBINER_STRUCT and both the homogeneous and
 * heterogeneous dataloops created.
 *
 * Returns MPI_SUCCESS, or the error code reported by MPID_Type_struct or
 * MPID_Datatype_set_contents.  When MPID_Type_struct fails, *combined_dtp
 * is not written.
 */
static int create_datatype(const MPIDI_RMA_dtype_info *dtype_info,
                           const void *dataloop, MPI_Aint dataloop_sz,
                           const void *o_addr, int o_count, MPI_Datatype o_datatype,
                           MPID_Datatype **combined_dtp)
{
    int mpi_errno = MPI_SUCCESS;
    /* datatype_set_contents wants an array 'ints' which is the
       blocklens array with count prepended to it.  So blocklens
       points to the 2nd element of ints to avoid having to copy
       blocklens into ints later. */
    int ints[4];
    int *blocklens = &ints[1];
    MPI_Aint displaces[3];
    MPI_Datatype datatypes[3];
    const int count = 3;
    MPI_Datatype combined_datatype;
    MPIDI_STATE_DECL(MPID_STATE_CREATE_DATATYPE);

    MPIDI_FUNC_ENTER(MPID_STATE_CREATE_DATATYPE);

    /* create datatype */
    displaces[0] = MPIU_PtrToAint(dtype_info);
    blocklens[0] = sizeof(*dtype_info);
    datatypes[0] = MPI_BYTE;

    displaces[1] = MPIU_PtrToAint(dataloop);
    /* NOTE(review): dataloop_sz is an MPI_Aint stored into an int block
       length; presumably dataloop sizes always fit -- confirm. */
    blocklens[1] = dataloop_sz;
    datatypes[1] = MPI_BYTE;

    displaces[2] = MPIU_PtrToAint(o_addr);
    blocklens[2] = o_count;
    datatypes[2] = o_datatype;

    mpi_errno = MPID_Type_struct(count,
                                 blocklens,
                                 displaces,
                                 datatypes,
                                 &combined_datatype);
    /* Stop here on failure: combined_datatype is not known to hold a
       valid handle, and the error code must not be overwritten by the
       set_contents call below. */
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    ints[0] = count;

    MPID_Datatype_get_ptr(combined_datatype, *combined_dtp);
    mpi_errno = MPID_Datatype_set_contents(*combined_dtp,
                                           MPI_COMBINER_STRUCT,
                                           count+1, /* ints (cnt,blklen) */
                                           count, /* aints (disps) */
                                           count, /* types */
                                           ints,
                                           displaces,
                                           datatypes);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* Commit datatype */
    MPID_Dataloop_create(combined_datatype,
                         &(*combined_dtp)->dataloop,
                         &(*combined_dtp)->dataloop_size,
                         &(*combined_dtp)->dataloop_depth,
                         MPID_DATALOOP_HOMOGENEOUS);

    /* create heterogeneous dataloop */
    MPID_Dataloop_create(combined_datatype,
                         &(*combined_dtp)->hetero_dloop,
                         &(*combined_dtp)->hetero_dloop_size,
                         &(*combined_dtp)->hetero_dloop_depth,
                         MPID_DATALOOP_HETEROGENEOUS);

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_CREATE_DATATYPE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_Send_rma_msg
......@@ -363,7 +448,7 @@ static int MPIDI_CH3I_Send_rma_msg(MPIDI_RMA_ops *rma_op, MPID_Win *win_ptr,
put_pkt->dataloop_size = 0;
put_pkt->target_win_handle = target_win_handle;
put_pkt->source_win_handle = source_win_handle;
iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) put_pkt;
iov[0].MPID_IOV_LEN = sizeof(*put_pkt);
}
......@@ -383,10 +468,10 @@ static int MPIDI_CH3I_Send_rma_msg(MPIDI_RMA_ops *rma_op, MPID_Win *win_ptr,
iov[0].MPID_IOV_LEN = sizeof(*accum_pkt);
}
/* printf("send pkt: type %d, addr %d, count %d, base %d\n", rma_pkt->type,
rma_pkt->addr, rma_pkt->count, win_ptr->base_addrs[rma_op->target_rank]);
fflush(stdout);
*/
/* printf("send pkt: type %d, addr %d, count %d, base %d\n", rma_pkt->type,
rma_pkt->addr, rma_pkt->count, win_ptr->base_addrs[rma_op->target_rank]);
fflush(stdout);
*/
MPID_Comm_get_ptr(win_ptr->comm, comm_ptr);
MPIDI_Comm_get_vc(comm_ptr, rma_op->target_rank, &vc);
......@@ -450,52 +535,63 @@ static int MPIDI_CH3I_Send_rma_msg(MPIDI_RMA_ops *rma_op, MPID_Win *win_ptr,
MPID_Datatype_get_size_macro(rma_op->origin_datatype, origin_type_size);
if (!origin_dt_derived)
if (!target_dt_derived)
{
/* basic datatype on origin */
if (!target_dt_derived)
{
/* basic datatype on target */
/* basic datatype on target */
if (!origin_dt_derived)
{
/* basic datatype on origin */
iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)rma_op->origin_addr;
iov[1].MPID_IOV_LEN = rma_op->origin_count * origin_type_size;
iovcnt = 2;
mpi_errno = MPIU_CALL(MPIDI_CH3,iStartMsgv(vc, iov, iovcnt, request));
MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
}
else
{
/* derived datatype on target */
iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)dtype_info;
iov[1].MPID_IOV_LEN = sizeof(*dtype_info);
iov[2].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)*dataloop;
iov[2].MPID_IOV_LEN = target_dtp->dataloop_size;
iov[3].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)rma_op->origin_addr;
iov[3].MPID_IOV_LEN = rma_op->origin_count * origin_type_size;
iovcnt = 4;
}
mpi_errno = MPIU_CALL(MPIDI_CH3,iStartMsgv(vc, iov, iovcnt, request));
if (mpi_errno != MPI_SUCCESS) {
MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rmamsg");
{
/* derived datatype on origin */
*request = MPID_Request_create();
if (*request == NULL) {
MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
}
MPIU_Object_set_ref(*request, 2);
(*request)->kind = MPID_REQUEST_SEND;
(*request)->dev.segment_ptr = MPID_Segment_alloc( );
MPIU_ERR_CHKANDJUMP1((*request)->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
(*request)->dev.datatype_ptr = origin_dtp;
/* this will cause the datatype to be freed when the request
is freed. */
MPID_Segment_init(rma_op->origin_addr, rma_op->origin_count,
rma_op->origin_datatype,
(*request)->dev.segment_ptr, 0);
(*request)->dev.segment_first = 0;
(*request)->dev.segment_size = rma_op->origin_count * origin_type_size;
(*request)->dev.OnFinal = 0;
(*request)->dev.OnDataAvail = 0;
mpi_errno = vc->sendNoncontig_fn(vc, *request, iov[0].MPID_IOV_BUF, iov[0].MPID_IOV_LEN);
/* --BEGIN ERROR HANDLING-- */
if (mpi_errno != MPI_SUCCESS)
{
MPID_Datatype_release((*request)->dev.datatype_ptr);
MPIU_Object_set_ref(*request, 0);
MPIDI_CH3_Request_destroy(*request);
*request = NULL;
MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
}
/* --END ERROR HANDLING-- */
}
}
else
{
/* derived datatype on origin */
if (!target_dt_derived)
{
/* basic datatype on target */
iovcnt = 1;
}
else
{
/* derived datatype on target */
iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)dtype_info;
iov[1].MPID_IOV_LEN = sizeof(*dtype_info);
iov[2].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)*dataloop;
iov[2].MPID_IOV_LEN = target_dtp->dataloop_size;
iovcnt = 3;
}
/* derived datatype on target */
MPID_Datatype *combined_dtp;
*request = MPID_Request_create();
if (*request == NULL) {
......@@ -504,58 +600,43 @@ static int MPIDI_CH3I_Send_rma_msg(MPIDI_RMA_ops *rma_op, MPID_Win *win_ptr,
MPIU_Object_set_ref(*request, 2);
(*request)->kind = MPID_REQUEST_SEND;
(*request)->dev.datatype_ptr = origin_dtp;
/* this will cause the datatype to be freed when the request
is freed. */
(*request)->dev.segment_ptr = MPID_Segment_alloc( );
/* if (!*request)->dev.segment_ptr) { MPIU_ERR_POP(); } */
MPID_Segment_init(rma_op->origin_addr, rma_op->origin_count,
rma_op->origin_datatype,
MPIU_ERR_CHKANDJUMP1((*request)->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
/* create a new datatype containing the dtype_info, dataloop, and origin data */
create_datatype(dtype_info, *dataloop, target_dtp->dataloop_size, rma_op->origin_addr,
rma_op->origin_count, rma_op->origin_datatype, &combined_dtp);
(*request)->dev.datatype_ptr = combined_dtp;
/* combined_datatype will be freed when request is freed */
MPID_Segment_init(MPI_BOTTOM, 1, combined_dtp->handle,
(*request)->dev.segment_ptr, 0);
(*request)->dev.segment_first = 0;
(*request)->dev.segment_size = rma_op->origin_count * origin_type_size;
iov_n = MPID_IOV_LIMIT - iovcnt;
/* On the initial load of a send iov req, set the OnFinal action (null
for point-to-point) */
(*request)->dev.OnFinal = 0;
mpi_errno = MPIDI_CH3U_Request_load_send_iov(*request,
&iov[iovcnt],
&iov_n);
if (mpi_errno == MPI_SUCCESS)
{
iov_n += iovcnt;
mpi_errno = MPIU_CALL(MPIDI_CH3,iSendv(vc, *request, iov, iov_n));
/* --BEGIN ERROR HANDLING-- */
if (mpi_errno != MPI_SUCCESS)
{
MPID_Datatype_release((*request)->dev.datatype_ptr);
MPIU_Object_set_ref(*request, 0);
MPIDI_CH3_Request_destroy(*request);
*request = NULL;
MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rmamsg");
}
/* --END ERROR HANDLING-- */
}
else
(*request)->dev.segment_size = combined_dtp->size;
(*request)->dev.OnFinal = 0;
(*request)->dev.OnDataAvail = 0;
mpi_errno = vc->sendNoncontig_fn(vc, *request, iov[0].MPID_IOV_BUF, iov[0].MPID_IOV_LEN);
/* --BEGIN ERROR HANDLING-- */
if (mpi_errno != MPI_SUCCESS)
{
/* --BEGIN ERROR HANDLING-- */
MPID_Datatype_release((*request)->dev.datatype_ptr);
MPIU_Object_set_ref(*request, 0);
MPIDI_CH3_Request_destroy(*request);
*request = NULL;
MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|loadsendiov" );
/* --END ERROR HANDLING-- */
MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
}
}
/* --END ERROR HANDLING-- */
if (target_dt_derived)
{
/* we're done with the datatypes */
if (origin_dt_derived)
MPID_Datatype_release(origin_dtp);
MPID_Datatype_release(target_dtp);
}
}
fn_exit:
MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_RMA_MSG);
......@@ -1821,44 +1902,27 @@ static int MPIDI_CH3I_Send_lock_put_or_acc(MPID_Win *win_ptr)
is freed. */
request->dev.segment_ptr = MPID_Segment_alloc( );
/* if (!request->dev.segment_ptr) { MPIU_ERR_POP(); } */
MPIU_ERR_CHKANDJUMP1(request->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
MPID_Segment_init(rma_op->origin_addr, rma_op->origin_count,
rma_op->origin_datatype,
request->dev.segment_ptr, 0);
request->dev.segment_first = 0;
request->dev.segment_size = rma_op->origin_count * origin_type_size;
iov_n = MPID_IOV_LIMIT - iovcnt;
/* On the initial load of a send iov req, set the OnFinal action (null
for point-to-point) */
request->dev.OnFinal = 0;
mpi_errno = MPIDI_CH3U_Request_load_send_iov(request,
&iov[iovcnt],
&iov_n);
if (mpi_errno == MPI_SUCCESS)
{
iov_n += iovcnt;
mpi_errno = MPIU_CALL(MPIDI_CH3,iSendv(vc, request, iov, iov_n));
/* --BEGIN ERROR HANDLING-- */
if (mpi_errno != MPI_SUCCESS)
{
MPID_Datatype_release(request->dev.datatype_ptr);
MPIU_Object_set_ref(request, 0);
MPIDI_CH3_Request_destroy(request);
MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rmamsg");
}
/* --END ERROR HANDLING-- */
}
request->dev.OnFinal = 0;
request->dev.OnDataAvail = 0;
mpi_errno = vc->sendNoncontig_fn(vc, request, iov[0].MPID_IOV_BUF, iov[0].MPID_IOV_LEN);
/* --BEGIN ERROR HANDLING-- */
else
if (mpi_errno)
{
MPID_Datatype_release(request->dev.datatype_ptr);
MPIU_Object_set_ref(request, 0);
MPIDI_CH3_Request_destroy(request);
MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|loadsendiov");
}
/* --END ERROR HANDLING-- */
/* --END ERROR HANDLING-- */
}
if (request != NULL) {
......
......@@ -31,7 +31,9 @@ int MPIDI_CH3_RndvSend( MPID_Request **sreq_p, const void * buf, int count,
MPIU_DBG_MSG_D(CH3_OTHER,VERBOSE,
"sending rndv RTS, data_sz=" MPIDI_MSG_SZ_FMT, data_sz);
sreq->dev.OnDataAvail = 0;
sreq->partner_request = NULL;
MPIDI_Pkt_init(rts_pkt, MPIDI_CH3_PKT_RNDV_REQ_TO_SEND);
......@@ -193,7 +195,10 @@ int MPIDI_CH3_PktHandler_RndvClrToSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
MPID_Request_get_ptr(cts_pkt->sender_req_id, sreq);
MPIU_DBG_PRINTF(("received cts, count=%d\n", sreq->dev.user_count));
sreq->dev.OnDataAvail = 0;
sreq->dev.OnFinal = 0;
/* Release the RTS request if one exists.
MPID_Request_fetch_and_clear_rts_sreq() needs to be atomic to
prevent
......@@ -206,7 +211,7 @@ int MPIDI_CH3_PktHandler_RndvClrToSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
{
MPID_Request_release(rts_sreq);
}
*buflen = sizeof(MPIDI_CH3_Pkt_t);
MPIDI_Pkt_init(rs_pkt, MPIDI_CH3_PKT_RNDV_SEND);
......@@ -222,8 +227,6 @@ int MPIDI_CH3_PktHandler_RndvClrToSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
"sending contiguous rndv data, data_sz=" MPIDI_MSG_SZ_FMT,
data_sz));
sreq->dev.OnDataAvail = 0;
iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)rs_pkt;
iov[0].MPID_IOV_LEN = sizeof(*rs_pkt);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment