Commit 708ff5f3 authored by Masamichi Takagi's avatar Masamichi Takagi Committed by Pavan Balaji
Browse files

Implementing the Tofu netmod

Squash this because it's a mixed commit.
parent 41e58397
......@@ -12,6 +12,8 @@
#include "mpid_nem_impl.h"
#include "llc.h"
extern int MPID_nem_tofu_my_llc_rank;
/* The vc provides a generic buffer in which network modules can store
* private fields This removes all dependencies from the VC struction
* on the network module, facilitating dynamic module loading. */
......@@ -34,6 +36,7 @@ typedef struct
/* macro for tofu private in VC */
#define VC_TOFU(vc) ((MPID_nem_tofu_vc_area *)(vc)->ch.netmod_area.padding)
#define VC_FIELD(vcp, field) (((MPID_nem_tofu_vc_area *)(vc)->ch.netmod_area.padding)->field)
typedef struct
{
......@@ -46,6 +49,11 @@ typedef struct
((MPID_nem_tofu_req_area *)(&(req)->ch.netmod_area.padding))
#define REQ_FIELD(reqp, field) (((MPID_nem_tofu_req_area *)((reqp)->ch.netmod_area.padding))->field)
struct llctofu_cmd_area {
void *cbarg;
uint32_t raddr;
};
/* functions */
int MPID_nem_tofu_init (MPIDI_PG_t *pg_p, int pg_rank, char **bc_val_p, int *val_max_sz_p);
int MPID_nem_tofu_finalize (void);
......@@ -71,6 +79,9 @@ int MPID_nem_tofu_isend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Dat
int dest, int tag, MPID_Comm *comm, int context_offset,
struct MPID_Request **request );
int MPID_nem_tofu_recv_posted(struct MPIDI_VC *vc, struct MPID_Request *req);
int MPID_nem_tofu_kvs_put_binary(int from, const char *postfix, const uint8_t * buf,
int length);
int MPID_nem_tofu_kvs_get_binary(int from, const char *postfix, char *buf, int length);
/*
* temporary llctofu api
......
......@@ -9,6 +9,13 @@
#include "mpid_nem_impl.h"
#include "tofu_impl.h"
#define MPID_NEM_TOFU_DEBUG_INIT
#ifdef MPID_NEM_TOFU_DEBUG_INIT
#define dprintf printf
#else
#define dprintf(...)
#endif
/* global variables */
/* src/mpid/ch3/channels/nemesis/include/mpid_nem_nets.h */
......@@ -31,28 +38,84 @@ MPID_nem_netmod_funcs_t MPIDI_nem_tofu_funcs = {
.anysource_improbe = MPID_nem_tofu_anysource_improbe,
};
int MPID_nem_tofu_my_llc_rank;
#undef FUNCNAME
#define FUNCNAME MPID_nem_tofu_kvs_put_binary
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPID_nem_tofu_kvs_put_binary(int from, const char *postfix, const uint8_t * buf,
int length)
{
int mpi_errno = MPI_SUCCESS;
int pmi_errno;
char *kvs_name;
char key[256], val[256], str[256];
int j;
MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TOFU_KVS_PUT_BINARY);
MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TOFU_KVS_PUT_BINARY);
mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);
MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPIDI_PG_GetConnKVSname");
dprintf("kvs_put_binary,kvs_name=%s\n", kvs_name);
sprintf(key, "bc/%d/%s", from, postfix);
val[0] = 0;
for (j = 0; j < length; j++) {
sprintf(str, "%02x", buf[j]);
strcat(val, str);
}
dprintf("kvs_put_binary,rank=%d,from=%d,PMI_KVS_Put(%s, %s, %s)\n",
MPIDI_Process.my_pg_rank, from, kvs_name, key, val);
pmi_errno = PMI_KVS_Put(kvs_name, key, val);
MPIU_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**PMI_KVS_Put");
fn_exit:
MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TOFU_KVS_PUT_BINARY);
return mpi_errno;
fn_fail:
goto fn_exit;
}
static MPIDI_Comm_ops_t comm_ops = {
.recv_posted = MPID_nem_tofu_recv_posted,
.send = MPID_nem_tofu_isend, /* wait is performed separately after calling this */
.rsend = NULL,
.ssend = NULL,
.isend = MPID_nem_tofu_isend,
.irsend = NULL,
.issend = NULL,
.send_init = NULL,
.bsend_init = NULL,
.rsend_init = NULL,
.ssend_init = NULL,
.start_all = NULL,
.cancel_send = NULL,
.cancel_recv = NULL,
.prove = NULL,
.iprove = NULL,
.improve = NULL
#undef FUNCNAME
#define FUNCNAME MPID_nem_tofu_kvs_get_binary
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPID_nem_tofu_kvs_get_binary(int from, const char *postfix, char *buf, int length)
{
int mpi_errno = MPI_SUCCESS;
int pmi_errno;
char *kvs_name;
char key[256], val[256], str[256];
int j;
MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TOFU_KVS_GET_BINARY);
MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TOFU_KVS_GET_BINARY);
mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);
dprintf("kvs_get_binary,kvs_name=%s\n", kvs_name);
MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPIDI_PG_GetConnKVSname");
sprintf(key, "bc/%d/%s", from, postfix);
dprintf("kvs_put_binary,rank=%d,from=%d,PMI_KVS_Get(%s, %s, %s)\n",
MPIDI_Process.my_pg_rank, from, kvs_name, key, val);
pmi_errno = PMI_KVS_Get(kvs_name, key, val, 256);
MPIU_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**PMS_KVS_Get");
dprintf("rank=%d,obtained val=%s\n", MPIDI_Process.my_pg_rank, val);
char *strp = val;
for (j = 0; j < length; j++) {
memcpy(str, strp, 2);
str[2] = 0;
buf[j] = strtol(str, NULL, 16);
strp += 2;
}
fn_exit:
MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TOFU_KVS_GET_BINARY);
return mpi_errno;
fn_fail:
goto fn_exit;
}
#undef FUNCNAME
......@@ -63,14 +126,34 @@ int
MPID_nem_tofu_init (MPIDI_PG_t *pg_p, int pg_rank,
char **bc_val_p, int *val_max_sz_p)
{
int mpi_errno = MPI_SUCCESS;
int mpi_errno = MPI_SUCCESS, pmi_errno, llc_errno;
int rc;
int i;
int llc_rank;
MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TOFU_INIT);
MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TOFU_INIT);
rc = LLC_init();
MPIU_ERR_CHKANDJUMP(rc != 0, mpi_errno, MPI_ERR_OTHER, "**fail");
llc_errno = LLC_init();
MPIU_ERR_CHKANDJUMP(llc_errno, mpi_errno, MPI_ERR_OTHER, "**LLC_init");
llc_errno = LLC_comm_rank(LLC_COMM_WORLD, &MPID_nem_tofu_my_llc_rank);
MPIU_ERR_CHKANDJUMP(llc_errno, mpi_errno, MPI_ERR_OTHER, "**LLC_comm_rank");
/* Announce my LLC rank */
mpi_errno =
MPID_nem_tofu_kvs_put_binary(pg_rank, "llc_rank",
(uint8_t *) & MPID_nem_tofu_my_llc_rank,
sizeof(int));
MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER,
"**MPID_nem_ib_kvs_put_binary");
dprintf("tofu_init,my_pg_rank=%d,my_llc_rank=%d\n",
MPIDI_Process.my_pg_rank, MPID_nem_tofu_my_llc_rank);
/* Wait until the key-value propagates among all ranks */
pmi_errno = PMI_Barrier();
MPIU_ERR_CHKANDJUMP(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**PMI_Barrier");
fn_exit:
MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TOFU_INIT);
......
......@@ -9,6 +9,13 @@
#include "mpid_nem_impl.h"
#include "tofu_impl.h"
#define MPID_NEM_TOFU_DEBUG_POLL
#ifdef MPID_NEM_TOFU_DEBUG_POLL
#define dprintf printf
#else
#define dprintf(...)
#endif
/* function prototypes */
static void MPID_nem_tofu_send_handler(void *cba,
......@@ -89,12 +96,26 @@ static void MPID_nem_tofu_send_handler(void *cba,
case MPID_PREQUEST_SEND: {
reqtype = MPIDI_Request_get_type(sreq);
MPIU_Assert(reqtype != MPIDI_REQUEST_TYPE_GET_RESP);
int is_contig;
MPID_Datatype_is_contig(sreq->dev.datatype, &is_contig);
if (!is_contig && REQ_FIELD(sreq, pack_buf)) {
dprintf("tofu_send_handler,non-contiguous,free pack_buf\n");
MPIU_Free(REQ_FIELD(req, pack_buf));
/* Free temporal buffer for non-contiguous data.
* MPIDI_Request_create_sreq (in mpid_isend.c) sets req->dev.datatype.
* A control message has a req_type of MPIDI_REQUEST_TYPE_RECV and
* msg_type of MPIDI_REQUEST_EAGER_MSG because
* control message send follows
* MPIDI_CH3_iStartMsg/v-->MPID_nem_tofu_iStartContigMsg-->MPID_nem_tofu_iSendContig
* and MPID_nem_tofu_iSendContig set req->dev.state to zero
* because MPID_Request_create (in src/mpid/ch3/src/ch3u_request.c)
* sets it to zero. In addition, eager-short message has req->comm of zero. */
if (reqtype != MPIDI_REQUEST_TYPE_RECV && sreq->comm) {
/* Exclude control messages which have MPIDI_REQUEST_TYPE_RECV.
* Note that RMA messages should be included.
* Exclude eager-short by requiring req->comm != 0. */
int is_contig;
MPID_Datatype_is_contig(sreq->dev.datatype, &is_contig);
if (!is_contig && REQ_FIELD(sreq, pack_buf)) {
dprintf("tofu_send_handler,non-contiguous,free pack_buf\n");
MPIU_Free(REQ_FIELD(sreq, pack_buf));
}
}
/* sreq: src/mpid/ch3/include/mpidpre.h */
......@@ -131,6 +152,8 @@ static void MPID_nem_tofu_send_handler(void *cba,
}
break; }
default:
printf("send_handler,unknown kind=%08x\n", sreq->kind);
MPID_nem_tofu_segv;
break;
}
......@@ -209,21 +232,27 @@ static void MPID_nem_tofu_recv_handler(
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPID_nem_tofu_recv_posted(struct MPIDI_VC *vc, struct MPID_Request *req)
{
int mpi_errno = MPI_SUCCESS;
int mpi_errno = MPI_SUCCESS, llc_errno;
int dt_contig;
MPIDI_msg_sz_t data_sz;
MPID_Datatype *dt_ptr;
MPI_Aint dt_true_lb;
int i;
MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TOFU_RECV_POSTED);
MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TOFU_RECV_POSTED);
/* req->dev.datatype is set in MPIDI_CH3U_Recvq_FDU_or_AEP (in src/mpid/ch3/src/ch3u_recvq.c) */
/* req->dev.datatype is set in MPID_irecv --> MPIDI_CH3U_Recvq_FDU_or_AEP */
MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr,
dt_true_lb);
/* stash vc for ib_poll */
req->ch.vc = vc;
/* Don't save VC because it's not used in llctofu_poll */
/* Save data size for llctofu_poll */
req->dev.recv_data_sz = data_sz;
dprintf("tofu_recv_posted,%d<-%d,vc=%p,req=%p,user_buf=%p,data_sz=%ld,datatype=%08x,dt_contig=%d\n",
MPIDI_Process.my_pg_rank, vc->pg_rank, vc, req, req->dev.user_buf, req->dev.recv_data_sz, req->dev.datatype, dt_contig);
void *write_to_buf;
if (dt_contig) {
......@@ -236,28 +265,47 @@ int MPID_nem_tofu_recv_posted(struct MPIDI_VC *vc, struct MPID_Request *req)
write_to_buf = REQ_FIELD(req, pack_buf);
}
int LLC_my_rank;
LLC_comm_rank(LLC_COMM_WORLD, &LLC_my_rank);
dprintf("tofu_isend,LLC_my_rank=%d\n", LLC_my_rank);
dprintf("tofu_recv_posted,remote_endpoint_addr=%ld\n", VC_FIELD(vc, remote_endpoint_addr));
LLC_cmd_t *cmd = LLC_cmd_alloc(1);
cmd[0].opcode = LLC_OPCODE_RECV;
cmd[0].comm = LLC_COMM_WORLD;
cmd[0].rank = VC_FIELD(vc, remote_endpoint_addr);
cmd[0].req_id = cmd;
((MPIDI_Message_match_parts_t*)(&cmd[0].match.bits))->rank = comm->rank;
((MPIDI_Message_match_parts_t*)(&cmd[0].match.bits))->tag = tag;
((MPIDI_Message_match_parts_t*)(&cmd[0].match.bits))->context_id = comm->context_id + context_offset;
/* req->comm is set in MPID_irecv --> MPIDI_CH3U_Recvq_FDU_or_AEP */
MPIU_Assert(sizeof(LLC_match_t) >= sizeof(MPIDI_Message_match_parts_t));
memset((uint8_t*)&cmd[0].match.bits + sizeof(MPIDI_Message_match_parts_t),
memcpy(cmd[0].match.bits, &req->dev.match.parts, sizeof(MPIDI_Message_match_parts_t));
memset((uint8_t*)cmd[0].match.bits + sizeof(MPIDI_Message_match_parts_t),
0, sizeof(LLC_match_t) - sizeof(MPIDI_Message_match_parts_t));
dprintf("tofu_recv_posted,match.bits=");
for(i = 0; i < sizeof(LLC_match_t); i++) {
dprintf("%02x", cmd[0].match.bits[i]);
}
dprintf("\n");
cmd[0].iov_local = LLC_iov_alloc(1);
cmd[0].iov_local[0].addr = (uint64_t)write_to_buf;
cmd[0].iov_local[0].length = data_sz;
cmd[0].niov_local = 1;
cmd[0].iov_remote = LLC_iov_alloc(1);
cmd[0].iov_remote[0].addr = 0;
cmd[0].iov_remote[0].length = data_sz;;
cmd[0].niov_remote = 1;
((struct llctofu_cmd_area *)cmd[0].usr_area)->cbarg = req;
((struct llctofu_cmd_area *)cmd[0].usr_area)->raddr = VC_FIELD(vc, remote_endpoint_addr);
llc_errno = LLC_post(cmd, 1);
ERR_CHKANDJUMP(llc_errno != LLC_SUCCESS, -1, printf("LLC_post failed\n"));
MPIU_ERR_CHKANDJUMP(llc_errno != LLC_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**LLC_post");
fn_exit:
MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TOFU_RECV_POSTED);
......
......@@ -9,27 +9,57 @@
#include "mpid_nem_impl.h"
#include "tofu_impl.h"
#define MPID_NEM_TOFU_DEBUG_SEND
#ifdef MPID_NEM_TOFU_DEBUG_SEND
#define dprintf printf
#else
#define dprintf(...)
#endif
#undef FUNCNAME
#define FUNCNAME MPID_nem_tofu_isend
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPID_nem_tofu_isend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Datatype datatype,
int dest, int tag, MPID_Comm *comm, int context_offset,
struct MPID_Request **request )
struct MPID_Request **req_out)
{
int mpi_errno = MPI_SUCCESS;
int mpi_errno = MPI_SUCCESS, llc_errno;
int dt_contig;
MPIDI_msg_sz_t data_sz;
MPID_Datatype *dt_ptr;
MPI_Aint dt_true_lb;
int i;
MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TOFU_ISEND);
MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TOFU_ISEND);
dprintf("tofu_isend,%d->%d,buf=%p,count=%d,datatype=%08x,dest=%d,tag=%08x,comm=%p,context_offset=%d\n",
MPIDI_Process.my_pg_rank, vc->pg_rank, buf, count, datatype, dest, tag, comm, context_offset);
int LLC_my_rank;
LLC_comm_rank(LLC_COMM_WORLD, &LLC_my_rank);
dprintf("tofu_isend,LLC_my_rank=%d\n", LLC_my_rank);
struct MPID_Request * sreq = MPID_Request_create();
MPIU_Assert(sreq != NULL);
MPIU_Object_set_ref(sreq, 2);
sreq->kind = MPID_REQUEST_SEND;
/* Used in tofullc_poll --> MPID_nem_tofu_send_handler */
sreq->ch.vc = vc;
sreq->dev.OnDataAvail = 0;
/* Don't save iov_offset because it's not used. */
/* Save it because it's used in send_handler */
sreq->dev.datatype = datatype;
dprintf("tofu_isend,remote_endpoint_addr=%ld\n", VC_FIELD(vc, remote_endpoint_addr));
LLC_cmd_t *cmd = LLC_cmd_alloc(1);
cmd[0].opcode = LLC_OPCODE_SEND;
cmd[0].comm = LLC_COMM_WORLD;
cmd[0].rank = vc_tofu->remote_endpoint_addr;
cmd[0].rank = VC_FIELD(vc, remote_endpoint_addr);
cmd[0].req_id = cmd;
/* Prepare bit-vector to perform tag-match. We use the same bit-vector as in CH3 layer. */
......@@ -41,12 +71,20 @@ int MPID_nem_tofu_isend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Dat
memset((uint8_t*)&cmd[0].match.bits + sizeof(MPIDI_Message_match_parts_t),
0, sizeof(LLC_match_t) - sizeof(MPIDI_Message_match_parts_t));
dprintf("tofu_isend,match.bits=");
for(i = 0; i < sizeof(LLC_match_t); i++) {
dprintf("%02x", cmd[0].match.bits[i]);
}
dprintf("\n");
/* Prepare RDMA-write from buffer */
MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr,
dt_true_lb);
dprintf("tofu_isend,dt_contig=%d,data_sz=%ld\n",
dt_contig, data_sz);
void *write_from_buf;
const void *write_from_buf;
if (dt_contig) {
write_from_buf = buf;
}
......@@ -60,13 +98,13 @@ int MPID_nem_tofu_isend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Dat
MPIDI_msg_sz_t segment_size = data_sz;
MPIDI_msg_sz_t last = segment_size;
MPIU_Assert(last > 0);
REQ_FIELD(req, pack_buf) = MPIU_Malloc((size_t) data_sz);
MPIU_ERR_CHKANDJUMP(!REQ_FIELD(req, pack_buf), mpi_errno, MPI_ERR_OTHER,
REQ_FIELD(sreq, pack_buf) = MPIU_Malloc((size_t) data_sz);
MPIU_ERR_CHKANDJUMP(!REQ_FIELD(sreq, pack_buf), mpi_errno, MPI_ERR_OTHER,
"**outofmemory");
MPID_Segment_pack(segment_ptr, segment_first, &last,
(char *) (REQ_FIELD(req, pack_buf)));
(char *) (REQ_FIELD(sreq, pack_buf)));
MPIU_Assert(last == data_sz);
write_from_buf = REQ_FIELD(req, lmt_pack_buf);
write_from_buf = REQ_FIELD(sreq, pack_buf);
}
cmd[0].iov_local = LLC_iov_alloc(1);
......@@ -79,10 +117,14 @@ int MPID_nem_tofu_isend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Dat
cmd[0].iov_remote[0].length = data_sz;
cmd[0].niov_remote = 1;
((struct llctofu_cmd_area *)cmd[0].usr_area)->cbarg = sreq;
((struct llctofu_cmd_area *)cmd[0].usr_area)->raddr = VC_FIELD(vc, remote_endpoint_addr);
llc_errno = LLC_post(cmd, 1);
MPIU_ERR_CHKANDJUMP(llc_errno != LLC_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**LLC_post");
fn_exit:
*req_out = sreq;
MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TOFU_ISEND);
return mpi_errno;
fn_fail:
......@@ -103,6 +145,9 @@ int MPID_nem_tofu_iStartContigMsg(MPIDI_VC_t *vc, void *hdr, MPIDI_msg_sz_t hdr_
MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TOFU_ISTARTCONTIGMSG);
dprintf("tofu_iStartContigMsg,%d->%d,hdr=%p,hdr_sz=%ld,data=%p,data_sz=%ld\n",
MPIDI_Process.my_pg_rank, vc->pg_rank, hdr, hdr_sz, data, data_sz);
MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "tofu_iStartContigMsg");
MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)hdr);
......@@ -197,6 +242,9 @@ int MPID_nem_tofu_iSendContig(MPIDI_VC_t *vc, MPID_Request *sreq, void *hdr, MPI
MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TOFU_ISENDCONTIGMSG);
dprintf("tofu_iSendConitig,sreq=%p,hdr=%p,hdr_sz=%ld,data=%p,data_sz=%ld\n",
sreq, hdr, hdr_sz, data, data_sz);
MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "tofu_iSendContig");
MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)hdr);
......@@ -403,17 +451,14 @@ int MPIDI_nem_tofu_Rqst_iov_update(MPID_Request *mreq, MPIDI_msg_sz_t consume)
return ret;
}
struct llctofu_cmd_area {
void *cbarg;
uint32_t raddr;
};
ssize_t llctofu_writev(void *endpt, uint64_t raddr,
const struct iovec *iovs, int niov, void *cbarg, void **vpp_reqid)
{
ssize_t nw = 0;
LLC_cmd_t *lcmd = 0;
dprintf("writev,raddr=%ld,niov=%d,sreq=%p", raddr, niov, cbarg);
MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "llctofu_writev(%d)", (int)raddr);
{
uint8_t *buff = 0;
......@@ -445,7 +490,11 @@ ssize_t llctofu_writev(void *endpt, uint64_t raddr,
nw = -1; /* ENOMEM */
goto bad;
}
lcmd[0].iov_local = LLC_iov_alloc(1);
lcmd[0].iov_remote = LLC_iov_alloc(1);
lcmd->opcode = LLC_OPCODE_UNSOLICITED;
lcmd->comm = LLC_COMM_WORLD;
lcmd->rank = (uint32_t)raddr; /* XXX */
lcmd->req_id = lcmd;
......@@ -542,7 +591,7 @@ int llctofu_poll(int in_blocking_poll,
int llc_errno;
int nevents;
LLC_event_t events[1];
while(1) {
llc_errno = LLC_poll(1, events, &nevents);
MPIU_ERR_CHKANDJUMP(llc_errno, mpi_errno, MPI_ERR_OTHER, "**LLC_poll");
......@@ -557,13 +606,32 @@ int llctofu_poll(int in_blocking_poll,
MPIU_Assert(nevents == 1);
switch(events[0].type) {
case LLC_EVENT_SEND_LEFT:
case LLC_EVENT_UNSOLICITED_LEFT: {
case LLC_EVENT_SEND_LEFT: {
dprintf("llctofu_poll,EVENT_SEND_LEFT\n");
lcmd = events[0].side.initiator.req_id;
MPIU_Assert(lcmd != 0);
MPIU_Assert(lcmd->opcode == LLC_OPCODE_SEND);
if(events[0].side.initiator.error_code != LLC_ERROR_SUCCESS) {
printf("llctofu_poll,error_code=%d\n", events[0].side.initiator.error_code);
MPID_nem_tofu_segv;
}
/* Call send_handler. First arg is a pointer to MPID_Request */
(*sfnc)(((struct llctofu_cmd_area *)lcmd->usr_area)->cbarg, &reqid);
/* Don't free iov_local[0].addr */
llc_errno = LLC_cmd_free(lcmd, 1);
MPIU_ERR_CHKANDJUMP(llc_errno, mpi_errno, MPI_ERR_OTHER, "**LLC_cmd_free");
break; }
case LLC_EVENT_UNSOLICITED_LEFT: {
dprintf("llctofu_poll,EVENT_UNSOLICITED_LEFT\n");
lcmd = events[0].side.initiator.req_id;
MPIU_Assert(lcmd != 0);
MPIU_Assert(lcmd->opcode == LLC_OPCODE_UNSOLICITED);
struct llctofu_cmd_area *usr;
usr = (void *)lcmd->usr_area;
vp_sreq = usr->cbarg;
......@@ -583,6 +651,7 @@ int llctofu_poll(int in_blocking_poll,
break; }
case LLC_EVENT_UNSOLICITED_ARRIVED: {
dprintf("llctofu_poll,EVENT_UNSOLICITED_ARRIVED\n");
void *vp_vc = 0;
uint64_t addr;
void *buff;
......@@ -602,10 +671,11 @@ int llctofu_poll(int in_blocking_poll,
break; }
case LLC_EVENT_RECV_MATCHED: {
dprintf("llctofu_poll,EVENT_RECV_MATCHED\n");
lcmd = events[0].side.initiator.req_id;
MPID_Request *req = ((struct llctofu_cmd_area*)lcmd->usr_area)->cbarg;
/* unpack non-contiguous dt */
/* Unpack non-contiguous dt */
int is_contig;
MPID_Datatype_is_contig(req->dev.datatype, &is_contig);
if (!is_contig) {
......@@ -613,10 +683,11 @@ int llctofu_poll(int in_blocking_poll,
/* see MPIDI_CH3U_Request_unpack_uebuf (in /src/mpid/ch3/src/ch3u_request.c) */
/* or MPIDI_CH3U_Receive_data_found (in src/mpid/ch3/src/ch3u_handle_recv_pkt.c) */
MPIDI_msg_sz_t unpack_sz = req->ch.lmt_data_sz;
MPIDI_msg_sz_t unpack_sz = req->dev.recv_data_sz;
MPID_Segment seg;
MPI_Aint last;
/* user_buf etc. are set in MPID_irecv --> MPIDI_CH3U_Recvq_FDU_or_AEP */
MPID_Segment_init(req->dev.user_buf, req->dev.user_count, req->dev.datatype, &seg,
0);
last = unpack_sz;
......@@ -634,12 +705,11 @@ int llctofu_poll(int in_blocking_poll,
}
dprintf("llctofu_poll,ref_count=%d,pack_buf=%p\n", req->ref_count,
REQ_FIELD(req, pack_buf));
MPIU_Free(REQ_FIELD(req, pack_buf), (size_t) req->ch.lmt_data_sz);
MPIU_Free(REQ_FIELD(req, pack_buf));
}
/* Dequeue request from posted queue.
A request is posted to the queue, for example, in the following path.
MPID_Irecv --> MPIDI_CH3U_Recvq_FDU_or_AEP */
It's posted in MPID_Irecv --> MPIDI_CH3U_Recvq_FDU_or_AEP */
int found = MPIDI_CH3U_Recvq_DP(req);
MPIU_Assert(found);
......
......@@ -8,14 +8,40 @@
#include "mpid_nem_impl.h"
#include "tofu_impl.h"
#ifdef NOTDEF
#include <llc/llc-tofu.h>