Commit bb3456d9 authored by Pavan Balaji's avatar Pavan Balaji
Browse files

Added Intel-contributed patches for SCIF.

parent 0ff12df5
......@@ -1592,6 +1592,8 @@ is too big (> MPIU_SHMW_GHND_SZ)
**scif_poll_send %s:scif_poll_send failed (scif_poll_send failed with error '%s')
**poll error:poll error
**loadsendiov:sendiov failed
**MPIDI_PG_Get_vc:MPIDI_PG_Get_vc failed
**MPIDI_PG_Get_vc %s:MPIDI_PG_Get_vc failed (MPIDI_PG_Get_vc failed with error '%s')
# Datarep conversion function not supported by ROMIO (Temporary until implemented)
**drconvnotsupported:Read and Write datarep conversions are currently not supported by MPI-IO
......
......@@ -4,7 +4,7 @@
* See COPYRIGHT in top-level directory.
*
* Portions of this code were written by Intel Corporation.
* Copyright (C) 2011-2012 Intel Corporation. Intel provides this material
* Copyright (C) 2011-2013 Intel Corporation. Intel provides this material
* to Argonne National Laboratory subject to Software Grant and Corporate
* Contributor License Agreement dated February 8, 2012.
*/
......@@ -41,6 +41,54 @@ int MPID_nem_scif_myrank;
static int listen_fd;
static int listen_port;
#undef FUNCNAME
#define FUNCNAME MPID_nem_scif_post_init
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/*
 * MPID_nem_scif_post_init - re-install the SCIF send hooks on remote VCs.
 *
 * Walks the first MPID_nem_scif_nranks entries of this process group's VC
 * table and, for every VC that is neither this process itself nor marked
 * local, resets the three send function pointers to the SCIF
 * implementations.  MPID_nem_scif_init() registers this routine through
 * MPID_nem_register_initcomp_cb().
 *
 * Returns mpi_errno, which no visible code path changes from MPI_SUCCESS.
 */
int MPID_nem_scif_post_init(void)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_PG_t *my_pg = MPIDI_Process.my_pg;
    MPIDI_VC_t *vc;
    MPIDI_CH3I_VC *vc_ch;
    int i;
    /* NOTE(review): the state token below lacks the MPID_STATE_ prefix used
     * by the other functions in this file (e.g.
     * MPID_STATE_MPID_NEM_SCIF_INIT) -- confirm this is intended. */
    MPIDI_STATE_DECL(MPID_NEM_SCIF_POST_INIT);

    MPIDI_FUNC_ENTER(MPID_NEM_SCIF_POST_INIT);

    for (i = 0; i < MPID_nem_scif_nranks; i++) {
        vc = &my_pg->vct[i];
        vc_ch = &vc->ch;

        /* Skip ourselves and any peer flagged as local. */
        if (vc->pg_rank == MPID_nem_scif_myrank || vc_ch->is_local) {
            continue;
        }

        /* Restore values which might have been overwritten during
         * MPID_nem_vc_init(). */
        vc->sendNoncontig_fn = MPID_nem_scif_SendNoncontig;
        vc_ch->iStartContigMsg = MPID_nem_scif_iStartContigMsg;
        vc_ch->iSendContig = MPID_nem_scif_iSendContig;
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_NEM_SCIF_POST_INIT);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPID_nem_scif_init
#undef FCNAME
......@@ -48,6 +96,7 @@ static int listen_port;
int MPID_nem_scif_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_max_sz_p)
{
int mpi_errno = MPI_SUCCESS;
int pmi_errno;
int ret;
int i;
MPIU_CHKPMEM_DECL(2);
......@@ -63,7 +112,7 @@ int MPID_nem_scif_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val
MPID_nem_scif_myrank = pg_rank;
/* set up listener socket */
if (MPID_nem_scif_myrank < MPID_nem_scif_nranks - 1) {
{
listen_fd = scif_open();
MPIU_ERR_CHKANDJUMP1(listen_fd == -1, mpi_errno, MPI_ERR_OTHER,
"**scif_open", "**scif_open %s", MPIU_Strerror(errno));
......@@ -92,6 +141,10 @@ int MPID_nem_scif_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val
MPIU_CHKPMEM_MALLOC(MPID_nem_scif_recv_buf, char *,
MPID_NEM_SCIF_RECV_MAX_PKT_LEN, mpi_errno, "SCIF temporary buffer");
MPIU_CHKPMEM_COMMIT();
mpi_errno = MPID_nem_register_initcomp_cb(MPID_nem_scif_post_init);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
pmi_errno = PMI_Barrier();
MPIU_ERR_CHKANDJUMP1 (pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**pmi_barrier", "**pmi_barrier %d", pmi_errno);
fn_exit:
MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_SCIF_INIT);
......@@ -105,7 +158,7 @@ int MPID_nem_scif_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val
#undef FUNCNAME
#define FUNCNAME MPID_nem_scif_get_business_card
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPID_nem_scif_get_business_card(int my_rank, char **bc_val_p, int *val_max_sz_p)
{
int mpi_errno = MPI_SUCCESS;
......@@ -232,31 +285,36 @@ static int get_addr(MPIDI_VC_t * vc, struct scif_portID *addr)
int MPID_nem_scif_vc_init(MPIDI_VC_t * vc)
{
int mpi_errno = MPI_SUCCESS;
MPIDI_CH3I_VC *vc_ch = &vc->ch;
MPID_nem_scif_vc_area *vc_scif = VC_SCIF(vc);
MPIDI_CH3I_VC *vc_ch;
MPID_nem_scif_vc_area *vc_scif;
int ret;
size_t s;
scifconn_t *sc;
off_t offset;
int peer_rank;
MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_SCIF_VC_INIT);
MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_SCIF_VC_INIT);
vc->sendNoncontig_fn = MPID_nem_scif_SendNoncontig;
vc_ch->iStartContigMsg = MPID_nem_scif_iStartContigMsg;
vc_ch->iSendContig = MPID_nem_scif_iSendContig;
vc_ch->next = NULL;
vc_ch->prev = NULL;
ASSIGN_SC_TO_VC(vc_scif, NULL);
vc_scif->send_queue.head = vc_scif->send_queue.tail = NULL;
vc_scif->sc = sc = &MPID_nem_scif_conns[vc->pg_rank];
vc_scif->terminate = 0;
sc->vc = vc;
/* do the connection */
if (vc->pg_rank < MPID_nem_scif_myrank) {
vc_ch = &vc->ch;
vc_scif = VC_SCIF(vc);
vc->sendNoncontig_fn = MPID_nem_scif_SendNoncontig;
vc_ch->iStartContigMsg = MPID_nem_scif_iStartContigMsg;
vc_ch->iSendContig = MPID_nem_scif_iSendContig;
vc_ch->pkt_handler = NULL; // pkt_handlers;
vc_ch->num_pkt_handlers = 0; // MPIDI_NEM_SCIF_PKT_NUM_TYPES;
vc_ch->next = NULL;
vc_ch->prev = NULL;
ASSIGN_SC_TO_VC(vc_scif, NULL);
vc_scif->send_queue.head = vc_scif->send_queue.tail = NULL;
vc_scif->sc = sc = &MPID_nem_scif_conns[vc->pg_rank];
vc_scif->terminate = 0;
sc->vc = vc;
sc->fd = scif_open();
MPIU_ERR_CHKANDJUMP1(sc->fd == -1, mpi_errno, MPI_ERR_OTHER,
"**scif_open", "**scif_open %s", MPIU_Strerror(errno));
......@@ -266,11 +324,43 @@ int MPID_nem_scif_vc_init(MPIDI_VC_t * vc)
ret = scif_connect(sc->fd, &sc->addr);
MPIU_ERR_CHKANDJUMP1(ret == -1, mpi_errno, MPI_ERR_OTHER,
"**scif_connect", "**scif_connect %s", MPIU_Strerror(errno));
s = scif_send(sc->fd, &MPID_nem_scif_myrank, sizeof(MPID_nem_scif_myrank), SCIF_SEND_BLOCK);
MPIU_ERR_CHKANDJUMP1(s != sizeof(MPID_nem_scif_myrank), mpi_errno, MPI_ERR_OTHER, "**scif_send", "**scif_send %s", MPIU_Strerror(errno));
}
else {
ret = scif_accept(listen_fd, &sc->addr, &sc->fd, SCIF_ACCEPT_SYNC);
struct scif_portID portID;
int fd;
// We can accept a connection from any peer, not necessarily from vc->pg_rank,
// so we need to determine the actual peer and adjust vc accordingly.
ret = scif_accept(listen_fd, &portID, &fd, SCIF_ACCEPT_SYNC);
MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER,
"**scif_accept", "**scif_accept %s", MPIU_Strerror(errno));
s = scif_recv(fd, &peer_rank, sizeof(peer_rank), SCIF_RECV_BLOCK);
MPIU_ERR_CHKANDJUMP1(s != sizeof(peer_rank), mpi_errno, MPI_ERR_OTHER, "**scif_recv", "**scif_recv %s", MPIU_Strerror(errno));
// check and adjust vc
if( peer_rank != vc->pg_rank ) {
// get another vc
MPIDI_PG_Get_vc(MPIDI_Process.my_pg, peer_rank, &vc);
// check that the newly obtained vc corresponds to the actual peer_rank
MPIU_ERR_CHKANDJUMP1( peer_rank != vc->pg_rank, mpi_errno, MPI_ERR_OTHER, "**MPIDI_PG_Get_vc", "**MPIDI_PG_Get_vc %s", "wrong vc after accept");
}
vc_ch = &vc->ch;
vc_scif = VC_SCIF(vc);
vc->sendNoncontig_fn = MPID_nem_scif_SendNoncontig;
vc_ch->iStartContigMsg = MPID_nem_scif_iStartContigMsg;
vc_ch->iSendContig = MPID_nem_scif_iSendContig;
vc_ch->pkt_handler = NULL; // pkt_handlers;
vc_ch->num_pkt_handlers = 0; // MPIDI_NEM_SCIF_PKT_NUM_TYPES;
vc_ch->next = NULL;
vc_ch->prev = NULL;
ASSIGN_SC_TO_VC(vc_scif, NULL);
vc_scif->send_queue.head = vc_scif->send_queue.tail = NULL;
vc_scif->sc = sc = &MPID_nem_scif_conns[vc->pg_rank];
vc_scif->terminate = 0;
sc->vc = vc;
sc->addr = portID;
sc->fd = fd;
}
MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
ret = MPID_nem_scif_init_shmsend(&sc->csend, sc->fd, vc->pg_rank);
......
......@@ -4,7 +4,7 @@
* See COPYRIGHT in top-level directory.
*
* Portions of this code were written by Intel Corporation.
* Copyright (C) 2011-2012 Intel Corporation. Intel provides this material
* Copyright (C) 2011-2013 Intel Corporation. Intel provides this material
* to Argonne National Laboratory subject to Software Grant and Corporate
* Contributor License Agreement dated February 8, 2012.
*/
......@@ -108,11 +108,7 @@ static regmem_t *regmem(int ep, shmchan_t * c, void *addr, size_t len)
base = (uint64_t) addr & ~(pagesize - 1);
size = ((uint64_t) addr + len + pagesize - 1) & ~(pagesize - 1);
size -= base;
for (rp = c->reg; rp != 0; rp = rp->next) {
if (base >= (uint64_t) rp->base && base < (uint64_t) rp->base + rp->size)
if (base + size <= (uint64_t) rp->base + rp->size)
return rp;
}
rp = malloc(sizeof(regmem_t));
rp->base = (char *) base;
rp->size = size;
......@@ -160,17 +156,12 @@ static int dma_read(int ep, shmchan_t * c, void *recv_buf, off_t raddr, size_t m
if (buflen >= msglen &&
((off_t) recv_buf & (CACHE_LINESIZE - 1)) ==
((raddr + c->pos) & (CACHE_LINESIZE - 1))) {
rp = regmem(ep, c, recv_buf, buflen);
if (rp == 0) {
retval = -1;
goto fn_exit;
}
locoffs = (char *) recv_buf - rp->base;
retval = scif_readfrom(ep, rp->offset + locoffs, buflen, raddr + c->pos, 0);
retval = scif_vreadfrom(ep, recv_buf, buflen, raddr + c->pos, 0);
if (retval < 0) {
fprintf(stderr, "scif_readfrom #1 returns %d, errno %d\n", retval, errno);
fprintf(stderr, "locoffs: 0x%lx raddr: 0x%lx buflen: %ld\n",
rp->offset + locoffs, raddr + c->pos, buflen);
fprintf(stderr, "scif_vreadfrom #1 returns %d, errno %d\n", retval, errno);
fprintf(stderr, "recv_buf: %p raddr: 0x%lx buflen: %ld\n",
recv_buf, raddr + c->pos, buflen);
}
*did_dma = 1;
goto fn_exit;
......@@ -184,20 +175,15 @@ static int dma_read(int ep, shmchan_t * c, void *recv_buf, off_t raddr, size_t m
void *p;
off_t offset;
if (c->dmalen) {
scif_unregister(ep, c->dmaoffset, c->dmalen);
free(c->dmabuf);
}
retval = posix_memalign(&p, pagesize, newbufsiz);
if (retval != 0)
goto fn_exit;
offset = scif_register(ep, p, newbufsiz, 0, SCIF_PROT_READ | SCIF_PROT_WRITE, 0);
if (offset == SCIF_REGISTER_FAILED) {
retval = errno;
free(p);
}
c->dmabuf = p;
c->dmalen = newbufsiz;
c->dmaoffset = offset;
c->dmaoffset = 0;
c->dmastart = -1;
c->dmaend = 0;
}
......@@ -213,7 +199,8 @@ static int dma_read(int ep, shmchan_t * c, void *recv_buf, off_t raddr, size_t m
}
locoffs = c->dmaoffset + c->dmastart;
assert(c->pos == 0);
retval = scif_readfrom(ep, locoffs, msglen, raddr, 0);
retval = scif_vreadfrom(ep, c->dmabuf + c->dmastart, msglen, raddr, 0);
*did_dma = 1;
scif_fence_mark(ep, SCIF_FENCE_INIT_SELF, &mark);
scif_fence_wait(ep, mark);
if (retval < 0)
......@@ -276,7 +263,7 @@ static ssize_t getmsg(int ep, shmchan_t * c, void *recv_buf, size_t len, int *di
}
/* Read at most one message */
#if 0
static ssize_t do_scif_read(int ep, shmchan_t * c, void *recv_buf, size_t len, int *did_dma)
{
ssize_t retval = 0;
......@@ -313,42 +300,6 @@ static ssize_t do_scif_read(int ep, shmchan_t * c, void *recv_buf, size_t len, i
fn_exit:
return nread;
}
#else
/*
 * do_scif_read - read at most one message (or the rest of one) from a channel.
 *
 * If a partially consumed message is pending (c->pos >= 0), reading continues
 * from it.  Otherwise the remote sequence number (*c->rseqno) is compared
 * with the local one (c->seqno); when the remote value is not ahead, nothing
 * is available and 0 is returned.  When a new message is available the local
 * sequence number is advanced, c->pos is reset to 0 and getmsg() is called.
 *
 * ep, recv_buf, len and did_dma are passed through to getmsg() unchanged.
 *
 * Returns the value returned by getmsg(), 0 when no message is available,
 * or -1 when getmsg() reports a negative result.
 */
static ssize_t do_scif_read(int ep, shmchan_t * c, void *recv_buf, size_t len, int *did_dma)
{
    /* Signed on purpose: the function reports failure as -1. */
    ssize_t nread;
    uint64_t rseqno;

    if (c->pos < 0) {
        /* No partial message chunk left: check if we have a new message. */
        rseqno = *c->rseqno;
        if (rseqno <= c->seqno) {
            return 0;
        }
        /* Message is available. */
        ++c->seqno;
        c->pos = 0;
    }

    nread = getmsg(ep, c, recv_buf, len, did_dma);
    if (nread < 0) {
        return -1;
    }
    return nread;
}
#endif
ssize_t MPID_nem_scif_read(int ep, shmchan_t * c, void *recv_buf, size_t len)
{
......@@ -386,6 +337,8 @@ ssize_t MPID_nem_scif_readv(int ep, shmchan_t * c, const struct iovec * iov, int
if (retval == 0)
break;
nread += retval;
if (retval < iov[i].iov_len)
break;
}
if (retval > 0 && did_dma) {
scif_fence_mark(ep, SCIF_FENCE_INIT_SELF, &mark);
......@@ -423,6 +376,8 @@ ssize_t MPID_nem_scif_writev(int ep, shmchan_t * c, const struct iovec * iov, in
size_t nwritten = 0;
int did_dma = 0;
int i;
int mark;
regmem_t *rp;
for (i = 0; i < iov_cnt; ++i) {
size_t len;
......@@ -459,7 +414,7 @@ ssize_t MPID_nem_scif_writev(int ep, shmchan_t * c, const struct iovec * iov, in
}
else {
did_dma = 1;
regmem_t *rp = regmem(ep, c, iov[i].iov_base, iovlen);
rp = regmem(ep, c, iov[i].iov_base, iovlen);
if (rp == 0) {
nwritten = -1;
goto fn_exit;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment