Commit de37df90 authored by Rob Latham
Browse files

[svn-r6683] from Michael Raymond <mraymond@sgi.com>: clean up XFS direct i/o

parent e766e103
......@@ -8,22 +8,62 @@
#include "ad_xfs.h"
#include "adio_extern.h"
static unsigned xfs_direct_read_chunk_size;
static unsigned xfs_direct_write_chunk_size;
void ADIOI_XFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
char *value;
char *value, * c;
int flag;
static char xfs_initialized = 0;
if (fd->info == MPI_INFO_NULL) MPI_Info_create(&(fd->info));
/* the nightly builds say something is calling MPI_Info_set w/ a null info,
* so protect the calls to MPI_Info_set */
if (fd->info != MPI_INFO_NULL ) {
MPI_Info_set(fd->info, "direct_read", "false");
MPI_Info_set(fd->info, "direct_write", "false");
fd->direct_read = fd->direct_write = 0;
}
/* has user specified values for keys "direct_read" and "direct_write"? */
MPI_Info_set(fd->info, "direct_read", "false");
MPI_Info_set(fd->info, "direct_write", "false");
fd->direct_read = fd->direct_write = 0;
if (!xfs_initialized) {
xfs_initialized = 1;
c = getenv("MPIO_DIRECT_READ_CHUNK_SIZE");
if (c) {
int io;
io = atoi(c);
if (io <= 0) {
fprintf(stderr,
"MPI: Ignoring an invalid setting for MPIO_DIRECT_READ_CHUNK_SIZE.\n"
" It must be set to a positive integer value.\n");
} else {
xfs_direct_read_chunk_size = io;
}
} else {
xfs_direct_read_chunk_size = 0;
}
c = getenv("MPIO_DIRECT_WRITE_CHUNK_SIZE");
if (c) {
int io;
io = atoi(c);
if (io <= 0) {
fprintf(stderr,
"MPI: Ignoring an invalid setting for MPIO_DIRECT_WRITE_CHUNK_SIZE.\n"
" It must be set to a positive integer value.\n");
} else {
xfs_direct_write_chunk_size = io;
}
} else {
xfs_direct_write_chunk_size = 0;
}
}
if (!fd->hints->initialized) {
fd->hints->fs_hints.xfs.read_chunk_sz =
xfs_direct_read_chunk_size;
fd->hints->fs_hints.xfs.write_chunk_sz =
xfs_direct_write_chunk_size;
}
/* has user specified values for keys "direct_read" and "direct_write"? */
if (users_info != MPI_INFO_NULL) {
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
......@@ -47,8 +87,10 @@ void ADIOI_XFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
/* set the values for collective I/O and data sieving parameters */
ADIOI_GEN_SetInfo(fd, users_info, error_code);
/* Environment variables override MPI_Info hints */
if (ADIOI_Direct_read) fd->direct_read = 1;
if (ADIOI_Direct_write) fd->direct_write = 1;
/* environment variables checked in ADIO_Init */
*error_code = MPI_SUCCESS;
......
......@@ -5,26 +5,26 @@
* See COPYRIGHT notice in top-level directory.
*/
#define _GNU_SOURCE // for O_DIRECT
#include "ad_xfs.h"
#include <sys/ioctl.h>
#ifdef HAVE_STDDEF_H
#include <stddef.h>
#endif
#if defined(MPISGI)
#include <mpitypedefs.h>
#include <mpifunctions.h>
#endif
#ifndef HAVE_LSEEK64
#define lseek64 lseek
#endif
void ADIOI_XFS_Open(ADIO_File fd, int *error_code)
{
int perm, amode, amode_direct;
int perm, amode, amode_direct, factor;
unsigned int old_mask;
struct dioattr st;
static char myname[] = "ADIOI_XFS_OPEN";
unsigned read_chunk_sz = fd->hints->fs_hints.xfs.read_chunk_sz;
unsigned write_chunk_sz = fd->hints->fs_hints.xfs.write_chunk_sz;
if (fd->perm == ADIO_PERM_NULL) {
old_mask = umask(022);
......@@ -53,7 +53,7 @@ void ADIOI_XFS_Open(ADIO_File fd, int *error_code)
fd->fd_direct = open(fd->filename, amode_direct, perm);
if (fd->fd_direct != -1) {
#if defined(LINUX) && defined(MPISGI)
#if defined(MPISGI)
ioctl(fd->fd_direct, XFS_IOC_DIOINFO, &st);
#else
fcntl(fd->fd_direct, F_DIOINFO, &st);
......@@ -61,7 +61,34 @@ void ADIOI_XFS_Open(ADIO_File fd, int *error_code)
fd->d_mem = st.d_mem;
fd->d_miniosz = st.d_miniosz;
fd->d_maxiosz = st.d_maxiosz;
if (read_chunk_sz == 0) {
fd->hints->fs_hints.xfs.read_chunk_sz = st.d_maxiosz;
} else {
/*
* MPIO_DIRECT_READ_CHUNK_SIZE was set.
* Make read_chunk_sz a multiple of d_miniosz.
*/
factor = read_chunk_sz / fd->d_miniosz;
if (factor == 0 || read_chunk_sz != fd->d_miniosz * factor) {
fd->hints->fs_hints.xfs.read_chunk_sz =
fd->d_miniosz * (factor + 1);
}
}
if (write_chunk_sz == 0) {
fd->hints->fs_hints.xfs.write_chunk_sz = st.d_maxiosz;
} else {
/*
* MPIO_DIRECT_WRITE_CHUNK_SIZE was set.
* Make write_chunk_sz a multiple of d_miniosz.
*/
factor = write_chunk_sz / fd->d_miniosz;
if (factor == 0 || write_chunk_sz != fd->d_miniosz * factor) {
fd->hints->fs_hints.xfs.write_chunk_sz =
fd->d_miniosz * (factor + 1);
}
}
if (fd->d_mem > XFS_MEMALIGN) {
FPRINTF(stderr, "MPI: Run-time Direct-IO memory alignment, %d, does not match compile-time value, %d.\n",
......
......@@ -102,6 +102,7 @@ void ADIOI_XFS_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len,
ADIO_Offset offset, int *err)
{
int ntimes, rem, newrem, i, size, nbytes;
unsigned read_chunk_sz = fd->hints->fs_hints.xfs.read_chunk_sz;
/* memory buffer is aligned, offset in file is aligned,
io_size may or may not be of the right size.
......@@ -109,33 +110,33 @@ void ADIOI_XFS_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len,
use buffered I/O for remaining. */
if (!(len % fd->d_miniosz) &&
(len >= fd->d_miniosz) && (len <= fd->d_maxiosz))
(len >= fd->d_miniosz) && (len <= read_chunk_sz))
*err = pread(fd->fd_direct, buf, len, offset);
else if (len < fd->d_miniosz)
*err = pread(fd->fd_sys, buf, len, offset);
else if (len > fd->d_maxiosz) {
ntimes = len/(fd->d_maxiosz);
rem = len - ntimes * fd->d_maxiosz;
else if (len > read_chunk_sz) {
ntimes = len/(read_chunk_sz);
rem = len - ntimes * read_chunk_sz;
nbytes = 0;
for (i=0; i<ntimes; i++) {
nbytes += pread(fd->fd_direct, ((char *)buf) + i * fd->d_maxiosz,
fd->d_maxiosz, offset);
offset += fd->d_maxiosz;
nbytes += pread(fd->fd_direct, ((char *)buf) + i * read_chunk_sz,
read_chunk_sz, offset);
offset += read_chunk_sz;
}
if (rem) {
if (!(rem % fd->d_miniosz))
nbytes += pread(fd->fd_direct,
((char *)buf) + ntimes * fd->d_maxiosz, rem, offset);
((char *)buf) + ntimes * read_chunk_sz, rem, offset);
else {
newrem = rem % fd->d_miniosz;
size = rem - newrem;
if (size) {
nbytes += pread(fd->fd_direct,
((char *)buf) + ntimes * fd->d_maxiosz, size, offset);
((char *)buf) + ntimes * read_chunk_sz, size, offset);
offset += size;
}
nbytes += pread(fd->fd_sys,
((char *)buf) + ntimes*fd->d_maxiosz + size, newrem, offset);
((char *)buf) + ntimes * read_chunk_sz + size, newrem, offset);
}
}
*err = nbytes;
......
......@@ -13,14 +13,15 @@
/* style: allow:free:2 sig:0 */
static void ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len,
ADIO_Offset offset, int *err);
static int ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf,
ADIO_Offset len, ADIO_Offset offset);
void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int *error_code)
{
int err=-1, datatype_size, len, diff, size, nbytes;
int err=-1, datatype_size, diff, size;
ssize_t len;
void *newbuf;
static char myname[] = "ADIOI_XFS_WRITECONTIG";
......@@ -31,44 +32,48 @@ void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count,
if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
if (!(fd->direct_write)) /* direct I/O not enabled */
if (!(fd->direct_write)) { /* direct I/O not enabled */
err = pwrite(fd->fd_sys, buf, len, offset);
else { /* direct I/O enabled */
if (err < 0) {goto leaving;}
} else { /* direct I/O enabled */
/* (1) if mem_aligned && file_aligned
use direct I/O to write up to correct io_size
use buffered I/O for remaining */
if (!(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz))
ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, len, offset, &err);
if (!(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz)) {
err = ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, len, offset);
if (err < 0) {goto leaving;}
/* (2) if !file_aligned
use buffered I/O to write up to file_aligned
At that point, if still mem_aligned, use (1)
else copy into aligned buf and then use (1) */
else if (offset % fd->d_miniosz) {
} else if (offset % fd->d_miniosz) {
diff = fd->d_miniosz - (offset % fd->d_miniosz);
diff = ADIOI_MIN(diff, len);
nbytes = pwrite(fd->fd_sys, buf, diff, offset);
err = pwrite(fd->fd_sys, buf, diff, offset);
if (err < 0) {goto leaving;}
buf = ((char *) buf) + diff;
offset += diff;
size = len - diff;
if (!(((long) buf) % fd->d_mem)) {
ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, size, offset, &err);
nbytes += err;
err = ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, size, offset);
if (err < 0) {goto leaving;}
}
else {
newbuf = (void *) memalign(XFS_MEMALIGN, size);
if (newbuf) {
memcpy(newbuf, buf, size);
ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, size, offset, &err);
nbytes += err;
err = ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, size, offset);
ADIOI_Free(newbuf);
if (err < 0) {goto leaving;}
} else {
err = pwrite(fd->fd_sys, buf, size, offset);
if (err < 0) {goto leaving;}
}
else nbytes += pwrite(fd->fd_sys, buf, size, offset);
}
err = nbytes;
}
/* (3) if !mem_aligned && file_aligned
......@@ -77,19 +82,22 @@ void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count,
newbuf = (void *) memalign(XFS_MEMALIGN, len);
if (newbuf) {
memcpy(newbuf, buf, len);
ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, len, offset, &err);
err = ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, len, offset);
ADIOI_Free(newbuf);
} else {
err = pwrite(fd->fd_sys, buf, len, offset);
}
else err = pwrite(fd->fd_sys, buf, len, offset);
if (err < 0) {goto leaving;}
}
}
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += err;
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len;
#ifdef HAVE_STATUS_SET_BYTES
if (err != -1) MPIR_Status_set_bytes(status, datatype, err);
if (err != -1) MPIR_Status_set_bytes(status, datatype, len);
#endif
leaving:
if (err == -1) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO, "**io",
......@@ -99,10 +107,13 @@ void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count,
}
void ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len,
ADIO_Offset offset, int *err)
static int
ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, ADIO_Offset len,
ADIO_Offset offset)
{
int ntimes, rem, newrem, i, size, nbytes;
unsigned write_chunk_sz = fd->hints->fs_hints.xfs.write_chunk_sz;
ADIO_Offset nbytes, rem, newrem, size;
int ntimes, i;
/* memory buffer is aligned, offset in file is aligned,
io_size may or may not be of the right size.
......@@ -110,42 +121,50 @@ void ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len,
use buffered I/O for remaining. */
if (!(len % fd->d_miniosz) &&
(len >= fd->d_miniosz) && (len <= fd->d_maxiosz))
*err = pwrite(fd->fd_direct, buf, len, offset);
else if (len < fd->d_miniosz)
*err = pwrite(fd->fd_sys, buf, len, offset);
else if (len > fd->d_maxiosz) {
ntimes = len/(fd->d_maxiosz);
rem = len - ntimes * fd->d_maxiosz;
(len >= fd->d_miniosz) && (len <= write_chunk_sz)) {
nbytes = pwrite(fd->fd_direct, buf, len, offset);
if (nbytes < 0) {return -1;}
} else if (len < fd->d_miniosz) {
nbytes = pwrite(fd->fd_sys, buf, len, offset);
if (nbytes < 0) {return -1;}
} else if (len > write_chunk_sz) {
ntimes = len/(write_chunk_sz);
rem = len - ntimes * write_chunk_sz;
nbytes = 0;
for (i=0; i<ntimes; i++) {
nbytes += pwrite(fd->fd_direct, ((char *)buf) + i * fd->d_maxiosz,
fd->d_maxiosz, offset);
offset += fd->d_maxiosz;
nbytes = pwrite(fd->fd_direct, ((char *)buf) + i * write_chunk_sz,
write_chunk_sz, offset);
offset += write_chunk_sz;
if (nbytes < 0) {return -1;}
}
if (rem) {
if (!(rem % fd->d_miniosz))
nbytes += pwrite(fd->fd_direct,
((char *)buf) + ntimes * fd->d_maxiosz, rem, offset);
else {
if (!(rem % fd->d_miniosz)) {
nbytes = pwrite(fd->fd_direct,
((char *)buf) + ntimes * write_chunk_sz, rem, offset);
if (nbytes < 0) {return -1;}
} else {
newrem = rem % fd->d_miniosz;
size = rem - newrem;
if (size) {
nbytes += pwrite(fd->fd_direct,
((char *)buf) + ntimes * fd->d_maxiosz, size, offset);
nbytes = pwrite(fd->fd_direct,
((char *)buf) + ntimes * write_chunk_sz, size, offset);
offset += size;
if (nbytes < 0) {return -1;}
}
nbytes += pwrite(fd->fd_sys,
((char *)buf) + ntimes*fd->d_maxiosz + size, newrem, offset);
nbytes = pwrite(fd->fd_sys,
((char *)buf) + ntimes * write_chunk_sz + size, newrem, offset);
if (nbytes < 0) {return -1;}
}
}
*err = nbytes;
}
else {
rem = len % fd->d_miniosz;
size = len - rem;
nbytes = pwrite(fd->fd_direct, buf, size, offset);
nbytes += pwrite(fd->fd_sys, (char *)buf + size, rem, offset+size);
*err = nbytes;
if (nbytes < 0) {return -1;}
nbytes = pwrite(fd->fd_sys, (char *)buf + size, rem, offset+size);
if (nbytes < 0) {return -1;}
}
return 0;
}
......@@ -86,10 +86,24 @@ int ADIOI_GEN_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
int error_code;
struct aiocb *aiocbp;
ADIOI_AIO_Request *aio_req;
#if defined(ROMIO_XFS)
unsigned maxiosz = wr ? fd->hints->fs_hints.xfs.write_chunk_sz :
fd->hints->fs_hints.xfs.read_chunk_sz;
#endif /* ROMIO_XFS */
fd_sys = fd->fd_sys;
#if defined(ROMIO_XFS)
/* Use Direct I/O if desired and properly aligned */
if (fd->fns == &ADIO_XFS_operations &&
((wr && fd->direct_write) || (!wr && fd->direct_read)) &&
!(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz) &&
!(len % fd->d_miniosz) && (len >= fd->d_miniosz) &&
(len <= maxiosz)) {
fd_sys = fd->fd_direct;
}
#endif /* ROMIO_XFS */
aio_req = (ADIOI_AIO_Request*)ADIOI_Calloc(sizeof(ADIOI_AIO_Request), 1);
aiocbp = (struct aiocb *) ADIOI_Calloc(sizeof(struct aiocb), 1);
aiocbp->aio_offset = offset;
......
......@@ -195,7 +195,6 @@ typedef struct ADIOI_FileD {
unsigned d_mem; /* data buffer memory alignment */
unsigned d_miniosz; /* min xfer size, xfer size multiple,
and file seek offset alignment */
unsigned d_maxiosz; /* max xfer size */
ADIO_Offset fp_ind; /* individual file pointer in MPI-IO (in bytes)*/
ADIO_Offset fp_sys_posn; /* current location of the system file-pointer
in bytes */
......
......@@ -70,6 +70,10 @@ struct ADIOI_Hints_struct {
int coll_threshold;
int ds_in_coll;
} lustre;
struct {
unsigned read_chunk_sz; /* chunk size for direct reads */
unsigned write_chunk_sz; /* chunk size for direct writes */
} xfs;
} fs_hints;
};
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment