...
variables:
  NMC_FE1_SLURM_PARAMETERS: "--nodes=1 --partition=ecp-p9-4v100"
  ANL_THETA_PROJECT_SERVICE_USER: "cscstss"
  ANL_THETA_SCHEDULER_PARAMETERS: "-A CSC250STPR19 -n 1 -t 20 -q debug-flat-quad"

stages:
  - build

nmc:batch:env:
  stage: build
  tags:
    - nmc
  only:
    refs:
      - master
      - staging
    variables:
      - $ECP_MIRROR == "NMC"
  script:
    - uname -a
    - module avail
    - lspci
    - module load hwloc numactl cuda
    - module avail
    - lstopo
    - env
    - which gcc
    - gcc --version
    - compgen -c gcc

make:nmc-cuda-check:
  stage: build
  tags:
    - nmc
  only:
    refs:
      - master
      - staging
    variables:
      - $ECP_MIRROR == "NMC"
  script:
    - module load numactl hwloc cuda
    - env | grep -i cuda
    - cat /proc/self/status
    - ./autogen.sh
    - ./configure
    - make -j
    - make check VERBOSE=1
  artifacts:
    when: on_failure
    paths:
      - tests/test-suite.log
      - config.log

make:nmc-cuda-home:
  stage: build
  tags:
    - nmc
  only:
    refs:
      - master
      - staging
    variables:
      - $ECP_MIRROR == "NMC"
  script:
    - module load numactl hwloc cuda
    - env | grep -i cuda
    - ./autogen.sh
    - ./configure --with-cuda=$CUDA_HOME
    - make -j
    - make check VERBOSE=1
  artifacts:
    when: on_failure
    paths:
      - tests/test-suite.log
      - config.log

make:theta-batch:
  stage: build
  tags:
    - ecp-theta
    - batch
  only:
    refs:
      - master
      - staging
    variables:
      - $ECP_MIRROR == "THETA"
  script:
    - |
      cat > aml-ci-script.sh << EOF
      #!/bin/bash
      module list
      ./autogen.sh
      ./configure
      make -j63
      make check VERBOSE=1
      EOF
    - cat aml-ci-script.sh
    - chmod +x aml-ci-script.sh
    - aprun aml-ci-script.sh
  artifacts:
    when: on_failure
    paths:
      - tests/*.log
      - config.log
@@ -13,9 +13,15 @@ repoquality:
    - /^wip.*/
    - /^WIP.*/
  script:
    - git ls-files *.c *.h | grep -v -e benchmarks > .repoquality
    - nix run -f "$ARGOPKGS" repoquality --command repoquality
  tags:
    - integration
    - nix
    - kvm
  artifacts:
    when: on_failure
    paths:
      - .repoquality
checkpatch:
  stage: style
@@ -23,12 +29,36 @@ checkpatch:
    - /^wip.*/
    - /^WIP.*/
  tags:
    - integration
    - nix
    - kvm
  script:
    - git ls-files *.c *.h | grep -v -e tests -e benchmarks >> .checkpatch.conf
    - git ls-files *.c *.h | grep -v -e benchmarks >> .checkpatch.conf
    - nix run -f "$ARGOPKGS" checkpatch --command checkpatch.pl
style:docs:
  stage: style
  except:
    - /^wip.*/
    - /^WIP.*/
  tags:
    - nix
    - kvm
  script:
    - |
      nix-shell "$ARGOPKGS" -A aml-dist --arg aml-src ./. --run bash << EOF
      ./autogen.sh
      mkdir build
      ./configure --prefix=`pwd`/build --enable-docs
      make install-data
      EOF
  artifacts:
    when: on_failure
    paths:
      - config.log

make:generic:
  tags:
    - knl
  stage: build
  except:
    - /^wip.*/
@@ -49,6 +79,8 @@ make:generic:
      - tests/*.log

make:out-of-tree:
  tags:
    - knl
  stage: build
  except:
    - /^wip.*/
@@ -92,35 +124,17 @@ make:knl:

readthedocs:
  stage: docs
  except:
    - /^wip.*/
    - /^WIP.*/
  when: on_success
  only:
    - staging
    - master
    - /v[0-9]+\.[0-9]+\.x/
  tags:
    - integration
    - nix
    - kvm
  environment:
    name: rtd/$CI_COMMIT_REF_NAME
    url: https://argo-aml.readthedocs.io/en/$CI_COMMIT_REF_NAME
  script:
    - nix run nixpkgs.curl -c curl -X POST -d "branch=$CI_COMMIT_REF_NAME" -d "token=$READTHEDOCS_TOKEN" https://readthedocs.org/api/v2/webhook/argo-aml/83161/
    - nix run nixpkgs.curl -c curl -X POST -d "branches=$CI_COMMIT_REF_NAME" -d "token=$READTHEDOCS_TOKEN" https://readthedocs.org/api/v2/webhook/argo-aml/83161/

dist:
  stage: release
  except:
    - /^wip.*/
    - /^WIP.*/
  when: on_success
  only:
    - tags
  tags:
    - integration
  script:
    - nix-build "$ARGOPKGS" -A aml-dist --arg aml-src ./.
    - nix-shell "$ARGOPKGS" -A aml-dist --arg aml-src ./. --run "./release.sh CREATE $CI_JOB_ID $CI_PROJECT_ID $RELEASE_TOKEN $CI_COMMIT_REF_NAME"
  artifacts:
    when: on_success
    paths:
      - result/*.tar.gz
      - CHECKSUM
    expire_in: 1000y
src/*.c
src/area/*.c
src/dma/*.c
src/scratch/*.c
src/tiling/*.c
src/utils/*.c
include/*.h
include/aml/area/*.h
include/aml/dma/*.h
include/aml/scratch/*.h
include/aml/tiling/*.h
include/aml/utils/*.h
tests/area/*.c
tests/dma/*.c
tests/scratch/*.c
tests/tiling/*.c
tests/utils/*.c
benchmarks/dgemm_mkl.c
benchmarks/dgemm_noprefetch.c
benchmarks/dgemm_prefetch.c
benchmarks/stencil_pth_mt.c
benchmarks/stream_add_omp.c
benchmarks/stream_add_omp_mt.c
benchmarks/stream_add_omp_st.c
benchmarks/stream_add_pth_mt.c
benchmarks/stream_add_pth_st.c
benchmarks/utils.c
@@ -6,3 +6,4 @@ Nicolas Denoyelle <ndenoyelle@anl.gov>
Clement Foyer <cfoyer@cray.com>
Brice Videau <bvideau@anl.gov>
Aleksandr Danilin <danilin96@gmail.com>
Kyle Shaver <kshaver@anl.gov>
ACLOCAL_AMFLAGS = -I m4
SUBDIRS = src include tests doc
if ADD_BENCHMARKS
SUBDIRS += benchmarks
endif
#if ADD_BENCHMARKS
#SUBDIRS += benchmarks
#endif
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = aml.pc
......
@@ -34,16 +34,14 @@ int main(int argc, char *argv[])
long int N = atol(argv[3]);
unsigned long memsize = sizeof(double)*N*N;
aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
a = aml_area_mmap(slow, NULL, memsize);
b = aml_area_mmap(slow, NULL, memsize);
c = aml_area_mmap(fast, NULL, memsize);
a = aml_area_mmap(slow, memsize, NULL);
b = aml_area_mmap(slow, memsize, NULL);
c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
double alpha = 1.0, beta = 1.0;
......
@@ -77,17 +77,15 @@ int main(int argc, char* argv[])
assert(!aml_tiling_2d_create(&tiling_col, AML_TILING_TYPE_2D_COLMAJOR,
tilesize, memsize, N/T , N/T));
aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
/* allocation */
a = aml_area_mmap(slow, NULL, memsize);
b = aml_area_mmap(slow, NULL, memsize);
c = aml_area_mmap(fast, NULL, memsize);
a = aml_area_mmap(slow, memsize, NULL);
b = aml_area_mmap(slow, memsize, NULL);
c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
size_t ntilerows, ntilecols, tilerowsize, tilecolsize, rowsize, colsize;
......
@@ -105,20 +105,18 @@ int main(int argc, char* argv[])
assert(!aml_tiling_1d_create(&tiling_prefetch,
tilesize*(N/T), memsize));
aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
assert(!aml_dma_linux_seq_create(&dma, 2));
assert(!aml_dma_linux_seq_create(&dma, 2, NULL, NULL));
assert(!aml_scratch_par_create(&sa, fast, slow, dma, tiling_prefetch, (size_t)2, (size_t)2));
assert(!aml_scratch_par_create(&sb, fast, slow, dma, tiling_prefetch, (size_t)2, (size_t)2));
/* allocation */
a = aml_area_mmap(slow, NULL, memsize);
b = aml_area_mmap(slow, NULL, memsize);
c = aml_area_mmap(fast, NULL, memsize);
a = aml_area_mmap(slow, memsize, NULL);
b = aml_area_mmap(slow, memsize, NULL);
c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
size_t ntilerows, ntilecols, tilerowsize, tilecolsize, rowsize, colsize;
......
@@ -39,7 +39,7 @@ int main(int argc, char *argv[])
struct aml_area *slow = &aml_area_linux, *fast = &aml_area_linux;
struct aml_dma *dma;
assert(!aml_dma_create(&dma, 0));
assert(!aml_dma_create(&dma, 0, NULL, NULL));
void *a, *b, *c;
@@ -53,9 +53,9 @@ int main(int argc, char *argv[])
chunk_msz = MEMSIZE/(numthreads*CHUNKING);
esz = chunk_msz/sizeof(unsigned long);
}
a = aml_area_mmap(slow, NULL, MEMSIZE);
b = aml_area_mmap(slow, NULL, MEMSIZE);
c = aml_area_mmap(fast, NULL, MEMSIZE);
a = aml_area_mmap(slow, MEMSIZE, NULL);
b = aml_area_mmap(slow, MEMSIZE, NULL);
c = aml_area_mmap(fast, MEMSIZE, NULL);
assert(a != NULL && b != NULL && c != NULL);
/* create virtually accessible address range, backed by slow memory */
......
@@ -64,22 +64,20 @@ int main(int argc, char *argv[])
/* initialize all the supporting struct */
assert(!aml_tiling_1d_create(&tiling, tilesz, memsize));
aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
assert(!aml_dma_linux_par_create(&dma, numthreads*2));
assert(!aml_dma_linux_par_create(&dma, numthreads*2, NULL, NULL));
assert(!aml_scratch_seq_create(&sa, fast, slow, dma, tiling,
(size_t)2*numthreads, (size_t)1));
assert(!aml_scratch_seq_create(&sb, fast, slow, dma, tiling,
(size_t)2*numthreads, (size_t)1));
/* allocation */
a = aml_area_mmap(slow, NULL, memsize);
b = aml_area_mmap(slow, NULL, memsize);
c = aml_area_mmap(fast, NULL, memsize);
a = aml_area_mmap(slow, memsize, NULL);
b = aml_area_mmap(slow, memsize, NULL);
c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
unsigned long esize = memsize/sizeof(unsigned long);
......
@@ -95,22 +95,20 @@ int main(int argc, char *argv[])
/* initialize all the supporting struct */
assert(!aml_tiling_1d_create(&tiling, tilesz, memsize));
aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
assert(!aml_dma_linux_seq_create(&dma, numthreads*2));
assert(!aml_dma_linux_seq_create(&dma, numthreads*2, NULL, NULL));
assert(!aml_scratch_par_create(&sa, fast, slow, dma, tiling,
2*numthreads, numthreads));
assert(!aml_scratch_par_create(&sb, fast, slow, dma, tiling,
2*numthreads, numthreads));
/* allocation */
a = aml_area_mmap(slow, NULL, memsize);
b = aml_area_mmap(slow, NULL, memsize);
c = aml_area_mmap(fast, NULL, memsize);
a = aml_area_mmap(slow, memsize, NULL);
b = aml_area_mmap(slow, memsize, NULL);
c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
unsigned long esize = memsize/sizeof(unsigned long);
......
@@ -101,22 +101,20 @@ int main(int argc, char *argv[])
/* initialize all the supporting struct */
assert(!aml_tiling_1d_create(&tiling, tilesz, memsize));
aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
assert(!aml_dma_linux_seq_create(dma, (size_t)numthreads*4));
assert(!aml_dma_linux_seq_create(dma, (size_t)numthreads*4, NULL, NULL));
assert(!aml_scratch_par_create(&sa, fast, slow, dma, tiling,
(size_t)2*numthreads, (size_t)numthreads));
assert(!aml_scratch_par_create(&sb, fast, slow, dma, tiling,
(size_t)2*numthreads, (size_t)numthreads));
/* allocation */
a = aml_area_mmap(slow, NULL, memsize);
b = aml_area_mmap(slow, NULL, memsize);
c = aml_area_mmap(fast, NULL, memsize);
a = aml_area_mmap(slow, memsize, NULL);
b = aml_area_mmap(slow, memsize, NULL);
c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
unsigned long esize = memsize/sizeof(unsigned long);
@@ -127,7 +125,7 @@ int main(int argc, char *argv[])
}
/* run kernel */
struct winfo *wis = aml_area_mmap(slow, NULL, numthreads * sizeof(struct winfo));
struct winfo *wis = aml_area_mmap(slow, numthreads * sizeof(struct winfo), NULL);
for(unsigned long i = 0; i < numthreads; i++) {
wis[i].tid = i;
pthread_create(&wis[i].th, NULL, &th_work, (void*)&wis[i]);
......
@@ -266,9 +266,9 @@ main(int argc, char *argv[])
aml_init(&argc, &argv);
size_t size = sizeof(STREAM_TYPE)*(STREAM_ARRAY_SIZE+OFFSET);
struct aml_area *area = &aml_area_linux;
a = aml_area_mmap(area, NULL, size);
b = aml_area_mmap(area, NULL, size);
c = aml_area_mmap(area, NULL, size);
a = aml_area_mmap(area, size, NULL);
b = aml_area_mmap(area, size, NULL);
c = aml_area_mmap(area, size, NULL);
/* Get initial value for system clock. */
#pragma omp parallel for
......
@@ -86,62 +86,95 @@ AC_CHECK_LIB(numa, mbind,,[AC_MSG_ERROR([AML requires libnuma.])])
# check doxygen + sphinx for documentation build
################################################
AC_CHECK_PROG([DOXYGEN], [doxygen], [doxygen], [no])
AC_CHECK_PROG([SPHINXBUILD], [sphinx-build], [sphinx-build], [no])
if [[ "x$DOXYGEN" != xno ]]; then
if [[ "x$SPHINXBUILD" != xno ]]; then
AC_MSG_NOTICE([Doxygen and Sphinx found, documentation will be built])
BUILD_DOCS=yes
else
AC_MSG_NOTICE([Sphinx not found, cannot build documentation])
BUILD_DOCS=no
AC_ARG_ENABLE(docs,
[AS_HELP_STRING([--enable-docs],
[Generate full html documentation (default is no).])],
[docs=true],[docs=false])
if [[ "x$docs" = xtrue ]]; then
AC_CHECK_PROG([DOXYGEN], [doxygen], [doxygen], [no])
if [[ "x$DOXYGEN" == xno ]]; then
AC_MSG_ERROR([Doxygen not found])
fi
AC_CHECK_PROG([SPHINXBUILD], [sphinx-build], [sphinx-build], [no])
if [[ "x$SPHINXBUILD" == xno ]]; then
AC_MSG_ERROR([Sphinx not found])
fi
else
AC_MSG_NOTICE([Doxygen not found, cannot build documentation])
BUILD_DOCS=no
fi
AM_CONDITIONAL([BUILD_DOCS],[ test "x$BUILD_DOCS" = xyes ])
AM_CONDITIONAL([BUILD_DOCS],[ test "x$docs" = xtrue ])
# check nvidia compiler and libraries
# check CUDA compiler and libraries
#####################################
BUILD_CUDA=no
AC_DEFINE([HAVE_CUDA], [0], [Whether aml support cuda library calls.])
AC_DEFINE([RUN_CUDA], [0], [Whether the machine on which aml is compiled can run cuda code.])
# Check compilation features
AC_CHECK_PROG([NVCC], [nvcc], [nvcc], [no])
AC_CHECK_LIB(cudart, cudaMalloc, [CUDART=yes], [CUDART=no])
AC_CHECK_HEADERS([cuda.h], [CUDA_H=yes], [CUDA_H=no])
AC_CHECK_HEADERS([cuda_runtime.h], [CUDA_RUNTIME_H=yes], [CUDA_RUNTIME_H=no])
if [[ "x$NVCC" != xno ]] && \
[[ "x$CUDART" = xyes ]] && \
[[ "x$CUDA_H" = xyes ]] && \
[[ "x$CUDA_RUNTIME_H" = xyes ]]
then
BUILD_CUDA=yes
AC_DEFINE([HAVE_CUDA], [1], [Whether aml support cuda library calls.])
have_cuda=0
AC_ARG_WITH([cuda],
[AS_HELP_STRING([--with-cuda@<:@=yes|no|DIR@:>@],
[support cuda inside the library (default is check)])],
[
if [[ "x$withval" = xno ]]; then
want_cuda="no"
elif [[ "x$withval" = xyes ]]; then
want_cuda="yes"
cuda_home_path="$CUDA_HOME"
else
want_cuda="yes"
cuda_home_path=$withval
fi
],
[
want_cuda="check"
cuda_home_path="$CUDA_HOME"
])
if [[ "x$want_cuda" != xno ]]; then
AC_MSG_NOTICE([starting checks for CUDA])
if [[ -n "$cuda_home_path" ]]; then
nvcc_search_dirs="$PATH$PATH_SEPARATOR$cuda_home_path/bin"
else
nvcc_search_dirs="$PATH"
fi
AC_PATH_PROG([NVCC], [nvcc], [], [$nvcc_search_dirs])
if [[ -n "$NVCC" ]]; then
have_nvcc="yes"
fi
else
AC_MSG_NOTICE([will not check for CUDA])
fi
AM_CONDITIONAL([BUILD_CUDA],[ test "x$BUILD_CUDA" = xyes ])
# Check runtime features
if [[ "x$BUILD_CUDA" = xyes ]]; then
LIBS="$LIBS -lcudart"
RUN_CUDA=no
AC_MSG_CHECKING([that cudart code runs without error])
AC_RUN_IFELSE(
[AC_LANG_PROGRAM([[
#include <cuda.h>
#include <cuda_runtime.h>]],
[int device; return cudaGetDevice(&device) == cudaSuccess ? 0 : 1;])],
[AC_DEFINE([RUN_CUDA], [1], [Whether the machine on which aml is compiled can run cuda code.])
RUN_CUDA=yes],[])
AC_MSG_RESULT($RUN_CUDA)
if [[ "x$have_nvcc" = xyes ]]; then
if [[ -n "$cuda_home_path" ]]; then
CUDA_CFLAGS="-I$cuda_home_path/include"
CUDA_LIBS="-L$cuda_home_path/lib64 -lcudart"
else
CUDA_CFLAGS="-I/usr/local/cuda/include"
CUDA_LIBS="-L/usr/local/cuda/lib64 -lcudart"
fi
saved_LIBS=$LIBS
saved_CFLAGS=$CFLAGS
LIBS="$LIBS $CUDA_LIBS"
CFLAGS="$CFLAGS $CUDA_CFLAGS"
AC_CHECK_HEADER([cuda.h],,
[AC_MSG_ERROR([could not find cuda.h])])
AC_CHECK_HEADER([cuda_runtime.h],,
[AC_MSG_ERROR([could not find cuda_runtime.h])])
AC_CHECK_LIB(cudart, cudaMalloc,,
AC_MSG_ERROR([could not find cudart library]))
LIBS=$saved_LIBS
CFLAGS=$saved_CFLAGS
have_cuda=1
fi
AM_CONDITIONAL([RUN_CUDA],[ test "x$RUN_CUDA" = xyes ])
AC_DEFINE_UNQUOTED([HAVE_CUDA], [$have_cuda], [Whether aml support cuda library calls.])
AC_SUBST([HAVE_CUDA],[$have_cuda])
AM_CONDITIONAL([HAVE_CUDA], [ test "$have_cuda" = "1" ])
AC_SUBST(CUDA_CFLAGS)
AC_SUBST(CUDA_LIBS)
AC_SUBST(NVCC)
# Output
########
@@ -154,7 +187,8 @@ AC_CONFIG_FILES([Makefile
tests/Makefile
doc/Makefile
benchmarks/Makefile
o2lo
aml.pc
include/aml/utils/version.h], [chmod +x o2lo])
include/aml/utils/version.h
include/aml/utils/features.h])
AC_CONFIG_FILES([o2lo], [chmod +x o2lo])
AC_OUTPUT
@@ -5,7 +5,7 @@ SPHINX_BUILD_DIR=./build-sphinx
if BUILD_DOCS
build-docs:
$(DOXYGEN) aml.doxy
$(SPHINXBUILD) -b html -a . $(SPHINX_BUILD_DIR)
$(SPHINXBUILD) -v -W -b html -a . $(SPHINX_BUILD_DIR)
dist-hook: build-docs
cp -r $(SPHINX_BUILD_DIR) $(distdir)
......
@@ -756,13 +756,13 @@ WARN_IF_DOC_ERROR = YES
# parameter documentation, but not about the absence of documentation.
# The default value is: NO.
WARN_NO_PARAMDOC = NO
WARN_NO_PARAMDOC = YES
# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
# a warning is encountered.
# The default value is: NO.
WARN_AS_ERROR = NO
WARN_AS_ERROR = YES
# The WARN_FORMAT tag determines the format of the warning messages that doxygen
# can produce. The string should contain the $file, $line, and $text tags, which
......
@@ -5,7 +5,7 @@ AML is a memory management library designed to ease the use of complex memory
topologies and complex data layout optimizations for high-performance computing
applications.
AML is Open Source, distributed under the BSD 3 clause license.
AML is Open Source, distributed under the BSD 3-clause license.
Overview
--------
@@ -17,31 +17,31 @@ placement of data across hardware topologies, and affinity between work and
data.
AML is organized as a collection of abstractions, presented as *building
blocks*, to develop explicit memory and data management policies. AML goals
are:
blocks*, used to develop explicit memory and data management policies. The goals
of AML are:
* **composability**: application developers and performance experts should be
to pick and choose which building blocks to use depending on their specific
able to pick and choose the building blocks to use depending on their specific
needs.
* **flexibility**: users should be able to customize, replace, or change the
configuration of each building block as much as possible.
As of now, AML implements the following abstractions:
AML currently implements the following abstractions:
.. image:: img/building-blocks-diagram.png
:width: 300px
:align: right
* :doc:`Areas <pages/areas>`, a set of addressable physical memories,
* :doc:`Layout <pages/layout>`, a description of data structures organization,
* :doc:`Tilings <pages/tilings>`, (soon to be replaced),
* :doc:`DMAs <pages/dmas>`, an engine to asynchronously move data structures between areas,
* :doc:`Scratchpads <pages/scratchs>`, a stage-in, stage-out abstraction for prefetching.
* :doc:`Area <pages/areas>`, a set of addressable physical memories,
* :doc:`Layout <pages/layout>`, a description of data structure organization,
* :doc:`Tiling <pages/tilings>`, a description of data blocking (decomposition),
* :doc:`DMA <pages/dmas>`, an engine to asynchronously move data structures between areas,
* :doc:`Scratchpad <pages/scratchs>`, a stage-in, stage-out abstraction for prefetching.
Each of these abstractions have several implementations. For instance, areas
may refer to usual DRAM or a subset of them, GPU memory or non-volatile memory.
Tilings are implemented to reflect either 1D or 2D structures and so on.
Each of these abstractions has several implementations. For instance, areas
may refer to the usual DRAM or its subset, to GPU memory, or to non-volatile memory.
Tilings are implemented to reflect either 1D or 2D structures, and so on.
Quick Start Guide
-----------------
@@ -76,7 +76,7 @@ Installation
Workflow
~~~~~~~~
Include aml header:
Include the AML header:
.. code-block:: c
@@ -84,21 +84,21 @@ Include aml header:
...
int main(int argc, char **argv){
Check AML version:
Check the AML version:
.. code-block:: c
if(aml_version_major != AML_VERSION_MAJOR){
printf("AML ABI mismatch!");
fprintf(stderr, "AML ABI mismatch!\n");
return 1;
}
Initialize and Cleanup AML:
Initialize and clean up AML:
.. code-block:: c
if(aml_init(&argc, &argv) != 0){
printf("AML library init failure!");
fprintf(stderr, "AML library init failure!\n");
return 1;
}
...
@@ -106,14 +106,14 @@ Initialize and Cleanup AML:
Link your program with *-laml*.
See above building blocks specific pages for further examples and information
on library features.
Check the above building-blocks-specific pages for further examples and
information on the library features.
Support
-------
Support for AML is provided through the
`gitlab issue interface <https://xgitlab.cels.anl.gov/argo/aml/issues>`_.
`gitlab issues interface <https://xgitlab.cels.anl.gov/argo/aml/issues>`_.
Alternatively, you can contact the developers/maintainers directly:
* Swann Perarnau (swann AT anl DOT gov)
@@ -122,7 +122,7 @@ Alternatively you can contact directly the developers/maintainers:
Contributing
------------
AML welcomes any comment, suggestion, bug reporting, or feature request, as
AML welcomes comments, suggestions, bug reports, or feature requests, as
well as code contributions. See the
`contributing doc <https://xgitlab.cels.anl.gov/argo/aml/blob/master/CONTRIBUTING.markdown>`_
for more info.
......
Scratchpads: Staging in and out data
Scratchpads: Staging data in and out
====================================
.. doxygengroup:: aml_scratch
......
Tiling 1D Implementation API
============================
.. doxygengroup:: aml_tiling_1d
Tiling 2D Implementation API
============================
.. doxygengroup:: aml_tiling_2d
Tiling Padded Implementation API
================================
.. doxygengroup:: aml_tiling_pad
Tiling Resize Implementation API
================================
.. doxygengroup:: aml_tiling_resize
@@ -8,5 +8,5 @@ Implementations
.. toctree::
tiling_1d_api
tiling_2d_api
tiling_resize_api
tiling_pad_api
@@ -23,8 +23,9 @@ include_aml_scratch_HEADERS = \
include_aml_tilingdir=$(includedir)/aml/tiling
include_aml_tiling_HEADERS = \
aml/tiling/1d.h \
aml/tiling/2d.h
aml/tiling/native.h \
aml/tiling/pad.h \
aml/tiling/resize.h
include_amlutilsdir=$(includedir)/aml/utils
@@ -33,5 +34,6 @@ include_amlutils_HEADERS = \
aml/utils/error.h \
aml/utils/inner-malloc.h \
aml/utils/vector.h \
aml/utils/version.h
aml/utils/version.h \
aml/utils/features.h
@@ -11,7 +11,9 @@
/**
* @defgroup aml_area_cuda "AML Cuda Areas"
* @brief Cuda Implementation of Areas.
* @code
* #include <aml/area/cuda.h>
* @endcode
*
* Cuda implementation of AML areas.
* This building block relies on Cuda implementation of
@@ -27,32 +29,103 @@
**/
/**
* Structure containing aml area hooks for cuda implementation.
* For now there is only a single implementation of the hooks.
* This implementation will choose between different cuda functions.
* Default cuda area flags.
* * Allocation on device only,
* * Allocation visible by a single device.
* * Allocation not mapped on host memory.
**/
extern struct aml_area_ops aml_area_cuda_ops;
#define AML_AREA_CUDA_FLAG_DEFAULT 0
/**
* Default cuda area with private mapping in current device.
* Can be used out of the box with aml_area_*() functions.
* Device allocation flag.
* Default behaviour is allocation on device.
* If this flag is set then allocation will
* be on host.
**/
extern struct aml_area aml_area_cuda;
#define AML_AREA_CUDA_FLAG_ALLOC_HOST (1 << 0)
/**
* Mapping flag.
* Default behaviour is allocation not mapped.
* If set, the pointer returned by mmap function
* will be host side memory mapped on device.
* A pointer to device memory can then be retrieved
* by calling cudaHostGetDevicePointer().
* If AML_AREA_CUDA_FLAG_ALLOC_HOST is set, then
* host side memory will be allocated. Else,
* "ptr" field of mmap options will be used to map
* device memory ("ptr" must not be NULL).
*
* @see cudaHostRegister(), cudaHostAlloc().
**/
#define AML_AREA_CUDA_FLAG_ALLOC_MAPPED (1 << 1)
/**
* Allocation flags to pass to cudaMallocManaged().
* @see cuda runtime API documentation / memory management.
* Unified memory flag.
* If this flag is set, then allocation will create
* a unified memory pointer usable on host and device.
* Additionally, AML_AREA_CUDA_FLAG_ALLOC_HOST and
* AML_AREA_CUDA_FLAG_ALLOC_MAPPED will be ignored.
*
* @see cudaMallocManaged()
**/
#define AML_AREA_CUDA_FLAG_ALLOC_UNIFIED (1 << 2)
/**
* Allocation visibility flag.
* If AML_AREA_CUDA_FLAG_ALLOC_UNIFIED is set,
* then this flag is checked to select the
* cudaMallocManaged() flag cudaAttachGlobal.
* Else, if AML_AREA_CUDA_FLAG_ALLOC_MAPPED or
* AML_AREA_CUDA_FLAG_ALLOC_HOST is set,
* then this flag is checked to select the
* cudaMallocHost() flag cudaHostAllocPortable.
* The default behaviour is to make the allocation
* visible from a single device. If this flag is set,
* then the allocation will be visible on all devices.
*
* @see cudaMallocManaged()
**/
enum aml_area_cuda_flags {
AML_AREA_CUDA_ATTACH_GLOBAL,
AML_AREA_CUDA_ATTACH_HOST,
#define AML_AREA_CUDA_FLAG_ALLOC_GLOBAL (1 << 3)
/**
* Options that can optionally be passed to the mmap
* call.
**/
struct aml_area_cuda_mmap_options {
/**
* Specify a different device for one mmap call.
* If device < 0, the area device is used.
**/
int device;
/**
* Host memory pointer used for mapped allocations.
* If flag AML_AREA_CUDA_FLAG_ALLOC_MAPPED is set
* and ptr is NULL, ptr will be overwritten with
* host allocated memory and will have to be freed
* using cudaFreeHost().
**/
void *ptr;
};
/** aml area hooks for cuda implementation. **/
extern struct aml_area_ops aml_area_cuda_ops;
/**
* Default cuda area:
* Allocation on device, visible by a single device,
* and not mapped on host memory.
**/
extern struct aml_area aml_area_cuda;
/** Implementation of aml_area_data for cuda areas. **/
struct aml_area_cuda_data {
/** allocation flags in cuda format **/
/** Area allocation flags. **/
int flags;
/** The device id on which allocation is done. **/
/**
* The device id on which allocation is done.
* If device < 0, use current device.
**/
int device;
};
@@ -62,8 +135,8 @@ struct aml_area_cuda_data {
* @param[out] area pointer to an uninitialized struct aml_area pointer to
* receive the new area.
* @param[in] device: A valid cuda device id, i.e., from 0 to num_devices-1.
* If device id is negative, then no cuda device will be selected when
* using aml_area_cuda_mmap().
* If device id is negative, then the current cuda device will be used when
* calling aml_area_cuda_mmap().
* @param[in] flags: Allocation flags.
*
* @return AML_SUCCESS on success and area points to the new aml_area.
@@ -72,11 +145,10 @@ struct aml_area_cuda_data {
* of devices.
* @return -AML_ENOMEM if space to carry area cannot be allocated.
*
* @see enum aml_area_cuda_flags.
* @see AML_AREA_CUDA_FLAG_*.
**/
int aml_area_cuda_create(struct aml_area **area,
const int device,
const enum aml_area_cuda_flags flags);
const int device, const int flags);
/**
* \brief Cuda area destruction.
@@ -94,8 +166,9 @@ void aml_area_cuda_destroy(struct aml_area **area);
* This function is a wrapper on cuda alloc functions.
* It uses area settings to: select device on which to perform allocation,
* select allocation function and set its parameters.
* Allocations can be standalone on device, shared across multiple devices,
* and backed with cpu memory.
* Any pointer obtained through aml_area_cuda_mmap() must be unmapped with
* aml_area_cuda_munmap().
*
* Device selection is not thread safe and requires setting the global
* state of the cuda library. When selecting a device, the allocation may
* succeed while setting the device back to the original context may fail. In that
@@ -103,31 +176,38 @@ void aml_area_cuda_destroy(struct aml_area **area);
* function in order to catch the error when return value is not NULL.
*
* @param[in] area_data: The structure containing cuda area settings.
* @param[in, out] ptr: If ptr is NULL, then call cudaMallocManaged() with
* area flags. Memory will be allocated only device side.
* If ptr is not NULL:
* * ptr must point to a valid memory area.
* Device side memory will be mapped on this host side memory.
* According to cuda runtime API documentation
* (cudaHostRegister()), host side memory pages will be locked or allocation
* will fail.
* @param[in] size: The size to allocate.
* @param[in] options: A struct aml_area_cuda_mmap_options *. If its
* "device" field is >= 0, it is used to select the target device.
* If area flag AML_AREA_CUDA_FLAG_ALLOC_MAPPED is set and
* AML_AREA_CUDA_FLAG_ALLOC_HOST is not set, then the options field "ptr"
* must not be NULL and must point to host memory that can be mapped on the GPU.
*
* @return A cuda pointer to allocated device memory on success, NULL on
* failure. If failure occurs, the aml_errno variable is set to one of the
* following values:
* * AML_ENOTSUP if one of the cuda calls failed with error:
* @return NULL on failure with aml errno set to the following error codes:
* * AML_ENOTSUP if one of the cuda calls failed with error:
* cudaErrorInsufficientDriver, cudaErrorNoDevice.
* * AML_EINVAL if target device id is not valid.
* * AML_EBUSY if a specific device was requested and the call failed with error
* cudaErrorDeviceAlreadyInUse, or if region was already mapped on device.
* * AML_EINVAL if the target device id is not valid or the provided arguments
* are not compatible.
* * AML_EBUSY if a specific device was requested but was already in use.
* * AML_ENOMEM if memory allocation failed with error
* cudaErrorMemoryAllocation.
* * AML_FAILURE if one of the cuda calls resulted in error
* cudaErrorInitializationError.
* @return A cuda pointer usable on device and host if area flags contain
* AML_AREA_CUDA_FLAG_ALLOC_UNIFIED.
* @return A pointer to host memory on which one can call
* cudaHostGetDevicePointer() to get a pointer to mapped device memory, if
* AML_AREA_CUDA_FLAG_ALLOC_MAPPED is set.
* Obtained pointer must be unmapped with aml_area_cuda_munmap(). If host side
* memory was provided as mmap option, then it still has to be freed.
* @return A pointer to host memory if area flag AML_AREA_CUDA_FLAG_ALLOC_HOST
* is set.
* @return A pointer to device memory if no flag is set.
*
* @see AML_AREA_CUDA_FLAG_*
**/
void *aml_area_cuda_mmap(const struct aml_area_data *area_data,
void *ptr, size_t size);
size_t size, struct aml_area_mmap_options *options);
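
/*
 * Editor's usage sketch (not part of the patch): create a cuda area on
 * device 0 with the default flags, then allocate and release memory
 * through the generic area API. aml_area_munmap() is assumed to be the
 * generic counterpart of aml_area_mmap(); error handling is elided.
 * Assumes <aml.h> and <aml/area/cuda.h> are included.
 */
void example_cuda_area(size_t size)
{
	struct aml_area *area;
	void *ptr;

	/* Device-only, single-device, unmapped allocation (default flags). */
	aml_area_cuda_create(&area, 0, AML_AREA_CUDA_FLAG_DEFAULT);
	ptr = aml_area_mmap(area, size, NULL);
	/* ... launch cuda kernels on ptr ... */
	aml_area_munmap(area, ptr, size);
	aml_area_cuda_destroy(&area);
}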
/**
* \brief munmap hook for aml area.
......
@@ -13,121 +13,122 @@
/**
* @defgroup aml_area_linux "AML Linux Areas"
* @brief Linux Implementation of Areas.
* @brief Linux Implementation of AML Areas.
*
* Linux implementation of AML areas.
* This building block relies on libnuma implementation and
* linux mmap/munmap to provide mmap/munmap on NUMA
* host processor memory. New areas may be created
* This building block relies on the libnuma implementation and
* the Linux mmap() / munmap() to provide mmap() / munmap() on NUMA host
* processor memory. New areas may be created
* to allocate a specific subset of memories.
* This building block also include a static declaration of
* a default initialized area that can be used out of the box with
* abstract area API.
* This building block also includes a static declaration of
* a default initialized area that can be used out-of-the-box with
* the abstract area API.
*
* @code
* #include <aml/area/linux.h>
* @endcode
* @{
**/
/**
* Allowed binding flag for area creation.
* This flag will apply strict binding to the selected bitmask.
* Subsequent allocations will fail if they cannot enforce the binding
* on the bitmask.
**/
#define AML_AREA_LINUX_BINDING_FLAG_BIND (MPOL_BIND)
/**
* Allowed binding flag for area creation.
* This flag will make subsequent allocations interleave
* pages across the memories of the bitmask.
**/
#define AML_AREA_LINUX_BINDING_FLAG_INTERLEAVE (MPOL_INTERLEAVE)
/**
* Allowed binding flag for area creation.
* This flag will bind subsequent allocations to the
* nodes of the bitmask if possible, else to some other node.
**/
#define AML_AREA_LINUX_BINDING_FLAG_PREFERRED (MPOL_PREFERRED)
/**
* Allowed mapping flag for area creation.
* This flag will make subsequent allocations private
* to the process making them.
**/
#define AML_AREA_LINUX_MMAP_FLAG_PRIVATE (MAP_PRIVATE | MAP_ANONYMOUS)
/**
* Allowed mapping flag for area creation.
* This flag will make subsequent allocations visible to
* other processes of the system.
**/
#define AML_AREA_LINUX_MMAP_FLAG_SHARED (MAP_SHARED | MAP_ANONYMOUS)
/**
* This contains area operations implementation
* for linux area.
* Contains area operations implementation
* for the Linux area.
**/
extern struct aml_area_ops aml_area_linux_ops;
/**
* Default linux area with private mapping and no binding.
* Can be used out of the box with aml_area_*() functions.
* Default Linux area using a private mapping and no binding.
* Can be used out-of-the-box with aml_area_*() functions.
**/
extern struct aml_area aml_area_linux;
/** Allowed policy flag for area creation. **/
enum aml_area_linux_policy {
/** Default allocation policy. **/
AML_AREA_LINUX_POLICY_DEFAULT,
/**
* Enforce binding to the specified area nodeset; fail if not possible.
**/
AML_AREA_LINUX_POLICY_BIND,
/**
* Bind to the specified area nodeset;
* if not possible, fall back to other available nodes.
**/
AML_AREA_LINUX_POLICY_PREFERRED,
/** Bind to the specified area nodeset in a round-robin fashion. **/
AML_AREA_LINUX_POLICY_INTERLEAVE,
};
/**
* Implementation of aml_area_data for linux areas.
* Implementation of aml_area_data for Linux areas.
**/
struct aml_area_linux_data {
/** numanodes to use when allocating data **/
struct bitmask *nodeset;
/** binding policy **/
int binding_flags;
/** mmap flags **/
int mmap_flags;
enum aml_area_linux_policy policy;
};
/**
* Options implementation for aml_area_linux_mmap().
* @see mmap(2) man page.
**/
struct aml_area_linux_mmap_options {
/** hint address where to perform allocation **/
void *ptr;
/** Combination of mmap flags **/
int flags;
/** protection flags **/
int mode;
/** File descriptor backing and initializing memory. **/
int fd;
/** Offset in the file for mapping **/
off_t offset;
};
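
/*
 * Editor's illustration (not from the patch): filling these options for a
 * shared, file-backed mapping. The cast to the generic
 * struct aml_area_mmap_options * when calling aml_area_mmap() is an
 * assumption about the generic area API. Assumes <sys/mman.h>.
 */
void example_file_mapping(int fd, size_t size)
{
	struct aml_area_linux_mmap_options opts = {
		.ptr = NULL,                    /* no placement hint */
		.flags = MAP_SHARED,            /* share with other processes */
		.mode = PROT_READ | PROT_WRITE, /* protection flags */
		.fd = fd,                       /* backing file */
		.offset = 0,                    /* map from file start */
	};
	void *buf = aml_area_mmap(&aml_area_linux, size,
	                          (struct aml_area_mmap_options *)&opts);
	/* ... buf is a file-backed mapping, or NULL with errno set ... */
	(void)buf;
}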
/**
* \brief Linux area creation.
*
* Allocate and initialize a struct aml_area implemented by aml_area_linux
* Allocates and initializes struct aml_area implemented by aml_area_linux
* operations.
* @param[out] area pointer to an uninitialized struct aml_area pointer to
* receive the new area.
* @param[in] mmap_flags flags to use when retrieving virtual memory with mmap
* @param[in] binding_flags, flags to use when binding memory.
* @param[in] nodemask list of memory nodes to use. Default to allowed memory
* nodes if NULL.
* @return On success, returns 0 and area points to the new aml_area.
* @return On failure, sets area to NULL and returns one of AML error codes:
* receive the new area.
* @param[in] nodemask list of memory nodes to use. Defaults to all allowed
* memory nodes if NULL.
* @param[in] policy: The memory allocation policy to use when binding to
* nodeset.
* @return On success, returns 0 and fills "area" with a pointer to the new
* aml_area.
* @return On failure, fills "area" with NULL and returns one of AML error
* codes:
* - AML_ENOMEM if there wasn't enough memory available.
* - AML_EINVAL if inputs flags were invalid.
* - AML_EDOM the nodemask provided is out of bounds (allowed nodeset).
* - AML_EINVAL if input flags were invalid.
* - AML_EDOM if the nodemask provided was out of bounds (of the allowed
* node set).
**/
int aml_area_linux_create(struct aml_area **area, const int mmap_flags,
int aml_area_linux_create(struct aml_area **area,
const struct aml_bitmap *nodemask,
const int binding_flags);
const enum aml_area_linux_policy policy);
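
/*
 * Usage sketch mirroring the benchmark updates in this patch: create an
 * area strictly bound to a nodeset, then allocate through the generic
 * API. The aml_bitmap setup and the generic aml_area_munmap() cleanup
 * are assumptions; the create/mmap calls follow the benchmarks above.
 * Assumes <assert.h>.
 */
void example_bound_area(struct aml_bitmap *nodeset, size_t size)
{
	struct aml_area *area;
	void *buf;

	aml_area_linux_create(&area, nodeset, AML_AREA_LINUX_POLICY_BIND);
	assert(area != NULL);
	buf = aml_area_mmap(area, size, NULL);
	assert(buf != NULL);
	/* ... */
	aml_area_munmap(area, buf, size); /* assumed generic counterpart */
	aml_area_linux_destroy(&area);
}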
/**
* \brief Linux area destruction.
*
* Destroy (finalize and free resources) a struct aml_area created by
* Destroys (finalizes and frees resources) struct aml_area created by
* aml_area_linux_create().
*
* @param area is NULL after this call.
* @param area address of an initialized struct aml_area pointer, which will be
* reset to NULL on return from this call.
**/
void aml_area_linux_destroy(struct aml_area **area);
/**
* Bind memory of size "size" pointed by "ptr" to binding set in "bind".
* If the mbind call was not successful, i.e. AML_FAILURE is returned, then errno
* should be inspected for further error checking.
* @param bind: The binding settings. mmap_flags is actually unused.
* @param ptr: The data to bind.
* @param size: The size of the data pointed by ptr.
* @return an AML error code.
* Binds memory of size "size" pointed to by "ptr" using the binding provided
* in "bind". If the mbind() call was not successful, i.e., AML_FAILURE is
* returned, then "errno" should be inspected for further error information.
* @param bind: The requested binding. "mmap_flags" is actually unused.
* @param ptr: The memory to bind.
* @param size: The size of the memory pointed to by "ptr".
* @return 0 if successful; an error code otherwise.
**/
int
aml_area_linux_mbind(struct aml_area_linux_data *bind,
@@ -135,13 +136,14 @@ aml_area_linux_mbind(struct aml_area_linux_data *bind,
size_t size);
/**
* Function to check whether binding of a ptr obtained with
* aml_area_linux_mmap() then aml_area_linux_mbind() match area settings.
* Checks whether the binding of a pointer obtained with
* aml_area_linux_mmap() followed by aml_area_linux_mbind() matches the area
* settings.
* @param area_data: The expected binding settings.
* @param ptr: The data supposedly bound.
* @param size: The data size.
* @return 1 if the mapped memory binding in ptr matches area_data binding settings,
* else 0.
* @param ptr: The supposedly bound memory.
* @param size: The memory size.
* @return 1 if the mapped memory binding in "ptr" matches the "area_data"
* binding settings, else 0.
**/
int
aml_area_linux_check_binding(struct aml_area_linux_data *area_data,
@@ -149,33 +151,33 @@ aml_area_linux_check_binding(struct aml_area_linux_data *area_data,
size_t size);
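
/*
 * Editor's sketch (not from the patch) chaining the three low-level calls
 * documented here: map, bind, then verify. The cast from
 * struct aml_area_linux_data * to the generic struct aml_area_data * is
 * an assumption about how the hooks receive their data. Assumes <assert.h>.
 */
void example_bind_and_check(struct aml_area_linux_data *data, size_t size)
{
	void *buf = aml_area_linux_mmap(
		(const struct aml_area_data *)data, size, NULL);

	if (buf == NULL)
		return; /* inspect errno */
	if (aml_area_linux_mbind(data, buf, size) != 0)
		return; /* AML_FAILURE: inspect errno */
	/* Returns 1 when the mapped pages match the area settings. */
	assert(aml_area_linux_check_binding(data, buf, size) == 1);
}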
/**
* \brief mmap block for aml area.
* \brief mmap block for AML area.
*
* This function is a wrapper on mmap function using arguments set in
* mmap_flags of area_data.
* This function does not perform binding, unlike it is done in areas created
* with aml_area_linux_create().
* @param area_data: The structure containing mmap_flags for mmap call.
* nodemask and bind_flags fields are ignored.
* @param ptr: A hint provided to mmap function.
* This function is a wrapper around the mmap() call using arguments set in
* "mmap_flags" of "area_data".
* This function does not perform binding, unlike what is done in areas created
* using aml_area_linux_create().
* @param area_data: The structure containing "mmap_flags" for the mmap() call.
* "nodemask" and "bind_flags" fields are ignored.
* @param size: The size to allocate.
* @return NULL on failure, else a valid pointer to memory.
* Upon failure, errno should be checked for further error investigations.
* @param opts: See "aml_area_linux_mmap_options".
* @return a valid memory pointer, or NULL on failure.
* On failure, "errno" should be checked for further information.
**/
void*
aml_area_linux_mmap(const struct aml_area_data *area_data,
void *ptr,
size_t size);
size_t size,
struct aml_area_mmap_options *opts);
/**
* \brief munmap hook for aml area.
* \brief munmap hook for AML area.
*
* unmap memory mapped with aml_area_linux_mmap().
* Unmaps memory mapped with aml_area_linux_mmap().
* @param area_data: unused
* @param ptr: The virtual memory to unmap.
* @param size: The size of virtual memory to unmap.
* @return AML_FAILURE on error, AML_SUCCESS.
* Upon failure errno should be checked for further error investigations.
* @param size: The size of the virtual memory to unmap.
* @return AML_SUCCESS on success, else AML_FAILURE.
* On failure, "errno" should be checked for further information.
**/
int
aml_area_linux_munmap(const struct aml_area_data *area_data,
......
@@ -42,16 +42,24 @@ struct aml_dma_request_linux_par {
struct aml_dma_linux_par *dma;
/** The actual thread in charge for the request progress**/
pthread_t thread;
/** operator for this request **/
aml_dma_operator op;
/** operator argument for this request **/
void *op_arg;
};
/** Inner data of the parallel linux dma implementation. **/
struct aml_dma_linux_par_data {
struct aml_vector *requests;
pthread_mutex_t lock;
/** default operator for this dma **/
aml_dma_operator default_op;
/** default operator arg for this dma **/
void *default_op_arg;
};
/** Declaration of linux parallel dma operations **/
struct aml_dma_linux_par_ops {
struct aml_dma_linux_par_inner_ops {
void *(*do_thread)(void *data);
};
@@ -61,7 +69,7 @@ struct aml_dma_linux_par_ops {
* Can be passed to generic aml_dma_*() functions.
**/
struct aml_dma_linux_par {
struct aml_dma_linux_par_ops ops;
struct aml_dma_linux_par_inner_ops ops;
struct aml_dma_linux_par_data data;
};
@@ -72,11 +80,12 @@ struct aml_dma_linux_par {
* will be stored.
* @param nbreqs the initial number of slots for asynchronous requests that are
* in-flight (will be increased automatically if necessary).
* @param nbthreads the number of threads to launch for each request.
*
* @param op: default operator
* @param op_arg: default argument to the operator
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_linux_par_create(struct aml_dma **dma, size_t nbreqs);
int aml_dma_linux_par_create(struct aml_dma **dma, size_t nbreqs,
aml_dma_operator op, void *op_arg);
/**
* Tears down a parallel DMA created with aml_dma_linux_par_create.
......
@@ -38,6 +38,10 @@ struct aml_dma_request_linux_seq {
struct aml_layout *dest;
/** The source pointer of the data movement **/
struct aml_layout *src;
/** The operator being used **/
aml_dma_operator op;
/** Argument for operator **/
void *op_arg;
};
/** Inner data of sequential linux aml_dma implementation **/
@@ -50,10 +54,14 @@ struct aml_dma_linux_seq_data {
struct aml_vector *requests;
/** Lock for queuing requests concurrently **/
pthread_mutex_t lock;
/** default operator **/
aml_dma_operator default_op;
/** default op_arg **/
void *default_op_arg;
};
/** Declaration of available linux sequential dma operations **/
struct aml_dma_linux_seq_ops {
struct aml_dma_linux_seq_inner_ops {
/**
* Perform a sequential copy between source and destination
* pointers allocated with an aml_area_linux.
@@ -69,7 +77,7 @@ struct aml_dma_linux_seq_ops {
* Can be passed to generic aml_dma_*() functions.
**/
struct aml_dma_linux_seq {
struct aml_dma_linux_seq_ops ops;
struct aml_dma_linux_seq_inner_ops ops;
struct aml_dma_linux_seq_data data;
};
@@ -81,10 +89,13 @@ struct aml_dma_linux_seq {
* will be stored.
* @param nbreqs the initial number of slots for asynchronous requests that are
* in-flight (will be increased automatically if necessary).
* @param op: default operator
* @param op_arg: default argument to the operator
*
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_linux_seq_create(struct aml_dma **dma, size_t nbreqs);
int aml_dma_linux_seq_create(struct aml_dma **dma, size_t nbreqs,
aml_dma_operator op, void *op_arg);
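
/*
 * Usage sketch matching the benchmark updates in this patch: a sequential
 * dma created with the default operator (NULL op and op_arg). The
 * aml_dma_linux_seq_destroy() call follows the teardown function named
 * below and is assumed to take the same double-pointer form as create.
 * Assumes <assert.h>.
 */
void example_dma(void)
{
	struct aml_dma *dma;

	assert(!aml_dma_linux_seq_create(&dma, 2, NULL, NULL));
	/* ... submit requests through the generic aml_dma API ... */
	aml_dma_linux_seq_destroy(&dma);
}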
/**
* Tears down a sequential DMA created with aml_dma_linux_seq_create.
......
@@ -21,7 +21,9 @@
* on the virtual address space, and a pitch (distance between contiguous