Commit bfc976af authored by Swann Perarnau

Merge branch 'staging' into 'master'

Sync-up master and staging

See merge request !99
parents 80431541 e6dc5dd3
Pipeline #8966 passed with stages
in 10 minutes and 7 seconds
# Pipeline-wide variables: Slurm options for the NMC site, plus the service
# user and scheduler options for ANL Theta.
# NOTE(review): presumably consumed by the site-specific batch runners -- confirm.
variables:
NMC_FE1_SLURM_PARAMETERS: "--nodes=1 --partition=ecp-p9-4v100"
ANL_THETA_PROJECT_SERVICE_USER: "cscstss"
ANL_THETA_SCHEDULER_PARAMETERS: "-A CSC250STPR19 -n 1 -t 20 -q debug-flat-quad"
# Single stage: all jobs defined below use "build".
stages:
- build
# Diagnostic job for runners tagged "nmc": it builds nothing and only prints
# information about the machine (kernel, available modules, PCI devices,
# hardware topology, environment, gcc location/version).
# Restricted to the master and staging refs, and only when $ECP_MIRROR is "NMC".
nmc:batch:env:
stage: build
tags:
- nmc
only:
refs:
- master
- staging
variables:
- $ECP_MIRROR == "NMC"
script:
- uname -a
- module avail
- lspci
# "module avail" is run again after loading, to compare with the list above.
- module load hwloc numactl cuda
- module avail
- lstopo
- env
- which gcc
- gcc --version
- compgen -c gcc
# Build and test job for runners tagged "nmc", with the numactl, hwloc and cuda
# modules loaded and ./configure called without any CUDA option.
# Restricted to the master and staging refs, and only when $ECP_MIRROR is "NMC".
make:nmc-cuda-check:
stage: build
tags:
- nmc
only:
refs:
- master
- staging
variables:
- $ECP_MIRROR == "NMC"
script:
- module load numactl hwloc cuda
# Print the CUDA-related environment and the process status for the job log.
- env | grep -i cuda
- cat /proc/self/status
- ./autogen.sh
- ./configure
- make -j
- make check VERBOSE=1
# Logs are only kept when the job fails.
artifacts:
when: on_failure
paths:
- tests/test-suite.log
- config.log
# Build and test job for runners tagged "nmc" that passes the CUDA install
# directory explicitly to configure through --with-cuda=$CUDA_HOME.
# Restricted to the master and staging refs, and only when $ECP_MIRROR is "NMC".
make:nmc-cuda-home:
stage: build
tags:
- nmc
only:
refs:
- master
- staging
variables:
- $ECP_MIRROR == "NMC"
script:
- module load numactl hwloc cuda
# Print the CUDA-related environment for the job log.
- env | grep -i cuda
- ./autogen.sh
- ./configure --with-cuda=$CUDA_HOME
- make -j
- make check VERBOSE=1
# Logs are only kept when the job fails.
artifacts:
when: on_failure
paths:
- tests/test-suite.log
- config.log
# Build and test job for runners tagged "ecp-theta" and "batch".
# The build steps (module list, autogen, configure, make -j63, make check) are
# written into aml-ci-script.sh with a heredoc; the script is then printed,
# made executable and launched through aprun.
# Restricted to the master and staging refs, and only when $ECP_MIRROR is "THETA".
make:theta-batch:
stage: build
tags:
- ecp-theta
- batch
only:
refs:
- master
- staging
variables:
- $ECP_MIRROR == "THETA"
script:
- |
cat > aml-ci-script.sh << EOF
#!/bin/bash
module list
./autogen.sh
./configure
make -j63
make check VERBOSE=1
EOF
- cat aml-ci-script.sh
- chmod +x aml-ci-script.sh
- aprun aml-ci-script.sh
# Test logs and config.log are only kept when the job fails.
artifacts:
when: on_failure
paths:
- tests/*.log
- config.log
...@@ -16,7 +16,12 @@ repoquality: ...@@ -16,7 +16,12 @@ repoquality:
- git ls-files *.c *.h | grep -v -e benchmarks > .repoquality - git ls-files *.c *.h | grep -v -e benchmarks > .repoquality
- nix run -f "$ARGOPKGS" repoquality --command repoquality - nix run -f "$ARGOPKGS" repoquality --command repoquality
tags: tags:
- integration - nix
- kvm
artifacts:
when: on_failure
paths:
- .repoquality
checkpatch: checkpatch:
stage: style stage: style
...@@ -24,7 +29,8 @@ checkpatch: ...@@ -24,7 +29,8 @@ checkpatch:
- /^wip.*/ - /^wip.*/
- /^WIP.*/ - /^WIP.*/
tags: tags:
- integration - nix
- kvm
script: script:
- git ls-files *.c *.h | grep -v -e benchmarks >> .checkpatch.conf - git ls-files *.c *.h | grep -v -e benchmarks >> .checkpatch.conf
- nix run -f "$ARGOPKGS" checkpatch --command checkpatch.pl - nix run -f "$ARGOPKGS" checkpatch --command checkpatch.pl
...@@ -35,7 +41,8 @@ style:docs: ...@@ -35,7 +41,8 @@ style:docs:
- /^wip.*/ - /^wip.*/
- /^WIP.*/ - /^WIP.*/
tags: tags:
- integration - nix
- kvm
script: script:
- | - |
nix-shell "$ARGOPKGS" -A aml-dist --arg aml-src ./. --run bash << EOF nix-shell "$ARGOPKGS" -A aml-dist --arg aml-src ./. --run bash << EOF
...@@ -50,6 +57,8 @@ style:docs: ...@@ -50,6 +57,8 @@ style:docs:
- config.log - config.log
make:generic: make:generic:
tags:
- knl
stage: build stage: build
except: except:
- /^wip.*/ - /^wip.*/
...@@ -70,6 +79,8 @@ make:generic: ...@@ -70,6 +79,8 @@ make:generic:
- tests/*.log - tests/*.log
make:out-of-tree: make:out-of-tree:
tags:
- knl
stage: build stage: build
except: except:
- /^wip.*/ - /^wip.*/
...@@ -115,30 +126,15 @@ readthedocs: ...@@ -115,30 +126,15 @@ readthedocs:
stage: docs stage: docs
when: on_success when: on_success
only: only:
- staging
- master - master
- /v[0-9]+\.[0-9]+\.x/ - /v[0-9]+\.[0-9]+\.x/
tags: tags:
- integration - nix
- kvm
environment:
name: rtd/$CI_COMMIT_REF_NAME
url: https://argo-aml.readthedocs.io/en/$CI_COMMIT_REF_NAME
script: script:
- nix run nixpkgs.curl -c curl -X POST -d "branch=$CI_COMMIT_REF_NAME" -d "token=$READTHEDOCS_TOKEN" https://readthedocs.org/api/v2/webhook/argo-aml/83161/ - nix run nixpkgs.curl -c curl -X POST -d "branches=$CI_COMMIT_REF_NAME" -d "token=$READTHEDOCS_TOKEN" https://readthedocs.org/api/v2/webhook/argo-aml/83161/
dist:
stage: release
except:
- /^wip.*/
- /^WIP.*/
when: on_success
only:
- tags
tags:
- integration
script:
- nix-build "$ARGOPKGS" -A aml-dist --arg aml-src ./.
- nix-shell "$ARGOPKGS" -A aml-dist --arg aml-src ./. --run "./release.sh CREATE $CI_JOB_ID $CI_PROJECT_ID $RELEASE_TOKEN $CI_COMMIT_REF_NAME"
artifacts:
when: on_success
paths:
- result/*.tar.gz
- CHECKSUM
expire_in: 1000y
...@@ -6,3 +6,4 @@ Nicolas Denoyelle <ndenoyelle@anl.gov> ...@@ -6,3 +6,4 @@ Nicolas Denoyelle <ndenoyelle@anl.gov>
Clement Foyer <cfoyer@cray.com> Clement Foyer <cfoyer@cray.com>
Brice Videau <bvideau@anl.gov> Brice Videau <bvideau@anl.gov>
Aleksandr Danilin <danilin96@gmail.com> Aleksandr Danilin <danilin96@gmail.com>
Kyle Shaver <kshaver@anl.gov>
...@@ -104,46 +104,77 @@ if [[ "x$docs" = xtrue ]]; then ...@@ -104,46 +104,77 @@ if [[ "x$docs" = xtrue ]]; then
fi fi
AM_CONDITIONAL([BUILD_DOCS],[ test "x$docs" == xtrue ]) AM_CONDITIONAL([BUILD_DOCS],[ test "x$docs" == xtrue ])
# check nvidia compiler and libraries # check CUDA compiler and libraries
##################################### #####################################
BUILD_CUDA=no have_cuda=0
AC_DEFINE([HAVE_CUDA], [0], [Whether aml support cuda library calls.])
AC_DEFINE([RUN_CUDA], [0], [Whether the machine on which aml is compiled can run cuda code.]) AC_ARG_WITH([cuda],
[AS_HELP_STRING([--with-cuda@<:@=yes|no|DIR@:>@],
# Check compilation features [support cuda inside the library (default is check)])],
AC_CHECK_PROG([NVCC], [nvcc], [nvcc], [no]) [
AC_CHECK_LIB(cudart, cudaMalloc, [CUDART=yes], [CUDART=no]) if [[ "x$withval" = xno ]]; then
AC_CHECK_HEADERS([cuda.h], [CUDA_H=yes], [CUDA_H=no]) want_cuda="no"
AC_CHECK_HEADERS([cuda_runtime.h], [CUDA_RUNTIME_H=yes], [CUDA_RUNTIME_H=no]) elif [[ "x$withval" = xyes ]]; then
want_cuda="yes"
if [[ "x$NVCC" != xno ]] && \ cuda_home_path="$CUDA_HOME"
[[ "x$CUDART" = xyes ]] && \ else
[[ "x$CUDA_H" = xyes ]] && \ want_cuda="yes"
[[ "x$CUDA_RUNTIME_H" = xyes ]] cuda_home_path=$withval
then fi
BUILD_CUDA=yes ],
AC_DEFINE([HAVE_CUDA], [1], [Whether aml support cuda library calls.]) [
want_cuda="check"
cuda_home_path="$CUDA_HOME"
])
if [[ "x$want_cuda" != xno ]]; then
AC_MSG_NOTICE([starting checks for CUDA])
if [[ -n "$cuda_home_path" ]]; then
nvcc_search_dirs="$PATH$PATH_SEPARATOR$cuda_home_path/bin"
else
nvcc_search_dirs="$PATH"
fi
AC_PATH_PROG([NVCC], [nvcc], [], [$nvcc_search_dirs])
if [[ -n "$NVCC" ]]; then
have_nvcc="yes"
fi
else
AC_MSG_NOTICE([will not check for CUDA])
fi fi
AM_CONDITIONAL([BUILD_CUDA],[ test "x$BUILD_CUDA" = xyes ]) if [[ "x$have_nvcc" = xyes ]]; then
# Check runtime features if [[ -n "$cuda_home_path" ]]; then
if [[ "x$BUILD_CUDA" = xyes ]]; then CUDA_CFLAGS="-I$cuda_home_path/include"
LIBS="$LIBS -lcudart" CUDA_LIBS="-L$cuda_home_path/lib64 -lcudart"
RUN_CUDA=no else
AC_MSG_CHECKING([that cudart code runs without error]) CUDA_CFLAGS="-I/usr/local/cuda/include"
AC_RUN_IFELSE( CUDA_LIBS="-L/usr/local/cuda/lib64 -lcudart"
[AC_LANG_PROGRAM([[ fi
#include <cuda.h> saved_LIBS=$LIBS
#include <cuda_runtime.h>]], saved_CFLAGS=$CFLAGS
[int device; return cudaGetDevice(&device) == cudaSuccess ? 0 : 1;])], LIBS="$LIBS $CUDA_LIBS"
[AC_DEFINE([RUN_CUDA], [1], [Whether the machine on which aml is compiled can run cuda code.]) CFLAGS="$CFLAGS $CUDA_CFLAGS"
RUN_CUDA=yes],[]) AC_CHECK_HEADER([cuda.h],,
AC_MSG_RESULT($RUN_CUDA) [AC_MSG_ERROR([could not find cuda.h])])
AC_CHECK_HEADER([cuda_runtime.h],,
[AC_MSG_ERROR([could not find cuda_runtime.h])])
AC_CHECK_LIB(cudart, cudaMalloc,,
AC_MSG_ERROR([could not find cudart library]))
LIBS=$saved_LIBS
CFLAGS=$saved_CFLAGS
have_cuda=1
fi fi
AM_CONDITIONAL([RUN_CUDA],[ test "x$RUN_CUDA" = xyes ]) AC_DEFINE_UNQUOTED([HAVE_CUDA], [$have_cuda], [Whether aml support cuda library calls.])
AC_SUBST([HAVE_CUDA],[$have_cuda])
AM_CONDITIONAL([HAVE_CUDA], [ test "$have_cuda" == "1" ])
AC_SUBST(CUDA_CFLAGS)
AC_SUBST(CUDA_LIBS)
AC_SUBST(NVCC)
# Output # Output
######## ########
...@@ -156,7 +187,8 @@ AC_CONFIG_FILES([Makefile ...@@ -156,7 +187,8 @@ AC_CONFIG_FILES([Makefile
tests/Makefile tests/Makefile
doc/Makefile doc/Makefile
benchmarks/Makefile benchmarks/Makefile
o2lo
aml.pc aml.pc
include/aml/utils/version.h], [chmod +x o2lo]) include/aml/utils/version.h
include/aml/utils/features.h])
AC_CONFIG_FILES([o2lo], [chmod +x o2lo])
AC_OUTPUT AC_OUTPUT
...@@ -34,5 +34,6 @@ include_amlutils_HEADERS = \ ...@@ -34,5 +34,6 @@ include_amlutils_HEADERS = \
aml/utils/error.h \ aml/utils/error.h \
aml/utils/inner-malloc.h \ aml/utils/inner-malloc.h \
aml/utils/vector.h \ aml/utils/vector.h \
aml/utils/version.h aml/utils/version.h \
aml/utils/features.h
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include "aml/utils/inner-malloc.h" #include "aml/utils/inner-malloc.h"
#include "aml/utils/vector.h" #include "aml/utils/vector.h"
#include "aml/utils/version.h" #include "aml/utils/version.h"
#include "aml/utils/features.h"
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
......
...@@ -32,6 +32,10 @@ ...@@ -32,6 +32,10 @@
* Structure of a dense layout. * Structure of a dense layout.
**/ **/
struct aml_layout_dense { struct aml_layout_dense {
/** base pointer of the address range **/
void *ptr;
/** number of dimensions **/
size_t ndims;
/** /**
* dimensions, in element size, of the data structure, * dimensions, in element size, of the data structure,
* by order of appearance in memory. * by order of appearance in memory.
...@@ -42,20 +46,11 @@ struct aml_layout_dense { ...@@ -42,20 +46,11 @@ struct aml_layout_dense {
* Offset in number of elements. * Offset in number of elements.
**/ **/
size_t *stride; size_t *stride;
/**
* distances between two elements of the next dimension
* (or total dimension of the layout in this dimension).
**/
size_t *pitch;
/** /**
* cumulative distances between two elements in the same * cumulative distances between two elements in the same
* dimension (pitch[0] is the element size in bytes). * dimension (pitch[0] is the element size in bytes).
**/ **/
size_t *cpitch; size_t *cpitch;
/** base pointer of the address range **/
void *ptr;
/** number of dimensions **/
size_t ndims;
}; };
/** /**
......
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
#ifndef AML_FEATURES_H
#define AML_FEATURES_H
/**
* @defgroup aml_features "AML Features Detection API"
* @brief AML Features Detection API
*
* This module provides compile-time and runtime detection of the backends
* used in AML.
* This feature is useful, for instance, when testing AML: when AML is compiled
* with cuda backend support but runs on a machine without cuda devices, we can
* disable cuda tests.
* @{
**/
/** Whether aml had libnuma at compile time. (always true) **/
#define AML_HAVE_BACKEND_LIBNUMA 1
/**
* Whether aml had cuda capabilities at compile time.
* The value is substituted by configure: 1 when the cuda compiler, headers
* and runtime library were found, else 0.
**/
#define AML_HAVE_BACKEND_CUDA @HAVE_CUDA@
/** Flag for checking runtime support for libnuma **/
#define AML_BACKEND_LIBNUMA (1UL<<1)
/** Flag for checking runtime support for cuda **/
#define AML_BACKEND_CUDA (1UL<<2)
/**
* Check if a set of backends can be used at runtime.
*
* Checking for cuda and libnuma support at runtime:
* ```
* if (aml_support_backends(AML_BACKEND_LIBNUMA | AML_BACKEND_CUDA))
* { ... }
* ```
* @param[in] backends: The backends to check, given as a bitwise OR of
* AML_BACKEND_* flags.
* @return 1 if the backends can be used, else 0.
**/
int aml_support_backends(const unsigned long backends);
/**
* @}
**/
#endif // AML_FEATURES_H
...@@ -34,7 +34,8 @@ TILING_SOURCES = \ ...@@ -34,7 +34,8 @@ TILING_SOURCES = \
UTILS_SOURCES = \ UTILS_SOURCES = \
utils/bitmap.c \ utils/bitmap.c \
utils/error.c \ utils/error.c \
utils/vector.c utils/vector.c \
utils/features.c
LIB_SOURCES = \ LIB_SOURCES = \
$(AREA_SOURCES) \ $(AREA_SOURCES) \
...@@ -54,11 +55,13 @@ libaml_la_SOURCES=$(LIB_SOURCES) ...@@ -54,11 +55,13 @@ libaml_la_SOURCES=$(LIB_SOURCES)
############################################# #############################################
# Cuda sources # Cuda sources
if BUILD_CUDA if HAVE_CUDA
AM_CPPFLAGS += $(CUDA_CFLAGS)
AM_LDFLAGS += $(CUDA_LIBS)
# Build .c sources using cuda runtime library. # Build .c sources using cuda runtime library.
libaml_la_SOURCES+=area/cuda.c libaml_la_SOURCES+=area/cuda.c
libaml_la_LDFLAGS+=-lcudart
# Build .cu sources containing device code. # Build .cu sources containing device code.
# #
......
...@@ -217,15 +217,16 @@ int aml_area_linux_create(struct aml_area **area, ...@@ -217,15 +217,16 @@ int aml_area_linux_create(struct aml_area **area,
/* check if the nodemask is compatible with the nodeset */ /* check if the nodemask is compatible with the nodeset */
if (nodemask != NULL) { if (nodemask != NULL) {
int aml_last = aml_bitmap_last(nodemask); for (int i = 0; i < AML_BITMAP_MAX; i++) {
int allowed_last = numa_bitmask_weight(data->nodeset); int ours, theirs;
while (!numa_bitmask_isbitset(data->nodeset, --allowed_last)) ours = aml_bitmap_isset(nodemask, i);
; theirs = numa_bitmask_isbitset(data->nodeset, i);
if (aml_last > allowed_last) { if (ours && !theirs) {
err = -AML_EDOM; err = -AML_EDOM;
goto err_f_node; goto err_f_node;
}
} }
aml_bitmap_copy_to_ulong(nodemask, aml_bitmap_copy_to_ulong(nodemask,
data->nodeset->maskp, data->nodeset->maskp,
......
...@@ -20,7 +20,7 @@ static int aml_layout_dense_alloc(struct aml_layout **ret, ...@@ -20,7 +20,7 @@ static int aml_layout_dense_alloc(struct aml_layout **ret,
layout = AML_INNER_MALLOC_EXTRA(struct aml_layout, layout = AML_INNER_MALLOC_EXTRA(struct aml_layout,
struct aml_layout_dense, struct aml_layout_dense,
size_t, 4*ndims); size_t, 3*ndims);
if (layout == NULL) { if (layout == NULL) {
*ret = NULL; *ret = NULL;
return -AML_ENOMEM; return -AML_ENOMEM;
...@@ -30,6 +30,10 @@ static int aml_layout_dense_alloc(struct aml_layout **ret, ...@@ -30,6 +30,10 @@ static int aml_layout_dense_alloc(struct aml_layout **ret,
struct aml_layout, struct aml_layout,
struct aml_layout_dense); struct aml_layout_dense);
layout->data = (struct aml_layout_data *) data; layout->data = (struct aml_layout_data *) data;
data->ptr = NULL;
data->ndims = ndims;
data->dims = AML_INNER_MALLOC_EXTRA_NEXTPTR(layout, data->dims = AML_INNER_MALLOC_EXTRA_NEXTPTR(layout,
struct aml_layout, struct aml_layout,
struct aml_layout_dense, struct aml_layout_dense,
...@@ -41,16 +45,10 @@ static int aml_layout_dense_alloc(struct aml_layout **ret, ...@@ -41,16 +45,10 @@ static int aml_layout_dense_alloc(struct aml_layout **ret,
for (size_t i = 0; i < ndims; i++) for (size_t i = 0; i < ndims; i++)
data->stride[i] = 1; data->stride[i] = 1;
data->pitch = AML_INNER_MALLOC_EXTRA_NEXTPTR(layout,
struct aml_layout,
struct aml_layout_dense,
size_t, ndims*2);
data->cpitch = AML_INNER_MALLOC_EXTRA_NEXTPTR(layout, data->cpitch = AML_INNER_MALLOC_EXTRA_NEXTPTR(layout,
struct aml_layout, struct aml_layout,
struct aml_layout_dense, struct aml_layout_dense,
size_t, ndims*3); size_t, ndims*2);
data->ptr = NULL;
data->ndims = ndims;
*ret = layout; *ret = layout;
return AML_SUCCESS; return AML_SUCCESS;
} }
...@@ -68,9 +66,7 @@ void aml_layout_dense_init_cpitch(struct aml_layout *layout, ...@@ -68,9 +66,7 @@ void aml_layout_dense_init_cpitch(struct aml_layout *layout,
data->ptr = ptr; data->ptr = ptr;
memcpy(data->dims, dims, ndims * sizeof(size_t)); memcpy(data->dims, dims, ndims * sizeof(size_t));
memcpy(data->stride, stride, ndims * sizeof(size_t)); memcpy(data->stride, stride, ndims * sizeof(size_t));
memcpy(data->cpitch, cpitch, (ndims + 1) * sizeof(size_t)); memcpy(data->cpitch, cpitch, ndims * sizeof(size_t));
for (size_t i = 0; i < ndims; i++)
data->pitch[i] = cpitch[i+1]/cpitch[i];
} }
int aml_layout_dense_create(struct aml_layout **layout, int aml_layout_dense_create(struct aml_layout **layout,
...@@ -98,6 +94,7 @@ int aml_layout_dense_create(struct aml_layout **layout, ...@@ -98,6 +94,7 @@ int aml_layout_dense_create(struct aml_layout **layout,
data = (struct aml_layout_dense *)l->data; data = (struct aml_layout_dense *)l->data;
data->ptr = ptr; data->ptr = ptr;
data->cpitch[0] = element_size; data->cpitch[0] = element_size;
size_t _pitch[ndims];
switch (AML_LAYOUT_ORDER(order)) { switch (AML_LAYOUT_ORDER(order)) {
...@@ -108,9 +105,9 @@ int aml_layout_dense_create(struct aml_layout **layout, ...@@ -108,9 +105,9 @@ int aml_layout_dense_create(struct aml_layout **layout,
if (stride) if (stride)
data->stride[i] = stride[ndims-i-1]; data->stride[i] = stride[ndims-i-1];
if (pitch) if (pitch)
data->pitch[i] = pitch[ndims-i-1]; _pitch[i] = pitch[ndims-i-1];
else else
data->pitch[i] = dims[ndims-i-1]; _pitch[i] = dims[ndims-i-1];
} }
break; break;
...@@ -120,17 +117,17 @@ int aml_layout_dense_create(struct aml_layout **layout, ...@@ -120,17 +117,17 @@ int aml_layout_dense_create(struct aml_layout **layout,
if (stride) if (stride)
memcpy(data->stride, stride, ndims * sizeof(size_t)); memcpy(data->stride, stride, ndims * sizeof(size_t));
if (pitch) if (pitch)
memcpy(data->pitch, pitch, ndims * sizeof(size_t)); memcpy(_pitch, pitch, ndims * sizeof(size_t));
else else
memcpy(data->pitch, dims, ndims * sizeof(size_t)); memcpy(_pitch, dims, ndims * sizeof(size_t));
break; break;
default: default:
free(l); free(l);
return -AML_EINVAL; return -AML_EINVAL;
} }
for (size_t i = 1; i <= ndims; i++) for (size_t i = 1; i < ndims; i++)
data->cpitch[i] = data->cpitch[i-1]*data->pitch[i-1]; data->cpitch[i] = data->cpitch[i-1]*_pitch[i-1];
*layout = l; *layout = l;
return AML_SUCCESS; return AML_SUCCESS;
...@@ -236,7 +233,7 @@ static void merge_dims(const size_t ndims, ...@@ -236,7 +233,7 @@ static void merge_dims(const size_t ndims,
} }
dim_index++; dim_index++;
} }
new_cpitch[new_dim_index + 1] = cpitch[dim_index + 1]; new_cpitch[new_dim_index + 1] = 0;
*new_ndims = new_dim_index + 1; *new_ndims = new_dim_index + 1;
} }
...@@ -256,6 +253,7 @@ static int reshape_dims(const struct aml_layout_dense *d, ...@@ -256,6 +253,7 @@ static int reshape_dims(const struct aml_layout_dense *d,
size_t m_ndims; size_t m_ndims;
size_t m_dims[d->ndims]; size_t m_dims[d->ndims];
size_t m_stride[d->ndims]; size_t m_stride[d->ndims];
/* for simplicity, the underlying algorithm needs one more slot */
size_t m_cpitch[d->ndims + 1]; size_t m_cpitch[d->ndims + 1];
/* First obtain a canonical representation of the layout /* First obtain a canonical representation of the layout
...@@ -306,6 +304,7 @@ int aml_layout_column_reshape(const struct aml_layout_data *data, ...@@ -306,6 +304,7 @@ int aml_layout_column_reshape(const struct aml_layout_data *data,
struct aml_layout *layout; struct aml_layout *layout;
const struct aml_layout_dense *d; const struct aml_layout_dense *d;
size_t stride[ndims]; size_t stride[ndims];
/* for simplicity, the underlying algorithm needs one more slot */
size_t cpitch[ndims + 1]; size_t cpitch[ndims + 1];
d = (const struct aml_layout_dense *)data; d = (const struct aml_layout_dense *)data;
...@@ -350,16 +349,12 @@ int aml_layout_column_slice(const struct aml_layout_data *data, ...@@ -350,16 +349,12 @@ int aml_layout_column_slice(const struct aml_layout_data *data,
if (err) if (err)
return err; return err;
size_t cpitch[d->ndims + 1]; size_t cpitch[d->ndims];
size_t new_strides[d->ndims]; size_t new_strides[d->ndims];
cpitch[d->ndims] = d->cpitch[d->ndims];
for (size_t i = 0; i < d->ndims; i++) { for (size_t i = 0; i < d->ndims; i++) {
cpitch[i] = d->cpitch[i]; cpitch[i] = d->cpitch[i];
new_strides[i] = strides[i] * d->stride[i]; new_strides[i] = strides[i] * d->stride[i];
cpitch[d->ndims] -= cpitch[i] * offsets[i] * d->stride[i];
} }
aml_layout_dense_init_cpitch(layout, aml_layout_dense_init_cpitch(layout,
...@@ -433,6 +428,7 @@ int aml_layout_row_reshape(const struct aml_layout_data *data, ...@@ -433,6 +428,7 @@ int aml_layout_row_reshape(const struct aml_layout_data *data,
struct aml_layout *layout; struct aml_layout *layout;