...
 
Commits (4)
/******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
******************************************************************************/
#ifndef AML_AREA_LAYOUT_CUDA_H
#define AML_AREA_LAYOUT_CUDA_H
/**
* @defgroup aml_layout_cuda "AML Layout Cuda"
* @brief Layout on device pointer.
*
* Cuda layout describes the shape (order, dims, stride and pitch) of data
* located behind a device pointer.
* The deref operations do not compute any offset: they return the device
* pointer itself, whatever the coordinates.
* Operations on this layout cannot be used on device side.
* However the layout pointer (if it is a device pointer) can be used
* on device side.
*
* @code
* #include <aml/layout/cuda.h>
* @endcode
* @see aml_layout
* @{
**/
#include <aml/area/cuda.h>
/**
* Private data of a cuda layout: a device pointer and the description
* (order, number of dimensions, dims, stride, cumulative pitch) of the
* data located behind it.
**/
struct aml_layout_cuda_data {
/** Pointer to data on device. **/
void *device_ptr;
/** Metadata on how the pointer was allocated. Not owned by the layout. **/
struct aml_area_cuda_data *data;
/** Layout order expected by the user. **/
int order;
/** Number of dimensions in the layout. **/
size_t ndims;
/** Layout dimensions, stored in row major order. **/
size_t *dims;
/**
* Offset between elements of the same dimension.
* Offset is in number of elements.
**/
size_t *stride;
/**
* Cumulative distances between two elements in the same
* dimension (cpitch[0] is the element size in bytes).
**/
size_t *cpitch;
};
/**
* Create a new layout on a device pointer.
* @param[out] out: A pointer to receive the newly allocated layout.
* @param[in] device_ptr: The pointer on which the layout has to work.
* @param[in] element_size: The size of elements in this layout.
* @param[in] order: Order of dimensions in the layout.
* @param[in] ndims: The number of dimensions in the layout.
* @param[in] dims: The dimensions in the layout (ndims values).
* @param[in] stride: The empty space -- in number of elements -- between
* consecutive elements of the same dimension (ndims values).
* @param[in] pitch: The space -- in number of elements -- between 2 elements
* in the next dimension.
* @param[in] data: The area data used to allocate device_ptr.
* data is not owned by the layout and must live longer than the layout.
* data is used as an additional source of information on pointer
* when performing operations.
* @return AML_SUCCESS or -AML_ENOMEM if the memory allocation for layout
* failed.
**/
int aml_layout_cuda_create(struct aml_layout **out,
void *device_ptr,
const size_t element_size,
const int order,
const size_t ndims,
const size_t *dims,
const size_t *stride,
const size_t *pitch,
struct aml_area_cuda_data *data);
/**
* Destroy a layout obtained with aml_layout_cuda_create().
* Only the layout itself is released; the device pointer and the area data
* given at creation are left untouched.
* @param[in, out] layout: A pointer to the layout to destroy.
* On exit, the pointer content is set to NULL.
* @return AML_SUCCESS or -AML_EINVAL if layout or *layout is NULL.
**/
int aml_layout_cuda_destroy(struct aml_layout **layout);
/**
* Always returns device_ptr, whatever the coordinates.
* @param[in] data: The cuda layout data.
* @param[in] coords: Ignored.
* @return The device pointer stored in the layout.
**/
void *aml_layout_cuda_deref(const struct aml_layout_data *data,
const size_t *coords);
/**
* Always returns device_ptr, whatever the coordinates.
* Same result as aml_layout_cuda_deref().
* @param[in] data: The cuda layout data.
* @param[in] coords: Ignored.
* @return The device pointer stored in the layout.
**/
void *aml_layout_cuda_deref_native(const struct aml_layout_data *data,
const size_t *coords);
/** Returns the layout order given at creation. **/
int aml_layout_cuda_order(const struct aml_layout_data *data);
/**
* Copies layout dims in user order.
* @param[in] data: The cuda layout data.
* @param[out] dims: Array receiving the ndims dimensions of the layout.
* @return AML_SUCCESS.
**/
int aml_layout_cuda_dims(const struct aml_layout_data *data, size_t *dims);
/**
* Copies layout dims in row major order, i.e. as stored internally.
* @param[in] data: The cuda layout data.
* @param[out] dims: Array receiving the ndims dimensions of the layout.
* @return AML_SUCCESS.
**/
int aml_layout_cuda_dims_native(const struct aml_layout_data *data,
size_t *dims);
/** Returns the number of dimensions in the layout, as given at creation. **/
size_t aml_layout_cuda_ndims(const struct aml_layout_data *data);
/**
* Returns the size of an element in the layout.
* This is the element_size given at creation, stored as cpitch[0].
**/
size_t aml_layout_cuda_element_size(const struct aml_layout_data *data);
/**
* Cuda layout operations.
* Table of the aml_layout_cuda_*() functions declared above; it is set as
* the ops of layouts created with aml_layout_cuda_create().
**/
extern struct aml_layout_ops aml_layout_cuda_ops;
/**
* @}
**/
#endif // AML_AREA_LAYOUT_CUDA_H
......@@ -22,7 +22,7 @@
* Major version changes in AML
* denotes ABI changes which prevent
* compatibility with previous major version ABI.
*
*
**/
#define AML_VERSION_MAJOR @PACKAGE_VERSION_MAJOR@
......
......@@ -63,8 +63,10 @@ AM_LDFLAGS += $(CUDA_LIBS)
# Build .c sources using cuda runtime library.
libaml_la_SOURCES+=area/cuda.c
# Build files not requiring additional libraries
LAYOUT_SOURCES+=layout/cuda.c
# Build .cu sources containing device code.
#
# CUDA_LO_FILES= # .lo files result of .cu files.
# CUDA_FLAGS=--x=cu
#
......
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
******************************************************************************/
#include "aml.h"
#include "aml/utils/inner-malloc.h"
#include "aml/layout/cuda.h"
int aml_layout_cuda_create(struct aml_layout **out,
void *device_ptr,
const size_t element_size,
const int order,
const size_t ndims,
const size_t *dims,
const size_t *stride,
const size_t *pitch,
struct aml_area_cuda_data *data)
{
struct aml_layout *layout;
struct aml_layout_cuda_data *layout_data;
layout = AML_INNER_MALLOC_EXTRA(struct aml_layout,
struct aml_layout_cuda_data,
size_t, 3*ndims);
if (layout == NULL)
return -AML_ENOMEM;
layout_data = AML_INNER_MALLOC_NEXTPTR(layout,
struct aml_layout,
struct aml_layout_cuda_data);
layout_data->device_ptr = device_ptr;
layout_data->order = order;
layout_data->ndims = ndims;
layout_data->data = data;
layout_data->dims =
AML_INNER_MALLOC_EXTRA_NEXTPTR(layout,
struct aml_layout,
struct aml_layout_cuda_data,
size_t, 0);
layout_data->stride =
AML_INNER_MALLOC_EXTRA_NEXTPTR(layout,
struct aml_layout,
struct aml_layout_cuda_data,
size_t, ndims);
layout_data->cpitch =
AML_INNER_MALLOC_EXTRA_NEXTPTR(layout,
struct aml_layout,
struct aml_layout_cuda_data,
size_t, ndims*2);
// Store dims, stride and cpitch are internally stored in fortran
// row major.
layout_data->cpitch[0] = element_size;
if (order == AML_LAYOUT_ORDER_COLUMN_MAJOR) {
layout_data->dims[0] = dims[ndims-1];
layout_data->stride[0] = stride[ndims-1];
for (size_t i = 1; i < ndims; i++) {
layout_data->dims[i] = dims[ndims-1-i];
layout_data->stride[i] = stride[ndims-1-i];
layout_data->cpitch[i] = layout_data->cpitch[i-1] *
pitch[ndims-1-i];
}
} else {
memcpy(layout_data->dims, dims, ndims * sizeof(size_t));
memcpy(layout_data->stride, stride, ndims * sizeof(size_t));
for (size_t i = 1; i < ndims; i++)
layout_data->cpitch[i] = layout_data->cpitch[i-1] *
pitch[i];
}
layout->data = (struct aml_layout_data *) layout_data;
layout->ops = &aml_layout_cuda_ops;
*out = layout;
return AML_SUCCESS;
}
/**
* Release a layout created with aml_layout_cuda_create() and reset the
* caller's handle to NULL.
* @return AML_SUCCESS, or -AML_EINVAL if layout or *layout is NULL.
**/
int aml_layout_cuda_destroy(struct aml_layout **layout)
{
	if (layout != NULL && *layout != NULL) {
		// Freeing the layout pointer is the only release done here.
		free(*layout);
		*layout = NULL;
		return AML_SUCCESS;
	}
	return -AML_EINVAL;
}
void *aml_layout_cuda_deref(const struct aml_layout_data *data,
const size_t *coords)
{
struct aml_layout_cuda_data *cudata;
(void)coords;
cudata = (struct aml_layout_cuda_data *)data;
return cudata->device_ptr;
}
void *aml_layout_cuda_deref_native(const struct aml_layout_data *data,
const size_t *coords)
{
struct aml_layout_cuda_data *cudata;
(void)coords;
cudata = (struct aml_layout_cuda_data *)data;
return cudata->device_ptr;
}
int aml_layout_cuda_order(const struct aml_layout_data *data)
{
struct aml_layout_cuda_data *cudata;
cudata = (struct aml_layout_cuda_data *)data;
return cudata->order;
}
int aml_layout_cuda_dims(const struct aml_layout_data *data, size_t *dims)
{
struct aml_layout_cuda_data *cudata;
cudata = (struct aml_layout_cuda_data *)data;
if (cudata->order == AML_LAYOUT_ORDER_ROW_MAJOR)
memcpy(dims, cudata->dims, sizeof(*dims) * cudata->ndims);
else
for (size_t i = 0; i < cudata->ndims; i++)
dims[i] = cudata->dims[cudata->ndims - 1 - i];
return AML_SUCCESS;
}
int aml_layout_cuda_dims_native(const struct aml_layout_data *data,
size_t *dims)
{
struct aml_layout_cuda_data *cudata;
cudata = (struct aml_layout_cuda_data *)data;
memcpy(dims, cudata->dims, sizeof(*dims) * cudata->ndims);
return AML_SUCCESS;
}
size_t aml_layout_cuda_ndims(const struct aml_layout_data *data)
{
struct aml_layout_cuda_data *cudata;
cudata = (struct aml_layout_cuda_data *)data;
return cudata->ndims;
}
size_t aml_layout_cuda_element_size(const struct aml_layout_data *data)
{
struct aml_layout_cuda_data *cudata;
cudata = (struct aml_layout_cuda_data *)data;
return cudata->cpitch[0];
}
/**
* Operation table of the cuda layout.
* reshape, slice and slice_native are not provided and are set to NULL.
**/
struct aml_layout_ops aml_layout_cuda_ops = {
	// Element access.
	.deref = aml_layout_cuda_deref,
	.deref_native = aml_layout_cuda_deref_native,

	// Shape queries.
	.order = aml_layout_cuda_order,
	.ndims = aml_layout_cuda_ndims,
	.dims = aml_layout_cuda_dims,
	.dims_native = aml_layout_cuda_dims_native,
	.element_size = aml_layout_cuda_element_size,

	// Operations not provided by this layout.
	.reshape = NULL,
	.slice = NULL,
	.slice_native = NULL,
};
......@@ -2,13 +2,6 @@ AM_COLOR_TESTS = yes
AM_CFLAGS = -I$(top_srcdir)/include $(PTHREAD_CFLAGS)
AM_LDFLAGS = ../src/libaml.la $(PTHREAD_LIBS)
if HAVE_CUDA
# LIBS is used instead of AM_LDFLAGS on purpose
# AM_LDFLAGS appends flags before libraries added before LDADD.
# Thus, when linking with libaml.la, linking with cuda is not done.
LIBS += $(CUDA_CFLAGS)
LIBS += $(CUDA_LIBS)
endif
if HAVE_CUDA
# LIBS is used instead of AM_LDFLAGS on purpose
......@@ -34,6 +27,9 @@ UTILS_TESTS = \
AREA_TESTS = \
area/test_area \
area/test_linux
if HAVE_CUDA
AREA_TESTS += area/test_cuda
endif
noinst_LTLIBRARIES = liblayout_test.la
liblayout_test_la_CPPFLAGS =
......