Commit ad2d1dce authored by Nicolas Denoyelle's avatar Nicolas Denoyelle
Browse files

Remove cuda layouts.

parent 724b5594
Pipeline #12054 passed with stages
in 6 minutes and 48 seconds
......@@ -44,6 +44,5 @@ endif
if HAVE_CUDA
include_aml_area_HEADERS += aml/area/cuda.h
include_aml_layout_HEADERS += aml/layout/cuda.h
include_aml_dma_HEADERS += aml/dma/cuda.h
endif
......@@ -106,7 +106,29 @@ int aml_dma_cuda_destroy(struct aml_dma **dma);
//--- DMA copy operators --------------------------------------------------//
/** aml_dma_cuda copy operator for 1D to 1D layouts **/
/**
* Embed a pair of devices in a void* to use as dma copy_operator argument
* when copying from device to device.
**/
/* Pack src (low 32 bits) and dst (high 32 bits) into a single void*.
 * Arguments are parenthesized so caller expressions expand safely, and
 * src is masked so sign extension of a negative value cannot clobber
 * the dst half. NOTE(review): assumes 64-bit intptr_t — confirm AML
 * does not target 32-bit hosts with CUDA enabled. */
#define AML_DMA_CUDA_DEVICE_PAIR(src, dst)                                     \
	((void *)(((intptr_t)(dst) << 32) | ((intptr_t)(src) & 0xffffffff)))
/**
 * Translate back a pair of device ids stored in `pair` (void*) into two
 * device id integers.
**/
/* Unpack `pair` into src (low 32 bits) and dst (high 32 bits).
 * Wrapped in do { } while (0) so the expansion behaves as one
 * statement — the original three-statement form broke inside an
 * unbraced if/else. Both outputs are always assigned, so the old
 * `src = dst = 0;` pre-initialization is unnecessary. */
#define AML_DMA_CUDA_DEVICE_FROM_PAIR(pair, src, dst)                          \
	do {                                                                   \
		(src) = (int)((intptr_t)(pair) & 0xffffffff);                  \
		(dst) = (int)((intptr_t)(pair) >> 32);                         \
	} while (0)
/**
* aml_dma_cuda copy operator for 1D to 1D layouts.
* @param [in] dst: The destination layout of the copy.
* @param [in] src: The source layout of the copy.
* @param [in] arg: Either a device id (int). or two device ids
* @return an AML error code.
**/
int aml_dma_cuda_copy_1D(struct aml_layout *dst,
const struct aml_layout *src,
void *arg);
......
/******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
******************************************************************************/
#ifndef AML_LAYOUT_CUDA_H
#define AML_LAYOUT_CUDA_H
/**
* @defgroup aml_layout_cuda "AML Layout Cuda"
* @brief Layout on device pointer.
*
* Cuda layout is a wrapper on other layout.
* All operations are deferred to the embedded layout.
* deref operation of the embedded layout is used to compute offset
* on device pointer and return the appropriate offset.
* Operations on this layout cannot be used on device side.
* However the layout pointer (if it is a device pointer) can be used
* on device side.
*
* @code
* #include <aml/layout/cuda.h>
* @endcode
* @see aml_layout
* @{
**/
/** Inner data of a cuda layout: a raw device pointer plus geometry. **/
struct aml_layout_cuda_data {
	/** Pointer to data on device. **/
	void *device_ptr;
	/** Device id where device_ptr is allocated. **/
	int device;
	/** Dimension order the user requested at creation (row/column major). **/
	int order;
	/** Number of dimensions in the layout. **/
	size_t ndims;
	/** Layout dims, stored internally in row major order. **/
	size_t *dims;
	/**
	 * Offset between elements of the same dimension.
	 * Offset is in number of elements.
	 **/
	size_t *stride;
	/**
	 * Cumulative distances between two elements in the same
	 * dimension (cpitch[0] is the element size in bytes).
	 **/
	size_t *cpitch;
};
/**
* Create a new layout on device pointer with embedded layout.
* @param[out] out: A pointer to receive the newly allocated layout.
* @param[in] device_ptr: The pointer on which the layout has to work.
* @param[in] device: The device id where the device_ptr is allocated.
* @param[in] element_size: The size of elements in this layout.
* @param[in] order: Order of dimensions in the layout.
* @param[in] ndims: The number of dimensions in the layout.
* @param[in] dims: The dimensions in the layout.
 * @param[in] stride: The space between consecutive elements of the same
 * dimension, in number of elements.
* @param[in] pitch: The space -- in number of element -- between 2 elements in
* the next dimension.
* @return AML_SUCCESS or -AML_ENOMEM if the memory allocation for layout
* failed.
**/
int aml_layout_cuda_create(struct aml_layout **out,
void *device_ptr,
int device,
const size_t element_size,
const int order,
const size_t ndims,
const size_t *dims,
const size_t *stride,
const size_t *pitch);
/** Always returns the pointer to device_ptr, whatever the coordinates. **/
void *aml_layout_cuda_deref(const struct aml_layout_data *data,
const size_t *coords);
/** Always returns the pointer to device_ptr, whatever the coordinates. **/
void *aml_layout_cuda_deref_native(const struct aml_layout_data *data,
const size_t *coords);
/** Returns layout order **/
int aml_layout_cuda_order(const struct aml_layout_data *data);
/** Copies layout dims with user order. **/
int aml_layout_cuda_dims(const struct aml_layout_data *data, size_t *dims);
/** Copies layout dims in row major order. **/
int aml_layout_cuda_dims_native(const struct aml_layout_data *data,
size_t *dims);
/** Returns the number of dimensions in the layout. **/
size_t aml_layout_cuda_ndims(const struct aml_layout_data *data);
/** Returns the size of an element in the layout. **/
size_t aml_layout_cuda_element_size(const struct aml_layout_data *data);
/** Cuda layout operations **/
extern struct aml_layout_ops aml_layout_cuda_ops;
/**
* @}
**/
#endif // AML_LAYOUT_CUDA_H
......@@ -62,7 +62,7 @@ if HAVE_CUDA
AM_CPPFLAGS += $(CUDA_CFLAGS)
AM_LDFLAGS += $(CUDA_LIBS)
libaml_la_SOURCES+=area/cuda.c layout/cuda.c dma/cuda.c
libaml_la_SOURCES+=area/cuda.c dma/cuda.c
endif
......
......@@ -11,7 +11,6 @@
#include "aml.h"
#include "aml/dma/cuda.h"
#include "aml/layout/cuda.h"
/**
* Callback on dma stream to update all requests status
......@@ -190,16 +189,16 @@ int aml_dma_cuda_copy_1D(struct aml_layout *dst,
void *arg)
{
int err;
int src_device;
int dst_device;
const void *src_ptr = aml_layout_rawptr(src);
void *dst_ptr = aml_layout_rawptr(dst);
struct aml_dma_cuda_data *dma_data = (struct aml_dma_cuda_data *)arg;
const struct aml_layout_cuda_data *cu_src =
(struct aml_layout_cuda_data *)(src->data);
struct aml_layout_cuda_data *cu_dst =
(struct aml_layout_cuda_data *)(dst->data);
size_t n = 0;
size_t size = 0;
AML_DMA_CUDA_DEVICE_FROM_PAIR(arg, src_device, dst_device);
err = aml_layout_dims(src, &n);
if (err != AML_SUCCESS)
return err;
......@@ -211,8 +210,8 @@ int aml_dma_cuda_copy_1D(struct aml_layout *dst,
dma_data->stream) != cudaSuccess)
return -AML_FAILURE;
} else if (dma_data->kind == cudaMemcpyDeviceToDevice) {
if (cudaMemcpyPeerAsync(dst_ptr, cu_dst->device, src_ptr,
cu_src->device, size,
if (cudaMemcpyPeerAsync(dst_ptr, dst_device, src_ptr,
src_device, size,
dma_data->stream) != cudaSuccess)
return -AML_FAILURE;
} else
......
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
******************************************************************************/
#include "aml.h"
#include "aml/layout/cuda.h"
#include "aml/utils/inner-malloc.h"
/**
 * Allocate and initialize a cuda layout over `device_ptr`.
 * The layout, its data, and the three geometry arrays (dims, stride,
 * cpitch — each ndims size_t) live in a single inner-malloc block.
 * Returns AML_SUCCESS, or -AML_ENOMEM when the allocation fails.
 * NOTE(review): the column-major branch reads dims[ndims - 1]
 * unconditionally — assumes ndims >= 1; confirm callers never pass 0.
 */
int aml_layout_cuda_create(struct aml_layout **out,
                           void *device_ptr,
                           int device,
                           const size_t element_size,
                           const int order,
                           const size_t ndims,
                           const size_t *dims,
                           const size_t *stride,
                           const size_t *pitch)
{
	struct aml_layout *layout;
	struct aml_layout_cuda_data *layout_data;
	/* Single allocation: layout + data struct + 3 * ndims size_t. */
	layout = AML_INNER_MALLOC_ARRAY(3 * ndims, size_t, struct aml_layout,
	                                struct aml_layout_cuda_data);
	if (layout == NULL)
		return -AML_ENOMEM;
	layout_data = AML_INNER_MALLOC_GET_FIELD(layout, 2, struct aml_layout,
	                                         struct aml_layout_cuda_data);
	layout_data->device_ptr = device_ptr;
	layout_data->device = device;
	layout_data->order = order;
	layout_data->ndims = ndims;
	/* Carve the trailing array region into dims / stride / cpitch. */
	layout_data->dims = AML_INNER_MALLOC_GET_ARRAY(
	        layout, size_t, struct aml_layout, struct aml_layout_cuda_data);
	layout_data->stride = layout_data->dims + ndims;
	layout_data->cpitch = layout_data->stride + ndims;
	/* dims, stride and cpitch are always stored internally in row
	 * major order; cpitch is cumulative, with cpitch[0] being the
	 * element size in bytes. */
	layout_data->cpitch[0] = element_size;
	if (order == AML_LAYOUT_ORDER_COLUMN_MAJOR) {
		/* Column-major input: reverse dimension order on copy. */
		layout_data->dims[0] = dims[ndims - 1];
		layout_data->stride[0] = stride[ndims - 1];
		for (size_t i = 1; i < ndims; i++) {
			layout_data->dims[i] = dims[ndims - 1 - i];
			layout_data->stride[i] = stride[ndims - 1 - i];
			layout_data->cpitch[i] = layout_data->cpitch[i - 1] *
			                         pitch[ndims - 1 - i];
		}
	} else {
		/* Row-major input matches internal order: plain copies. */
		memcpy(layout_data->dims, dims, ndims * sizeof(size_t));
		memcpy(layout_data->stride, stride, ndims * sizeof(size_t));
		for (size_t i = 1; i < ndims; i++)
			layout_data->cpitch[i] =
			        layout_data->cpitch[i - 1] * pitch[i];
	}
	layout->data = (struct aml_layout_data *)layout_data;
	layout->ops = &aml_layout_cuda_ops;
	*out = layout;
	return AML_SUCCESS;
}
/** Ignore the coordinates and hand back the raw device pointer. **/
void *aml_layout_cuda_deref(const struct aml_layout_data *data,
                            const size_t *coords)
{
	(void)coords; /* offset computation is not done on host side */
	return ((struct aml_layout_cuda_data *)data)->device_ptr;
}
/** Native variant: also ignores coordinates, returns the device pointer. **/
void *aml_layout_cuda_deref_native(const struct aml_layout_data *data,
                                   const size_t *coords)
{
	(void)coords; /* offset computation is not done on host side */
	return ((struct aml_layout_cuda_data *)data)->device_ptr;
}
int aml_layout_cuda_order(const struct aml_layout_data *data)
{
struct aml_layout_cuda_data *cudata;
cudata = (struct aml_layout_cuda_data *)data;
return cudata->order;
}
int aml_layout_cuda_dims(const struct aml_layout_data *data, size_t *dims)
{
struct aml_layout_cuda_data *cudata;
cudata = (struct aml_layout_cuda_data *)data;
if (cudata->order == AML_LAYOUT_ORDER_ROW_MAJOR)
memcpy(dims, cudata->dims, sizeof(*dims) * cudata->ndims);
else
for (size_t i = 0; i < cudata->ndims; i++)
dims[i] = cudata->dims[cudata->ndims - 1 - i];
return AML_SUCCESS;
}
int aml_layout_cuda_dims_native(const struct aml_layout_data *data,
size_t *dims)
{
struct aml_layout_cuda_data *cudata;
cudata = (struct aml_layout_cuda_data *)data;
memcpy(dims, cudata->dims, sizeof(*dims) * cudata->ndims);
return AML_SUCCESS;
}
size_t aml_layout_cuda_ndims(const struct aml_layout_data *data)
{
struct aml_layout_cuda_data *cudata;
cudata = (struct aml_layout_cuda_data *)data;
return cudata->ndims;
}
size_t aml_layout_cuda_element_size(const struct aml_layout_data *data)
{
struct aml_layout_cuda_data *cudata;
cudata = (struct aml_layout_cuda_data *)data;
return cudata->cpitch[0];
}
void *aml_layout_cuda_rawptr(const struct aml_layout_data *data)
{
struct aml_layout_cuda_data *cudata;
cudata = (struct aml_layout_cuda_data *)data;
return cudata->device_ptr;
}
/** Cuda layout operations table. Reshape, slice, fprintf, duplicate and
 *  destroy are not supported by this layout and are left NULL. **/
struct aml_layout_ops aml_layout_cuda_ops = {
	.deref = aml_layout_cuda_deref,
	.deref_native = aml_layout_cuda_deref_native,
	.rawptr = aml_layout_cuda_rawptr,
	.order = aml_layout_cuda_order,
	.dims = aml_layout_cuda_dims,
	.dims_native = aml_layout_cuda_dims_native,
	.ndims = aml_layout_cuda_ndims,
	.element_size = aml_layout_cuda_element_size,
	.reshape = NULL,
	.slice = NULL,
	.slice_native = NULL,
	.fprintf = NULL,
	.duplicate = NULL,
	.destroy = NULL,
};
......@@ -14,7 +14,6 @@
#include "aml.h"
#include "aml/dma/cuda.h"
#include "aml/layout/cuda.h"
#include "aml/layout/dense.h"
// Data
......@@ -47,10 +46,10 @@ void setup()
AML_LAYOUT_ORDER_COLUMN_MAJOR,
element_size, ndims, &dims, &stride,
&pitch) == AML_SUCCESS);
assert(aml_layout_cuda_create(&device_layout, device_data, 0,
element_size,
AML_LAYOUT_ORDER_COLUMN_MAJOR, ndims,
&dims, &stride, &pitch) == AML_SUCCESS);
assert(aml_layout_dense_create(&device_layout, device_data,
AML_LAYOUT_ORDER_COLUMN_MAJOR,
element_size, ndims, &dims, &stride,
&pitch) == AML_SUCCESS);
// Dma
assert(aml_dma_cuda_create(&host_device, cudaMemcpyHostToDevice) ==
......@@ -66,7 +65,7 @@ void teardown()
cudaFree(device_data);
// Layout
aml_layout_destroy(&device_layout);
aml_layout_destroy(&host_layout);
aml_layout_destroy(&device_layout);
// Dma
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment