Commit d2efb479 authored by Swann Perarnau's avatar Swann Perarnau
Browse files

Merge branch 'new-dma-design' into 'master'

[refactor] redesign DMAs to support layouts

Closes #46

See merge request !73
parents e5d7b7cb d8f56ffc
Pipeline #8242 passed with stages
in 28 minutes and 56 seconds
......@@ -70,7 +70,7 @@ int main(int argc, char *argv[])
aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
assert(fast != NULL);
assert(!aml_dma_linux_par_create(&dma, numthreads*2, numthreads));
assert(!aml_dma_linux_par_create(&dma, numthreads*2));
assert(!aml_scratch_seq_create(&sa, fast, slow, dma, tiling,
(size_t)2*numthreads, (size_t)1));
assert(!aml_scratch_seq_create(&sb, fast, slow, dma, tiling,
......
......@@ -430,162 +430,6 @@ int aml_tiling_iterator_get(const struct aml_tiling_iterator *iterator, ...);
////////////////////////////////////////////////////////////////////////////////
/**
* @}
* @defgroup aml_dma "AML DMA"
* @brief Management of low-level memory movements.
*
* AML DMA is the abstraction for handling memory movements.
* AML DMA can asynchronously move data from one area to another.
* While performing a movement, a DMA operation
* may also translate from a source tiling to a different
* destination tiling.
*
* @image html dma.png width=600
* @{
**/
////////////////////////////////////////////////////////////////////////////////
/**
* Internal macros used for tracking DMA request types.
* Invalid request type. Used for marking inactive requests in the vector.
**/
#define AML_DMA_REQUEST_TYPE_INVALID -1
/**
* Internal macros used for tracking DMA request types.
* Copy request type. Uses memcpy() for data migration.
**/
#define AML_DMA_REQUEST_TYPE_COPY 0
/**
* aml_dma is mainly used to asynchronously move data.
* aml_dma_request is an opaque structure containing information
* about ongoing request for data movement in a dma operation.
* @see aml_dma_ops
* @see aml_dma_async_copy()
**/
struct aml_dma_request;
/**
* Opaque handle implemented by each aml_dma implementations.
* Should not be used by end-users.
**/
struct aml_dma_data;
/**
 * aml_dma_ops is a structure containing operations for a specific
 * aml_dma implementation.
 * These operations are detailed in the structure.
 * They are specific in:
 * - the type of aml_area source and destination,
 * - the progress engine performing the operation,
 * - the type of source and destination data structures.
 *
 * Each different combination of these three points may require a different
 * set of dma operations.
 **/
struct aml_dma_ops {
/**
 * Initiate a data movement, from a source pointer to a destination
 * pointer, and output a request handler for managing the transfer.
 * @param dma: dma implementation internal data.
 * @param req: Output the request handle to manage termination
 * of the movement.
 * @param type: A valid AML_DMA_REQUEST_TYPE_* specifying the kind
 * of operation to perform.
 * @param args: list of variadic arguments provided to aml_dma_copy()
 * @return an AML error code.
 **/
int (*create_request)(struct aml_dma_data *dma,
struct aml_dma_request **req, int type,
va_list args);
/**
 * Destroy the request handle. If the data movement is still ongoing,
 * then cancel it.
 *
 * @param dma: dma implementation internal data.
 * @param req: the request handle to manage termination of the movement.
 * @return an AML error code.
 **/
int (*destroy_request)(struct aml_dma_data *dma,
struct aml_dma_request *req);
/**
 * Wait for termination of a data movement and destroy the request
 * handle.
 *
 * @param dma: dma implementation internal data.
 * @param req: the request handle to manage termination of the movement.
 * @return an AML error code.
 **/
int (*wait_request)(struct aml_dma_data *dma,
struct aml_dma_request *req);
};
/**
 * aml_dma is an abstraction for (asynchronously) moving data
 * from one area to another. The implementation of dma to use
 * depends on the source and destination areas. The appropriate
 * dma choice is delegated to the user.
 * @see struct aml_area.
 **/
struct aml_dma {
/** Operation table of the backing implementation. @see aml_dma_ops **/
struct aml_dma_ops *ops;
/** Implementation-private state, opaque to users. @see aml_dma_data **/
struct aml_dma_data *data;
};
/**
* Requests a synchronous data copy between two different tiles, using
* memcpy() or equivalent.
* @param dma: an initialized DMA structure.
* @param dt: an argument of type struct aml_tiling*; the destination tiling
* structure.
* @param dptr: an argument of type void*; the start address of the complete
* destination user data structure.
* @param dtid: an argument of type int; the destination tile identifier.
* @param st: an argument of type struct aml_tiling*; the source tiling
* structure.
* @param sptr: an argument of type void*; the start address of the
* complete source user data structure.
* @param stid: an argument of type int; the source tile identifier.
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_copy(struct aml_dma *dma, ...);
/**
* Requests a data copy between two different tiles. This is an asynchronous
* version of aml_dma_copy().
* @param dma: an initialized DMA structure.
* @param req: an address where the pointer to the newly assigned DMA request
* will be stored.
* Variadic arguments: see aml_dma_copy().
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_async_copy(struct aml_dma *dma, struct aml_dma_request **req, ...);
/**
* Waits for an asynchronous DMA request to complete.
* @param dma: an initialized DMA structure.
* @param req: a DMA request obtained using aml_dma_async_*() calls.
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_wait(struct aml_dma *dma, struct aml_dma_request *req);
/**
* Tears down an asynchronous DMA request before it completes.
* @param dma: an initialized DMA structure.
* @param req: a DMA request obtained using aml_dma_async_*() calls.
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_cancel(struct aml_dma *dma, struct aml_dma_request *req);
////////////////////////////////////////////////////////////////////////////////
/**
* @}
* @defgroup aml_layout "AML Layout"
......@@ -945,6 +789,169 @@ int aml_layout_slice(const struct aml_layout *layout,
////////////////////////////////////////////////////////////////////////////////
/**
* @}
* @defgroup aml_dma "AML DMA"
* @brief Management of low-level memory movements.
*
* AML DMA (inspired by Direct Memory Access engines) is an abstraction over the
ability to move data between places. A DMA presents an interface that allows
* clients to create an asynchronous request to move data and to wait for this
* request to complete. Depending on the exact operation it is configured to do,
* the DMA might transform the data during the operation.
*
* Implementations are mostly responsible for providing access to various types
* of execution engine for data movement itself.
*
* @image html dma.png width=600
* @{
**/
////////////////////////////////////////////////////////////////////////////////
/**
* Internal macros used for tracking DMA request types.
* Invalid request type. Used for marking inactive requests in the vector.
**/
#define AML_DMA_REQUEST_TYPE_INVALID -1
/**
* The request is in the format (dest layout, src layout)
**/
#define AML_DMA_REQUEST_TYPE_LAYOUT 0
/**
* The request is in the format (dest ptr, src ptr, size)
*/
#define AML_DMA_REQUEST_TYPE_PTR 1
/**
* aml_dma is mainly used to asynchronously move data.
* aml_dma_request is an opaque structure containing information
* about ongoing request for data movement in a dma operation.
* @see aml_dma_ops
* @see aml_dma_async_copy()
**/
struct aml_dma_request;
/**
* Opaque handle implemented by each aml_dma implementations.
* Should not be used by end-users.
**/
struct aml_dma_data;
/**
 * aml_dma_ops is a structure containing operations for a specific
 * aml_dma implementation.
 * These operations are detailed in the structure.
 * They are specific in:
 * - the type of aml_area source and destination,
 * - the progress engine performing the operation,
 * - the type of source and destination data structures.
 *
 * Each different combination of these three points may require a different
 * set of dma operations.
 **/
struct aml_dma_ops {
/**
 * Initiate a data movement, from a source pointer to a destination
 * pointer, and output a request handler for managing the transfer.
 * @param dma: dma implementation internal data.
 * @param[out] req: the request handle to manage termination
 * of the movement.
 * @param type: A valid AML_DMA_REQUEST_TYPE_* specifying the kind
 * of operation to perform.
 * @param args: list of variadic arguments provided to aml_dma_copy()
 * @return an AML error code.
 **/
int (*create_request)(struct aml_dma_data *dma,
struct aml_dma_request **req,
int type, va_list args);
/**
 * Destroy the request handle. If the data movement is still ongoing,
 * then cancel it.
 *
 * @param dma: dma implementation internal data.
 * @param req: the request handle to manage termination of the movement.
 * @return an AML error code.
 **/
int (*destroy_request)(struct aml_dma_data *dma,
struct aml_dma_request *req);
/**
 * Wait for termination of a data movement and destroy the request
 * handle.
 *
 * @param dma: dma implementation internal data.
 * @param req: the request handle to manage termination of the movement.
 * @return an AML error code.
 **/
int (*wait_request)(struct aml_dma_data *dma,
struct aml_dma_request *req);
};
/**
 * aml_dma is an abstraction for (asynchronously) moving data
 * from one area to another. The implementation of dma to use
 * depends on the source and destination areas. The appropriate
 * dma choice is delegated to the user.
 * @see struct aml_area.
 **/
struct aml_dma {
/** Operation table of the backing implementation. @see aml_dma_ops **/
struct aml_dma_ops *ops;
/** Implementation-private state, opaque to users. @see aml_dma_data **/
struct aml_dma_data *data;
};
/**
* Requests a synchronous data copy between two different buffers.
* @param dma: an initialized DMA structure.
* Variadic arguments: implementation-specific.
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_copy(struct aml_dma *dma, int type, ...);
/**
* Requests a data copy between two different buffers. This is an asynchronous
* version of aml_dma_copy().
* @param dma: an initialized DMA structure.
* @param req: an address where the pointer to the newly assigned DMA request
* will be stored.
* Variadic arguments: implementation-specific.
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_async_copy(struct aml_dma *dma, struct aml_dma_request **req,
int type, ...);
/**
* Waits for an asynchronous DMA request to complete.
* @param dma: an initialized DMA structure.
* @param req: a DMA request obtained using aml_dma_async_*() calls.
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_wait(struct aml_dma *dma, struct aml_dma_request *req);
/**
* Tears down an asynchronous DMA request before it completes.
* @param dma: an initialized DMA structure.
* @param req: a DMA request obtained using aml_dma_async_*() calls.
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_cancel(struct aml_dma *dma, struct aml_dma_request *req);
/**
* Generic helper to copy from one layout to another.
* @param dst[out]: destination layout
* @param src[in]: source layout
*/
int aml_copy_layout_generic(struct aml_layout *dst,
const struct aml_layout *src);
////////////////////////////////////////////////////////////////////////////////
/**
* @}
* @defgroup aml_scratch "AML Scratchpad"
......
......@@ -27,21 +27,6 @@
**/
extern struct aml_dma_ops aml_dma_linux_par_ops;
/** Thread data embedded inside an asynchronous dma request. **/
struct aml_dma_linux_par_thread_data {
/**
 * A logical identifier of the thread in charge of
 * the request progress.
 **/
size_t tid;
/** The actual thread in charge of the request progress. **/
pthread_t thread;
/** The parallel dma this thread belongs to. **/
struct aml_dma_linux_par *dma;
/** The request handled by this thread. **/
struct aml_dma_request_linux_par *req;
};
/** Inside of a parallel request for linux movement. **/
struct aml_dma_request_linux_par {
/**
......@@ -50,27 +35,24 @@ struct aml_dma_request_linux_par {
**/
int type;
/** The destination pointer of the data movement **/
void *dest;
struct aml_layout *dest;
/** The source pointer of the data movement **/
void *src;
/** The size of data to move **/
size_t size;
/** The thread data in charge of the request progress **/
struct aml_dma_linux_par_thread_data *thread_data;
struct aml_layout *src;
/** The dma containing sequential operations **/
struct aml_dma_linux_par *dma;
/** The actual thread in charge for the request progress**/
pthread_t thread;
};
/** Inner data of the parallel linux aml_dma implementation. **/
struct aml_dma_linux_par_data {
/* Number of threads used per request (name suggests; confirm in .c). */
size_t nbthreads;
/* Vector holding the in-flight request structures. */
struct aml_vector *requests;
/* NOTE(review): presumably serializes access to the requests vector —
 * confirm against the implementation. */
pthread_mutex_t lock;
};
/** Declaration of linux parallel dma operations **/
struct aml_dma_linux_par_ops {
void *(*do_thread)(void *thread_data);
int (*do_copy)(struct aml_dma_linux_par_data *data,
struct aml_dma_request_linux_par *request, size_t tid);
void *(*do_thread)(void *data);
};
/**
......@@ -94,8 +76,7 @@ struct aml_dma_linux_par {
*
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_linux_par_create(struct aml_dma **dma, size_t nbreqs,
size_t nbthreads);
int aml_dma_linux_par_create(struct aml_dma **dma, size_t nbreqs);
/**
* Tears down a parallel DMA created with aml_dma_linux_par_create.
......
......@@ -35,11 +35,9 @@ struct aml_dma_request_linux_seq {
**/
int type;
/** The destination pointer of the data movement **/
void *dest;
struct aml_layout *dest;
/** The source pointer of the data movement **/
void *src;
/** The size of data to move **/
size_t size;
struct aml_layout *src;
};
/** Inner data of sequential linux aml_dma implementation **/
......
......@@ -9,8 +9,64 @@
*******************************************************************************/
#include "aml.h"
#include "aml/layout/native.h"
#include <assert.h>
/*******************************************************************************
* Generic DMA Copy implementations
*
* Needed by most DMAs. We don't provide introspection or any fancy API to it at
* this point.
******************************************************************************/
/**
 * Recursive worker for aml_copy_layout_generic().
 *
 * Walks the shared shape of both layouts depth-first, filling `coords`
 * from the outermost dimension (d - 1) down to the innermost (0); once a
 * full coordinate tuple is built, a single element is copied with memcpy().
 * Assumes d >= 1 — TODO confirm callers never pass 0.
 *
 * @param d: number of dimensions still to iterate over.
 * @param dst: destination layout, dereferenced in native order.
 * @param src: source layout, dereferenced in native order.
 * @param elem_number: per-dimension element counts (native order).
 * @param elem_size: size in bytes of one element.
 * @param coords: scratch buffer of d coordinates, filled during the walk.
 **/
static inline void aml_copy_layout_generic_helper(size_t d,
						  struct aml_layout *dst,
						  const struct aml_layout *src,
						  const size_t *elem_number,
						  size_t elem_size,
						  size_t *coords)
{
	/* Recurse over outer dimensions first. */
	if (d > 1) {
		for (size_t idx = 0; idx < elem_number[d - 1]; idx++) {
			coords[d - 1] = idx;
			aml_copy_layout_generic_helper(d - 1, dst, src,
						       elem_number, elem_size,
						       coords);
		}
		return;
	}
	/* Innermost dimension: copy one element per coordinate. */
	for (size_t idx = 0; idx < elem_number[0]; idx++) {
		coords[0] = idx;
		memcpy(aml_layout_deref_native(dst, coords),
		       aml_layout_deref_native(src, coords),
		       elem_size);
	}
}
/**
 * Generic layout-to-layout copy, one element at a time.
 *
 * Both layouts must have the same number of dimensions, the same element
 * size, and identical native dimensions; this is checked with assert().
 *
 * @param dst: destination layout.
 * @param src: source layout.
 * @return 0 (always succeeds once the asserts pass).
 **/
int aml_copy_layout_generic(struct aml_layout *dst,
			    const struct aml_layout *src)
{
	/* Both layouts must agree on dimensionality and element size. */
	assert(aml_layout_ndims(dst) == aml_layout_ndims(src));
	assert(aml_layout_element_size(dst) == aml_layout_element_size(src));

	size_t ndims = aml_layout_ndims(dst);
	size_t elem_size = aml_layout_element_size(dst);
	size_t coords[ndims];
	size_t dims_src[ndims];
	size_t dims_dst[ndims];

	aml_layout_dims_native(src, dims_src);
	aml_layout_dims_native(dst, dims_dst);
	/* Shapes must match dimension by dimension. */
	for (size_t i = 0; i < ndims; i++)
		assert(dims_src[i] == dims_dst[i]);

	aml_copy_layout_generic_helper(ndims, dst, src, dims_src, elem_size,
				       coords);
	return 0;
}
/*******************************************************************************
* Generic DMA API:
* Most of the stuff is dispatched to a different layer, using type-specific
......@@ -20,31 +76,33 @@
* abstract the request creation after this layer.
******************************************************************************/
int aml_dma_copy(struct aml_dma *dma, ...)
int aml_dma_copy(struct aml_dma *dma, int type, ...)
{
assert(dma != NULL);
va_list ap;
int ret;
struct aml_dma_request *req;
va_start(ap, dma);
ret = dma->ops->create_request(dma->data, &req,
AML_DMA_REQUEST_TYPE_COPY, ap);
if (dma == NULL)
return -AML_EINVAL;
va_start(ap, type);
ret = dma->ops->create_request(dma->data, &req, type, ap);
va_end(ap);
ret = dma->ops->wait_request(dma->data, req);
return ret;
}
int aml_dma_async_copy(struct aml_dma *dma, struct aml_dma_request **req, ...)
int aml_dma_async_copy(struct aml_dma *dma, struct aml_dma_request **req,
int type, ...)
{
assert(dma != NULL);
assert(req != NULL);
va_list ap;
int ret;
va_start(ap, req);
ret = dma->ops->create_request(dma->data, req,
AML_DMA_REQUEST_TYPE_COPY, ap);
if (dma == NULL || req == NULL)
return -AML_EINVAL;
va_start(ap, type);
ret = dma->ops->create_request(dma->data, req, type, ap);
va_end(ap);
return ret;
}
......
......@@ -10,6 +10,7 @@
#include "aml.h"
#include "aml/dma/linux-par.h"
#include "aml/layout/dense.h"
#include <assert.h>
#include <errno.h>
......@@ -29,25 +30,24 @@
******************************************************************************/
int aml_dma_request_linux_par_copy_init(struct aml_dma_request_linux_par *req,
struct aml_tiling *dt,
void *dptr, int dtid,
struct aml_tiling *st,
void *sptr, int stid)
int type,
struct aml_layout *dest,
struct aml_layout *src)
{
assert(req != NULL);
req->type = AML_DMA_REQUEST_TYPE_COPY;
/* figure out pointers */
req->dest = aml_tiling_tilestart(dt, dptr, dtid);
req->src = aml_tiling_tilestart(st, sptr, stid);
req->size = aml_tiling_tilesize(st, stid);
/* TODO: assert size match */
req->type = type;
req->dest = dest;
req->src = src;
return 0;
}
int aml_dma_request_linux_par_copy_destroy(struct aml_dma_request_linux_par *r)
{
assert(r != NULL);
if (r->type == AML_DMA_REQUEST_TYPE_PTR) {
aml_layout_dense_destroy(&r->dest);
aml_layout_dense_destroy(&r->src);
}
return 0;
}
......@@ -57,37 +57,16 @@ int aml_dma_request_linux_par_copy_destroy(struct aml_dma_request_linux_par *r)
void *aml_dma_linux_par_do_thread(void *arg)
{
struct aml_dma_linux_par_thread_data *data =
(struct aml_dma_linux_par_thread_data *)arg;
struct aml_dma_request_linux_par *req =
(struct aml_dma_request_linux_par *)arg;
if (data->req->type == AML_DMA_REQUEST_TYPE_COPY)
data->dma->ops.do_copy(&data->dma->data, data->req, data->tid);
if (req->type != AML_DMA_REQUEST_TYPE_INVALID)
aml_copy_layout_generic(req->dest, req->src);
return NULL;
}
int aml_dma_linux_par_do_copy(struct aml_dma_linux_par_data *dma,
struct aml_dma_request_linux_par *req, size_t tid)
{
assert(dma != NULL);
assert(req != NULL);
/* chunk memory */
size_t nbthreads = dma->nbthreads;
size_t chunksize = req->size / nbthreads;
void *dest = (void *)((intptr_t)req->dest + tid * chunksize);
void *src = (void *)((intptr_t)req->src + tid * chunksize);
if (tid == nbthreads - 1 && req->size > chunksize * nbthreads)
chunksize += req->size % nbthreads;
memcpy(dest, src, chunksize);
return 0;
}
struct aml_dma_linux_par_ops aml_dma_linux_par_inner_ops = {
aml_dma_linux_par_do_thread,
aml_dma_linux_par_do_copy,
};
/*******************************************************************************
......@@ -109,30 +88,37 @@ int aml_dma_linux_par_create_request(struct aml_dma_data *d,
req = aml_vector_add(dma->data.requests);
/* init the request */
if (type == AML_DMA_REQUEST_TYPE_COPY) {
struct aml_tiling *dt, *st;
void *dptr, *sptr;
int dtid, stid;
dt = va_arg(ap, struct aml_tiling *);
dptr = va_arg(ap, void *);
dtid = va_arg(ap, int);