Commit d8f56ffc authored by Swann Perarnau's avatar Swann Perarnau
Browse files

[refactor] redesign DMAs to support layouts

Refactor the DMA API to work on layouts internally. The resulting code
ends up simpler for most cases, as long as we add a generic copy
function.

One thing missing:
- a good API to expose the collection of copy functions we have.

One question not solved:
- should the DMA ask for a copy function on each operation?
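
For illustration, a rough sketch of the resulting calling convention (the dma,
layout and buffer names are hypothetical and their creation is elided):

    /* layout-based request: (dest layout, src layout) */
    aml_dma_copy(dma, AML_DMA_REQUEST_TYPE_LAYOUT, dst_layout, src_layout);
    /* flat-buffer request: (dest ptr, src ptr, size) */
    aml_dma_copy(dma, AML_DMA_REQUEST_TYPE_PTR, dst_ptr, src_ptr, size);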
parent e5d7b7cb
...@@ -70,7 +70,7 @@ int main(int argc, char *argv[]) ...@@ -70,7 +70,7 @@ int main(int argc, char *argv[])
aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE, aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&fastb, AML_AREA_LINUX_BINDING_FLAG_BIND); &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
assert(fast != NULL); assert(fast != NULL);
assert(!aml_dma_linux_par_create(&dma, numthreads*2, numthreads)); assert(!aml_dma_linux_par_create(&dma, numthreads*2));
assert(!aml_scratch_seq_create(&sa, fast, slow, dma, tiling, assert(!aml_scratch_seq_create(&sa, fast, slow, dma, tiling,
(size_t)2*numthreads, (size_t)1)); (size_t)2*numthreads, (size_t)1));
assert(!aml_scratch_seq_create(&sb, fast, slow, dma, tiling, assert(!aml_scratch_seq_create(&sb, fast, slow, dma, tiling,
......
...@@ -430,162 +430,6 @@ int aml_tiling_iterator_get(const struct aml_tiling_iterator *iterator, ...); ...@@ -430,162 +430,6 @@ int aml_tiling_iterator_get(const struct aml_tiling_iterator *iterator, ...);
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/**
* @}
* @defgroup aml_dma "AML DMA"
* @brief Management of low-level memory movements.
*
* AML DMA is the abstraction for handling memory movements.
* AML DMA can asynchronously move data from one area to another.
* While performing a movement, a DMA operation
* may also translate from a source tiling to a different
* destination tiling.
*
* @image html dma.png width=600
* @{
**/
////////////////////////////////////////////////////////////////////////////////
/**
* Internal macros used for tracking DMA request types.
* Invalid request type. Used for marking inactive requests in the vector.
**/
#define AML_DMA_REQUEST_TYPE_INVALID -1
/**
* Internal macros used for tracking DMA request types.
* Copy request type. Uses memcpy() for data migration.
**/
#define AML_DMA_REQUEST_TYPE_COPY 0
/**
* aml_dma is mainly used to asynchronously move data.
* aml_dma_request is an opaque structure containing information
* about an ongoing request for data movement in a dma operation.
* @see aml_dma_ops
* @see aml_dma_async_copy()
**/
struct aml_dma_request;
/**
* Opaque handle implemented by each aml_dma implementation.
* Should not be used by end-users.
**/
struct aml_dma_data;
/**
* aml_dma_ops is a structure containing operations for a specific
* aml_dma implementation.
* These operations are detailed in the structure.
* They are specific in:
* - the type of aml_area source and destination,
* - the progress engine performing the operation,
* - the type of source and destination data structures.
*
* Each different combination of these three points may require a different
* set of dma operations.
**/
struct aml_dma_ops {
/**
* Initiate a data movement, from a source pointer to a destination
* pointer, and output a request handler for managing the transfer.
* @param dma: dma_implementation internal data.
* @param req: Output the request handle to manage termination
* of the movement.
* @param type: A valid AML_DMA_REQUEST_TYPE_* specifying the kind
* of operation to perform.
* @param args: list of variadic arguments provided to aml_dma_copy()
* @return an AML error code.
**/
int (*create_request)(struct aml_dma_data *dma,
struct aml_dma_request **req, int type,
va_list args);
/**
* Destroy the request handle. If the data movement is still ongoing,
* then cancel it.
*
* @param dma: dma_implementation internal data.
* @param req: the request handle to manage termination of the movement.
* @return an AML error code.
**/
int (*destroy_request)(struct aml_dma_data *dma,
struct aml_dma_request *req);
/**
* Wait for termination of a data movement and destroy the request
* handle.
*
* @param dma: dma_implementation internal data.
* @param req: the request handle to manage termination of the movement.
* @return an AML error code.
**/
int (*wait_request)(struct aml_dma_data *dma,
struct aml_dma_request *req);
};
/**
* aml_dma is an abstraction for (asynchronously) moving data
* from one area to another. The implementation of dma to use
* depends on the source and destination areas. The appropriate
* dma choice is delegated to the user.
* @see struct aml_area.
**/
struct aml_dma {
/** @see aml_dma_ops **/
struct aml_dma_ops *ops;
/** @see aml_dma_data **/
struct aml_dma_data *data;
};
/**
* Requests a synchronous data copy between two different tiles, using
* memcpy() or equivalent.
* @param dma: an initialized DMA structure.
* @param dt: an argument of type struct aml_tiling*; the destination tiling
* structure.
* @param dptr: an argument of type void*; the start address of the complete
* destination user data structure.
* @param dtid: an argument of type int; the destination tile identifier.
* @param st: an argument of type struct aml_tiling*; the source tiling
* structure.
* @param sptr: an argument of type void*; the start address of the
* complete source user data structure.
* @param stid: an argument of type int; the source tile identifier.
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_copy(struct aml_dma *dma, ...);
/**
* Requests a data copy between two different tiles. This is an asynchronous
* version of aml_dma_copy().
* @param dma: an initialized DMA structure.
* @param req: an address where the pointer to the newly assigned DMA request
* will be stored.
* Variadic arguments: see aml_dma_copy().
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_async_copy(struct aml_dma *dma, struct aml_dma_request **req, ...);
/**
* Waits for an asynchronous DMA request to complete.
* @param dma: an initialized DMA structure.
* @param req: a DMA request obtained using aml_dma_async_*() calls.
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_wait(struct aml_dma *dma, struct aml_dma_request *req);
/**
* Tears down an asynchronous DMA request before it completes.
* @param dma: an initialized DMA structure.
* @param req: a DMA request obtained using aml_dma_async_*() calls.
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_cancel(struct aml_dma *dma, struct aml_dma_request *req);
////////////////////////////////////////////////////////////////////////////////
/** /**
* @} * @}
* @defgroup aml_layout "AML Layout" * @defgroup aml_layout "AML Layout"
...@@ -945,6 +789,169 @@ int aml_layout_slice(const struct aml_layout *layout, ...@@ -945,6 +789,169 @@ int aml_layout_slice(const struct aml_layout *layout,
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/**
* @}
* @defgroup aml_dma "AML DMA"
* @brief Management of low-level memory movements.
*
* AML DMA (inspired by Direct Memory Access engines) is an abstraction over the
* ability to move data between places. A DMA presents an interface that allows
* clients to create an asynchronous request to move data and to wait for this
* request to complete. Depending on the exact operation it is configured to do,
* the DMA might transform the data during the operation.
*
* Implementations are mostly responsible for providing access to various
* types of execution engines for the data movement itself.
*
* @image html dma.png width=600
* @{
**/
////////////////////////////////////////////////////////////////////////////////
/**
* Internal macros used for tracking DMA request types.
* Invalid request type. Used for marking inactive requests in the vector.
**/
#define AML_DMA_REQUEST_TYPE_INVALID -1
/**
* The request is in the format (dest layout, src layout)
**/
#define AML_DMA_REQUEST_TYPE_LAYOUT 0
/**
* The request is in the format (dest ptr, src ptr, size)
*/
#define AML_DMA_REQUEST_TYPE_PTR 1
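/**
 * For illustration only (not part of the API): an implementation's
 * create_request() typically decodes its va_list according to these macros.
 * A minimal sketch, using the `type` and `args` parameters declared in
 * aml_dma_ops below:
 *
 * @code
 * switch (type) {
 * case AML_DMA_REQUEST_TYPE_LAYOUT: {
 *         struct aml_layout *d = va_arg(args, struct aml_layout *);
 *         struct aml_layout *s = va_arg(args, struct aml_layout *);
 *         // build a layout-based request from (d, s)
 *         break;
 * }
 * case AML_DMA_REQUEST_TYPE_PTR: {
 *         void *dp = va_arg(args, void *);
 *         void *sp = va_arg(args, void *);
 *         size_t sz = va_arg(args, size_t);
 *         // wrap (dp, sp, sz), e.g. in layouts, then proceed as above
 *         break;
 * }
 * default:
 *         return -AML_EINVAL;
 * }
 * @endcode
 **/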
/**
* aml_dma is mainly used to asynchronously move data.
* aml_dma_request is an opaque structure containing information
* about an ongoing request for data movement in a dma operation.
* @see aml_dma_ops
* @see aml_dma_async_copy()
**/
struct aml_dma_request;
/**
* Opaque handle implemented by each aml_dma implementation.
* Should not be used by end-users.
**/
struct aml_dma_data;
/**
* aml_dma_ops is a structure containing operations for a specific
* aml_dma implementation.
* These operations are detailed in the structure.
* They are specific in:
* - the type of aml_area source and destination,
* - the progress engine performing the operation,
* - the type of source and destination data structures.
*
* Each different combination of these three points may require a different
* set of dma operations.
**/
struct aml_dma_ops {
/**
* Initiate a data movement from a source to a destination, and output
* a request handle for managing the transfer.
* @param dma: dma_implementation internal data.
* @param req[out]: the request handle to manage termination
* of the movement.
* @param type: A valid AML_DMA_REQUEST_TYPE_* specifying the kind
* of operation to perform.
* @param args: list of variadic arguments provided to aml_dma_copy()
* @return an AML error code.
**/
int (*create_request)(struct aml_dma_data *dma,
struct aml_dma_request **req,
int type, va_list args);
/**
* Destroy the request handle. If the data movement is still ongoing,
* then cancel it.
*
* @param dma: dma_implementation internal data.
* @param req: the request handle to manage termination of the movement.
* @return an AML error code.
**/
int (*destroy_request)(struct aml_dma_data *dma,
struct aml_dma_request *req);
/**
* Wait for termination of a data movement and destroy the request
* handle.
*
* @param dma: dma_implementation internal data.
* @param req: the request handle to manage termination of the movement.
* @return an AML error code.
**/
int (*wait_request)(struct aml_dma_data *dma,
struct aml_dma_request *req);
};
/**
* aml_dma is an abstraction for (asynchronously) moving data
* from one area to another. The implementation of dma to use
* depends on the source and destination areas. The appropriate
* dma choice is delegated to the user.
* @see struct aml_area.
**/
struct aml_dma {
/** @see aml_dma_ops **/
struct aml_dma_ops *ops;
/** @see aml_dma_data **/
struct aml_dma_data *data;
};
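/**
 * For illustration only: a minimal sketch of how a hypothetical
 * implementation ("mydma", not part of AML) would be wired into these
 * structures.
 *
 * @code
 * static int mydma_create_request(struct aml_dma_data *dma,
 *                                 struct aml_dma_request **req,
 *                                 int type, va_list args)
 * {
 *         // decode args according to type and start the transfer
 *         return 0;
 * }
 *
 * static int mydma_destroy_request(struct aml_dma_data *dma,
 *                                  struct aml_dma_request *req)
 * {
 *         // cancel the transfer if still ongoing, then release the request
 *         return 0;
 * }
 *
 * static int mydma_wait_request(struct aml_dma_data *dma,
 *                               struct aml_dma_request *req)
 * {
 *         // block until completion, then release the request
 *         return 0;
 * }
 *
 * struct aml_dma_ops mydma_ops = {
 *         mydma_create_request,
 *         mydma_destroy_request,
 *         mydma_wait_request,
 * };
 *
 * struct aml_dma mydma = { &mydma_ops, NULL }; // data set by the implementation
 * @endcode
 **/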
/**
* Requests a synchronous data copy between two different buffers.
* @param dma: an initialized DMA structure.
* @param type: a valid AML_DMA_REQUEST_TYPE_* identifying the request format.
* Variadic arguments: implementation-specific; their format depends on the
* request type.
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_copy(struct aml_dma *dma, int type, ...);
/**
* Requests a data copy between two different buffers. This is an asynchronous
* version of aml_dma_copy().
* @param dma: an initialized DMA structure.
* @param req: an address where the pointer to the newly assigned DMA request
* will be stored.
* @param type: a valid AML_DMA_REQUEST_TYPE_* identifying the request format.
* Variadic arguments: implementation-specific; their format depends on the
* request type.
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_async_copy(struct aml_dma *dma, struct aml_dma_request **req,
int type, ...);
/**
* Waits for an asynchronous DMA request to complete.
* @param dma: an initialized DMA structure.
* @param req: a DMA request obtained using aml_dma_async_*() calls.
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_wait(struct aml_dma *dma, struct aml_dma_request *req);
/**
* Tears down an asynchronous DMA request before it completes.
* @param dma: an initialized DMA structure.
* @param req: a DMA request obtained using aml_dma_async_*() calls.
* @return 0 if successful; an error code otherwise.
**/
int aml_dma_cancel(struct aml_dma *dma, struct aml_dma_request *req);
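/**
 * For illustration only: a usage sketch of the functions above, assuming an
 * initialized dma and two already-constructed layouts (names are hypothetical,
 * creation elided).
 *
 * @code
 * int move_tile(struct aml_dma *dma, struct aml_layout *dst,
 *               struct aml_layout *src)
 * {
 *         struct aml_dma_request *req;
 *         int err;
 *
 *         // start the transfer asynchronously
 *         err = aml_dma_async_copy(dma, &req, AML_DMA_REQUEST_TYPE_LAYOUT,
 *                                  dst, src);
 *         if (err != 0)
 *                 return err;
 *         // ... overlap unrelated work here, or abort with aml_dma_cancel() ...
 *         return aml_dma_wait(dma, req);
 * }
 * @endcode
 **/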
/**
* Generic helper to copy from one layout to another. Both layouts must have
* the same number of dimensions, the same dimension sizes and the same
* element size.
* @param dst[out]: destination layout
* @param src[in]: source layout
* @return 0 if successful; an error code otherwise.
*/
int aml_copy_layout_generic(struct aml_layout *dst,
const struct aml_layout *src);
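/**
 * For illustration only: given two layouts with matching dimensions and
 * element size (creation elided), the generic copy is simply:
 *
 * @code
 * assert(!aml_copy_layout_generic(dst_layout, src_layout));
 * @endcode
 **/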
////////////////////////////////////////////////////////////////////////////////
/** /**
* @} * @}
* @defgroup aml_scratch "AML Scratchpad" * @defgroup aml_scratch "AML Scratchpad"
......
...@@ -27,21 +27,6 @@ ...@@ -27,21 +27,6 @@
**/ **/
extern struct aml_dma_ops aml_dma_linux_par_ops; extern struct aml_dma_ops aml_dma_linux_par_ops;
/** Thread data embedded inside an asynchronous dma request. **/
struct aml_dma_linux_par_thread_data {
/**
* A logical identifier of the thread in charge of
* the request progress.
**/
size_t tid;
/** The actual thread in charge of the request progress **/
pthread_t thread;
/** The dma containing sequential operations **/
struct aml_dma_linux_par *dma;
/** The request handled by this thread **/
struct aml_dma_request_linux_par *req;
};
/** Inside of a parallel request for linux movement. **/ /** Inside of a parallel request for linux movement. **/
struct aml_dma_request_linux_par { struct aml_dma_request_linux_par {
/** /**
...@@ -50,27 +35,24 @@ struct aml_dma_request_linux_par { ...@@ -50,27 +35,24 @@ struct aml_dma_request_linux_par {
**/ **/
int type; int type;
/** The destination pointer of the data movement **/ /** The destination pointer of the data movement **/
void *dest; struct aml_layout *dest;
/** The source pointer of the data movement **/ /** The source pointer of the data movement **/
void *src; struct aml_layout *src;
/** The size of data to move **/ /** The dma containing sequential operations **/
size_t size; struct aml_dma_linux_par *dma;
/** The thread data in charge of the request progress **/ /** The actual thread in charge for the request progress**/
struct aml_dma_linux_par_thread_data *thread_data; pthread_t thread;
}; };
/** Inside of a parallel request for linux movement. **/ /** Inside of a parallel request for linux movement. **/
struct aml_dma_linux_par_data { struct aml_dma_linux_par_data {
size_t nbthreads;
struct aml_vector *requests; struct aml_vector *requests;
pthread_mutex_t lock; pthread_mutex_t lock;
}; };
/** Declaration of linux parallel dma operations **/ /** Declaration of linux parallel dma operations **/
struct aml_dma_linux_par_ops { struct aml_dma_linux_par_ops {
void *(*do_thread)(void *thread_data); void *(*do_thread)(void *data);
int (*do_copy)(struct aml_dma_linux_par_data *data,
struct aml_dma_request_linux_par *request, size_t tid);
}; };
/** /**
...@@ -94,8 +76,7 @@ struct aml_dma_linux_par { ...@@ -94,8 +76,7 @@ struct aml_dma_linux_par {
* *
* @return 0 if successful; an error code otherwise. * @return 0 if successful; an error code otherwise.
**/ **/
int aml_dma_linux_par_create(struct aml_dma **dma, size_t nbreqs, int aml_dma_linux_par_create(struct aml_dma **dma, size_t nbreqs);
size_t nbthreads);
/** /**
* Tears down a parallel DMA created with aml_dma_linux_par_create. * Tears down a parallel DMA created with aml_dma_linux_par_create.
......
...@@ -35,11 +35,9 @@ struct aml_dma_request_linux_seq { ...@@ -35,11 +35,9 @@ struct aml_dma_request_linux_seq {
**/ **/
int type; int type;
/** The destination pointer of the data movement **/ /** The destination pointer of the data movement **/
void *dest; struct aml_layout *dest;
/** The source pointer of the data movement **/ /** The source pointer of the data movement **/
void *src; struct aml_layout *src;
/** The size of data to move **/
size_t size;
}; };
/** Inner data of sequential linux aml_dma implementation **/ /** Inner data of sequential linux aml_dma implementation **/
......
...@@ -9,8 +9,64 @@ ...@@ -9,8 +9,64 @@
*******************************************************************************/ *******************************************************************************/
#include "aml.h" #include "aml.h"
#include "aml/layout/native.h"
#include <assert.h> #include <assert.h>
/*******************************************************************************
* Generic DMA Copy implementations
*
* Needed by most DMAs. We don't provide introspection or any fancy API to it at
* this point.
******************************************************************************/
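/* Recursive helper: iterates over the last native dimension and recurses down
 * to the first one; once a single dimension is left, elements are copied one
 * at a time with memcpy() through aml_layout_deref_native() on both layouts. */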
static inline void aml_copy_layout_generic_helper(size_t d,
struct aml_layout *dst,
const struct aml_layout *src,
const size_t *elem_number,
size_t elem_size,
size_t *coords)
{
if (d == 1) {
for (size_t i = 0; i < elem_number[0]; i += 1) {
coords[0] = i;
memcpy(aml_layout_deref_native(dst, coords),
aml_layout_deref_native(src, coords),
elem_size);
}
} else {
for (size_t i = 0; i < elem_number[d - 1]; i += 1) {
coords[d - 1] = i;
aml_copy_layout_generic_helper(d - 1, dst, src,
elem_number, elem_size,
coords);
}
}
}
int aml_copy_layout_generic(struct aml_layout *dst,
const struct aml_layout *src)
{
size_t d;
size_t elem_size;
assert(aml_layout_ndims(dst) == aml_layout_ndims(src));
d = aml_layout_ndims(dst);
assert(aml_layout_element_size(dst) == aml_layout_element_size(src));
elem_size = aml_layout_element_size(dst);
size_t coords[d];
size_t elem_number[d];
size_t elem_number2[d];
aml_layout_dims_native(src, elem_number);
aml_layout_dims_native(dst, elem_number2);
for (size_t i = 0; i < d; i += 1)
assert(elem_number[i] == elem_number2[i]);
aml_copy_layout_generic_helper(d, dst, src, elem_number, elem_size,
coords);
return 0;
}
/******************************************************************************* /*******************************************************************************
* Generic DMA API: * Generic DMA API:
* Most of the stuff is dispatched to a different layer, using type-specific * Most of the stuff is dispatched to a different layer, using type-specific
...@@ -20,31 +76,33 @@ ...@@ -20,31 +76,33 @@
* abstract the request creation after this layer. * abstract the request creation after this layer.
******************************************************************************/ ******************************************************************************/
int aml_dma_copy(struct aml_dma *dma, ...) int aml_dma_copy(struct aml_dma *dma, int type, ...)
{ {
assert(dma != NULL);
va_list ap; va_list ap;
int ret; int ret;
struct aml_dma_request *req; struct aml_dma_request *req;
va_start(ap, dma); if (dma == NULL)
ret = dma->ops->create_request(dma->data, &req, return -AML_EINVAL;
AML_DMA_REQUEST_TYPE_COPY, ap);
va_start(ap, type);
ret = dma->ops->create_request(dma->data, &req, type, ap);
va_end(ap); va_end(ap);
ret = dma->ops->wait_request(dma->data, req); ret = dma->ops->wait_request(dma->data, req);
return ret; return ret;
} }
int aml_dma_async_copy(struct aml_dma *dma, struct aml_dma_request **req, ...) int aml_dma_async_copy(struct aml_dma *dma, struct aml_dma_request **req,
int type, ...)
{ {
assert(dma != NULL);
assert(req != NULL);
va_list ap; va_list ap;
int ret; int ret;
va_start(ap, req); if (dma == NULL || req == NULL)
ret = dma->ops->create_request(dma->data, req, return -AML_EINVAL;
AML_DMA_REQUEST_TYPE_COPY, ap);
va_start(ap, type);
ret = dma->ops->create_request(dma->data, req, type, ap);
va_end(ap); va_end(ap);
return ret; return ret;
} }
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include "aml.h" #include "aml.h"
#include "aml/dma/linux-par.h" #include "aml/dma/linux-par.h"
#include "aml/layout/dense.h"
#include <assert.h> #include <assert.h>
#include <errno.h> #include <errno.h>
...@@ -29,25 +30,24 @@ ...@@ -29,25 +30,24 @@
******************************************************************************/ ******************************************************************************/
int aml_dma_request_linux_par_copy_init(struct aml_dma_request_linux_par *req, int aml_dma_request_linux_par_copy_init(struct aml_dma_request_linux_par *req,
struct aml_tiling *dt, int type,
void *dptr, int dtid, struct aml_layout *dest,
struct aml_tiling *st, struct aml_layout *src)
void *sptr, int stid)
{ {
assert(req != NULL); assert(req != NULL);
req->type = type;
req->type = AML_DMA_REQUEST_TYPE_COPY; req->dest = dest;
/* figure out pointers */ req->src = src;
req->dest = aml_tiling_tilestart(dt, dptr, dtid);
req->src = aml_tiling_tilestart(st, sptr, stid);
req->size = aml_tiling_tilesize(st, stid);
/* TODO: assert size match */
return 0; return 0;
} }
int aml_dma_request_linux_par_copy_destroy(struct aml_dma_request_linux_par *r)