Commit d2efb479 authored by Swann Perarnau

Merge branch 'new-dma-design' into 'master'

[refactor] redesign DMAs to support layouts

Closes #46

See merge request !73
parents e5d7b7cb d8f56ffc
Pipeline #8242 passed with stages in 28 minutes and 56 seconds
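The user-visible change, in brief: DMA copy calls no longer take (tiling, pointer, tile id) triplets, and the parallel DMA no longer takes a thread count. Copies now pass an explicit request type followed by type-specific arguments: two `struct aml_layout *` for `AML_DMA_REQUEST_TYPE_LAYOUT`, or a raw destination pointer, source pointer, and byte size for `AML_DMA_REQUEST_TYPE_PTR`. A minimal sketch assembled from the test updates in this diff (the helper and its parameters are placeholders, not part of the MR):

```c
#include <assert.h>
#include "aml.h"
#include "aml/dma/linux-par.h"

/* Hypothetical helper: copy tile i of src into tile i of dst.
 * Before this MR, the same operation read:
 *   aml_dma_linux_par_create(&dma, maxrequests, maxthreads);
 *   aml_dma_copy(dma, tiling, dst, i, tiling, src, i);
 */
void copy_tile(struct aml_dma *dma, struct aml_tiling *tiling,
	       void *dst, void *src, int i)
{
	/* The caller now resolves tile pointers itself... */
	void *d = aml_tiling_tilestart(tiling, dst, i);
	void *s = aml_tiling_tilestart(tiling, src, i);

	/* ...and passes an explicit request type plus (dest, src, size). */
	assert(!aml_dma_copy(dma, AML_DMA_REQUEST_TYPE_PTR,
			     d, s, aml_tiling_tilesize(tiling, i)));
}
```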
@@ -70,7 +70,7 @@ int main(int argc, char *argv[])
 	aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
 			      &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
 	assert(fast != NULL);
-	assert(!aml_dma_linux_par_create(&dma, numthreads*2, numthreads));
+	assert(!aml_dma_linux_par_create(&dma, numthreads*2));
 	assert(!aml_scratch_seq_create(&sa, fast, slow, dma, tiling,
 				       (size_t)2*numthreads, (size_t)1));
 	assert(!aml_scratch_seq_create(&sb, fast, slow, dma, tiling,
[One file's diff is collapsed and not shown here.]
@@ -27,21 +27,6 @@
 **/
 extern struct aml_dma_ops aml_dma_linux_par_ops;
 
-/** Thread data embeded inside an asynchronous dma request. **/
-struct aml_dma_linux_par_thread_data {
-	/**
-	 * A logical identifier of the thread in charge for
-	 * the request progress.
-	 **/
-	size_t tid;
-	/** The actual thread in charge for the request progress**/
-	pthread_t thread;
-	/** The dma containing sequential operations **/
-	struct aml_dma_linux_par *dma;
-	/** The request handled by this thread **/
-	struct aml_dma_request_linux_par *req;
-};
-
 /** Inside of a parallel request for linux movement. **/
 struct aml_dma_request_linux_par {
 	/**
@@ -50,27 +35,24 @@ struct aml_dma_request_linux_par {
 	 **/
 	int type;
 	/** The destination pointer of the data movement **/
-	void *dest;
+	struct aml_layout *dest;
 	/** The source pointer of the data movement **/
-	void *src;
-	/** The size of data to move **/
-	size_t size;
-	/** The thread data in charge of the request progress **/
-	struct aml_dma_linux_par_thread_data *thread_data;
+	struct aml_layout *src;
+	/** The dma containing sequential operations **/
+	struct aml_dma_linux_par *dma;
+	/** The actual thread in charge for the request progress**/
+	pthread_t thread;
 };
 
 /** Inside of a parallel request for linux movement. **/
 struct aml_dma_linux_par_data {
-	size_t nbthreads;
 	struct aml_vector *requests;
 	pthread_mutex_t lock;
 };
 
 /** Declaration of linux parallel dma operations **/
 struct aml_dma_linux_par_ops {
-	void *(*do_thread)(void *thread_data);
-	int (*do_copy)(struct aml_dma_linux_par_data *data,
-		       struct aml_dma_request_linux_par *request, size_t tid);
+	void *(*do_thread)(void *data);
 };
 
 /**
@@ -94,8 +76,7 @@ struct aml_dma_linux_par {
 *
 * @return 0 if successful; an error code otherwise.
 **/
-int aml_dma_linux_par_create(struct aml_dma **dma, size_t nbreqs,
-			     size_t nbthreads);
+int aml_dma_linux_par_create(struct aml_dma **dma, size_t nbreqs);
 
 /**
 * Tears down a parallel DMA created with aml_dma_linux_par_create.
@@ -35,11 +35,9 @@ struct aml_dma_request_linux_seq {
 	 **/
 	int type;
 	/** The destination pointer of the data movement **/
-	void *dest;
+	struct aml_layout *dest;
 	/** The source pointer of the data movement **/
-	void *src;
-	/** The size of data to move **/
-	size_t size;
+	struct aml_layout *src;
 };
 
 /** Inner data of sequential linux aml_dma implementation **/
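Note that the request structs lose their `size` field on both backends: a layout is self-describing, so sizes can always be recomputed from it. A small sketch using the introspection calls that appear in the dma.c hunk below (the helper is hypothetical, and the const-qualification of these calls is assumed from their use on a `const struct aml_layout *`):

```c
#include "aml.h"

/* Recompute the total byte size described by a layout. */
size_t layout_bytes(const struct aml_layout *l)
{
	size_t ndims = aml_layout_ndims(l);
	size_t dims[ndims];
	size_t bytes = aml_layout_element_size(l);

	aml_layout_dims_native(l, dims);
	for (size_t i = 0; i < ndims; i++)
		bytes *= dims[i];
	return bytes;
}
```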
@@ -9,8 +9,64 @@
 *******************************************************************************/
 
 #include "aml.h"
+#include "aml/layout/native.h"
 #include <assert.h>
 
+/*******************************************************************************
+ * Generic DMA Copy implementations
+ *
+ * Needed by most DMAs. We don't provide introspection or any fancy API to it
+ * at this point.
+ ******************************************************************************/
+
+static inline void aml_copy_layout_generic_helper(size_t d,
+						  struct aml_layout *dst,
+						  const struct aml_layout *src,
+						  const size_t *elem_number,
+						  size_t elem_size,
+						  size_t *coords)
+{
+	if (d == 1) {
+		for (size_t i = 0; i < elem_number[0]; i += 1) {
+			coords[0] = i;
+			memcpy(aml_layout_deref_native(dst, coords),
+			       aml_layout_deref_native(src, coords),
+			       elem_size);
+		}
+	} else {
+		for (size_t i = 0; i < elem_number[d - 1]; i += 1) {
+			coords[d - 1] = i;
+			aml_copy_layout_generic_helper(d - 1, dst, src,
+						       elem_number, elem_size,
+						       coords);
+		}
+	}
+}
+
+int aml_copy_layout_generic(struct aml_layout *dst,
+			    const struct aml_layout *src)
+{
+	size_t d;
+	size_t elem_size;
+
+	assert(aml_layout_ndims(dst) == aml_layout_ndims(src));
+	d = aml_layout_ndims(dst);
+	assert(aml_layout_element_size(dst) == aml_layout_element_size(src));
+	elem_size = aml_layout_element_size(dst);
+
+	size_t coords[d];
+	size_t elem_number[d];
+	size_t elem_number2[d];
+
+	aml_layout_dims_native(src, elem_number);
+	aml_layout_dims_native(dst, elem_number2);
+	for (size_t i = 0; i < d; i += 1)
+		assert(elem_number[i] == elem_number2[i]);
+
+	aml_copy_layout_generic_helper(d, dst, src, elem_number, elem_size,
+				       coords);
+	return 0;
+}
+
 /*******************************************************************************
 * Generic DMA API:
 * Most of the stuff is dispatched to a different layer, using type-specific
@@ -20,31 +76,33 @@
 * abstract the request creation after this layer.
 ******************************************************************************/
 
-int aml_dma_copy(struct aml_dma *dma, ...)
+int aml_dma_copy(struct aml_dma *dma, int type, ...)
 {
-	assert(dma != NULL);
 	va_list ap;
 	int ret;
 	struct aml_dma_request *req;
 
-	va_start(ap, dma);
-	ret = dma->ops->create_request(dma->data, &req,
-				       AML_DMA_REQUEST_TYPE_COPY, ap);
+	if (dma == NULL)
+		return -AML_EINVAL;
+
+	va_start(ap, type);
+	ret = dma->ops->create_request(dma->data, &req, type, ap);
 	va_end(ap);
 	ret = dma->ops->wait_request(dma->data, req);
 	return ret;
 }
 
-int aml_dma_async_copy(struct aml_dma *dma, struct aml_dma_request **req, ...)
+int aml_dma_async_copy(struct aml_dma *dma, struct aml_dma_request **req,
+		       int type, ...)
 {
-	assert(dma != NULL);
-	assert(req != NULL);
 	va_list ap;
 	int ret;
 
-	va_start(ap, req);
-	ret = dma->ops->create_request(dma->data, req,
-				       AML_DMA_REQUEST_TYPE_COPY, ap);
+	if (dma == NULL || req == NULL)
+		return -AML_EINVAL;
+
+	va_start(ap, type);
+	ret = dma->ops->create_request(dma->data, req, type, ap);
 	va_end(ap);
 	return ret;
 }
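To see the new generic copy end to end: a sketch that copies a 4x3 array of doubles through two dense layouts and the `aml_copy_layout_generic` added above. Signatures follow their uses in this diff (the `0` order argument mirrors the literal used by the TYPE_PTR path); whether `aml_copy_layout_generic` is exported in a public header is not shown here, so treat this as illustrative:

```c
#include "aml.h"
#include "aml/layout/dense.h"

int copy_4x3(double dst[4][3], double src[4][3])
{
	struct aml_layout *dl, *sl;
	size_t dims[2] = {4, 3};
	int err;

	/* Wrap both arrays in contiguous dense layouts (NULL stride and
	 * pitch default to contiguous, as in the TYPE_PTR path above). */
	aml_layout_dense_create(&dl, dst, 0, sizeof(double), 2, dims,
				NULL, NULL);
	aml_layout_dense_create(&sl, src, 0, sizeof(double), 2, dims,
				NULL, NULL);

	/* Element-wise recursive copy over matching coordinates. */
	err = aml_copy_layout_generic(dl, sl);

	aml_layout_dense_destroy(&dl);
	aml_layout_dense_destroy(&sl);
	return err;
}
```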
@@ -10,6 +10,7 @@
 
 #include "aml.h"
 #include "aml/dma/linux-par.h"
+#include "aml/layout/dense.h"
 
 #include <assert.h>
 #include <errno.h>
@@ -29,25 +30,24 @@
 ******************************************************************************/
 
 int aml_dma_request_linux_par_copy_init(struct aml_dma_request_linux_par *req,
-					struct aml_tiling *dt,
-					void *dptr, int dtid,
-					struct aml_tiling *st,
-					void *sptr, int stid)
+					int type,
+					struct aml_layout *dest,
+					struct aml_layout *src)
 {
 	assert(req != NULL);
-	req->type = AML_DMA_REQUEST_TYPE_COPY;
-	/* figure out pointers */
-	req->dest = aml_tiling_tilestart(dt, dptr, dtid);
-	req->src = aml_tiling_tilestart(st, sptr, stid);
-	req->size = aml_tiling_tilesize(st, stid);
-	/* TODO: assert size match */
+	req->type = type;
+	req->dest = dest;
+	req->src = src;
 	return 0;
 }
 
 int aml_dma_request_linux_par_copy_destroy(struct aml_dma_request_linux_par *r)
 {
 	assert(r != NULL);
+	if (r->type == AML_DMA_REQUEST_TYPE_PTR) {
+		aml_layout_dense_destroy(&r->dest);
+		aml_layout_dense_destroy(&r->src);
+	}
 	return 0;
 }
@@ -57,37 +57,16 @@ int aml_dma_request_linux_par_copy_destroy(struct aml_dma_request_linux_par *r)
 void *aml_dma_linux_par_do_thread(void *arg)
 {
-	struct aml_dma_linux_par_thread_data *data =
-		(struct aml_dma_linux_par_thread_data *)arg;
+	struct aml_dma_request_linux_par *req =
+		(struct aml_dma_request_linux_par *)arg;
 
-	if (data->req->type == AML_DMA_REQUEST_TYPE_COPY)
-		data->dma->ops.do_copy(&data->dma->data, data->req, data->tid);
+	if (req->type != AML_DMA_REQUEST_TYPE_INVALID)
+		aml_copy_layout_generic(req->dest, req->src);
 	return NULL;
 }
 
-int aml_dma_linux_par_do_copy(struct aml_dma_linux_par_data *dma,
-			      struct aml_dma_request_linux_par *req, size_t tid)
-{
-	assert(dma != NULL);
-	assert(req != NULL);
-
-	/* chunk memory */
-	size_t nbthreads = dma->nbthreads;
-	size_t chunksize = req->size / nbthreads;
-
-	void *dest = (void *)((intptr_t)req->dest + tid * chunksize);
-	void *src = (void *)((intptr_t)req->src + tid * chunksize);
-
-	if (tid == nbthreads - 1 && req->size > chunksize * nbthreads)
-		chunksize += req->size % nbthreads;
-
-	memcpy(dest, src, chunksize);
-	return 0;
-}
-
 struct aml_dma_linux_par_ops aml_dma_linux_par_inner_ops = {
 	aml_dma_linux_par_do_thread,
-	aml_dma_linux_par_do_copy,
 };
 
 /*******************************************************************************
@@ -109,30 +88,37 @@ int aml_dma_linux_par_create_request(struct aml_dma_data *d,
 	req = aml_vector_add(dma->data.requests);
 	/* init the request */
-	if (type == AML_DMA_REQUEST_TYPE_COPY) {
-		struct aml_tiling *dt, *st;
-		void *dptr, *sptr;
-		int dtid, stid;
-
-		dt = va_arg(ap, struct aml_tiling *);
-		dptr = va_arg(ap, void *);
-		dtid = va_arg(ap, int);
-		st = va_arg(ap, struct aml_tiling *);
-		sptr = va_arg(ap, void *);
-		stid = va_arg(ap, int);
-		aml_dma_request_linux_par_copy_init(req, dt, dptr, dtid,
-						    st, sptr, stid);
+	if (type == AML_DMA_REQUEST_TYPE_LAYOUT) {
+		struct aml_layout *dl, *sl;
+
+		dl = va_arg(ap, struct aml_layout *);
+		sl = va_arg(ap, struct aml_layout *);
+		aml_dma_request_linux_par_copy_init(req,
+						    AML_DMA_REQUEST_TYPE_LAYOUT,
+						    dl, sl);
+	} else if (type == AML_DMA_REQUEST_TYPE_PTR) {
+		struct aml_layout *dl, *sl;
+		void *dp, *sp;
+		size_t sz;
+
+		dp = va_arg(ap, void *);
+		sp = va_arg(ap, void *);
+		sz = va_arg(ap, size_t);
+		/* simple 1D layout, none of the parameters really matter, as
+		 * long as the copy generates a single memcpy.
+		 */
+		aml_layout_dense_create(&dl, dp, 0, sizeof(size_t), 1,
+					&sz, NULL, NULL);
+		aml_layout_dense_create(&sl, sp, 0, sizeof(size_t), 1,
+					&sz, NULL, NULL);
+		aml_dma_request_linux_par_copy_init(req,
+						    AML_DMA_REQUEST_TYPE_PTR,
+						    dl, sl);
 	}
 	pthread_mutex_unlock(&dma->data.lock);
-	for (size_t i = 0; i < dma->data.nbthreads; i++) {
-		struct aml_dma_linux_par_thread_data *rd = &req->thread_data[i];
-
-		rd->req = req;
-		rd->dma = dma;
-		rd->tid = i;
-		pthread_create(&rd->thread, NULL, dma->ops.do_thread, rd);
-	}
+	if (req->type != AML_DMA_REQUEST_TYPE_INVALID)
+		pthread_create(&req->thread, NULL, dma->ops.do_thread, req);
 	*r = (struct aml_dma_request *)req;
 	return 0;
 }
@@ -149,13 +135,11 @@ int aml_dma_linux_par_destroy_request(struct aml_dma_data *d,
 		(struct aml_dma_request_linux_par *)r;
 
 	/* we cancel and join, instead of killing, for a cleaner result */
-	for (size_t i = 0; i < dma->data.nbthreads; i++) {
-		pthread_cancel(req->thread_data[i].thread);
-		pthread_join(req->thread_data[i].thread, NULL);
-	}
-
-	if (req->type == AML_DMA_REQUEST_TYPE_COPY)
+	if (req->type != AML_DMA_REQUEST_TYPE_INVALID) {
+		pthread_cancel(req->thread);
+		pthread_join(req->thread, NULL);
 		aml_dma_request_linux_par_copy_destroy(req);
+	}
 
 	pthread_mutex_lock(&dma->data.lock);
 	aml_vector_remove(dma->data.requests, req);
@@ -172,12 +156,10 @@ int aml_dma_linux_par_wait_request(struct aml_dma_data *d,
 	struct aml_dma_request_linux_par *req =
 		(struct aml_dma_request_linux_par *)r;
 
-	for (size_t i = 0; i < dma->data.nbthreads; i++)
-		pthread_join(req->thread_data[i].thread, NULL);
-
 	/* destroy a completed request */
-	if (req->type == AML_DMA_REQUEST_TYPE_COPY)
+	if (req->type != AML_DMA_REQUEST_TYPE_INVALID) {
+		pthread_join(req->thread, NULL);
 		aml_dma_request_linux_par_copy_destroy(req);
+	}
 
 	pthread_mutex_lock(&dma->data.lock);
 	aml_vector_remove(dma->data.requests, req);
@@ -195,8 +177,7 @@ struct aml_dma_ops aml_dma_linux_par_ops = {
 * Init functions:
 ******************************************************************************/
 
-int aml_dma_linux_par_create(struct aml_dma **dma, size_t nbreqs,
-			     size_t nbthreads)
+int aml_dma_linux_par_create(struct aml_dma **dma, size_t nbreqs)
 {
 	struct aml_dma *ret = NULL;
 	struct aml_dma_linux_par *d;
@@ -217,18 +198,10 @@ int aml_dma_linux_par_create(struct aml_dma **dma, size_t nbreqs)
 	d->ops = aml_dma_linux_par_inner_ops;
 
 	/* allocate request array */
-	d->data.nbthreads = nbthreads;
 	aml_vector_create(&d->data.requests, nbreqs,
 			  sizeof(struct aml_dma_request_linux_par),
 			  offsetof(struct aml_dma_request_linux_par, type),
 			  AML_DMA_REQUEST_TYPE_INVALID);
-	for (size_t i = 0; i < nbreqs; i++) {
-		struct aml_dma_request_linux_par *req =
-			aml_vector_get(d->data.requests, i);
-		req->thread_data = calloc(d->data.nbthreads,
-			sizeof(struct aml_dma_linux_par_thread_data));
-	}
 	pthread_mutex_init(&d->data.lock, NULL);
 
 	*dma = ret;
@@ -248,12 +221,6 @@ void aml_dma_linux_par_destroy(struct aml_dma **dma)
 	assert(d->data != NULL);
 	l = (struct aml_dma_linux_par *)d->data;
 
-	for (size_t i = 0; i < aml_vector_size(l->data.requests); i++) {
-		struct aml_dma_request_linux_par *req =
-			aml_vector_get(l->data.requests, i);
-		free(req->thread_data);
-	}
-
 	aml_vector_destroy(&l->data.requests);
 	pthread_mutex_destroy(&l->data.lock);
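The threading-model change in linux-par.c, distilled: before, each request was chunked across `nbthreads` workers, each memcpy'ing a slice of a flat buffer; now each request owns exactly one thread that performs the whole layout-aware copy, and wait/destroy simply join (or cancel and join) that thread. A self-contained sketch of the new pattern in plain pthreads (not AML code, with a simplified request type):

```c
#include <pthread.h>
#include <string.h>

struct request {
	void *dst, *src;
	size_t size;
	pthread_t thread;
};

/* One worker per request: the whole copy runs on a single thread. */
static void *do_thread(void *arg)
{
	struct request *req = arg;

	memcpy(req->dst, req->src, req->size);
	return NULL;
}

static void request_start(struct request *req)
{
	pthread_create(&req->thread, NULL, do_thread, req);
}

static void request_wait(struct request *req)
{
	/* wait_request in the hunks above is the same single join. */
	pthread_join(req->thread, NULL);
}
```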
@@ -10,6 +10,7 @@
 
 #include "aml.h"
 #include "aml/dma/linux-seq.h"
+#include "aml/layout/dense.h"
 
 #include <assert.h>
 #include <errno.h>
@@ -29,19 +30,15 @@
 ******************************************************************************/
 
 int aml_dma_request_linux_seq_copy_init(struct aml_dma_request_linux_seq *req,
-					const struct aml_tiling *dt,
-					void *dptr, int dtid,
-					const struct aml_tiling *st,
-					const void *sptr, int stid)
+					int type,
+					struct aml_layout *dest,
+					struct aml_layout *src)
 {
 	assert(req != NULL);
-	req->type = AML_DMA_REQUEST_TYPE_COPY;
-	/* figure out pointers */
-	req->dest = aml_tiling_tilestart(dt, dptr, dtid);
-	req->src = aml_tiling_tilestart(st, sptr, stid);
-	req->size = aml_tiling_tilesize(st, stid);
-	/* TODO: assert size match */
+	req->type = type;
+	req->dest = dest;
+	req->src = src;
 	return 0;
 }
@@ -54,12 +51,13 @@ int aml_dma_request_linux_seq_copy_destroy(struct aml_dma_request_linux_seq *r)
 /*******************************************************************************
 * Internal functions
 ******************************************************************************/
+
 int aml_dma_linux_seq_do_copy(struct aml_dma_linux_seq_data *dma,
 			      struct aml_dma_request_linux_seq *req)
 {
 	assert(dma != NULL);
 	assert(req != NULL);
-	memcpy(req->dest, req->src, req->size);
+	aml_copy_layout_generic(req->dest, req->src);
 	return 0;
 }
@@ -86,19 +84,32 @@ int aml_dma_linux_seq_create_request(struct aml_dma_data *d,
 	req = aml_vector_add(dma->data.requests);
 	/* init the request */
-	if (type == AML_DMA_REQUEST_TYPE_COPY) {
-		struct aml_tiling *dt, *st;
-		void *dptr, *sptr;
-		int dtid, stid;
-
-		dt = va_arg(ap, struct aml_tiling *);
-		dptr = va_arg(ap, void *);
-		dtid = va_arg(ap, int);
-		st = va_arg(ap, struct aml_tiling *);
-		sptr = va_arg(ap, void *);
-		stid = va_arg(ap, int);
-		aml_dma_request_linux_seq_copy_init(req, dt, dptr, dtid,
-						    st, sptr, stid);
+	if (type == AML_DMA_REQUEST_TYPE_LAYOUT) {
+		struct aml_layout *dl, *sl;
+
+		dl = va_arg(ap, struct aml_layout *);
+		sl = va_arg(ap, struct aml_layout *);
+		aml_dma_request_linux_seq_copy_init(req,
+						    AML_DMA_REQUEST_TYPE_LAYOUT,
+						    dl, sl);
+	} else if (type == AML_DMA_REQUEST_TYPE_PTR) {
+		struct aml_layout *dl, *sl;
+		void *dp, *sp;
+		size_t sz;
+
+		dp = va_arg(ap, void *);
+		sp = va_arg(ap, void *);
+		sz = va_arg(ap, size_t);
+		/* simple 1D layout, none of the parameters really matter, as
+		 * long as the copy generates a single memcpy.
+		 */
+		aml_layout_dense_create(&dl, dp, 0, sizeof(size_t), 1,
+					&sz, NULL, NULL);
+		aml_layout_dense_create(&sl, sp, 0, sizeof(size_t), 1,
+					&sz, NULL, NULL);
+		aml_dma_request_linux_seq_copy_init(req,
+						    AML_DMA_REQUEST_TYPE_PTR,
+						    dl, sl);
 	}
 	pthread_mutex_unlock(&dma->data.lock);
 	*r = (struct aml_dma_request *)req;
@@ -116,8 +127,13 @@ int aml_dma_linux_seq_destroy_request(struct aml_dma_data *d,
 	struct aml_dma_request_linux_seq *req =
 		(struct aml_dma_request_linux_seq *)r;
 
-	if (req->type == AML_DMA_REQUEST_TYPE_COPY)
+	if (req->type == AML_DMA_REQUEST_TYPE_LAYOUT)
 		aml_dma_request_linux_seq_copy_destroy(req);
+	else if (req->type == AML_DMA_REQUEST_TYPE_PTR) {
+		aml_layout_dense_destroy(&req->dest);
+		aml_layout_dense_destroy(&req->src);
+		aml_dma_request_linux_seq_copy_destroy(req);
+	}
 
 	/* enough to remove from request vector */
 	pthread_mutex_lock(&dma->data.lock);
@@ -136,7 +152,7 @@ int aml_dma_linux_seq_wait_request(struct aml_dma_data *d,
 		(struct aml_dma_request_linux_seq *)r;
 
 	/* execute */
-	if (req->type == AML_DMA_REQUEST_TYPE_COPY)
+	if (req->type != AML_DMA_REQUEST_TYPE_INVALID)
 		dma->ops.do_copy(&dma->data, req);
 
 	/* destroy a completed request */
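Worth noting for the sequential backend: create_request only records the request, and the copy itself runs in wait_request (the `do_copy` call in the hunk above). An asynchronous copy on this backend therefore moves no data until it is waited on. A sketch, assuming `aml_dma_wait` is the public wrapper over `ops->wait_request` (the wrapper itself is not part of this diff):

```c
#include "aml.h"

int copy_async_then_wait(struct aml_dma *dma, void *dst, void *src,
			 size_t size)
{
	struct aml_dma_request *req;
	int err;

	err = aml_dma_async_copy(dma, &req, AML_DMA_REQUEST_TYPE_PTR,
				 dst, src, size);
	if (err)
		return err;
	/* On the sequential backend, no data has moved yet. */
	return aml_dma_wait(dma, req); /* executes, then retires the request */
}
```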
@@ -125,7 +125,7 @@ int aml_layout_dense_create(struct aml_layout **layout,
 		else
 			memcpy(data->pitch, dims, ndims * sizeof(size_t));
 		for (size_t i = 1; i <= ndims; i++)
-			data->cpitch[i] = data->cpitch[i-1]*pitch[i-1];
+			data->cpitch[i] = data->cpitch[i-1]*data->pitch[i-1];
 		break;
 	default:
 		free(l);
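The one-line dense-layout fix above matters on the defaulted-pitch path: when the caller passes `pitch == NULL`, `dims` is copied into `data->pitch`, so the cumulative pitch must be computed from the internal copy rather than from the caller's (possibly NULL) `pitch` argument. A simplified standalone illustration, with hypothetical reduced types:

```c
#include <stddef.h>
#include <string.h>

struct dense {
	size_t pitch[3];	/* per-dimension pitch */
	size_t cpitch[4];	/* cumulative pitch; cpitch[0] assumed preset
				 * (e.g., to the element size) */
};

void set_pitch(struct dense *data, size_t ndims,
	       const size_t *dims, const size_t *pitch)
{
	if (pitch != NULL)
		memcpy(data->pitch, pitch, ndims * sizeof(size_t));
	else	/* fallback: dims double as the pitch */
		memcpy(data->pitch, dims, ndims * sizeof(size_t));
	for (size_t i = 1; i <= ndims; i++)
		/* the fix: read data->pitch, never the caller's pitch,
		 * which is NULL on the fallback path */
		data->cpitch[i] = data->cpitch[i-1] * data->pitch[i-1];
}
```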
@@ -56,8 +56,17 @@ void *aml_scratch_par_do_thread(void *arg)
 		(struct aml_scratch_request_par *)arg;
 	struct aml_scratch_par *scratch = req->scratch;
 
-	aml_dma_copy(scratch->data.dma, scratch->data.tiling, req->dstptr,
-		     req->dstid, scratch->data.tiling, req->srcptr, req->srcid);
+	void *dest, *src;
+	size_t size;
+
+	dest = aml_tiling_tilestart(scratch->data.tiling,
+				    req->dstptr, req->dstid);
+	src = aml_tiling_tilestart(scratch->data.tiling,
+				   req->srcptr, req->srcid);
+	size = aml_tiling_tilesize(scratch->data.tiling, req->srcid);
+	aml_dma_copy(scratch->data.dma, AML_DMA_REQUEST_TYPE_PTR,
+		     dest, src, size);
 	return NULL;
 }
@@ -69,8 +78,6 @@ struct aml_scratch_par_ops aml_scratch_par_inner_ops = {
 * Public API
 ******************************************************************************/
 
-/* TODO: not thread-safe */
-
 int aml_scratch_par_create_request(struct aml_scratch_data *d,
 				   struct aml_scratch_request **r,
 				   int type, va_list ap)
@@ -54,9 +54,14 @@ int aml_scratch_seq_doit(struct aml_scratch_seq_data *scratch,
 {
 	assert(scratch != NULL);
 	assert(req != NULL);
+
+	void *dest, *src;
+	size_t size;
+
+	dest = aml_tiling_tilestart(req->tiling, req->dstptr, req->dstid);
+	src = aml_tiling_tilestart(req->tiling, req->srcptr, req->srcid);
+	size = aml_tiling_tilesize(req->tiling, req->srcid);
 	return aml_dma_async_copy(scratch->dma, &req->dma_req,
-				  req->tiling, req->dstptr, req->dstid,
-				  req->tiling, req->srcptr, req->srcid);
+				  AML_DMA_REQUEST_TYPE_PTR, dest, src, size);
 }
 
 struct aml_scratch_seq_ops aml_scratch_seq_inner_ops = {
@@ -30,8 +30,7 @@ int main(int argc, char *argv[])
 	assert(!aml_tiling_1d_create(&tiling, TILESIZE*_SC_PAGE_SIZE,
 				     TILESIZE*_SC_PAGE_SIZE*NBTILES));
 
 	size_t maxrequests = NBTILES;
-	size_t maxthreads = 4;
-	assert(!aml_dma_linux_par_create(&dma, maxrequests, maxthreads));
+	assert(!aml_dma_linux_par_create(&dma, maxrequests));
 
 	/* allocate some memory */
 	src = aml_area_mmap(&aml_area_linux, NULL, TILESIZE*_SC_PAGE_SIZE*NBTILES);
@@ -43,8 +42,12 @@ int main(int argc, char *argv[])
 	memset(dst, 24, TILESIZE*_SC_PAGE_SIZE*NBTILES);
 
 	/* move some stuff by copy */
-	for(int i = 0; i < NBTILES; i++)
-		aml_dma_copy(dma, tiling, dst, i, tiling, src, i);
+	for(int i = 0; i < NBTILES; i++) {
+		void *d = aml_tiling_tilestart(tiling, dst, i);
+		void *s = aml_tiling_tilestart(tiling, src, i);
+
+		aml_dma_copy(dma, AML_DMA_REQUEST_TYPE_PTR,
+			     d, s, TILESIZE*_SC_PAGE_SIZE);
+	}
 
 	assert(!memcmp(src, dst, TILESIZE*_SC_PAGE_SIZE*NBTILES));
@@ -42,8 +42,12 @@ int main(int argc, char *argv[])
 	memset(dst, 24, TILESIZE*_SC_PAGE_SIZE*NBTILES);
 
 	/* move some stuff by copy */
-	for(int i = 0; i < NBTILES; i++)
-		aml_dma_copy(dma, tiling, dst, i, tiling, src, i);
+	for(int i = 0; i < NBTILES; i++) {
+		void *d = aml_tiling_tilestart(tiling, dst, i);
+		void *s = aml_tiling_tilestart(tiling, src, i);
+
+		aml_dma_copy(dma, AML_DMA_REQUEST_TYPE_PTR,
+			     d, s, TILESIZE*_SC_PAGE_SIZE);
+	}
 
 	assert(!memcmp(src, dst, TILESIZE*_SC_PAGE_SIZE*NBTILES));
@@ -33,8 +33,7 @@ int main(int argc, char *argv[])
 				     TILESIZE*_SC_PAGE_SIZE*NBTILES));
 
 	size_t maxrequests = NBTILES;
-	size_t maxthreads = 4;
-	assert(!aml_dma_linux_par_create(&dma, maxrequests, maxthreads));
+	assert(!aml_dma_linux_par_create(&dma, maxrequests));
 
 	/* allocate some memory */
 	src = aml_area_mmap(&aml_area_linux, NULL, TILESIZE*_SC_PAGE_SIZE*NBTILES);