Commit fa51aea5 authored by Swann Perarnau's avatar Swann Perarnau

[feature] add a pthread based scratchpad

Add a scratchpad that creates one pthread per request, to call
synchronous dma operations.

The intent is to end up with a cross product of programming language
support between dma and scratch:
- scratch_par + dma_seq gives users parallel scratch requests
- scratch_seq + dma_par gives users sequential access to parallel moves

The two other options don't make as much sense though.
parent 20354336
...@@ -21,7 +21,8 @@ DMA_CSOURCES = dma.c \ ...@@ -21,7 +21,8 @@ DMA_CSOURCES = dma.c \
dma_linux_seq.c dma_linux_seq.c
SCRATCH_CSOURCES = scratch.c \ SCRATCH_CSOURCES = scratch.c \
scratch_seq.c scratch_seq.c \
scratch_par.c
UTILS_CSOURCES = vector.c UTILS_CSOURCES = vector.c
......
...@@ -758,6 +758,59 @@ int aml_scratch_seq_init(struct aml_scratch *scratch, ...); ...@@ -758,6 +758,59 @@ int aml_scratch_seq_init(struct aml_scratch *scratch, ...);
int aml_scratch_seq_vinit(struct aml_scratch *scratch, va_list args); int aml_scratch_seq_vinit(struct aml_scratch *scratch, va_list args);
int aml_scratch_seq_destroy(struct aml_scratch *scratch); int aml_scratch_seq_destroy(struct aml_scratch *scratch);
/*******************************************************************************
* Parallel scratchpad API:
* Scratchpad creates one thread per request to trigger synchronous dma movements.
******************************************************************************/
/* Generic scratchpad operation table of the parallel implementation; the
 * AML_SCRATCH_PAR_DECL macro and aml_scratch_par_vinit both point to it. */
extern struct aml_scratch_ops aml_scratch_par_ops;
/*
 * One request of the parallel scratchpad: a tile copy between a source and a
 * destination, carried out by a dedicated thread.
 */
struct aml_scratch_request_par {
/* request type (push or pull); also the key used by the scratchpad's
 * request vector to tell free slots apart from used ones */
int type;
/* source of the copy: tiling, base pointer and tile index */
struct aml_tiling *stiling;
void *srcptr;
int srcid;
/* destination of the copy: tiling, base pointer and tile index */
struct aml_tiling *dtiling;
void *dstptr;
int dstid;
/* scratchpad that created the request; the thread reads its dma from it */
struct aml_scratch_par *scratch;
/* thread performing the copy, joined on wait or destroy */
pthread_t thread;
};
/* State of a parallel scratchpad, filled in by aml_scratch_par_vinit. */
struct aml_scratch_par_data {
/* src_area is stored at init; sch_area provides the scratch memory */
struct aml_area *src_area, *sch_area;
/* tiling used for both ends of every copy */
struct aml_tiling *tiling;
/* dma engine invoked synchronously by each request thread */
struct aml_dma *dma;
/* base of the scratch memory allocated from sch_area */
void * sch_ptr;
/* one int per scratch tile: the source tile it holds, or -1 when free */
struct aml_vector tilemap;
/* storage for struct aml_scratch_request_par entries */
struct aml_vector requests;
};
/* Internal operations of the parallel scratchpad. */
struct aml_scratch_par_ops {
/* start routine passed to pthread_create for each request; its argument
 * is the struct aml_scratch_request_par being served */
void *(*do_thread)(void *);
};
/* Implementation-specific part of a parallel scratchpad: the generic handle's
 * data pointer is cast to this structure by the implementation. */
struct aml_scratch_par {
struct aml_scratch_par_ops ops;
struct aml_scratch_par_data data;
};
/*
 * Declare a parallel scratchpad called "name" without dynamic allocation:
 * defines a struct aml_scratch_par named __<name>_inner_data and a
 * struct aml_scratch initialized with &aml_scratch_par_ops and a pointer to
 * that inner structure. The inner structure itself is not initialized here;
 * the test in this change calls aml_scratch_par_init on it before use.
 */
#define AML_SCRATCH_PAR_DECL(name) \
struct aml_scratch_par __ ##name## _inner_data; \
struct aml_scratch name = { \
&aml_scratch_par_ops, \
(struct aml_scratch_data *)&__ ## name ## _inner_data, \
};
/* Bytes needed to hold a struct aml_scratch followed by its
 * struct aml_scratch_par; this is the size aml_scratch_par_create allocates. */
#define AML_SCRATCH_PAR_ALLOCSIZE \
(sizeof(struct aml_scratch_par) + \
sizeof(struct aml_scratch))
/*
 * Variadic arguments read by aml_scratch_par_vinit (and therefore expected by
 * create and init as well), in order:
 *   struct aml_area *sch_area, struct aml_area *src_area,
 *   struct aml_dma *dma, struct aml_tiling *tiling,
 *   size_t nbtiles, size_t nbreqs
 * The last two are read with va_arg(ap, size_t): callers must pass size_t
 * values, not plain int constants.
 */
/* Allocate a scratchpad handle, initialize it and store it in *scratch. */
int aml_scratch_par_create(struct aml_scratch **scratch, ...);
/* Initialize an already declared or allocated scratchpad handle. */
int aml_scratch_par_init(struct aml_scratch *scratch, ...);
/* Same as aml_scratch_par_init, taking the arguments as a va_list. */
int aml_scratch_par_vinit(struct aml_scratch *scratch, va_list args);
/* Release the request vector, the tile map and the scratch memory; the handle
 * itself is not freed. */
int aml_scratch_par_destroy(struct aml_scratch *scratch);
/******************************************************************************* /*******************************************************************************
* General functions: * General functions:
* Initialize internal structures, cleanup everything at the end. * Initialize internal structures, cleanup everything at the end.
......
#include <aml.h>
#include <assert.h>
/*******************************************************************************
* Parallel scratchpad
* The scratch itself is organized into several different components
* - request types: push and pull
* - implementation of the request
* - user API (i.e. generic request creation and call)
* - how to init the scratch
******************************************************************************/
/*******************************************************************************
* Requests:
******************************************************************************/
/*
 * Fill in the copy description of a request.
 *
 * req:               request to set up, must not be NULL.
 * type:              request type stored as-is.
 * dt, dstptr, dstid: destination tiling, base pointer and tile index.
 * st, srcptr, srcid: source tiling, base pointer and tile index.
 *
 * The scratch and thread fields are left untouched. Always returns 0.
 */
int aml_scratch_request_par_init(struct aml_scratch_request_par *req, int type,
				 struct aml_tiling *dt, void *dstptr, int dstid,
				 struct aml_tiling *st, void *srcptr, int srcid)
{
	assert(req != NULL);

	req->type = type;

	/* where the data goes */
	req->dtiling = dt;
	req->dstptr = dstptr;
	req->dstid = dstid;

	/* where the data comes from */
	req->stiling = st;
	req->srcptr = srcptr;
	req->srcid = srcid;

	return 0;
}
/*
 * Tear down a request. There is nothing to release: the function only checks
 * that r is not NULL and returns 0.
 */
int aml_scratch_request_par_destroy(struct aml_scratch_request_par *r)
{
	const int status = 0;

	assert(NULL != r);
	return status;
}
/*******************************************************************************
* Internal functions
******************************************************************************/
/*
 * Thread start routine serving one request.
 *
 * arg: the struct aml_scratch_request_par to serve; its scratch field must
 *      already point to the owning scratchpad.
 *
 * Runs the copy described by the request through the scratchpad's dma and
 * returns NULL (no result is reported through the thread's exit value).
 */
void *aml_scratch_par_do_thread(void *arg)
{
	struct aml_scratch_request_par *req = arg;
	struct aml_scratch_par *scratch = req->scratch;

	aml_dma_copy(scratch->data.dma, req->dtiling, req->dstptr, req->dstid,
		     req->stiling, req->srcptr, req->srcid);
	/* a start routine declared to return void * must return a value */
	return NULL;
}
struct aml_scratch_par_ops aml_scratch_par_inner_ops = {
aml_scratch_par_do_thread,
};
/*******************************************************************************
* Public API
******************************************************************************/
/* TODO: not thread-safe */
/*
 * Create a request and start the thread that serves it.
 *
 * d:    scratchpad data (a struct aml_scratch_par), must not be NULL.
 * r:    receives the new request on success, must not be NULL.
 * type: AML_SCRATCH_REQUEST_TYPE_PUSH or AML_SCRATCH_REQUEST_TYPE_PULL.
 * ap:   for a push: void *srcptr, int *srcid, void *scratchptr, int scratchid;
 *       *srcid receives the source tile recorded in the tile map for
 *       scratchid, and the copy goes from the scratch tile to that tile.
 *       for a pull: void *scratchptr, int *scratchid, void *srcptr, int srcid;
 *       *scratchid receives the scratch tile chosen for srcid, and the copy
 *       goes from the source tile to that scratch tile.
 *
 * Returns 0 on success, -1 if type is not a known request type, or the error
 * number returned by pthread_create if the thread could not be started. On
 * failure *r is not written and no request is left allocated; the int output
 * argument may already have been written.
 */
/* TODO: not thread-safe */
int aml_scratch_par_create_request(struct aml_scratch_data *d,
				   struct aml_scratch_request **r,
				   int type, va_list ap)
{
	assert(d != NULL);
	assert(r != NULL);
	struct aml_scratch_par *scratch =
		(struct aml_scratch_par *)d;
	struct aml_scratch_request_par *req;
	int *claimed = NULL;
	int err;

	/* refuse unknown types before taking a request slot: starting a thread
	 * on a request that was never initialized would read garbage */
	if(type != AML_SCRATCH_REQUEST_TYPE_PUSH &&
	   type != AML_SCRATCH_REQUEST_TYPE_PULL)
		return -1;

	req = aml_vector_add(&scratch->data.requests);
	assert(req != NULL);

	/* init the request */
	if(type == AML_SCRATCH_REQUEST_TYPE_PUSH)
	{
		int scratchid;
		int *srcid;
		void *srcptr;
		void *scratchptr;

		srcptr = va_arg(ap, void *);
		srcid = va_arg(ap, int *);
		scratchptr = va_arg(ap, void *);
		scratchid = va_arg(ap, int);

		/* find destination tile */
		int *slot = aml_vector_get(&scratch->data.tilemap, scratchid);
		assert(slot != NULL);
		*srcid = *slot;

		/* init request */
		aml_scratch_request_par_init(req, type, scratch->data.tiling,
					     srcptr, *srcid,
					     scratch->data.tiling,
					     scratchptr, scratchid);
	}
	else
	{
		int *scratchid;
		int srcid;
		void *srcptr;
		void *scratchptr;
		int fresh = 0;

		scratchptr = va_arg(ap, void *);
		scratchid = va_arg(ap, int *);
		srcptr = va_arg(ap, void *);
		srcid = va_arg(ap, int);

		/* find destination tile
		 * We don't use add here because adding a tile means allocating
		 * new tiles on the sch_area too. */
		/* TODO: this is kind of a bug: we reuse a tile, instead of
		 * creating a no-op request
		 */
		int slot = aml_vector_find(&scratch->data.tilemap, srcid);
		if(slot == -1)
		{
			slot = aml_vector_find(&scratch->data.tilemap, -1);
			fresh = 1;
		}
		assert(slot != -1);
		int *tile = aml_vector_get(&scratch->data.tilemap, slot);
		*tile = srcid;
		*scratchid = slot;
		/* remember a newly taken slot so it can be given back if the
		 * thread cannot be started */
		if(fresh)
			claimed = tile;

		/* init request */
		aml_scratch_request_par_init(req, type,
					     scratch->data.tiling,
					     scratchptr, *scratchid,
					     scratch->data.tiling,
					     srcptr, srcid);
	}

	/* thread creation */
	req->scratch = scratch;
	err = pthread_create(&req->thread, NULL, scratch->ops.do_thread, req);
	if(err != 0)
	{
		/* undo the bookkeeping: nobody will ever wait on this request */
		if(claimed != NULL)
			aml_vector_remove(&scratch->data.tilemap, claimed);
		aml_vector_remove(&scratch->data.requests, req);
		return err;
	}
	*r = (struct aml_scratch_request *)req;
	return 0;
}
/*
 * Abort a request: cancel and join its thread, then release the scratch tile
 * and the request slot.
 *
 * d: scratchpad data (a struct aml_scratch_par), must not be NULL.
 * r: request created by aml_scratch_par_create_request, must not be NULL.
 *
 * Always returns 0.
 */
int aml_scratch_par_destroy_request(struct aml_scratch_data *d,
				    struct aml_scratch_request *r)
{
	assert(d != NULL);
	assert(r != NULL);
	struct aml_scratch_par *scratch =
		(struct aml_scratch_par *)d;
	struct aml_scratch_request_par *req =
		(struct aml_scratch_request_par *)r;
	/* stays NULL for an unexpected type so nothing bogus gets removed */
	int *tile = NULL;

	pthread_cancel(req->thread);
	pthread_join(req->thread, NULL);

	/* destroy removes the tile from the scratch: the scratch tile is the
	 * source of a push and the destination of a pull */
	if(req->type == AML_SCRATCH_REQUEST_TYPE_PUSH)
		tile = aml_vector_get(&scratch->data.tilemap, req->srcid);
	else if(req->type == AML_SCRATCH_REQUEST_TYPE_PULL)
		tile = aml_vector_get(&scratch->data.tilemap, req->dstid);

	/* the request fields are no longer needed past this point */
	aml_scratch_request_par_destroy(req);
	if(tile != NULL)
		aml_vector_remove(&scratch->data.tilemap, tile);
	aml_vector_remove(&scratch->data.requests, req);
	return 0;
}
/*
 * Block until a request has completed, then release it.
 *
 * d: scratchpad data (a struct aml_scratch_par), must not be NULL.
 * r: request created by aml_scratch_par_create_request, must not be NULL.
 *
 * After a push the scratch tile is given back to the tile map; after a pull
 * it stays mapped. Always returns 0.
 */
int aml_scratch_par_wait_request(struct aml_scratch_data *d,
				 struct aml_scratch_request *r)
{
	struct aml_scratch_par *sp;
	struct aml_scratch_request_par *pending;

	assert(d != NULL);
	assert(r != NULL);
	sp = (struct aml_scratch_par *)d;
	pending = (struct aml_scratch_request_par *)r;

	/* the copy is done once the thread can be joined */
	pthread_join(pending->thread, NULL);

	aml_scratch_request_par_destroy(pending);
	if(pending->type == AML_SCRATCH_REQUEST_TYPE_PUSH)
	{
		/* a pushed tile left the scratch: its slot is free again */
		int *freed = aml_vector_get(&sp->data.tilemap, pending->srcid);

		aml_vector_remove(&sp->data.tilemap, freed);
	}
	aml_vector_remove(&sp->data.requests, pending);
	return 0;
}
/*
 * Return the base address of the scratch memory.
 *
 * d: scratchpad data (a struct aml_scratch_par), must not be NULL.
 */
void *aml_scratch_par_baseptr(struct aml_scratch_data *d)
{
	assert(d != NULL);
	return ((struct aml_scratch_par *)d)->data.sch_ptr;
}
/*
 * Generic operation table of the parallel scratchpad.
 * The initializer is positional, so the entries must stay in the order of the
 * members of struct aml_scratch_ops (declared outside this file).
 */
struct aml_scratch_ops aml_scratch_par_ops = {
aml_scratch_par_create_request,
aml_scratch_par_destroy_request,
aml_scratch_par_wait_request,
aml_scratch_par_baseptr,
};
/*******************************************************************************
* Init functions:
******************************************************************************/
/*
 * Allocate and initialize a parallel scratchpad.
 *
 * d:   receives the new scratchpad on success.
 * ...: forwarded to aml_scratch_par_vinit (sch_area, src_area, dma, tiling,
 *      nbtiles, nbreqs).
 *
 * The generic handle and the implementation data are allocated as a single
 * zeroed block of AML_SCRATCH_PAR_ALLOCSIZE bytes, the data following the
 * handle.
 *
 * Returns 0 on success, -1 if the allocation fails, or the non-zero status of
 * aml_scratch_par_vinit; in both failure cases *d is left untouched and
 * nothing stays allocated by this function.
 */
int aml_scratch_par_create(struct aml_scratch **d, ...)
{
	va_list ap;
	struct aml_scratch *ret;
	int err;

	/* alloc */
	ret = calloc(1, AML_SCRATCH_PAR_ALLOCSIZE);
	if(ret == NULL)
		return -1;
	/* the implementation data sits right after the generic handle */
	ret->data = (struct aml_scratch_data *)
		((char *)ret + sizeof(struct aml_scratch));

	va_start(ap, d);
	err = aml_scratch_par_vinit(ret, ap);
	va_end(ap);
	if(err != 0)
	{
		free(ret);
		return err;
	}
	*d = ret;
	return 0;
}
/*
 * Initialize a parallel scratchpad from a va_list.
 *
 * d:  handle whose data pointer already refers to a struct aml_scratch_par.
 * ap: in order: struct aml_area *sch_area, struct aml_area *src_area,
 *     struct aml_dma *dma, struct aml_tiling *tiling,
 *     size_t nbtiles, size_t nbreqs.
 *
 * Sets up the request vector (nbreqs entries), the tile map (nbtiles entries,
 * all marked -1) and allocates nbtiles tiles of scratch memory from sch_area.
 *
 * Returns 0 on success, -1 if the scratch memory could not be allocated (the
 * two vectors are destroyed again in that case).
 */
int aml_scratch_par_vinit(struct aml_scratch *d, va_list ap)
{
	d->ops = &aml_scratch_par_ops;
	struct aml_scratch_par *scratch = (struct aml_scratch_par *)d->data;

	scratch->ops = aml_scratch_par_inner_ops;

	scratch->data.sch_area = va_arg(ap, struct aml_area *);
	scratch->data.src_area = va_arg(ap, struct aml_area *);
	scratch->data.dma = va_arg(ap, struct aml_dma *);
	scratch->data.tiling = va_arg(ap, struct aml_tiling *);
	size_t nbtiles = va_arg(ap, size_t);
	size_t nbreqs = va_arg(ap, size_t);

	/* allocate request array */
	aml_vector_init(&scratch->data.requests, nbreqs,
			sizeof(struct aml_scratch_request_par),
			offsetof(struct aml_scratch_request_par, type),
			AML_SCRATCH_REQUEST_TYPE_INVALID);

	/* scratch init */
	aml_vector_init(&scratch->data.tilemap, nbtiles, sizeof(int), 0, -1);
	size_t tilesize = aml_tiling_tilesize(scratch->data.tiling, 0);
	scratch->data.sch_ptr = aml_area_calloc(scratch->data.sch_area,
						nbtiles, tilesize);
	/* NOTE(review): assumes aml_area_calloc reports failure with NULL, as
	 * the test expects of aml_area_malloc; an empty request may
	 * legitimately yield NULL, so only non-empty ones count as errors */
	if(scratch->data.sch_ptr == NULL && nbtiles != 0 && tilesize != 0)
	{
		aml_vector_destroy(&scratch->data.tilemap);
		aml_vector_destroy(&scratch->data.requests);
		return -1;
	}
	return 0;
}
/*
 * Variadic front end of aml_scratch_par_vinit: packs the arguments following
 * d into a va_list, forwards them and returns vinit's status.
 */
int aml_scratch_par_init(struct aml_scratch *d, ...)
{
	va_list args;

	va_start(args, d);
	const int status = aml_scratch_par_vinit(d, args);
	va_end(args);

	return status;
}
int aml_scratch_par_destroy(struct aml_scratch *d)
{
struct aml_scratch_par *scratch = (struct aml_scratch_par *)d->data;
aml_vector_destroy(&scratch->data.requests);
aml_vector_destroy(&scratch->data.tilemap);
aml_area_free(scratch->data.sch_area, scratch->data.sch_ptr);
return 0;
}
...@@ -27,7 +27,8 @@ BINDING_TESTS = binding ...@@ -27,7 +27,8 @@ BINDING_TESTS = binding
DMA_LINUX_TESTS = dma_linux_seq \ DMA_LINUX_TESTS = dma_linux_seq \
dma_linux_par dma_linux_par
SCRATCH_TESTS = scratch_seq SCRATCH_TESTS = scratch_seq \
scratch_par
# unit tests # unit tests
UNIT_TESTS = $(ARENA_JEMALLOC_TESTS) \ UNIT_TESTS = $(ARENA_JEMALLOC_TESTS) \
......
#include <aml.h>
#include <assert.h>
/* size of one tile in pages (always used multiplied by PAGE_SIZE) */
#define TILESIZE (2)
/* number of tiles in the source buffer, and number of scratch tiles and
 * requests the scratchpad is created with */
#define NBTILES (4)
/*
 * Test of the parallel scratchpad: pull every tile of a source buffer into
 * the scratch, check its content, overwrite it, push it back and check that
 * the source tile was updated.
 */
int main(int argc, char *argv[])
{
	AML_BINDING_SINGLE_DECL(binding);
	AML_TILING_1D_DECL(tiling);
	AML_ARENA_JEMALLOC_DECL(arena);
	AML_AREA_LINUX_DECL(area);
	AML_DMA_LINUX_SEQ_DECL(dma);
	AML_SCRATCH_PAR_DECL(scratch);
	unsigned long nodemask[AML_NODEMASK_SZ];
	void *dst, *src;

	/* library initialization */
	aml_init(&argc, &argv);

	/* initialize all the supporting struct */
	assert(!aml_binding_init(&binding, AML_BINDING_TYPE_SINGLE, 0));
	assert(!aml_tiling_init(&tiling, AML_TILING_TYPE_1D, TILESIZE*PAGE_SIZE,
				TILESIZE*PAGE_SIZE*NBTILES));
	AML_NODEMASK_ZERO(nodemask);
	AML_NODEMASK_SET(nodemask, 0);
	assert(!aml_arena_jemalloc_init(&arena, AML_ARENA_JEMALLOC_TYPE_REGULAR));

	assert(!aml_area_linux_init(&area,
				    AML_AREA_LINUX_MANAGER_TYPE_SINGLE,
				    AML_AREA_LINUX_MBIND_TYPE_REGULAR,
				    AML_AREA_LINUX_MMAP_TYPE_ANONYMOUS,
				    &arena, MPOL_BIND, nodemask));

	size_t maxrequests = NBTILES;
	assert(!aml_dma_linux_seq_init(&dma, maxrequests));

	/* allocate some memory */
	src = aml_area_malloc(&area, TILESIZE*PAGE_SIZE*NBTILES);
	assert(src != NULL);

	memset(src, 42, TILESIZE*PAGE_SIZE*NBTILES);

	/* create scratchpad; the tile and request counts are read back with
	 * va_arg(ap, size_t), so they must be passed as size_t, not int */
	assert(!aml_scratch_par_init(&scratch, &area, &area, &dma, &tiling,
				     (size_t)NBTILES, (size_t)NBTILES));
	dst = aml_scratch_baseptr(&scratch);

	/* move some stuff */
	for(int i = 0; i < NBTILES; i++)
	{
		int di, si;
		void *dp, *sp;

		aml_scratch_pull(&scratch, dst, &di, src, i);

		dp = aml_tiling_tilestart(&tiling, dst, di);
		sp = aml_tiling_tilestart(&tiling, src, i);

		assert(!memcmp(sp, dp, TILESIZE*PAGE_SIZE));

		memset(dp, 33, TILESIZE*PAGE_SIZE);

		aml_scratch_push(&scratch, src, &si, dst, di);
		assert(si == i);

		sp = aml_tiling_tilestart(&tiling, src, si);

		assert(!memcmp(sp, dp, TILESIZE*PAGE_SIZE));
	}

	/* delete everything */
	/* dst is the scratch base pointer: aml_scratch_par_destroy already
	 * returns it to the area, freeing it here again would be a double free */
	aml_scratch_par_destroy(&scratch);
	aml_dma_linux_seq_destroy(&dma);
	aml_area_free(&area, src);
	aml_area_linux_destroy(&area);
	aml_tiling_destroy(&tiling, AML_TILING_TYPE_1D);
	aml_binding_destroy(&binding, AML_BINDING_TYPE_SINGLE);
	aml_finalize();
	return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment