...
 
Commits (23)
......@@ -14,6 +14,7 @@ include_aml_layout_HEADERS = \
include_aml_dmadir=$(includedir)/aml/dma
include_aml_dma_HEADERS = \
aml/dma/linux-seq.h \
aml/dma/linux-spin.h \
aml/dma/linux-par.h
include_aml_scratchdir=$(includedir)/aml/scratch
......@@ -23,8 +24,7 @@ include_aml_scratch_HEADERS = \
include_aml_tilingdir=$(includedir)/aml/tiling
include_aml_tiling_HEADERS = \
aml/tiling/1d.h \
aml/tiling/2d.h
aml/tiling/resize.h
include_amlutilsdir=$(includedir)/aml/utils
......
This diff is collapsed.
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
#ifndef AML_DMA_LINUX_SPIN_H
#define AML_DMA_LINUX_SPIN_H 1
/**
 * @defgroup aml_dma_linux_spin "AML Spin-loop DMA"
 * @brief Single-slot DMA driven by a spinning worker thread.
 *
 * DMA logic implemented based on general linux API: one worker thread
 * busy-waits on a spinlock-protected request slot and runs the copy
 * operator when a request is posted.  This DMA implementation moves data
 * between pointers allocated with an aml_area_linux.
 * @{
 **/
/**
 * Default table of dma request operations for the linux
 * spin-loop dma.
 **/
extern struct aml_dma_ops aml_dma_linux_spin_ops;
/** Inside of a spin-loop request for linux movement. **/
struct aml_dma_request_linux_spin {
	/**
	 * The type of dma request
	 * @see <aml.h>
	 * volatile: polled by the waiter outside the lock.
	 **/
	volatile int type;
	/** The destination layout of the data movement **/
	struct aml_layout *dest;
	/** The source layout of the data movement **/
	struct aml_layout *src;
	/** The dma this request slot belongs to **/
	struct aml_dma_linux_spin *dma;
	/** The worker thread in charge of the request progress **/
	pthread_t thread;
	/** Lock serializing slot access between submitter and worker **/
	pthread_spinlock_t lock;
	/** operator for this request **/
	aml_dma_operator op;
	/** operator argument for this request **/
	void *op_arg;
};
/** Inner data of a spin-loop dma: holds the single request slot. **/
struct aml_dma_linux_spin_data {
	/** The one (and only) in-flight request slot **/
	struct aml_dma_request_linux_spin req;
	/** default operator for this dma **/
	aml_dma_operator default_op;
	/** default operator arg for this dma **/
	void *default_op_arg;
};
/** Declaration of linux spin-loop dma operations **/
struct aml_dma_linux_spin_ops {
	/** Entry point of the worker thread servicing the request slot **/
	void *(*do_thread)(void *data);
};
/**
 * aml_dma structure for linux based, spin-loop dma movement
 * Needs to be initialized with aml_dma_linux_spin_create().
 * Can be passed to generic aml_dma_*() functions.
 **/
struct aml_dma_linux_spin {
	/** dma-specific operations (worker thread entry point) **/
	struct aml_dma_linux_spin_ops ops;
	/** dma state: request slot and default operator **/
	struct aml_dma_linux_spin_data data;
};
/**
 * Allocates and initializes a new spin-loop DMA.
 *
 * @param dma an address where the pointer to the newly allocated DMA structure
 * will be stored.
 * @param cpuset optional CPU set the worker thread is pinned to; ignored
 * when NULL.
 * @param op default data-movement operator; when NULL a generic layout
 * copy is used.
 * @param op_arg default argument passed to the operator.
 * @return 0 if successful; an error code otherwise.
 **/
int aml_dma_linux_spin_create(struct aml_dma **dma, const cpu_set_t *cpuset,
			      aml_dma_operator op, void *op_arg);
/**
 * Tears down a spin-loop DMA created with aml_dma_linux_spin_create.
 * @param dma the address of a pointer to a spin-loop dma. Will be NULL after.
 **/
void aml_dma_linux_spin_destroy(struct aml_dma **dma);
/**
 * @}
 **/
#endif // AML_DMA_LINUX_SPIN_H
......@@ -224,6 +224,8 @@ int aml_layout_row_slice_native(const struct aml_layout_data *data,
const size_t *dims,
const size_t *strides);
void aml_layout_dense_print(FILE *stream, char *prefix,
const struct aml_layout_dense *);
/**
* Pre-existing operators for dense layout
* with AML_LAYOUT_ORDER_COLUMN_MAJOR order.
......
......@@ -32,15 +32,11 @@ struct aml_scratch_request_seq {
* @see <aml.h>
**/
int type;
/** The tiling used for data organization in source and destination **/
struct aml_tiling *tiling;
/** The source layout of the data movement **/
struct aml_layout *src;
/** The identifier of the source tile **/
int srcid;
/** The destination pointer of the data movement **/
struct aml_layout *dst;
/** The identifier of the destination tile **/
int dstid;
/** The request used for movement **/
struct aml_dma_request *dma_req;
......@@ -48,22 +44,11 @@ struct aml_scratch_request_seq {
/** Inner data of the sequential scratchpad implementation **/
struct aml_scratch_seq_data {
/** The source area where data comes from **/
struct aml_area *src_area;
/** The destination area where data temporariliy goes to **/
struct aml_area *sch_area;
/**
* The data organisation.
* /todo why can't source and destination tiling vary?
**/
struct aml_tiling *tiling;
/** \todo What is this? **/
size_t scratch_size;
struct aml_tiling *src_tiling;
struct aml_tiling *scratch_tiling;
/** The dma engine in charge of the transfer **/
struct aml_dma *dma;
/** Pointer to data in scratch destination **/
void *sch_ptr;
/** The tilings involved in ongoing scratch requests **/
/** Map of tiles src layouts to scratch ids **/
struct aml_vector *tilemap;
/** The set of dma requests submitted to the dma to mode data **/
struct aml_vector *requests;
......@@ -96,21 +81,17 @@ struct aml_scratch_seq {
* @param scratch an address where the pointer to the newly allocated scratchpad
* structure will be stored.
*
* @param scratch_area the memory area where the scratchpad will be allocated.
* @param source_area the memory area containing the user data structure.
* @param dma the DMA that will be used for migrating data to and from
* the scratchpad.
* @param tiling the tiling to use on the user data structure and the scratch.
* @param nbtiles number of tiles to divide the scratchpad into.
* @param src_tiling the tiling on the source memory
* @param scratch_tiling the tiling to use on the scratch
* @param nbreqs the initial number of slots for asynchronous request that
* are in-flight (will be increased automatically if necessary).
* @return 0 if successful; an error code otherwise.
**/
int aml_scratch_seq_create(struct aml_scratch **scratch,
struct aml_area *scratch_area,
struct aml_area *src_area,
struct aml_dma *dma, struct aml_tiling *tiling,
size_t nbtiles, size_t nbreqs);
struct aml_dma *dma, struct aml_tiling *src_tiling,
struct aml_tiling *scratch_tiling, size_t nbreqs);
/**
* Tears down an initialized sequential scratchpad.
......
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
#ifndef AML_TILING_1D_H
#define AML_TILING_1D_H 1
/**
* @defgroup aml_tiling_1d "AML 1D Tiling"
* @brief 1 dimension tiling implementation.
*
* Implementation of 1D tilings.
* @{
**/
/** Initialized structure containing operations on 1D tiling. **/
extern struct aml_tiling_ops aml_tiling_1d_ops;
/** Initialized structure containing operations on 1D tiling. **/
extern struct aml_tiling_iterator_ops aml_tiling_iterator_1d_ops;
/**
 * Data of 1 dimension tiling. 1D tiling consists in a set of
 * contiguous data blocks.
 **/
struct aml_tiling_1d_data {
	/** The size of a data block in tiling **/
	size_t blocksize;
	/** The total size of the tiling **/
	size_t totalsize;
};
/** Data of 1 dimension tiling iterator. **/
struct aml_tiling_iterator_1d_data {
	/** Index of the current iteration **/
	size_t i;
	/** Tiling being iterated **/
	struct aml_tiling_1d_data *tiling;
};
/**
* Allocates and initializes a new 1D tiling.
*
* @param tiling an address where the pointer to the newly allocated tiling
* structure will be stored.
* @param tilesize provides the size of each tile.
* @param totalsize provides the size of the complete user data structure to be
* tiled.
* @return 0 if successful; an error code otherwise.
**/
int aml_tiling_1d_create(struct aml_tiling **tiling,
size_t tilesize, size_t totalsize);
/**
* Tears down an initialized tiling.
*
* @param tiling a tiling created with aml_tiling_1d_create. NULL after return.
**/
void aml_tiling_1d_destroy(struct aml_tiling **tiling);
/**
* @}
**/
#endif /* AML_TILING_1D_H */
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
#ifndef AML_TILING_RESIZE_H
#define AML_TILING_RESIZE_H 1
/**
* @defgroup aml_tiling_resize "AML Resizable Tiling"
* @brief Tiling with non-homogeneous (resizable border) tiles.
*
* Implementation of a tiling for which the border tiles have the exact size of
* the underlying layout (not smaller, not larger).
* @{
**/
/** Initialized structure containing operations for a tiling in column order.
**/
extern struct aml_tiling_ops aml_tiling_resize_column_ops;
/** Initialized structure containing operations for a tiling in row order. **/
extern struct aml_tiling_ops aml_tiling_resize_row_ops;
/** Inner data of a resizable tiling. **/
struct aml_tiling_resize {
	/** Order tags (row/column major) — semantics defined by the
	 *  tiling implementation; TODO confirm against tiling.h **/
	int tags;
	/** The layout this tiling partitions **/
	const struct aml_layout *layout;
	/** Number of dimensions of the tiling **/
	size_t ndims;
	/** Dimensions (in elements) of a regular tile **/
	size_t *tile_dims;
	/** Per-dimension tile counts — presumably; verify in tiling_resize.c **/
	size_t *dims;
	/** Dimensions of the (possibly smaller) border tiles, so border
	 *  tiles match the layout exactly **/
	size_t *border_tile_dims;
};
/**
 * Allocates and initializes a new resizable tiling.
 *
 * @param t an address where the pointer to the newly allocated tiling
 * will be stored.
 * @param tags order tags selecting row/column major behavior.
 * @param l the layout to tile.
 * @param ndims number of dimensions of the tiling.
 * @param tile_dims dimensions (in elements) of a regular tile.
 * @return 0 if successful; an error code otherwise.
 **/
int aml_tiling_resize_create(struct aml_tiling **t, int tags,
			     const struct aml_layout *l,
			     size_t ndims, const size_t *tile_dims);
/**
 * Tears down a tiling created with aml_tiling_resize_create.
 * @param t address of a pointer to the tiling; NULL after return.
 **/
int aml_tiling_resize_destroy(struct aml_tiling **t);
/**
* @}
**/
#endif /* AML_TILING_RESIZE_H */
......@@ -19,17 +19,12 @@ LAYOUT_SOURCES = \
DMA_SOURCES = \
dma/dma.c \
dma/dma_linux_par.c \
dma/dma_linux_spin.c \
dma/dma_linux_seq.c
SCRATCH_SOURCES = \
scratch/scratch.c \
scratch/scratch_par.c \
scratch/scratch_seq.c
TILING_SOURCES = \
tiling/tiling.c \
tiling/tiling_1d.c \
tiling/tiling_2d.c
tiling/tiling_resize.c
UTILS_SOURCES = \
utils/bitmap.c \
......@@ -39,7 +34,6 @@ UTILS_SOURCES = \
LIB_SOURCES = \
$(AREA_SOURCES) \
$(DMA_SOURCES) \
$(SCRATCH_SOURCES) \
$(TILING_SOURCES) \
$(LAYOUT_SOURCES) \
$(UTILS_SOURCES) \
......
......@@ -10,7 +10,7 @@
#include "aml.h"
#include "aml/layout/native.h"
#include "aml/layout/dense.h"
#include <assert.h>
/*******************************************************************************
......@@ -68,6 +68,104 @@ int aml_copy_layout_generic(struct aml_layout *dst,
return 0;
}
/**
 * Recursive kernel for a strided, dimension-shuffled copy.
 *
 * Copies elements from src to dst where dst dimension i maps to src
 * dimension target_dims[i].  Pitches are cumulative (cumul_*_pitch[0] is
 * the element size); elem_number is given in *source* dimension order, so
 * the extent of dst dimension i is elem_number[target_dims[i]].
 *
 * @param d number of remaining dimensions (recursion depth), d >= 1.
 * @param target_dims map from dst dimension index to src dimension index.
 * @param dst destination base pointer for this slab.
 * @param cumul_dst_pitch cumulative destination pitches, dst order.
 * @param dst_stride destination strides, dst order.
 * @param src source base pointer for this slab.
 * @param cumul_src_pitch cumulative source pitches, src order.
 * @param src_stride source strides, src order.
 * @param elem_number element counts per dimension, src order.
 * @param elem_size size of one element in bytes.
 */
static inline void aml_copy_shndstr_helper(size_t d, const size_t *target_dims,
					   void *dst,
					   const size_t *cumul_dst_pitch,
					   const size_t *dst_stride,
					   const void *src,
					   const size_t *cumul_src_pitch,
					   const size_t *src_stride,
					   const size_t *elem_number,
					   size_t elem_size)
{
	if (d == 1) {
		/* Hoist the per-element steps out of the loop. */
		size_t dst_step = dst_stride[0] * cumul_dst_pitch[0];
		size_t src_step = src_stride[target_dims[0]] *
			cumul_src_pitch[target_dims[0]];
		size_t n = elem_number[target_dims[0]];

		if (dst_step == elem_size && src_step == elem_size) {
			/* Both sides contiguous: one bulk copy. */
			memcpy(dst, src, n * elem_size);
		} else {
			for (size_t i = 0; i < n; i += 1)
				memcpy((void *)((intptr_t)dst + i * dst_step),
				       (void *)((intptr_t)src + i * src_step),
				       elem_size);
		}
	} else {
		/* Iterate the outermost dst dimension, recursing on the
		 * inner d-1 dimensions for each slab. */
		size_t dst_step = dst_stride[d - 1] * cumul_dst_pitch[d - 1];
		size_t src_step = src_stride[target_dims[d - 1]] *
			cumul_src_pitch[target_dims[d - 1]];

		for (size_t i = 0; i < elem_number[target_dims[d - 1]];
		     i += 1) {
			aml_copy_shndstr_helper(d - 1, target_dims, dst,
						cumul_dst_pitch, dst_stride,
						src, cumul_src_pitch,
						src_stride, elem_number,
						elem_size);
			dst = (void *)((intptr_t)dst + dst_step);
			src = (void *)((intptr_t)src + src_step);
		}
	}
}
/**
 * Strided, dimension-shuffled copy between two dense nd-arrays.
 *
 * Validates (in debug builds) that target_dims is a permutation of [0, d)
 * and that pitches can hold each strided dimension, then delegates to
 * aml_copy_shndstr_helper().
 *
 * Fix: the permutation bitmask used `1 << target_dims[i]` (an int shift,
 * undefined for shifts >= 31) while accumulating into a size_t; the shifts
 * are now done on size_t.
 *
 * @return 0 always (validation is assert-based).
 */
int aml_copy_shndstr_c(size_t d, const size_t * target_dims, void *dst,
		       const size_t * cumul_dst_pitch,
		       const size_t * dst_stride, const void *src,
		       const size_t * cumul_src_pitch,
		       const size_t * src_stride, const size_t * elem_number,
		       size_t elem_size)
{
	assert(d > 0);
	/* target_dims must be a permutation of [0, d).  The bitmask check
	 * assumes d <= CHAR_BIT * sizeof(size_t). */
	size_t present_dims = 0;

	for (size_t i = 0; i < d; i += 1) {
		assert(target_dims[i] < d);
		present_dims |= (size_t)1 << target_dims[i];
	}
	for (size_t i = 0; i < d; i += 1)
		assert(present_dims & ((size_t)1 << i));
	/* Each strided dimension must fit inside the next pitch.  dst
	 * extents are elem_number[target_dims[i]] (dst order), src extents
	 * are elem_number[i] (src order). */
	for (size_t i = 0; i < d - 1; i += 1) {
		assert(cumul_dst_pitch[i + 1] >=
		       dst_stride[i] * cumul_dst_pitch[i] *
		       elem_number[target_dims[i]]);
		assert(cumul_src_pitch[i + 1] >=
		       src_stride[i] * cumul_src_pitch[i] * elem_number[i]);
	}
	aml_copy_shndstr_helper(d, target_dims, dst, cumul_dst_pitch,
				dst_stride, src, cumul_src_pitch, src_stride,
				elem_number, elem_size);
	return 0;
}
/**
 * Shuffled copy between two dense layouts: dst dimension i receives src
 * dimension target_dims[i], where arg points to the target_dims array.
 * Both layouts must agree on rank, element size and (shuffled) dimensions.
 */
int aml_copy_layout_transform_native(struct aml_layout *dst,
				     const struct aml_layout *src,
				     void *arg)
{
	const size_t *target_dims = (const size_t *)arg;
	struct aml_layout_dense *dense_dst =
		(struct aml_layout_dense *)dst->data;
	struct aml_layout_dense *dense_src =
		(struct aml_layout_dense *)src->data;
	size_t ndims = dense_src->ndims;

	assert(ndims > 0);

	/* cpitch[0] of a dense layout is the element size. */
	size_t elem_size = dense_src->cpitch[0];

	assert(ndims == dense_dst->ndims);
	assert(elem_size == dense_dst->cpitch[0]);
	for (size_t i = 0; i < ndims; i += 1)
		assert(dense_src->dims[target_dims[i]] == dense_dst->dims[i]);
	return aml_copy_shndstr_c(ndims, target_dims, dense_dst->ptr,
				  dense_dst->cpitch, dense_dst->stride,
				  dense_src->ptr, dense_src->cpitch,
				  dense_src->stride, dense_src->dims,
				  elem_size);
}
/*******************************************************************************
* Generic DMA API:
* Most of the stuff is dispatched to a different layer, using type-specific
......
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
#include "config.h"
#include "aml.h"
#include "aml/dma/linux-spin.h"
#include "aml/layout/dense.h"
#include <assert.h>
#include <errno.h>
#include <sys/mman.h>
#define ASMPAUSE asm("" : : : "memory")
/*******************************************************************************
* Linux-backed, spin-loop dma
* The dma itself is organized into several different components
* - request types: copy
* - implementation of the request
* - user API (i.e. generic request creation and call)
* - how to init the dma
******************************************************************************/
/*******************************************************************************
* Requests:
******************************************************************************/
/**
 * Fills the dma's single request slot with a layout-to-layout copy.
 * Caller must hold req->lock (see create_request); the worker thread
 * reads the slot under the same lock.
 *
 * @param req the slot to fill.
 * @param dest destination layout.
 * @param src source layout.
 * @param op data-movement operator to run.
 * @param op_arg argument passed to op.
 * @return 0 always.
 **/
int aml_dma_request_linux_spin_copy_init(struct aml_dma_request_linux_spin *req,
					 struct aml_layout *dest,
					 struct aml_layout *src,
					 aml_dma_operator op, void *op_arg)
{
	assert(req != NULL);
	/* A valid type is what makes the worker execute the slot. */
	req->type = AML_DMA_REQUEST_TYPE_LAYOUT;
	req->dest = dest;
	req->src = src;
	req->op = op;
	req->op_arg = op_arg;
	return 0;
}
/**
 * Marks the request slot free so a new request can be posted.
 * @param r the request slot.
 * @return 0 always.
 **/
int aml_dma_request_linux_spin_copy_destroy(struct aml_dma_request_linux_spin *r)
{
	assert(r != NULL);
	r->type = AML_DMA_REQUEST_TYPE_INVALID;
	return 0;
}
/*******************************************************************************
* Internal functions
******************************************************************************/
/**
 * Worker thread loop: poll the single request slot and execute any posted
 * copy under the slot lock.
 *
 * The thread never exits on its own; it is terminated by pthread_cancel()
 * (asynchronous cancellation is enabled so cancelling works while
 * spinning).
 *
 * @param arg pointer to the struct aml_dma_request_linux_spin slot.
 * @return never returns normally.
 */
void *aml_dma_linux_spin_do_thread(void *arg)
{
	struct aml_dma_request_linux_spin *req =
		(struct aml_dma_request_linux_spin *)arg;

	pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
	while (1) {
		pthread_spin_lock(&req->lock);
		if (req->type != AML_DMA_REQUEST_TYPE_INVALID) {
			req->op(req->dest, req->src, req->op_arg);
			/* Mark done; waiters poll this flag. */
			req->type = AML_DMA_REQUEST_TYPE_INVALID;
		}
		pthread_spin_unlock(&req->lock);
		/* Relax between polls so submitters contending for the
		 * lock are not starved by a tight lock/unlock loop. */
		__asm__ __volatile__("" ::: "memory");
	}
	return NULL;
}
struct aml_dma_linux_spin_ops aml_dma_linux_spin_inner_ops = {
aml_dma_linux_spin_do_thread,
};
/*******************************************************************************
* Public API
******************************************************************************/
/**
 * Posts a copy request in the dma's single request slot.
 * Fails with -AML_EINVAL when a previous request is still in flight.
 */
int aml_dma_linux_spin_create_request(struct aml_dma_data *d,
				      struct aml_dma_request **r,
				      struct aml_layout *dest,
				      struct aml_layout *src,
				      aml_dma_operator op, void *op_arg)
{
	/* NULL checks done by the generic API */
	assert(d != NULL);
	assert(r != NULL);
	assert(dest != NULL);
	assert(src != NULL);

	struct aml_dma_linux_spin *dma = (struct aml_dma_linux_spin *)d;
	struct aml_dma_request_linux_spin *req = &dma->data.req;

	/* Fall back to the dma-wide defaults when no operator is given. */
	if (op == NULL)
		op = dma->data.default_op;
	if (op_arg == NULL)
		op_arg = dma->data.default_op_arg;

	pthread_spin_lock(&req->lock);
	if (req->type != AML_DMA_REQUEST_TYPE_INVALID) {
		/* Single slot: only one request may be in flight. */
		pthread_spin_unlock(&req->lock);
		return -AML_EINVAL;
	}
	aml_dma_request_linux_spin_copy_init(req, dest, src, op, op_arg);
	pthread_spin_unlock(&req->lock);
	*r = (struct aml_dma_request *)req;
	return 0;
}
/**
 * Destroys (forgets) a request handle.
 *
 * The spin dma owns its single request slot, so there is nothing to free;
 * slot recycling happens in the worker and in
 * aml_dma_linux_spin_wait_request().
 *
 * @param d the dma (unused).
 * @param r the request handle (unused).
 * @return 0 always.
 */
int aml_dma_linux_spin_destroy_request(struct aml_dma_data *d,
				       struct aml_dma_request **r)
{
	/* Silence unused-parameter warnings; signature is fixed by
	 * struct aml_dma_ops. */
	(void)d;
	(void)r;
	return 0;
}
/**
 * Waits for completion of the request pointed to by *r.
 *
 * First spins without the lock (ASMPAUSE is a compiler barrier) until the
 * worker marks the slot invalid, then re-checks under the lock so an
 * unlocked observation cannot race with the worker still inside its
 * critical section.  The loop breaks while holding the lock; the unlock
 * after the loop releases it.
 *
 * @param d the dma.
 * @param r address of the request handle; set to NULL on return.
 * @return 0 on success, -AML_EINVAL if *r is NULL.
 **/
int aml_dma_linux_spin_wait_request(struct aml_dma_data *d,
				    struct aml_dma_request **r)
{
	assert(d != NULL);
	assert(r != NULL);

	/* NOTE(review): dma is computed but never used here — kept for
	 * symmetry with the other entry points. */
	struct aml_dma_linux_spin *dma = (struct aml_dma_linux_spin *)d;
	struct aml_dma_request_linux_spin *req;

	if (*r == NULL)
		return -AML_EINVAL;
	req = (struct aml_dma_request_linux_spin *)*r;
	while (1) {
		/* Unlocked spin on the volatile completion flag. */
		while (req->type != AML_DMA_REQUEST_TYPE_INVALID) {
			ASMPAUSE;
		}
		/* Confirm completion under the lock. */
		pthread_spin_lock(&(req->lock));
		if (req->type == AML_DMA_REQUEST_TYPE_INVALID)
			break;
		pthread_spin_unlock(&(req->lock));
	}
	/* The break above exits with the lock held. */
	pthread_spin_unlock(&(req->lock));
	*r = NULL;
	return 0;
}
/** Dma operations table exposed to the generic aml_dma API.  Entry order
 *  must match the member order of struct aml_dma_ops in <aml.h>
 *  (create, destroy, wait) — TODO confirm against <aml.h>. **/
struct aml_dma_ops aml_dma_linux_spin_ops = {
	aml_dma_linux_spin_create_request,
	aml_dma_linux_spin_destroy_request,
	aml_dma_linux_spin_wait_request,
};
/*******************************************************************************
* Init functions:
******************************************************************************/
/**
 * Allocates and initializes a new spin-loop DMA.
 *
 * Spawns the worker thread immediately; it spins on the (initially
 * invalid) request slot until a request is posted.
 *
 * @param dma address where the pointer to the new DMA is stored.
 * @param cpuset optional CPU set the worker thread is pinned to (may be
 * NULL).
 * @param op default operator; when NULL, aml_copy_layout_generic is used
 * and op_arg is ignored.
 * @param op_arg default operator argument.
 * @return 0 if successful; an error code otherwise.
 **/
int aml_dma_linux_spin_create(struct aml_dma **dma, const cpu_set_t *cpuset,
			      aml_dma_operator op, void *op_arg)
{
	struct aml_dma *ret = NULL;
	struct aml_dma_linux_spin *d;

	if (dma == NULL)
		return -AML_EINVAL;
	*dma = NULL;
	ret = AML_INNER_MALLOC_2(struct aml_dma, struct aml_dma_linux_spin);
	if (ret == NULL)
		return -AML_ENOMEM;
	ret->data = AML_INNER_MALLOC_NEXTPTR(ret, struct aml_dma,
					     struct aml_dma_linux_spin);
	ret->ops = &aml_dma_linux_spin_ops;
	d = (struct aml_dma_linux_spin *)ret->data;
	d->ops = aml_dma_linux_spin_inner_ops;
	if (op == NULL) {
		op = aml_copy_layout_generic;
		op_arg = NULL;
	}
	d->data.default_op = op;
	d->data.default_op_arg = op_arg;

	/* Single request slot, initially free. */
	d->data.req.type = AML_DMA_REQUEST_TYPE_INVALID;
	pthread_spin_init(&d->data.req.lock, PTHREAD_PROCESS_PRIVATE);
	/* Launch the worker; on failure release everything rather than
	 * returning a dma with no worker behind it.  pthread_create fails
	 * on resource exhaustion, hence -AML_ENOMEM. */
	if (pthread_create(&d->data.req.thread, NULL, d->ops.do_thread,
			   &d->data.req) != 0) {
		pthread_spin_destroy(&d->data.req.lock);
		free(ret);
		return -AML_ENOMEM;
	}
	if (cpuset)
		pthread_setaffinity_np(d->data.req.thread, sizeof(cpu_set_t),
				       cpuset);
	*dma = ret;
	return 0;
}
/**
 * Tears down a spin-loop DMA.
 *
 * The worker thread is created unconditionally in
 * aml_dma_linux_spin_create() and never exits on its own, so it must
 * always be cancelled and joined before the spinlock is destroyed —
 * guarding the join on req->type would leak the thread (and destroy the
 * lock while the worker still uses it) whenever no request is in flight.
 *
 * @param d address of a pointer to a spin-loop dma; *d is NULL on return.
 **/
void aml_dma_linux_spin_destroy(struct aml_dma **d)
{
	struct aml_dma_linux_spin *dma;
	struct aml_dma_request_linux_spin *req;

	if (d == NULL || *d == NULL)
		return;
	dma = (struct aml_dma_linux_spin *)(*d)->data;
	req = &dma->data.req;
	/* Always stop the worker: it spins even when idle. */
	pthread_cancel(req->thread);
	pthread_join(req->thread, NULL);
	pthread_spin_destroy(&req->lock);
	free(*d);
	*d = NULL;
}
......@@ -334,8 +334,8 @@ int aml_layout_column_reshape(const struct aml_layout_data *data,
int aml_layout_column_slice(const struct aml_layout_data *data,
struct aml_layout **output,
const size_t *dims,
const size_t *offsets,
const size_t *dims,
const size_t *strides)
{
struct aml_layout *layout;
......@@ -359,7 +359,7 @@ int aml_layout_column_slice(const struct aml_layout_data *data,
for (size_t i = 0; i < d->ndims; i++) {
cpitch[i] = d->cpitch[i];
new_strides[i] = strides[i] * d->stride[i];
cpitch[d->ndims] -= cpitch[i] * offsets[i] * d->stride[i];
//cpitch[d->ndims] -= cpitch[i] * offsets[i] * d->stride[i];
}
aml_layout_dense_init_cpitch(layout,
......@@ -465,8 +465,8 @@ int aml_layout_row_reshape(const struct aml_layout_data *data,
int aml_layout_row_slice(const struct aml_layout_data *data,
struct aml_layout **output,
const size_t *dims,
const size_t *offsets,
const size_t *dims,
const size_t *strides)
{
struct aml_layout *layout;
......@@ -495,7 +495,7 @@ int aml_layout_row_slice(const struct aml_layout_data *data,
for (size_t i = 0; i < d->ndims; i++) {
cpitch[i] = d->cpitch[i];
n_strides[i] *= d->stride[i];
cpitch[d->ndims] -= cpitch[i] * n_offsets[i] * d->stride[i];
//cpitch[d->ndims] -= cpitch[i] * n_offsets[i] * d->stride[i];
}
ptr = aml_layout_column_deref(data, n_offsets);
......@@ -535,7 +535,7 @@ int aml_layout_row_slice_native(const struct aml_layout_data *data,
for (size_t i = 0; i < d->ndims; i++) {
cpitch[i] = d->cpitch[i];
new_strides[i] = strides[i] * d->stride[i];
cpitch[d->ndims] -= cpitch[i] * offsets[i] * d->stride[i];
//cpitch[d->ndims] -= cpitch[i] * offsets[i] * d->stride[i];
}
ptr = aml_layout_column_deref(data, offsets);
......@@ -556,7 +556,7 @@ struct aml_layout_ops aml_layout_row_ops = {
aml_layout_column_deref,
aml_layout_row_order,
aml_layout_row_dims,
aml_layout_row_dims,
aml_layout_column_dims,
aml_layout_dense_ndims,
aml_layout_dense_element_size,
aml_layout_row_reshape,
......@@ -564,3 +564,20 @@ struct aml_layout_ops aml_layout_row_ops = {
aml_layout_row_slice_native
};
/**
 * Dumps a dense layout (pointer, dims, strides, pitches) to a stream for
 * debugging.  Prints one row per dimension plus a final row holding the
 * sentinel cumulative pitch cpitch[ndims].
 *
 * @param stream output stream.
 * @param prefix string prepended to every line.
 * @param layout layout to print; may be NULL (only the header is printed).
 */
void aml_layout_dense_print(FILE *stream, char *prefix,
			    const struct aml_layout_dense *layout)
{
	/* %p requires a void * argument: cast to avoid undefined
	 * behavior with a struct pointer. */
	fprintf(stream, "%s: layout-dense: %p\n", prefix, (void *)layout);
	if (layout == NULL)
		return;
	fprintf(stream, "%s: ptr: %p\n", prefix, layout->ptr);
	fprintf(stream, "%s: ndims: %zu\n", prefix, layout->ndims);
	for (size_t i = 0; i < layout->ndims; i++) {
		fprintf(stream, "%s: %16zu: %16zu %16zu %16zu %16zu\n", prefix,
			i, layout->dims[i], layout->stride[i],
			layout->pitch[i], layout->cpitch[i]);
	}
	fprintf(stream, "%s: %16zu: %16s %16s %16s %16zu\n", prefix,
		layout->ndims, "NA", "NA", "NA",
		layout->cpitch[layout->ndims]);
}
......@@ -173,8 +173,8 @@ static int
aml_check_layout_slice(const struct aml_layout *layout,
int (*get_dims)(const struct aml_layout_data *,
size_t *),
const size_t *dims,
const size_t *offsets,
const size_t *dims,
const size_t *strides)
{
assert(layout->ops->ndims != NULL &&
......@@ -201,8 +201,8 @@ aml_check_layout_slice(const struct aml_layout *layout,
int aml_layout_slice(const struct aml_layout *layout,
struct aml_layout **reshaped_layout,
const size_t *dims,
const size_t *offsets,
const size_t *dims,
const size_t *strides)
{
assert(layout != NULL &&
......@@ -231,14 +231,14 @@ int aml_layout_slice(const struct aml_layout *layout,
assert(aml_check_layout_slice(layout,
layout->ops->dims,
dims,
_offsets,
dims,
_strides) == AML_SUCCESS);
err = layout->ops->slice(layout->data,
&result,
dims,
_offsets,
dims,
_strides);
if (err == AML_SUCCESS)
*reshaped_layout = result;
......@@ -271,7 +271,7 @@ int aml_layout_slice_native(const struct aml_layout *layout,
strides) == AML_SUCCESS);
err = layout->ops->slice_native(layout->data,
&result, dims, offsets, strides);
&result, offsets, dims, strides);
if (err == AML_SUCCESS)
*reshaped_layout = result;
......
......@@ -20,91 +20,94 @@
* abstract the request creation after this layer.
******************************************************************************/
int aml_scratch_pull(struct aml_scratch *scratch, ...)
int aml_scratch_pull(struct aml_scratch *scratch,
struct aml_layout **dest, int *scratchid,
struct aml_layout *src, int srcid)
{
assert(scratch != NULL);
va_list ap;
int ret;
struct aml_scratch_request *req;
int ret;
va_start(ap, scratch);
if (scratch == NULL || dest == NULL || scratchid == NULL
|| src == NULL)
return -AML_EINVAL;
ret = scratch->ops->create_request(scratch->data, &req,
AML_SCRATCH_REQUEST_TYPE_PULL, ap);
va_end(ap);
ret = scratch->ops->wait_request(scratch->data, req);
return ret;
AML_SCRATCH_REQUEST_TYPE_PULL,
dest, scratchid, src, srcid);
if (ret)
return ret;
return scratch->ops->wait_request(scratch->data, req);
}
int aml_scratch_async_pull(struct aml_scratch *scratch,
struct aml_scratch_request **req, ...)
struct aml_scratch_request **req,
struct aml_layout **dest, int *scratchid,
struct aml_layout *src, int srcid)
{
assert(scratch != NULL);
assert(req != NULL);
va_list ap;
int ret;
if (scratch == NULL || dest == NULL || scratchid == NULL
|| src == NULL)
return -AML_EINVAL;
va_start(ap, req);
ret = scratch->ops->create_request(scratch->data, req,
AML_SCRATCH_REQUEST_TYPE_PULL, ap);
va_end(ap);
return ret;
return scratch->ops->create_request(scratch->data, req,
AML_SCRATCH_REQUEST_TYPE_PULL,
dest, scratchid, src, srcid);
}
int aml_scratch_push(struct aml_scratch *scratch, ...)
int aml_scratch_push(struct aml_scratch *scratch,
struct aml_layout **dest, int *destid,
struct aml_layout *src, int srcid)
{
assert(scratch != NULL);
struct aml_scratch_request *req;
va_list ap;
int ret;
va_start(ap, scratch);
if (scratch == NULL || dest == NULL || destid == NULL
|| src == NULL)
return -AML_EINVAL;
ret = scratch->ops->create_request(scratch->data, &req,
AML_SCRATCH_REQUEST_TYPE_PUSH, ap);
va_end(ap);
ret = scratch->ops->wait_request(scratch->data, req);
return ret;
AML_SCRATCH_REQUEST_TYPE_PUSH,
dest, destid, src, srcid);
if (ret)
return ret;
return scratch->ops->wait_request(scratch->data, req);
}
int aml_scratch_async_push(struct aml_scratch *scratch,
struct aml_scratch_request **req, ...)
struct aml_scratch_request **req,
struct aml_layout **dest, int *destid,
struct aml_layout *src, int srcid)
{
assert(scratch != NULL);
assert(req != NULL);
va_list ap;
int ret;
if (scratch == NULL || dest == NULL || destid == NULL
|| src == NULL)
return -AML_EINVAL;
va_start(ap, req);
ret = scratch->ops->create_request(scratch->data, req,
AML_SCRATCH_REQUEST_TYPE_PUSH, ap);
va_end(ap);
return ret;
return scratch->ops->create_request(scratch->data, req,
AML_SCRATCH_REQUEST_TYPE_PUSH,
dest, destid, src, srcid);
}
/**
 * Cancels an asynchronous scratch request.
 *
 * @param scratch the scratchpad the request was issued on.
 * @param req the request to cancel.
 * @return -AML_EINVAL on NULL arguments; otherwise the backend's
 * destroy_request return code.
 **/
int aml_scratch_cancel(struct aml_scratch *scratch,
		       struct aml_scratch_request *req)
{
	/* NULL arguments are a runtime error reported via -AML_EINVAL,
	 * not a debug-build abort. */
	if (scratch == NULL || req == NULL)
		return -AML_EINVAL;
	return scratch->ops->destroy_request(scratch->data, req);
}
/**
 * Waits for completion of a scratch request.
 *
 * @param scratch the scratchpad the request was issued on.
 * @param req the request to wait on.
 * @return -AML_EINVAL on NULL arguments; otherwise the backend's
 * wait_request return code.
 **/
int aml_scratch_wait(struct aml_scratch *scratch,
		     struct aml_scratch_request *req)
{
	/* NULL arguments are a runtime error reported via -AML_EINVAL,
	 * not a debug-build abort. */
	if (scratch == NULL || req == NULL)
		return -AML_EINVAL;
	return scratch->ops->wait_request(scratch->data, req);
}
/**
 * Returns the base pointer of the scratchpad memory region.
 *
 * @param scratch the scratchpad.
 * @return the backend's base pointer, or NULL when scratch is NULL
 * (consistent with the runtime checks of the other entry points).
 **/
void *aml_scratch_baseptr(const struct aml_scratch *scratch)
{
	if (scratch == NULL)
		return NULL;
	return scratch->ops->baseptr(scratch->data);
}
/**
 * Releases a scratch tile for reuse.
 *
 * @param scratch the scratchpad.
 * @param scratchid identifier of the tile to release.
 * @return -AML_EINVAL on NULL scratch; otherwise the backend's release
 * return code.
 **/
int aml_scratch_release(struct aml_scratch *scratch, int scratchid)
{
	/* NULL argument is a runtime error reported via -AML_EINVAL,
	 * not a debug-build abort. */
	if (scratch == NULL)
		return -AML_EINVAL;
	return scratch->ops->release(scratch->data, scratchid);
}
......@@ -27,31 +27,23 @@
******************************************************************************/
int aml_scratch_request_seq_init(struct aml_scratch_request_seq *req, int type,
struct aml_tiling *t, void *dstptr, int dstid,
void *srcptr, int srcid)
struct aml_layout *dest, int destid,
struct aml_layout *src, int srcid)
{
assert(req != NULL);
void *dp, *sp;
size_t size;
req->type = type;
req->tiling = t;
req->src = src;
req->srcid = srcid;
req->dstid = dstid;
dp = aml_tiling_tilestart(req->tiling, dstptr, dstid);
sp = aml_tiling_tilestart(req->tiling, srcptr, srcid);
size = aml_tiling_tilesize(req->tiling, srcid);
aml_layout_dense_create(&req->dst, dp, 0, 1, 1, &size, NULL, NULL);
aml_layout_dense_create(&req->src, sp, 0, 1, 1, &size, NULL, NULL);
req->dst = dest;
req->dstid = destid;
return 0;
}
/**
 * Tears down a sequential scratch request.
 * Destroys the request's dst/src layouts — presumably created by
 * aml_layout_dense_create() in the request init; confirm against
 * aml_scratch_request_seq_init.
 *
 * @param r the request to tear down.
 * @return 0 always.
 **/
int aml_scratch_request_seq_destroy(struct aml_scratch_request_seq *r)
{
	assert(r != NULL);
	aml_layout_dense_destroy(&r->dst);
	aml_layout_dense_destroy(&r->src);
	return 0;
}
......@@ -75,11 +67,11 @@ struct aml_scratch_seq_ops aml_scratch_seq_inner_ops = {
* Public API
******************************************************************************/
/* TODO: not thread-safe */
int aml_scratch_seq_create_request(struct aml_scratch_data *d,
struct aml_scratch_request **r,
int type, va_list ap)
int type,
struct aml_layout **dest, int *destid,
struct aml_layout *src, int srcid)
{
assert(d != NULL);
assert(r != NULL);
......@@ -92,45 +84,24 @@ int aml_scratch_seq_create_request(struct aml_scratch_data *d,
req = aml_vector_add(scratch->data.requests);
/* init the request */
if (type == AML_SCRATCH_REQUEST_TYPE_PUSH) {
int scratchid;
int *srcid;
void *srcptr;
void *scratchptr;
srcptr = va_arg(ap, void *);
srcid = va_arg(ap, int *);
scratchptr = va_arg(ap, void *);
scratchid = va_arg(ap, int);
/* find destination tile */
int *slot = aml_vector_get(scratch->data.tilemap, scratchid);
int *slot = aml_vector_get(scratch->data.tilemap, srcid);
assert(slot != NULL);
*srcid = *slot;
*destid = *slot;
*dest = aml_tiling_index_linear(scratch->data.src_tiling,
*destid);
/* init request */
aml_scratch_request_seq_init(req, type,
scratch->data.tiling,
srcptr, *srcid,
scratchptr, scratchid);
aml_scratch_request_seq_init(req, type, *dest, *destid,
src, srcid);
} else if (type == AML_SCRATCH_REQUEST_TYPE_PULL) {
int *scratchid;
int srcid;
void *srcptr;
void *scratchptr;
int slot, *tile;
scratchptr = va_arg(ap, void *);
scratchid = va_arg(ap, int *);
srcptr = va_arg(ap, void *);
srcid = va_arg(ap, int);
/* find destination tile
* We don't use add here because adding a tile means allocating
* new tiles on the sch_area too. */
/* TODO: this is kind of a bug: we reuse a tile, instead of
* creating a no-op request
*/
slot = aml_vector_find(scratch->data.tilemap, srcid);
if (slot == -1) {
slot = aml_vector_find(scratch->data.tilemap, -1);
......@@ -141,12 +112,13 @@ int aml_scratch_seq_create_request(struct aml_scratch_data *d,
type = AML_SCRATCH_REQUEST_TYPE_NOOP;
/* save the key */
*scratchid = slot;
*destid = slot;
*dest = aml_tiling_index_linear(scratch->data.scratch_tiling,
slot);
/* init request */
aml_scratch_request_seq_init(req, type, scratch->data.tiling,
scratchptr, *scratchid,
srcptr, srcid);
aml_scratch_request_seq_init(req, type, *dest, *destid,
src, srcid);
}
pthread_mutex_unlock(&scratch->data.lock);
if (req->type != AML_SCRATCH_REQUEST_TYPE_NOOP)
......@@ -210,15 +182,6 @@ int aml_scratch_seq_wait_request(struct aml_scratch_data *d,
return 0;
}
/**
 * Returns the pointer to the scratch memory region (data.sch_ptr) of a
 * sequential scratchpad.
 *
 * @param d the scratchpad inner data.
 * @return the scratch region base pointer.
 **/
void *aml_scratch_seq_baseptr(const struct aml_scratch_data *d)
{
	assert(d != NULL);
	const struct aml_scratch_seq *scratch =
		(const struct aml_scratch_seq *)d;
	return scratch->data.sch_ptr;
}
int aml_scratch_seq_release(struct aml_scratch_data *d, int scratchid)
{
assert(d != NULL);
......@@ -237,7 +200,6 @@ struct aml_scratch_ops aml_scratch_seq_ops = {
aml_scratch_seq_create_request,
aml_scratch_seq_destroy_request,
aml_scratch_seq_wait_request,
aml_scratch_seq_baseptr,
aml_scratch_seq_release,
};
......@@ -246,17 +208,14 @@ struct aml_scratch_ops aml_scratch_seq_ops = {
******************************************************************************/
int aml_scratch_seq_create(struct aml_scratch **scratch,
struct aml_area *scratch_area,
struct aml_area *src_area,
struct aml_dma *dma, struct aml_tiling *tiling,
size_t nbtiles, size_t nbreqs)
struct aml_dma *dma, struct aml_tiling *src_tiling,
struct aml_tiling *scratch_tiling, size_t nbreqs)
{
struct aml_scratch *ret = NULL;
struct aml_scratch_seq *s;
if (scratch == NULL
|| scratch_area == NULL || src_area == NULL
|| dma == NULL || tiling == NULL)
if (scratch == NULL || dma == NULL || src_tiling == NULL
|| scratch_tiling == NULL)
return -AML_EINVAL;
*scratch = NULL;
......@@ -271,10 +230,9 @@ int aml_scratch_seq_create(struct aml_scratch **scratch,
s = (struct aml_scratch_seq *)ret->data;
s->ops = aml_scratch_seq_inner_ops;
s->data.sch_area = scratch_area;
s->data.src_area = src_area;
s->data.dma = dma;
s->data.tiling = tiling;
s->data.src_tiling = src_tiling;
s->data.scratch_tiling = scratch_tiling;
/* allocate request array */
aml_vector_create(&s->data.requests, nbreqs,
......@@ -282,14 +240,9 @@ int aml_scratch_seq_create(struct aml_scratch **scratch,
offsetof(struct aml_scratch_request_seq, type),
AML_SCRATCH_REQUEST_TYPE_INVALID);
/* s init */
/* "hashmap for src to scratch tiles */
size_t nbtiles = aml_tiling_ntiles(scratch_tiling);
aml_vector_create(&s->data.tilemap, nbtiles, sizeof(int), 0, -1);
size_t tilesize = aml_tiling_tilesize(s->data.tiling, 0);
s->data.scratch_size = nbtiles * tilesize;
s->data.sch_ptr = aml_area_mmap(s->data.sch_area,
s->data.scratch_size,
NULL);
pthread_mutex_init(&s->data.lock, NULL);
*scratch = ret;
......@@ -311,9 +264,6 @@ void aml_scratch_seq_destroy(struct aml_scratch **scratch)
inner = (struct aml_scratch_seq *)s->data;
aml_vector_destroy(&inner->data.requests);
aml_vector_destroy(&inner->data.tilemap);
aml_area_munmap(inner->data.sch_area,
inner->data.sch_ptr,
inner->data.scratch_size);
pthread_mutex_destroy(&inner->data.lock);
free(s);
*scratch = NULL;
......
......@@ -9,101 +9,59 @@
*******************************************************************************/
#include "aml.h"
#include "aml/tiling/1d.h"
#include "aml/tiling/2d.h"
#include <assert.h>
/*******************************************************************************
* Tiling functions
******************************************************************************/
int aml_tiling_tileid(const struct aml_tiling *t, ...)
int aml_tiling_order(const struct aml_tiling *t)
{
assert(t != NULL);
va_list ap;
int ret;
if (t == NULL || t->ops == NULL)
return -AML_EINVAL;
va_start(ap, t);
ret = t->ops->tileid(t->data, ap);
va_end(ap);
return ret;
return t->ops->order(t->data);
}
size_t aml_tiling_tilesize(const struct aml_tiling *t, int tileid)
int aml_tiling_tile_dims(const struct aml_tiling *t, size_t *dims)
{
assert(t != NULL);
return t->ops->tilesize(t->data, tileid);
}
if (t == NULL || t->ops == NULL || dims == NULL)
return -AML_EINVAL;
void *aml_tiling_tilestart(const struct aml_tiling *t, const void *ptr,
int tileid)
{
assert(t != NULL);
return t->ops->tilestart(t->data, ptr, tileid);
return t->ops->tile_dims(t->data, dims);
}
int aml_tiling_ndims(const struct aml_tiling *t, ...)
int aml_tiling_dims(const struct aml_tiling *t, size_t *dims)
{
assert(t != NULL);
va_list ap;
int err;
if (t == NULL || t->ops == NULL || dims == NULL)
return -AML_EINVAL;
va_start(ap, t);
err = t->ops->ndims(t->data, ap);
va_end(ap);
return err;
return t->ops->dims(t->data, dims);
}
/*******************************************************************************
* Tiling Iterator functions
******************************************************************************/
int aml_tiling_iterator_reset(struct aml_tiling_iterator *it)
size_t aml_tiling_ndims(const struct aml_tiling *t)
{
assert(it != NULL);
return it->ops->reset(it->data);
assert(t != NULL && t->ops != NULL);
return t->ops->ndims(t->data);
}
int aml_tiling_iterator_next(struct aml_tiling_iterator *it)
size_t aml_tiling_ntiles(const struct aml_tiling *t)
{
assert(it != NULL);
return it->ops->next(it->data);
assert(t != NULL && t->ops != NULL);
return t->ops->ntiles(t->data);
}
int aml_tiling_iterator_end(const struct aml_tiling_iterator *it)
struct aml_layout *aml_tiling_index(const struct aml_tiling *t,
const size_t *coords)
{
assert(it != NULL);
return it->ops->end(it->data);
}
if (t == NULL || t->ops == NULL || coords == NULL)
return NULL;
int aml_tiling_iterator_get(const struct aml_tiling_iterator *it, ...)
{
assert(it != NULL);
va_list ap;
va_start(ap, it);
it->ops->get(it->data, ap);
va_end(ap);
return 0;
return t->ops->index(t->data, coords);
}
/*******************************************************************************
* Iterator Init
* We can't do the allocation ourselves here, as we don't have the type of the
* tiling.
******************************************************************************/
int aml_tiling_create_iterator(struct aml_tiling *t,
struct aml_tiling_iterator **it, int flags)
struct aml_layout *aml_tiling_index_linear(const struct aml_tiling *t,
size_t uuid)
{
assert(t != NULL);
assert(it != NULL);
return t->ops->create_iterator(t->data, it, flags);
}
if (t == NULL || t->ops == NULL)
return NULL;
void aml_tiling_destroy_iterator(struct aml_tiling *t,
struct aml_tiling_iterator **it)
{
assert(t != NULL);
assert(it != NULL);
t->ops->destroy_iterator(t->data, it);
return t->ops->index_linear(t->data, uuid);
}
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
#include "aml.h"
#include "aml/tiling/1d.h"
#include <assert.h>
/*******************************************************************************
* 1D Iterator
******************************************************************************/
/**
 * Rewinds a 1D tiling iterator to the first tile.
 * @param data: generic iterator data, backed by a 1D iterator.
 * @return 0 (cannot fail).
 **/
int aml_tiling_iterator_1d_reset(struct aml_tiling_iterator_data *data)
{
	struct aml_tiling_iterator_1d_data *iter;

	iter = (struct aml_tiling_iterator_1d_data *)data;
	iter->i = 0;
	return 0;
}
int aml_tiling_iterator_1d_end(const struct aml_tiling_iterator_data *data)
{
const struct aml_tiling_iterator_1d_data *it =
(const struct aml_tiling_iterator_1d_data *)data;
return it->i * it->tiling->blocksize >= it->tiling->totalsize;
}
/**
 * Advances a 1D tiling iterator to the next tile.
 * @param data: generic iterator data, backed by a 1D iterator.
 * @return 0 (cannot fail; going past the end is detected by _end()).
 **/
int aml_tiling_iterator_1d_next(struct aml_tiling_iterator_data *data)
{
	struct aml_tiling_iterator_1d_data *iter;

	iter = (struct aml_tiling_iterator_1d_data *)data;
	iter->i += 1;
	return 0;
}
/**
 * Retrieves the current position of a 1D tiling iterator.
 * @param data: generic iterator data, backed by a 1D iterator.
 * @param args: variadic list holding one `unsigned long *` out-parameter,
 *              filled with the current tile index.
 * @return 0 (cannot fail).
 **/
int aml_tiling_iterator_1d_get(const struct aml_tiling_iterator_data *data,
			       va_list args)
{
	const struct aml_tiling_iterator_1d_data *iter =
		(const struct aml_tiling_iterator_1d_data *)data;
	unsigned long *out = va_arg(args, unsigned long *);

	*out = iter->i;
	return 0;
}
/**
 * Method table binding the generic tiling-iterator interface to the 1D
 * implementations above. Positional order must match
 * struct aml_tiling_iterator_ops: reset, next, end, get.
 **/
struct aml_tiling_iterator_ops aml_tiling_iterator_1d_ops = {
	aml_tiling_iterator_1d_reset,
	aml_tiling_iterator_1d_next,
	aml_tiling_iterator_1d_end,
	aml_tiling_iterator_1d_get,
};
/*******************************************************************************
* 1D ops
******************************************************************************/
/**
 * Maps variadic coordinates to a 1D tile id.
 * In one dimension the single coordinate IS the id, so this is a
 * pass-through (truncated to int by the generic interface).
 * @param t: unused; the mapping needs no tiling state.
 * @param ap: variadic list holding one size_t coordinate.
 * @return the coordinate as an int tile id.
 **/
int aml_tiling_1d_tileid(const struct aml_tiling_data *t, va_list ap)
{
	(void)t;
	return (int)va_arg(ap, size_t);
}
/**
 * Returns the size in bytes of one tile of a 1D tiling.
 * All tiles share the same block size.
 * @param t: tiling data, backed by a 1D tiling.
 * @param tileid: tile identifier; negative ids are invalid.
 * @return the block size, or 0 for an invalid (negative) id.
 **/
size_t aml_tiling_1d_tilesize(const struct aml_tiling_data *t, int tileid)
{
	const struct aml_tiling_1d_data *tiling =
		(const struct aml_tiling_1d_data *)t;

	return (tileid < 0) ? 0 : tiling->blocksize;
}
/**
 * Computes the start address of a tile inside a mapped buffer.
 * @param t: tiling data, backed by a 1D tiling.
 * @param ptr: base pointer of the buffer being tiled.
 * @param tileid: tile identifier; negative ids are invalid.
 * @return ptr offset by tileid blocks, or NULL for a negative id.
 **/
void *aml_tiling_1d_tilestart(const struct aml_tiling_data *t,
			      const void *ptr, int tileid)
{
	const struct aml_tiling_1d_data *tiling =
		(const struct aml_tiling_1d_data *)t;

	if (tileid < 0)
		return NULL;
	return (void *)((intptr_t)ptr + tileid * tiling->blocksize);
}
/**
 * Reports the extent of the tiling in its single dimension, i.e. the
 * number of tiles needed to cover totalsize bytes.
 * @param t: tiling data, backed by a 1D tiling.
 * @param ap: variadic list holding one `size_t *` out-parameter.
 * @return 0 (cannot fail).
 **/
int aml_tiling_1d_ndims(const struct aml_tiling_data *t, va_list ap)
{
	const struct aml_tiling_1d_data *tiling =
		(const struct aml_tiling_1d_data *)t;
	size_t *out = va_arg(ap, size_t *);
	size_t ntiles = tiling->totalsize / tiling->blocksize;

	/* A trailing partial block still counts as one tile. */
	if (tiling->totalsize % tiling->blocksize != 0)
		ntiles += 1;
	*out = ntiles;
	return 0;
}
/**
 * Initializes a caller-provided iterator over a 1D tiling.
 * @param t: the tiling to iterate over.
 * @param it: iterator whose data area is already allocated (asserted).
 * @param flags: unused.
 * @return 0 (cannot fail).
 **/
int aml_tiling_1d_init_iterator(struct aml_tiling_data *t,
				struct aml_tiling_iterator *it, int flags)
{
	(void)flags;
	assert(it->data != NULL);

	struct aml_tiling_iterator_1d_data *iter =
		(struct aml_tiling_iterator_1d_data *)it->data;

	it->ops = &aml_tiling_iterator_1d_ops;
	iter->i = 0;
	iter->tiling = (struct aml_tiling_1d_data *)t;
	return 0;
}
/**
 * Allocates and initializes an iterator over a 1D tiling.
 * On failure *it is left NULL.
 * @param tiling: the tiling to iterate over.
 * @param it: out-parameter receiving the new iterator.
 * @param flags: unused.
 * @return AML_SUCCESS, -AML_EINVAL on NULL out-parameter, or
 *         -AML_ENOMEM when allocation fails.
 **/
int aml_tiling_1d_create_iterator(struct aml_tiling_data *tiling,
				  struct aml_tiling_iterator **it, int flags)
{
	struct aml_tiling_iterator *out;
	struct aml_tiling_iterator_1d_data *iter;

	(void)flags;
	if (it == NULL)
		return -AML_EINVAL;
	*it = NULL;

	/* Header and private data live in one inner allocation. */
	out = AML_INNER_MALLOC_2(struct aml_tiling_iterator,
				 struct aml_tiling_iterator_1d_data);
	if (out == NULL)
		return -AML_ENOMEM;

	out->ops = &aml_tiling_iterator_1d_ops;
	out->data = AML_INNER_MALLOC_NEXTPTR(out, struct aml_tiling_iterator,
					     struct aml_tiling_iterator_1d_data);
	iter = (struct aml_tiling_iterator_1d_data *)out->data;
	iter->i = 0;
	iter->tiling = (struct aml_tiling_1d_data *)tiling;
	*it = out;
	return AML_SUCCESS;
}
/**
 * Destroys an iterator created by aml_tiling_1d_create_iterator and
 * resets the caller's pointer to NULL.
 * @param t: unused.
 * @param iter: in/out pointer to the iterator to free.
 * @return AML_SUCCESS, or -AML_EINVAL if iter or *iter is NULL.
 **/
int aml_tiling_1d_destroy_iterator(struct aml_tiling_data *t,
				   struct aml_tiling_iterator **iter)
{
	(void)t;
	if (iter == NULL || *iter == NULL)
		return -AML_EINVAL;
	/* The iterator was a single inner allocation: one free suffices. */
	free(*iter);
	*iter = NULL;
	return AML_SUCCESS;
}
/**
 * Method table binding the generic tiling interface to the 1D
 * implementations above. Positional order must match
 * struct aml_tiling_ops: create_iterator, destroy_iterator,
 * tileid, tilesize, tilestart, ndims.
 **/
struct aml_tiling_ops aml_tiling_1d_ops = {
	aml_tiling_1d_create_iterator,
	aml_tiling_1d_destroy_iterator,
	aml_tiling_1d_tileid,
	aml_tiling_1d_tilesize,
	aml_tiling_1d_tilestart,
	aml_tiling_1d_ndims,
};
/*******************************************************************************
* 1D create/destroy
******************************************************************************/
/**
 * Allocates and initializes a 1D tiling cutting totalsize bytes into
 * blocks of tilesize bytes. On failure *tiling is left NULL.
 * @param tiling: out-parameter receiving the new tiling.
 * @param tilesize: size in bytes of one tile; must be nonzero and no
 *                  larger than totalsize.
 * @param totalsize: size in bytes of the region being tiled.
 * @return AML_SUCCESS, -AML_EINVAL on invalid arguments, or
 *         -AML_ENOMEM when allocation fails.
 **/
int aml_tiling_1d_create(struct aml_tiling **tiling,
			 size_t tilesize, size_t totalsize)
{
	struct aml_tiling *ret = NULL;
	struct aml_tiling_1d_data *t;

	/* tilesize == 0 must be rejected here: aml_tiling_1d_ndims()
	 * divides by blocksize, so a zero tile would divide by zero. */
	if (tiling == NULL || tilesize == 0 || tilesize > totalsize)
		return -AML_EINVAL;
	*tiling = NULL;

	/* Header and private data live in one inner allocation. */
	ret = AML_INNER_MALLOC_2(struct aml_tiling, struct aml_tiling_1d_data);
	if (ret == NULL)
		return -AML_ENOMEM;

	ret->ops = &aml_tiling_1d_ops;
	ret->data = AML_INNER_MALLOC_NEXTPTR(ret, struct aml_tiling,
					     struct aml_tiling_1d_data);
	t = (struct aml_tiling_1d_data *) ret->data;
	t->blocksize = tilesize;
	t->totalsize = totalsize;

	*tiling = ret;
	return AML_SUCCESS;
}
/**
 * Destroys a tiling created by aml_tiling_1d_create and resets the
 * caller's pointer to NULL. Safe to call with NULL or already-destroyed
 * handles (no-op).
 * @param tiling: in/out pointer to the tiling to free.
 **/
void aml_tiling_1d_destroy(struct aml_tiling **tiling)
{
	if (tiling == NULL || *tiling == NULL)
		return;
	free(*tiling);
	*tiling = NULL;
}
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
#include "aml.h"
#include "aml/tiling/2d.h"
#include <assert.h>
/*******************************************************************************
* 2D Iterator
******************************************************************************/