......@@ -32,7 +32,8 @@ checkpatch:
- nix
- kvm
script:
- git ls-files *.c *.h | grep -v -e benchmarks >> .checkpatch.conf
- nix run -f "$ARGOPKGS" checkpatch --command checkpatch.pl --ignore TRAILING_SEMICOLON --ignore MULTISTATEMENT_MACRO_USE_DO_WHILE include/aml/utils/inner-malloc.h
- git ls-files *.c *.h | grep -v -e benchmarks | grep -v -e inner-malloc >> .checkpatch.conf
- nix run -f "$ARGOPKGS" checkpatch --command checkpatch.pl
style:docs:
......@@ -45,12 +46,12 @@ style:docs:
- kvm
script:
- |
nix-shell "$ARGOPKGS" -A aml-dist --arg aml-src ./. --run bash << EOF
./autogen.sh
mkdir build
./configure --prefix=`pwd`/build --enable-docs
make install-data
EOF
nix-shell "$ARGOPKGS" -A aml-dist --arg aml-src ./. --run bash <<< '
./autogen.sh
mkdir build
./configure --prefix=`pwd`/build --enable-docs
make install-data
'
artifacts:
when: on_failure
paths:
......@@ -58,7 +59,8 @@ style:docs:
make:generic:
tags:
- knl
- nix
- kvm
stage: build
except:
- /^wip.*/
......@@ -66,12 +68,15 @@ make:generic:
variables:
CFLAGS: "-std=c99 -pedantic -Wall -Wextra -Werror -Wno-unused-but-set-parameter"
script:
- ./autogen.sh
- mkdir build
- ./configure --prefix=`pwd`/build
- make
- make check
- make install
- |
nix-shell "$ARGOPKGS" -A aml --arg aml-src ./. --run bash <<< '
./autogen.sh
mkdir build
./configure --prefix=`pwd`/build
make
make check
make install
'
artifacts:
when: on_failure
paths:
......@@ -80,7 +85,8 @@ make:generic:
make:out-of-tree:
tags:
- knl
- nix
- kvm
stage: build
except:
- /^wip.*/
......@@ -88,13 +94,16 @@ make:out-of-tree:
variables:
CFLAGS: "-std=c99 -pedantic -Wall -Wextra -Werror -Wno-unused-but-set-parameter"
script:
- ./autogen.sh
- mkdir out
- cd out
- mkdir build
- ../configure --prefix=`pwd`/build
- make
- make check
- |
nix-shell "$ARGOPKGS" -A aml --arg aml-src ./. --run bash <<< '
./autogen.sh
mkdir out
cd out
mkdir build
../configure --prefix=`pwd`/build
make
make check
'
artifacts:
when: on_failure
paths:
......
......@@ -34,6 +34,8 @@ include_amlutils_HEADERS = \
aml/utils/error.h \
aml/utils/inner-malloc.h \
aml/utils/vector.h \
aml/utils/queue.h \
aml/utils/async.h \
aml/utils/version.h \
aml/utils/features.h
......@@ -37,6 +37,8 @@
#include "aml/utils/error.h"
#include "aml/utils/inner-malloc.h"
#include "aml/utils/vector.h"
#include "aml/utils/queue.h"
#include "aml/utils/async.h"
#include "aml/utils/version.h"
#include "aml/utils/features.h"
......
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
#ifndef AML_ASYNC_H
#define AML_ASYNC_H
/**
* @defgroup aml_async "AML Asynchronous work utils"
* @brief AML Asynchronous work utils
*
* This module is used internally by the library to manage asynchronous
* optimizations. In particular, it defines a task abstraction and a work
* queue backed by a thread pool, used by DMA operations to speed up work.
* @{
**/
//----------------------------------------------------------------------------//
// User task abstraction (see tests/utils/test_async.c)
//----------------------------------------------------------------------------//
/** Input to an asynchronous task **/
struct aml_task_in;
/** Output from an asynchronous task **/
struct aml_task_out;
/** Task meta data **/
struct aml_task_data;
/** Function to be executed in a task **/
typedef struct aml_task_out *(*aml_task_work) (struct aml_task_in *);
/** Task abstraction **/
struct aml_task {
/** Input **/
struct aml_task_in *in;
/** Where to store output **/
struct aml_task_out *out;
/** Work to do **/
aml_task_work fn;
/** Metadata **/
struct aml_task_data *data;
};
//----------------------------------------------------------------------------//
// Implementer abstraction
//----------------------------------------------------------------------------//
/** Metadata of a thread pool **/
struct aml_sched_data;
/** Methods that thread pools must implement **/
struct aml_sched_ops {
/** Submit a task to the pool **/
int (*submit)(struct aml_sched_data *data, struct aml_task *task);
/** Wait for a specific task to be completed **/
int (*wait)(struct aml_sched_data *data, struct aml_task *task);
/** Pull the next completed task from the pool **/
struct aml_task *(*wait_any)(struct aml_sched_data *data);
};
/** Thread pool abstraction **/
struct aml_sched {
/** Metadata **/
struct aml_sched_data *data;
/** Methods **/
struct aml_sched_ops *ops;
};
//----------------------------------------------------------------------------//
// User interface
//----------------------------------------------------------------------------//
/** Submit a task to the pool **/
int aml_sched_submit_task(struct aml_sched *pool, struct aml_task *task);
/** Wait for a specific task to be completed **/
int aml_sched_wait_task(struct aml_sched *pool, struct aml_task *task);
/** Pull the next completed task from the pool **/
struct aml_task *aml_sched_wait_any(struct aml_sched *pool);
//----------------------------------------------------------------------------//
// Simple task scheduler with pthread worker.
//----------------------------------------------------------------------------//
/**
* Create an active pool of "nt" threads to run queued tasks asynchronously,
* in FIFO order.
* If nt == 0, progress is made from the caller's thread during
* aml_sched_wait_task() and aml_sched_wait_any().
**/
struct aml_sched *aml_active_sched_create(const size_t nt);
/** Destroy an active thread pool and set it to NULL **/
void aml_active_sched_destroy(struct aml_sched **sched);
/** Get the number of tasks pushed to the scheduler and not yet pulled out. **/
int aml_active_sched_num_tasks(struct aml_sched *sched);
/**
* @}
**/
#endif //AML_ASYNC_H
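To make the intended flow concrete, here is a minimal usage sketch of the API
above (the no-op work function is our own stand-in; the real usage pattern is
in tests/utils/test_async.c, reproduced further down):

#include <assert.h>
#include "aml.h"
#include "aml/utils/async.h"

// Hypothetical work function: a real one would read its input and
// produce an output; this sketch returns NULL for simplicity.
static struct aml_task_out *noop_work(struct aml_task_in *in)
{
        (void)in;
        return NULL;
}

static void example(void)
{
        // Pool of 2 worker threads; with 0, the caller makes progress.
        struct aml_sched *sched = aml_active_sched_create(2);
        struct aml_task task = {
                .in = NULL, .out = NULL, .fn = noop_work, .data = NULL,
        };

        assert(sched != NULL);
        assert(aml_sched_submit_task(sched, &task) == AML_SUCCESS);
        // Wait for this specific task; aml_sched_wait_any() would
        // instead return the next completed task.
        assert(aml_sched_wait_task(sched, &task) == AML_SUCCESS);
        aml_active_sched_destroy(&sched);
}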
......@@ -24,59 +24,154 @@
* This code is all macros to handle the type specific logic we need.
**/
/** Returns the allocation size required to handle two objects side-by-side.
*
* Use an anonymous struct to ask the compiler what size an allocation should be
* so that the second object is properly aligned too.
*/
#define AML_SIZEOF_ALIGNED(a, b) \
(sizeof(struct { a __e1; b __e2; }))
//---------------------------------------------------------------------------//
// Inner utils
//---------------------------------------------------------------------------//
/** Returns the offset of the second object when allocated side-by-side.
*
* Use the same anonymous struct trick to figure out what offset the pointer is
* at.
*/
#define AML_OFFSETOF_ALIGNED(a, b) \
(offsetof(struct { a __e1; b __e2; }, __e2))
// Stringify macro
#define STRINGIFY(a) STRINGIFY_(a)
#define STRINGIFY_(a) #a
/** Allocate a pointer that can be used to contain two types. **/
// Concatenate two arguments into a macro name
#define CONCATENATE(arg1, arg2) CONCATENATE1(arg1, arg2)
#define CONCATENATE1(arg1, arg2) CONCATENATE2(arg1, arg2)
#define CONCATENATE2(arg1, arg2) arg1##arg2
// Expands to the number of variadic arguments, for up to 8 args:
// VA_NARG(a,b,c)
// -> PP_ARG_N(a,b,c,8,7,6,5,4,3,2,1,0)
// -> 3
#define VA_NARG(...) PP_ARG_N(__VA_ARGS__, 8, 7, 6, 5, 4, 3, 2, 1, 0)
#define PP_ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, N, ...) N
// Arithmetic
#define PLUS_1_1 2
#define PLUS_1_2 3
#define PLUS_1_3 4
#define PLUS_1_4 5
#define PLUS_1_5 6
#define PLUS_1_6 7
#define PLUS_1_7 8
#define PLUS_1(N) CONCATENATE(PLUS_1_, N)
// Field name in struct: __f1 for N = 1
#define AML_FIELD(N) CONCATENATE(__f, N)
// Struct fields declaration:
// one field: f1 __f1;
// two fields: f2 __f1; f1 __f2;
// three fields: f3 __f1; f2 __f2; f1 __f3;
// The fx types appear in the order the user provided them, while the __fx
// field names count upward, so that the user can refer to the second field
// by the name __f2.
#define AML_DECL_1(N, f1, ...) f1 AML_FIELD(N);
#define AML_DECL_2(N, f2, ...) \
f2 AML_FIELD(N); AML_DECL_1(PLUS_1(N), __VA_ARGS__)
#define AML_DECL_3(N, f3, ...) \
f3 AML_FIELD(N); AML_DECL_2(PLUS_1(N), __VA_ARGS__)
#define AML_DECL_4(N, f4, ...) \
f4 AML_FIELD(N); AML_DECL_3(PLUS_1(N), __VA_ARGS__)
#define AML_DECL_5(N, f5, ...) \
f5 AML_FIELD(N); AML_DECL_4(PLUS_1(N), __VA_ARGS__)
#define AML_DECL_6(N, f6, ...) \
f6 AML_FIELD(N); AML_DECL_5(PLUS_1(N), __VA_ARGS__)
#define AML_DECL_7(N, f7, ...) \
f7 AML_FIELD(N); AML_DECL_6(PLUS_1(N), __VA_ARGS__)
#define AML_DECL_8(N, f8, ...) \
f8 AML_FIELD(N); AML_DECL_7(PLUS_1(N), __VA_ARGS__)
// Declare a structure with up to 8 fields.
// (Pick the adequate AML_DECL_ macro and call it.)
#define AML_STRUCT_DECL(...) \
struct { \
CONCATENATE(AML_DECL_, VA_NARG(__VA_ARGS__))(1, __VA_ARGS__, 0) \
}
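Worked expansion for illustration (our own example; types chosen arbitrarily):

// AML_STRUCT_DECL(int, double) expands as:
//   VA_NARG(int, double)          -> 2
//   CONCATENATE(AML_DECL_, 2)     -> AML_DECL_2
//   AML_DECL_2(1, int, double, 0) -> int __f1; AML_DECL_1(2, double, 0)
// yielding the anonymous struct:
//   struct { int __f1; double __f2; }
// Note that sizeof also accounts for padding: on a typical LP64 target,
// AML_SIZEOF_ALIGNED(char, double) is 16, not 1 + 8 = 9, because the
// compiler pads __f1 so that __f2 is 8-byte aligned.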
/** Returns the size required for allocation of up to 8 types **/
#define AML_SIZEOF_ALIGNED(...) sizeof(AML_STRUCT_DECL(__VA_ARGS__))
/**
* Returns the size required for allocation of up to 7 types plus one array.
* @param n: The number of elements in array.
* @param type: The type of array elements.
* @param ...: Up to 7 field types preceding the array allocation space.
**/
#define AML_INNER_MALLOC_2(a, b) calloc(1, AML_SIZEOF_ALIGNED(a, b))
#define AML_SIZEOF_ALIGNED_ARRAY(n, type, ...) \
(sizeof(AML_STRUCT_DECL(__VA_ARGS__, type)) + \
((n)-1) * sizeof(type))
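The (n)-1 accounts for the one array element already declared inside the
struct. A worked instance for illustration (typical LP64 sizes assumed):

// AML_SIZEOF_ALIGNED_ARRAY(4, double, int)
//   == sizeof(struct { int __f1; double __f2; }) + 3 * sizeof(double)
//   == 16 + 24 == 40
// i.e. an int header plus a 4-element double array aligned after it.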
/** Allocate a pointer that can be used to contain two types plus an extra
* area aligned on a third type. **/
/** Returns the offset of the nth type of a list of up to 8 types. **/
#define AML_OFFSETOF_ALIGNED(N, ...) \
offsetof(AML_STRUCT_DECL(__VA_ARGS__), AML_FIELD(N))
//---------------------------------------------------------------------------//
// User Macros
//---------------------------------------------------------------------------//
/**
* Allocate zeroed space for up to 8 fields, each aligned as it would be
* in a struct.
* @param ...: Types contained in the allocation (up to 8).
**/
#define AML_INNER_MALLOC_EXTRA(a, b, c, sz) \
calloc(1, AML_SIZEOF_ALIGNED(struct { a __f1; b __f2; }, c) + \
(sizeof(c)*(sz)))
#define AML_INNER_MALLOC(...) calloc(1, AML_SIZEOF_ALIGNED(__VA_ARGS__))
/** Allocate a pointer that can be used to contain two types plus an extra
* area aligned on a third type, and extra bytes after that. **/
/**
* Allocate zeroed space for up to 7 fields aligned as in a struct, followed
* by one array.
* @param n: Number of elements in array.
* @param type: Type of array elements.
* @param ...: Up to 7 field types preceding the array allocation space.
**/
#define AML_INNER_MALLOC_4(a, b, c, sz, d) \
calloc(1, AML_SIZEOF_ALIGNED(struct { a __f1; b __f2; }, c) + \
(sizeof(c)*(sz)) + d)
#define AML_INNER_MALLOC_ARRAY(n, type, ...) \
calloc(1, AML_SIZEOF_ALIGNED_ARRAY(n, type, __VA_ARGS__))
/** Returns the next pointer after an AML_INNER_MALLOC.
*
* Can be used to iterate over the pointers we need, using the last two
* types as parameters. **/
/**
* Allocate zeroed space for up to 7 fields aligned as in a struct, one
* aligned array, and arbitrary extra space after that.
* @param n: Number of elements in array.
* @param type: Type of array elements.
* @param size: The extra space in bytes to allocate.
* @param ...: Up to 7 field types preceding the array allocation space.
**/
#define AML_INNER_MALLOC_NEXTPTR(ptr, a, b) \
(void *)(((intptr_t) ptr) + AML_OFFSETOF_ALIGNED(a, b))
#define AML_INNER_MALLOC_EXTRA(n, type, size, ...) \
calloc(1, AML_SIZEOF_ALIGNED_ARRAY(n, type, __VA_ARGS__) + size)
/** Returns a pointer inside the extra zone after an AML_INNER_MALLOC_EXTRA.
*
* Can be used to iterate over the pointers we need. **/
/**
* Returns a pointer to the Nth __VA_ARGS__ field of an AML_INNER_MALLOC*()
* allocation.
* @param ptr: A pointer obtained from AML_INNER_MALLOC*()
* @param N: The field number. N must be a literal number (1, 2, 3, 4, 5,
* 6, 7, 8) and not a variable.
* @param ...: Types contained in the allocation (up to 8).
* @return A pointer to the Nth field after ptr.
**/
#define AML_INNER_MALLOC_GET_FIELD(ptr, N, ...) \
(void *)(((intptr_t) ptr) + AML_OFFSETOF_ALIGNED(N, __VA_ARGS__))
/**
* Returns a pointer to the array after __VA_ARGS__ fields.
* @param ptr: Pointer returned by AML_INNER_MALLOC_ARRAY() or
* AML_INNER_MALLOC_EXTRA().
* @param type: Type of array elements.
* @param ...: Other types contained in allocation. (Up to 7)
**/
#define AML_INNER_MALLOC_GET_ARRAY(ptr, type, ...) \
AML_INNER_MALLOC_GET_FIELD(ptr, \
PLUS_1(VA_NARG(__VA_ARGS__)), \
__VA_ARGS__, type)
/**
* Returns a pointer to extra space allocated with
* AML_INNER_MALLOC_EXTRA().
* @param ptr: Pointer returned by AML_INNER_MALLOC_EXTRA().
* @param n: Number of elements in the array.
* @param type: Type of elements in the array.
* @param ...: Other types contained in allocation. (Up to 7)
**/
#define AML_INNER_MALLOC_EXTRA_NEXTPTR(ptr, a, b, c, off) \
(void *)(((intptr_t) ptr) + \
AML_OFFSETOF_ALIGNED(struct { a __f1; b __f2; }, c) + \
((off)*sizeof(c)))
#define AML_INNER_MALLOC_GET_EXTRA(ptr, n, type, ...) \
(void *)(((intptr_t) ptr) + \
AML_SIZEOF_ALIGNED_ARRAY(n, type, __VA_ARGS__))
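Putting the user macros together, a sketch of the single-allocation pattern
they enable (struct header and the example function are hypothetical, for
illustration only; compare with the call-site updates below):

struct header { size_t n; };

// One zeroed allocation holding a struct header, a double, an array of
// n size_t, and size extra bytes, each part aligned as in a struct.
static void *example(size_t n, size_t size)
{
        void *ptr = AML_INNER_MALLOC_EXTRA(n, size_t, size,
                                           struct header, double);
        if (ptr == NULL)
                return NULL;

        struct header *h = AML_INNER_MALLOC_GET_FIELD(ptr, 1,
                                                      struct header, double);
        double *d = AML_INNER_MALLOC_GET_FIELD(ptr, 2,
                                               struct header, double);
        size_t *arr = AML_INNER_MALLOC_GET_ARRAY(ptr, size_t,
                                                 struct header, double);
        void *extra = AML_INNER_MALLOC_GET_EXTRA(ptr, n, size_t,
                                                 struct header, double);

        h->n = n;
        (void)d; (void)arr; (void)extra;
        return ptr; // a single free(ptr) releases everything
}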
/**
* @}
......
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
#ifndef AML_QUEUE_H
#define AML_QUEUE_H
/**
* @defgroup aml_queue "AML Queue API"
* @brief AML Queue API
*
* Generic queue type allocated on the heap:
* a serial queue for pushing and popping pointers.
* @{
**/
/** queue struct definition **/
struct aml_queue {
/** Maximum capacity; extended when reached **/
size_t max;
/** Index of head **/
size_t head;
/** Index of tail **/
size_t tail;
/** Elements in the queue **/
void **elems;
};
/**
* Create a queue with pre-allocated space for max elements.
* @param[in] max: The number of elements fitting in the queue before
* triggering a resize.
* @return NULL if memory allocation failed.
**/
struct aml_queue *aml_queue_create(const size_t max);
/**
* Forget about elements stored in the queue.
**/
void aml_queue_clear(struct aml_queue *q);
/**
* Free the queue. Calling free() directly on the queue is also valid.
**/
void aml_queue_destroy(struct aml_queue *q);
/**
* Get the number of elements in the queue.
* @return 0 if q is NULL.
**/
size_t aml_queue_len(const struct aml_queue *q);
/**
* Add an element at the queue tail.
* @return -AML_ENOMEM if queue needed to be extended and allocation failed.
**/
int aml_queue_push(struct aml_queue **q, void *element);
/**
* Get an element out of the queue.
* @return NULL if queue is empty.
**/
void *aml_queue_pop(struct aml_queue *q);
/**
* Take a specific element out of the queue.
* @return NULL if queue does not contain the element.
**/
void *aml_queue_take(struct aml_queue *q, void *element);
/**
* @}
**/
#endif //AML_QUEUE_H
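A minimal usage sketch of this API (assertion-style, mirroring
tests/utils/test_queue.c further down; the example function is our own):

#include <assert.h>
#include "aml.h"
#include "aml/utils/queue.h"

static void example(void)
{
        int a = 1, b = 2;
        struct aml_queue *q = aml_queue_create(8);

        assert(q != NULL);
        // push takes struct aml_queue ** because the queue may move in
        // memory when it has to grow.
        assert(aml_queue_push(&q, &a) == AML_SUCCESS);
        assert(aml_queue_push(&q, &b) == AML_SUCCESS);
        assert(aml_queue_len(q) == 2);
        assert(aml_queue_take(q, &b) == &b); // remove a specific element
        assert(aml_queue_pop(q) == &a);      // otherwise FIFO order
        aml_queue_destroy(q);
}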
......@@ -22,7 +22,7 @@
* Major version changes in AML
* denotes ABI changes which prevent
* compatibility with previous major version ABI.
*
*
**/
#define AML_VERSION_MAJOR @PACKAGE_VERSION_MAJOR@
......
......@@ -35,6 +35,8 @@ UTILS_SOURCES = \
utils/bitmap.c \
utils/error.c \
utils/vector.c \
utils/queue.c \
utils/async.c \
utils/features.c
LIB_SOURCES = \
......
......@@ -223,12 +223,13 @@ int aml_area_cuda_create(struct aml_area **area,
if (device >= max_devices)
return -AML_EINVAL;
ret = AML_INNER_MALLOC_2(struct aml_area, struct aml_area_cuda_data);
ret = AML_INNER_MALLOC(struct aml_area,
struct aml_area_cuda_data);
if (ret == NULL)
return -AML_ENOMEM;
data = AML_INNER_MALLOC_NEXTPTR(ret, struct aml_area,
struct aml_area_cuda_data);
data = AML_INNER_MALLOC_GET_FIELD(ret, 2, struct aml_area,
struct aml_area_cuda_data);
ret->ops = &aml_area_cuda_ops;
ret->data = (struct aml_area_data *)data;
......
......@@ -196,12 +196,14 @@ int aml_area_linux_create(struct aml_area **area,
*area = NULL;
ret = AML_INNER_MALLOC_2(struct aml_area, struct aml_area_linux_data);
ret = AML_INNER_MALLOC(struct aml_area,
struct aml_area_linux_data);
if (ret == NULL)
return -AML_ENOMEM;
ret->data = AML_INNER_MALLOC_NEXTPTR(ret, struct aml_area,
struct aml_area_linux_data);
ret->data = AML_INNER_MALLOC_GET_FIELD(ret, 2,
struct aml_area,
struct aml_area_linux_data);
ret->ops = &aml_area_linux_ops;
data = (struct aml_area_linux_data *)ret->data;
......
......@@ -175,12 +175,13 @@ int aml_dma_linux_par_create(struct aml_dma **dma, size_t nbreqs,
*dma = NULL;
ret = AML_INNER_MALLOC_2(struct aml_dma, struct aml_dma_linux_par);
ret = AML_INNER_MALLOC(struct aml_dma, struct aml_dma_linux_par);
if (ret == NULL)
return -AML_ENOMEM;
ret->data = AML_INNER_MALLOC_NEXTPTR(ret, struct aml_dma,
struct aml_dma_linux_par);
ret->data = AML_INNER_MALLOC_GET_FIELD(ret, 2,
struct aml_dma,
struct aml_dma_linux_par);
ret->ops = &aml_dma_linux_par_ops;
d = (struct aml_dma_linux_par *)ret->data;
d->ops = aml_dma_linux_par_inner_ops;
......
......@@ -161,12 +161,13 @@ int aml_dma_linux_seq_create(struct aml_dma **dma, size_t nbreqs,
*dma = NULL;
ret = AML_INNER_MALLOC_2(struct aml_dma, struct aml_dma_linux_seq);
ret = AML_INNER_MALLOC(struct aml_dma, struct aml_dma_linux_seq);
if (ret == NULL)
return -AML_ENOMEM;
ret->data = AML_INNER_MALLOC_NEXTPTR(ret, struct aml_dma,
struct aml_dma_linux_seq);
ret->data = AML_INNER_MALLOC_GET_FIELD(ret, 2,
struct aml_dma,
struct aml_dma_linux_seq);
ret->ops = &aml_dma_linux_seq_ops;
d = (struct aml_dma_linux_seq *)ret->data;
......
......@@ -18,37 +18,32 @@ static int aml_layout_dense_alloc(struct aml_layout **ret,
struct aml_layout *layout;
struct aml_layout_dense *data;
layout = AML_INNER_MALLOC_EXTRA(struct aml_layout,
struct aml_layout_dense,
size_t, 3*ndims);
layout = AML_INNER_MALLOC_ARRAY(3*ndims, size_t,
struct aml_layout,
struct aml_layout_dense);
if (layout == NULL) {
*ret = NULL;
return -AML_ENOMEM;
}
data = AML_INNER_MALLOC_NEXTPTR(layout,
struct aml_layout,
struct aml_layout_dense);
data = AML_INNER_MALLOC_GET_FIELD(layout, 2,
struct aml_layout,
struct aml_layout_dense);
layout->data = (struct aml_layout_data *) data;
data->ptr = NULL;
data->ndims = ndims;
data->dims = AML_INNER_MALLOC_EXTRA_NEXTPTR(layout,
struct aml_layout,
struct aml_layout_dense,
size_t, 0);
data->stride = AML_INNER_MALLOC_EXTRA_NEXTPTR(layout,
struct aml_layout,
struct aml_layout_dense,
size_t, ndims);
data->dims = AML_INNER_MALLOC_GET_ARRAY(layout,
size_t,
struct aml_layout,
struct aml_layout_dense);
data->stride = data->dims + ndims;
for (size_t i = 0; i < ndims; i++)
data->stride[i] = 1;
data->cpitch = AML_INNER_MALLOC_EXTRA_NEXTPTR(layout,
struct aml_layout,
struct aml_layout_dense,
size_t, ndims*2);
data->cpitch = data->stride + ndims;
*ret = layout;
return AML_SUCCESS;
}
......
......@@ -17,30 +17,30 @@ static int aml_layout_pad_alloc(struct aml_layout **ret,
struct aml_layout *layout;
struct aml_layout_pad *data;
layout = AML_INNER_MALLOC_4(struct aml_layout,
struct aml_layout_pad,
size_t, 2*ndims, element_size);
layout = AML_INNER_MALLOC_EXTRA(2*ndims, size_t,
element_size,
struct aml_layout,
struct aml_layout_pad);
if (layout == NULL) {
*ret = NULL;
return -AML_ENOMEM;
}
data = AML_INNER_MALLOC_NEXTPTR(layout,
struct aml_layout,
struct aml_layout_pad);
data = AML_INNER_MALLOC_GET_FIELD(layout, 2,
struct aml_layout,
struct aml_layout_pad);
layout->data = (struct aml_layout_data *) data;
data->dims = AML_INNER_MALLOC_EXTRA_NEXTPTR(layout,
struct aml_layout,
struct aml_layout_pad,
size_t, 0);
data->target_dims = AML_INNER_MALLOC_EXTRA_NEXTPTR(layout,
struct aml_layout,
struct aml_layout_pad,
size_t, ndims);
data->neutral = AML_INNER_MALLOC_EXTRA_NEXTPTR(layout,
struct aml_layout,
struct aml_layout_pad,
size_t, 2*ndims);
data->dims = AML_INNER_MALLOC_GET_ARRAY(layout,
size_t,
struct aml_layout,
struct aml_layout_pad);
data->target_dims = data->dims + ndims;
data->neutral = AML_INNER_MALLOC_GET_EXTRA(layout,
2*ndims, size_t,
struct aml_layout,
struct aml_layout_pad);
data->target = NULL;
data->ndims = ndims;
data->element_size = element_size;
......
......@@ -21,30 +21,27 @@ static int aml_layout_reshape_alloc(struct aml_layout **ret,
struct aml_layout *layout;
struct aml_layout_data_reshape *data;
layout = AML_INNER_MALLOC_EXTRA(struct aml_layout,
struct aml_layout_data_reshape,
size_t, (2*ndims)+target_ndims);
layout = AML_INNER_MALLOC_ARRAY(2*ndims + target_ndims, size_t,
struct aml_layout,
struct aml_layout_data_reshape);
if (layout == NULL) {
*ret = NULL;
return -AML_ENOMEM;
}
data = AML_INNER_MALLOC_NEXTPTR(layout,
struct aml_layout,
struct aml_layout_data_reshape);
data = AML_INNER_MALLOC_GET_FIELD(layout, 2,
struct aml_layout,
struct aml_layout_data_reshape);
layout->data = (struct aml_layout_data *)data;
data->dims = AML_INNER_MALLOC_EXTRA_NEXTPTR(layout,
struct aml_layout,
struct aml_layout_data_reshape,
size_t, 0);
data->coffsets = AML_INNER_MALLOC_EXTRA_NEXTPTR(layout,
struct aml_layout,
struct aml_layout_data_reshape,
size_t, ndims);
data->target_dims = AML_INNER_MALLOC_EXTRA_NEXTPTR(layout,
struct aml_layout,
struct aml_layout_data_reshape,
size_t, 2*ndims);
data->dims = AML_INNER_MALLOC_GET_ARRAY(layout,
size_t,
struct aml_layout,
struct aml_layout_data_reshape);
data->coffsets = data->dims + ndims;
data->target_dims = data->coffsets + ndims;
data->target = NULL;
data->target_ndims = target_ndims;
......
......@@ -227,13 +227,15 @@ int aml_scratch_par_create(struct aml_scratch **scratch,
*scratch = NULL;
ret = AML_INNER_MALLOC_2(struct aml_scratch, struct aml_scratch_par);
ret = AML_INNER_MALLOC(struct aml_scratch,
struct aml_scratch_par);
if (ret == NULL)
return -AML_ENOMEM;
ret->ops = &aml_scratch_par_ops;
ret->data = AML_INNER_MALLOC_NEXTPTR(ret, struct aml_scratch,
struct aml_scratch_par);
ret->data = AML_INNER_MALLOC_GET_FIELD(ret, 2,
struct aml_scratch,
struct aml_scratch_par);
s = (struct aml_scratch_par *)ret->data;
s->ops = aml_scratch_par_inner_ops;
......
......@@ -222,13 +222,15 @@ int aml_scratch_seq_create(struct aml_scratch **scratch,
*scratch = NULL;
ret = AML_INNER_MALLOC_2(struct aml_scratch, struct aml_scratch_seq);
ret = AML_INNER_MALLOC(struct aml_scratch,
struct aml_scratch_seq);
if (ret == NULL)
return -AML_ENOMEM;
ret->ops = &aml_scratch_seq_ops;
ret->data = AML_INNER_MALLOC_NEXTPTR(ret, struct aml_scratch,
struct aml_scratch_seq);
ret->data = AML_INNER_MALLOC_GET_FIELD(ret, 2,
struct aml_scratch,
struct aml_scratch_seq);
s = (struct aml_scratch_seq *)ret->data;
s->ops = aml_scratch_seq_inner_ops;
......
......@@ -23,40 +23,35 @@ static int aml_tiling_pad_alloc(struct aml_tiling **ret, size_t ndims,
struct aml_tiling *tiling;
struct aml_tiling_pad *data;
tiling = AML_INNER_MALLOC_EXTRA(struct aml_tiling,
struct aml_tiling_pad,
size_t, 4*ndims + neutral_size);
tiling = AML_INNER_MALLOC_EXTRA(4*ndims, size_t,
neutral_size,
struct aml_tiling,
struct aml_tiling_pad);
if (tiling == NULL) {
*ret = NULL;
return -AML_ENOMEM;
}
data = AML_INNER_MALLOC_NEXTPTR(tiling,
struct aml_tiling,
struct aml_tiling_pad);
data = AML_INNER_MALLOC_GET_FIELD(tiling, 2,
struct aml_tiling,
struct aml_tiling_pad);
tiling->data = (struct aml_tiling_data *)data;
data->tile_dims = AML_INNER_MALLOC_EXTRA_NEXTPTR(tiling,
struct aml_tiling,
struct aml_tiling_pad,
size_t, 0);
data->dims = AML_INNER_MALLOC_EXTRA_NEXTPTR(tiling,
struct aml_tiling,
struct aml_tiling_pad,
size_t, ndims);
data->border_tile_dims = AML_INNER_MALLOC_EXTRA_NEXTPTR(tiling,
struct aml_tiling,
struct aml_tiling_pad,
size_t, 2*ndims);
data->pad = AML_INNER_MALLOC_EXTRA_NEXTPTR(tiling,
data->tile_dims = AML_INNER_MALLOC_GET_ARRAY(tiling,
size_t,
struct aml_tiling,
struct aml_tiling_pad);
data->dims = data->tile_dims + ndims;
data->border_tile_dims = data->dims + ndims;
data->pad = data->border_tile_dims + ndims;
data->neutral = AML_INNER_MALLOC_GET_EXTRA(tiling,
4*ndims, size_t,
struct aml_tiling,
struct aml_tiling_pad,
size_t, 3*ndims);
data->neutral = AML_INNER_MALLOC_EXTRA_NEXTPTR(tiling,
struct aml_tiling,
struct aml_tiling_pad,
size_t, 4*ndims);
struct aml_tiling_pad);
data->layout = NULL;
data->ndims = ndims;
*ret = tiling;
......
......@@ -21,31 +21,29 @@ static int aml_tiling_resize_alloc(struct aml_tiling **ret, size_t ndims)
struct aml_tiling *tiling;
struct aml_tiling_resize *data;
tiling = AML_INNER_MALLOC_EXTRA(struct aml_tiling,
struct aml_tiling_resize,
size_t, 3*ndims);
tiling = AML_INNER_MALLOC_ARRAY(3*ndims, size_t,
struct aml_tiling,
struct aml_tiling_resize);
if (tiling == NULL) {
*ret = NULL;
return -AML_ENOMEM;
}
data = AML_INNER_MALLOC_NEXTPTR(tiling,
struct aml_tiling,
struct aml_tiling_resize);
data = AML_INNER_MALLOC_GET_FIELD(tiling,
2,
struct aml_tiling,
struct aml_tiling_resize);
tiling->data = (struct aml_tiling_data *)data;
data->tile_dims = AML_INNER_MALLOC_EXTRA_NEXTPTR(tiling,
struct aml_tiling,
struct aml_tiling_resize,
size_t, 0);
data->dims = AML_INNER_MALLOC_EXTRA_NEXTPTR(tiling,
struct aml_tiling,
struct aml_tiling_resize,
size_t, ndims);
data->border_tile_dims = AML_INNER_MALLOC_EXTRA_NEXTPTR(tiling,
struct aml_tiling,
struct aml_tiling_resize,
size_t, 2*ndims);
data->tile_dims = AML_INNER_MALLOC_GET_ARRAY(tiling,
size_t,
struct aml_tiling,
struct aml_tiling_resize);
data->dims = data->tile_dims + ndims;
data->border_tile_dims = data->dims + ndims;
data->layout = NULL;
data->ndims = ndims;
*ret = tiling;
......
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
#include "aml.h"
#include "aml/utils/queue.h"
struct aml_queue *aml_queue_create(const size_t max)
{
struct aml_queue *q;
q = AML_INNER_MALLOC_ARRAY(max, void*, struct aml_queue);
if (q == NULL)
return NULL;
q->max = max;
q->head = 0;
q->tail = 0;
q->elems = AML_INNER_MALLOC_GET_ARRAY(q, void*, struct aml_queue);
return q;
}
void aml_queue_clear(struct aml_queue *q)
{
if (q != NULL) {
q->head = 0;
q->tail = 0;
}
}
void aml_queue_destroy(struct aml_queue *q)
{
free(q);
}
static struct aml_queue *aml_queue_extend(struct aml_queue *q)
{
const size_t len = q->max;
const size_t head = q->head;
const size_t tail = q->tail;
q = realloc(q,
AML_SIZEOF_ALIGNED_ARRAY(2*len, void*, struct aml_queue));
if (q == NULL)
return NULL;
q->elems = AML_INNER_MALLOC_GET_ARRAY(q, void*, struct aml_queue);
q->max = len * 2;
// If the elements are contiguous, no memmove is needed.
if (head < tail)
return q;
// The head segment [head, len) is smaller than the tail segment
// [0, tail): move it to the end of the extended buffer.
if (len-head < tail) {
q->head = q->max - len + head;
memmove(q->elems + q->head,
q->elems + head,
(len - head) * sizeof(void *));
}
// Otherwise move the wrapped tail segment [0, tail) just past the
// old end, after [head, len).
else {
memmove(q->elems + len,
q->elems,
tail * sizeof(void *));
q->tail = len + tail;
}
return q;
}
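/*
* Worked example of the two cases above (illustrative values; len = 4,
* so the capacity doubles to 8):
* - elems = [b, c, -, a], head = 3, tail = 2: the head segment [a] is
*   the smaller one, so it moves to the new end:
*   elems = [b, c, -, -, -, -, -, a], head = 7, tail = 2.
* - elems = [c, -, a, b], head = 2, tail = 1: the wrapped tail segment
*   [c] moves just past the old end (index 0 keeps a stale copy):
*   elems = [c, -, a, b, c, -, -, -], head = 2, tail = 5.
*/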
size_t aml_queue_len(const struct aml_queue *q)
{
// Match the documented contract: a NULL queue has length 0.
if (q == NULL)
return 0;
if (q->tail > q->head)
return q->tail - q->head;
if (q->head > q->tail)
return q->max - q->head + q->tail;
return 0;
}
int aml_queue_push(struct aml_queue **q, void *element)
{
struct aml_queue *r;
if (q == NULL || *q == NULL)
return -AML_EINVAL;
r = *q;
const size_t len = aml_queue_len(r);
if (len >= r->max - 1) {
r = aml_queue_extend(r);
if (r == NULL)
return -AML_ENOMEM;
*q = r;
}
r->elems[r->tail] = element;
r->tail = (r->tail + 1) % r->max;
return AML_SUCCESS;
}
void *aml_queue_pop(struct aml_queue *q)
{
void *out;
if (q == NULL || q->tail == q->head)
return NULL;
out = q->elems[q->head];
q->head = (q->head + 1) % q->max;
return out;
}
/**
* Take an element out and stitch the circular buffer to
* make elements contiguous again.
**/
void *aml_queue_take(struct aml_queue *q, void *element)
{
// NULL or empty queue.
if (q == NULL || q->tail == q->head)
return NULL;
// All elements are contiguous between head and tail.
if (q->tail > q->head) {
// Shift elements after the removed one left by one.
for (size_t i = q->head; i < q->tail; i++) {
if (q->elems[i] == element) {
memmove(q->elems + i,
q->elems + i + 1,
sizeof(void *) * (q->tail - i - 1));
q->tail--;
return element;
}
}
return NULL;
}
// tail is before head
if (q->tail < q->head) {
// move elements after the one removed by one to the left,
// when the element is between 0 and tail.
for (size_t i = 0; i < q->tail; i++) {
if (q->elems[i] == element) {
memmove(q->elems + i,
q->elems + i + 1,
sizeof(void *) * (q->tail - i - 1));
q->tail--;
return element;
}
}
// move elements after the one removed by one to the left,
// when the element is between head and end. Then move
// element at index 0 to the end. Finally slide elements from
// 1 to tail by one to the left.
for (size_t i = q->head; i < q->max; i++) {
if (q->elems[i] == element) {
memmove(q->elems + i,
q->elems + i + 1,
sizeof(void *) * (q->max - i - 1));
q->elems[q->max - 1] = q->elems[0];
if (q->tail > 0) {
memmove(q->elems,
q->elems + 1,
sizeof(void *) * (q->tail - 1));
q->tail--;
} else
q->tail = q->max - 1;
return element;
}
}
return NULL;
}
return NULL;
}
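/*
* Worked example of the wrapped case (illustrative): max = 4,
* elems = [c, -, a, b], head = 2, tail = 1, queue order a, b, c.
* aml_queue_take(q, b) finds b at index 3 in the [head, max) scan:
* 1. shift (max - i - 1) = 0 elements left:           [c, -, a, b]
* 2. elems[max - 1] = elems[0] wraps c back in range: [c, -, a, c]
* 3. tail > 0: shift (tail - 1) = 0 elements, tail becomes 0.
* The queue now holds a, c at indices 2, 3, with head = 2, tail = 0.
*/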
......@@ -29,6 +29,8 @@ UTILS_TESTS = \
utils/test_bitmap \
utils/test_inner_malloc \
utils/test_vector \
utils/test_async \
utils/test_queue \
utils/test_version
AREA_TESTS = \
......
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
#define _POSIX_C_SOURCE 199309L // nanosleep
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#include <unistd.h>
#include "aml.h"
#include "aml/utils/async.h"
//----------------------------------------------------------------------------//
// Task Mockup Implementation for Tests
//----------------------------------------------------------------------------//
struct aml_task_out *aml_task_mockup_work(struct aml_task_in *in)
{
(void)in;
struct timespec us;
us.tv_sec = 0;
us.tv_nsec = 1000 * (rand() % 10);
nanosleep(&us, NULL);
return NULL;
}
struct aml_task aml_task_mockup = {
.in = NULL,
.out = NULL,
.data = NULL,
.fn = aml_task_mockup_work,
};
//----------------------------------------------------------------------------//
// Tests
//----------------------------------------------------------------------------//
void test_scheduler(struct aml_sched *sched, const unsigned int nt)
{
struct aml_task *t;
struct aml_task tasks[nt];
for (unsigned int i = 0; i < nt; i++)
tasks[i] = aml_task_mockup;
// Submit one task.
assert(aml_sched_submit_task(sched, tasks) == AML_SUCCESS);
t = aml_sched_wait_any(sched);
assert(t == tasks);
// Submit all tasks.
for (unsigned int i = 0; i < nt; i++)
assert(aml_sched_submit_task(sched, tasks + i) == AML_SUCCESS);
// Wait for one specific task.
assert(aml_sched_wait_task(sched, tasks + (nt / 2)) == AML_SUCCESS);
for (unsigned int i = 0; i < nt - 1; i++) {
t = aml_sched_wait_any(sched);
assert(t != NULL);
assert(t >= tasks);
assert(t < tasks + nt);
}
}
//----------------------------------------------------------------------------//
// Main
//----------------------------------------------------------------------------//
int main(void)
{
// set seed for tasks sleep.
srand(0);
struct aml_sched *as = aml_active_sched_create(4);
assert(as != NULL);
test_scheduler(as, 256);
aml_active_sched_destroy(&as);
as = aml_active_sched_create(0);
assert(as != NULL);
test_scheduler(as, 256);
aml_active_sched_destroy(&as);
return 0;
}
......@@ -13,10 +13,10 @@
int main(void)
{
intptr_t *ptr = AML_INNER_MALLOC_2(void *, void *);
intptr_t *ptr = AML_INNER_MALLOC(void *, void *);
assert(ptr != NULL);
void *b = AML_INNER_MALLOC_NEXTPTR(ptr, void *, void *);
void *b = AML_INNER_MALLOC_GET_FIELD(ptr, 2, void *, void *);
assert(b == &ptr[1]);
free(ptr);
......
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
#include "aml.h"
#include <assert.h>
#define SIZE 64
int mem[SIZE];
struct aml_queue *q;
int main(void)
{
int tail = 0, head = 0;
for (int i = 0; i < SIZE; i++)
mem[i] = i;
q = aml_queue_create(SIZE);
assert(q != NULL);
assert(aml_queue_len(q) == 0);
aml_queue_clear(q);
assert(aml_queue_len(q) == 0);
// Check insertion
for (int i = 0; i < SIZE * 2; i++, tail++) {
void *e = &mem[tail % SIZE];
aml_queue_push(&q, e);
assert((int)aml_queue_len(q) == (tail - head) + 1);
assert(q->elems[q->tail - 1] == e);
}
// Check that elements pop out in insertion order.
for (int i = 0; i < SIZE; i++, head++) {
void *e = aml_queue_pop(q);
assert(e == &mem[head % SIZE]);
assert((int)aml_queue_len(q) == (tail - head) - 1);
}
// Reinsert enough elements to trigger a resize.
for (int i = 0; i < SIZE * 2; i++, tail++) {
aml_queue_push(&q, (void *)(&mem[tail % SIZE]));
assert((int)aml_queue_len(q) == (tail - head) + 1);
}
// Check taking specific elements out of the queue.
assert(aml_queue_take(q, NULL) == NULL);
for (int i = 0; i < SIZE; i++, head++)
assert(aml_queue_take(q, mem+i) == mem+i);
// Drain the remaining elements and check the queue empties in order.
int len = aml_queue_len(q);
assert(len == SIZE*2);
for (int i = 0; i < len; i++, head++) {
void *e = aml_queue_pop(q);
assert(e == &mem[head % SIZE]);
assert((int)aml_queue_len(q) == (tail - head) - 1);
}
assert(aml_queue_pop(q) == NULL);
free(q);
return 0;
}