Commit c3a501b9 authored by Nicolas Denoyelle, committed by Swann Perarnau

[feature] add area options on mmap

## Change the area mmap interface to include implementation-specific options.

### Summary of changes in mmap interface
The previous mmap interface was:
```
int aml_area_mmap(struct aml_area *area, void *ptr, size_t size);
```
First, the purpose of `ptr` in this interface is fuzzy: it is implementation specific, and neither the user nor the generic aml interface knows what to do with it in a generic way.
Second, this interface does not allow for extensive options. In particular, Linux mmap() takes multiple arguments that cannot be passed elegantly through this interface.
Instead, we replace aml_area_mmap() with this new interface:
```
int aml_area_mmap(struct aml_area *area, size_t size, struct aml_area_options *options);
```
Here, the last argument clearly states that it is used for options and can always be NULL, so the function can be used explicitly without any knowledge of the options implementation.
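
For illustration, here is a minimal sketch of the new calling convention. The header paths, the NULL nodemask, and the cleanup calls `aml_area_munmap()` / `aml_area_linux_destroy()` are assumptions based on the surrounding API, not part of this change:
```
#include <assert.h>
#include <aml.h>
#include <aml/area/linux.h>

int main(void)
{
	struct aml_area *area;
	size_t size = 1UL << 20;

	/* Bind allocations with the default nodemask (NULL assumed valid). */
	aml_area_linux_create(&area, NULL, AML_AREA_LINUX_POLICY_BIND);
	assert(area != NULL);

	/* New argument order: size first, options last; options may be NULL. */
	void *buf = aml_area_mmap(area, size, NULL);
	assert(buf != NULL);

	aml_area_munmap(area, buf, size);
	aml_area_linux_destroy(&area);
	return 0;
}
```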
parent 001799b6
@@ -34,16 +34,14 @@ int main(int argc, char *argv[])
 	long int N = atol(argv[3]);
 	unsigned long memsize = sizeof(double)*N*N;
-	aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
 	assert(slow != NULL);
-	aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
 	assert(fast != NULL);
-	a = aml_area_mmap(slow, NULL, memsize);
-	b = aml_area_mmap(slow, NULL, memsize);
-	c = aml_area_mmap(fast, NULL, memsize);
+	a = aml_area_mmap(slow, memsize, NULL);
+	b = aml_area_mmap(slow, memsize, NULL);
+	c = aml_area_mmap(fast, memsize, NULL);
 	assert(a != NULL && b != NULL && c != NULL);
 	double alpha = 1.0, beta = 1.0;
...
@@ -77,17 +77,15 @@ int main(int argc, char* argv[])
 	assert(!aml_tiling_2d_create(&tiling_col, AML_TILING_TYPE_2D_COLMAJOR,
 				     tilesize, memsize, N/T , N/T));
-	aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
 	assert(slow != NULL);
-	aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
 	assert(fast != NULL);
 	/* allocation */
-	a = aml_area_mmap(slow, NULL, memsize);
-	b = aml_area_mmap(slow, NULL, memsize);
-	c = aml_area_mmap(fast, NULL, memsize);
+	a = aml_area_mmap(slow, memsize, NULL);
+	b = aml_area_mmap(slow, memsize, NULL);
+	c = aml_area_mmap(fast, memsize, NULL);
 	assert(a != NULL && b != NULL && c != NULL);
 	size_t ntilerows, ntilecols, tilerowsize, tilecolsize, rowsize, colsize;
...
@@ -105,20 +105,18 @@ int main(int argc, char* argv[])
 	assert(!aml_tiling_1d_create(&tiling_prefetch,
 				     tilesize*(N/T), memsize));
-	aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
 	assert(slow != NULL);
-	aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
 	assert(fast != NULL);
 	assert(!aml_dma_linux_seq_create(&dma, 2));
 	assert(!aml_scratch_par_create(&sa, fast, slow, dma, tiling_prefetch, (size_t)2, (size_t)2));
 	assert(!aml_scratch_par_create(&sb, fast, slow, dma, tiling_prefetch, (size_t)2, (size_t)2));
 	/* allocation */
-	a = aml_area_mmap(slow, NULL, memsize);
-	b = aml_area_mmap(slow, NULL, memsize);
-	c = aml_area_mmap(fast, NULL, memsize);
+	a = aml_area_mmap(slow, memsize, NULL);
+	b = aml_area_mmap(slow, memsize, NULL);
+	c = aml_area_mmap(fast, memsize, NULL);
 	assert(a != NULL && b != NULL && c != NULL);
 	size_t ntilerows, ntilecols, tilerowsize, tilecolsize, rowsize, colsize;
...
@@ -53,9 +53,9 @@ int main(int argc, char *argv[])
 		chunk_msz = MEMSIZE/(numthreads*CHUNKING);
 		esz = chunk_msz/sizeof(unsigned long);
 	}
-	a = aml_area_mmap(slow, NULL, MEMSIZE);
-	b = aml_area_mmap(slow, NULL, MEMSIZE);
-	c = aml_area_mmap(fast, NULL, MEMSIZE);
+	a = aml_area_mmap(slow, MEMSIZE, NULL);
+	b = aml_area_mmap(slow, MEMSIZE, NULL);
+	c = aml_area_mmap(fast, MEMSIZE, NULL);
 	assert(a != NULL && b != NULL && c != NULL);
 	/* create virtually accessible address range, backed by slow memory */
...
@@ -64,11 +64,9 @@ int main(int argc, char *argv[])
 	/* initialize all the supporting struct */
 	assert(!aml_tiling_1d_create(&tiling, tilesz, memsize));
-	aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
 	assert(slow != NULL);
-	aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
 	assert(fast != NULL);
 	assert(!aml_dma_linux_par_create(&dma, numthreads*2));
 	assert(!aml_scratch_seq_create(&sa, fast, slow, dma, tiling,
@@ -77,9 +75,9 @@ int main(int argc, char *argv[])
 				       (size_t)2*numthreads, (size_t)1));
 	/* allocation */
-	a = aml_area_mmap(slow, NULL, memsize);
-	b = aml_area_mmap(slow, NULL, memsize);
-	c = aml_area_mmap(fast, NULL, memsize);
+	a = aml_area_mmap(slow, memsize, NULL);
+	b = aml_area_mmap(slow, memsize, NULL);
+	c = aml_area_mmap(fast, memsize, NULL);
 	assert(a != NULL && b != NULL && c != NULL);
 	unsigned long esize = memsize/sizeof(unsigned long);
...
@@ -95,11 +95,9 @@ int main(int argc, char *argv[])
 	/* initialize all the supporting struct */
 	assert(!aml_tiling_1d_create(&tiling, tilesz, memsize));
-	aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
 	assert(slow != NULL);
-	aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
 	assert(fast != NULL);
 	assert(!aml_dma_linux_seq_create(&dma, numthreads*2));
 	assert(!aml_scratch_par_create(&sa, fast, slow, dma, tiling,
@@ -108,9 +106,9 @@ int main(int argc, char *argv[])
 				       2*numthreads, numthreads));
 	/* allocation */
-	a = aml_area_mmap(slow, NULL, memsize);
-	b = aml_area_mmap(slow, NULL, memsize);
-	c = aml_area_mmap(fast, NULL, memsize);
+	a = aml_area_mmap(slow, memsize, NULL);
+	b = aml_area_mmap(slow, memsize, NULL);
+	c = aml_area_mmap(fast, memsize, NULL);
 	assert(a != NULL && b != NULL && c != NULL);
 	unsigned long esize = memsize/sizeof(unsigned long);
...
@@ -101,11 +101,9 @@ int main(int argc, char *argv[])
 	/* initialize all the supporting struct */
 	assert(!aml_tiling_1d_create(&tiling, tilesz, memsize));
-	aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
 	assert(slow != NULL);
-	aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
 	assert(fast != NULL);
 	assert(!aml_dma_linux_seq_create(dma, (size_t)numthreads*4));
 	assert(!aml_scratch_par_create(&sa, fast, slow, dma, tiling,
@@ -114,9 +112,9 @@ int main(int argc, char *argv[])
 				       (size_t)2*numthreads, (size_t)numthreads));
 	/* allocation */
-	a = aml_area_mmap(slow, NULL, memsize);
-	b = aml_area_mmap(slow, NULL, memsize);
-	c = aml_area_mmap(fast, NULL, memsize);
+	a = aml_area_mmap(slow, memsize, NULL);
+	b = aml_area_mmap(slow, memsize, NULL);
+	c = aml_area_mmap(fast, memsize, NULL);
 	assert(a != NULL && b != NULL && c != NULL);
 	unsigned long esize = memsize/sizeof(unsigned long);
@@ -127,7 +125,7 @@ int main(int argc, char *argv[])
 	}
 	/* run kernel */
-	struct winfo *wis = aml_area_mmap(slow, NULL, numthreads * sizeof(struct winfo));
+	struct winfo *wis = aml_area_mmap(slow, numthreads * sizeof(struct winfo), NULL);
 	for(unsigned long i = 0; i < numthreads; i++) {
 		wis[i].tid = i;
 		pthread_create(&wis[i].th, NULL, &th_work, (void*)&wis[i]);
...
@@ -266,9 +266,9 @@ main(int argc, char *argv[])
 	aml_init(&argc, &argv);
 	size_t size = sizeof(STREAM_TYPE)*(STREAM_ARRAY_SIZE+OFFSET);
 	struct aml_area *area = aml_area_linux;
-	a = aml_area_mmap(area, NULL, size);
-	b = aml_area_mmap(area, NULL, size);
-	c = aml_area_mmap(area, NULL, size);
+	a = aml_area_mmap(area, size, NULL);
+	b = aml_area_mmap(area, size, NULL);
+	c = aml_area_mmap(area, size, NULL);
 	/* Get initial value for system clock. */
 #pragma omp parallel for
...
@@ -104,6 +104,14 @@ int aml_finalize(void);
  **/
 struct aml_area_data;
+/**
+ * Opaque handle used to pass additional options to the area mmap hook.
+ * This is implementation specific and cannot be used as a
+ * generic interface, but rather is meant for customizing area behaviour
+ * on a per-mmap basis.
+ **/
+struct aml_area_mmap_options;
 /**
  * aml_area_ops is a structure containing implementations
  * of an area operations.
@@ -114,21 +122,20 @@ struct aml_area_ops {
 	/**
 	 * Building block for coarse grain allocator of virtual memory.
 	 *
-	 * @param data: Opaque handle to implementation specific data.
-	 * @param ptr: A virtual address to be used by underlying
-	 * implementation.
-	 * Can be NULL.
-	 * @param size: The minimum size of allocation.
+	 * @param[in] data: Opaque handle to implementation specific data.
+	 * @param[in] size: The minimum size of allocation.
 	 * Is greater than 0. Must not fail unless not enough
 	 * memory is available, or ptr argument does not point to a
 	 * suitable address.
 	 * In case of failure, aml_errno must be set to an appropriate
 	 * value.
+	 * @param[in/out] opts: Opaque handle to pass additional options to the
+	 * area mmap hook. Can be NULL; the hook must work with NULL opts.
	 * @return a pointer to allocated memory object.
 	 **/
 	void* (*mmap)(const struct aml_area_data *data,
-		      void *ptr,
-		      size_t size);
+		      size_t size,
+		      struct aml_area_mmap_options *opts);
 	/**
 	 * Building block for unmapping of virtual memory mapped with mmap()
@@ -162,14 +169,16 @@ struct aml_area {
 /**
  * Low-level function for getting memory from an area.
- * @param area: A valid area implementing access to target memory.
- * @param ptr: Implementation specific argument. See specific header.
- * @param size: The usable size of memory returned.
+ * @param[in] area: A valid area implementing access to target memory.
+ * @param[in] size: The usable size of memory returned.
+ * @param[in, out] opts: Opaque handle to pass additional options to the area
+ * mmap hook.
 * @return virtual memory from this area with at least queried size bytes.
+ * @return NULL on failure, with aml_errno set to the appropriate error
+ * code.
 **/
 void *aml_area_mmap(const struct aml_area *area,
-		    void **ptr,
-		    size_t size);
+		    size_t size,
+		    struct aml_area_mmap_options *opts);
 /**
  * Release data provided with aml_area_mmap() and the same area.
...
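To make the new hook contract concrete, here is a hedged sketch of a hypothetical backend mmap hook under the new signature. Only the signature, the NULL-options contract, and the aml_errno convention come from the header above; the backend name and its option struct are invented for illustration:
```
#include <stdlib.h>
#include <string.h>
#include <aml.h>

/* Hypothetical backend-specific options, invented for illustration. */
struct my_mmap_options {
	int zeroed; /* if nonzero, return zero-filled memory */
};

static void *my_area_mmap(const struct aml_area_data *data,
			  size_t size,
			  struct aml_area_mmap_options *opts)
{
	(void)data;
	/* Per the interface contract, the hook must work when opts is NULL. */
	struct my_mmap_options *o = (struct my_mmap_options *)opts;
	void *ptr = malloc(size);

	if (ptr == NULL) {
		aml_errno = AML_ENOMEM; /* set aml_errno on failure */
		return NULL;
	}
	if (o != NULL && o->zeroed)
		memset(ptr, 0, size);
	return ptr;
}
```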
@@ -27,32 +27,103 @@
  **/
 /**
- * Structure containing aml area hooks for cuda implementation.
- * For now there is only a single implementation of the hooks.
- * This implementation will choose between different cuda functions.
+ * Default cuda area flags.
+ * * Allocation on device only,
+ * * Allocation visible by a single device.
+ * * Allocation not mapped on host memory.
  **/
-extern struct aml_area_ops aml_area_cuda_ops;
+#define AML_AREA_CUDA_FLAG_DEFAULT 0
 /**
- * Default cuda area with private mapping in current device.
- * Can be used out of the box with aml_area_*() functions.
+ * Device allocation flag.
+ * Default behaviour is allocation on device.
+ * If this flag is set, then allocation will
+ * be on host.
  **/
-extern struct aml_area aml_area_cuda;
+#define AML_AREA_CUDA_FLAG_ALLOC_HOST (1 << 0)
+/**
+ * Mapping flag.
+ * Default behaviour is an allocation that is not mapped.
+ * If set, the pointer returned by the mmap function
+ * will be host side memory mapped on device.
+ * A pointer to device memory can then be retrieved
+ * by calling cudaHostGetDevicePointer().
+ * If AML_AREA_CUDA_FLAG_ALLOC_HOST is set, then
+ * host side memory will be allocated. Else, the
+ * "ptr" field of the mmap options will be used to map
+ * device memory ("ptr" must not be NULL).
+ *
+ * @see cudaHostRegister(), cudaHostAlloc().
+ **/
+#define AML_AREA_CUDA_FLAG_ALLOC_MAPPED (1 << 1)
+/**
+ * Unified memory flag.
+ * If this flag is set, then allocation will create
+ * a unified memory pointer usable on host and device.
+ * Additionally, AML_AREA_CUDA_FLAG_ALLOC_HOST and
+ * AML_AREA_CUDA_FLAG_ALLOC_MAPPED will be ignored.
+ *
+ * @see cudaMallocManaged()
+ **/
+#define AML_AREA_CUDA_FLAG_ALLOC_UNIFIED (1 << 2)
+/**
+ * Global visibility flag.
+ * If AML_AREA_CUDA_FLAG_ALLOC_UNIFIED is set,
+ * then this flag is checked to set the
+ * cudaMallocManaged() flag cudaAttachGlobal.
+ * Else, if AML_AREA_CUDA_FLAG_ALLOC_MAPPED
+ * or AML_AREA_CUDA_FLAG_ALLOC_HOST is set,
+ * then this flag is checked to set the cudaMallocHost()
+ * flag cudaHostAllocPortable.
+ * The default behaviour is to make allocations
+ * visible from a single device. If this flag is set,
+ * then allocations will be visible on all devices.
+ *
+ * @see cudaMallocManaged()
+ **/
+#define AML_AREA_CUDA_FLAG_ALLOC_GLOBAL (1 << 3)
 /**
- * Allocation flags to pass to cudaMallocManaged().
- * @see cuda runtime API documentation / memory management.
+ * Options that can optionally be passed to the mmap
+ * call.
  **/
-enum aml_area_cuda_flags {
-	AML_AREA_CUDA_ATTACH_GLOBAL,
-	AML_AREA_CUDA_ATTACH_HOST,
-};
+struct aml_area_cuda_mmap_options {
+	/**
+	 * Specify a different device for one mmap call.
+	 * If device < 0, use the area device.
+	 **/
+	int device;
+	/**
+	 * Host memory pointer used for mapped allocations.
+	 * If the flag AML_AREA_CUDA_FLAG_ALLOC_MAPPED is set
+	 * and ptr is NULL, ptr will be overwritten with
+	 * host allocated memory and will have to be freed
+	 * using cudaFreeHost().
+	 **/
+	void *ptr;
+};
+/** aml area hooks for cuda implementation. **/
+extern struct aml_area_ops aml_area_cuda_ops;
+/**
+ * Default cuda area:
+ * Allocation on device, visible by a single device,
+ * and not mapped on host memory.
+ **/
+extern struct aml_area aml_area_cuda;
 /** Implementation of aml_area_data for cuda areas. **/
 struct aml_area_cuda_data {
-	/** allocation flags in cuda format **/
+	/** Area allocation flags. **/
 	int flags;
-	/** The device id on which allocation is done. **/
+	/**
+	 * The device id on which allocation is done.
+	 * If device < 0, use current device.
+	 **/
 	int device;
 };
@@ -62,8 +133,8 @@ struct aml_area_cuda_data {
  * @param[out] area pointer to an uninitialized struct aml_area pointer to
  * receive the new area.
  * @param[in] device: A valid cuda device id, i.e from 0 to num_devices-1.
- * If device id is negative, then no cuda device will be selected when
- * using aml_area_cuda_mmap().
+ * If device id is negative, then the current cuda device will be used by
+ * aml_area_cuda_mmap().
  * @param[in] flags: Allocation flags.
  *
  * @return AML_SUCCESS on success and area points to the new aml_area.
@@ -72,11 +143,10 @@ struct aml_area_cuda_data {
  * of devices.
  * @return -AML_ENOMEM if space to carry area cannot be allocated.
  *
- * @see enum aml_area_cuda_flags.
+ * @see AML_AREA_CUDA_FLAG_*.
  **/
 int aml_area_cuda_create(struct aml_area **area,
-			 const int device,
-			 const enum aml_area_cuda_flags flags);
+			 const int device, const int flags);
 /**
  * \brief Cuda area destruction.
@@ -94,8 +164,9 @@ void aml_area_cuda_destroy(struct aml_area **area);
  * This function is a wrapper on cuda alloc functions.
  * It uses area settings to: select device on which to perform allocation,
  * select allocation function and set its parameters.
- * Allocations can be standalone on device, shared across multiple devices,
- * and backed with cpu memory.
+ * Any pointer obtained through aml_area_cuda_mmap() must be unmapped with
+ * aml_area_cuda_munmap().
+ *
 * Device selection is not thread safe and requires to set the global
 * state of cuda library. When selecting a device, allocation may succeed
 * while setting device back to original context devices may fail. In that
@@ -103,31 +174,38 @@ void aml_area_cuda_destroy(struct aml_area **area);
 * function in order to catch the error when return value is not NULL.
 *
 * @param[in] area_data: The structure containing cuda area settings.
- * @param[in, out] ptr: If ptr is NULL, then call cudaMallocManaged() with
- * area flags. Memory will be allocated only device side.
- * If ptr is not NULL:
- * * ptr must point to a valid memory area.
- * Device side memory will be mapped on this host side memory.
- * According to cuda runtime API documentation
- * (cudaHostRegister()), host side memory pages will be locked or allocation
- * will fail.
 * @param[in] size: The size to allocate.
+ * @param[in] options: A struct aml_area_cuda_mmap_options *. If its "device"
+ * field is >= 0, it will be used to select the target device.
+ * If the area flag AML_AREA_CUDA_FLAG_ALLOC_MAPPED is set and
+ * AML_AREA_CUDA_FLAG_ALLOC_HOST is not set, then the options field "ptr"
+ * must not be NULL and must point to host memory that can be mapped on GPU.
 *
- * @return A cuda pointer to allocated device memory on success, NULL on
- * failure. If failure occures, aml_errno variable is set with one of the
- * following values:
- * * AML_ENOTSUP is one of the cuda calls failed with error:
+ * @return NULL on failure, with aml_errno set to one of the following error
+ * codes:
+ * * AML_ENOTSUP if one of the cuda calls failed with error:
 * cudaErrorInsufficientDriver, cudaErrorNoDevice.
- * * AML_EINVAL if target device id is not valid.
- * * AML_EBUSY if a specific device was requested and call to failed with error
- * cudaErrorDeviceAlreadyInUse, or if region was already mapped on device.
+ * * AML_EINVAL if the target device id is not valid or the provided
+ * arguments are not compatible.
+ * * AML_EBUSY if a specific device was requested but was already in use.
 * * AML_ENOMEM if memory allocation failed with error
 * cudaErrorMemoryAllocation.
 * * AML_FAILURE if one of the cuda calls resulted in error
 * cudaErrorInitializationError.
+ * @return A cuda pointer usable on device and host if area flags contain
+ * AML_AREA_CUDA_FLAG_ALLOC_UNIFIED.
+ * @return A pointer to host memory on which one can call
+ * cudaHostGetDevicePointer() to get a pointer to mapped device memory, if
+ * AML_AREA_CUDA_FLAG_ALLOC_MAPPED is set.
+ * The obtained pointer must be unmapped with aml_area_cuda_munmap(). If host
+ * side memory was provided as an mmap option, then it still has to be freed.
+ * @return A pointer to host memory if the area flag
+ * AML_AREA_CUDA_FLAG_ALLOC_HOST is set.
+ * @return A pointer to device memory if no flag is set.
+ *
+ * @see AML_AREA_CUDA_FLAG_*
 **/
 void *aml_area_cuda_mmap(const struct aml_area_data *area_data,
-			 void *ptr, size_t size);
+			 size_t size, struct aml_area_mmap_options *options);
 /**
 * \brief munmap hook for aml area.
...
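As a usage sketch tying the cuda flags and options together: the flag names, option struct, and creation signature come from the diff above, while the header path and the generic `aml_area_munmap()` cleanup call are assumptions:
```
#include <stddef.h>
#include <aml.h>
#include <aml/area/cuda.h>

void cuda_mapped_alloc(size_t size)
{
	struct aml_area *area;
	struct aml_area_cuda_mmap_options opts = {
		.device = -1, /* negative: keep the area's device */
		.ptr = NULL,  /* NULL + ALLOC_HOST: area allocates host memory */
	};

	/* Host allocation mapped on device 0. */
	aml_area_cuda_create(&area, 0,
			     AML_AREA_CUDA_FLAG_ALLOC_HOST |
			     AML_AREA_CUDA_FLAG_ALLOC_MAPPED);

	void *host = aml_area_mmap(area, size,
				   (struct aml_area_mmap_options *)&opts);
	/* Per the docs above, cudaHostGetDevicePointer() yields the
	 * device-side view of this mapping. */

	aml_area_munmap(area, host, size);
	aml_area_cuda_destroy(&area);
}
```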
@@ -28,42 +28,6 @@
  * @{
  **/
-/**
- * Allowed binding flag for area creation.
- * This flag will apply strict binding to the selected bitmask.
- * If subsequent allocation will failt if they cannot enforce binding
- * on bitmask.
- **/
-#define AML_AREA_LINUX_BINDING_FLAG_BIND (MPOL_BIND)
-/**
- * Allowed binding flag for area creation.