Commit a466aaa7 authored by Swann Perarnau

Merge branch 'area_options' into 'master'

[feature] add area options on mmap

See merge request !78
parents 001799b6 c3a501b9
Pipeline #8379 passed with stages in 5 minutes and 39 seconds
@@ -34,16 +34,14 @@ int main(int argc, char *argv[])
 	long int N = atol(argv[3]);
 	unsigned long memsize = sizeof(double)*N*N;
-	aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
 	assert(slow != NULL);
-	aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
 	assert(fast != NULL);
-	a = aml_area_mmap(slow, NULL, memsize);
-	b = aml_area_mmap(slow, NULL, memsize);
-	c = aml_area_mmap(fast, NULL, memsize);
+	a = aml_area_mmap(slow, memsize, NULL);
+	b = aml_area_mmap(slow, memsize, NULL);
+	c = aml_area_mmap(fast, memsize, NULL);
 	assert(a != NULL && b != NULL && c != NULL);
 	double alpha = 1.0, beta = 1.0;
...
@@ -77,17 +77,15 @@ int main(int argc, char* argv[])
 	assert(!aml_tiling_2d_create(&tiling_col, AML_TILING_TYPE_2D_COLMAJOR,
 				     tilesize, memsize, N/T , N/T));
-	aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
 	assert(slow != NULL);
-	aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
 	assert(fast != NULL);
 	/* allocation */
-	a = aml_area_mmap(slow, NULL, memsize);
-	b = aml_area_mmap(slow, NULL, memsize);
-	c = aml_area_mmap(fast, NULL, memsize);
+	a = aml_area_mmap(slow, memsize, NULL);
+	b = aml_area_mmap(slow, memsize, NULL);
+	c = aml_area_mmap(fast, memsize, NULL);
 	assert(a != NULL && b != NULL && c != NULL);
 	size_t ntilerows, ntilecols, tilerowsize, tilecolsize, rowsize, colsize;
...
@@ -105,20 +105,18 @@ int main(int argc, char* argv[])
 	assert(!aml_tiling_1d_create(&tiling_prefetch,
 				     tilesize*(N/T), memsize));
-	aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
 	assert(slow != NULL);
-	aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
 	assert(fast != NULL);
 	assert(!aml_dma_linux_seq_create(&dma, 2));
 	assert(!aml_scratch_par_create(&sa, fast, slow, dma, tiling_prefetch, (size_t)2, (size_t)2));
 	assert(!aml_scratch_par_create(&sb, fast, slow, dma, tiling_prefetch, (size_t)2, (size_t)2));
 	/* allocation */
-	a = aml_area_mmap(slow, NULL, memsize);
-	b = aml_area_mmap(slow, NULL, memsize);
-	c = aml_area_mmap(fast, NULL, memsize);
+	a = aml_area_mmap(slow, memsize, NULL);
+	b = aml_area_mmap(slow, memsize, NULL);
+	c = aml_area_mmap(fast, memsize, NULL);
 	assert(a != NULL && b != NULL && c != NULL);
 	size_t ntilerows, ntilecols, tilerowsize, tilecolsize, rowsize, colsize;
...
@@ -53,9 +53,9 @@ int main(int argc, char *argv[])
 		chunk_msz = MEMSIZE/(numthreads*CHUNKING);
 		esz = chunk_msz/sizeof(unsigned long);
 	}
-	a = aml_area_mmap(slow, NULL, MEMSIZE);
-	b = aml_area_mmap(slow, NULL, MEMSIZE);
-	c = aml_area_mmap(fast, NULL, MEMSIZE);
+	a = aml_area_mmap(slow, MEMSIZE, NULL);
+	b = aml_area_mmap(slow, MEMSIZE, NULL);
+	c = aml_area_mmap(fast, MEMSIZE, NULL);
 	assert(a != NULL && b != NULL && c != NULL);
 	/* create virtually accessible address range, backed by slow memory */
...
@@ -64,11 +64,9 @@ int main(int argc, char *argv[])
 	/* initialize all the supporting struct */
 	assert(!aml_tiling_1d_create(&tiling, tilesz, memsize));
-	aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
 	assert(slow != NULL);
-	aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
 	assert(fast != NULL);
 	assert(!aml_dma_linux_par_create(&dma, numthreads*2));
 	assert(!aml_scratch_seq_create(&sa, fast, slow, dma, tiling,
@@ -77,9 +75,9 @@ int main(int argc, char *argv[])
 				       (size_t)2*numthreads, (size_t)1));
 	/* allocation */
-	a = aml_area_mmap(slow, NULL, memsize);
-	b = aml_area_mmap(slow, NULL, memsize);
-	c = aml_area_mmap(fast, NULL, memsize);
+	a = aml_area_mmap(slow, memsize, NULL);
+	b = aml_area_mmap(slow, memsize, NULL);
+	c = aml_area_mmap(fast, memsize, NULL);
 	assert(a != NULL && b != NULL && c != NULL);
 	unsigned long esize = memsize/sizeof(unsigned long);
...
@@ -95,11 +95,9 @@ int main(int argc, char *argv[])
 	/* initialize all the supporting struct */
 	assert(!aml_tiling_1d_create(&tiling, tilesz, memsize));
-	aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
 	assert(slow != NULL);
-	aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
 	assert(fast != NULL);
 	assert(!aml_dma_linux_seq_create(&dma, numthreads*2));
 	assert(!aml_scratch_par_create(&sa, fast, slow, dma, tiling,
@@ -108,9 +106,9 @@ int main(int argc, char *argv[])
 				       2*numthreads, numthreads));
 	/* allocation */
-	a = aml_area_mmap(slow, NULL, memsize);
-	b = aml_area_mmap(slow, NULL, memsize);
-	c = aml_area_mmap(fast, NULL, memsize);
+	a = aml_area_mmap(slow, memsize, NULL);
+	b = aml_area_mmap(slow, memsize, NULL);
+	c = aml_area_mmap(fast, memsize, NULL);
 	assert(a != NULL && b != NULL && c != NULL);
 	unsigned long esize = memsize/sizeof(unsigned long);
...
@@ -101,11 +101,9 @@ int main(int argc, char *argv[])
 	/* initialize all the supporting struct */
 	assert(!aml_tiling_1d_create(&tiling, tilesz, memsize));
-	aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
 	assert(slow != NULL);
-	aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-			      &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+	aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
 	assert(fast != NULL);
 	assert(!aml_dma_linux_seq_create(dma, (size_t)numthreads*4));
 	assert(!aml_scratch_par_create(&sa, fast, slow, dma, tiling,
@@ -114,9 +112,9 @@ int main(int argc, char *argv[])
 				       (size_t)2*numthreads, (size_t)numthreads));
 	/* allocation */
-	a = aml_area_mmap(slow, NULL, memsize);
-	b = aml_area_mmap(slow, NULL, memsize);
-	c = aml_area_mmap(fast, NULL, memsize);
+	a = aml_area_mmap(slow, memsize, NULL);
+	b = aml_area_mmap(slow, memsize, NULL);
+	c = aml_area_mmap(fast, memsize, NULL);
 	assert(a != NULL && b != NULL && c != NULL);
 	unsigned long esize = memsize/sizeof(unsigned long);
@@ -127,7 +125,7 @@ int main(int argc, char *argv[])
 	}
 	/* run kernel */
-	struct winfo *wis = aml_area_mmap(slow, NULL, numthreads * sizeof(struct winfo));
+	struct winfo *wis = aml_area_mmap(slow, numthreads * sizeof(struct winfo), NULL);
 	for(unsigned long i = 0; i < numthreads; i++) {
 		wis[i].tid = i;
 		pthread_create(&wis[i].th, NULL, &th_work, (void*)&wis[i]);
...
@@ -266,9 +266,9 @@ main(int argc, char *argv[])
 	aml_init(&argc, &argv);
 	size_t size = sizeof(STREAM_TYPE)*(STREAM_ARRAY_SIZE+OFFSET);
 	struct aml_area *area = aml_area_linux;
-	a = aml_area_mmap(area, NULL, size);
-	b = aml_area_mmap(area, NULL, size);
-	c = aml_area_mmap(area, NULL, size);
+	a = aml_area_mmap(area, size, NULL);
+	b = aml_area_mmap(area, size, NULL);
+	c = aml_area_mmap(area, size, NULL);
 	/* Get initial value for system clock. */
 #pragma omp parallel for
...
@@ -104,6 +104,14 @@ int aml_finalize(void);
  **/
 struct aml_area_data;
 
+/**
+ * Opaque handle to pass additional options to an area mmap hook.
+ * This is implementation specific and cannot be used as a
+ * generic interface but rather for customizing area behaviour
+ * on a per-mmap basis.
+ **/
+struct aml_area_mmap_options;
+
 /**
  * aml_area_ops is a structure containing implementations
  * of an area operations.
@@ -114,21 +122,20 @@ struct aml_area_ops {
 	/**
 	 * Building block for coarse grain allocator of virtual memory.
 	 *
-	 * @param data: Opaque handle to implementation specific data.
-	 * @param ptr: A virtual address to be used by underlying
-	 * implementation.
-	 * Can be NULL.
-	 * @param size: The minimum size of allocation.
+	 * @param[in] data: Opaque handle to implementation specific data.
+	 * @param[in] size: The minimum size of allocation.
 	 * Is greater than 0. Must not fail unless not enough
 	 * memory is available, or ptr argument does not point to a
 	 * suitable address.
 	 * In case of failure, aml_errno must be set to an appropriate
 	 * value.
+	 * @param[in/out] opts: Opaque handle to pass additional options to area
+	 * mmap hook. Can be NULL and must work with NULL opts.
 	 * @return a pointer to allocated memory object.
 	 **/
 	void* (*mmap)(const struct aml_area_data *data,
-		      void *ptr,
-		      size_t size);
+		      size_t size,
+		      struct aml_area_mmap_options *opts);
 
 	/**
 	 * Building block for unmapping of virtual memory mapped with mmap()
@@ -162,14 +169,16 @@ struct aml_area {
 /**
  * Low-level function for getting memory from an area.
- * @param area: A valid area implementing access to target memory.
- * @param ptr: Implementation specific argument. See specific header.
- * @param size: The usable size of memory returned.
+ * @param[in] area: A valid area implementing access to target memory.
+ * @param[in] size: The usable size of memory returned.
+ * @param[in, out] opts: Opaque handle to pass additional options to the
+ * area mmap hook. Can be NULL.
  * @return virtual memory from this area with at least queried size bytes.
+ * @return NULL on failure, with aml_errno set to the appropriate error
+ * code.
  **/
 void *aml_area_mmap(const struct aml_area *area,
-		    void **ptr,
-		    size_t size);
+		    size_t size,
+		    struct aml_area_mmap_options *opts);
 
 /**
  * Release data provided with aml_area_mmap() and the same area.
...
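Note: with this change the caller-facing pattern is the one sketched below, assuming the default aml_area_linux area used in the STREAM hunk above and the aml_area_munmap() release call from the same header; the example_alloc() wrapper and its size argument are illustrative only.

#include <aml.h>
#include <assert.h>

/* Sketch of the new calling convention: the size argument now comes before
 * the opaque options handle, and a NULL options pointer selects the area's
 * default behaviour. */
void example_alloc(size_t memsize)
{
	struct aml_area *area = aml_area_linux;  /* default Linux area */
	void *buf = aml_area_mmap(area, memsize, NULL);

	assert(buf != NULL);                     /* on failure, aml_errno is set */
	/* ... use buf ... */
	aml_area_munmap(area, buf, memsize);     /* assumed release counterpart */
}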
@@ -27,32 +27,103 @@
  **/
 
-/**
- * Structure containing aml area hooks for cuda implementation.
- * For now there is only a single implementation of the hooks.
- * This implementation will choose between different cuda functions.
- **/
-extern struct aml_area_ops aml_area_cuda_ops;
-
-/**
- * Default cuda area with private mapping in current device.
- * Can be used out of the box with aml_area_*() functions.
- **/
-extern struct aml_area aml_area_cuda;
-
-/**
- * Allocation flags to pass to cudaMallocManaged().
- * @see cuda runtime API documentation / memory management.
- **/
-enum aml_area_cuda_flags {
-	AML_AREA_CUDA_ATTACH_GLOBAL,
-	AML_AREA_CUDA_ATTACH_HOST,
-};
+/**
+ * Default cuda area flags:
+ * * Allocation on device only.
+ * * Allocation visible by a single device.
+ * * Allocation not mapped on host memory.
+ **/
+#define AML_AREA_CUDA_FLAG_DEFAULT 0
+
+/**
+ * Device allocation flag.
+ * Default behaviour is allocation on device.
+ * If this flag is set, then allocation will
+ * be on host.
+ **/
+#define AML_AREA_CUDA_FLAG_ALLOC_HOST (1 << 0)
+
+/**
+ * Mapping flag.
+ * Default behaviour is allocation not mapped.
+ * If set, the pointer returned by the mmap function
+ * will be host side memory mapped on device.
+ * A pointer to device memory can then be retrieved
+ * by calling cudaHostGetDevicePointer().
+ * If AML_AREA_CUDA_FLAG_ALLOC_HOST is set, then
+ * host side memory will be allocated. Else, the
+ * "ptr" field of the mmap options will be used to map
+ * device memory ("ptr" must not be NULL).
+ *
+ * @see cudaHostRegister(), cudaHostAlloc().
+ **/
+#define AML_AREA_CUDA_FLAG_ALLOC_MAPPED (1 << 1)
+
+/**
+ * Unified memory flag.
+ * If this flag is set, then allocation will create
+ * a unified memory pointer usable on host and device.
+ * Additionally, AML_AREA_CUDA_FLAG_ALLOC_HOST and
+ * AML_AREA_CUDA_FLAG_ALLOC_MAPPED will be ignored.
+ *
+ * @see cudaMallocManaged()
+ **/
+#define AML_AREA_CUDA_FLAG_ALLOC_UNIFIED (1 << 2)
+
+/**
+ * Unified memory setting flag.
+ * If AML_AREA_CUDA_FLAG_ALLOC_UNIFIED is set,
+ * then this flag is checked to set the
+ * cudaMallocManaged() flag cudaAttachGlobal.
+ * Else, if AML_AREA_CUDA_FLAG_ALLOC_MAPPED is set
+ * or AML_AREA_CUDA_FLAG_ALLOC_HOST is set,
+ * then this flag is checked to set the cudaMallocHost()
+ * flag cudaHostAllocPortable.
+ * The default behaviour is to make the allocation
+ * visible from a single device. If this flag is set,
+ * then the allocation will be visible on all devices.
+ *
+ * @see cudaMallocManaged()
+ **/
+#define AML_AREA_CUDA_FLAG_ALLOC_GLOBAL (1 << 3)
+
+/**
+ * Options that can optionally be passed to the mmap
+ * call.
+ **/
+struct aml_area_cuda_mmap_options {
+	/**
+	 * Specify a different device for one mmap call.
+	 * If device < 0, the area device is used.
+	 **/
+	int device;
+	/**
+	 * Host memory pointer used for mapped allocations.
+	 * If flag AML_AREA_CUDA_FLAG_ALLOC_MAPPED is set
+	 * and ptr is NULL, ptr will be overwritten with
+	 * host allocated memory and will have to be freed
+	 * using cudaFreeHost().
+	 **/
+	void *ptr;
+};
+
+/** aml area hooks for cuda implementation. **/
+extern struct aml_area_ops aml_area_cuda_ops;
+
+/**
+ * Default cuda area:
+ * Allocation on device, visible by a single device,
+ * and not mapped on host memory.
+ **/
+extern struct aml_area aml_area_cuda;
 
 /** Implementation of aml_area_data for cuda areas. **/
 struct aml_area_cuda_data {
-	/** allocation flags in cuda format **/
+	/** Area allocation flags. **/
 	int flags;
-	/** The device id on which allocation is done. **/
+	/**
+	 * The device id on which allocation is done.
+	 * If device < 0, use current device.
+	 **/
 	int device;
 };
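Note: a sketch of how the new flags and the per-mmap options are intended to combine, using aml_area_cuda_create() declared further down in this header; the include paths, device ids, buffer size, and error handling here are illustrative assumptions.

#include <aml.h>
#include <aml/area/cuda.h>
#include <assert.h>

/* Sketch: a unified-memory area visible from all devices, plus a per-call
 * device override through the cuda mmap options. */
void example_unified(size_t size)
{
	struct aml_area *unified;
	struct aml_area_cuda_mmap_options opts = {
		.device = 1,   /* override the area's device for this mapping only */
		.ptr = NULL,   /* only consulted with AML_AREA_CUDA_FLAG_ALLOC_MAPPED */
	};

	assert(aml_area_cuda_create(&unified, 0,
				    AML_AREA_CUDA_FLAG_ALLOC_UNIFIED |
				    AML_AREA_CUDA_FLAG_ALLOC_GLOBAL) == AML_SUCCESS);

	/* usable from host and device because of the UNIFIED flag */
	double *x = aml_area_mmap(unified, size,
				  (struct aml_area_mmap_options *)&opts);
	assert(x != NULL);

	aml_area_munmap(unified, x, size);   /* assumed generic release call */
	aml_area_cuda_destroy(&unified);
}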
@@ -62,8 +133,8 @@ struct aml_area_cuda_data {
  * @param[out] area pointer to an uninitialized struct aml_area pointer to
  * receive the new area.
  * @param[in] device: A valid cuda device id, i.e from 0 to num_devices-1.
- * If device id is negative, then no cuda device will be selected when
- * using aml_area_cuda_mmap().
+ * If device id is negative, then the current cuda device will be used when
+ * calling aml_area_cuda_mmap().
  * @param[in] flags: Allocation flags.
  *
  * @return AML_SUCCESS on success and area points to the new aml_area.
@@ -72,11 +143,10 @@ struct aml_area_cuda_data {
  * of devices.
  * @return -AML_ENOMEM if space to carry area cannot be allocated.
  *
- * @see enum aml_area_cuda_flags.
+ * @see AML_AREA_CUDA_FLAG_*.
  **/
 int aml_area_cuda_create(struct aml_area **area,
-			 const int device,
-			 const enum aml_area_cuda_flags flags);
+			 const int device, const int flags);
 
 /**
  * \brief Cuda area destruction.
@@ -94,8 +164,9 @@ void aml_area_cuda_destroy(struct aml_area **area);
  * This function is a wrapper on cuda alloc functions.
  * It uses area settings to: select device on which to perform allocation,
  * select allocation function and set its parameters.
- * Allocations can be standalone on device, shared across multiple devices,
- * and backed with cpu memory.
+ * Any pointer obtained through aml_area_cuda_mmap() must be unmapped with
+ * aml_area_cuda_munmap().
+ *
  * Device selection is not thread safe and requires to set the global
  * state of cuda library. When selecting a device, allocation may succeed
  * while setting device back to original context devices may fail. In that
@@ -103,31 +174,38 @@ void aml_area_cuda_destroy(struct aml_area **area);
  * function in order to catch the error when return value is not NULL.
  *
  * @param[in] area_data: The structure containing cuda area settings.
- * @param[in, out] ptr: If ptr is NULL, then call cudaMallocManaged() with
- * area flags. Memory will be allocated only device side.
- * If ptr is not NULL:
- * * ptr must point to a valid memory area.
- * Device side memory will be mapped on this host side memory.
- * According to cuda runtime API documentation
- * (cudaHostRegister()), host side memory pages will be locked or allocation
- * will fail.
  * @param[in] size: The size to allocate.
+ * @param[in] options: A struct aml_area_cuda_mmap_options *. If its device
+ * field is not negative, it will be used to select the target device.
+ * If the area flag AML_AREA_CUDA_FLAG_ALLOC_MAPPED is set and
+ * AML_AREA_CUDA_FLAG_ALLOC_HOST is not set, then the options field "ptr"
+ * must not be NULL and must point to host memory that can be mapped on GPU.
 *
- * @return A cuda pointer to allocated device memory on success, NULL on
- * failure. If failure occures, aml_errno variable is set with one of the
- * following values:
- * * AML_ENOTSUP is one of the cuda calls failed with error:
+ * @return NULL on failure, with aml_errno set to one of the following error
+ * codes:
+ * * AML_ENOTSUP if one of the cuda calls failed with error:
  * cudaErrorInsufficientDriver, cudaErrorNoDevice.
- * * AML_EINVAL if target device id is not valid.
- * * AML_EBUSY if a specific device was requested and call to failed with error
- * cudaErrorDeviceAlreadyInUse, or if region was already mapped on device.
+ * * AML_EINVAL if the target device id is not valid or the provided arguments
+ * are not compatible.
+ * * AML_EBUSY if a specific device was requested but was already in use.
  * * AML_ENOMEM if memory allocation failed with error
  * cudaErrorMemoryAllocation.
  * * AML_FAILURE if one of the cuda calls resulted in error
  * cudaErrorInitializationError.
+ * @return A cuda pointer usable on device and host if area flags contain
+ * AML_AREA_CUDA_FLAG_ALLOC_UNIFIED.
+ * @return A pointer to host memory on which one can call
+ * cudaHostGetDevicePointer() to get a pointer to mapped device memory, if
+ * AML_AREA_CUDA_FLAG_ALLOC_MAPPED is set.
+ * The obtained pointer must be unmapped with aml_area_cuda_munmap(). If host
+ * side memory was provided as an mmap option, then it still has to be freed.
+ * @return A pointer to host memory if the area flag
+ * AML_AREA_CUDA_FLAG_ALLOC_HOST is set.
+ * @return A pointer to device memory if no flag is set.
+ *
+ * @see AML_AREA_CUDA_FLAG_*
 **/
 void *aml_area_cuda_mmap(const struct aml_area_data *area_data,
-			 void *ptr, size_t size);
+			 size_t size, struct aml_area_mmap_options *options);
 
 /**
  * \brief munmap hook for aml area.
...
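Note: for the mapped case documented above, a sketch of passing an existing host buffer through the options "ptr" field; the function name, include paths, and the aml_area_munmap() release call are illustrative assumptions, while cudaHostGetDevicePointer() is the CUDA runtime call referenced in the comment.

#include <aml.h>
#include <aml/area/cuda.h>
#include <cuda_runtime.h>
#include <assert.h>

/* Sketch: map an existing host buffer on the GPU through the MAPPED flag.
 * Since AML_AREA_CUDA_FLAG_ALLOC_HOST is not set, opts.ptr must point to
 * valid host memory; its pages get locked by the underlying registration. */
void example_mapped(void *host_buf, size_t size)
{
	struct aml_area *mapped;
	struct aml_area_cuda_mmap_options opts = {
		.device = -1,       /* negative: keep the area's device */
		.ptr = host_buf,    /* host memory to map on the device */
	};
	void *h, *d;

	assert(aml_area_cuda_create(&mapped, 0,
				    AML_AREA_CUDA_FLAG_ALLOC_MAPPED) == AML_SUCCESS);

	/* returns a host pointer; the device view is obtained separately */
	h = aml_area_mmap(mapped, size, (struct aml_area_mmap_options *)&opts);
	assert(h != NULL);
	assert(cudaHostGetDevicePointer(&d, h, 0) == cudaSuccess);

	/* ... launch kernels on d, read results through h ... */

	aml_area_munmap(mapped, h, size);   /* host_buf itself stays owned by the caller */
	aml_area_cuda_destroy(&mapped);
}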
@@ -28,42 +28,6 @@
  * @{
  **/
 
-/**
- * Allowed binding flag for area creation.
- * This flag will apply strict binding to the selected bitmask.
- * If subsequent allocation will failt if they cannot enforce binding
- * on bitmask.
- **/
-#define AML_AREA_LINUX_BINDING_FLAG_BIND (MPOL_BIND)
-
-/**
- * Allowed binding flag for area creation.