Commit a466aaa7 authored by Swann Perarnau

Merge branch 'area_options' into 'master'

[feature] add area options on mmap

See merge request !78
parents 001799b6 c3a501b9
Pipeline #8379 passed with stages in 5 minutes and 39 seconds
@@ -34,16 +34,14 @@ int main(int argc, char *argv[])
long int N = atol(argv[3]);
unsigned long memsize = sizeof(double)*N*N;
- aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-                       &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+ aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
- aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-                       &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+ aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
- a = aml_area_mmap(slow, NULL, memsize);
- b = aml_area_mmap(slow, NULL, memsize);
- c = aml_area_mmap(fast, NULL, memsize);
+ a = aml_area_mmap(slow, memsize, NULL);
+ b = aml_area_mmap(slow, memsize, NULL);
+ c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
double alpha = 1.0, beta = 1.0;
assert(a != NULL && b != NULL && c != NULL);
double alpha = 1.0, beta = 1.0;
......
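The pattern repeated throughout these example updates is the new aml_area_mmap() calling convention; a minimal before/after fragment (illustrative only, reusing the slow area, memsize and a variables from the surrounding example):

/* old API: placement hint second, size last */
/* a = aml_area_mmap(slow, NULL, memsize); */

/* new API: size second, optional struct aml_area_mmap_options * last;
 * NULL keeps the area's default mmap behaviour */
a = aml_area_mmap(slow, memsize, NULL);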
@@ -77,17 +77,15 @@ int main(int argc, char* argv[])
assert(!aml_tiling_2d_create(&tiling_col, AML_TILING_TYPE_2D_COLMAJOR,
                             tilesize, memsize, N/T , N/T));
- aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-                       &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+ aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
- aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-                       &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+ aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
/* allocation */
- a = aml_area_mmap(slow, NULL, memsize);
- b = aml_area_mmap(slow, NULL, memsize);
- c = aml_area_mmap(fast, NULL, memsize);
+ a = aml_area_mmap(slow, memsize, NULL);
+ b = aml_area_mmap(slow, memsize, NULL);
+ c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
size_t ntilerows, ntilecols, tilerowsize, tilecolsize, rowsize, colsize;
assert(a != NULL && b != NULL && c != NULL);
size_t ntilerows, ntilecols, tilerowsize, tilecolsize, rowsize, colsize;
......
@@ -105,20 +105,18 @@ int main(int argc, char* argv[])
assert(!aml_tiling_1d_create(&tiling_prefetch,
                             tilesize*(N/T), memsize));
- aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-                       &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+ aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
- aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-                       &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+ aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
assert(fast != NULL);
assert(!aml_dma_linux_seq_create(&dma, 2));
assert(!aml_scratch_par_create(&sa, fast, slow, dma, tiling_prefetch, (size_t)2, (size_t)2));
assert(!aml_scratch_par_create(&sb, fast, slow, dma, tiling_prefetch, (size_t)2, (size_t)2));
/* allocation */
- a = aml_area_mmap(slow, NULL, memsize);
- b = aml_area_mmap(slow, NULL, memsize);
- c = aml_area_mmap(fast, NULL, memsize);
+ a = aml_area_mmap(slow, memsize, NULL);
+ b = aml_area_mmap(slow, memsize, NULL);
+ c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
size_t ntilerows, ntilecols, tilerowsize, tilecolsize, rowsize, colsize;
......
@@ -53,9 +53,9 @@ int main(int argc, char *argv[])
chunk_msz = MEMSIZE/(numthreads*CHUNKING);
esz = chunk_msz/sizeof(unsigned long);
}
- a = aml_area_mmap(slow, NULL, MEMSIZE);
- b = aml_area_mmap(slow, NULL, MEMSIZE);
- c = aml_area_mmap(fast, NULL, MEMSIZE);
+ a = aml_area_mmap(slow, MEMSIZE, NULL);
+ b = aml_area_mmap(slow, MEMSIZE, NULL);
+ c = aml_area_mmap(fast, MEMSIZE, NULL);
assert(a != NULL && b != NULL && c != NULL);
/* create virtually accessible address range, backed by slow memory */
......
@@ -64,11 +64,9 @@ int main(int argc, char *argv[])
/* initialize all the supporting struct */
assert(!aml_tiling_1d_create(&tiling, tilesz, memsize));
- aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-                       &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+ aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
- aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-                       &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+ aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
assert(fast != NULL);
assert(!aml_dma_linux_par_create(&dma, numthreads*2));
assert(!aml_scratch_seq_create(&sa, fast, slow, dma, tiling,
@@ -77,9 +75,9 @@ int main(int argc, char *argv[])
(size_t)2*numthreads, (size_t)1));
/* allocation */
- a = aml_area_mmap(slow, NULL, memsize);
- b = aml_area_mmap(slow, NULL, memsize);
- c = aml_area_mmap(fast, NULL, memsize);
+ a = aml_area_mmap(slow, memsize, NULL);
+ b = aml_area_mmap(slow, memsize, NULL);
+ c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
unsigned long esize = memsize/sizeof(unsigned long);
......
@@ -95,11 +95,9 @@ int main(int argc, char *argv[])
/* initialize all the supporting struct */
assert(!aml_tiling_1d_create(&tiling, tilesz, memsize));
- aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-                       &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+ aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
- aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-                       &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+ aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
assert(fast != NULL);
assert(!aml_dma_linux_seq_create(&dma, numthreads*2));
assert(!aml_scratch_par_create(&sa, fast, slow, dma, tiling,
@@ -108,9 +106,9 @@ int main(int argc, char *argv[])
2*numthreads, numthreads));
/* allocation */
- a = aml_area_mmap(slow, NULL, memsize);
- b = aml_area_mmap(slow, NULL, memsize);
- c = aml_area_mmap(fast, NULL, memsize);
+ a = aml_area_mmap(slow, memsize, NULL);
+ b = aml_area_mmap(slow, memsize, NULL);
+ c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
unsigned long esize = memsize/sizeof(unsigned long);
......
@@ -101,11 +101,9 @@ int main(int argc, char *argv[])
/* initialize all the supporting struct */
assert(!aml_tiling_1d_create(&tiling, tilesz, memsize));
- aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-                       &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+ aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
- aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
-                       &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
+ aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
assert(fast != NULL);
assert(!aml_dma_linux_seq_create(dma, (size_t)numthreads*4));
assert(!aml_scratch_par_create(&sa, fast, slow, dma, tiling,
@@ -114,9 +112,9 @@ int main(int argc, char *argv[])
(size_t)2*numthreads, (size_t)numthreads));
/* allocation */
- a = aml_area_mmap(slow, NULL, memsize);
- b = aml_area_mmap(slow, NULL, memsize);
- c = aml_area_mmap(fast, NULL, memsize);
+ a = aml_area_mmap(slow, memsize, NULL);
+ b = aml_area_mmap(slow, memsize, NULL);
+ c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
unsigned long esize = memsize/sizeof(unsigned long);
@@ -127,7 +125,7 @@ int main(int argc, char *argv[])
}
/* run kernel */
- struct winfo *wis = aml_area_mmap(slow, NULL, numthreads * sizeof(struct winfo));
+ struct winfo *wis = aml_area_mmap(slow, numthreads * sizeof(struct winfo), NULL);
for(unsigned long i = 0; i < numthreads; i++) {
wis[i].tid = i;
pthread_create(&wis[i].th, NULL, &th_work, (void*)&wis[i]);
......
@@ -266,9 +266,9 @@ main(int argc, char *argv[])
aml_init(&argc, &argv);
size_t size = sizeof(STREAM_TYPE)*(STREAM_ARRAY_SIZE+OFFSET);
struct aml_area *area = aml_area_linux;
- a = aml_area_mmap(area, NULL, size);
- b = aml_area_mmap(area, NULL, size);
- c = aml_area_mmap(area, NULL, size);
+ a = aml_area_mmap(area, size, NULL);
+ b = aml_area_mmap(area, size, NULL);
+ c = aml_area_mmap(area, size, NULL);
/* Get initial value for system clock. */
#pragma omp parallel for
......
@@ -104,6 +104,14 @@ int aml_finalize(void);
**/
struct aml_area_data;
+ /**
+ * Opaque handle to pass additional options to an area mmap hook.
+ * This is implementation specific and cannot be used as a
+ * generic interface but rather for customizing area behaviour
+ * on a per-mmap basis.
+ **/
+ struct aml_area_mmap_options;
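To illustrate the intent of this opaque type, a hedged sketch of how an implementation-side mmap hook might consume it (the aml_area_my_mmap_options and aml_area_my_mmap names are hypothetical, not part of AML):

/* Hypothetical example area: it defines its own options layout and casts
 * the opaque handle back inside its mmap hook. */
struct aml_area_my_mmap_options {
	int example_knob;       /* hypothetical per-mmap tuning knob */
};

void *aml_area_my_mmap(const struct aml_area_data *data,
                       size_t size,
                       struct aml_area_mmap_options *opts)
{
	/* opts may be NULL; implementations must handle that case. */
	struct aml_area_my_mmap_options *my_opts =
	        (struct aml_area_my_mmap_options *)opts;

	(void)data;
	(void)size;
	if (my_opts != NULL && my_opts->example_knob) {
		/* ... customize this particular mapping ... */
	}
	/* ... perform the actual mapping and return it ... */
	return NULL; /* placeholder in this sketch */
}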
/**
* aml_area_ops is a structure containing implementations
* of an area operations.
@@ -114,21 +122,20 @@ struct aml_area_ops {
/**
* Building block for coarse grain allocator of virtual memory.
*
- * @param data: Opaque handle to implementation specific data.
- * @param ptr: A virtual address to be used by underlying
- * implementation.
- * Can be NULL.
- * @param size: The minimum size of allocation.
+ * @param[in] data: Opaque handle to implementation specific data.
+ * @param[in] size: The minimum size of allocation.
* Is greater than 0. Must not fail unless not enough
* memory is available, or ptr argument does not point to a
* suitable address.
* In case of failure, aml_errno must be set to an appropriate
* value.
+ * @param[in/out] opts: Opaque handle to pass additional options to area
+ * mmap hook. Can be NULL and must work with NULL opts.
* @return a pointer to allocated memory object.
**/
void* (*mmap)(const struct aml_area_data *data,
-                     void *ptr,
-                     size_t size);
+                     size_t size,
+                     struct aml_area_mmap_options *opts);
/**
* Building block for unmapping of virtual memory mapped with mmap()
@@ -162,14 +169,16 @@ struct aml_area {
/**
* Low-level function for getting memory from an area.
- * @param area: A valid area implementing access to target memory.
- * @param ptr: Implementation specific argument. See specific header.
- * @param size: The usable size of memory returned.
+ * @param[in] area: A valid area implementing access to target memory.
+ * @param[in] size: The usable size of memory returned.
+ * @param[in, out] opts: Opaque handle to pass additional options to the area mmap hook; can be NULL.
* @return virtual memory from this area with at least queried size bytes.
+ * @return NULL on failure, with aml_errno set to the appropriate error
+ * code.
**/
- void *aml_area_mmap(const struct aml_area *area,
-                     void **ptr,
-                     size_t size);
+ void *aml_area_mmap(const struct aml_area *area,
+                     size_t size,
+                     struct aml_area_mmap_options *opts);
/**
* Release data provided with aml_area_mmap() and the same area.
......
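Putting the new prototype together with the Linux area introduced later in this merge request, a minimal usage sketch (the include path aml/area/linux.h and the helper name alloc_example are assumptions for illustration):

#include <assert.h>
#include <stddef.h>
#include "aml.h"
#include <aml/area/linux.h>   /* assumed include path for the Linux area */

int alloc_example(size_t size)
{
	struct aml_area *area;
	void *buf;
	int err;

	/* NULL nodemask: default to the allowed memory nodes */
	err = aml_area_linux_create(&area, NULL, AML_AREA_LINUX_POLICY_DEFAULT);
	if (err != 0)
		return err;

	/* NULL opts: the area's default mmap behaviour applies */
	buf = aml_area_mmap(area, size, NULL);
	if (buf == NULL)
		return aml_errno;

	/* ... use buf ...; area destruction omitted for brevity */
	return aml_area_munmap(area, buf, size);
}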
@@ -27,32 +27,103 @@
**/
/**
- * Structure containing aml area hooks for cuda implementation.
- * For now there is only a single implementation of the hooks.
- * This implementation will choose between different cuda functions.
+ * Default cuda area flags.
+ * * Allocation on device only,
+ * * Allocation visible by a single device.
+ * * Allocation not mapped on host memory.
**/
- extern struct aml_area_ops aml_area_cuda_ops;
+ #define AML_AREA_CUDA_FLAG_DEFAULT 0
/**
- * Default cuda area with private mapping in current device.
- * Can be used out of the box with aml_area_*() functions.
+ * Device allocation flag.
+ * Default behaviour is allocation on device.
+ * If this flag is set then allocation will
+ * be on host.
**/
- extern struct aml_area aml_area_cuda;
+ #define AML_AREA_CUDA_FLAG_ALLOC_HOST (1 << 0)
/**
+ * Mapping flag.
+ * Default behaviour is allocation not mapped.
+ * If set, the pointer returned by mmap function
+ * will be host side memory mapped on device.
+ * A pointer to device memory can then be retrieved
+ * by calling cudaHostGetDevicePointer().
+ * If AML_AREA_CUDA_FLAG_ALLOC_HOST is set, then
+ * host side memory will be allocated. Else,
+ * "ptr" field of mmap options will be used to map
+ * device memory ("ptr" must not be NULL).
+ *
+ * @see cudaHostRegister(), cudaHostAlloc().
+ **/
+ #define AML_AREA_CUDA_FLAG_ALLOC_MAPPED (1 << 1)
/**
- * Allocation flags to pass to cudaMallocManaged().
- * @see cuda runtime API documentation / memory management.
+ * Unified memory flag.
+ * If this flag is set, then allocation will create
+ * a unified memory pointer usable on host and device.
+ * Additionally, AML_AREA_CUDA_FLAG_ALLOC_HOST and
+ * AML_AREA_CUDA_FLAG_ALLOC_MAPPED will be ignored.
+ *
+ * @see cudaMallocManaged()
+ **/
+ #define AML_AREA_CUDA_FLAG_ALLOC_UNIFIED (1 << 2)
/**
+ * Unified memory setting flag.
+ * If AML_AREA_CUDA_FLAG_ALLOC_UNIFIED is set,
+ * then this flag is checked to set
+ * cudaMallocManaged() flag cudaAttachGlobal.
+ * Else if AML_AREA_CUDA_FLAG_ALLOC_MAPPED is set,
+ * or AML_AREA_CUDA_FLAG_ALLOC_HOST flag is set,
+ * then this flag is checked to set cudaMallocHost()
+ * flag cudaHostAllocPortable.
+ * The default behaviour is to make allocation
+ * visible from a single device. If this flag is set,
+ * then allocation will be visible on all devices.
+ *
+ * @see cudaMallocManaged()
+ **/
- enum aml_area_cuda_flags {
- AML_AREA_CUDA_ATTACH_GLOBAL,
- AML_AREA_CUDA_ATTACH_HOST,
+ #define AML_AREA_CUDA_FLAG_ALLOC_GLOBAL (1 << 3)
+ /**
+ * Options that can eventually be passed to mmap
+ * call.
+ **/
+ struct aml_area_cuda_mmap_options {
+ /**
+ * Specify a different device for one mmap call.
+ * if device < 0 use area device.
+ **/
+ int device;
+ /**
+ * Host memory pointer used for mapped allocations.
+ * If flag AML_AREA_CUDA_FLAG_ALLOC_MAPPED is set
+ * and ptr is NULL, ptr will be overwritten with
+ * host allocated memory and will have to be freed
+ * using cudaFreeHost().
+ **/
+ void *ptr;
};
+ /** aml area hooks for cuda implementation. **/
+ extern struct aml_area_ops aml_area_cuda_ops;
+ /**
+ * Default cuda area:
+ * Allocation on device, visible by a single device,
+ * and not mapped on host memory.
+ **/
+ extern struct aml_area aml_area_cuda;
/** Implementation of aml_area_data for cuda areas. **/
struct aml_area_cuda_data {
- /** allocation flags in cuda format **/
+ /** Area allocation flags. **/
int flags;
- /** The device id on which allocation is done. **/
+ /**
+ * The device id on which allocation is done.
+ * If device < 0, use current device.
+ **/
int device;
};
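A hedged sketch of how these flags and the per-call options are meant to combine (cuda_mmap_on_device is a hypothetical helper and assumes an already created cuda area):

/* Allocate "size" bytes through an existing cuda area, overriding the
 * area's device for this one call via the options handle. */
static void *cuda_mmap_on_device(struct aml_area *cuda_area,
                                 size_t size, int device)
{
	struct aml_area_cuda_mmap_options opts = {
		.device = device, /* < 0 falls back to the area's device */
		.ptr = NULL,      /* only used with AML_AREA_CUDA_FLAG_ALLOC_MAPPED */
	};

	return aml_area_mmap(cuda_area, size,
	                     (struct aml_area_mmap_options *)&opts);
}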
@@ -62,8 +133,8 @@ struct aml_area_cuda_data {
* @param[out] area pointer to an uninitialized struct aml_area pointer to
* receive the new area.
* @param[in] device: A valid cuda device id, i.e from 0 to num_devices-1.
- * If device id is negative, then no cuda device will be selected when
- * using aml_area_cuda_mmap().
+ * If device id is negative, then the current cuda device will be used when
+ * calling aml_area_cuda_mmap().
* @param[in] flags: Allocation flags.
*
* @return AML_SUCCESS on success and area points to the new aml_area.
@@ -72,11 +143,10 @@ struct aml_area_cuda_data {
* of devices.
* @return -AML_ENOMEM if space to carry area cannot be allocated.
*
- * @see enum aml_area_cuda_flags.
+ * @see AML_AREA_CUDA_FLAG_*.
**/
int aml_area_cuda_create(struct aml_area **area,
-                          const int device,
-                          const enum aml_area_cuda_flags flags);
+                          const int device, const int flags);
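Sketch of creating and destroying such an area (device 0 and the flag combination are arbitrary illustration choices; make_unified_area is hypothetical):

/* A unified-memory area visible from all devices. */
static int make_unified_area(struct aml_area **out)
{
	return aml_area_cuda_create(out, 0,
	                            AML_AREA_CUDA_FLAG_ALLOC_UNIFIED |
	                            AML_AREA_CUDA_FLAG_ALLOC_GLOBAL);
}
/* ... use the area with aml_area_mmap()/aml_area_munmap(),
 * then release it with aml_area_cuda_destroy(). */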
/**
* \brief Cuda area destruction.
@@ -94,8 +164,9 @@ void aml_area_cuda_destroy(struct aml_area **area);
* This function is a wrapper on cuda alloc functions.
* It uses area settings to: select device on which to perform allocation,
* select allocation function and set its parameters.
- * Allocations can be standalone on device, shared across multiple devices,
- * and backed with cpu memory.
+ * Any pointer obtained through aml_area_cuda_mmap() must be unmapped with
+ * aml_area_cuda_munmap().
*
* Device selection is not thread safe and requires to set the global
* state of cuda library. When selecting a device, allocation may succeed
* while setting device back to original context devices may fail. In that
@@ -103,31 +174,38 @@ void aml_area_cuda_destroy(struct aml_area **area);
* function in order to catch the error when return value is not NULL.
*
* @param[in] area_data: The structure containing cuda area settings.
- * @param[in, out] ptr: If ptr is NULL, then call cudaMallocManaged() with
- * area flags. Memory will be allocated only device side.
- * If ptr is not NULL:
- * * ptr must point to a valid memory area.
- * Device side memory will be mapped on this host side memory.
- * According to cuda runtime API documentation
- * (cudaHostRegister()), host side memory pages will be locked or allocation
- * will fail.
* @param[in] size: The size to allocate.
+ * @param[in] options: A struct aml_area_cuda_mmap_options *. If its "device"
+ * field is >= 0, it will be used to select the target device.
+ * If area flag AML_AREA_CUDA_FLAG_ALLOC_MAPPED is set and
+ * AML_AREA_CUDA_FLAG_ALLOC_HOST is not set, then the options field "ptr" must
+ * not be NULL and must point to host memory that can be mapped on the GPU.
*
- * @return A cuda pointer to allocated device memory on success, NULL on
- * failure. If failure occures, aml_errno variable is set with one of the
- * following values:
- * * AML_ENOTSUP is one of the cuda calls failed with error:
+ * @return NULL on failure with aml_errno set to the following error codes:
+ * * AML_ENOTSUP if one of the cuda calls failed with error:
* cudaErrorInsufficientDriver, cudaErrorNoDevice.
- * * AML_EINVAL if target device id is not valid.
- * * AML_EBUSY if a specific device was requested and call to failed with error
- * cudaErrorDeviceAlreadyInUse, or if region was already mapped on device.
+ * * AML_EINVAL if target device id is not valid or provided arguments are not
+ * compatible.
+ * * AML_EBUSY if a specific device was requested but was already in use.
* * AML_ENOMEM if memory allocation failed with error
* cudaErrorMemoryAllocation.
* * AML_FAILURE if one of the cuda calls resulted in error
* cudaErrorInitializationError.
+ * @return A cuda pointer usable on device and host if area flags contain
+ * AML_AREA_CUDA_FLAG_ALLOC_UNIFIED.
+ * @return A pointer to host memory on which one can call
+ * cudaHostGetDevicePointer() to get a pointer to mapped device memory, if
+ * AML_AREA_CUDA_FLAG_ALLOC_MAPPED is set.
+ * Obtained pointer must be unmapped with aml_area_cuda_munmap(). If host side
+ * memory was provided as mmap option, then it still has to be freed.
+ * @return A pointer to host memory if area flag AML_AREA_CUDA_FLAG_ALLOC_HOST
+ * is set.
+ * @return A pointer to device memory if no flag is set.
+ *
+ * @see AML_AREA_CUDA_FLAG_*
**/
void *aml_area_cuda_mmap(const struct aml_area_data *area_data,
-                         void *ptr, size_t size);
+                         size_t size, struct aml_area_mmap_options *options);
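For the mapped case specifically, the returned host pointer still needs to be translated before device use; a hedged sketch (device_view_of is a hypothetical helper, it assumes the CUDA runtime header and an area created with AML_AREA_CUDA_FLAG_ALLOC_MAPPED | AML_AREA_CUDA_FLAG_ALLOC_HOST):

#include <cuda_runtime.h>

/* Translate the host pointer of a mapped allocation to its device view. */
static void *device_view_of(void *host_ptr)
{
	void *dev = NULL;

	if (cudaHostGetDevicePointer(&dev, host_ptr, 0) != cudaSuccess)
		return NULL;
	return dev; /* usable in kernels; host_ptr stays usable on the CPU */
}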
/**
* \brief munmap hook for aml area.
......
@@ -28,42 +28,6 @@
* @{
**/
/**
- * Allowed binding flag for area creation.
- * This flag will apply strict binding to the selected bitmask.
- * If subsequent allocation will failt if they cannot enforce binding
- * on bitmask.
- **/
- #define AML_AREA_LINUX_BINDING_FLAG_BIND (MPOL_BIND)
- /**
- * Allowed binding flag for area creation.
- * This flag will make subsequent allocations to interleave
- * pages on memories of the bitmask.
- **/
- #define AML_AREA_LINUX_BINDING_FLAG_INTERLEAVE (MPOL_INTERLEAVE)
- /**
- * Allowed binding flag for area creation.
- * This flag will make subsequent allocations to bound to the
- * nodes of bitmask if possible, else to some other node.
- **/
- #define AML_AREA_LINUX_BINDING_FLAG_PREFERRED (MPOL_PREFERRED)
- /**
- * Allowed mapping flag for area creation.
- * This flag will make subsequent allocations to be private
- * to the process making them.
- **/
- #define AML_AREA_LINUX_MMAP_FLAG_PRIVATE (MAP_PRIVATE | MAP_ANONYMOUS)
- /**
- * Allowed mapping flag for area creation.
- * This flag will make subsequent allocations to be visible to
- * other processes of the system.
- **/
- #define AML_AREA_LINUX_MMAP_FLAG_SHARED (MAP_SHARED | MAP_ANONYMOUS)
/**
* This contains area operations implementation
* for linux area.
@@ -76,6 +40,21 @@ extern struct aml_area_ops aml_area_linux_ops;
**/
extern struct aml_area aml_area_linux;
+ /** Allowed policy flag for area creation. **/
+ enum aml_area_linux_policy {
+ /** default allocation policy **/
+ AML_AREA_LINUX_POLICY_DEFAULT,
+ /** Enforce binding on specified area nodeset or fail. **/
+ AML_AREA_LINUX_POLICY_BIND,
+ /**
+ * bind on specified area nodeset,
+ * fallback on other available nodes.
+ **/
+ AML_AREA_LINUX_POLICY_PREFERRED,
+ /** bind on specified area nodeset in a round-robin fashion. **/
+ AML_AREA_LINUX_POLICY_INTERLEAVE,
+ };
/**
* Implementation of aml_area_data for linux areas.
**/
@@ -83,9 +62,24 @@ struct aml_area_linux_data {
/** numanodes to use when allocating data **/
struct bitmask *nodeset;
/** binding policy **/
- int binding_flags;
- /** mmap flags **/
- int mmap_flags;
+ enum aml_area_linux_policy policy;
};
+ /**
+ * Options implementation for aml area linux mmap.
+ * @see mmap man(2) page.
+ **/
+ struct aml_area_linux_mmap_options {
+ /** hint address where to perform allocation **/
+ void *ptr;
+ /** Combination of mmap flags **/
+ int flags;
+ /** prot flags **/
+ int mode;
+ /** File descriptor backing and initializing memory. **/
+ int fd;
+ /** Offset in file descriptor for mapping **/
+ off_t offset;
+ };
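A hedged sketch of a file-backed mapping through these options (map_file is a hypothetical helper; it assumes sys/mman.h for the mmap(2) constants, and whether every field is honoured is up to the Linux area implementation):

#include <sys/mman.h>   /* MAP_PRIVATE, PROT_READ */

/* Map a read-only view of an already opened file descriptor "fd". */
static void *map_file(struct aml_area *linux_area, int fd, size_t size)
{
	struct aml_area_linux_mmap_options opts = {
		.ptr = NULL,          /* no placement hint */
		.flags = MAP_PRIVATE, /* mmap(2) flags */
		.mode = PROT_READ,    /* mmap(2) prot */
		.fd = fd,
		.offset = 0,
	};

	return aml_area_mmap(linux_area, size,
	                     (struct aml_area_mmap_options *)&opts);
}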
/**
@@ -94,20 +88,20 @@ struct aml_area_linux_data {
* Allocate and initialize a struct aml_area implemented by aml_area_linux
* operations.
* @param[out] area pointer to an uninitialized struct aml_area pointer to
- * receive the new area.
- * @param[in] mmap_flags flags to use when retrieving virtual memory with mmap
- * @param[in] binding_flags, flags to use when binding memory.
+ * receive the new area.
* @param[in] nodemask list of memory nodes to use. Default to allowed memory
- * nodes if NULL.
+ * nodes if NULL.
+ * @param[in] policy: The memory allocation policy to use when binding on
+ * nodeset.
* @return On success, returns 0 and area points to the new aml_area.
* @return On failure, sets area to NULL and returns one of AML error codes:
* - AML_ENOMEM if there wasn't enough memory available.
* - AML_EINVAL if inputs flags were invalid.
* - AML_EDOM the nodemask provided is out of bounds (allowed nodeset).
**/
- int aml_area_linux_create(struct aml_area **area, const int mmap_flags,
+ int aml_area_linux_create(struct aml_area **area,
                          const struct aml_bitmap *nodemask,
-                          const int binding_flags);
+                          const enum aml_area_linux_policy policy);
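A usage sketch mirroring the example updates earlier in this merge request (alloc_interleaved is a hypothetical helper; a NULL nodemask falls back to the allowed nodes):

/* Create an interleaved Linux area and allocate from it. */
static void *alloc_interleaved(size_t size, struct aml_area **area_out)
{
	if (aml_area_linux_create(area_out, NULL,
	                          AML_AREA_LINUX_POLICY_INTERLEAVE) != 0)
		return NULL;
	return aml_area_mmap(*area_out, size, NULL);
}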
/**
@@ -157,15 +151,15 @@ aml_area_linux_check_binding(struct aml_area_linux_data *area_data,
* with aml_area_linux_create().
* @param area_data: The structure containing mmap_flags for mmap call.
* nodemask and bind_flags fields are ignored.
- * @param ptr: A hint provided to mmap function.
* @param size: The size to allocate.
+ * @param opts: The options to pass to the underlying mmap() call; may be NULL.
* @return NULL on failure, else a valid pointer to memory.
* Upon failure, errno should be checked for further error investigations.
**/
void*
aml_area_linux_mmap(const struct aml_area_data *area_data,
-                     void *ptr,
-                     size_t size);
+                     size_t size,
+                     struct aml_area_mmap_options *opts);
/**
* \brief munmap hook for aml area.
......
@@ -11,7 +11,9 @@
#include "aml.h"
#include <stdlib.h>
- void *aml_area_mmap(const struct aml_area *area, void **ptr, size_t size)
+ void *aml_area_mmap(const struct aml_area *area,
+                     size_t size,
+                     struct aml_area_mmap_options *opts)
{
if (size == 0)
return NULL;
@@ -27,7 +29,7 @@ void *aml_area_mmap(const struct aml_area *area, void **ptr, size_t size)
}
- return area->ops->mmap(area->data, ptr, size);
+ return area->ops->mmap(area->data, size, opts);
}
int aml_area_munmap(const struct aml_area *area, void *ptr, size_t size)
......
@@ -52,107 +52,157 @@ static int aml_set_cuda_device(const int device, int *current_device)
return AML_SUCCESS;
}
- static inline int handle_malloc_error(const int cuda_error)
+ static inline int cuda_to_aml_alloc_error(const int cuda_error)
{
switch (cuda_error) {
case cudaErrorInvalidValue:
- aml_errno = AML_EINVAL;
- return 1;
+ return AML_EINVAL;
case cudaErrorMemoryAllocation:
- aml_errno = AML_ENOMEM;
- return 1;
+ return AML_ENOMEM;
case cudaErrorNotSupported:
- aml_errno = AML_ENOTSUP;
- return 1;
+ return AML_ENOTSUP;
case cudaErrorInsufficientDriver:
- aml_errno = AML_ENOTSUP;
- return 1;
+ return AML_ENOTSUP;
case cudaErrorNoDevice:
- aml_errno = AML_ENOTSUP;
- return 1;
+ return AML_ENOTSUP;
case cudaErrorInitializationError:
- aml_errno = AML_FAILURE;
- return 1;
+ return AML_FAILURE;
case cudaErrorHostMemoryAlreadyRegistered:
- aml_errno = AML_EBUSY;
- return 1;
+ return AML_EBUSY;
default:
- return 0;
+ return AML_SUCCESS;
}
}
- void *aml_area_cuda_mmap(const struct aml_area_data *area_data,
-                          void *ptr, size_t size)
+ int aml_area_cuda_mmap_opts(void **out,
+                             const size_t size,
+                             const int device, const int flags, void **ptr_map)
{
- (void)ptr;
- int aml_error;
- int cuda_error;
int current_device;
- void *ret;
+ int error = AML_SUCCESS;
+ int cuda_flags;
+ // Set target device.
+ if (device >= 0) {
+ error = aml_set_cuda_device(device, &current_device);
+ if (error != AML_SUCCESS)
+ goto cuda_fail;
+ }
+ // Unified Memory Allocation
+ if (flags & AML_AREA_CUDA_FLAG_ALLOC_UNIFIED) {
+ cuda_flags = cudaMemAttachHost;
- struct aml_area_cuda_data *data =
- (struct aml_area_cuda_data *)area_data;
+ if (flags & AML_AREA_CUDA_FLAG_ALLOC_GLOBAL)
+ cuda_flags = cudaMemAttachGlobal;
- // Set area target device.
- if (data->device >= 0) {
- aml_error = aml_set_cuda_device(data->device, &current_device);
- if (aml_error != AML_SUCCESS) {
- aml_errno = -aml_error;
- return NULL;
+ error = cudaMallocManaged(out, size, cuda_flags);
+ if (error != cudaSuccess)
+ goto cuda_fail;
}
+ // Mapped Allocation
+ else if (flags & AML_AREA_CUDA_FLAG_ALLOC_MAPPED) {
+ cuda_flags = cudaHostAllocMapped;
+ if (flags & AML_AREA_CUDA_FLAG_ALLOC_GLOBAL)
+ cuda_flags |= cudaHostAllocPortable;
+ if (flags & AML_AREA_CUDA_FLAG_ALLOC_HOST) {
+ error = cudaHostAlloc(out, size, cuda_flags);
+ if (error != cudaSuccess)
+ goto cuda_fail;
+ } else if (*ptr_map != NULL) {
+ error = cudaHostRegister(*ptr_map, size, cuda_flags);
+ if (error != cudaSuccess)
+ goto cuda_fail;
+ *out = *ptr_map;
+ } else {
+ error = AML_EINVAL;
+ goto fail;
+ }
+ }
+ // Host Allocation
+ else if (flags & AML_AREA_CUDA_FLAG_ALLOC_HOST) {
+ cuda_flags = cudaHostAllocDefault;
+ if (flags & AML_AREA_CUDA_FLAG_ALLOC_GLOBAL)
+ cuda_flags |= cudaHostAllocPortable;
+ // Actual allocation