Commit c3a501b9 authored by Nicolas Denoyelle, committed by Swann Perarnau

[feature] add area options on mmap

## Change area mmap interface to include implementation-specific options.

### Summary of changes in mmap interface
Previous mmap interface was:
```
void *aml_area_mmap(const struct aml_area *area, void **ptr, size_t size);
```
First, in this interface, the purpose of ptr is fuzzy: it is implementation specific, and neither
the user nor the generic AML interface knows what to do with it in a generic way.
Second, this interface does not allow for extensible options. In particular, Linux mmap() takes
multiple arguments that cannot be passed elegantly through this interface.
Instead, we replace aml_area_mmap() with this new interface:
```
void *aml_area_mmap(const struct aml_area *area, size_t size, struct aml_area_mmap_options *options);
```
where the last argument clearly states that it is used for options and can always be NULL, so the interface can be used without any knowledge of the options implementation.
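For illustration, a minimal sketch of the new calling convention, assuming the default Linux area `aml_area_linux` declared in the headers below (error handling elided):
```
#include <assert.h>
#include "aml.h"

void example(size_t size)
{
	/* Default Linux area; its address is a valid generic area handle. */
	struct aml_area *area = &aml_area_linux;

	/* No implementation-specific options are needed: pass NULL. */
	void *buf = aml_area_mmap(area, size, NULL);
	assert(buf != NULL);
	aml_area_munmap(area, buf, size);
}
```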
parent 001799b6
......@@ -34,16 +34,14 @@ int main(int argc, char *argv[])
long int N = atol(argv[3]);
unsigned long memsize = sizeof(double)*N*N;
aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
a = aml_area_mmap(slow, NULL, memsize);
b = aml_area_mmap(slow, NULL, memsize);
c = aml_area_mmap(fast, NULL, memsize);
a = aml_area_mmap(slow, memsize, NULL);
b = aml_area_mmap(slow, memsize, NULL);
c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
double alpha = 1.0, beta = 1.0;
......
......@@ -77,17 +77,15 @@ int main(int argc, char* argv[])
assert(!aml_tiling_2d_create(&tiling_col, AML_TILING_TYPE_2D_COLMAJOR,
tilesize, memsize, N/T , N/T));
aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
/* allocation */
a = aml_area_mmap(slow, NULL, memsize);
b = aml_area_mmap(slow, NULL, memsize);
c = aml_area_mmap(fast, NULL, memsize);
a = aml_area_mmap(slow, memsize, NULL);
b = aml_area_mmap(slow, memsize, NULL);
c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
size_t ntilerows, ntilecols, tilerowsize, tilecolsize, rowsize, colsize;
......
......@@ -105,20 +105,18 @@ int main(int argc, char* argv[])
assert(!aml_tiling_1d_create(&tiling_prefetch,
tilesize*(N/T), memsize));
aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
assert(!aml_dma_linux_seq_create(&dma, 2));
assert(!aml_scratch_par_create(&sa, fast, slow, dma, tiling_prefetch, (size_t)2, (size_t)2));
assert(!aml_scratch_par_create(&sb, fast, slow, dma, tiling_prefetch, (size_t)2, (size_t)2));
/* allocation */
a = aml_area_mmap(slow, NULL, memsize);
b = aml_area_mmap(slow, NULL, memsize);
c = aml_area_mmap(fast, NULL, memsize);
a = aml_area_mmap(slow, memsize, NULL);
b = aml_area_mmap(slow, memsize, NULL);
c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
size_t ntilerows, ntilecols, tilerowsize, tilecolsize, rowsize, colsize;
......
......@@ -53,9 +53,9 @@ int main(int argc, char *argv[])
chunk_msz = MEMSIZE/(numthreads*CHUNKING);
esz = chunk_msz/sizeof(unsigned long);
}
a = aml_area_mmap(slow, NULL, MEMSIZE);
b = aml_area_mmap(slow, NULL, MEMSIZE);
c = aml_area_mmap(fast, NULL, MEMSIZE);
a = aml_area_mmap(slow, MEMSIZE, NULL);
b = aml_area_mmap(slow, MEMSIZE, NULL);
c = aml_area_mmap(fast, MEMSIZE, NULL);
assert(a != NULL && b != NULL && c != NULL);
/* create virtually accessible address range, backed by slow memory */
......
......@@ -64,11 +64,9 @@ int main(int argc, char *argv[])
/* initialize all the supporting struct */
assert(!aml_tiling_1d_create(&tiling, tilesz, memsize));
aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
assert(!aml_dma_linux_par_create(&dma, numthreads*2));
assert(!aml_scratch_seq_create(&sa, fast, slow, dma, tiling,
......@@ -77,9 +75,9 @@ int main(int argc, char *argv[])
(size_t)2*numthreads, (size_t)1));
/* allocation */
a = aml_area_mmap(slow, NULL, memsize);
b = aml_area_mmap(slow, NULL, memsize);
c = aml_area_mmap(fast, NULL, memsize);
a = aml_area_mmap(slow, memsize, NULL);
b = aml_area_mmap(slow, memsize, NULL);
c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
unsigned long esize = memsize/sizeof(unsigned long);
......
......@@ -95,11 +95,9 @@ int main(int argc, char *argv[])
/* initialize all the supporting struct */
assert(!aml_tiling_1d_create(&tiling, tilesz, memsize));
aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
assert(!aml_dma_linux_seq_create(&dma, numthreads*2));
assert(!aml_scratch_par_create(&sa, fast, slow, dma, tiling,
......@@ -108,9 +106,9 @@ int main(int argc, char *argv[])
2*numthreads, numthreads));
/* allocation */
a = aml_area_mmap(slow, NULL, memsize);
b = aml_area_mmap(slow, NULL, memsize);
c = aml_area_mmap(fast, NULL, memsize);
a = aml_area_mmap(slow, memsize, NULL);
b = aml_area_mmap(slow, memsize, NULL);
c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
unsigned long esize = memsize/sizeof(unsigned long);
......
......@@ -101,11 +101,9 @@ int main(int argc, char *argv[])
/* initialize all the supporting struct */
assert(!aml_tiling_1d_create(&tiling, tilesz, memsize));
aml_area_linux_create(&slow, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
assert(slow != NULL);
aml_area_linux_create(&fast, AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
&fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
assert(fast != NULL);
assert(!aml_dma_linux_seq_create(dma, (size_t)numthreads*4));
assert(!aml_scratch_par_create(&sa, fast, slow, dma, tiling,
......@@ -114,9 +112,9 @@ int main(int argc, char *argv[])
(size_t)2*numthreads, (size_t)numthreads));
/* allocation */
a = aml_area_mmap(slow, NULL, memsize);
b = aml_area_mmap(slow, NULL, memsize);
c = aml_area_mmap(fast, NULL, memsize);
a = aml_area_mmap(slow, memsize, NULL);
b = aml_area_mmap(slow, memsize, NULL);
c = aml_area_mmap(fast, memsize, NULL);
assert(a != NULL && b != NULL && c != NULL);
unsigned long esize = memsize/sizeof(unsigned long);
......@@ -127,7 +125,7 @@ int main(int argc, char *argv[])
}
/* run kernel */
struct winfo *wis = aml_area_mmap(slow, NULL, numthreads * sizeof(struct winfo));
struct winfo *wis = aml_area_mmap(slow, numthreads * sizeof(struct winfo), NULL);
for(unsigned long i = 0; i < numthreads; i++) {
wis[i].tid = i;
pthread_create(&wis[i].th, NULL, &th_work, (void*)&wis[i]);
......
......@@ -266,9 +266,9 @@ main(int argc, char *argv[])
aml_init(&argc, &argv);
size_t size = sizeof(STREAM_TYPE)*(STREAM_ARRAY_SIZE+OFFSET);
struct aml_area *area = aml_area_linux;
a = aml_area_mmap(area, NULL, size);
b = aml_area_mmap(area, NULL, size);
c = aml_area_mmap(area, NULL, size);
a = aml_area_mmap(area, size, NULL);
b = aml_area_mmap(area, size, NULL);
c = aml_area_mmap(area, size, NULL);
/* Get initial value for system clock. */
#pragma omp parallel for
......
......@@ -104,6 +104,14 @@ int aml_finalize(void);
**/
struct aml_area_data;
/**
* Opaque handle to pass additional options to an area mmap hook.
* This is implementation specific and is not part of the
* generic interface; rather, it customizes area behaviour
* on a per-mmap basis.
**/
struct aml_area_mmap_options;
/**
* aml_area_ops is a structure containing implementations
* of an area operations.
......@@ -114,21 +122,20 @@ struct aml_area_ops {
/**
* Building block for coarse grain allocator of virtual memory.
*
* @param data: Opaque handle to implementation specific data.
* @param ptr: A virtual address to be used by underlying
* implementation.
* Can be NULL.
* @param size: The minimum size of allocation.
* @param[in] data: Opaque handle to implementation specific data.
* @param[in] size: The minimum size of allocation.
* Is greater than 0. Must not fail unless not enough
* memory is available, or the provided options are not
* suitable.
* In case of failure, aml_errno must be set to an appropriate
* value.
* @param[in, out] opts: Opaque handle to pass additional options to the area
* mmap hook. Can be NULL; implementations must accept NULL opts.
* @return a pointer to allocated memory object.
**/
void* (*mmap)(const struct aml_area_data *data,
void *ptr,
size_t size);
size_t size,
struct aml_area_mmap_options *opts);
/**
* Building block for unmapping of virtual memory mapped with mmap()
......@@ -162,14 +169,16 @@ struct aml_area {
/**
* Low-level function for getting memory from an area.
* @param area: A valid area implementing access to target memory.
* @param ptr: Implementation specific argument. See specific header.
* @param size: The usable size of memory returned.
* @param[in] area: A valid area implementing access to target memory.
* @param[in] size: The usable size of memory returned.
* @param[in, out] opts: Opaque handle to pass additional options to the
* area mmap hook. Can be NULL.
* @return virtual memory from this area with at least queried size bytes.
* @return NULL on failure, with aml_errno set to the appropriate error
* code.
**/
void *aml_area_mmap(const struct aml_area *area,
void **ptr,
size_t size);
void *aml_area_mmap(const struct aml_area *area,
size_t size,
struct aml_area_mmap_options *opts);
/**
* Release data provided with aml_area_mmap() and the same area.
......
......@@ -27,32 +27,103 @@
**/
/**
* Structure containing aml area hooks for cuda implementation.
* For now there is only a single implementation of the hooks.
* This implementation will choose between different cuda functions.
* Default cuda area flags:
* * Allocation on device only,
* * Allocation visible by a single device,
* * Allocation not mapped on host memory.
**/
extern struct aml_area_ops aml_area_cuda_ops;
#define AML_AREA_CUDA_FLAG_DEFAULT 0
/**
* Default cuda area with private mapping in current device.
* Can be used out of the box with aml_area_*() functions.
* Device allocation flag.
* Default behaviour is allocation on device.
* If this flag is set then allocation will
* be on host.
**/
extern struct aml_area aml_area_cuda;
#define AML_AREA_CUDA_FLAG_ALLOC_HOST (1 << 0)
/**
* Mapping flag.
* Default behaviour is an unmapped allocation.
* If set, the pointer returned by the mmap function
* will be host-side memory mapped on the device.
* A pointer to device memory can then be retrieved
* by calling cudaHostGetDevicePointer().
* If AML_AREA_CUDA_FLAG_ALLOC_HOST is set, then
* host side memory will be allocated. Else,
* "ptr" field of mmap options will be used to map
* device memory ("ptr" must not be NULL).
*
* @see cudaHostRegister(), cudaHostAlloc().
**/
#define AML_AREA_CUDA_FLAG_ALLOC_MAPPED (1 << 1)
/**
* Allocation flags to pass to cudaMallocManaged().
* @see cuda runtime API documentation / memory management.
* Unified memory flag.
* If this flag is set, then allocation will create
* a unified memory pointer usable on host and device.
* Additionally, AML_AREA_CUDA_FLAG_ALLOC_HOST and
* AML_AREA_CUDA_FLAG_ALLOC_MAPPED will be ignored.
*
* @see cudaMallocManaged()
**/
#define AML_AREA_CUDA_FLAG_ALLOC_UNIFIED (1 << 2)
/**
* Unified memory setting flag.
* If AML_AREA_CUDA_FLAG_ALLOC_UNIFIED is set,
* then this flag is checked to set the
* cudaMallocManaged() flag cudaMemAttachGlobal.
* Else, if AML_AREA_CUDA_FLAG_ALLOC_MAPPED
* or AML_AREA_CUDA_FLAG_ALLOC_HOST is set,
* then this flag is checked to set the cudaHostAlloc()
* flag cudaHostAllocPortable.
* The default behaviour is to make allocation
* visible from a single device. If this flag is set,
* then allocation will be visible on all devices.
*
* @see cudaMallocManaged()
**/
enum aml_area_cuda_flags {
AML_AREA_CUDA_ATTACH_GLOBAL,
AML_AREA_CUDA_ATTACH_HOST,
#define AML_AREA_CUDA_FLAG_ALLOC_GLOBAL (1 << 3)
/**
* Options that can optionally be passed to the mmap
* call.
**/
struct aml_area_cuda_mmap_options {
/**
* Specify a different device for one mmap call.
* If device < 0, the area device is used.
**/
int device;
/**
* Host memory pointer used for mapped allocations.
* If flag AML_AREA_CUDA_FLAG_ALLOC_MAPPED is set
* and ptr is NULL, ptr will be overwritten with
* host allocated memory and will have to be freed
* using cudaFreeHost().
**/
void *ptr;
};
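For illustration, a hedged sketch of a mapped host allocation using these options; the header path, and passing the implementation-specific struct through the opaque options pointer with a cast, are assumptions (error handling elided):
```
#include <stddef.h>
#include "aml.h"
#include "aml/area/cuda.h"  /* assumed header path for the cuda area */

void *cuda_mapped_alloc(size_t size)
{
	struct aml_area *area;

	/* Host-side allocation, mapped on the area device. */
	if (aml_area_cuda_create(&area, 0,
				 AML_AREA_CUDA_FLAG_ALLOC_MAPPED |
				 AML_AREA_CUDA_FLAG_ALLOC_HOST) != AML_SUCCESS)
		return NULL;

	struct aml_area_cuda_mmap_options opts = {
		.device = -1, /* negative: keep the area device */
		.ptr = NULL,  /* ALLOC_HOST is set, so host memory is allocated */
	};

	/* The generic call forwards opts to the cuda mmap hook. */
	return aml_area_mmap(area, size,
			     (struct aml_area_mmap_options *)&opts);
}
```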
/** aml area hooks for cuda implementation. **/
extern struct aml_area_ops aml_area_cuda_ops;
/**
* Default cuda area:
* Allocation on device, visible by a single device,
* and not mapped on host memory.
**/
extern struct aml_area aml_area_cuda;
/** Implementation of aml_area_data for cuda areas. **/
struct aml_area_cuda_data {
/** allocation flags in cuda format **/
/** Area allocation flags. **/
int flags;
/** The device id on which allocation is done. **/
/**
* The device id on which allocation is done.
* If device < 0, use current device.
**/
int device;
};
......@@ -62,8 +133,8 @@ struct aml_area_cuda_data {
* @param[out] area pointer to an uninitialized struct aml_area pointer to
* receive the new area.
* @param[in] device: A valid cuda device id, i.e. from 0 to num_devices-1.
* If device id is negative, then no cuda device will be selected when
* using aml_area_cuda_mmap().
* If device id is negative, then the current cuda device will be used when
* calling aml_area_cuda_mmap().
* @param[in] flags: Allocation flags.
*
* @return AML_SUCCESS on success and area points to the new aml_area.
......@@ -72,11 +143,10 @@ struct aml_area_cuda_data {
* of devices.
* @return -AML_ENOMEM if space to carry area cannot be allocated.
*
* @see enum aml_area_cuda_flags.
* @see AML_AREA_CUDA_FLAG_*.
**/
int aml_area_cuda_create(struct aml_area **area,
const int device,
const enum aml_area_cuda_flags flags);
const int device, const int flags);
/**
* \brief Cuda area destruction.
......@@ -94,8 +164,9 @@ void aml_area_cuda_destroy(struct aml_area **area);
* This function is a wrapper on cuda alloc functions.
* It uses area settings to: select device on which to perform allocation,
* select allocation function and set its parameters.
* Allocations can be standalone on device, shared across multiple devices,
* and backed with cpu memory.
* Any pointer obtained through aml_area_cuda_mmap() must be unmapped with
* aml_area_cuda_munmap().
*
* Device selection is not thread safe and requires setting the global
* state of the cuda library. When selecting a device, the allocation may
* succeed while restoring the original device context may fail. In that
......@@ -103,31 +174,38 @@ void aml_area_cuda_destroy(struct aml_area **area);
* function in order to catch the error when return value is not NULL.
*
* @param[in] area_data: The structure containing cuda area settings.
* @param[in, out] ptr: If ptr is NULL, then call cudaMallocManaged() with
* area flags. Memory will be allocated only device side.
* If ptr is not NULL:
* * ptr must point to a valid memory area.
* Device side memory will be mapped on this host side memory.
* According to cuda runtime API documentation
* (cudaHostRegister()), host side memory pages will be locked or allocation
* will fail.
* @param[in] size: The size to allocate.
* @param[in] options: A struct aml_area_cuda_mmap_options *. If its device
* field is non-negative, it will be used to select the target device.
* If the area flag AML_AREA_CUDA_FLAG_ALLOC_MAPPED is set and
* AML_AREA_CUDA_FLAG_ALLOC_HOST is not set, then the options field "ptr" must
* not be NULL and must point to host memory that can be mapped on the GPU.
*
* @return A cuda pointer to allocated device memory on success, NULL on
* failure. If failure occurs, aml_errno variable is set with one of the
* following values:
* * AML_ENOTSUP is one of the cuda calls failed with error:
* @return NULL on failure with aml_errno set to one of the following error
* codes:
* * AML_ENOTSUP if one of the cuda calls failed with error:
* cudaErrorInsufficientDriver, cudaErrorNoDevice.
* * AML_EINVAL if target device id is not valid.
* * AML_EBUSY if a specific device was requested and call to failed with error
* cudaErrorDeviceAlreadyInUse, or if region was already mapped on device.
* * AML_EINVAL if the target device id is not valid or the provided
* arguments are not compatible.
* * AML_EBUSY if a specific device was requested but was already in use.
* * AML_ENOMEM if memory allocation failed with error
* cudaErrorMemoryAllocation.
* * AML_FAILURE if one of the cuda calls resulted in error
* cudaErrorInitializationError.
* @return A cuda pointer usable on device and host if area flags contain
* AML_AREA_CUDA_FLAG_ALLOC_UNIFIED.
* @return A pointer to host memory on which one can call
* cudaHostGetDevicePointer() to get a pointer to mapped device memory, if
* AML_AREA_CUDA_FLAG_ALLOC_MAPPED is set.
* The obtained pointer must be unmapped with aml_area_cuda_munmap(). If host-side
* memory was provided as an mmap option, it still has to be freed by the caller.
* @return A pointer to host memory if area flag AML_AREA_CUDA_FLAG_ALLOC_HOST
* is set.
* @return A pointer to device memory if no flag is set.
*
* @see AML_AREA_CUDA_FLAG_*
**/
void *aml_area_cuda_mmap(const struct aml_area_data *area_data,
void *ptr, size_t size);
size_t size, struct aml_area_mmap_options *options);
/**
* \brief munmap hook for aml area.
......
......@@ -28,42 +28,6 @@
* @{
**/
/**
* Allowed binding flag for area creation.
* This flag will apply strict binding to the selected bitmask.
* Subsequent allocations will fail if they cannot enforce binding
* on the bitmask.
**/
#define AML_AREA_LINUX_BINDING_FLAG_BIND (MPOL_BIND)
/**
* Allowed binding flag for area creation.
* This flag will make subsequent allocations to interleave
* pages on memories of the bitmask.
**/
#define AML_AREA_LINUX_BINDING_FLAG_INTERLEAVE (MPOL_INTERLEAVE)
/**
* Allowed binding flag for area creation.
* This flag will make subsequent allocations to bound to the
* nodes of bitmask if possible, else to some other node.
**/
#define AML_AREA_LINUX_BINDING_FLAG_PREFERRED (MPOL_PREFERRED)
/**
* Allowed mapping flag for area creation.
* This flag will make subsequent allocations to be private
* to the process making them.
**/
#define AML_AREA_LINUX_MMAP_FLAG_PRIVATE (MAP_PRIVATE | MAP_ANONYMOUS)
/**
* Allowed mapping flag for area creation.
* This flag will make subsequent allocations to be visible to
* other processes of the system.
**/
#define AML_AREA_LINUX_MMAP_FLAG_SHARED (MAP_SHARED | MAP_ANONYMOUS)
/**
* This contains area operations implementation
* for linux area.
......@@ -76,6 +40,21 @@ extern struct aml_area_ops aml_area_linux_ops;
**/
extern struct aml_area aml_area_linux;
/** Allowed policy flags for area creation. **/
enum aml_area_linux_policy {
/** Default allocation policy. **/
AML_AREA_LINUX_POLICY_DEFAULT,
/** Enforce binding on specified area nodeset or fail. **/
AML_AREA_LINUX_POLICY_BIND,
/**
* Bind on specified area nodeset,
* falling back on other available nodes.
**/
AML_AREA_LINUX_POLICY_PREFERRED,
/** Bind on specified area nodeset in a round-robin fashion. **/
AML_AREA_LINUX_POLICY_INTERLEAVE,
};
/**
* Implementation of aml_area_data for linux areas.
**/
......@@ -83,9 +62,24 @@ struct aml_area_linux_data {
/** numanodes to use when allocating data **/
struct bitmask *nodeset;
/** binding policy **/
int binding_flags;
/** mmap flags **/
int mmap_flags;
enum aml_area_linux_policy policy;
};
/**
* Options implementation for aml area linux mmap.
* @see the mmap(2) man page.
**/
struct aml_area_linux_mmap_options {
/** hint address where to perform allocation **/
void *ptr;
/** Combination of mmap flags **/
int flags;
/** prot flags **/
int mode;
/** File descriptor backing and initializing memory. **/
int fd;
/** Offset in file descriptor for mapping **/
off_t offset;
};
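For illustration, a hedged sketch of a file-backed mapping through these options; the header path and the cast through the opaque options type are assumptions, and fd must reference a file of at least size bytes (error handling elided):
```
#include <sys/mman.h>
#include "aml.h"
#include "aml/area/linux.h"  /* assumed header path for the linux area */

void *map_file(struct aml_area *area, int fd, size_t size)
{
	struct aml_area_linux_mmap_options opts = {
		.ptr = NULL,                    /* no placement hint */
		.flags = MAP_SHARED,            /* mmap flags */
		.mode = PROT_READ | PROT_WRITE, /* prot flags */
		.fd = fd,                       /* backing file descriptor */
		.offset = 0,                    /* map from the start of the file */
	};

	/* The generic call forwards opts to the linux mmap hook. */
	return aml_area_mmap(area, size,
			     (struct aml_area_mmap_options *)&opts);
}
```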
/**
......@@ -94,20 +88,20 @@ struct aml_area_linux_data {
* Allocate and initialize a struct aml_area implemented by aml_area_linux
* operations.
* @param[out] area pointer to an uninitialized struct aml_area pointer to
* receive the new area.
* @param[in] mmap_flags flags to use when retrieving virtual memory with mmap
* @param[in] binding_flags, flags to use when binding memory.
* receive the new area.
* @param[in] nodemask list of memory nodes to use. Default to allowed memory
* nodes if NULL.
* nodes if NULL.
* @param[in] policy: The memory allocation policy to use when binding on
* nodeset.
* @return On success, returns 0 and area points to the new aml_area.
* @return On failure, sets area to NULL and returns one of AML error codes:
* - AML_ENOMEM if there wasn't enough memory available.
* - AML_EINVAL if input flags were invalid.
* - AML_EDOM the nodemask provided is out of bounds (allowed nodeset).
**/
int aml_area_linux_create(struct aml_area **area, const int mmap_flags,
int aml_area_linux_create(struct aml_area **area,
const struct aml_bitmap *nodemask,
const int binding_flags);
const enum aml_area_linux_policy policy);
/**
......@@ -157,15 +151,15 @@ aml_area_linux_check_binding(struct aml_area_linux_data *area_data,
* with aml_area_linux_create().
* @param area_data: The structure containing mmap_flags for mmap call.
* nodemask and bind_flags fields are ignored.
* @param ptr: A hint provided to mmap function.
* @param size: The size to allocate.
* @param opts: A struct aml_area_linux_mmap_options *; can be NULL.
* @return NULL on failure, else a valid pointer to memory.
* Upon failure, errno should be checked for further error investigations.
**/
void*
aml_area_linux_mmap(const struct aml_area_data *area_data,
void *ptr,
size_t size);
size_t size,
struct aml_area_mmap_options *opts);
/**
* \brief munmap hook for aml area.
......
......@@ -11,7 +11,9 @@
#include "aml.h"
#include <stdlib.h>
void *aml_area_mmap(const struct aml_area *area, void **ptr, size_t size)
void *aml_area_mmap(const struct aml_area *area,
size_t size,
struct aml_area_mmap_options *opts)
{
if (size == 0)
return NULL;
......@@ -27,7 +29,7 @@ void *aml_area_mmap(const struct aml_area *area, void **ptr, size_t size)
}
return area->ops->mmap(area->data, ptr, size);
return area->ops->mmap(area->data, size, opts);
}
int aml_area_munmap(const struct aml_area *area, void *ptr, size_t size)
......
......@@ -52,107 +52,157 @@ static int aml_set_cuda_device(const int device, int *current_device)
return AML_SUCCESS;
}
static inline int handle_malloc_error(const int cuda_error)
static inline int cuda_to_aml_alloc_error(const int cuda_error)
{
switch (cuda_error) {
case cudaErrorInvalidValue:
aml_errno = AML_EINVAL;
return 1;
return AML_EINVAL;
case cudaErrorMemoryAllocation:
aml_errno = AML_ENOMEM;
return 1;
return AML_ENOMEM;
case cudaErrorNotSupported:
aml_errno = AML_ENOTSUP;
return 1;
return AML_ENOTSUP;
case cudaErrorInsufficientDriver:
aml_errno = AML_ENOTSUP;
return 1;
return AML_ENOTSUP;
case cudaErrorNoDevice:
aml_errno = AML_ENOTSUP;
return 1;
return AML_ENOTSUP;
case cudaErrorInitializationError:
aml_errno = AML_FAILURE;
return 1;
return AML_FAILURE;
case cudaErrorHostMemoryAlreadyRegistered:
aml_errno = AML_EBUSY;
return 1;
return AML_EBUSY;
default:
return 0;
return AML_SUCCESS;
}
}
void *aml_area_cuda_mmap(const struct aml_area_data *area_data,
void *ptr, size_t size)
int aml_area_cuda_mmap_opts(void **out,
const size_t size,
const int device, const int flags, void **ptr_map)
{
(void)ptr;
int aml_error;
int cuda_error;
int current_device;
void *ret;
int error = AML_SUCCESS;
int cuda_flags;
// Set target device.
if (device >= 0) {
error = aml_set_cuda_device(device, &current_device);
if (error != AML_SUCCESS)
goto cuda_fail;
}
// Unified Memory Allocation
if (flags & AML_AREA_CUDA_FLAG_ALLOC_UNIFIED) {
cuda_flags = cudaMemAttachHost;
struct aml_area_cuda_data *data =
(struct aml_area_cuda_data *)area_data;
if (flags & AML_AREA_CUDA_FLAG_ALLOC_GLOBAL)
cuda_flags = cudaMemAttachGlobal;
// Set area target device.
if (data->device >= 0) {
aml_error = aml_set_cuda_device(data->device, &current_device);
if (aml_error != AML_SUCCESS) {
aml_errno = -aml_error;
return NULL;
error = cudaMallocManaged(out, size, cuda_flags);
if (error != cudaSuccess)
goto cuda_fail;
}
// Mapped Allocation
else if (flags & AML_AREA_CUDA_FLAG_ALLOC_MAPPED) {
cuda_flags = cudaHostAllocMapped;
if (flags & AML_AREA_CUDA_FLAG_ALLOC_GLOBAL)
cuda_flags |= cudaHostAllocPortable;
if (flags & AML_AREA_CUDA_FLAG_ALLOC_HOST) {
error = cudaHostAlloc(out, size, cuda_flags);
if (error != cudaSuccess)
goto cuda_fail;
} else if (*ptr_map != NULL) {
error = cudaHostRegister(*ptr_map, size, cuda_flags);
if (error != cudaSuccess)
goto cuda_fail;
*out = *ptr_map;
} else {
error = AML_EINVAL;
goto fail;
}
}
// Host Allocation
else if (flags & AML_AREA_CUDA_FLAG_ALLOC_HOST) {
cuda_flags = cudaHostAllocDefault;
if (flags & AML_AREA_CUDA_FLAG_ALLOC_GLOBAL)
cuda_flags |= cudaHostAllocPortable;
// Actual allocation
if (ptr == NULL)