Commit 80669c37 authored by Swann Perarnau's avatar Swann Perarnau

Add first working version: limit numa allocs

This is the first working version of the library. It does very little:
- only numa support
- one allocation per node only
- limited tests
- limited set of functions

Nevertheless, this gives a good idea of what the API should look like, and the
kind of benchmarks we can write with it.
parent cf9e0d1a
# pass our local m4 macro directory to aclocal
ACLOCAL_AMFLAGS = -I m4
# build the library first, then the tests
SUBDIRS = src tests
# install the pkg-config description next to the library
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = aml.pc
# ship the bootstrap script and docs in the dist tarball
EXTRA_DIST = autogen.sh aml.pc README.markdown
# pkg-config description for libaml
prefix=@prefix@
# fix: was @prefix@, which breaks installs with a distinct exec-prefix
exec_prefix=@exec_prefix@
libdir=@libdir@
includedir=@includedir@

Name: libaml
Description: Argonne's Memory Library
# fix: link line must name the library, not just its directory
Libs: -L${libdir} -laml
Cflags: -I${includedir}
Version: 0.0.1
#!/bin/sh
# Regenerate the autotools build system (configure, Makefile.in, m4 macros).
autoreconf --verbose --install --force
# see semver.org for version info
AC_INIT([aml],[0.0.1],[swann@anl.gov])
# are we in the right source dir ?
AC_CONFIG_SRCDIR([src/aml.h])
# build artefacts in separate dir
AC_CONFIG_AUX_DIR([m4])
AC_CONFIG_MACRO_DIR([m4])
# automake should fail on any error
AM_INIT_AUTOMAKE([-Wall -Werror foreign 1.12])
AM_PROG_AR
# check for libtool
LT_INIT
# check for programs
AC_LANG([C])
AC_PROG_CC
AC_PROG_CC_STDC
AC_PROG_CPP
AC_TYPE_SIZE_T
# support for testing with valgrind
AC_ARG_ENABLE(valgrind,
[AS_HELP_STRING([--enable-valgrind],[Also valgrind on checks (default is no).])],
[valgrind=true],[valgrind=false])
# note: the double brackets protect the shell test from m4 quote removal
if [[ "x$valgrind" = xtrue ]]; then
AC_PATH_PROG(VALGRIND, valgrind, no)
if [[ "x$VALGRIND" = xno ]]; then
AC_MSG_ERROR([Valgrind not found in PATH. ])
fi
fi
AM_CONDITIONAL([TEST_VALGRIND],[test "x$valgrind" = xtrue])
# dependencies: libnuma provides numa.h and the mempolicy/move_pages API
AC_CHECK_HEADERS(numa.h)
AC_CHECK_LIB(numa, move_pages)
AC_CONFIG_HEADERS([src/config.h])
AC_CONFIG_FILES([Makefile
src/Makefile
tests/Makefile
aml.pc])
AC_OUTPUT
# libtool library holding the aml implementation
lib_LTLIBRARIES = libaml.la
LIBCSOURCES = aml.c
LIBHSOURCES = aml.h
libaml_la_SOURCES = $(LIBCSOURCES) $(LIBHSOURCES)
# public headers, installed in $(includedir)
include_HEADERS = $(LIBHSOURCES)
#include <aml.h>
#include <assert.h>
#include <fcntl.h>
#include <numa.h>
#include <numaif.h>
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
/* directory in which the backing files for nodes are created */
const char *tmpfs = "/tmp";
/* NOTE: macro evaluates both arguments twice; do not pass expressions with
 * side effects */
#define min(a,b) ((a) < (b)? (a) : (b))
/* Initializes the library. argc/argv are accepted for symmetry with
 * MPI-style init functions but are currently unused.
 * Returns 0 on success. */
int aml_init(int *argc, char **argv[])
{
	/* silence unused-parameter warnings; kept for future use */
	(void)argc;
	(void)argv;
	return 0;
}
/* Tears down the library. Nothing to release yet; returns 0 on success. */
int aml_finalize(void)
{
return 0;
}
/* Initializes a memory node: creates a temporary file under tmpfs and
 * pre-faults maxsize bytes of it onto the NUMA nodes selected by mask.
 * On success, node->path (owned by the node, freed in aml_node_destroy),
 * node->fd and node->maxsize are set. Returns 0 on success; internal
 * failures abort via assert. */
int aml_node_init(struct aml_node *node, struct bitmask *mask, size_t maxsize)
{
	char *template, zero[4096];
	size_t pos;
	ssize_t count;
	int fd;
	int mode;
	unsigned long oldmask[NUMA_NUM_NODES];
	assert(node != NULL);
	/* new, temporary file, to hold data */
	template = calloc(1024, sizeof(char));
	assert(template != NULL);
	snprintf(template, 1024, "%s/%u.XXXXXX", tmpfs, getpid());
	fd = mkstemp(template);
	assert(fd != -1);
	/* the buffer must be initialized before being written out: writing
	 * indeterminate bytes is undefined behavior and trips valgrind */
	memset(zero, 0, sizeof(zero));
	/* as weird as it sounds, using mempolicy here forces the
	 * future writes to end up in the right memory node.
	 * Only necessary on first write to a page.
	 * We retrieve the current policy first to restore it later
	 */
	assert(!get_mempolicy(&mode, oldmask, NUMA_NUM_NODES, 0, 0));
	assert(!set_mempolicy(MPOL_BIND, mask->maskp, mask->size));
	/* write zeros all over to pull its pages in memory */
	for(pos = 0; pos < maxsize; pos += count)
		if((count = write(fd, zero, min(maxsize - pos, sizeof(zero)))) <= 0)
			break;
	/* restore the original mempolicy */
	assert(!set_mempolicy(mode, oldmask, NUMA_NUM_NODES));
	node->path = template;
	node->fd = fd;
	node->maxsize = maxsize;
	return 0;
}
/* Destroys a node created by aml_node_init: closes and removes the backing
 * file and releases the path string allocated by aml_node_init (fixes a
 * memory leak). Returns 0 on success. */
int aml_node_destroy(struct aml_node *node)
{
	assert(node != NULL);
	close(node->fd);
	unlink(node->path);
	free(node->path);
	node->path = NULL;
	return 0;
}
/* Creates a block-structured allocation of memsize bytes, split into
 * memsize/blocksize blocks, all initially mapped from node's backing file.
 * memsize must be a non-zero multiple of blocksize. Returns 0 on success;
 * failures abort via assert. The nodemap array is owned by the allocation
 * and released by aml_free. */
int aml_malloc(struct aml_alloc *a, size_t memsize, size_t blocksize,
		struct aml_node *node)
{
	assert(a != NULL);
	assert(node != NULL);
	/* a zero blocksize would make the modulo and division below UB */
	assert(blocksize != 0);
	assert(memsize % blocksize == 0);
	/* find one good initial pointer:
	 * the system will give us a start pointer so that the entire alloc can
	 * fit in memory.
	 */
	void *m = mmap(NULL, memsize, PROT_READ|PROT_WRITE, MAP_PRIVATE,
			node->fd, 0);
	assert(m != MAP_FAILED);
	/* as long as nothing else is doing mmaps in our back, we can munmap
	 * and reuse the pointer immediately.
	 */
	munmap(m, memsize);
	a->start = m;
	a->memsize = memsize;
	a->blocksize = blocksize;
	a->numblocks = memsize/blocksize;
	a->nodemap = calloc(a->numblocks, sizeof(*a->nodemap));
	assert(a->nodemap != NULL);
	/* map every block from the initial node */
	for(unsigned long i = 0; i < a->numblocks; i++)
	{
		a->nodemap[i] = NULL;
		aml_pull_sync(a, i, node);
	}
	return 0;
}
/* Releases an allocation created by aml_malloc: frees the nodemap array
 * (fixes a memory leak) and unmaps the whole region.
 * Returns munmap's result (0 on success, -1 on error). */
int aml_free(struct aml_alloc *a)
{
	assert(a != NULL);
	free(a->nodemap);
	a->nodemap = NULL;
	return munmap(a->start, a->memsize);
}
/* Synchronously migrates one block of the allocation to the given node by
 * remapping its address range from the node's backing file. No-op when the
 * block already lives on that node. Returns 0 on success; failures abort
 * via assert. */
int aml_pull_sync(struct aml_alloc *a, unsigned long block,
		struct aml_node *node)
{
	/* MAP_SHARED instead of MAP_PRIVATE: with a private mapping every
	 * store lands in copy-on-write anonymous pages that are silently
	 * discarded by the next MAP_FIXED remap, so data written to a block
	 * never reached the node's backing file. Shared mappings write
	 * through to the file, preserving data across migrations. */
	int flags = MAP_SHARED|MAP_FIXED;
	int prot = PROT_READ|PROT_WRITE;
	size_t offset;
	void *begin, *ret;
	assert(a != NULL);
	assert(node != NULL);
	assert(block < a->numblocks);
	if(a->nodemap[block] != node)
	{
		offset = block*a->blocksize;
		begin = (void*)((unsigned long)a->start + offset);
		/* MAP_FIXED atomically replaces whatever mapping previously
		 * covered this range */
		ret = mmap(begin, a->blocksize, prot, flags, node->fd, offset);
		assert(ret != MAP_FAILED && ret == begin);
		a->nodemap[block] = node;
	}
	return 0;
}
/* Synchronously pushes a block back to the given node. Migration is
 * symmetric in this implementation — moving a block to any node is the
 * same remapping operation — so this simply delegates to aml_pull_sync.
 */
int aml_push_sync(struct aml_alloc *a, unsigned long block,
struct aml_node *node)
{
return aml_pull_sync(a, block, node);
}
#ifndef AML_H
#define AML_H 1
#include<numa.h>
#include <stdlib.h>
/* Public API of libaml: file-backed, NUMA-aware, block-structured
 * allocations whose blocks can be migrated between memory nodes.
 */
struct aml_node;
struct aml_alloc;
/* An allocation.
 *
 * Contains information about an allocation: a contiguous virtual range of
 * memsize bytes, split into numblocks blocks of blocksize bytes each.
 */
struct aml_alloc {
void *start; /* base address of the mapped region */
size_t memsize; /* total size, in bytes */
size_t blocksize; /* size of one migration block, in bytes */
size_t numblocks; /* memsize / blocksize */
struct aml_node **nodemap; /* nodemap[i]: node currently backing block i */
};
/* A memory node: a temporary file whose pages are placed on a set of NUMA
 * nodes at creation time. */
struct aml_node {
char *path; /* path of the backing file (owned by the node) */
int fd; /* open file descriptor on path */
size_t maxsize; /* bytes pre-faulted in the backing file */
};
/* library setup/teardown; both return 0 on success */
int aml_init(int *argc, char **argv[]);
int aml_finalize(void);
/* create/destroy a node backing up to maxsize bytes on the NUMA nodes
 * selected by the bitmask */
int aml_node_init(struct aml_node *, struct bitmask *, size_t);
int aml_node_destroy(struct aml_node *);
/* block-structured allocation of memsize bytes in blocksize chunks,
 * initially backed by the given node */
int aml_malloc(struct aml_alloc *, size_t, size_t, struct aml_node *);
int aml_free(struct aml_alloc *);
/* synchronous migration of one block to the given node */
int aml_pull_sync(struct aml_alloc *, unsigned long, struct aml_node *);
int aml_push_sync(struct aml_alloc *, unsigned long, struct aml_node *);
#endif
AM_COLOR_TESTS = yes
# add OpenMP to flags for test programs; headers come from src/
AM_CFLAGS = -fopenmp -I$(top_srcdir)/src
AM_LDFLAGS = -fopenmp ../src/libaml.la
# valgrind support: run each test binary under memcheck through libtool
if TEST_VALGRIND
TESTS_ENVIRONMENT= @LIBTOOL@ --mode=execute @VALGRIND@ --tool=memcheck -q --leak-check=full
endif
# all check programs
TST_PROGS = stream_add
check_PROGRAMS = $(TST_PROGS)
TESTS = $(TST_PROGS)
#include <assert.h>
#include <errno.h>
#include <omp.h>
#include <aml.h>
#include <stdlib.h>
#define ITER 10
#define MEMSIZE (1UL<<26)
#define PHASES 20
#define CHUNKING 4
/* In-place 1D 3-point stencil, repeated ITER times: every interior element
 * becomes the sum of itself and its two neighbors (left neighbor already
 * updated this pass). Boundary elements are left untouched.
 * Returns 0. */
int kernel(unsigned long *tab, size_t elems)
{
	size_t i;
	unsigned int r;
	/* guard: with elems < 2 the original bound `elems - 1` underflows
	 * size_t (elems == 0 made the loop run out of bounds) */
	if(elems < 2)
		return 0;
	for(r = 0; r < ITER; r++) {
		for(i = 1; i < elems - 1; i++)
			tab[i] = tab[i-1] + tab[i] + tab[i+1];
	}
	return 0;
}
/* Stream benchmark: a MEMSIZE array backed by a "slow" node is processed in
 * chunks; each chunk is pulled to a "fast" node, run through the stencil
 * kernel, and pushed back, PHASES times, using OpenMP tasks with address-range
 * dependencies to order the three steps per chunk. */
int main(int argc, char *argv[])
{
/* no command-line arguments supported */
assert(argc == 1);
aml_init(&argc, &argv);
/* we want to back our array on the slow node and use the fast node as
 * a faster buffer.
 * NOTE(review): both nodes are created on NUMA node "0" here, so slow and
 * fast are distinct files rather than distinct memories — presumably a
 * placeholder until multi-node hardware is targeted; confirm.
 */
struct aml_node slow, fast;
struct bitmask *mask = numa_parse_nodestring_all("0");
/* NOTE(review): mask is never released (numa_free_nodemask) */
assert(!aml_node_init(&slow, mask, MEMSIZE));
assert(!aml_node_init(&fast, mask, MEMSIZE));
/* we are only dealing with one contiguous array */
struct aml_alloc alloc;
/* describe the allocation: CHUNKING chunks per OpenMP thread */
size_t chunk_msz, chunk_esz;
int numthreads;
#pragma omp parallel
{
/* every thread stores the same values, so this is benign, but a
 * single/master region would avoid the racy-looking writes */
numthreads = omp_get_num_threads();
chunk_msz = MEMSIZE/(numthreads*CHUNKING);
chunk_esz = chunk_msz/sizeof(unsigned long);
}
assert(!aml_malloc(&alloc, MEMSIZE, chunk_msz, &slow));
/* create virtually accessible address range, backed by slow memory */
unsigned long *wa = (unsigned long*)alloc.start;
for(unsigned long i = 0; i < MEMSIZE/sizeof(unsigned long); i++) {
wa[i] = i;
}
/* run kernel: per phase and per chunk, pull to fast, compute, push back
 * to slow; the inout dependencies on each chunk's address range serialize
 * the three tasks for a chunk while different chunks run concurrently */
#pragma omp parallel
#pragma omp single nowait
{
for(unsigned long phase = 0; phase < PHASES; phase++) {
for(unsigned long i = 0; i < numthreads*CHUNKING; i++) {
#pragma omp task depend(inout: wa[i*chunk_esz:chunk_esz])
assert(!aml_pull_sync(&alloc, i, &fast));
#pragma omp task depend(inout: wa[i*chunk_esz:chunk_esz])
kernel(&wa[i*chunk_esz], chunk_esz);
#pragma omp task depend(inout: wa[i*chunk_esz:chunk_esz])
assert(!aml_push_sync(&alloc, i, &slow));
}
}
}
aml_free(&alloc);
aml_node_destroy(&slow);
aml_node_destroy(&fast);
aml_finalize();
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment