Commit 449a959d authored by Swann Perarnau

Checkpoint before restart from scratch

I'm quite convinced that this is not the appropriate way of working with
this code, but I prefer to checkpoint everything before a wipeout.
parent 80669c37
lib_LTLIBRARIES = libaml.la
LIBCSOURCES = aml.c allocator.c
LIBHSOURCES = aml.h allocator.h
libaml_la_SOURCES = $(LIBCSOURCES) $(LIBHSOURCES)
include_HEADERS = $(LIBHSOURCES)
#include <stdlib.h>
#include <string.h>
#include "allocator.h"
/* an area of free memory inside a node */
struct aml_allocator_area {
size_t size;
struct aml_allocator_area *next;
};
/* header management and pointer conversion macros */
#define AREA_HEADER_SIZE (sizeof(size_t))
#define AREA_2_USER(p) ((void *)((char *)(p) + AREA_HEADER_SIZE))
#define USER_2_AREA(p) ((struct aml_allocator_area *)((char *)(p) - AREA_HEADER_SIZE))
/* alignments and allocator overhead.
* We keep a dummy head at the start of the area, so we need at least one full
* struct + the overhead of a single allocation.
*/
#define AREA_ALIGN_MASK (sizeof(struct aml_allocator_area)-((size_t)1))
#define AREA_OVERHEAD ((sizeof(struct aml_allocator_area) + AREA_HEADER_SIZE + AREA_ALIGN_MASK) & ~AREA_ALIGN_MASK)
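/* Worked example (assuming an LP64 target where sizeof(size_t) == 8 and the
 * struct is 16 bytes, so requests round up to 16-byte multiples after the
 * 8-byte header is added): usz2asz(5) == 16, usz2asz(24) == 32,
 * usz2asz(25) == 48, and AREA_OVERHEAD == (16 + 8 + 15) & ~15 == 32.
 */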
static size_t usz2asz(size_t size)
{
if(size < AREA_HEADER_SIZE)
return sizeof(struct aml_allocator_area);
else
{
size += AREA_HEADER_SIZE;
return (size + AREA_ALIGN_MASK) & ~AREA_ALIGN_MASK;
}
}
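/* First-fit lookup: walk the free list hanging off the dummy head and return
 * the first area large enough in *ret, along with its predecessor in *prev;
 * both are set to NULL when no area fits.
 */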
static void aml_allocator_find(struct aml_allocator_area *head, size_t size,
struct aml_allocator_area **ret,
struct aml_allocator_area **prev)
{
struct aml_allocator_area *it, *old;
for(it = head->next, old = head; it != NULL; old = it, it = it->next)
{
if(it->size >= size)
{
*ret = it;
*prev = old;
return;
}
}
*ret = NULL;
*prev = NULL;
}
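/* Return the free area that immediately precedes `area` in address order
 * (or the dummy head itself); used by free to keep the list address-sorted
 * and to coalesce neighboring areas.
 */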
static struct aml_allocator_area *
aml_allocator_findprev(struct aml_allocator_area *head,
struct aml_allocator_area *area)
{
struct aml_allocator_area *it, *prev;
for(it = head->next, prev = head; it != NULL; prev = it, it = it->next)
if(it > area)
break;
return prev;
}
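/* Set up the allocator inside a raw memory region: a dummy head at the start
 * tracks the total number of free bytes, followed by a single free area
 * covering the rest of the region.
 */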
int aml_allocator_init(void *start, size_t memsize)
{
struct aml_allocator_area *head, *next;
head = (struct aml_allocator_area *)start;
next = head+1;
next->size = memsize - sizeof(*head);
next->next = NULL;
head->size = memsize - sizeof(*head);
head->next = next;
return 0;
}
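/* Allocate from the region: round the request up, first-fit scan the free
 * list, split the chosen area when the leftover can still hold an area
 * header, and return a pointer just past the size header.
 */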
void *aml_allocator_alloc(void *start, size_t size)
{
struct aml_allocator_area *head, *it, *prev, *next;
void *ret;
if(size == 0)
return NULL;
size = usz2asz(size);
head = (struct aml_allocator_area*)start;
if(size > head->size)
return NULL;
aml_allocator_find(head, size, &it, &prev);
if(it == NULL)
return NULL;
if(it->size - size < sizeof(struct aml_allocator_area))
{
/* not enough room left to split: hand out the whole area and
 * account for its full size below */
prev->next = it->next;
size = it->size;
ret = AREA_2_USER(it);
}
else
{
/* split: carve off the request and keep the tail on the free list */
next = (struct aml_allocator_area*)((char *)it + size);
next->size = it->size - size;
next->next = it->next;
prev->next = next;
it->size = size;
ret = AREA_2_USER(it);
}
head->size -= size;
return ret;
}
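/* Return a block to the free list, keeping the list sorted by address and
 * merging with the previous and next areas when they are contiguous.
 */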
void aml_allocator_free(void *start, void *p)
{
struct aml_allocator_area *area, *prev, *next, *head;
if(p == NULL)
return;
area = USER_2_AREA(p);
area->next = NULL;
head = (struct aml_allocator_area*) start;
/* give the freed bytes back to the total kept in the dummy head */
head->size += area->size;
prev = aml_allocator_findprev(head, area);
/* merge with the previous free area if it ends right where we start */
if(prev != head &&
(struct aml_allocator_area*)((char *)prev + prev->size) == area)
{
prev->size += area->size;
area = prev;
}
else
{
area->next = prev->next;
prev->next = area;
}
next = area->next;
if(next != NULL &&
(struct aml_allocator_area*)((char *)area + area->size) == next)
{
area->size += next->size;
area->next = next->next;
}
}
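/* Naive realloc: allocate a new block, copy the old contents over, then free
 * the old block. No attempt is made to grow the allocation in place.
 */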
void *aml_allocator_realloc(void *start, void *p, size_t size)
{
void *ret;
size_t copysize;
struct aml_allocator_area *area;
if(p == NULL)
return aml_allocator_alloc(start, size);
if(size == 0)
{
aml_allocator_free(start, p);
return NULL;
}
ret = aml_allocator_alloc(start, size);
if(ret != NULL)
{
/* copy at most what the old block could hold, to avoid reading past it */
area = USER_2_AREA(p);
copysize = area->size - AREA_HEADER_SIZE;
if(copysize > size)
copysize = size;
memcpy(ret, p, copysize);
aml_allocator_free(start, p);
}
return ret;
}
#ifndef AML_ALLOCATOR_H
#define AML_ALLOCATOR_H 1
#include <stddef.h>
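/* Usage sketch (illustrative only, not part of the checked-in code): the
 * allocator manages any large writable region handed to it, e.g. an
 * anonymous mapping:
 *
 *   void *region = mmap(NULL, 1 << 20, PROT_READ|PROT_WRITE,
 *                       MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
 *   aml_allocator_init(region, 1 << 20);
 *   void *p = aml_allocator_alloc(region, 256);
 *   aml_allocator_free(region, p);
 */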
int aml_allocator_init(void *start, size_t memsize);
void *aml_allocator_alloc(void *start, size_t size);
void aml_allocator_free(void *start, void *p);
void *aml_allocator_realloc(void *start, void *p, size_t size);
#endif
@@ -10,6 +10,8 @@
#include <sys/mman.h>
#include <unistd.h>
#include "allocator.h"
const char *tmpfs = "/tmp";
#define min(a,b) ((a) < (b)? (a) : (b))
@@ -52,6 +54,13 @@ int aml_node_init(struct aml_node *node, struct bitmask *mask, size_t maxsize)
if((count = write(fd, zero, min(maxsize - pos, 4096))) <= 0)
break;
/* init internal allocator */
void *m = mmap(NULL, maxsize, PROT_READ|PROT_WRITE, MAP_SHARED,
fd, 0);
assert(m != MAP_FAILED);
/* MAP_SHARED so the free list written by aml_allocator_init reaches the
 * backing file and survives the munmap below */
aml_allocator_init(m, maxsize);
munmap(m, maxsize);
/* restore the original mempolicy */
assert(!set_mempolicy(mode, oldmask, NUMA_NUM_NODES));
@@ -9,14 +9,11 @@
#define PHASES 20
#define CHUNKING 4
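/* element-wise vector add over n elements: c[i] = a[i] + b[i] */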
int kernel(unsigned long *a, unsigned long *b, unsigned long *c, size_t n)
{
size_t i;
for(i = 0; i < n; i++)
c[i] = a[i] + b[i];
return 0;
}
@@ -30,46 +27,67 @@ int main(int argc, char *argv[])
*/
struct aml_node slow, fast;
struct bitmask *mask = numa_parse_nodestring_all("0");
assert(!aml_node_init(&slow, mask, MEMSIZE*3));
assert(!aml_node_init(&fast, mask, MEMSIZE*3));
/* we are dealing with three contiguous arrays */
struct aml_alloc a,b,c;
/* describe the allocation */
size_t chunk_msz, esz;
int numthreads;
#pragma omp parallel
{
numthreads = omp_get_num_threads();
chunk_msz = MEMSIZE/(numthreads*CHUNKING);
esz = chunk_msz/sizeof(unsigned long);
}
assert(!aml_malloc(&a, MEMSIZE, chunk_msz, &slow));
assert(!aml_malloc(&b, MEMSIZE, chunk_msz, &slow));
assert(!aml_malloc(&c, MEMSIZE, chunk_msz, &slow));
/* create virtually accessible address range, backed by slow memory */
unsigned long *wa = (unsigned long*)a.start;
unsigned long *wb = (unsigned long*)b.start;
unsigned long *wc = (unsigned long*)c.start;
unsigned long esize = MEMSIZE/sizeof(unsigned long);
for(unsigned long i = 0; i < esize; i++) {
wa[i] = i;
wb[i] = esize - i;
wc[i] = 0;
}
/* run kernel */
#pragma omp parallel
#pragma omp single nowait
{
for(unsigned long phase = 0; phase < PHASES; phase++) {
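/* per-chunk pipeline: pull the three operand chunks into fast memory,
 * run the kernel, then push them back to slow memory; the task
 * dependencies on each array section keep the per-chunk ordering. */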
for(unsigned long i = 0; i < numthreads*CHUNKING; i++) {
#pragma omp task depend(inout: wa[i*esz:esz])
assert(!aml_pull_sync(&a, i, &fast));
#pragma omp task depend(inout: wb[i*esz:esz])
assert(!aml_pull_sync(&b, i, &fast));
#pragma omp task depend(inout: wc[i*esz:esz])
assert(!aml_pull_sync(&c, i, &fast));
#pragma omp task depend(in: wa[i*esz:esz], wb[i*esz:esz]) depend(out: wc[i*esz:esz])
kernel(&wa[i*esz], &wb[i*esz], &wc[i*esz], esz);
#pragma omp task depend(inout: wa[i*esz:esz])
assert(!aml_push_sync(&a, i, &slow));
#pragma omp task depend(inout: wb[i*esz:esz])
assert(!aml_push_sync(&b, i, &slow));
#pragma omp task depend(inout: wc[i*esz:esz])
assert(!aml_push_sync(&c, i, &slow));
}
}
/* validate */
for(unsigned long i = 0; i < esize; i++) {
assert(wc[i] == esize);
}
aml_free(&a);
aml_free(&b);
aml_free(&c);
aml_node_destroy(&slow);
aml_node_destroy(&fast);
aml_finalize();