Commit 449a959d authored by Swann Perarnau

Checkpoint before restart from scratch

I'm quite convinced that this is not the appropriate way of working with
this stuff, but I prefer to checkpoint everything before a wipeout.
parent 80669c37
lib_LTLIBRARIES = libaml.la
LIBCSOURCES = aml.c
LIBHSOURCES = aml.h
LIBCSOURCES = aml.c allocator.c
LIBHSOURCES = aml.h allocator.h
libaml_la_SOURCES = $(LIBCSOURCES) $(LIBHSOURCES)
include_HEADERS = $(LIBHSOURCES)
#include <stdlib.h>
#include <string.h>
#include "allocator.h"
/* an area of free memory inside a node */
struct aml_allocator_area {
size_t size;
struct aml_allocator_area *next;
};
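/* The allocator keeps a singly-linked free list of such areas, sorted by
 * address. The first struct at the very start of the region is a dummy
 * head: its size field tracks the total number of free bytes and its next
 * field points to the first real free area.
 */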
/* header management and pointer conversion macros */
#define AREA_HEADER_SIZE (sizeof(size_t))
#define AREA_2_USER(p) ((void *)((char *)(p) + AREA_HEADER_SIZE))
#define USER_2_AREA(p) ((struct aml_allocator_area *)((char *)(p) - AREA_HEADER_SIZE))
/* alignments and allocator overhead.
* We keep a dummy head at the start of the area, so we need at least one full
* struct + the overhead of a single allocation.
*/
#define AREA_ALIGN_MASK (sizeof(struct aml_allocator_area)-((size_t)1))
#define AREA_OVERHEAD ((sizeof(struct aml_allocator_area) + AREA_HEADER_SIZE + AREA_ALIGN_MASK) & ~AREA_ALIGN_MASK)
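/* For example, on an LP64 target (8-byte size_t and pointers):
 * sizeof(struct aml_allocator_area) = 16, AREA_HEADER_SIZE = 8,
 * AREA_ALIGN_MASK = 15 and AREA_OVERHEAD = (16 + 8 + 15) & ~15 = 32.
 * The mask trick assumes the struct size is a power of two.
 */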
static size_t usz2asz(size_t size)
{
if(size < AREA_HEADER_SIZE)
return sizeof(struct aml_allocator_area);
else
{
size += AREA_HEADER_SIZE;
return (size + AREA_ALIGN_MASK) & ~AREA_ALIGN_MASK;
}
}
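/* e.g., still assuming LP64: usz2asz(1) = 16 (padded so the block can hold
 * a free-list node once freed), usz2asz(8) = 16 and
 * usz2asz(24) = (24 + 8 + 15) & ~15 = 32.
 */
/* first-fit search: *ret gets the first free area with at least 'size'
 * bytes and *prev the area right before it in the list (possibly the dummy
 * head); both are set to NULL when nothing fits.
 */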
static void aml_allocator_find(struct aml_allocator_area *head, size_t size,
struct aml_allocator_area **ret,
struct aml_allocator_area **prev)
{
struct aml_allocator_area *it, *old;
for(it = head->next, old = head; it != NULL; old = it, it = it->next)
{
if(it->size >= size)
{
*ret = it;
*prev = old;
return;
}
}
*ret = NULL;
*prev = NULL;
}
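/* walk the address-ordered free list and return the area that immediately
 * precedes 'area' (the dummy head if no free area lies before it) */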
static struct aml_allocator_area *
aml_allocator_findprev(struct aml_allocator_area *head,
struct aml_allocator_area *area)
{
struct aml_allocator_area *it, *prev;
for(it = head->next, prev = head; it != NULL; prev = it, it = it->next)
if(it > area)
break;
return prev;
}
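/* Lay out the allocator inside [start, start + memsize): a dummy head at
 * start tracks the total free bytes and anchors the list, immediately
 * followed by one big free area covering the rest of the region.
 */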
int aml_allocator_init(void *start, size_t memsize)
{
struct aml_allocator_area *head, *next;
head = (struct aml_allocator_area *)start;
next = head+1;
next->size = memsize - sizeof(*head);
next->next = NULL;
head->size = memsize - sizeof(*head);
head->next = next;
return 0;
}
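/* First-fit allocation. The chosen area is split unless the remainder is
 * too small to hold a free-list node; the returned pointer hides the size
 * header stored just before it.
 */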
void *aml_allocator_alloc(void *start, size_t size)
{
struct aml_allocator_area *head, *it, *prev, *next;
void *ret;
if(size == 0)
return NULL;
size = usz2asz(size);
head = (struct aml_allocator_area*)start;
if(size > head->size)
return NULL;
aml_allocator_find(head, size, &it, &prev);
if(it == NULL)
return NULL;
if(it->size - size < sizeof(struct aml_allocator_area))
{
/* remainder too small to hold a free-list node: hand out the whole area */
prev->next = it->next;
head->size -= it->size;
ret = AREA_2_USER(it);
}
else
{
/* split: the tail of the area stays on the free list */
next = (struct aml_allocator_area*)((char *)it + size);
next->size = it->size - size;
next->next = it->next;
prev->next = next;
it->size = size;
head->size -= size;
ret = AREA_2_USER(it);
}
return ret;
}
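/* Return a block to the free list, keeping the list sorted by address and
 * merging with the previous and/or following area when they are contiguous.
 */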
void aml_allocator_free(void *start, void *p)
{
struct aml_allocator_area *area, *prev, *next, *head;
if(p == NULL)
return;
area = USER_2_AREA(p);
head = (struct aml_allocator_area*) start;
/* the freed bytes are available again */
head->size += area->size;
prev = aml_allocator_findprev(head, area);
/* merge with the previous free area if it is contiguous */
if(prev != head &&
(struct aml_allocator_area*)((char *)prev + prev->size) == area)
{
prev->size += area->size;
area = prev;
}
else
{
area->next = prev->next;
prev->next = area;
}
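/* also merge with the following free area if it is contiguous */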
next = area->next;
if(next != NULL &&
(struct aml_allocator_area*)((char *)area + area->size) == next)
{
area->size += next->size;
area->next = next->next;
}
}
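/* Naive realloc: allocate a new block, copy over as much of the old
 * payload as fits, then free the old block.
 */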
void *aml_allocator_realloc(void *start, void *p, size_t size)
{
void *ret;
struct aml_allocator_area *area;
if(p == NULL)
return aml_allocator_alloc(start, size);
if(size == 0)
{
aml_allocator_free(start, p);
return NULL;
}
ret = aml_allocator_alloc(start, size);
if(ret != NULL)
{
/* copy only what the old block actually holds */
area = USER_2_AREA(p);
memcpy(ret, p, size < area->size - AREA_HEADER_SIZE ?
size : area->size - AREA_HEADER_SIZE);
aml_allocator_free(start, p);
}
return ret;
}
#ifndef AML_ALLOCATOR_H
#define AML_ALLOCATOR_H 1
#include <stddef.h>
int aml_allocator_init(void *start, size_t memsize);
void *aml_allocator_alloc(void *start, size_t size);
void aml_allocator_free(void *start, void *p);
void *aml_allocator_realloc(void *start, void *p, size_t size);
#endif
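As a rough illustration of how the API above fits together, here is a
minimal sketch that drives the allocator over a plain heap buffer; aml.c
maps a tmpfs-backed file instead, but any writable region works the same way.
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "allocator.h"

int main(void)
{
	/* any region works; aml.c uses an mmap'd file, a heap buffer is enough here */
	size_t memsize = 1 << 20;
	void *region = malloc(memsize);
	assert(region != NULL);
	assert(aml_allocator_init(region, memsize) == 0);

	/* carve two blocks out of the region and use them */
	char *a = aml_allocator_alloc(region, 4096);
	char *b = aml_allocator_alloc(region, 100);
	assert(a != NULL && b != NULL);
	memset(a, 0, 4096);
	strcpy(b, "hello");

	/* grow b, then release everything */
	b = aml_allocator_realloc(region, b, 1000);
	assert(b != NULL && strcmp(b, "hello") == 0);
	aml_allocator_free(region, a);
	aml_allocator_free(region, b);
	free(region);
	return 0;
}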
@@ -10,6 +10,8 @@
#include <sys/mman.h>
#include <unistd.h>
#include "allocator.h"
const char *tmpfs = "/tmp";
#define min(a,b) ((a) < (b)? (a) : (b))
@@ -52,6 +54,13 @@ int aml_node_init(struct aml_node *node, struct bitmask *mask, size_t maxsize)
if((count = write(fd, zero, min(maxsize - pos, 4096))) <= 0)
break;
/* init internal allocator */
/* MAP_SHARED so the allocator metadata written here reaches the backing file */
void *m = mmap(NULL, maxsize, PROT_READ|PROT_WRITE, MAP_SHARED,
fd, 0);
assert(m != MAP_FAILED);
aml_allocator_init(m, maxsize);
munmap(m, maxsize);
/* restore the original mempolicy */
assert(!set_mempolicy(mode, oldmask, NUMA_NUM_NODES));
@@ -9,14 +9,11 @@
#define PHASES 20
#define CHUNKING 4
int kernel(unsigned long *tab, size_t elems)
int kernel(unsigned long *a, unsigned long *b, unsigned long *c, size_t n)
{
size_t i;
unsigned int r;
for(r = 0; r < ITER; r++) {
for(i = 1; i < elems -1; i++)
tab[i] = tab[i-1] + tab[i] + tab[i+1];
}
for(i = 0; i < n; i++)
c[i] = a[i] + b[i];
return 0;
}
@@ -30,46 +27,67 @@ int main(int argc, char *argv[])
*/
struct aml_node slow, fast;
struct bitmask *mask = numa_parse_nodestring_all("0");
assert(!aml_node_init(&slow, mask, MEMSIZE));
assert(!aml_node_init(&fast, mask, MEMSIZE));
assert(!aml_node_init(&slow, mask, MEMSIZE*3));
assert(!aml_node_init(&fast, mask, MEMSIZE*3));
/* we are only dealing with one contiguous array */
struct aml_alloc alloc;
struct aml_alloc a,b,c;
/* describe the allocation */
size_t chunk_msz, chunk_esz;
size_t chunk_msz, esz;
int numthreads;
#pragma omp parallel
{
numthreads = omp_get_num_threads();
chunk_msz = MEMSIZE/(numthreads*CHUNKING);
chunk_esz = chunk_msz/sizeof(unsigned long);
esz = chunk_msz/sizeof(unsigned long);
}
assert(!aml_malloc(&alloc, MEMSIZE, chunk_msz, &slow));
assert(!aml_malloc(&a, MEMSIZE, chunk_msz, &slow));
assert(!aml_malloc(&b, MEMSIZE, chunk_msz, &slow));
assert(!aml_malloc(&c, MEMSIZE, chunk_msz, &slow));
/* create virtually accessible address range, backed by slow memory */
unsigned long *wa = (unsigned long*)alloc.start;
for(unsigned long i = 0; i < MEMSIZE/sizeof(unsigned long); i++) {
unsigned long *wa = (unsigned long*)a.start;
unsigned long *wb = (unsigned long*)b.start;
unsigned long *wc = (unsigned long*)c.start;
unsigned long esize = MEMSIZE/sizeof(unsigned long);
for(unsigned long i = 0; i < esize; i++) {
wa[i] = i;
wb[i] = esize - i;
wc[i] = 0;
}
/* run kernel */
#pragma omp parallel
#pragma omp single nowait
{
for(unsigned long phase = 0; phase < PHASES; phase++) {
for(unsigned long i = 0; i < numthreads*CHUNKING; i++) {
#pragma omp task depend(inout: wa[i*chunk_esz:chunk_esz])
assert(!aml_pull_sync(&alloc, i, &fast));
#pragma omp task depend(inout: wa[i*chunk_esz:chunk_esz])
kernel(&wa[i*chunk_esz], chunk_esz);
#pragma omp task depend(inout: wa[i*chunk_esz:chunk_esz])
assert(!aml_push_sync(&alloc, i, &slow));
}
for(unsigned long i = 0; i < numthreads*CHUNKING; i++) {
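/* per chunk: stage a, b and c into fast memory, run the kernel, then push
 * the data back to slow memory; the task dependencies serialize this
 * pipeline for a given chunk while distinct chunks can overlap */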
#pragma omp task depend(inout: wa[i*esz:esz])
assert(!aml_pull_sync(&a, i, &fast));
#pragma omp task depend(inout: wb[i*esz:esz])
assert(!aml_pull_sync(&b, i, &fast));
#pragma omp task depend(inout: wc[i*esz:esz])
assert(!aml_pull_sync(&c, i, &fast));
#pragma omp task depend(in: wa[i*esz:esz], wb[i*esz:esz]) depend(out: wc[i*esz:esz])
kernel(&wa[i*esz], &wb[i*esz], &wc[i*esz], esz);
#pragma omp task depend(inout: wa[i*esz:esz])
assert(!aml_push_sync(&a, i, &slow));
#pragma omp task depend(inout: wb[i*esz:esz])
assert(!aml_push_sync(&b, i, &slow));
#pragma omp task depend(inout: wc[i*esz:esz])
assert(!aml_push_sync(&c, i, &slow));
}
}
aml_free(&alloc);
/* validate */
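/* each element should be wa[i] + wb[i] = i + (esize - i) = esize */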
for(unsigned long i = 0; i < esize; i++) {
assert(wc[i] == esize);
}
aml_free(&a);
aml_free(&b);
aml_free(&c);
aml_node_destroy(&slow);
aml_node_destroy(&fast);
aml_finalize();