/* stream_add_omp.c */
#include <assert.h>
#include <errno.h>
#include <omp.h>
#include <aml.h>
#include <stdlib.h>

#define ITER 10
#define MEMSIZE (1UL<<26)
#define PHASES 20
#define CHUNKING 4
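/* each array is MEMSIZE bytes, processed in CHUNKING chunks per OpenMP thread */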

int kernel(unsigned long *a, unsigned long *b, unsigned long *c, size_t n)
{
	size_t i;
	for(i = 0; i < n; i++)
		c[i] = a[i] + b[i];
	return 0;
}

int main(int argc, char *argv[])
{
	assert(argc == 1);
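	/* initialize AML before any other AML call */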
	aml_init(&argc, &argv);

	/* we want to back our arrays on the slow node and use the fast node as
	 * a faster buffer.
	 */
	struct aml_area slow, fast;
	int type = AML_AREA_TYPE_REGULAR;
	assert(!aml_area_from_nodestring(&slow, type, "all"));
	assert(!aml_area_from_nodestring(&fast, type, "all"));
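
	/* a dma object to move data between the slow and fast areas */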
	struct aml_dma dma;
	assert(!aml_dma_init(&dma, 0));

	void *a, *b, *c;

	/* describe the chunking of the allocation */
	size_t chunk_msz, esz;
	int numthreads;

	#pragma omp parallel
	#pragma omp single
	{
		numthreads = omp_get_num_threads();
		chunk_msz = MEMSIZE/(numthreads*CHUNKING);
		esz = chunk_msz/sizeof(unsigned long);
	}
	a = aml_area_malloc(&slow, MEMSIZE);
	b = aml_area_malloc(&slow, MEMSIZE);
	c = aml_area_malloc(&slow, MEMSIZE);
	assert(a != NULL && b != NULL && c != NULL);

	/* view the buffers as arrays of unsigned long, backed by slow memory */
	unsigned long *wa = (unsigned long*)a;
	unsigned long *wb = (unsigned long*)b;
	unsigned long *wc = (unsigned long*)c;
	unsigned long esize = MEMSIZE/sizeof(unsigned long);
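	/* initialize the arrays so that wa[i] + wb[i] == esize for every i */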
	for(unsigned long i = 0; i < esize; i++) {
		wa[i] = i;
		wb[i] = esize - i;
		wc[i] = 0;
	}

	/* run kernel */
	#pragma omp parallel
	#pragma omp single nowait
	{
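		/*
		 * for each chunk: move a, b and c to the fast area, run the
		 * kernel there, then move the data back to the slow area; the
		 * task dependencies order the moves and the compute per chunk.
		 */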
		for(unsigned long i = 0; i < numthreads*CHUNKING; i++) {
			#pragma omp task depend(inout: wa[i*esz:esz])
			assert(!aml_dma_move(&dma, &fast, &slow, &wa[i*esz], esz));
			#pragma omp task depend(inout: wb[i*esz:esz])
			assert(!aml_dma_move(&dma, &fast, &slow, &wb[i*esz], esz));
			#pragma omp task depend(inout: wc[i*esz:esz])
			assert(!aml_dma_move(&dma, &fast, &slow, &wc[i*esz], esz));
			#pragma omp task depend(in: wa[i*esz:esz], wb[i*esz:esz]) depend(out: wc[i*esz:esz])
			kernel(&wa[i*esz], &wb[i*esz], &wc[i*esz], esz);
			#pragma omp task depend(inout: wa[i*esz:esz])
			assert(!aml_dma_move(&dma, &slow, &fast, &wa[i*esz], esz));
			#pragma omp task depend(inout: wb[i*esz:esz])
			assert(!aml_dma_move(&dma, &slow, &fast, &wb[i*esz], esz));
			#pragma omp task depend(inout: wc[i*esz:esz])
			assert(!aml_dma_move(&dma, &slow, &fast, &wc[i*esz], esz));
		}
	}

	/* validate: wa[i] + wb[i] == esize, so every element of c must equal esize */
	for(unsigned long i = 0; i < esize; i++) {
		assert(wc[i] == esize);
	}

	aml_area_free(&slow, a);
	aml_area_free(&slow, b);
	aml_area_free(&slow, c);
	aml_area_destroy(&slow);
	aml_area_destroy(&fast);
	aml_dma_destroy(&dma);
	aml_finalize();
	return 0;
}