stream_add_omp.c 2.92 KB
Newer Older
Swann Perarnau's avatar
Swann Perarnau committed
1 2 3 4 5 6 7 8 9 10
/*******************************************************************************
 * Copyright 2019 UChicago Argonne, LLC.
 * (c.f. AUTHORS, LICENSE)
 *
 * This file is part of the AML project.
 * For more info, see https://xgitlab.cels.anl.gov/argo/aml
 *
 * SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/

11 12 13
#include <assert.h>
#include <errno.h>
#include <omp.h>
14
#include "aml.h"
Nicolas Denoyelle's avatar
Nicolas Denoyelle committed
15
#include "aml/area/linux.h"
16 17 18 19 20 21 22
#include <stdlib.h>

#define ITER 10
#define MEMSIZE (1UL<<26)
#define PHASES 20
#define CHUNKING 4

23
/*
 * Element-wise addition: c[i] = a[i] + b[i] for every i in [0, n).
 *
 * a, b: input arrays; elements 0..n-1 of each are read.
 * c:    output array; elements 0..n-1 are overwritten.
 * n:    number of elements to process; with n == 0 nothing is read or written.
 *
 * Always returns 0.
 */
int kernel(unsigned long *a, unsigned long *b, unsigned long *c, size_t n)
{
	for (size_t i = 0; i < n; i++) {
		c[i] = a[i] + b[i];
	}
	return 0;
}

int main(int argc, char *argv[])
{
	assert(argc == 1);
	aml_init(&argc, &argv);

	/* we want to back our array on the slow node and use the fast node as
	 * a faster buffer.
	 */
Nicolas Denoyelle's avatar
Nicolas Denoyelle committed
39
	struct aml_area *slow = &aml_area_linux, *fast = aml_area_linux;
40

41
	struct aml_dma *dma;
42
	assert(!aml_dma_create(&dma, 0, NULL, NULL));
43 44

	void *a, *b, *c;
45 46

	/* describe the allocation */
47
	size_t chunk_msz, esz;
48 49 50 51 52 53
	int numthreads;

	#pragma omp parallel
	{
		numthreads = omp_get_num_threads();
		chunk_msz = MEMSIZE/(numthreads*CHUNKING);
54
		esz = chunk_msz/sizeof(unsigned long);
55
	}
56 57 58
	a = aml_area_mmap(slow, MEMSIZE, NULL);
	b = aml_area_mmap(slow, MEMSIZE, NULL);
	c = aml_area_mmap(fast, MEMSIZE, NULL);
59
	assert(a != NULL && b != NULL && c != NULL);
60 61

	/* create virtually accessible address range, backed by slow memory */
62 63 64
	unsigned long *wa = (unsigned long*)a;
	unsigned long *wb = (unsigned long*)b;
	unsigned long *wc = (unsigned long*)c;
65 66
	unsigned long esize = MEMSIZE/sizeof(unsigned long);
	for(unsigned long i = 0; i < esize; i++) {
67
		wa[i] = i;
68 69
		wb[i] = esize - i;
		wc[i] = 0;
70 71 72 73 74 75
	}

	/* run kernel */
	#pragma omp parallel
	#pragma omp single nowait
	{
76 77
		for(unsigned long i = 0; i < numthreads*CHUNKING; i++) {
			#pragma omp task depend(inout: wa[i*esz:esz])
78
			assert(!aml_dma_move(dma, &fast, &slow, &wa[i*esz], esz));
79
			#pragma omp task depend(inout: wb[i*esz:esz])
80
			assert(!aml_dma_move(dma, &fast, &slow, &wb[i*esz], esz));
81
			#pragma omp task depend(inout: wc[i*esz:esz])
82
			assert(!aml_dma_move(dma, &fast, &slow, &wc[i*esz], esz));
83 84 85
			#pragma omp task depend(in: wa[i*esz:esz], wb[i*esz:esz]) depend(out: wc[i*esz:esz])
			kernel(&wa[i*esz], &wb[i*esz], &wc[i*esz], esz);
			#pragma omp task depend(inout: wa[i*esz:esz])
86
			assert(!aml_dma_move(dma, &slow, &fast, &wa[i*esz], esz));
87
			#pragma omp task depend(inout: wb[i*esz:esz])
88
			assert(!aml_dma_move(dma, &slow, &fast, &wb[i*esz], esz));
89
			#pragma omp task depend(inout: wc[i*esz:esz])
90
			assert(!aml_dma_move(dma, &slow, &fast, &wc[i*esz], esz));
91 92
		}
	}
93 94 95 96 97 98

	/* validate */
	for(unsigned long i = 0; i < esize; i++) {
		assert(wc[i] == esize);
	}

Nicolas Denoyelle's avatar
Nicolas Denoyelle committed
99 100 101
	aml_area_munmap(slow, a, MEMSIZE);
	aml_area_munmap(slow, b, MEMSIZE);
	aml_area_munmap(fast, c, MEMSIZE);
102
	aml_dma_destroy(&dma);
103 104 105
	aml_finalize();
	return 0;
}