Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
A
aml
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
28
Issues
28
List
Boards
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
argo
aml
Commits
0e89b24a
Commit
0e89b24a
authored
Jul 15, 2019
by
Swann Perarnau
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'remove-static-allocs' into 'master'
Remove static allocs Closes
#37
See merge request
!68
parents
c9d8ed8c
d3558a6d
Pipeline
#8044
passed with stages
in 24 minutes and 49 seconds
Changes
33
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
33 changed files
with
613 additions
and
1123 deletions
+613
-1123
benchmarks/dgemm_noprefetch.c
benchmarks/dgemm_noprefetch.c
+15
-15
benchmarks/dgemm_prefetch.c
benchmarks/dgemm_prefetch.c
+38
-38
benchmarks/stream_add_omp.c
benchmarks/stream_add_omp.c
+8
-8
benchmarks/stream_add_omp_mt.c
benchmarks/stream_add_omp_mt.c
+24
-24
benchmarks/stream_add_omp_st.c
benchmarks/stream_add_omp_st.c
+26
-26
benchmarks/stream_add_pth_st.c
benchmarks/stream_add_pth_st.c
+26
-26
include/aml.h
include/aml.h
+0
-27
include/aml/area/linux.h
include/aml/area/linux.h
+0
-30
include/aml/dma/linux-par.h
include/aml/dma/linux-par.h
+2
-33
include/aml/dma/linux-seq.h
include/aml/dma/linux-seq.h
+2
-30
include/aml/scratch/par.h
include/aml/scratch/par.h
+2
-39
include/aml/scratch/seq.h
include/aml/scratch/seq.h
+2
-39
include/aml/tiling/1d.h
include/aml/tiling/1d.h
+0
-39
include/aml/tiling/2d.h
include/aml/tiling/2d.h
+0
-47
include/aml/utils/bitmap.h
include/aml/utils/bitmap.h
+0
-25
include/aml/utils/vector.h
include/aml/utils/vector.h
+0
-20
src/area/linux.c
src/area/linux.c
+38
-45
src/dma/dma_linux_par.c
src/dma/dma_linux_par.c
+39
-56
src/dma/dma_linux_seq.c
src/dma/dma_linux_seq.c
+34
-51
src/scratch/scratch_par.c
src/scratch/scratch_par.c
+61
-84
src/scratch/scratch_seq.c
src/scratch/scratch_seq.c
+61
-84
src/tiling/tiling.c
src/tiling/tiling.c
+0
-16
src/tiling/tiling_1d.c
src/tiling/tiling_1d.c
+56
-65
src/tiling/tiling_2d.c
src/tiling/tiling_2d.c
+58
-81
src/utils/bitmap.c
src/utils/bitmap.c
+1
-20
src/utils/vector.c
src/utils/vector.c
+23
-36
tests/dma/test_dma_linux_par.c
tests/dma/test_dma_linux_par.c
+8
-12
tests/dma/test_dma_linux_seq.c
tests/dma/test_dma_linux_seq.c
+8
-12
tests/scratch/test_scratch_par.c
tests/scratch/test_scratch_par.c
+17
-20
tests/scratch/test_scratch_seq.c
tests/scratch/test_scratch_seq.c
+16
-19
tests/tiling/test_tiling.c
tests/tiling/test_tiling.c
+4
-13
tests/tiling/test_tiling_2d.c
tests/tiling/test_tiling_2d.c
+33
-32
tests/utils/test_vector.c
tests/utils/test_vector.c
+11
-11
No files found.
benchmarks/dgemm_noprefetch.c
View file @
0e89b24a
...
...
@@ -21,8 +21,8 @@
#include <math.h>
#include <stdlib.h>
AML_TILING_2D_ROWMAJOR_DECL
(
tiling_row
)
;
AML_TILING_2D_COLMAJOR_DECL
(
tiling_col
)
;
struct
aml_tiling
*
tiling_row
;
struct
aml_tiling
*
tiling_col
;
struct
aml_area
*
slow
,
*
fast
;
size_t
memsize
,
tilesize
,
N
,
T
;
double
*
a
,
*
b
,
*
c
;
...
...
@@ -34,7 +34,7 @@ void do_work()
ldb
=
lda
;
ldc
=
lda
;
size_t
ndims
[
2
];
aml_tiling_ndims
(
&
tiling_row
,
&
ndims
[
0
],
&
ndims
[
1
]);
aml_tiling_ndims
(
tiling_row
,
&
ndims
[
0
],
&
ndims
[
1
]);
for
(
int
k
=
0
;
k
<
ndims
[
1
];
k
++
)
{
...
...
@@ -45,12 +45,12 @@ void do_work()
{
size_t
aoff
,
boff
,
coff
;
double
*
ap
,
*
bp
,
*
cp
;
aoff
=
aml_tiling_tileid
(
&
tiling_col
,
i
,
k
);
boff
=
aml_tiling_tileid
(
&
tiling_row
,
k
,
j
);
coff
=
aml_tiling_tileid
(
&
tiling_row
,
i
,
j
);
ap
=
aml_tiling_tilestart
(
&
tiling_col
,
a
,
aoff
);
bp
=
aml_tiling_tilestart
(
&
tiling_row
,
b
,
boff
);
cp
=
aml_tiling_tilestart
(
&
tiling_row
,
c
,
coff
);
aoff
=
aml_tiling_tileid
(
tiling_col
,
i
,
k
);
boff
=
aml_tiling_tileid
(
tiling_row
,
k
,
j
);
coff
=
aml_tiling_tileid
(
tiling_row
,
i
,
j
);
ap
=
aml_tiling_tilestart
(
tiling_col
,
a
,
aoff
);
bp
=
aml_tiling_tilestart
(
tiling_row
,
b
,
boff
);
cp
=
aml_tiling_tilestart
(
tiling_row
,
c
,
coff
);
cblas_dgemm
(
CblasRowMajor
,
CblasNoTrans
,
CblasNoTrans
,
ldc
,
lda
,
ldb
,
1
.
0
,
ap
,
lda
,
bp
,
ldb
,
1
.
0
,
cp
,
ldc
);
}
}
...
...
@@ -72,9 +72,9 @@ int main(int argc, char* argv[])
tilesize
=
sizeof
(
double
)
*
T
*
T
;
/* the initial tiling, of 2D square tiles */
assert
(
!
aml_tiling_2d_
init
(
&
tiling_row
,
AML_TILING_TYPE_2D_ROWMAJOR
,
assert
(
!
aml_tiling_2d_
create
(
&
tiling_row
,
AML_TILING_TYPE_2D_ROWMAJOR
,
tilesize
,
memsize
,
N
/
T
,
N
/
T
));
assert
(
!
aml_tiling_2d_
init
(
&
tiling_col
,
AML_TILING_TYPE_2D_COLMAJOR
,
assert
(
!
aml_tiling_2d_
create
(
&
tiling_col
,
AML_TILING_TYPE_2D_COLMAJOR
,
tilesize
,
memsize
,
N
/
T
,
N
/
T
));
aml_area_linux_create
(
&
slow
,
AML_AREA_LINUX_MMAP_FLAG_PRIVATE
,
...
...
@@ -156,8 +156,8 @@ int main(int argc, char* argv[])
aml_area_munmap
(
fast
,
c
,
memsize
);
aml_area_linux_destroy
(
&
slow
);
aml_area_linux_destroy
(
&
fast
);
aml_tiling_2d_
fini
(
&
tiling_row
);
aml_tiling_2d_
fini
(
&
tiling_col
);
aml_tiling_2d_
destroy
(
&
tiling_row
);
aml_tiling_2d_
destroy
(
&
tiling_col
);
aml_finalize
();
return
0
;
}
benchmarks/dgemm_prefetch.c
View file @
0e89b24a
...
...
@@ -24,11 +24,11 @@
#include <math.h>
#include <stdlib.h>
AML_TILING_2D_ROWMAJOR_DECL
(
tiling_row
)
;
AML_TILING_2D_COLMAJOR_DECL
(
tiling_col
)
;
AML_TILING_1D_DECL
(
tiling_prefetch
)
;
AML_SCRATCH_PAR_DECL
(
sa
)
;
AML_SCRATCH_PAR_DECL
(
sb
)
;
struct
aml_tiling
*
tiling_row
;
struct
aml_tiling
*
tiling_col
;
struct
aml_tiling
*
tiling_prefetch
;
struct
aml_scratch
*
sa
;
struct
aml_scratch
*
sb
;
struct
aml_area
*
slow
,
*
fast
;
size_t
memsize
,
tilesize
,
N
,
T
;
...
...
@@ -45,19 +45,19 @@ void do_work()
int
ai
,
bi
,
oldai
,
oldbi
;
void
*
abaseptr
,
*
bbaseptr
;
struct
aml_scratch_request
*
ar
,
*
br
;
aml_tiling_ndims
(
&
tiling_row
,
&
ndims
[
0
],
&
ndims
[
1
]);
abaseptr
=
aml_scratch_baseptr
(
&
sa
);
bbaseptr
=
aml_scratch_baseptr
(
&
sb
);
prea
=
aml_tiling_tilestart
(
&
tiling_prefetch
,
a
,
0
);
preb
=
aml_tiling_tilestart
(
&
tiling_prefetch
,
b
,
0
);
aml_tiling_ndims
(
tiling_row
,
&
ndims
[
0
],
&
ndims
[
1
]);
abaseptr
=
aml_scratch_baseptr
(
sa
);
bbaseptr
=
aml_scratch_baseptr
(
sb
);
prea
=
aml_tiling_tilestart
(
tiling_prefetch
,
a
,
0
);
preb
=
aml_tiling_tilestart
(
tiling_prefetch
,
b
,
0
);
ai
=
-
1
;
bi
=
-
1
;
for
(
int
k
=
0
;
k
<
ndims
[
1
];
k
++
)
{
oldbi
=
bi
;
oldai
=
ai
;
aml_scratch_async_pull
(
&
sa
,
&
ar
,
abaseptr
,
&
ai
,
a
,
k
+
1
);
aml_scratch_async_pull
(
&
sb
,
&
br
,
bbaseptr
,
&
bi
,
b
,
k
+
1
);
aml_scratch_async_pull
(
sa
,
&
ar
,
abaseptr
,
&
ai
,
a
,
k
+
1
);
aml_scratch_async_pull
(
sb
,
&
br
,
bbaseptr
,
&
bi
,
b
,
k
+
1
);
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
ndims
[
0
];
i
++
)
{
...
...
@@ -65,25 +65,25 @@ void do_work()
{
size_t
coff
;
double
*
ap
,
*
bp
,
*
cp
;
ap
=
aml_tiling_tilestart
(
&
tiling_row
,
prea
,
i
);
bp
=
aml_tiling_tilestart
(
&
tiling_row
,
preb
,
j
);
coff
=
aml_tiling_tileid
(
&
tiling_row
,
i
,
j
);
cp
=
aml_tiling_tilestart
(
&
tiling_row
,
c
,
coff
);
ap
=
aml_tiling_tilestart
(
tiling_row
,
prea
,
i
);
bp
=
aml_tiling_tilestart
(
tiling_row
,
preb
,
j
);
coff
=
aml_tiling_tileid
(
tiling_row
,
i
,
j
);
cp
=
aml_tiling_tilestart
(
tiling_row
,
c
,
coff
);
cblas_dgemm
(
CblasRowMajor
,
CblasNoTrans
,
CblasNoTrans
,
ldc
,
lda
,
ldb
,
1
.
0
,
ap
,
lda
,
bp
,
ldb
,
1
.
0
,
cp
,
ldc
);
}
}
aml_scratch_wait
(
&
sa
,
ar
);
aml_scratch_wait
(
&
sb
,
br
);
prea
=
aml_tiling_tilestart
(
&
tiling_prefetch
,
abaseptr
,
ai
);
preb
=
aml_tiling_tilestart
(
&
tiling_prefetch
,
bbaseptr
,
bi
);
aml_scratch_release
(
&
sa
,
oldai
);
aml_scratch_release
(
&
sb
,
oldbi
);
aml_scratch_wait
(
sa
,
ar
);
aml_scratch_wait
(
sb
,
br
);
prea
=
aml_tiling_tilestart
(
tiling_prefetch
,
abaseptr
,
ai
);
preb
=
aml_tiling_tilestart
(
tiling_prefetch
,
bbaseptr
,
bi
);
aml_scratch_release
(
sa
,
oldai
);
aml_scratch_release
(
sb
,
oldbi
);
}
}
int
main
(
int
argc
,
char
*
argv
[])
{
AML_DMA_LINUX_SEQ_DECL
(
dma
)
;
struct
aml_dma
*
dma
;
struct
aml_bitmap
slowb
,
fastb
;
aml_init
(
&
argc
,
&
argv
);
assert
(
argc
==
5
);
...
...
@@ -97,12 +97,12 @@ int main(int argc, char* argv[])
tilesize
=
sizeof
(
double
)
*
T
*
T
;
/* the initial tiling, 2d grid of tiles */
assert
(
!
aml_tiling_2d_
init
(
&
tiling_row
,
AML_TILING_TYPE_2D_ROWMAJOR
,
assert
(
!
aml_tiling_2d_
create
(
&
tiling_row
,
AML_TILING_TYPE_2D_ROWMAJOR
,
tilesize
,
memsize
,
N
/
T
,
N
/
T
));
assert
(
!
aml_tiling_2d_
init
(
&
tiling_col
,
AML_TILING_TYPE_2D_COLMAJOR
,
assert
(
!
aml_tiling_2d_
create
(
&
tiling_col
,
AML_TILING_TYPE_2D_COLMAJOR
,
tilesize
,
memsize
,
N
/
T
,
N
/
T
));
/* the prefetch tiling, 1D sequence of columns of tiles */
assert
(
!
aml_tiling_1d_
init
(
&
tiling_prefetch
,
assert
(
!
aml_tiling_1d_
create
(
&
tiling_prefetch
,
tilesize
*
(
N
/
T
),
memsize
));
aml_area_linux_create
(
&
slow
,
AML_AREA_LINUX_MMAP_FLAG_PRIVATE
,
...
...
@@ -112,9 +112,9 @@ int main(int argc, char* argv[])
&
fastb
,
AML_AREA_LINUX_BINDING_FLAG_BIND
);
assert
(
fast
!=
NULL
);
assert
(
!
aml_dma_linux_seq_
init
(
&
dma
,
2
));
assert
(
!
aml_scratch_par_
init
(
&
sa
,
&
fast
,
&
slow
,
&
dma
,
&
tiling_prefetch
,
(
size_t
)
2
,
(
size_t
)
2
));
assert
(
!
aml_scratch_par_
init
(
&
sb
,
&
fast
,
&
slow
,
&
dma
,
&
tiling_prefetch
,
(
size_t
)
2
,
(
size_t
)
2
));
assert
(
!
aml_dma_linux_seq_
create
(
&
dma
,
2
));
assert
(
!
aml_scratch_par_
create
(
&
sa
,
fast
,
slow
,
dma
,
tiling_prefetch
,
(
size_t
)
2
,
(
size_t
)
2
));
assert
(
!
aml_scratch_par_
create
(
&
sb
,
fast
,
slow
,
dma
,
tiling_prefetch
,
(
size_t
)
2
,
(
size_t
)
2
));
/* allocation */
a
=
aml_area_mmap
(
slow
,
NULL
,
memsize
);
b
=
aml_area_mmap
(
slow
,
NULL
,
memsize
);
...
...
@@ -182,17 +182,17 @@ int main(int argc, char* argv[])
/* print the flops in GFLOPS */
printf
(
"dgemm-prefetch: %llu %lld %lld %f
\n
"
,
N
,
memsize
,
time
,
flops
/
1e9
);
aml_scratch_par_
fini
(
&
sa
);
aml_scratch_par_
fini
(
&
sb
);
aml_dma_linux_seq_
fini
(
&
dma
);
aml_scratch_par_
destroy
(
&
sa
);
aml_scratch_par_
destroy
(
&
sb
);
aml_dma_linux_seq_
destroy
(
&
dma
);
aml_area_munmap
(
slow
,
a
,
memsize
);
aml_area_munmap
(
slow
,
b
,
memsize
);
aml_area_munmap
(
fast
,
c
,
memsize
);
aml_area_linux_destroy
(
&
slow
);
aml_area_linux_destroy
(
&
fast
);
aml_tiling_2d_
fini
(
&
tiling_row
);
aml_tiling_2d_
fini
(
&
tiling_col
);
aml_tiling_1d_
fini
(
&
tiling_prefetch
);
aml_tiling_2d_
destroy
(
&
tiling_row
);
aml_tiling_2d_
destroy
(
&
tiling_col
);
aml_tiling_1d_
destroy
(
&
tiling_prefetch
);
aml_finalize
();
return
0
;
}
benchmarks/stream_add_omp.c
View file @
0e89b24a
...
...
@@ -38,8 +38,8 @@ int main(int argc, char *argv[])
*/
struct
aml_area
*
slow
=
&
aml_area_linux
,
*
fast
=
aml_area_linux
;
struct
aml_dma
dma
;
assert
(
!
aml_dma_
init
(
&
dma
,
0
));
struct
aml_dma
*
dma
;
assert
(
!
aml_dma_
create
(
&
dma
,
0
));
void
*
a
,
*
b
,
*
c
;
...
...
@@ -75,19 +75,19 @@ int main(int argc, char *argv[])
{
for
(
unsigned
long
i
=
0
;
i
<
numthreads
*
CHUNKING
;
i
++
)
{
#pragma omp task depend(inout: wa[i*esz:esz])
assert
(
!
aml_dma_move
(
&
dma
,
&
fast
,
&
slow
,
&
wa
[
i
*
esz
],
esz
));
assert
(
!
aml_dma_move
(
dma
,
&
fast
,
&
slow
,
&
wa
[
i
*
esz
],
esz
));
#pragma omp task depend(inout: wb[i*esz:esz])
assert
(
!
aml_dma_move
(
&
dma
,
&
fast
,
&
slow
,
&
wb
[
i
*
esz
],
esz
));
assert
(
!
aml_dma_move
(
dma
,
&
fast
,
&
slow
,
&
wb
[
i
*
esz
],
esz
));
#pragma omp task depend(inout: wc[i*esz:esz])
assert
(
!
aml_dma_move
(
&
dma
,
&
fast
,
&
slow
,
&
wc
[
i
*
esz
],
esz
));
assert
(
!
aml_dma_move
(
dma
,
&
fast
,
&
slow
,
&
wc
[
i
*
esz
],
esz
));
#pragma omp task depend(in: wa[i*esz:esz], wb[i*esz:esz]) depend(out: wc[i*esz:esz])
kernel
(
&
wa
[
i
*
esz
],
&
wb
[
i
*
esz
],
&
wc
[
i
*
esz
],
esz
);
#pragma omp task depend(inout: wa[i*esz:esz])
assert
(
!
aml_dma_move
(
&
dma
,
&
slow
,
&
fast
,
&
wa
[
i
*
esz
],
esz
));
assert
(
!
aml_dma_move
(
dma
,
&
slow
,
&
fast
,
&
wa
[
i
*
esz
],
esz
));
#pragma omp task depend(inout: wb[i*esz:esz])
assert
(
!
aml_dma_move
(
&
dma
,
&
slow
,
&
fast
,
&
wb
[
i
*
esz
],
esz
));
assert
(
!
aml_dma_move
(
dma
,
&
slow
,
&
fast
,
&
wb
[
i
*
esz
],
esz
));
#pragma omp task depend(inout: wc[i*esz:esz])
assert
(
!
aml_dma_move
(
&
dma
,
&
slow
,
&
fast
,
&
wc
[
i
*
esz
],
esz
));
assert
(
!
aml_dma_move
(
dma
,
&
slow
,
&
fast
,
&
wc
[
i
*
esz
],
esz
));
}
}
...
...
benchmarks/stream_add_omp_mt.c
View file @
0e89b24a
...
...
@@ -27,10 +27,10 @@
size_t
numthreads
,
tilesz
,
esz
;
unsigned
long
*
a
,
*
b
,
*
c
;
AML_TILING_1D_DECL
(
tiling
)
;
struct
aml_tiling
*
tiling
;
struct
aml_area
*
slow
,
*
fast
;
AML_SCRATCH_SEQ_DECL
(
sa
)
;
AML_SCRATCH_SEQ_DECL
(
sb
)
;
struct
aml_scratch
*
sa
;
struct
aml_scratch
*
sb
;
int
kernel
(
unsigned
long
*
a
,
unsigned
long
*
b
,
unsigned
long
*
c
,
size_t
n
)
{
...
...
@@ -42,7 +42,7 @@ int kernel(unsigned long *a, unsigned long *b, unsigned long *c, size_t n)
int
main
(
int
argc
,
char
*
argv
[])
{
AML_DMA_LINUX_PAR_DECL
(
dma
)
;
struct
aml_dma
*
dma
;
struct
aml_bitmap
slowb
,
fastb
;
aml_init
(
&
argc
,
&
argv
);
assert
(
argc
==
4
);
...
...
@@ -63,17 +63,17 @@ int main(int argc, char *argv[])
}
/* initialize all the supporting struct */
assert
(
!
aml_tiling_1d_
init
(
&
tiling
,
tilesz
,
memsize
));
assert
(
!
aml_tiling_1d_
create
(
&
tiling
,
tilesz
,
memsize
));
aml_area_linux_create
(
&
slow
,
AML_AREA_LINUX_MMAP_FLAG_PRIVATE
,
&
slowb
,
AML_AREA_LINUX_BINDING_FLAG_BIND
);
assert
(
slow
!=
NULL
);
aml_area_linux_create
(
&
fast
,
AML_AREA_LINUX_MMAP_FLAG_PRIVATE
,
&
fastb
,
AML_AREA_LINUX_BINDING_FLAG_BIND
);
assert
(
fast
!=
NULL
);
assert
(
!
aml_dma_linux_par_
init
(
&
dma
,
numthreads
*
2
,
numthreads
));
assert
(
!
aml_scratch_seq_
init
(
&
sa
,
&
fast
,
&
slow
,
&
dma
,
&
tiling
,
assert
(
!
aml_dma_linux_par_
create
(
&
dma
,
numthreads
*
2
,
numthreads
));
assert
(
!
aml_scratch_seq_
create
(
&
sa
,
fast
,
slow
,
dma
,
tiling
,
(
size_t
)
2
*
numthreads
,
(
size_t
)
1
));
assert
(
!
aml_scratch_seq_
init
(
&
sb
,
&
fast
,
&
slow
,
&
dma
,
&
tiling
,
assert
(
!
aml_scratch_seq_
create
(
&
sb
,
fast
,
slow
,
dma
,
tiling
,
(
size_t
)
2
*
numthreads
,
(
size_t
)
1
));
/* allocation */
...
...
@@ -93,23 +93,23 @@ int main(int argc, char *argv[])
int
i
,
ai
,
bi
,
oldai
,
oldbi
;
unsigned
long
*
ap
,
*
bp
;
void
*
abaseptr
,
*
bbaseptr
;
ap
=
aml_tiling_tilestart
(
&
tiling
,
a
,
0
);
bp
=
aml_tiling_tilestart
(
&
tiling
,
b
,
0
);
abaseptr
=
aml_scratch_baseptr
(
&
sa
);
bbaseptr
=
aml_scratch_baseptr
(
&
sb
);
ap
=
aml_tiling_tilestart
(
tiling
,
a
,
0
);
bp
=
aml_tiling_tilestart
(
tiling
,
b
,
0
);
abaseptr
=
aml_scratch_baseptr
(
sa
);
bbaseptr
=
aml_scratch_baseptr
(
sb
);
ai
=
-
1
;
bi
=
-
1
;
for
(
i
=
0
;
i
<
(
memsize
/
tilesz
)
-
1
;
i
++
)
{
struct
aml_scratch_request
*
ar
,
*
br
;
oldai
=
ai
;
oldbi
=
bi
;
aml_scratch_async_pull
(
&
sa
,
&
ar
,
abaseptr
,
&
ai
,
a
,
i
+
1
);
aml_scratch_async_pull
(
&
sb
,
&
br
,
bbaseptr
,
&
bi
,
b
,
i
+
1
);
aml_scratch_async_pull
(
sa
,
&
ar
,
abaseptr
,
&
ai
,
a
,
i
+
1
);
aml_scratch_async_pull
(
sb
,
&
br
,
bbaseptr
,
&
bi
,
b
,
i
+
1
);
kernel
(
ap
,
bp
,
&
c
[
i
*
esz
],
esz
);
aml_scratch_wait
(
&
sa
,
ar
);
aml_scratch_wait
(
&
sb
,
br
);
ap
=
aml_tiling_tilestart
(
&
tiling
,
abaseptr
,
ai
);
bp
=
aml_tiling_tilestart
(
&
tiling
,
bbaseptr
,
bi
);
aml_scratch_release
(
&
sa
,
oldai
);
aml_scratch_release
(
&
sb
,
oldbi
);
aml_scratch_wait
(
sa
,
ar
);
aml_scratch_wait
(
sb
,
br
);
ap
=
aml_tiling_tilestart
(
tiling
,
abaseptr
,
ai
);
bp
=
aml_tiling_tilestart
(
tiling
,
bbaseptr
,
bi
);
aml_scratch_release
(
sa
,
oldai
);
aml_scratch_release
(
sb
,
oldbi
);
}
kernel
(
ap
,
bp
,
&
c
[
i
*
esz
],
esz
);
...
...
@@ -118,15 +118,15 @@ int main(int argc, char *argv[])
assert
(
c
[
i
]
==
esize
);
}
aml_scratch_seq_
fini
(
&
sa
);
aml_scratch_seq_
fini
(
&
sb
);
aml_dma_linux_par_
fini
(
&
dma
);
aml_scratch_seq_
destroy
(
&
sa
);
aml_scratch_seq_
destroy
(
&
sb
);
aml_dma_linux_par_
destroy
(
&
dma
);
aml_area_munmap
(
slow
,
a
,
memsize
);
aml_area_munmap
(
slow
,
b
,
memsize
);
aml_area_munmap
(
fast
,
c
,
memsize
);
aml_area_linux_destroy
(
&
slow
);
aml_area_linux_destroy
(
&
fast
);
aml_tiling_1d_
fini
(
&
tiling
);
aml_tiling_1d_
destroy
(
&
tiling
);
aml_finalize
();
return
0
;
}
benchmarks/stream_add_omp_st.c
View file @
0e89b24a
...
...
@@ -26,10 +26,10 @@
size_t
numthreads
,
tilesz
,
esz
;
unsigned
long
*
a
,
*
b
,
*
c
;
AML_TILING_1D_DECL
(
tiling
)
;
struct
aml_tiling
*
tiling
;
struct
aml_area
*
slow
,
*
fast
;
AML_SCRATCH_PAR_DECL
(
sa
)
;
AML_SCRATCH_PAR_DECL
(
sb
)
;
struct
aml_scratch
*
sa
;
struct
aml_scratch
*
sb
;
int
kernel
(
unsigned
long
*
a
,
unsigned
long
*
b
,
unsigned
long
*
c
,
size_t
n
)
{
...
...
@@ -47,32 +47,32 @@ void do_work(unsigned long tid)
unsigned
long
*
ap
,
*
bp
,
*
cp
;
void
*
abaseptr
,
*
bbaseptr
;
offset
=
tid
*
CHUNKING
;
ap
=
aml_tiling_tilestart
(
&
tiling
,
a
,
offset
);
bp
=
aml_tiling_tilestart
(
&
tiling
,
b
,
offset
);
cp
=
aml_tiling_tilestart
(
&
tiling
,
c
,
offset
);
abaseptr
=
aml_scratch_baseptr
(
&
sa
);
bbaseptr
=
aml_scratch_baseptr
(
&
sb
);
ap
=
aml_tiling_tilestart
(
tiling
,
a
,
offset
);
bp
=
aml_tiling_tilestart
(
tiling
,
b
,
offset
);
cp
=
aml_tiling_tilestart
(
tiling
,
c
,
offset
);
abaseptr
=
aml_scratch_baseptr
(
sa
);
bbaseptr
=
aml_scratch_baseptr
(
sb
);
ai
=
-
1
;
bi
=
-
1
;
for
(
i
=
0
;
i
<
CHUNKING
-
1
;
i
++
)
{
struct
aml_scratch_request
*
ar
,
*
br
;
oldai
=
ai
;
oldbi
=
bi
;
aml_scratch_async_pull
(
&
sa
,
&
ar
,
abaseptr
,
&
ai
,
a
,
offset
+
i
+
1
);
aml_scratch_async_pull
(
&
sb
,
&
br
,
bbaseptr
,
&
bi
,
b
,
offset
+
i
+
1
);
aml_scratch_async_pull
(
sa
,
&
ar
,
abaseptr
,
&
ai
,
a
,
offset
+
i
+
1
);
aml_scratch_async_pull
(
sb
,
&
br
,
bbaseptr
,
&
bi
,
b
,
offset
+
i
+
1
);
kernel
(
ap
,
bp
,
cp
,
esz
);
aml_scratch_wait
(
&
sa
,
ar
);
aml_scratch_wait
(
&
sb
,
br
);
ap
=
aml_tiling_tilestart
(
&
tiling
,
abaseptr
,
ai
);
bp
=
aml_tiling_tilestart
(
&
tiling
,
bbaseptr
,
bi
);
cp
=
aml_tiling_tilestart
(
&
tiling
,
c
,
offset
+
i
+
1
);
aml_scratch_release
(
&
sa
,
oldai
);
aml_scratch_release
(
&
sb
,
oldbi
);
aml_scratch_wait
(
sa
,
ar
);
aml_scratch_wait
(
sb
,
br
);
ap
=
aml_tiling_tilestart
(
tiling
,
abaseptr
,
ai
);
bp
=
aml_tiling_tilestart
(
tiling
,
bbaseptr
,
bi
);
cp
=
aml_tiling_tilestart
(
tiling
,
c
,
offset
+
i
+
1
);
aml_scratch_release
(
sa
,
oldai
);
aml_scratch_release
(
sb
,
oldbi
);
}
kernel
(
ap
,
bp
,
cp
,
esz
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
AML_DMA_LINUX_SEQ_DECL
(
dma
)
;
struct
aml_dma
*
dma
;
struct
aml_bitmap
slowb
,
fastb
;
aml_init
(
&
argc
,
&
argv
);
assert
(
argc
==
4
);
...
...
@@ -93,7 +93,7 @@ int main(int argc, char *argv[])
}
/* initialize all the supporting struct */
assert
(
!
aml_tiling_1d_
init
(
&
tiling
,
tilesz
,
memsize
));
assert
(
!
aml_tiling_1d_
create
(
&
tiling
,
tilesz
,
memsize
));
aml_area_linux_create
(
&
slow
,
AML_AREA_LINUX_MMAP_FLAG_PRIVATE
,
&
slowb
,
AML_AREA_LINUX_BINDING_FLAG_BIND
);
...
...
@@ -101,10 +101,10 @@ int main(int argc, char *argv[])
aml_area_linux_create
(
&
fast
,
AML_AREA_LINUX_MMAP_FLAG_PRIVATE
,
&
fastb
,
AML_AREA_LINUX_BINDING_FLAG_BIND
);
assert
(
fast
!=
NULL
);
assert
(
!
aml_dma_linux_seq_
init
(
&
dma
,
numthreads
*
2
));
assert
(
!
aml_scratch_par_
init
(
&
sa
,
&
fast
,
&
slow
,
&
dma
,
&
tiling
,
assert
(
!
aml_dma_linux_seq_
create
(
&
dma
,
numthreads
*
2
));
assert
(
!
aml_scratch_par_
create
(
&
sa
,
fast
,
slow
,
dma
,
tiling
,
2
*
numthreads
,
numthreads
));
assert
(
!
aml_scratch_par_
init
(
&
sb
,
&
fast
,
&
slow
,
&
dma
,
&
tiling
,
assert
(
!
aml_scratch_par_
create
(
&
sb
,
fast
,
slow
,
dma
,
tiling
,
2
*
numthreads
,
numthreads
));
/* allocation */
...
...
@@ -131,15 +131,15 @@ int main(int argc, char *argv[])
assert
(
c
[
i
]
==
esize
);
}
aml_scratch_par_
fini
(
&
sa
);
aml_scratch_par_
fini
(
&
sb
);
aml_dma_linux_seq_
fini
(
&
dma
);
aml_scratch_par_
destroy
(
&
sa
);
aml_scratch_par_
destroy
(
&
sb
);
aml_dma_linux_seq_
destroy
(
&
dma
);
aml_area_munmap
(
slow
,
a
,
memsize
);
aml_area_munmap
(
slow
,
b
,
memsize
);
aml_area_munmap
(
fast
,
c
,
memsize
);
aml_area_linux_destroy
(
&
slow
);
aml_area_linux_destroy
(
&
fast
);
aml_tiling_1d_
fini
(
&
tiling
);
aml_tiling_1d_
destroy
(
&
tiling
);
aml_finalize
();
return
0
;
}
benchmarks/stream_add_pth_st.c
View file @
0e89b24a
...
...
@@ -27,9 +27,9 @@
size_t
numthreads
,
tilesz
,
esz
;
unsigned
long
*
a
,
*
b
,
*
c
;
struct
aml_area
*
slow
,
*
fast
;
AML_TILING_1D_DECL
(
tiling
)
;
AML_SCRATCH_PAR_DECL
(
sa
)
;
AML_SCRATCH_PAR_DECL
(
sb
)
;
struct
aml_tiling
*
tiling
;
struct
aml_scratch
*
sa
;
struct
aml_scratch
*
sb
;
int
kernel
(
unsigned
long
*
a
,
unsigned
long
*
b
,
unsigned
long
*
c
,
size_t
n
)
{
...
...
@@ -53,25 +53,25 @@ void *th_work(void *arg)
void
*
abaseptr
,
*
bbaseptr
;
struct
winfo
*
wi
=
arg
;
offset
=
wi
->
tid
*
CHUNKING
;
ap
=
aml_tiling_tilestart
(
&
tiling
,
a
,
offset
);
bp
=
aml_tiling_tilestart
(
&
tiling
,
b
,
offset
);
cp
=
aml_tiling_tilestart
(
&
tiling
,
c
,
offset
);
abaseptr
=
aml_scratch_baseptr
(
&
sa
);
bbaseptr
=
aml_scratch_baseptr
(
&
sb
);
ap
=
aml_tiling_tilestart
(
tiling
,
a
,
offset
);
bp
=
aml_tiling_tilestart
(
tiling
,
b
,
offset
);
cp
=
aml_tiling_tilestart
(
tiling
,
c
,
offset
);
abaseptr
=
aml_scratch_baseptr
(
sa
);
bbaseptr
=
aml_scratch_baseptr
(
sb
);
ai
=
-
1
;
bi
=
-
1
;
for
(
i
=
0
;
i
<
CHUNKING
-
1
;
i
++
)
{
struct
aml_scratch_request
*
ar
,
*
br
;
oldai
=
ai
;
oldbi
=
bi
;
aml_scratch_async_pull
(
&
sa
,
&
ar
,
abaseptr
,
&
ai
,
a
,
offset
+
i
+
1
);
aml_scratch_async_pull
(
&
sb
,
&
br
,
bbaseptr
,
&
bi
,
b
,
offset
+
i
+
1
);
aml_scratch_async_pull
(
sa
,
&
ar
,
abaseptr
,
&
ai
,
a
,
offset
+
i
+
1
);
aml_scratch_async_pull
(
sb
,
&
br
,
bbaseptr
,
&
bi
,
b
,
offset
+
i
+
1
);
kernel
(
ap
,
bp
,
cp
,
esz
);
aml_scratch_wait
(
&
sa
,
ar
);
aml_scratch_wait
(
&
sb
,
br
);
ap
=
aml_tiling_tilestart
(
&
tiling
,
abaseptr
,
ai
);
bp
=
aml_tiling_tilestart
(
&
tiling
,
bbaseptr
,
bi
);
cp
=
aml_tiling_tilestart
(
&
tiling
,
c
,
offset
+
i
+
1
);
aml_scratch_release
(
&
sa
,
oldai
);
aml_scratch_release
(
&
sb
,
oldbi
);
aml_scratch_wait
(
sa
,
ar
);
aml_scratch_wait
(
sb
,
br
);
ap
=
aml_tiling_tilestart
(
tiling
,
abaseptr
,
ai
);
bp
=
aml_tiling_tilestart
(
tiling
,
bbaseptr
,
bi
);
cp
=
aml_tiling_tilestart
(
tiling
,
c
,
offset
+
i
+
1
);
aml_scratch_release
(
sa
,
oldai
);
aml_scratch_release
(
sb
,
oldbi
);
}
kernel
(
ap
,
bp
,
cp
,
esz
);
...
...
@@ -79,7 +79,7 @@ void *th_work(void *arg)
}
int
main
(
int
argc
,
char
*
argv
[])
{
AML_DMA_LINUX_SEQ_DECL
(
dma
)
;
struct
aml_dma
*
dma
;
struct
aml_bitmap
slowb
,
fastb
;
aml_init
(
&
argc
,
&
argv
);
assert
(
argc
==
4
);
...
...
@@ -100,17 +100,17 @@ int main(int argc, char *argv[])
}
/* initialize all the supporting struct */
assert
(
!
aml_tiling_1d_
init
(
&
tiling
,
tilesz
,
memsize
));
assert
(
!
aml_tiling_1d_
create
(
&
tiling
,
tilesz
,
memsize
));
aml_area_linux_create
(
&
slow
,
AML_AREA_LINUX_MMAP_FLAG_PRIVATE
,
&
slowb
,
AML_AREA_LINUX_BINDING_FLAG_BIND
);
assert
(
slow
!=
NULL
);
aml_area_linux_create
(
&
fast
,
AML_AREA_LINUX_MMAP_FLAG_PRIVATE
,
&
fastb
,
AML_AREA_LINUX_BINDING_FLAG_BIND
);
assert
(
fast
!=
NULL
);
assert
(
!
aml_dma_linux_seq_
init
(
&
dma
,
(
size_t
)
numthreads
*
4
));
assert
(
!
aml_scratch_par_
init
(
&
sa
,
&
fast
,
&
slow
,
&
dma
,
&
tiling
,
assert
(
!
aml_dma_linux_seq_
create
(
dma
,
(
size_t
)
numthreads
*
4
));
assert
(
!
aml_scratch_par_
create
(
&
sa
,
fast
,
slow
,
dma
,
tiling
,
(
size_t
)
2
*
numthreads
,
(
size_t
)
numthreads
));
assert
(
!
aml_scratch_par_
init
(
&
sb
,
&
fast
,
&
slow
,
&
dma
,
&
tiling
,
assert
(
!
aml_scratch_par_
create
(
&
sb
,
fast
,
slow
,
dma
,
tiling
,
(
size_t
)
2
*
numthreads
,
(
size_t
)
numthreads
));
/* allocation */
...
...
@@ -142,15 +142,15 @@ int main(int argc, char *argv[])
assert
(
c
[
i
]
==
esize
);
}
aml_scratch_par_
fini
(
&
sa
);
aml_scratch_par_
fini
(
&
sb
);
aml_dma_linux_seq_
fini
(
&
dma
);
aml_scratch_par_
destroy
(
&
sa
);
aml_scratch_par_
destroy
(
&
sb
);
aml_dma_linux_seq_
destroy
(
&
dma
);
aml_area_munmap
(
slow
,
a
,
memsize
);
aml_area_munmap
(
slow
,
b
,
memsize
);
aml_area_munmap
(
fast
,
c
,
memsize
);
aml_area_linux_destroy
(
&
slow
);
aml_area_linux_destroy
(
&
fast
);
aml_tiling_1d_
fini
(
&
tiling
);
aml_tiling_1d_
destroy
(
&
tiling
);
aml_finalize
();
return
0
;
}
include/aml.h
View file @
0e89b24a
...
...
@@ -265,16 +265,6 @@ struct aml_tiling_ops {
/**
* \todo Doc
**/
int
(
*
init_iterator
)(
struct
aml_tiling_data
*
tiling
,
struct
aml_tiling_iterator
*
iterator
,
int
flags
);
/**
* \todo Doc
**/
int
(
*
fini_iterator
)(
struct
aml_tiling_data
*
tiling
,
struct
aml_tiling_iterator
*
iterator
);
/**
* \todo Doc
**/
int
(
*
destroy_iterator
)(
struct
aml_tiling_data
*
tiling
,
struct
aml_tiling_iterator
**
iterator
);
/**
...
...
@@ -396,23 +386,6 @@ struct aml_tiling_iterator {
int
aml_tiling_create_iterator
(
struct
aml_tiling
*
tiling
,
struct
aml_tiling_iterator
**
iterator
,
int
flags
);
/**
* Initializes a tiling iterator.
* @param tiling: an initialized tiling structure.
* @param iterator: an allocated tiling iterator structure.
* @param flags: reserved for future use; pass 0 for now.
* @return 0 if successful; an error code otherwise.
**/
int
aml_tiling_init_iterator
(
struct
aml_tiling
*
tiling
,
struct
aml_tiling_iterator
*
iterator
,
int
flags
);
/**
* Finalize an initialized tiling iterator.
* @param tiling: an initialized tiling structure.
* @param iterator: an initialized tiling iterator structure.
**/
void
aml_tiling_fini_iterator
(
struct
aml_tiling
*
tiling
,
struct
aml_tiling_iterator
*
iterator
);
/**
* Tears down an initialized tiling iterator.
* @param tiling: an initialized tiling structure.
...
...
include/aml/area/linux.h
View file @
0e89b24a
...
...
@@ -88,23 +88,6 @@ struct aml_area_linux_data {
int
mmap_flags
;
};
/**
* Static declaration of an aml area with linux ops.
**/
#define AML_AREA_LINUX_DECL(name) \
struct aml_area_linux_data __ ##name## _inner_data; \
struct aml_area name = { \
&aml_area_linux_ops, \
(struct aml_area_data *)&__ ## name ## _inner_data, \
}
/**
* Static declaration of the size of a linux aml area.
**/
#define AML_AREA_LINUX_ALLOCSIZE \
(sizeof(struct aml_area_linux_data) + \
sizeof(struct aml_area))
/**
* \brief Linux area creation.
*
...
...
@@ -137,19 +120,6 @@ int aml_area_linux_create(struct aml_area **area, const int mmap_flags,
**/
void
aml_area_linux_destroy
(
struct
aml_area
**
area
);
/**
* Initialize a struct aml_area declared using the AML_AREA_LINUX_DECL macro.
* @see aml_area_linux_create() for details on arguments.
*/
int
aml_area_linux_init
(
struct
aml_area
*
area
,
const
int
mmap_flags
,
const
struct
aml_bitmap
*
nodemask
,
const
int
binding_flags
);
/**
* Finalize a struct aml_area initialized with aml_area_linux_init.
*/
void
aml_area_linux_fini
(
struct
aml_area
*
area
);
/**
* Bind memory of size "size" pointed by "ptr" to binding set in "bind".
* If mbind call was not successfull, i.e AML_FAILURE is returned, then errno
...
...
include/aml/dma/linux-par.h
View file @
0e89b24a
...
...
@@ -62,7 +62,7 @@ struct aml_dma_request_linux_par {
/** Inside of a parallel request for linux movement. **/
struct
aml_dma_linux_par_data
{
size_t
nbthreads
;
struct
aml_vector
requests
;
struct
aml_vector
*
requests
;
pthread_mutex_t
lock
;
};
...
...
@@ -75,7 +75,7 @@ struct aml_dma_linux_par_ops {
/**
* aml_dma structure for linux based, parallel dma movement
* Needs to be initialized with aml_dma_linux_par_
init
().
* Needs to be initialized with aml_dma_linux_par_
create
().
* Can be passed to generic aml_dma_*() functions.
**/
struct
aml_dma_linux_par
{
...
...
@@ -83,19 +83,6 @@ struct aml_dma_linux_par {
struct
aml_dma_linux_par_data
data
;
};
/** Static declaration of aml_dma_linux_par structure. **/
#define AML_DMA_LINUX_PAR_DECL(name) \
struct aml_dma_linux_par __ ##name## _inner_data; \
struct aml_dma name = { \
&aml_dma_linux_par_ops, \
(struct aml_dma_data *)&__ ## name ## _inner_data, \
}
/** Static declaration of aml_dma_linux_par structure size. **/
#define AML_DMA_LINUX_PAR_ALLOCSIZE \
(sizeof(struct aml_dma_linux_par) + \
sizeof(struct aml_dma))
/**
* Allocates and initializes a new parallel DMA.
*
...
...
@@ -110,24 +97,6 @@ struct aml_dma_linux_par {
int
aml_dma_linux_par_create
(
struct
aml_dma
**
dma
,
size_t
nbreqs
,
size_t
nbthreads
);
/**
* Initializes a new parallel DMA.
*
* @param dma a pointer to a dma declared with the AML_DMA_LINUX_PAR_DECL macro
* @param nbreqs the initial number of slots for asynchronous requests that are
* in-flight (will be increased automatically if necessary).
* @param nbthreads the number of threads to launch for each request.
*
* @return 0 if successful; an error code otherwise.
**/
int
aml_dma_linux_par_init
(
struct
aml_dma
*
dma
,
size_t
nbreqs
,
size_t
nbthreads
);
/**