Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
argo
aml
Commits
ed27873f
Commit
ed27873f
authored
May 19, 2020
by
Swann Perarnau
Browse files
Merge branch 'benchmarks-update' into 'staging'
Benchmarks Update See merge request
!86
parents
1c4b94d4
0244130a
Pipeline
#10515
passed with stages
in 3 minutes and 29 seconds
Changes
21
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
.gitlab-ci.yml
View file @
ed27873f
...
...
@@ -14,7 +14,7 @@ repoquality:
-
/^wip.*/
-
/^WIP.*/
script
:
-
git ls-files '*.c' '*.h'
| grep -vE "benchmarks"
> .repoquality
-
git ls-files '*.c' '*.h' > .repoquality
-
nix run -f "$ARGOPKGS" repoquality --command repoquality
tags
:
-
nix
...
...
@@ -74,7 +74,7 @@ make:generic:
-
/^wip.*/
-
/^WIP.*/
variables
:
CFLAGS
:
"
-std=c99
-pedantic
-Wall
-Wextra
-Werror
-Wno-unused-but-set-parameter"
CFLAGS
:
"
-std=c99
-pedantic
-Wall
-Wextra
-Werror
-Wno-unused-but-set-parameter
-Wno-builtin-declaration-mismatch
"
script
:
-
|
nix-shell --run bash <<< '
...
...
@@ -100,7 +100,7 @@ make:out-of-tree:
-
/^wip.*/
-
/^WIP.*/
variables
:
CFLAGS
:
"
-std=c99
-pedantic
-Wall
-Wextra
-Werror
-Wno-unused-but-set-parameter"
CFLAGS
:
"
-std=c99
-pedantic
-Wall
-Wextra
-Werror
-Wno-unused-but-set-parameter
-Wno-builtin-declaration-mismatch
"
script
:
-
|
nix-shell --run bash <<< '
...
...
@@ -123,13 +123,16 @@ make:knl:
except
:
-
/^wip.*/
-
/^WIP.*/
variables
:
CFLAGS
:
"
-mkl
-xhost"
CC
:
"
icc"
tags
:
-
knl
script
:
-
source /opt/intel/compilers_and_libraries/linux/bin/compilervars.sh intel64
-
./autogen.sh
-
mkdir build
-
CC=icc CFLAGS="-mkl -xhost"
./configure --prefix=`pwd`/build
--enable-benchmarks
-
./configure --prefix=`pwd`/build
-
make -j64
-
make -C tests check
-
make install
...
...
Makefile.am
View file @
ed27873f
ACLOCAL_AMFLAGS
=
-I
m4
SUBDIRS
=
src include tests doc
#if ADD_BENCHMARKS
#SUBDIRS += benchmarks
#endif
SUBDIRS
=
src include tests doc benchmarks
pkgconfigdir
=
$(libdir)
/pkgconfig
pkgconfig_DATA
=
aml.pc
...
...
benchmarks/Makefile.am
View file @
ed27873f
AM_CFLAGS
=
-I
$(top_srcdir)
/include
-I
.
$(OPENMP_CFLAGS)
AM_LDFLAGS
=
../src/libaml.la
$(OPENMP_CFLAGS)
noinst_LIBRARIES
=
libutils.a
libutils_a_SOURCES
=
utils.c utils.h
LDADD
=
libutils.a
noinst_PROGRAMS
=
stream_add_pth_st
\
stream_add_omp_st
\
stream_add_omp_mt
\
dgemm_vanilla
\
dgemm_mkl
\
dgemm_prefetch
\
dgemm_noprefetch
AM_COLOR_TESTS
=
yes
AM_CFLAGS
=
-I
$(top_srcdir)
/include
$(PTHREAD_CFLAGS)
$(OPENMP_CFLAGS)
AM_LDFLAGS
=
../src/libaml.la
$(PTHREAD_LIBS)
$(OPENMP_CFLAGS)
noinst_LIBRARIES
=
libkernel.a
libkernel_a_SOURCES
=
utils.c utils.h blas_l1_kernel.c blas_l1_kernel.h verify_blas_l1.c verify_blas_l1.h
LDADD
=
libkernel.a
NOPREFETCH
=
noprefetch/flat_blas_l1
\
noprefetch/tiled_blas_l1
BENCHMARKS
=
$(NOPREFETCH)
# all tests
check_PROGRAMS
=
$(BENCHMARKS)
TESTS
=
$(BENCHMARKS)
benchmarks/blas_l1_kernel.c
0 → 100644
View file @
ed27873f
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
******************************************************************************/
/*
* This is a benchmark for the BLAS Level 1 operations for AML.
*/
#include "blas_l1_kernel.h"
/*
 * Sign of a: 1 if a > 0, -1 if a < 0, 0 otherwise.
 * The argument is parenthesized so that operators with lower precedence than
 * the comparisons (e.g. `&`, `?:`) expand correctly. It may still be
 * evaluated twice, so it must not have side effects.
 * TODO: look into another way to define this (e.g. a static inline function).
 */
#define sign(a) (((a) > 0) ? 1 : (((a) < 0) ? -1 : 0))
/*
 * Sum of absolute values: returns |a[0]| + ... + |a[n-1]| (0 when n == 0).
 *
 * b, c and scalar are not used; they are part of the signature shared by the
 * kernels in this file.
 */
double dasum(size_t n, double *a, double *b, double *c, double scalar)
{
	/* Mark the parameters as unused without dereferencing the pointers,
	 * so that callers may pass NULL for them. */
	(void)b;
	(void)c;
	(void)scalar;

	double sum = 0;

	for (size_t i = 0; i < n; i++)
		sum = sum + fabs(a[i]);
	return sum;
}
/*
 * Scaled vector addition: for every k in [0, n), c[k] = b[k] + scalar * a[k].
 * Always returns 1.
 */
double daxpy(size_t n, double *a, double *b, double *c, double scalar)
{
	size_t k;

#pragma omp parallel for
	for (k = 0; k < n; k++) {
		c[k] = scalar * a[k] + b[k];
	}

	return 1;
}
/*
 * Vector copy: for every i in [0, n), b[i] = a[i]. Always returns 1.
 *
 * c and scalar are not used; they are part of the signature shared by the
 * kernels in this file.
 */
double dcopy(size_t n, double *a, double *b, double *c, double scalar)
{
	/* Mark as unused without dereferencing c, so callers may pass NULL. */
	(void)c;
	(void)scalar;

#pragma omp parallel for
	for (size_t i = 0; i < n; i++)
		b[i] = a[i];
	return 1;
}
/*
 * Dot product: returns a[0]*b[0] + ... + a[n-1]*b[n-1] (0 when n == 0).
 * The products are accumulated in a long double and the total is converted
 * back to double on return.
 *
 * c and scalar are not used; they are part of the signature shared by the
 * kernels in this file.
 */
double ddot(size_t n, double *a, double *b, double *c, double scalar)
{
	/* Mark as unused without dereferencing c, so callers may pass NULL. */
	(void)c;
	(void)scalar;

	long double dot = 0.0;

#pragma omp parallel for reduction(+ : dot)
	for (size_t i = 0; i < n; i++) {
		long double prod = a[i] * b[i];

		dot += prod;
	}
	return (double)dot;
}
double
dnrm2
(
size_t
n
,
double
*
a
,
double
*
b
,
double
*
c
,
double
scalar
)
{
(
void
)
*
b
;
(
void
)
*
c
;
(
void
)
scalar
;
size_t
i
;
double
scale
,
ssq
,
temp
;
scale
=
0
.
0
;
ssq
=
1
.
0
;
for
(
i
=
0
;
i
<
n
;
i
++
)
{
if
(
a
[
i
]
!=
0
.
0
)
{
temp
=
fabs
(
a
[
i
]);
if
(
scale
<
temp
)
{
ssq
=
1
.
0
+
ssq
*
pow
(
scale
/
temp
,
2
);
scale
=
temp
;
}
else
ssq
=
ssq
+
pow
(
temp
/
scale
,
2
);
}
}
return
scale
*
sqrt
(
ssq
);
}
/*
 * Vector scaling: for every i in [0, n), b[i] = scalar * a[i].
 * Always returns 1.
 *
 * c is not used; it is part of the signature shared by the kernels in this
 * file.
 */
double dscal(size_t n, double *a, double *b, double *c, double scalar)
{
	/* Mark as unused without dereferencing c, so callers may pass NULL. */
	(void)c;

#pragma omp parallel for
	for (size_t i = 0; i < n; i++)
		b[i] = scalar * a[i];
	return 1;
}
/*
 * Vector swap: exchanges a[i] and b[i] for every i in [0, n).
 * Always returns 1.
 *
 * c and scalar are not used; they are part of the signature shared by the
 * kernels in this file.
 */
double dswap(size_t n, double *a, double *b, double *c, double scalar)
{
	/* Mark as unused without dereferencing c, so callers may pass NULL. */
	(void)c;
	(void)scalar;

#pragma omp parallel for
	for (size_t i = 0; i < n; i++) {
		/* Declared inside the loop body so that every OpenMP thread
		 * has its own copy; a variable declared outside the parallel
		 * loop would be shared and raced on. */
		const double tmp = a[i];

		a[i] = b[i];
		b[i] = tmp;
	}
	return 1;
}
/*
 * Index of the element of a[0..n-1] with the largest absolute value,
 * returned as a double. On ties the lowest index wins. Returns 0 when
 * n <= 1 (a is not read in that case).
 *
 * b, c and scalar are not used; they are part of the signature shared by the
 * kernels in this file.
 */
double idmax(size_t n, double *a, double *b, double *c, double scalar)
{
	/* Mark as unused without dereferencing, so callers may pass NULL. */
	(void)b;
	(void)c;
	(void)scalar;

	if (n <= 1)
		return 0;

	size_t id_max = 0;
	/* Compare magnitudes from the start: seeding with the signed a[0]
	 * would let any later element beat a large negative first element. */
	double max = fabs(a[0]);

	for (size_t i = 1; i < n; i++) {
		const double mag = fabs(a[i]);

		if (mag > max) {
			id_max = i;
			max = mag;
		}
	}
	return (double)id_max;
}
/* The rotations. Not included in the array of functions because of their
parameters */
/*
 * Plane rotation. For every i in [0, n):
 *   a[i] <- x * a[i] + y * b[i]
 *   b[i] <- x * b[i] - y * a[i]   (using the old a[i])
 */
void drot(size_t n, double *a, double *b, double x, double y)
{
#pragma omp parallel for
	for (size_t i = 0; i < n; i++) {
		/* Per-iteration temporary: declared inside the loop body so
		 * that it is private to each OpenMP thread. */
		const double rotated_a = x * a[i] + y * b[i];

		b[i] = x * b[i] - y * a[i];
		a[i] = rotated_a;
	}
}
/*
 * Create a plane rotation. TODO: Verify
 *
 * NOTE(review): x, y, c and s are all received by value, so the values
 * computed here (r and z stored into x and y, plus c and s) only modify local
 * copies and are never visible to the caller. Returning them would require
 * pointer parameters, i.e. a signature change.
 */
void drotg(double x, double y, double c, double s)
{
	const double abs_x = fabs(x);
	const double abs_y = fabs(y);
	/* The component with the larger magnitude gives r its sign. */
	const double roe = (abs_x > abs_y) ? x : y;
	const double scale = abs_x + abs_y;
	double r, z;

	if (scale == 0.0) {
		/* Both inputs are zero. */
		c = 1.0;
		s = 0.0;
		r = 0.0;
		z = 0.0;
	} else {
		r = scale * sqrt(pow(x / scale, 2) + pow(y / scale, 2));
		r = sign(roe) * r;
		c = x / r;
		s = y / r;
		z = 1.0;
		if (abs_x > abs_y)
			z = s;
		if (abs_y >= abs_x && c != 0.0)
			z = 1.0 / c;
	}

	x = r;
	y = z;
}
/*
 * Apply a modified rotation H to the pairs (a[i], b[i]) for i in [0, n):
 *   a[i] <- a[i] * h11 + b[i] * h12
 *   b[i] <- a[i] * h21 + b[i] * h22   (using the old a[i])
 *
 * param[0] is a flag selecting how H is built from param[1..4]:
 *   flag == -2 : H is the identity, a and b are left untouched;
 *   flag <  0  : h11 = param[1], h21 = param[2], h12 = param[3],
 *                h22 = param[4];
 *   flag == 0  : h11 = h22 = 1, h21 = param[2], h12 = param[3];
 *   otherwise  : h12 = 1, h21 = -1, h11 = param[1], h22 = param[4].
 */
void drotm(size_t n, double *a, double *b, double *param)
{
	const double flag = param[0];
	double h11, h12, h21, h22;

	/* Identity rotation (as produced by drotmg): nothing to apply. */
	if (flag == -2.0)
		return;

	if (flag < 0.0) {
		h11 = param[1];
		h12 = param[3];
		h21 = param[2];
		h22 = param[4];
	} else if (flag == 0) {
		h11 = 1.0;
		h12 = param[3];
		h21 = param[2];
		h22 = 1.0;
	} else {
		h11 = param[1];
		h12 = 1.0;
		h21 = -1.0;
		h22 = param[4];
	}

#pragma omp parallel for
	for (size_t i = 0; i < n; i++) {
		/* Per-iteration copies, private to each OpenMP thread. */
		const double w = a[i];
		const double z = b[i];

		a[i] = w * h11 + z * h12;
		b[i] = w * h21 + z * h22;
	}
}
/*
 * Build the parameters of a modified rotation. TODO: Verify
 *
 * On return param[0] holds the flag and param[1..4] hold h11, h21, h12, h22
 * (in that order), in the layout read by drotm().
 *
 * When d2 * y == 0 the rotation is the identity: param[0] is set to -2 and
 * param[1..4] to the identity matrix, so that the result is correct whether
 * or not the consumer special-cases the -2 flag.
 *
 * NOTE(review): d1, d2 and x are received by value, so the updated values
 * computed here never reach the caller; only `param` carries results out.
 * Returning them would require pointer parameters, i.e. a signature change.
 */
void drotmg(double d1, double d2, double x, double y, double *param)
{
	/* Rescaling constants: gamsq == gam * gam, rgamsq ~= 1 / gamsq. */
	const double gam = 4096.0;
	const double gamsq = 16777216.0;
	const double rgamsq = 5.9604645e-8;

	double flag;
	double h11 = 0.0, h12 = 0.0, h21 = 0.0, h22 = 0.0;

	if (d1 < 0) {
		flag = -1.0;
		d1 = 0.0;
		d2 = 0.0;
		x = 0.0;
	} else {
		const double p2 = d2 * y;

		if (p2 == 0) {
			/* Identity rotation. Return immediately: falling
			 * through would overwrite the flag and may divide
			 * zero by zero below. */
			param[0] = -2.0;
			param[1] = 1.0;
			param[2] = 0.0;
			param[3] = 0.0;
			param[4] = 1.0;
			return;
		}

		const double p1 = d1 * x;
		const double q2 = p2 * y;
		const double q1 = p1 * x;

		if (fabs(q1) > fabs(q2)) {
			h21 = -y / x;
			h12 = p2 / p1;

			const double u = 1.0 - h12 * h21;

			if (u > 0) {
				flag = 0.0;
				d1 = d1 / u;
				d2 = d2 / u;
				x = x * u;
			} else {
				/* Not expected outside of rounding edge cases;
				 * fall back to the zero matrix so that flag is
				 * never read uninitialized. */
				flag = -1.0;
				h11 = 0.0;
				h12 = 0.0;
				h21 = 0.0;
				h22 = 0.0;
				d1 = 0.0;
				d2 = 0.0;
				x = 0.0;
			}
		} else if (q2 < 0.0) {
			flag = -1.0;
			d1 = 0.0;
			d2 = 0.0;
			x = 0.0;
		} else {
			flag = 1.0;
			h11 = p1 / p2;
			h22 = x / y;

			const double u = 1.0 + h11 * h22;
			const double swapped = d2 / u;

			d2 = d1 / u;
			d1 = swapped;
			x = y * u;
		}

		/* Rescale d1 into (rgamsq, gamsq). */
		if (d1 != 0.0) {
			while (fabs(d1) <= rgamsq || d1 >= gamsq) {
				if (flag == 0.0) {
					h11 = 1.0;
					h22 = 1.0;
				} else {
					h21 = -1.0;
					h12 = 1.0;
				}
				flag = -1.0;
				if (d1 <= rgamsq) {
					d1 = d1 * gamsq;
					x = x / gam;
					h11 = h11 / gam;
					h12 = h12 / gam;
				} else {
					d1 = d1 / gamsq;
					x = x * gam;
					h11 = h11 * gam;
					h12 = h12 * gam;
				}
			}
		}

		/* Rescale |d2| into (rgamsq, gamsq). */
		if (d2 != 0) {
			while (fabs(d2) <= rgamsq || fabs(d2) >= gamsq) {
				if (flag == 0.0) {
					h11 = 1.0;
					h22 = 1.0;
				} else {
					h21 = -1.0;
					h12 = 1.0;
				}
				flag = -1.0;
				if (fabs(d2) <= rgamsq) {
					d2 = d2 * gamsq;
					h21 = h21 / gam;
					h22 = h22 / gam;
				} else {
					d2 = d2 / gamsq;
					h21 = h21 * gam;
					h22 = h22 * gam;
				}
			}
		}
	}

	param[1] = h11;
	param[2] = h21;
	param[3] = h12;
	param[4] = h22;
	param[0] = flag;
}
benchmarks/blas_l1_kernel.h
0 → 100644
View file @
ed27873f
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
******************************************************************************/
/*
* This is a benchmark for the BLAS Level 1 operations for AML.
*/
#ifndef AML_BENCHMARKS_BLAS_L1_KERNEL_H
#define AML_BENCHMARKS_BLAS_L1_KERNEL_H

#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Kernels sharing one signature: n is the vector length, a, b and c are
 * vectors of n doubles and scalar is a scaling factor. Each kernel uses only
 * the arguments it needs.
 */

/* Sum of the absolute values of a. */
double dasum(size_t n, double *a, double *b, double *c, double scalar);
/* c = b + scalar * a; returns 1. */
double daxpy(size_t n, double *a, double *b, double *c, double scalar);
/* b = a; returns 1. */
double dcopy(size_t n, double *a, double *b, double *c, double scalar);
/* Dot product of a and b. */
double ddot(size_t n, double *a, double *b, double *c, double scalar);
/* Euclidean norm of a. */
double dnrm2(size_t n, double *a, double *b, double *c, double scalar);
/* b = scalar * a; returns 1. */
double dscal(size_t n, double *a, double *b, double *c, double scalar);
/* Exchanges the contents of a and b; returns 1. */
double dswap(size_t n, double *a, double *b, double *c, double scalar);
/* Index (returned as a double) of a maximum-magnitude search over a. */
double idmax(size_t n, double *a, double *b, double *c, double scalar);

/* Rotation kernels; their parameters differ from the signature above. */

/* Applies the plane rotation (c, s) to the pairs (a[i], b[i]). */
void drot(size_t n, double *a, double *b, double c, double s);
/* Creates a plane rotation. NOTE(review): every parameter is passed by
 * value, so no result can reach the caller through this signature. */
void drotg(double x, double y, double c, double s);
/* Applies the modified rotation described by param[0..4] to (a[i], b[i]). */
void drotm(size_t n, double *a, double *b, double *param);
/* Builds modified-rotation parameters and stores them in param[0..4]. */
void drotmg(double d1, double d2, double x, double y, double *param);

#endif /* AML_BENCHMARKS_BLAS_L1_KERNEL_H */
benchmarks/dgemm_mkl.c
deleted
100644 → 0
View file @
1c4b94d4
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
#include "aml.h"
#include "aml/area/linux.h"
#include <assert.h>
#include <errno.h>
#include <mkl.h>
#include <omp.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>
#include <math.h>
#include <stdlib.h>
/*
 * MKL dgemm benchmark.
 *
 * Usage: <prog> <fast bitmap> <slow bitmap> <N>
 *
 * Maps two N x N matrices of doubles (a, b) in the "slow" area and one (c) in
 * the "fast" area, fills a and b with rand() values, times a single
 * cblas_dgemm() call (c = alpha * a * b + beta * c) and prints N, the size in
 * bytes of one matrix, the elapsed time in nanoseconds and the GFLOPS rate.
 */
int main(int argc, char *argv[])
{
	struct aml_area *slow, *fast;
	struct aml_bitmap slowb, fastb;
	struct timespec start, stop;
	double *a, *b, *c;
	int err;

	aml_init(&argc, &argv);
	assert(argc == 4);

	/* Keep calls with side effects out of assert(): they would vanish
	 * when the program is built with NDEBUG. */
	err = aml_bitmap_from_string(&fastb, argv[1]);
	assert(err == 0);
	err = aml_bitmap_from_string(&slowb, argv[2]);
	assert(err == 0);
	(void)err;

	long int N = atol(argv[3]);

	assert(N > 0);
	unsigned long nelems = (unsigned long)N * (unsigned long)N;
	unsigned long memsize = sizeof(double) * nelems;

	aml_area_linux_create(&slow, &slowb, AML_AREA_LINUX_POLICY_BIND);
	assert(slow != NULL);
	aml_area_linux_create(&fast, &fastb, AML_AREA_LINUX_POLICY_BIND);
	assert(fast != NULL);

	a = aml_area_mmap(slow, memsize, NULL);
	b = aml_area_mmap(slow, memsize, NULL);
	c = aml_area_mmap(fast, memsize, NULL);
	assert(a != NULL && b != NULL && c != NULL);

	double alpha = 1.0, beta = 1.0;

	for (unsigned long i = 0; i < nelems; i++) {
		a[i] = (double)rand();
		b[i] = (double)rand();
		c[i] = 0.0;
	}

	clock_gettime(CLOCK_REALTIME, &start);
	cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, N, N, N, alpha,
		    a, N, b, N, beta, c, N);
	clock_gettime(CLOCK_REALTIME, &stop);

	/* Elapsed wall-clock time in nanoseconds. */
	long long int time = 0;

	time = (stop.tv_nsec - start.tv_nsec) +
	       1e9 * (stop.tv_sec - start.tv_sec);
	double flops = (2.0 * N * N * N) / (time / 1e9);

	/* print the flops in GFLOPS; each conversion specifier matches the
	 * type of its argument (long, unsigned long, long long, double). */
	printf("dgemm-mkl: %ld %lu %lld %f\n", N, memsize, time, flops / 1e9);

	aml_area_munmap(slow, a, memsize);
	aml_area_munmap(slow, b, memsize);
	aml_area_munmap(fast, c, memsize);
	aml_area_linux_destroy(&slow);
	aml_area_linux_destroy(&fast);
	aml_finalize();
	return 0;
}
benchmarks/dgemm_noprefetch.c
deleted
100644 → 0
View file @
1c4b94d4
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
#include "aml.h"
#include "aml/area/linux.h"
#include "aml/tiling/2d.h"
#include <assert.h>
#include <errno.h>
#include <mkl.h>
#include <omp.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>
#include <math.h>
#include <stdlib.h>
/* Benchmark state shared between main() and do_work(). */

/* 2D tilings of the matrices; main() creates tiling_row with
 * AML_TILING_TYPE_2D_ROWMAJOR and tiling_col with
 * AML_TILING_TYPE_2D_COLMAJOR. */
struct aml_tiling *tiling_row;
struct aml_tiling *tiling_col;
/* Memory areas; `slow` is created in main() from the bitmap parsed from
 * argv[2]. NOTE(review): `fast` is presumably created from argv[1] — its
 * creation is not visible in this part of the file. */
struct aml_area *slow, *fast;
/* N and T are read from argv[3] and argv[4] (N must be a multiple of T);
 * memsize = sizeof(double) * N * N and tilesize = sizeof(double) * T * T. */
size_t memsize, tilesize, N, T;
/* Matrices handed tile by tile to cblas_dgemm() in do_work(). */
double *a, *b, *c;
/* NOTE(review): presumably the timestamps taken around the timed section —
 * confirm against main(). */
struct timespec start, stop;
void
do_work
()
{
int
lda
=
(
int
)
T
,
ldb
,
ldc
;
ldb
=
lda
;
ldc
=
lda
;
size_t
ndims
[
2
];
aml_tiling_ndims
(
tiling_row
,
&
ndims
[
0
],
&
ndims
[
1
]);
for
(
int
k
=
0
;
k
<
ndims
[
1
];
k
++
)
{
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
ndims
[
0
];
i
++
)
{
for
(
int
j
=
0
;
j
<
ndims
[
1
];
j
++
)
{
size_t
aoff
,
boff
,
coff
;
double
*
ap
,
*
bp
,
*
cp
;
aoff
=
aml_tiling_tileid
(
tiling_col
,
i
,
k
);
boff
=
aml_tiling_tileid
(
tiling_row
,
k
,
j
);
coff
=
aml_tiling_tileid
(
tiling_row
,
i
,
j
);
ap
=
aml_tiling_tilestart
(
tiling_col
,
a
,
aoff
);
bp
=
aml_tiling_tilestart
(
tiling_row
,
b
,
boff
);
cp
=
aml_tiling_tilestart
(
tiling_row
,
c
,
coff
);
cblas_dgemm
(
CblasRowMajor
,
CblasNoTrans
,
CblasNoTrans
,
ldc
,
lda
,
ldb
,
1
.
0
,
ap
,
lda
,
bp
,
ldb
,
1
.
0
,
cp
,
ldc
);
}
}
}
}
int
main
(
int
argc
,
char
*
argv
[])
{
struct
aml_bitmap
slowb
,
fastb
;
aml_init
(
&
argc
,
&
argv
);
assert
(
argc
==
5
);
assert
(
aml_bitmap_from_string
(
&
fastb
,
argv
[
1
])
==
0
);
assert
(
aml_bitmap_from_string
(
&
slowb
,
argv
[
2
])
==
0
);
N
=
atol
(
argv
[
3
]);
T
=
atol
(
argv
[
4
]);
/* let's not handle messy tile sizes */
assert
(
N
%
T
==
0
);
memsize
=
sizeof
(
double
)
*
N
*
N
;
tilesize
=
sizeof
(
double
)
*
T
*
T
;
/* the initial tiling, of 2D square tiles */
assert
(
!
aml_tiling_2d_create
(
&
tiling_row
,
AML_TILING_TYPE_2D_ROWMAJOR
,
tilesize
,
memsize
,
N
/
T
,
N
/
T
));
assert
(
!
aml_tiling_2d_create
(
&
tiling_col
,
AML_TILING_TYPE_2D_COLMAJOR
,
tilesize
,
memsize
,
N
/
T
,
N
/
T
));
aml_area_linux_create
(
&
slow
,
&
slowb
,
AML_AREA_LINUX_POLICY_BIND
);
assert
(
slow
!=
NULL
);