Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
A
aml
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
27
Issues
27
List
Boards
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
argo
aml
Commits
09817b51
Commit
09817b51
authored
Sep 06, 2019
by
Brice Videau
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
First unprefectch working DGEMM version
parent
ad57531b
Pipeline
#8430
failed with stages
in 5 minutes and 5 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
137 additions
and
18 deletions
+137
-18
tests/dgemm_tiling_noprefetch.c
tests/dgemm_tiling_noprefetch.c
+137
-18
No files found.
tests/dgemm_tiling_noprefetch.c
View file @
09817b51
...
...
@@ -6,6 +6,8 @@
#include <aml/layout/pad.h>
#include <aml/tiling/resize.h>
#include <assert.h>
#include <stdlib.h>
#include <cblas.h>
void
larp_8_62_8_31_477
(
const
int32_t
nblockm
,
const
int32_t
nblockn
,
const
int32_t
nblockk
,
const
double
*
a
,
const
double
*
b
,
double
*
c
);
...
...
@@ -18,17 +20,91 @@ void larp_8_62_8_31_477(const int32_t nblockm, const int32_t nblockn, const int3
#define NR 8
#define KB 477
#define NBLOCKA 2
#define NBLOCKM NUM_OUTER_TH
#define NBLOCKN 294
#define NBLOCKK 4
#define NBLOCKM NUM_OUTER_TH
*2
#define NBLOCKN 294
*2
#define NBLOCKK 4
*2
#define M (MR*NBLOCKA*NBLOCKM)
#define N (NR*NBLOCKN)
#define K (KB*NBLOCKK)
void
init_matrix
(
double
*
ptr
,
size_t
size
)
{
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
ptr
[
i
]
=
((
double
)
random
()
*
2
/
(
double
)
RAND_MAX
)
-
1
.
0
;
}
}
#define PITCH_A_0_S KB
#define PITCH_A_1_S KB*NBLOCKK
#define PITCH_A_2_S KB*NBLOCKK*MR
#define PITCH_A_3_S KB*NBLOCKK*MR*NBLOCKA
#define PITCH_A_0_D MR
#define PITCH_A_1_D MR*KB
#define PITCH_A_2_D MR*KB*NBLOCKA
#define PITCH_A_3_D MR*KB*NBLOCKA*NBLOCKM
void
transform_a
(
size_t
block_number
,
double
*
restrict
dst
,
const
double
*
restrict
src
)
{
for
(
int
i
=
0
;
i
<
NBLOCKM
;
i
++
)
for
(
int
j
=
0
;
j
<
NBLOCKA
;
j
++
)
for
(
int
k
=
0
;
k
<
MR
;
k
++
)
for
(
int
l
=
0
;
l
<
NBLOCKK
;
l
++
)
for
(
int
m
=
0
;
m
<
KB
;
m
++
)
dst
[
k
+
m
*
PITCH_A_0_D
+
j
*
PITCH_A_1_D
+
i
*
PITCH_A_2_D
+
l
*
PITCH_A_3_D
]
=
src
[
m
+
l
*
PITCH_A_0_S
+
(
k
*
PITCH_A_1_S
+
j
*
PITCH_A_2_S
+
i
*
PITCH_A_3_S
)
*
block_number
];
}
#define PITCH_B_0_S NR
#define PITCH_B_1_S NR*NBLOCKN
#define PITCH_B_2_S NR*NBLOCKN*KB
#define PITCH_B_0_D NR
#define PITCH_B_1_D NR*KB
#define PITCH_B_2_D NR*KB*NBLOCKN
void
transform_b
(
size_t
block_number
,
double
*
restrict
dst
,
const
double
*
restrict
src
)
{
for
(
int
i
=
0
;
i
<
NBLOCKK
;
i
++
)
for
(
int
j
=
0
;
j
<
KB
;
j
++
)
for
(
int
k
=
0
;
k
<
NBLOCKN
;
k
++
)
for
(
int
l
=
0
;
l
<
NR
;
l
++
)
dst
[
l
+
j
*
PITCH_B_0_D
+
k
*
PITCH_B_1_D
+
i
*
PITCH_B_2_D
]
=
src
[
l
+
k
*
PITCH_B_0_S
+
(
j
*
PITCH_B_1_S
+
i
*
PITCH_B_2_S
)
*
block_number
];
}
#define PITCH_C_0_S NR
#define PITCH_C_1_S NR*NBLOCKN
#define PITCH_C_2_S NR*NBLOCKN*MR
#define PITCH_C_3_S NR*NBLOCKN*MR*NBLOCKA
#define PITCH_C_0_D NR
#define PITCH_C_1_D NR*MR
#define PITCH_C_2_D NR*MR*NBLOCKA
#define PITCH_C_3_D NR*MR*NBLOCKA*NBLOCKN
void
transform_c
(
size_t
block_number
,
double
*
restrict
dst
,
const
double
*
restrict
src
)
{
for
(
int
i
=
0
;
i
<
NBLOCKM
;
i
++
)
for
(
int
j
=
0
;
j
<
NBLOCKA
;
j
++
)
for
(
int
k
=
0
;
k
<
MR
;
k
++
)
for
(
int
l
=
0
;
l
<
NBLOCKN
;
l
++
)
for
(
int
m
=
0
;
m
<
NR
;
m
++
)
dst
[
m
+
k
*
PITCH_C_0_D
+
j
*
PITCH_C_1_D
+
l
*
PITCH_C_2_D
+
i
*
PITCH_C_3_D
]
=
src
[
m
+
l
*
PITCH_C_0_S
+
(
k
*
PITCH_C_1_S
+
j
*
PITCH_C_2_S
+
i
*
PITCH_C_3_S
)
*
block_number
];
}
void
transform_c_reverse
(
size_t
block_number
,
double
*
restrict
dst
,
const
double
*
restrict
src
)
{
for
(
int
i
=
0
;
i
<
NBLOCKM
;
i
++
)
for
(
int
j
=
0
;
j
<
NBLOCKN
;
j
++
)
for
(
int
k
=
0
;
k
<
NBLOCKA
;
k
++
)
for
(
int
l
=
0
;
l
<
MR
;
l
++
)
for
(
int
m
=
0
;
m
<
NR
;
m
++
)
dst
[
m
+
j
*
PITCH_C_0_S
+
(
l
*
PITCH_C_1_S
+
k
*
PITCH_C_2_S
+
i
*
PITCH_C_3_S
)
*
block_number
]
=
src
[
m
+
l
*
PITCH_C_0_D
+
k
*
PITCH_C_1_D
+
j
*
PITCH_C_2_D
+
i
*
PITCH_C_3_D
];
}
int
main
(
int
argc
,
char
*
argv
[])
{
aml_init
(
&
argc
,
&
argv
);
int
block_number
;
int
block_count
;
int
do_test
=
0
;
struct
aml_bitmap
slowb
,
fastb
;
struct
aml_area
*
slow
,
*
fast
;
...
...
@@ -38,7 +114,12 @@ int main(int argc, char *argv[]) {
assert
(
aml_bitmap_from_string
(
&
fastb
,
"1"
)
==
0
);
assert
(
aml_bitmap_from_string
(
&
slowb
,
"0"
)
==
0
);
block_number
=
atol
(
argv
[
1
]);
if
(
argc
>
1
)
{
block_number
=
atol
(
argv
[
1
]);
}
else
{
block_number
=
2
;
do_test
=
1
;
}
block_count
=
block_number
*
block_number
;
...
...
@@ -47,16 +128,27 @@ int main(int argc, char *argv[]) {
aml_area_linux_create
(
&
fast
,
&
fastb
,
AML_AREA_LINUX_POLICY_BIND
);
assert
(
fast
!=
NULL
);
double
*
a
=
(
double
*
)
aml_area_mmap
(
slow
,
sizeof
(
double
)
*
block_count
*
M
*
K
,
NULL
);
double
*
b
=
(
double
*
)
aml_area_mmap
(
slow
,
sizeof
(
double
)
*
block_count
*
K
*
N
,
NULL
);
double
*
c
=
(
double
*
)
aml_area_mmap
(
slow
,
sizeof
(
double
)
*
block_count
*
M
*
N
,
NULL
);
size_t
big_m
=
block_number
*
M
;
size_t
big_n
=
block_number
*
N
;
size_t
big_k
=
block_number
*
K
;
double
*
a
=
(
double
*
)
aml_area_mmap
(
slow
,
sizeof
(
double
)
*
big_m
*
big_k
,
NULL
);
double
*
b
=
(
double
*
)
aml_area_mmap
(
slow
,
sizeof
(
double
)
*
big_k
*
big_n
,
NULL
);
double
*
c
=
(
double
*
)
aml_area_mmap
(
slow
,
sizeof
(
double
)
*
big_m
*
big_n
,
NULL
);
double
*
c_ref
=
(
double
*
)
aml_area_mmap
(
slow
,
sizeof
(
double
)
*
big_m
*
big_n
,
NULL
);
double
*
a_fast
=
(
double
*
)
aml_area_mmap
(
fast
,
sizeof
(
double
)
*
M
*
K
,
NULL
);
double
*
b_fast
=
(
double
*
)
aml_area_mmap
(
fast
,
sizeof
(
double
)
*
K
*
N
,
NULL
);
double
*
c_fast
=
(
double
*
)
aml_area_mmap
(
fast
,
sizeof
(
double
)
*
M
*
N
,
NULL
);
size_t
a_dims
[]
=
{
block_number
*
M
,
block_number
*
K
};
size_t
b_dims
[]
=
{
block_number
*
K
,
block_number
*
N
};
size_t
c_dims
[]
=
{
block_number
*
M
,
block_number
*
N
};
init_matrix
(
a
,
big_m
*
big_k
);
init_matrix
(
b
,
big_k
*
big_n
);
init_matrix
(
c
,
big_m
*
big_n
);
larp_8_62_8_31_477
(
NBLOCKM
,
NBLOCKN
,
NBLOCKK
,
a
,
b
,
c_ref
);
memcpy
(
c_ref
,
c
,
sizeof
(
double
)
*
big_m
*
big_n
);
size_t
a_dims
[]
=
{
big_m
,
big_k
};
size_t
b_dims
[]
=
{
big_k
,
big_n
};
size_t
c_dims
[]
=
{
big_m
,
big_n
};
size_t
a_tile_dims
[]
=
{
M
,
K
};
size_t
a_reshape_tile_dims
[]
=
{
NBLOCKM
,
NBLOCKA
,
MR
,
NBLOCKK
,
KB
};
...
...
@@ -101,15 +193,17 @@ int main(int argc, char *argv[]) {
assert
(
!
aml_tiling_resize_create
(
&
c_t
,
AML_LAYOUT_ORDER_ROW_MAJOR
,
c_l
,
2
,
c_tile_dims
)
);
for
(
int
i
=
0
;
i
<
block_number
;
i
++
)
{
for
(
int
j
=
0
;
j
<
block_number
;
j
++
)
{
struct
timespec
start
,
stop
;
clock_gettime
(
CLOCK_REALTIME
,
&
start
);
for
(
int
i
=
0
;
i
<
block_number
;
i
++
)
{
for
(
int
j
=
0
;
j
<
block_number
;
j
++
)
{
struct
aml_layout
*
c_tile
;
struct
aml_layout
*
c_reshape_tile
;
c_tile
=
aml_tiling_index
(
c_t
,
(
size_t
[]){
i
,
j
});
aml_layout_reshape
(
c_tile
,
&
c_reshape_tile
,
5
,
c_reshape_tile_dims
);
aml_copy_layout_transform_native
(
c_l_fast
,
c_reshape_tile
,
&
c_transpose
[
0
]);
for
(
int
k
=
0
;
k
<
block_number
;
k
++
)
{
//aml_copy_layout_transform_native(c_l_fast, c_reshape_tile, c_transpose);
transform_c
(
block_number
,
c_fast
,
aml_layout_deref
(
c_reshape_tile
,(
size_t
[]){
0
,
0
,
0
,
0
,
0
}));
for
(
int
k
=
0
;
k
<
block_number
;
k
++
)
{
struct
aml_layout
*
a_tile
,
*
b_tile
;
struct
aml_layout
*
a_reshape_tile
,
*
b_reshape_tile
;
a_tile
=
aml_tiling_index
(
a_t
,
(
size_t
[]){
i
,
k
});
...
...
@@ -118,12 +212,37 @@ int main(int argc, char *argv[]) {
5
,
a_reshape_tile_dims
);
aml_layout_reshape
(
b_tile
,
&
b_reshape_tile
,
4
,
b_reshape_tile_dims
);
aml_copy_layout_transform_native
(
a_l_fast
,
a_reshape_tile
,
a_transpose
);
aml_copy_layout_transform_native
(
b_l_fast
,
b_reshape_tile
,
b_transpose
);
//aml_copy_layout_transform_native(a_l_fast, a_reshape_tile, a_transpose);
transform_a
(
block_number
,
a_fast
,
aml_layout_deref
(
a_reshape_tile
,(
size_t
[]){
0
,
0
,
0
,
0
,
0
}));
//aml_copy_layout_transform_native(b_l_fast, b_reshape_tile, b_transpose);
transform_b
(
block_number
,
b_fast
,
aml_layout_deref
(
b_reshape_tile
,(
size_t
[]){
0
,
0
,
0
,
0
}));
larp_8_62_8_31_477
(
NBLOCKM
,
NBLOCKN
,
NBLOCKK
,
a_fast
,
b_fast
,
c_fast
);
}
aml_copy_layout_transform_native
(
c_reshape_tile
,
c_l_fast
,
c_transpose_back
);
//aml_copy_layout_transform_native(c_reshape_tile, c_l_fast, c_transpose_back);
transform_c_reverse
(
block_number
,
aml_layout_deref
(
c_reshape_tile
,(
size_t
[]){
0
,
0
,
0
,
0
,
0
}),
c_fast
);
}
}
clock_gettime
(
CLOCK_REALTIME
,
&
stop
);
double
time
=
0
;
time
=
(
stop
.
tv_nsec
-
start
.
tv_nsec
)
+
1e9
*
(
stop
.
tv_sec
-
start
.
tv_sec
);
double
flops
=
(
2
.
0
*
big_n
*
big_m
*
big_k
)
/
(
time
/
1e9
);
printf
(
"dgemm : %lf ms %f GFlops/s
\n
"
,
time
/
1e6
,
flops
/
1e9
);
clock_gettime
(
CLOCK_REALTIME
,
&
start
);
cblas_dgemm
(
CblasRowMajor
,
CblasNoTrans
,
CblasNoTrans
,
big_m
,
big_n
,
big_k
,
1
.
0
,
a
,
big_k
,
b
,
big_n
,
1
.
0
,
c_ref
,
big_n
);
clock_gettime
(
CLOCK_REALTIME
,
&
stop
);
cblas_daxpy
(
big_n
*
big_m
,
-
1
.
0
,
c
,
1
,
c_ref
,
1
);
double
max_error
;
size_t
max_error_index
;
max_error_index
=
cblas_idamax
(
big_n
*
big_m
,
c_ref
,
1
);
max_error
=
c_ref
[
max_error_index
];
time
=
(
stop
.
tv_nsec
-
start
.
tv_nsec
)
+
1e9
*
(
stop
.
tv_sec
-
start
.
tv_sec
);
flops
=
(
2
.
0
*
big_n
*
big_m
*
big_k
)
/
(
time
/
1e9
);
/* print the flops in GFLOPS */
printf
(
"dgemm-vanilla: %lf ms %f GFlops/s %le max error
\n
"
,
time
/
1e6
,
flops
/
1e9
,
max_error
);
return
0
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment