Commit 6a0d1cbd authored by Kamil Iskra

[feature] Respect tiling representation of arrays

The A, B, and C matrices are tiled (the tiles in A are also transposed).
Add initialization code for A and B and conversion code for C that
respects the tiling, thus enabling a direct comparison of the results
with the MKL and vanilla versions.
parent b67dc368
......@@ -84,11 +84,40 @@ int main(int argc, char* argv[])
b = aml_area_malloc(&slow, memsize);
c = aml_area_malloc(&fast, memsize);
assert(a != NULL && b != NULL && c != NULL);
for(unsigned long i = 0; i < N*N; i++) {
a[i] = (double)rand();
b[i] = (double)rand();
c[i] = 0.0;
size_t ntilerows, ntilecols, tilerowsize, tilecolsize, rowsize, colsize;
rowsize = colsize = N;
tilerowsize = tilecolsize = T;
ntilerows = ntilecols = N/T;
for(unsigned long i = 0; i < N*N; i+=tilerowsize) {
size_t tilerow, tilecol, row, column;
/* Tile row index (row-major). */
tilerow = i / (tilerowsize * tilecolsize * ntilerows);
/* Tile column index (row-major). */
tilecol = (i / tilerowsize) % ntilerows;
/* Row index within a tile (row-major). */
row = (i / rowsize) % tilecolsize;
/* Column index within a tile (row-major). */
/* column = i % tilerowsize; */
size_t a_offset, b_offset;
/* Tiles in A need to be transposed (column-major). */
a_offset = (tilecol * ntilecols + tilerow) *
tilerowsize * tilecolsize +
row * tilerowsize;
/* Tiles in B are in row-major order. */
b_offset = (tilerow * ntilerows + tilecol) *
tilerowsize * tilecolsize +
row * tilerowsize;
for (column = 0; column < tilerowsize; column++) {
a[a_offset + column] = (double)rand();
b[b_offset + column] = (double)rand();
/* C is tiled as well (row-major) but since it's
all-zeros at this point, we don't bother. */
c[i+column] = 0.0;
}
}
clock_gettime(CLOCK_REALTIME, &start);
do_work();
clock_gettime(CLOCK_REALTIME, &stop);
......@@ -96,6 +125,24 @@ int main(int argc, char* argv[])
time = (stop.tv_nsec - start.tv_nsec) +
1e9* (stop.tv_sec - start.tv_sec);
double flops = (2.0*N*N*N)/(time/1e9);
/* De-tile the result matrix (C). I couldn't figure out how to do
it in-place so we are de-tiling to the A matrix. */
for(unsigned long i = 0; i < N*N; i+=tilerowsize) {
size_t tilerow, tilecol, row;
/* Tile row index (row-major). */
tilerow = i / (tilerowsize * tilecolsize * ntilerows);
/* Tile column index (row-major). */
tilecol = (i / tilerowsize) % ntilerows;
/* Row index within a tile (row-major). */
row = (i / rowsize) % tilecolsize;
/* i converted to tiled. */
unsigned long tiledi = (tilerow * ntilerows + tilecol) *
tilerowsize * tilecolsize + row * tilerowsize;
memcpy(&a[i], &c[tiledi], tilerowsize*sizeof(double));
}
/* print the flops in GFLOPS */
printf("dgemm-noprefetch: %llu %lld %lld %f\n", N, memsize, time,
flops/1e9);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment