Commit 9764f3c6 authored by Swann Perarnau's avatar Swann Perarnau
Browse files

[feature/fix] add column-major 2D tiling

Fix dgemm_noprefetch to match pattern from @suchyb in #19.
To do so, we split our 2D tiling into column-major and
row-major variants. Note that these terms refer to the order of the tiles,
not to the layout of the data inside a tile, as a tiling should be agnostic to it.
parent f673af2e
......@@ -9,7 +9,8 @@
#include <math.h>
#include <stdlib.h>
AML_TILING_2D_CONTIG_DECL(tiling);
AML_TILING_2D_CONTIG_ROWMAJOR_DECL(tiling_row);
AML_TILING_2D_CONTIG_COLMAJOR_DECL(tiling_col);
AML_AREA_LINUX_DECL(slow);
AML_AREA_LINUX_DECL(fast);
......@@ -24,22 +25,22 @@ void do_work()
ldc = lda;
double *ap, *bp, *cp;
size_t ndims[2];
aml_tiling_ndims(&tiling, &ndims[0], &ndims[1]);
aml_tiling_ndims(&tiling_row, &ndims[0], &ndims[1]);
size_t aoff, boff, coff;
for(int j = 0; j < ndims[1]; j++)
for(int k = 0; k < ndims[1]; k++)
{
for(int k = 0; k < ndims[1]; k++)
#pragma omp parallel for
for(int i = 0; i < ndims[0]; i++)
{
#pragma omp parallel for
for(int i = 0; i < ndims[1]; i++)
for(int j = 0; j < ndims[1]; j++)
{
aoff = i*ndims[0] + k;
boff = k*ndims[0] + j;
coff = i*ndims[0] + j;
ap = aml_tiling_tilestart(&tiling, ap, aoff);
bp = aml_tiling_tilestart(&tiling, bp, boff);
cp = aml_tiling_tilestart(&tiling, cp, coff);
aoff = i*ndims[1] + k;
boff = k*ndims[1] + j;
coff = i*ndims[1] + j;
ap = aml_tiling_tilestart(&tiling_col, ap, aoff);
bp = aml_tiling_tilestart(&tiling_row, bp, boff);
cp = aml_tiling_tilestart(&tiling_row, cp, coff);
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, ldc, lda, ldb, 1.0, ap, lda, bp, ldb, 1.0, cp, ldc);
}
}
......@@ -63,7 +64,9 @@ int main(int argc, char* argv[])
tilesize = sizeof(double)*T*T;
/* the initial tiling, of 2D square tiles */
assert(!aml_tiling_init(&tiling, AML_TILING_TYPE_2D_CONTIG,
assert(!aml_tiling_init(&tiling_row, AML_TILING_TYPE_2D_CONTIG_ROWMAJOR,
tilesize, memsize, N/T , N/T));
assert(!aml_tiling_init(&tiling_col, AML_TILING_TYPE_2D_CONTIG_COLMAJOR,
tilesize, memsize, N/T , N/T));
assert(!aml_arena_jemalloc_init(&arena, AML_ARENA_JEMALLOC_TYPE_REGULAR));
assert(!aml_area_linux_init(&slow,
......@@ -94,13 +97,15 @@ int main(int argc, char* argv[])
1e9* (stop.tv_sec - start.tv_sec);
double flops = (2.0*N*N*N)/(time/1e9);
/* print the flops in GFLOPS */
printf("dgemm-mkl: %llu %lld %lld %f\n", N, memsize, time, flops/1e9);
printf("dgemm-noprefetch: %llu %lld %lld %f\n", N, memsize, time,
flops/1e9);
aml_area_free(&slow, a);
aml_area_free(&slow, b);
aml_area_free(&fast, c);
aml_area_linux_destroy(&slow);
aml_area_linux_destroy(&fast);
aml_tiling_destroy(&tiling, AML_TILING_TYPE_2D_CONTIG);
aml_tiling_destroy(&tiling_row, AML_TILING_TYPE_2D_CONTIG_ROWMAJOR);
aml_tiling_destroy(&tiling_col, AML_TILING_TYPE_2D_CONTIG_ROWMAJOR);
aml_finalize();
return 0;
}
......@@ -905,8 +905,9 @@ int aml_tiling_iterator_get(const struct aml_tiling_iterator *iterator, ...);
/* Tiling types passed to the tiling create()/init()/vinit() routines. */
/* Regular, linear tiling with uniform tile sizes. */
#define AML_TILING_TYPE_1D 0
#define AML_TILING_TYPE_2D_CONTIG 1
#define AML_TILING_TYPE_2D 2
#define AML_TILING_TYPE_2D_CONTIG_ROWMAJOR 3
#define AML_TILING_TYPE_2D_CONTIG_COLMAJOR 4
/*
* Allocates and initializes a new tiling.
......@@ -1036,7 +1037,8 @@ struct aml_tiling_iterator_2d_data {
* a contiguous memory area composed of contiguous tiles arranged in 2D grid.
******************************************************************************/
extern struct aml_tiling_ops aml_tiling_2d_contig_ops;
extern struct aml_tiling_ops aml_tiling_2d_contig_rowmajor_ops;
extern struct aml_tiling_ops aml_tiling_2d_contig_colmajor_ops;
extern struct aml_tiling_iterator_ops aml_tiling_iterator_2d_contig_ops;
struct aml_tiling_2d_contig_data {
......@@ -1050,10 +1052,17 @@ struct aml_tiling_iterator_2d_contig_data {
struct aml_tiling_2d_contig_data *tiling;
};
#define AML_TILING_2D_CONTIG_DECL(name) \
#define AML_TILING_2D_CONTIG_ROWMAJOR_DECL(name) \
struct aml_tiling_2d_contig_data __ ##name## _inner_data; \
struct aml_tiling name = { \
&aml_tiling_2d_contig_ops, \
&aml_tiling_2d_contig_rowmajor_ops, \
(struct aml_tiling_data *)&__ ## name ## _inner_data, \
};
/*
 * Declares a column-major contiguous 2D tiling called `name`.
 * Expands to two objects:
 *  - a struct aml_tiling_2d_contig_data whose identifier is built by pasting
 *    `name` between "__" and "_inner_data";
 *  - a struct aml_tiling called `name`, positionally initialized with the
 *    address of aml_tiling_2d_contig_colmajor_ops and a pointer to the data
 *    object above, cast to struct aml_tiling_data *.
 * The data object gets no initializer here; presumably it is filled in later
 * by the tiling init routine -- TODO confirm against callers.
 * NOTE(review): identifiers containing a double underscore are reserved in C;
 * consider a different prefix for the generated data object.
 */
#define AML_TILING_2D_CONTIG_COLMAJOR_DECL(name) \
struct aml_tiling_2d_contig_data __ ##name## _inner_data; \
struct aml_tiling name = { \
&aml_tiling_2d_contig_colmajor_ops, \
(struct aml_tiling_data *)&__ ## name ## _inner_data, \
};
......
......@@ -139,7 +139,8 @@ int aml_tiling_create(struct aml_tiling **t, int type, ...)
err = aml_tiling_vinit(ret, type, ap);
}
else if(type == AML_TILING_TYPE_2D_CONTIG)
else if(type == AML_TILING_TYPE_2D_CONTIG_ROWMAJOR ||
type == AML_TILING_TYPE_2D_CONTIG_COLMAJOR)
{
baseptr = (intptr_t) calloc(1, AML_TILING_2D_CONTIG_ALLOCSIZE);
dataptr = baseptr + sizeof(struct aml_tiling);
......@@ -183,9 +184,20 @@ int aml_tiling_vinit(struct aml_tiling *t, int type, va_list ap)
data->totalsize = va_arg(ap, size_t);
err = data->blocksize > data->totalsize;
}
else if(type == AML_TILING_TYPE_2D_CONTIG)
else if(type == AML_TILING_TYPE_2D_CONTIG_ROWMAJOR)
{
t->ops = &aml_tiling_2d_contig_ops;
t->ops = &aml_tiling_2d_contig_rowmajor_ops;
struct aml_tiling_2d_contig_data *data =
(struct aml_tiling_2d_contig_data *)t->data;
data->blocksize = va_arg(ap, size_t);
data->totalsize = va_arg(ap, size_t);
data->ndims[0] = va_arg(ap, size_t);
data->ndims[1] = va_arg(ap, size_t);
err = data->blocksize > data->totalsize;
}
else if(type == AML_TILING_TYPE_2D_CONTIG_COLMAJOR)
{
t->ops = &aml_tiling_2d_contig_colmajor_ops;
struct aml_tiling_2d_contig_data *data =
(struct aml_tiling_2d_contig_data *)t->data;
data->blocksize = va_arg(ap, size_t);
......
......@@ -47,6 +47,7 @@ struct aml_tiling_iterator_ops aml_tiling_iterator_2d_contig_ops = {
/*******************************************************************************
* 2D ops
* Tile ids are always row-major: for an N x M grid of tiles, tile [i][j] has tileid = i*M + j
******************************************************************************/
size_t aml_tiling_2d_contig_tilesize(const struct aml_tiling_data *t, int tileid)
......@@ -56,12 +57,33 @@ size_t aml_tiling_2d_contig_tilesize(const struct aml_tiling_data *t, int tileid
return data->blocksize;
}
void* aml_tiling_2d_contig_tilestart(const struct aml_tiling_data *t, const void *ptr, int tileid)
void* aml_tiling_2d_contig_rowmajor_tilestart(const struct aml_tiling_data *t,
const void *ptr, int tileid)
{
const struct aml_tiling_2d_contig_data *data =
(const struct aml_tiling_2d_contig_data *)t;
intptr_t p = (intptr_t)ptr;
return (void *)(p + tileid*data->blocksize);
size_t i = tileid/data->ndims[1];
size_t j = tileid % data->ndims[1];
if(i >= data->ndims[0] || j >= data->ndims[1])
return NULL;
else
return (void *)(p + tileid*data->blocksize);
}
/*
 * Returns the start address of tile `tileid` in a contiguous 2D tiling whose
 * tiles are stored in column-major order.
 *
 * t:      tiling data; interpreted as a struct aml_tiling_2d_contig_data.
 * ptr:    base address of the tiled memory region.
 * tileid: row-major tile id, i.e. tileid = i*ndims[1] + j for tile (i, j).
 *
 * The tile (i, j) is located at offset (j*ndims[0] + i) * blocksize from ptr.
 * Returns NULL when tileid does not designate a tile of the grid: negative
 * id, empty grid (either dimension is zero), or row index out of range.
 */
void* aml_tiling_2d_contig_colmajor_tilestart(const struct aml_tiling_data *t,
					      const void *ptr, int tileid)
{
	const struct aml_tiling_2d_contig_data *data =
		(const struct aml_tiling_2d_contig_data *)t;
	intptr_t p = (intptr_t)ptr;

	/*
	 * Reject ids that cannot map to a tile. Checking ndims[1] first also
	 * keeps the division and remainder below well defined.
	 */
	if(tileid < 0 || data->ndims[0] == 0 || data->ndims[1] == 0)
		return NULL;

	/* Decode the row-major id into grid coordinates. */
	size_t i = (size_t)tileid / data->ndims[1];
	size_t j = (size_t)tileid % data->ndims[1];

	/* j < ndims[1] by construction; only the row index can overflow. */
	if(i >= data->ndims[0])
		return NULL;

	/* Column-major storage: tiles of one column are adjacent in memory. */
	size_t offset = j*data->ndims[0] + i;
	return (void *)(p + offset*data->blocksize);
}
int aml_tiling_2d_contig_ndims(const struct aml_tiling_data *t, va_list ap)
......@@ -112,13 +134,24 @@ int aml_tiling_2d_contig_destroy_iterator(struct aml_tiling_data *t,
}
struct aml_tiling_ops aml_tiling_2d_contig_ops = {
/*
 * Operations table for the row-major contiguous 2D tiling.
 * Entries are positional. The iterator, tilesize and ndims handlers are the
 * generic 2d_contig ones; only the tilestart handler is row-major specific.
 * NOTE(review): the two NULL entries leave two operations unset; which ones
 * is determined by the field order of struct aml_tiling_ops -- confirm
 * against the header.
 */
struct aml_tiling_ops aml_tiling_2d_contig_rowmajor_ops = {
aml_tiling_2d_contig_create_iterator,
aml_tiling_2d_contig_init_iterator,
aml_tiling_2d_contig_destroy_iterator,
aml_tiling_2d_contig_tilesize,
NULL,
NULL,
aml_tiling_2d_contig_rowmajor_tilestart,
aml_tiling_2d_contig_ndims,
};
struct aml_tiling_ops aml_tiling_2d_contig_colmajor_ops = {
aml_tiling_2d_contig_create_iterator,
aml_tiling_2d_contig_init_iterator,
aml_tiling_2d_contig_destroy_iterator,
aml_tiling_2d_contig_tilesize,
NULL,
NULL,
aml_tiling_2d_contig_tilestart,
aml_tiling_2d_contig_colmajor_tilestart,
aml_tiling_2d_contig_ndims,
};
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment