dgemm_noprefetch.c 5.12 KB
Newer Older
Swann Perarnau's avatar
Swann Perarnau committed
1 2 3 4 5 6 7 8 9 10
/*******************************************************************************
 * Copyright 2019 UChicago Argonne, LLC.
 * (c.f. AUTHORS, LICENSE)
 *
 * This file is part of the AML project.
 * For more info, see https://xgitlab.cels.anl.gov/argo/aml
 *
 * SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/

11
#include "aml.h"
Nicolas Denoyelle's avatar
Nicolas Denoyelle committed
12
#include "aml/area/linux.h"
13 14
#include <assert.h>
#include <errno.h>
15
#include <mkl.h>
16 17 18 19 20 21 22
#include <omp.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>
#include <math.h>
#include <stdlib.h>

23 24
AML_TILING_2D_ROWMAJOR_DECL(tiling_row);
AML_TILING_2D_COLMAJOR_DECL(tiling_col);
Nicolas Denoyelle's avatar
Nicolas Denoyelle committed
25
struct aml_area *slow, *fast;
26
size_t memsize, tilesize, N, T;
27
double *a, *b, *c;
28
struct timespec start, stop;
29 30 31

void do_work()
{
32
	int lda = (int)T, ldb, ldc;
33 34
	ldb = lda;
	ldc = lda;
35
	size_t ndims[2];
36
	aml_tiling_ndims(&tiling_row, &ndims[0], &ndims[1]);
37

38
	for(int k = 0; k < ndims[1]; k++)
39
	{
40 41
		#pragma omp parallel for
		for(int i = 0; i < ndims[0]; i++)
42
		{
43
			for(int j = 0; j < ndims[1]; j++)
44
			{
45 46
				size_t aoff, boff, coff;
				double *ap, *bp, *cp;
47 48 49
				aoff = aml_tiling_tileid(&tiling_col, i, k);
				boff = aml_tiling_tileid(&tiling_row, k, j);
				coff = aml_tiling_tileid(&tiling_row, i, j);
50 51 52
				ap = aml_tiling_tilestart(&tiling_col, a, aoff);
				bp = aml_tiling_tilestart(&tiling_row, b, boff);
				cp = aml_tiling_tilestart(&tiling_row, c, coff);
53
				cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, ldc, lda, ldb, 1.0, ap, lda, bp, ldb, 1.0, cp, ldc);
54 55
			}
		}
56
	}
57 58
}

59
int main(int argc, char* argv[])
60
{
Nicolas Denoyelle's avatar
Nicolas Denoyelle committed
61
	struct aml_bitmap slowb, fastb;
62 63
	aml_init(&argc, &argv);
	assert(argc == 5);
Nicolas Denoyelle's avatar
Nicolas Denoyelle committed
64 65
	assert(aml_bitmap_from_string(&fastb, argv[1]) == 0);
	assert(aml_bitmap_from_string(&slowb, argv[2]) == 0);
66 67 68 69 70 71 72 73
	N = atol(argv[3]);
	T = atol(argv[4]);
	/* let's not handle messy tile sizes */
	assert(N % T == 0);
	memsize = sizeof(double)*N*N;
	tilesize = sizeof(double)*T*T;

	/* the initial tiling, of 2D square tiles */
74
	assert(!aml_tiling_init(&tiling_row, AML_TILING_TYPE_2D_ROWMAJOR,
75
				tilesize, memsize, N/T , N/T));
76
	assert(!aml_tiling_init(&tiling_col, AML_TILING_TYPE_2D_COLMAJOR,
77
				tilesize, memsize, N/T , N/T));
78

Nicolas Denoyelle's avatar
Nicolas Denoyelle committed
79 80 81 82 83 84 85
	slow = aml_area_linux_create(AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
				     &slowb, AML_AREA_LINUX_BINDING_FLAG_BIND);
	assert(slow != NULL);
	fast = aml_area_linux_create(AML_AREA_LINUX_MMAP_FLAG_PRIVATE,
				     &fastb, AML_AREA_LINUX_BINDING_FLAG_BIND);
	assert(fast != NULL);

86
	/* allocation */
Nicolas Denoyelle's avatar
Nicolas Denoyelle committed
87 88 89
	a = aml_area_mmap(slow, NULL, memsize);
	b = aml_area_mmap(slow, NULL, memsize);
	c = aml_area_mmap(fast, NULL, memsize);
90
	assert(a != NULL && b != NULL && c != NULL);
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122

	size_t ntilerows, ntilecols, tilerowsize, tilecolsize, rowsize, colsize;
	rowsize = colsize = N;
	tilerowsize = tilecolsize = T;
	ntilerows = ntilecols = N/T;
	for(unsigned long i = 0; i < N*N; i+=tilerowsize) {
		size_t tilerow, tilecol, row, column;
		/* Tile row index (row-major).  */
		tilerow = i / (tilerowsize * tilecolsize * ntilerows);
		/* Tile column index (row-major).  */
		tilecol = (i / tilerowsize) % ntilerows;
		/* Row index within a tile (row-major).  */
		row = (i / rowsize) % tilecolsize;
		/* Column index within a tile (row-major).  */
		/* column = i % tilerowsize; */

		size_t a_offset, b_offset;
		/* Tiles in A need to be transposed (column-major).  */
		a_offset = (tilecol * ntilecols + tilerow) *
			tilerowsize * tilecolsize +
			row * tilerowsize;
		/* Tiles in B are in row-major order.  */
		b_offset = (tilerow * ntilerows + tilecol) *
			tilerowsize * tilecolsize +
			row * tilerowsize;
		for (column = 0; column < tilerowsize; column++) {
			a[a_offset + column] = (double)rand();
			b[b_offset + column] = (double)rand();
			/* C is tiled as well (row-major) but since it's
			   all-zeros at this point, we don't bother.  */
			c[i+column] = 0.0;
		}
123
	}
124

125 126 127 128 129 130 131
	clock_gettime(CLOCK_REALTIME, &start);
	do_work();
	clock_gettime(CLOCK_REALTIME, &stop);
	long long int time = 0;
	time =  (stop.tv_nsec - start.tv_nsec) +
                1e9* (stop.tv_sec - start.tv_sec);
	double flops = (2.0*N*N*N)/(time/1e9);
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149

	/* De-tile the result matrix (C).  I couldn't figure out how to do
	   it in-place so we are de-tiling to the A matrix.  */
	for(unsigned long i = 0; i < N*N; i+=tilerowsize) {
		size_t tilerow, tilecol, row;
		/* Tile row index (row-major).  */
		tilerow = i / (tilerowsize * tilecolsize * ntilerows);
		/* Tile column index (row-major).  */
		tilecol = (i / tilerowsize) % ntilerows;
		/* Row index within a tile (row-major).  */
		row = (i / rowsize) % tilecolsize;
		/* i converted to tiled.  */
		unsigned long tiledi = (tilerow * ntilerows + tilecol) *
			tilerowsize * tilecolsize + row * tilerowsize;

		memcpy(&a[i], &c[tiledi], tilerowsize*sizeof(double));
	}

150
	/* print the flops in GFLOPS */
151 152
	printf("dgemm-noprefetch: %llu %lld %lld %f\n", N, memsize, time,
	       flops/1e9);
Nicolas Denoyelle's avatar
Nicolas Denoyelle committed
153 154 155 156 157
	aml_area_munmap(slow, a, memsize);
	aml_area_munmap(slow, b, memsize);
	aml_area_munmap(fast, c, memsize);
	aml_area_linux_destroy(slow);
	aml_area_linux_destroy(fast);
158 159
	aml_tiling_destroy(&tiling_row, AML_TILING_TYPE_2D_ROWMAJOR);
	aml_tiling_destroy(&tiling_col, AML_TILING_TYPE_2D_ROWMAJOR);
160
	aml_finalize();
161 162
	return 0;
}