Commit 50c7bbd0 authored by Swann Perarnau

[feature] add logging, common args to benchs

Move towards a common list of args and a common log/timing
infrastructure for benchmarks.
parent 51167d12
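With this change the stream benchmarks stop hard-coding their configuration: argv[1] and argv[2] are libnuma nodestrings naming the nodes to use as fast and slow memory, argv[3] is the log2 of the working-set size in bytes, and the DEBUG environment variable turns the new debug logging on. A run equivalent to the old MEMSIZE of 1UL<<20 would therefore look like DEBUG=1 ./stream_add_omp_st 1 0 20, assuming node 1 is the fast memory and node 0 the slow one.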
-AM_CFLAGS = -I$(top_srcdir)/src @OPENMP_FLAG@
+AM_CFLAGS = -I$(top_srcdir)/src -I. @OPENMP_FLAG@
AM_LDFLAGS = ../src/libaml.la ../jemalloc/lib/libjemalloc-aml.so @OPENMP_FLAG@
-noinst_PROGRAMS = stream_add_pth_st stream_add_omp_st stream_add_omp_mt
+noinst_LIBRARIES = libutils.a
+libutils_a_SOURCES = utils.c utils.h
+LDADD = libutils.a
+noinst_PROGRAMS = stream_add_pth_st stream_add_omp_st stream_add_omp_mt
@@ -6,8 +6,9 @@
#include <stdio.h>
#include <stdlib.h>
#include "utils.h"
#define ITER 10
#define MEMSIZE (1UL<<20)
#define CHUNKING 4
size_t numthreads, tilesz, esz;
@@ -32,8 +33,14 @@ int main(int argc, char *argv[])
AML_ARENA_JEMALLOC_DECL(arena);
AML_DMA_LINUX_PAR_DECL(dma);
unsigned long nodemask[AML_NODEMASK_SZ];
+struct bitmask *slowb, *fastb;
aml_init(&argc, &argv);
-assert(argc == 1);
+assert(argc == 4);
+log_init(argv[0]);
+fastb = numa_parse_nodestring_all(argv[1]);
+slowb = numa_parse_nodestring_all(argv[2]);
+unsigned long memsize = 1UL << atoi(argv[3]);
/* use openmp env to figure out how many threads we want
* (we actually use 3x as much)
@@ -41,27 +48,25 @@ int main(int argc, char *argv[])
#pragma omp parallel
{
numthreads = omp_get_num_threads();
-tilesz = MEMSIZE/(numthreads*CHUNKING);
+tilesz = memsize/(numthreads*CHUNKING);
esz = tilesz/sizeof(unsigned long);
}
/* initialize all the supporting struct */
assert(!aml_binding_init(&binding, AML_BINDING_TYPE_SINGLE, 0));
-assert(!aml_tiling_init(&tiling, AML_TILING_TYPE_1D, tilesz, MEMSIZE));
-AML_NODEMASK_ZERO(nodemask);
-AML_NODEMASK_SET(nodemask, 0);
+assert(!aml_tiling_init(&tiling, AML_TILING_TYPE_1D, tilesz, memsize));
assert(!aml_arena_jemalloc_init(&arena, AML_ARENA_JEMALLOC_TYPE_REGULAR));
assert(!aml_area_linux_init(&slow,
AML_AREA_LINUX_MANAGER_TYPE_SINGLE,
AML_AREA_LINUX_MBIND_TYPE_REGULAR,
AML_AREA_LINUX_MMAP_TYPE_ANONYMOUS,
-&arena, MPOL_BIND, nodemask));
+&arena, MPOL_BIND, slowb->maskp));
assert(!aml_area_linux_init(&fast,
AML_AREA_LINUX_MANAGER_TYPE_SINGLE,
AML_AREA_LINUX_MBIND_TYPE_REGULAR,
AML_AREA_LINUX_MMAP_TYPE_ANONYMOUS,
-&arena, MPOL_BIND, nodemask));
+&arena, MPOL_BIND, fastb->maskp));
assert(!aml_dma_linux_par_init(&dma, numthreads*2, numthreads));
assert(!aml_scratch_seq_init(&sa, &fast, &slow, &dma, &tiling,
(size_t)2*numthreads, (size_t)1));
@@ -69,12 +74,12 @@ int main(int argc, char *argv[])
(size_t)2*numthreads, (size_t)1));
/* allocation */
-a = aml_area_malloc(&slow, MEMSIZE);
-b = aml_area_malloc(&slow, MEMSIZE);
-c = aml_area_malloc(&fast, MEMSIZE);
+a = aml_area_malloc(&slow, memsize);
+b = aml_area_malloc(&slow, memsize);
+c = aml_area_malloc(&fast, memsize);
assert(a != NULL && b != NULL && c != NULL);
-unsigned long esize = MEMSIZE/sizeof(unsigned long);
+unsigned long esize = memsize/sizeof(unsigned long);
for(unsigned long i = 0; i < esize; i++) {
a[i] = i;
b[i] = esize - i;
@@ -90,7 +95,7 @@ int main(int argc, char *argv[])
abaseptr = aml_scratch_baseptr(&sa);
bbaseptr = aml_scratch_baseptr(&sb);
ai = -1; bi = -1;
-for(i = 0; i < (MEMSIZE/tilesz) -1; i++) {
+for(i = 0; i < (memsize/tilesz) -1; i++) {
struct aml_scratch_request *ar, *br;
oldai = ai; oldbi = bi;
aml_scratch_async_pull(&sa, &ar, abaseptr, &ai, a, i+1);
...
@@ -3,11 +3,11 @@
#include <errno.h>
#include <omp.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include "utils.h"
#define ITER 10
#define MEMSIZE (1UL<<20)
#define CHUNKING 4
size_t numthreads, tilesz, esz;
@@ -21,7 +21,7 @@ AML_SCRATCH_PAR_DECL(sb);
int kernel(unsigned long *a, unsigned long *b, unsigned long *c, size_t n)
{
size_t i;
printf("%p = %p + %p [%zi]\n",c,a,b,n);
debug("%p = %p + %p [%zi]\n",c,a,b,n);
for(i = 0; i < n; i++)
c[i] = a[i] + b[i];
return 0;
@@ -63,8 +63,14 @@ int main(int argc, char *argv[])
AML_ARENA_JEMALLOC_DECL(arena);
AML_DMA_LINUX_SEQ_DECL(dma);
unsigned long nodemask[AML_NODEMASK_SZ];
+struct bitmask *slowb, *fastb;
aml_init(&argc, &argv);
-assert(argc == 1);
+assert(argc == 4);
+log_init(argv[0]);
+fastb = numa_parse_nodestring_all(argv[1]);
+slowb = numa_parse_nodestring_all(argv[2]);
+unsigned long memsize = 1UL << atoi(argv[3]);
/* use openmp env to figure out how many threads we want
* (we actually use 3x as much)
@@ -72,27 +78,25 @@ int main(int argc, char *argv[])
#pragma omp parallel
{
numthreads = omp_get_num_threads();
-tilesz = MEMSIZE/(numthreads*CHUNKING);
+tilesz = memsize/(numthreads*CHUNKING);
esz = tilesz/sizeof(unsigned long);
}
/* initialize all the supporting struct */
assert(!aml_binding_init(&binding, AML_BINDING_TYPE_SINGLE, 0));
-assert(!aml_tiling_init(&tiling, AML_TILING_TYPE_1D, tilesz, MEMSIZE));
-AML_NODEMASK_ZERO(nodemask);
-AML_NODEMASK_SET(nodemask, 0);
+assert(!aml_tiling_init(&tiling, AML_TILING_TYPE_1D, tilesz, memsize));
assert(!aml_arena_jemalloc_init(&arena, AML_ARENA_JEMALLOC_TYPE_REGULAR));
assert(!aml_area_linux_init(&slow,
AML_AREA_LINUX_MANAGER_TYPE_SINGLE,
AML_AREA_LINUX_MBIND_TYPE_REGULAR,
AML_AREA_LINUX_MMAP_TYPE_ANONYMOUS,
-&arena, MPOL_BIND, nodemask));
+&arena, MPOL_BIND, slowb->maskp));
assert(!aml_area_linux_init(&fast,
AML_AREA_LINUX_MANAGER_TYPE_SINGLE,
AML_AREA_LINUX_MBIND_TYPE_REGULAR,
AML_AREA_LINUX_MMAP_TYPE_ANONYMOUS,
-&arena, MPOL_BIND, nodemask));
+&arena, MPOL_BIND, fastb->maskp));
assert(!aml_dma_linux_seq_init(&dma, numthreads*2));
assert(!aml_scratch_par_init(&sa, &fast, &slow, &dma, &tiling,
2*numthreads, numthreads));
@@ -100,12 +104,12 @@ int main(int argc, char *argv[])
2*numthreads, numthreads));
/* allocation */
-a = aml_area_malloc(&slow, MEMSIZE);
-b = aml_area_malloc(&slow, MEMSIZE);
-c = aml_area_malloc(&fast, MEMSIZE);
+a = aml_area_malloc(&slow, memsize);
+b = aml_area_malloc(&slow, memsize);
+c = aml_area_malloc(&fast, memsize);
assert(a != NULL && b != NULL && c != NULL);
-unsigned long esize = MEMSIZE/sizeof(unsigned long);
+unsigned long esize = memsize/sizeof(unsigned long);
for(unsigned long i = 0; i < esize; i++) {
a[i] = i;
b[i] = esize - i;
...
@@ -3,11 +3,11 @@
#include <errno.h>
#include <omp.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include "utils.h"
#define ITER 10
#define MEMSIZE (1UL<<20)
#define CHUNKING 4
size_t numthreads, tilesz, esz;
@@ -21,7 +21,7 @@ AML_SCRATCH_PAR_DECL(sb);
int kernel(unsigned long *a, unsigned long *b, unsigned long *c, size_t n)
{
size_t i;
printf("%p = %p + %p [%zi]\n",c,a,b,n);
debug("%p = %p + %p [%zi]\n",c,a,b,n);
for(i = 0; i < n; i++)
c[i] = a[i] + b[i];
return 0;
@@ -70,8 +70,14 @@ int main(int argc, char *argv[])
AML_ARENA_JEMALLOC_DECL(arena);
AML_DMA_LINUX_SEQ_DECL(dma);
unsigned long nodemask[AML_NODEMASK_SZ];
+struct bitmask *slowb, *fastb;
aml_init(&argc, &argv);
-assert(argc == 1);
+assert(argc == 4);
+log_init(argv[0]);
+fastb = numa_parse_nodestring_all(argv[1]);
+slowb = numa_parse_nodestring_all(argv[2]);
+unsigned long memsize = 1UL << atoi(argv[3]);
/* use openmp env to figure out how many threads we want
* (we actually use 3x as much)
@@ -79,27 +85,25 @@ int main(int argc, char *argv[])
#pragma omp parallel
{
numthreads = omp_get_num_threads();
-tilesz = MEMSIZE/(numthreads*CHUNKING);
+tilesz = memsize/(numthreads*CHUNKING);
esz = tilesz/sizeof(unsigned long);
}
/* initialize all the supporting struct */
assert(!aml_binding_init(&binding, AML_BINDING_TYPE_SINGLE, 0));
-assert(!aml_tiling_init(&tiling, AML_TILING_TYPE_1D, tilesz, MEMSIZE));
-AML_NODEMASK_ZERO(nodemask);
-AML_NODEMASK_SET(nodemask, 0);
+assert(!aml_tiling_init(&tiling, AML_TILING_TYPE_1D, tilesz, memsize));
assert(!aml_arena_jemalloc_init(&arena, AML_ARENA_JEMALLOC_TYPE_REGULAR));
assert(!aml_area_linux_init(&slow,
AML_AREA_LINUX_MANAGER_TYPE_SINGLE,
AML_AREA_LINUX_MBIND_TYPE_REGULAR,
AML_AREA_LINUX_MMAP_TYPE_ANONYMOUS,
-&arena, MPOL_BIND, nodemask));
+&arena, MPOL_BIND, slowb->maskp));
assert(!aml_area_linux_init(&fast,
AML_AREA_LINUX_MANAGER_TYPE_SINGLE,
AML_AREA_LINUX_MBIND_TYPE_REGULAR,
AML_AREA_LINUX_MMAP_TYPE_ANONYMOUS,
-&arena, MPOL_BIND, nodemask));
+&arena, MPOL_BIND, fastb->maskp));
assert(!aml_dma_linux_seq_init(&dma, (size_t)numthreads*4));
assert(!aml_scratch_par_init(&sa, &fast, &slow, &dma, &tiling,
(size_t)2*numthreads, (size_t)numthreads));
@@ -107,12 +111,12 @@ int main(int argc, char *argv[])
(size_t)2*numthreads, (size_t)numthreads));
/* allocation */
-a = aml_area_malloc(&slow, MEMSIZE);
-b = aml_area_malloc(&slow, MEMSIZE);
-c = aml_area_malloc(&fast, MEMSIZE);
+a = aml_area_malloc(&slow, memsize);
+b = aml_area_malloc(&slow, memsize);
+c = aml_area_malloc(&fast, memsize);
assert(a != NULL && b != NULL && c != NULL);
-unsigned long esize = MEMSIZE/sizeof(unsigned long);
+unsigned long esize = memsize/sizeof(unsigned long);
for(unsigned long i = 0; i < esize; i++) {
a[i] = i;
b[i] = esize - i;
...
#include "utils.h"
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
static const char *namespace;
static int active = 0;
void log_init(const char *nm)
{
	/* remember the program name so log_msg() can prefix messages with it */
	namespace = nm;
	char *debug = getenv("DEBUG");
	if(debug)
		active = atoi(debug);
}
void log_msg(const char *level, unsigned int line, const char *fmt, ...)
{
	va_list ap;
	/* drop messages unless DEBUG was set to a non-zero value */
	if(!active)
		return;
	printf("%s:\t%s:\t%u:\t", namespace, level, line);
	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
}
#ifndef AML_BENCHS_UTILS_H
#define AML_BENCHS_UTILS_H 1
void log_init(const char *nm);
void log_msg(const char *level, unsigned int line, const char *fmt, ...);
#define debug(...) log_msg("debug", __LINE__, __VA_ARGS__)
#endif // AML_BENCHS_UTILS_H
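For reference, a minimal sketch, not part of the commit, of how a benchmark plugs into this API; the usage string and error handling are illustrative assumptions, and the aml_* setup is omitted:

#include <stdio.h>
#include <stdlib.h>
#include "utils.h"

int main(int argc, char *argv[])
{
	/* record the program name and read the DEBUG environment variable */
	log_init(argv[0]);
	if(argc != 4) {
		fprintf(stderr, "usage: %s <fast-nodes> <slow-nodes> <log2-memsize>\n", argv[0]);
		return 1;
	}
	unsigned long memsize = 1UL << atoi(argv[3]);
	/* debug() expands to log_msg("debug", __LINE__, ...) and only prints
	 * when DEBUG is set, e.g. DEBUG=1 */
	debug("working set: %lu bytes\n", memsize);
	return 0;
}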