Commit 912338a6 authored by Swann Perarnau's avatar Swann Perarnau
Browse files

Merge branch 'staging' into master

See merge request !171
parents bfc976af 83e7c188
Pipeline #12144 passed with stages
in 5 minutes and 59 seconds
# Copyright 2019 UChicago Argonne, LLC.
# (c.f. AUTHORS, LICENSE)
#
# This file is part of the AML project.
# For more info, see https://xgitlab.cels.anl.gov/argo/aml
#
# SPDX-License-Identifier: BSD-3-Clause
---
Language: Cpp
AccessModifierOffset: -2
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Right
AlignOperands: true
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: None
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
BinPackArguments: true
BinPackParameters: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Linux
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: false
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeComma
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: false
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 8
ContinuationIndentWidth: 8
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: false
IncludeBlocks: Regroup
IncludeCategories:
- Regex: 'config\.h'
Priority: 1
- Regex: 'test.*'
Priority: 1
- Regex: 'aml\.h'
Priority: 3
- Regex: 'aml/.*'
Priority: 4
- Regex: '^<.*\.h>'
Priority: 2
- Regex: '.*'
Priority: 5
IncludeIsMainRegex: 'aml\.h'
IndentCaseLabels: false
IndentWidth: 8
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: true
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: Inner
PenaltyBreakAssignment: 10
PenaltyBreakBeforeFirstCallParameter: 30
PenaltyBreakComment: 10
PenaltyBreakFirstLessLess: 0
PenaltyBreakString: 10
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 100
PenaltyReturnTypeOnItsOwnLine: 60
PointerAlignment: Right
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: false
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp03
TabWidth: 8
UseTab: ForIndentation
...
......@@ -2,6 +2,7 @@ variables:
NMC_FE1_SLURM_PARAMETERS: "--nodes=1 --partition=ecp-p9-4v100"
ANL_THETA_PROJECT_SERVICE_USER: "cscstss"
ANL_THETA_SCHEDULER_PARAMETERS: "-A CSC250STPR19 -n 1 -t 20 -q debug-flat-quad"
GIT_SUBMODULE_STRATEGY: "normal"
stages:
- build
......@@ -86,21 +87,20 @@ make:theta-batch:
- staging
variables:
- $ECP_MIRROR == "THETA"
variables:
CC: "icc"
CFLAGS: "-xKNL"
LDFLAGS: "-qopenmp-link=static"
script:
- |
cat > aml-ci-script.sh << EOF
#!/bin/bash
module list
./autogen.sh
./configure
make -j63
make check VERBOSE=1
EOF
- cat aml-ci-script.sh
- chmod +x aml-ci-script.sh
- aprun aml-ci-script.sh
- ./autogen.sh
- ./configure --host=x86_64
- make
- make check-programs
- aprun make check VERBOSE=1
artifacts:
when: on_failure
paths:
- tests/*.log
- doc/tutorials/*.log
- benchmarks/*.log
- config.log
......@@ -52,6 +52,7 @@ stamp-h1
/missing
/stamp-h1
version.h
features.h
/m4/libtool.m4
/m4/ltmain.sh
/m4/ltoptions.m4
......@@ -60,9 +61,8 @@ version.h
/m4/lt~obsolete.m4
# autotest
tests/**/*.trs
tests/**/*.log
*.test.*
*.trs
*.log
# libtool
/libtool
......
variables:
ARGOPKGS : "https://xgitlab.cels.anl.gov/argo/argopkgs/-/archive/master/argopkgs-master.tar.gz"
DEFAULT_MERGE_TARGET: "origin/staging"
GIT_SUBMODULE_STRATEGY: "normal"
stages:
- style
- build
- docs
- release
- distribution
repoquality:
stage: style
......@@ -13,7 +15,7 @@ repoquality:
- /^wip.*/
- /^WIP.*/
script:
- git ls-files *.c *.h | grep -v -e benchmarks > .repoquality
- git ls-files '*.c' '*.h' > .repoquality
- nix run -f "$ARGOPKGS" repoquality --command repoquality
tags:
- nix
......@@ -23,7 +25,7 @@ repoquality:
paths:
- .repoquality
checkpatch:
clang-format:
stage: style
except:
- /^wip.*/
......@@ -32,8 +34,16 @@ checkpatch:
- nix
- kvm
script:
- git ls-files *.c *.h | grep -v -e benchmarks >> .checkpatch.conf
- nix run -f "$ARGOPKGS" checkpatch --command checkpatch.pl
- rm -f clang-format-diff
- target=$(git rev-parse $DEFAULT_MERGE_TARGET)
- nix-shell --run "git-clang-format --quiet --diff $target > clang-format-diff"
- lint=$(grep -v --color=never "no modified files to format" clang-format-diff || true)
- if [ ! -z "$lint" ]; then echo "format errors, inspect the clang-format-diff artifact for info"; exit 1; else exit 0; fi
artifacts:
when: on_failure
paths:
- ./clang-format-diff
expire_in: 1 week
style:docs:
stage: style
......@@ -45,12 +55,12 @@ style:docs:
- kvm
script:
- |
nix-shell "$ARGOPKGS" -A aml-dist --arg aml-src ./. --run bash << EOF
./autogen.sh
mkdir build
./configure --prefix=`pwd`/build --enable-docs
make install-data
EOF
nix-shell --run bash <<< '
./autogen.sh
mkdir build
./configure --prefix=`pwd`/build --enable-docs
make install-data
'
artifacts:
when: on_failure
paths:
......@@ -58,20 +68,24 @@ style:docs:
make:generic:
tags:
- knl
- nix
- kvm
stage: build
except:
- /^wip.*/
- /^WIP.*/
variables:
CFLAGS: "-std=c99 -pedantic -Wall -Wextra -Werror -Wno-unused-but-set-parameter"
CFLAGS: "-std=c99 -pedantic -Wall -Wextra -Werror -Wno-unused-but-set-parameter -Wno-builtin-declaration-mismatch"
script:
- ./autogen.sh
- mkdir build
- ./configure --prefix=`pwd`/build
- make
- make check
- make install
- |
nix-shell --run bash <<< '
./autogen.sh
mkdir build
./configure --prefix=`pwd`/build
make
make check
make install
'
artifacts:
when: on_failure
paths:
......@@ -80,21 +94,52 @@ make:generic:
make:out-of-tree:
tags:
- knl
- nix
- kvm
stage: build
except:
- /^wip.*/
- /^WIP.*/
variables:
CFLAGS: "-std=c99 -pedantic -Wall -Wextra -Werror -Wno-unused-but-set-parameter"
CFLAGS: "-std=c99 -pedantic -Wall -Wextra -Werror -Wno-unused-but-set-parameter -Wno-builtin-declaration-mismatch"
script:
- ./autogen.sh
- mkdir out
- cd out
- mkdir build
- ../configure --prefix=`pwd`/build
- make
- make check
- |
nix-shell --run bash <<< '
./autogen.sh
mkdir out
cd out
mkdir build
../configure --prefix=`pwd`/build
make
make check
'
artifacts:
when: on_failure
paths:
- config.log
- tests/*.log
make:valgrind:
tags:
- nix
- kvm
stage: build
except:
- /^wip.*/
- /^WIP.*/
variables:
CFLAGS: "-std=c99 -pedantic -Wall -Wextra -Werror -Wno-unused-but-set-parameter -Wno-builtin-declaration-mismatch"
VERBOSE: "1"
OMP_NUM_THREADS: "1"
script:
- |
nix-shell --run bash <<< '
./autogen.sh
mkdir build
./configure --prefix=`pwd`/build --enable-valgrind
make
make check-valgrind
'
artifacts:
when: on_failure
paths:
......@@ -106,15 +151,18 @@ make:knl:
except:
- /^wip.*/
- /^WIP.*/
variables:
CFLAGS: "-mkl -xhost"
CC: "icc"
tags:
- knl
script:
- source /opt/intel/compilers_and_libraries/linux/bin/compilervars.sh intel64
- ./autogen.sh
- mkdir build
- CC=icc CFLAGS="-mkl -xhost" ./configure --prefix=`pwd`/build --enable-benchmarks
- ./configure --prefix=`pwd`/build
- make -j64
- make check
- make -C tests check
- make install
artifacts:
when: on_failure
......@@ -122,6 +170,25 @@ make:knl:
- config.log
- tests/*.log
make:chimera:
stage: build
except:
- /^wip.*/
- /^WIP.*/
tags:
- cuda
script:
- ./autogen.sh
- mkdir build
- ./configure --prefix=`pwd`/build --with-cuda=yes
- make -j
- make check
artifacts:
when: on_failure
paths:
- config.log
- tests/*.log
readthedocs:
stage: docs
when: on_success
......@@ -138,3 +205,27 @@ readthedocs:
script:
- nix run nixpkgs.curl -c curl -X POST -d "branches=$CI_COMMIT_REF_NAME" -d "token=$READTHEDOCS_TOKEN" https://readthedocs.org/api/v2/webhook/argo-aml/83161/
distcheck:
tags:
- nix
- kvm
stage: distribution
except:
- /^wip.*/
- /^WIP.*/
variables:
CFLAGS: "-std=c99 -pedantic -Wall -Wextra -Werror -Wno-unused-but-set-parameter -Wno-builtin-declaration-mismatch"
script:
- |
nix-shell --run bash <<< '
./autogen.sh
mkdir build
./configure --prefix=`pwd`/build
make distcheck
chmod -R +w aml-*
'
artifacts:
when: on_failure
paths:
- config.log
- tests/*.log
[submodule "excit"]
path = excit
url = https://xgitlab.cels.anl.gov/argo/excit.git
......@@ -7,3 +7,4 @@ Clement Foyer <cfoyer@cray.com>
Brice Videau <bvideau@anl.gov>
Aleksandr Danilin <danilin96@gmail.com>
Kyle Shaver <kshaver@anl.gov>
Florence Monna <fmonna@anl.gov>
ACLOCAL_AMFLAGS = -I m4
SUBDIRS = src include tests doc
#if ADD_BENCHMARKS
#SUBDIRS += benchmarks
#endif
SUBDIRS = src include tests doc benchmarks excit
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = aml.pc
......
......@@ -6,5 +6,5 @@ includedir=@includedir@
Name: libaml
Description: AML: Building Blocks for Memory Management
Version: @PACKAGE_VERSION@
Libs: -L${libdir} -laml
Cflags: -I${includedir}
Libs: -L${libdir} -laml -lexcit @HWLOC_LIBS@ @CUDA_LIBS@
Cflags: -I${includedir} @HWLOC_CFLAGS@ @CUDA_CFLAGS@
AM_CFLAGS = -I$(top_srcdir)/include -I. $(OPENMP_CFLAGS)
AM_LDFLAGS = ../src/libaml.la $(OPENMP_CFLAGS)
noinst_LIBRARIES = libutils.a
libutils_a_SOURCES = utils.c utils.h
LDADD = libutils.a
noinst_PROGRAMS = stream_add_pth_st \
stream_add_omp_st \
stream_add_omp_mt \
dgemm_vanilla \
dgemm_mkl \
dgemm_prefetch \
dgemm_noprefetch
AM_COLOR_TESTS = yes
AM_CFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/excit/src $(PTHREAD_CFLAGS) $(OPENMP_CFLAGS)
AM_LDFLAGS = ../src/libaml.la $(top_builddir)/excit/src/libexcit.la $(PTHREAD_LIBS) $(OPENMP_CFLAGS)
if HAVE_CUDA
# LIBS is used instead of AM_LDFLAGS on purpose:
# AM_LDFLAGS is placed on the link line before the libraries listed in LDADD,
# whereas LIBS comes after them. With the CUDA flags in AM_LDFLAGS, cuda is
# not linked in when linking with libaml.la.
LIBS += $(CUDA_CFLAGS)
LIBS += $(CUDA_LIBS)
AM_CFLAGS += $(CUDA_CFLAGS)
AM_LDFLAGS += $(CUDA_LIBS)
endif
if HAVE_HWLOC
AM_CFLAGS += $(HWLOC_CFLAGS)
AM_LDFLAGS += $(HWLOC_LIBS)
endif
# valgrind support
@VALGRIND_CHECK_RULES@
noinst_LIBRARIES = libkernel.a
libkernel_a_SOURCES = utils.c utils.h blas_l1_kernel.c blas_l1_kernel.h verify_blas_l1.c verify_blas_l1.h
LDADD = libkernel.a
NOPREFETCH = noprefetch/flat_blas_l1 \
noprefetch/tiled_blas_l1
BENCHMARKS = $(NOPREFETCH)
# all tests
check_PROGRAMS = $(BENCHMARKS)
TESTS = $(BENCHMARKS)
# phony target to allow us to compile the check programs without running the
# tests. Required for any environment where we are cross-compiling
check-programs-local: $(BENCHMARKS)
/*******************************************************************************
* Copyright 2019 UChicago Argonne, LLC.
* (c.f. AUTHORS, LICENSE)
*
* This file is part of the AML project.
* For more info, see https://xgitlab.cels.anl.gov/argo/aml
*
* SPDX-License-Identifier: BSD-3-Clause
******************************************************************************/
/*
* This is a benchmark for the BLAS Level 1 operations for AML.
*/
#include "blas_l1_kernel.h"
/*
 * sign(a): 1 if a > 0, -1 if a < 0, 0 otherwise.
 * The argument is parenthesized so that expressions containing low-precedence
 * operators (e.g. ?:) expand correctly. Note that the argument may be
 * evaluated twice, so it must not have side effects.
 * Look into another way to define these.
 */
#define sign(a) (((a) > 0) ? 1 : (((a) < 0) ? -1 : 0))
/*
 * Sum of absolute values: returns |a[0]| + ... + |a[n-1]|.
 *
 * n: number of elements read from a.
 * a: input vector.
 * b, c, scalar: unused; kept so that this function has the same signature
 *               as the other kernels in this file.
 *
 * Returns 0 when n is 0.
 */
double dasum(size_t n, double *a, double *b, double *c, double scalar)
{
	size_t i;
	double sum = 0;

	/* Mark the parameters as unused without dereferencing the pointers,
	 * so that callers may pass NULL for b and c. */
	(void)b;
	(void)c;
	(void)scalar;

	for (i = 0; i < n; i++)
		sum = sum + fabs(a[i]);
	return sum;
}
/*
 * Scaled vector addition: c[k] = b[k] + scalar * a[k] for every k in [0, n).
 *
 * n: number of elements processed.
 * a, b: input vectors.
 * c: output vector.
 * scalar: factor applied to the elements of a.
 *
 * Always returns 1.
 */
double daxpy(size_t n, double *a, double *b, double *c, double scalar)
{
	size_t k;

#pragma omp parallel for
	for (k = 0; k < n; ++k) {
		c[k] = b[k] + scalar * a[k];
	}

	return 1.0;
}
/*
 * Vector copy: b[i] = a[i] for every i in [0, n).
 *
 * n: number of elements copied.
 * a: source vector.
 * b: destination vector.
 * c, scalar: unused; kept so that this function has the same signature as
 *            the other kernels in this file.
 *
 * Always returns 1.
 */
double dcopy(size_t n, double *a, double *b, double *c, double scalar)
{
	size_t i;

	/* Mark the parameters as unused without dereferencing c, so that
	 * callers may pass NULL for it. */
	(void)c;
	(void)scalar;

#pragma omp parallel for
	for (i = 0; i < n; i++)
		b[i] = a[i];
	return 1;
}
/*
 * Dot product: returns a[0] * b[0] + ... + a[n-1] * b[n-1].
 *
 * n: number of elements read from a and b.
 * a, b: input vectors.
 * c, scalar: unused; kept so that this function has the same signature as
 *            the other kernels in this file.
 *
 * Each product is computed in double precision and then accumulated in a
 * long double; the result is converted back to double on return.
 * Returns 0 when n is 0.
 */
double ddot(size_t n, double *a, double *b, double *c, double scalar)
{
	size_t i;
	long double dot = 0.0;

	/* Mark the parameters as unused without dereferencing c, so that
	 * callers may pass NULL for it. */
	(void)c;
	(void)scalar;

#pragma omp parallel for reduction(+ : dot)
	for (i = 0; i < n; i++) {
		long double temp;

		temp = a[i] * b[i];
		dot += temp;
	}
	return (double)dot;
}
double dnrm2(size_t n, double *a, double *b, double *c, double scalar)
{
(void)*b;
(void)*c;
(void)scalar;
size_t i;
double scale, ssq, temp;
scale = 0.0;
ssq = 1.0;
for (i = 0; i < n; i++) {
if (a[i] != 0.0) {
temp = fabs(a[i]);
if (scale < temp) {
ssq = 1.0 + ssq * pow(scale / temp, 2);
scale = temp;
} else
ssq = ssq + pow(temp / scale, 2);
}
}
return scale * sqrt(ssq);
}
/*
 * Vector scaling: b[i] = scalar * a[i] for every i in [0, n).
 *
 * n: number of elements processed.
 * a: input vector.
 * b: output vector.
 * c: unused; kept so that this function has the same signature as the
 *    other kernels in this file.
 * scalar: factor applied to the elements of a.
 *
 * Always returns 1.
 */
double dscal(size_t n, double *a, double *b, double *c, double scalar)
{
	size_t i;

	/* Mark c as unused without dereferencing it, so that callers may
	 * pass NULL for it. */
	(void)c;

#pragma omp parallel for
	for (i = 0; i < n; i++)
		b[i] = scalar * a[i];
	return 1;
}
/*
 * Vector swap: exchanges a[i] and b[i] for every i in [0, n).
 *
 * n: number of elements exchanged.
 * a, b: vectors whose contents are swapped in place.
 * c, scalar: unused; kept so that this function has the same signature as
 *            the other kernels in this file.
 *
 * Always returns 1.
 */
double dswap(size_t n, double *a, double *b, double *c, double scalar)
{
	size_t i;

	/* Mark the parameters as unused without dereferencing c, so that
	 * callers may pass NULL for it. */
	(void)c;
	(void)scalar;

#pragma omp parallel for
	for (i = 0; i < n; i++) {
		/* Declared inside the loop body so that every thread gets its
		 * own copy; a temporary declared outside the parallel region
		 * would be shared and cause a data race. */
		double temp = a[i];

		a[i] = b[i];
		b[i] = temp;
	}
	return 1;
}
/*
 * Index of the element with the largest absolute value.
 *
 * n: number of elements read from a.
 * a: input vector.
 * b, c, scalar: unused; kept so that this function has the same signature
 *               as the other kernels in this file.
 *
 * Returns the (0-based) index, converted to double, of the first element of
 * a whose absolute value is maximal. Returns 0 when n is 0 or 1.
 */
double idmax(size_t n, double *a, double *b, double *c, double scalar)
{
	size_t i;
	size_t id_max = 0;
	double max;

	/* Mark the parameters as unused without dereferencing the pointers,
	 * so that callers may pass NULL for b and c. */
	(void)b;
	(void)c;
	(void)scalar;

	/* Nothing to compare; also avoids reading a[0] of an empty vector. */
	if (n <= 1)
		return 0;

	/* Compare magnitudes from the start: a negative a[0] must compete
	 * with its absolute value, like every other element. */
	max = fabs(a[0]);
	for (i = 1; i < n; i++) {
		double cur = fabs(a[i]);

		/* Strict comparison keeps the first index on ties. */
		if (cur > max) {
			id_max = i;
			max = cur;
		}
	}
	return id_max;
}
/* The rotations. Not included in the array of functions because of their
parameters */
/* Plane rotation */
void drot(size_t n, double *a, double *b, double x, double y)
{
double temp;
size_t i;
#pragma omp parallel for