Commit 4791fec9 authored by Hal Finkel's avatar Hal Finkel
Browse files

Initial commit

Mirror r3378 from our internal repository.
parents
# Copyright (C) 2017, UChicago Argonne, LLC
# All Rights Reserved
#
# Hardware/Hybrid Cosmology Code (HACC), Version 1.0
#
# Salman Habib, Adrian Pope, Hal Finkel, Nicholas Frontiere, Katrin Heitmann,
# Vitali Morozov, Jeffrey Emberson, Thomas Uram, Esteban Rangel
# (Argonne National Laboratory)
#
# David Daniel, Patricia Fasel, Chung-Hsing Hsu, Zarija Lukic, James Ahrens
# (Los Alamos National Laboratory)
#
# George Zagaris
# (Kitware)
#
# OPEN SOURCE LICENSE
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer. Software changes,
# modifications, or derivative works, should be noted with comments and
# the author and organization’s name.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the names of UChicago Argonne, LLC or the Department of Energy
# nor the names of its contributors may be used to endorse or promote
# products derived from this software without specific prior written
# permission.
#
# 4. The software and the end-user documentation included with the
# redistribution, if any, must include the following acknowledgment:
#
# "This product includes software produced by UChicago Argonne, LLC under
# Contract No. DE-AC02-06CH11357 with the Department of Energy."
#
# *****************************************************************************
# DISCLAIMER
# THE SOFTWARE IS SUPPLIED "AS IS" WITHOUT WARRANTY OF ANY KIND. NEITHER THE
# UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR
# UCHICAGO ARGONNE, LLC, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY,
# EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE
# ACCURARY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA, APPARATUS,
# PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE
# PRIVATELY OWNED RIGHTS.
#
# *****************************************************************************
# Build configuration for the HACCKernels benchmark executable.

# The "...3.29" upper bound opts in to current policy behavior on new CMake
# releases (CMake 4.x rejects projects whose policy version is below 3.5).
# CMake older than 3.12 ignores the suffix and still sees plain 2.8.
cmake_minimum_required(VERSION 2.8...3.29)
project(HACCKernels)

# Single-configuration generators leave CMAKE_BUILD_TYPE empty by default,
# which would build the benchmark without optimization. Only fill it in when
# the user has not chosen a value and the generator is not multi-config.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  message(STATUS "No build type selected, defaulting to Release")
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type (default Release)" FORCE)
endif()

# When ON, the sources are compiled with VERIFICATION_OUTPUT_ONLY defined.
set(VERIFICATION_OUTPUT_ONLY OFF CACHE BOOL "Display only deterministic output")
if(VERIFICATION_OUTPUT_ONLY)
  add_definitions(-DVERIFICATION_OUTPUT_ONLY)
endif()

# OpenMP is optional. Use find_package() rather than including the find
# module directly so the regular package-lookup machinery is in effect.
find_package(OpenMP)

# FindOpenMP from CMake older than 3.9 provides no imported target; in that
# case fall back to appending the detected flags to the global flag strings.
if(OPENMP_FOUND AND NOT TARGET OpenMP::OpenMP_CXX)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
endif()

# Fast floating-point math is enabled for Release builds only.
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -ffast-math")
elseif(MSVC)
  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /fp:fast")
endif()

add_executable(HACCKernels main.cpp GravityForceKernel.cpp)

# The imported target carries both the compile flags and the OpenMP runtime
# libraries. Passing only the compile flag does not link the runtime on
# toolchains where the library is separate (e.g. AppleClang with libomp).
if(TARGET OpenMP::OpenMP_CXX)
  target_link_libraries(HACCKernels PRIVATE OpenMP::OpenMP_CXX)
endif()
Copyright (C) 2017, UChicago Argonne, LLC
All Rights Reserved
Hardware/Hybrid Cosmology Code (HACC), Version 1.0
Salman Habib, Adrian Pope, Hal Finkel, Nicholas Frontiere, Katrin Heitmann,
Vitali Morozov, Jeffrey Emberson, Thomas Uram, Esteban Rangel
(Argonne National Laboratory)
David Daniel, Patricia Fasel, Chung-Hsing Hsu, Zarija Lukic, James Ahrens
(Los Alamos National Laboratory)
George Zagaris
(Kitware)
OPEN SOURCE LICENSE
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer. Software changes,
modifications, or derivative works, should be noted with comments and the
author and organization’s name.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the names of UChicago Argonne, LLC or the Department of Energy nor
the names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
4. The software and the end-user documentation included with the
redistribution, if any, must include the following acknowledgment:
"This product includes software produced by UChicago Argonne, LLC under
Contract No. DE-AC02-06CH11357 with the Department of Energy."
********************************************************************************
DISCLAIMER
THE SOFTWARE IS SUPPLIED "AS IS" WITHOUT WARRANTY OF ANY KIND. NEITHER THE
UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR
UCHICAGO ARGONNE, LLC, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS
OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURARY,
COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA, APPARATUS, PRODUCT, OR
PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY
OWNED RIGHTS.
********************************************************************************
/*
* Copyright (C) 2017, UChicago Argonne, LLC
* All Rights Reserved
*
* Hardware/Hybrid Cosmology Code (HACC), Version 1.0
*
* Salman Habib, Adrian Pope, Hal Finkel, Nicholas Frontiere, Katrin Heitmann,
* Vitali Morozov, Jeffrey Emberson, Thomas Uram, Esteban Rangel
* (Argonne National Laboratory)
*
* David Daniel, Patricia Fasel, Chung-Hsing Hsu, Zarija Lukic, James Ahrens
* (Los Alamos National Laboratory)
*
* George Zagaris
* (Kitware)
*
* OPEN SOURCE LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer. Software changes,
* modifications, or derivative works, should be noted with comments and
* the author and organization’s name.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the names of UChicago Argonne, LLC or the Department of Energy
* nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* 4. The software and the end-user documentation included with the
* redistribution, if any, must include the following acknowledgment:
*
* "This product includes software produced by UChicago Argonne, LLC under
* Contract No. DE-AC02-06CH11357 with the Department of Energy."
*
* *****************************************************************************
* DISCLAIMER
* THE SOFTWARE IS SUPPLIED "AS IS" WITHOUT WARRANTY OF ANY KIND. NEITHER THE
* UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR
* UCHICAGO ARGONNE, LLC, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY,
* EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE
* ACCURARY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA, APPARATUS,
* PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE
* PRIVATELY OWNED RIGHTS.
*
* *****************************************************************************
*/
#include "HACCKernels.h"
#include <cmath>
// Coefficient tables for the polynomial fits to the long-range force, one per
// supported polynomial order (4, 5 and 6). Each table holds order+1 values
// indexed by power of r^2: element 0 is the constant term and the last element
// multiplies the highest power, which is the layout the Horner evaluation in
// GravityForceKernel reads.
// `extern` gives these const arrays external linkage; they are bound by
// reference as template arguments further down in this file.
extern const float PolyCoefficients4[] = {
0.263729f, -0.0686285f, 0.00882248f, -0.000592487f, 0.0000164622f
};
extern const float PolyCoefficients5[] = {
0.269327f, -0.0750978f, 0.0114808f, -0.00109313f, 0.0000605491f,
-0.00000147177f
};
extern const float PolyCoefficients6[] = {
0.271431f, -0.0783394f, 0.0133122f, -0.00159485f, 0.000132336f,
-0.00000663394f, 0.000000147305f
};
// HACC's gravity short-range-force kernel represents the part of the 1/r^2
// gravitational force that is not computed by the long-range grid solver. This
// kernel computes the acceleration of a target particle from all of the other
// particles in the provided interaction lists. It is assumed that the target
// particle has unit mass while the interaction-list can contain pseudo
// particles with larger mass values. Beyond a distance of MaxSep, the
// inter-particle force should be completely accounted for by the long-range
// grid solver (and thus we filter out such interactions here). Closer than
// MaxSep, we directly compute the inter-particle force, subtracting the
// long-range part of the force (as fit to a polynomial of the specified
// degree). A softening length, SofteningLen, is also used, as is standard in
// N-body codes.
template <int PolyOrder, const float (&PolyCoefficients)[PolyOrder+1]>
static void GravityForceKernel(int n, float *RESTRICT x, float *RESTRICT y,
float *RESTRICT z, float *RESTRICT mass,
float x0, float y0, float z0,
float MaxSepSqrd, float SofteningLenSqrd,
float &RESTRICT ax, float &RESTRICT ay,
float &RESTRICT az) {
float lax = 0.0f, lay = 0.0f, laz = 0.0f;
// As written below, the mass array is conditionally accessed (i.e. accessed
// only if the interaction is not filtered by the distance checks). This will
// tend to inhibit vectorization on architectures without masked vector loads.
// With OpenMP 4+, we can explicitly inform the compiler that vectorization is
// safe.
#if _OPENMP >= 201307
#pragma omp simd reduction(+:lax,lay,laz)
#endif
for (int i = 0; i < n; ++i) {
float dx = x[i] - x0, dy = y[i] - y0, dz = z[i] - z0;
float r2 = dx * dx + dy * dy + dz * dz;
if (r2 >= MaxSepSqrd || r2 == 0.0f)
continue;
float r2s = r2 + SofteningLenSqrd;
float f = PolyCoefficients[PolyOrder];
for (int p = 1; p <= PolyOrder; ++p)
f = PolyCoefficients[PolyOrder-p] + r2*f;
f = (1.0f / (r2s * std::sqrt(r2s)) - f) * mass[i];
lax += f * dx;
lay += f * dy;
laz += f * dz;
}
ax += lax;
ay += lay;
az += laz;
}
// Externally visible entry point for the kernel built with the 4th-order
// polynomial fit (PolyCoefficients4). All arguments are forwarded unchanged to
// the GravityForceKernel template, which adds its result to ax, ay and az.
void GravityForceKernel4(int n,
                         float *RESTRICT x, float *RESTRICT y,
                         float *RESTRICT z, float *RESTRICT mass,
                         float x0, float y0, float z0,
                         float MaxSepSqrd, float SofteningLenSqrd,
                         float &RESTRICT ax, float &RESTRICT ay,
                         float &RESTRICT az) {
  const int Order = 4;
  GravityForceKernel<Order, PolyCoefficients4>(
      n, x, y, z, mass,
      x0, y0, z0,
      MaxSepSqrd, SofteningLenSqrd,
      ax, ay, az);
}
// Externally visible entry point for the kernel built with the 5th-order
// polynomial fit (PolyCoefficients5). All arguments are forwarded unchanged to
// the GravityForceKernel template, which adds its result to ax, ay and az.
void GravityForceKernel5(int n,
                         float *RESTRICT x, float *RESTRICT y,
                         float *RESTRICT z, float *RESTRICT mass,
                         float x0, float y0, float z0,
                         float MaxSepSqrd, float SofteningLenSqrd,
                         float &RESTRICT ax, float &RESTRICT ay,
                         float &RESTRICT az) {
  const int Order = 5;
  GravityForceKernel<Order, PolyCoefficients5>(
      n, x, y, z, mass,
      x0, y0, z0,
      MaxSepSqrd, SofteningLenSqrd,
      ax, ay, az);
}
// Externally visible entry point for the kernel built with the 6th-order
// polynomial fit (PolyCoefficients6). All arguments are forwarded unchanged to
// the GravityForceKernel template, which adds its result to ax, ay and az.
void GravityForceKernel6(int n,
                         float *RESTRICT x, float *RESTRICT y,
                         float *RESTRICT z, float *RESTRICT mass,
                         float x0, float y0, float z0,
                         float MaxSepSqrd, float SofteningLenSqrd,
                         float &RESTRICT ax, float &RESTRICT ay,
                         float &RESTRICT az) {
  const int Order = 6;
  GravityForceKernel<Order, PolyCoefficients6>(
      n, x, y, z, mass,
      x0, y0, z0,
      MaxSepSqrd, SofteningLenSqrd,
      ax, ay, az);
}
/*
* Copyright (C) 2017, UChicago Argonne, LLC
* All Rights Reserved
*
* Hardware/Hybrid Cosmology Code (HACC), Version 1.0
*
* Salman Habib, Adrian Pope, Hal Finkel, Nicholas Frontiere, Katrin Heitmann,
* Vitali Morozov, Jeffrey Emberson, Thomas Uram, Esteban Rangel
* (Argonne National Laboratory)
*
* David Daniel, Patricia Fasel, Chung-Hsing Hsu, Zarija Lukic, James Ahrens
* (Los Alamos National Laboratory)
*
* George Zagaris
* (Kitware)
*
* OPEN SOURCE LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer. Software changes,
* modifications, or derivative works, should be noted with comments and
* the author and organization’s name.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the names of UChicago Argonne, LLC or the Department of Energy
* nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* 4. The software and the end-user documentation included with the
* redistribution, if any, must include the following acknowledgment:
*
* "This product includes software produced by UChicago Argonne, LLC under
* Contract No. DE-AC02-06CH11357 with the Department of Energy."
*
* *****************************************************************************
* DISCLAIMER
* THE SOFTWARE IS SUPPLIED "AS IS" WITHOUT WARRANTY OF ANY KIND. NEITHER THE
* UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR
* UCHICAGO ARGONNE, LLC, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY,
* EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE
* ACCURARY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA, APPARATUS,
* PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE
* PRIVATELY OWNED RIGHTS.
*
* *****************************************************************************
*/
// RESTRICT expands to the compiler-specific spelling of the restrict
// qualifier, or to nothing when the compiler is not recognized. A definition
// supplied before this point (e.g. on the command line) is left untouched.
#if !defined(RESTRICT)
#  if defined(__clang__) || defined(__GNUC__)
#    define RESTRICT __restrict__
#  elif defined(_MSC_VER)
#    define RESTRICT __restrict
#  else
#    define RESTRICT /* empty */
#  endif
#endif
// Pointer to a gravity force kernel entry point. GravityForceKernel4,
// GravityForceKernel5 and GravityForceKernel6 declared below all have this
// signature.
typedef void
(*GravityForceKernelFunc)(int n, float *RESTRICT x, float *RESTRICT y,
float *RESTRICT z, float *RESTRICT mass,
float x0, float y0, float z0,
float MaxSepSqrd, float SofteningLenSqrd,
float &RESTRICT ax, float &RESTRICT ay,
float &RESTRICT az);
// Short-range gravity force kernels, one per supported order (4, 5, 6) of the
// polynomial fit to the long-range force. All three share one signature:
//   n                - number of entries in the interaction list.
//   x, y, z, mass    - positions and masses of the list entries.
//   x0, y0, z0       - position of the target particle.
//   MaxSepSqrd       - squared cutoff separation; farther pairs are skipped.
//   SofteningLenSqrd - squared softening length.
//   ax, ay, az       - acceleration accumulators; the result is added to them.
void GravityForceKernel4(int n, float *RESTRICT x, float *RESTRICT y,
float *RESTRICT z, float *RESTRICT mass,
float x0, float y0, float z0,
float MaxSepSqrd, float SofteningLenSqrd,
float &RESTRICT ax, float &RESTRICT ay,
float &RESTRICT az);
void GravityForceKernel5(int n, float *RESTRICT x, float *RESTRICT y,
float *RESTRICT z, float *RESTRICT mass,
float x0, float y0, float z0,
float MaxSepSqrd, float SofteningLenSqrd,
float &RESTRICT ax, float &RESTRICT ay,
float &RESTRICT az);
void GravityForceKernel6(int n, float *RESTRICT x, float *RESTRICT y,
float *RESTRICT z, float *RESTRICT mass,
float x0, float y0, float z0,
float MaxSepSqrd, float SofteningLenSqrd,
float &RESTRICT ax, float &RESTRICT ay,
float &RESTRICT az);
# Copyright (C) 2017, UChicago Argonne, LLC
# All Rights Reserved
#
# Hardware/Hybrid Cosmology Code (HACC), Version 1.0
#
# Salman Habib, Adrian Pope, Hal Finkel, Nicholas Frontiere, Katrin Heitmann,
# Vitali Morozov, Jeffrey Emberson, Thomas Uram, Esteban Rangel
# (Argonne National Laboratory)
#
# David Daniel, Patricia Fasel, Chung-Hsing Hsu, Zarija Lukic, James Ahrens
# (Los Alamos National Laboratory)
#
# George Zagaris
# (Kitware)
#
# OPEN SOURCE LICENSE
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer. Software changes,
# modifications, or derivative works, should be noted with comments and
# the author and organization’s name.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the names of UChicago Argonne, LLC or the Department of Energy
# nor the names of its contributors may be used to endorse or promote
# products derived from this software without specific prior written
# permission.
#
# 4. The software and the end-user documentation included with the
# redistribution, if any, must include the following acknowledgment:
#
# "This product includes software produced by UChicago Argonne, LLC under
# Contract No. DE-AC02-06CH11357 with the Department of Energy."
#
# *****************************************************************************
# DISCLAIMER
# THE SOFTWARE IS SUPPLIED "AS IS" WITHOUT WARRANTY OF ANY KIND. NEITHER THE
# UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR
# UCHICAGO ARGONNE, LLC, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY,
# EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE
# ACCURARY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA, APPARATUS,
# PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE
# PRIVATELY OWNED RIGHTS.
#
# *****************************************************************************
# Minimal stand-alone build of the benchmark that does not require cmake.
# Override the compiler or flags on the command line, e.g.
#   make -f Makefile.simple CXX=clang++ CXXFLAGS="-O3 -ffast-math"
CXX = c++
CXXFLAGS = -O3 -ffast-math -fopenmp

# `all` and `clean` name actions, not files; declaring them phony keeps the
# rules working even if a file called `all` or `clean` exists in this directory.
.PHONY: all clean

all: HACCKernels

# Both translation units are compiled and linked in a single step; the header
# is listed so that editing it triggers a rebuild.
HACCKernels: main.cpp GravityForceKernel.cpp HACCKernels.h
	$(CXX) -o HACCKernels $(CXXFLAGS) main.cpp GravityForceKernel.cpp

clean:
	rm -f HACCKernels
HACCKernels: A Benchmark for HACC's Particle Force Kernels
The Hardware/Hybrid Accelerated Cosmology Code (HACC), a cosmology N-body-code
framework, is designed to run efficiently on diverse computing architectures
and to scale to millions of cores and beyond. The gravitational force is the
only significant force between particles at cosmological scales, and, in HACC,
this force is divided into two components: a long-range component and a
short-range component. The long-range component is handled using a distributed
grid-based solver, and the short-range component is handled by more-direct
particle-particle computations. On many systems, a tree-based multipole
approximation is used to further reduce the computational complexity of the
short-range force. The inner-most computation is a direct N^2 particle-particle
force calculation of the short-range part of the gravitational force. It is this
inner-most calculation that consumes most of the simulation time, is
computationally bound, and is what is represented by this benchmark.
Because this inner-most force calculation is algorithmically isolated from the
overall scale of the problem, the parameters don't need to be adjusted to
represent the workload on different machine scales (e.g. petascale or
exascale).
For more information on HACC, see:
Salman Habib, et al. HACC: Simulating Sky Surveys on State-of-the-Art
Supercomputing Architectures. New Astronomy Volume 42, January 2016, pp. 49-65.
http://doi.org/10.1016/j.newast.2015.06.003
https://arxiv.org/abs/1410.2805
The benchmark can be compiled using cmake (or make directly using
Makefile.simple) and then run like this:
$ ./HACCKernels
Maximum OpenMP Threads: 1
Iterations: 2000
Gravity Short-Range-Force Kernel (4th Order): 26307.2 -122.385 -1369.32: 4.45269 s
Gravity Short-Range-Force Kernel (5th Order): 26297.5 -123.056 -1368.67: 4.51347 s
Gravity Short-Range-Force Kernel (6th Order): 26297.6 -123.225 -1368.66: 4.8256 s
The accumulated acceleration in each direction for all particles in the last
iteration, which is a function of the total number of iterations, is printed as
a diagnostic. It should be similar for all polynomial kernel orders.
If you'd like the benchmark only to display deterministic output (i.e.
omitting information on the number of threads, timing, and the like), then
define the preprocessor symbol VERIFICATION_OUTPUT_ONLY when compiling.
You can enable this option when configuring by passing
-DVERIFICATION_OUTPUT_ONLY=ON to cmake.
For more information, contact: Hal Finkel <hfinkel@anl.gov>
/*
* Copyright (C) 2017, UChicago Argonne, LLC
* All Rights Reserved
*
* Hardware/Hybrid Cosmology Code (HACC), Version 1.0
*
* Salman Habib, Adrian Pope, Hal Finkel, Nicholas Frontiere, Katrin Heitmann,
* Vitali Morozov, Jeffrey Emberson, Thomas Uram, Esteban Rangel
* (Argonne National Laboratory)
*
* David Daniel, Patricia Fasel, Chung-Hsing Hsu, Zarija Lukic, James Ahrens
* (Los Alamos National Laboratory)
*
* George Zagaris
* (Kitware)
*
* OPEN SOURCE LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer. Software changes,
* modifications, or derivative works, should be noted with comments and
* the author and organization’s name.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the names of UChicago Argonne, LLC or the Department of Energy
* nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* 4. The software and the end-user documentation included with the
* redistribution, if any, must include the following acknowledgment:
*
* "This product includes software produced by UChicago Argonne, LLC under
* Contract No. DE-AC02-06CH11357 with the Department of Energy."
*
* *****************************************************************************
* DISCLAIMER
* THE SOFTWARE IS SUPPLIED "AS IS" WITHOUT WARRANTY OF ANY KIND. NEITHER THE
* UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR
* UCHICAGO ARGONNE, LLC, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY,
* EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE
* ACCURARY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA, APPARATUS,
* PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE
* PRIVATELY OWNED RIGHTS.
*
* *****************************************************************************
*/
#include "HACCKernels.h"
#include <ctime>
#include <cstdlib>
#include <limits>
#include <vector>
#include <iostream>
#ifdef _OPENMP
#include <omp.h>
#endif
// This number of iterations, which can be changed via the command line, is set
// so that the benchmark will run for a few seconds per polynomial degree of
// the force kernel on a single core of a modern CPU.
int NumIters = 2000;
// The interaction lists range in size between a few hundred and a few
// thousand. run() draws each list length as
// IListMin + r % (IListMax - IListMin), so IListMax is an exclusive bound.
int IListMin = 250;
int IListMax = 2250;
// The number of particles to update, which represents the number of particles
// per leaf node of the force evaluation tree in HACC, varies between tens of
// particles and around a hundred particles depending on the platform. These
// numbers represent the high side of the production range.
int PMin = 75;
int PMax = 150;
// The softening length and maximum separation are similar to those used in
// HACC high-resolution configurations.
float SofteningLen = 0.1;
float MaxSep = 3.2;
// In this benchmark we offset the positions of the particles being updated
// from the particles in the interaction list so that some of the interactions
// will be filtered for being out of range. 0.1 yields ~5% of interactions
// filtered for being out of range.
float OffsetAdjFrac = 0.1;
// A simple random-number generator, see: https://en.wikipedia.org/wiki/Xorshift
// Advances `state` by one xorshift step (shifts 13, 17, 5) and returns the
// new state, which doubles as the generated value.
static unsigned int rand32(unsigned int &state) {
  state ^= state << 13;
  state ^= state >> 17;
  state ^= state << 5;
  return state;
}
// Draws the next rand32() value and scales it by the float conversion of
// 0xffffffff, giving a non-negative float. `state` is advanced by one step.
static float randflt(unsigned int &state) {
  const float Numer = (float) rand32(state);
  const float Denom = (float) 0xffffffff;
  return Numer / Denom;
}
void run(GravityForceKernelFunc GravityForceKernel, const char *Desc) {
#ifndef VERIFICATION_OUTPUT_ONLY
std::clock_t Start, End;
#endif
std::cout << "Gravity Short-Range-Force Kernel (" << Desc << "): ";
#ifndef VERIFICATION_OUTPUT_ONLY
Start = std::clock();
#endif
float ax, ay, az;
// We use lastprivate for (ax,ay,az) so that the reported output, which can
// be used for validation, does not depend on the order in which parallel
// loop iterations are executed.
// Because each iteration has a different amount of work, dynamic or guided
// scheduling is used here. guided gives the implementation more scheduling
// freedom.
#ifdef _OPENMP
#pragma omp parallel for schedule(guided) lastprivate(ax,ay,az)
#endif
for (int i = 0; i < NumIters; ++i) {
// Set the random seed used by each iteration to be a function of the
// iteration number only. This allows information from any fixed iteration
// (e.g. first or last) to be used for numerical validation.
unsigned int seed = i+1;
ax = ay = az = 0.0f;
int ILParticleCount = IListMin + rand32(seed) % (IListMax - IListMin);