Commit 6e9e77f7 authored by Adrian Pope's avatar Adrian Pope
Browse files

updated to README, GNUmakefiles, and test Fortran driver

parent d534826b
......@@ -60,7 +60,7 @@ DFFT_MPI_CC ?= mpicc
DFFT_MPI_CXX ?= mpicxx
# MPI Fortran compiler
DFFT_MPI_FC = mpif90
DFFT_MPI_FC ?= mpif90
# pre-processor flags
DFFT_MPI_CPPFLAGS ?= -DDFFT_TIMING=2
......@@ -72,10 +72,29 @@ DFFT_MPI_CFLAGS ?= -g -O3 -Wall -Wno-deprecated -std=gnu99
DFFT_MPI_CXXFLAGS ?= -g -O3 -Wall
# Fortran flags
DFFT_MPI_FFLAGS ?= -g -O3 -fpp
# -cpp seems to work with GNU and Intel
# though -fpp might be more correct for Intel
DFFT_MPI_FFLAGS ?= -g -O3 -cpp
# linker flags
DFFT_MPI_LDFLAGS ?= -lfftw3 -lm
DFFT_MPI_LDFLAGS ?=
# additional Fortran linker flags
# sometimes this also needs -lmpi++
DFFT_MPI_FLDFLAGS ?= -lstdc++
# FFTW3
DFFT_FFTW_HOME ?= $(shell dirname $(shell dirname $(shell which fftw-wisdom)))
DFFT_FFTW_CPPFLAGS ?= -I$(DFFT_FFTW_HOME)/include
DFFT_FFTW_LDFLAGS ?= -L$(DFFT_FFTW_HOME)/lib -lfftw3 -lm
# these should not usually require modification
DFFT_MPI_CPPFLAGS += $(DFFT_FFTW_CPPFLAGS)
DFFT_MPI_LDFLAGS += $(DFFT_FFTW_LDFLAGS)
all: nativec utilities fortran
......@@ -85,6 +104,11 @@ fortran: $(DFFT_MPI_DIR)/TestFDfft
utilities: $(DFFT_MPI_DIR)/CheckDecomposition
clean:
rm -rf $(DFFT_MPI_DIR) *.mod
$(DFFT_MPI_DIR):
mkdir -p $(DFFT_MPI_DIR)
......@@ -95,7 +119,9 @@ $(DFFT_MPI_DIR)/%.o: %.cpp | $(DFFT_MPI_DIR)
$(DFFT_MPI_CXX) $(DFFT_MPI_CXXFLAGS) $(DFFT_MPI_CPPFLAGS) -c -o $@ $<
$(DFFT_MPI_DIR)/%.o: %.f90 | $(DFFT_MPI_DIR)
$(DFFT_MPI_FC) $(DFFT_MPI_FFLAGS) -c -o $@ $<
$(DFFT_MPI_FC) $(DFFT_MPI_FFLAGS) $(DFFT_MPI_CPPFLAGS) -c -o $@ $<
$(DFFT_MPI_DIR)/TestDfft: $(DFFT_MPI_DIR)/TestDfft.o $(DFFT_MPI_DIR)/distribution.o
$(DFFT_MPI_CXX) $(DFFT_MPI_CXXFLAGS) -o $@ $^ $(DFFT_MPI_LDFLAGS)
......@@ -104,10 +130,6 @@ $(DFFT_MPI_DIR)/CheckDecomposition: $(DFFT_MPI_DIR)/CheckDecomposition.o $(DFFT_
$(DFFT_MPI_CC) $(DFFT_MPI_CFLAGS) -o $@ $^ $(DFFT_MPI_LDFLAGS)
$(DFFT_MPI_DIR)/TestFDfft.o: TestFDfft.f90 $(DFFT_MPI_DIR)/FDistribution.o $(DFFT_MPI_DIR)/FDfft.o
$(DFFT_MPI_FC) $(DFFT_MPI_FFLAGS) -c -o $@ $<
$(DFFT_MPI_DIR)/TestFDfft: $(DFFT_MPI_DIR)/TestFDfft.o $(DFFT_MPI_DIR)/FDistribution.o $(DFFT_MPI_DIR)/FDfft.o $(DFFT_MPI_DIR)/DistributionC.o $(DFFT_MPI_DIR)/DfftC.o $(DFFT_MPI_DIR)/distribution.o
$(DFFT_MPI_FC) $(DFFT_MPI_FFLAGS) -o $@ $^ $(DFFT_MPI_LDFLAGS) -lstdc++
clean:
rm -rf $(DFFT_MPI_DIR) *.mod
$(DFFT_MPI_FC) $(DFFT_MPI_FFLAGS) -o $@ $^ $(DFFT_MPI_LDFLAGS) $(DFFT_MPI_FLDFLAGS)
......@@ -60,9 +60,10 @@ DFFT_MPI_CFLAGS ?= -g -O3 -fopenmp -Wall -Wno-deprecated -std=gnu99
DFFT_MPI_CXXFLAGS ?= -g -O3 -fopenmp -Wall
# Fortran flags
DFFT_MPI_FFLAGS ?= -g -O3 -fopenmp -fpp
DFFT_MPI_FFLAGS ?= -g -O3 -fopenmp -cpp
# linker flags
DFFT_MPI_LDFLAGS ?= -lfftw3_omp -lfftw3 -lm
# FFTW3
DFFT_FFTW_HOME ?= $(shell dirname $(shell dirname $(shell which fftw-wisdom)))
DFFT_FFTW_LDFLAGS ?= -L$(DFFT_FFTW_HOME)/lib -lfftw3_omp -lfftw3 -lm
include GNUmakefile
HACC DFFT MiniApp
SWFFT (HACC)
Adrian Pope (et al)
apope@anl.gov
2017-03-09
2017-10-02
========
Overview
......@@ -9,8 +9,9 @@ Overview
This directory contains the code necessary to run the Hardware Accelerated
Cosmology Code (HACC) 3D distributed memory discrete fast Fourier transform,
a lightweight make system, a driver for an example test code (TestDfft),
and a utility that checks grid sizes and MPI rank layouts (CheckDecomposition).
a lightweight make system, drivers for an example test code in C++ (TestDfft)
as well as Fortran (TestFDfft), and a utility that checks grid sizes and
MPI rank layouts (CheckDecomposition).
This code assumes that the global grid will originally be distributed between
MPI ranks using a 3D Cartesian communicator. That data needs to be
......@@ -36,7 +37,7 @@ must be present in the set of prime factors of the grid, eg. if you have
20 MPI ranks then ng must be a multiple of 5 and 2. The "CheckDecomposition"
utility is provided to check (on one rank) whether a proposed grid size and
number of MPI ranks will work, which can be done before submitting a large
test with TestDfft.
test with TestDfft/TestFDfft.
========
Building
......@@ -46,7 +47,7 @@ Building
System Requirements
-------------------
MPI (version?)
MPI-1
FFTW3 (double precision, OpenMP optional, does not use FFTW3's MPI interface)
---------
......@@ -58,6 +59,14 @@ MPI-only version of the code:
$ make
By default the GNUmakefile will make the C/C++ interface and driver, the
Fortran interface and driver, and the CheckDecomposition utility. There are
also rules to make subsets of these:
$ make nativec
$ make utilities
$ make fortran
There is also an example of how to modify the makefile to compile with OpenMP
threading on each MPI rank:
......@@ -76,6 +85,21 @@ environment variables:
$ export DFFT_MPI_CC=cc
$ export DFFT_MPI_CXX=CC
$ export DFFT_MPI_FC=ftn
When using the Intel compilers without the Cray wrappers it may be easier to
use the Intel-specific MPI wrappers, eg:
$ export DFFT_MPI_CC=mpiicc
$ export DFFT_MPI_CXX=mpiicpc
$ export DFFT_MPI_FC=mpiifort
If FFTW3 is in your PATH, then the GNUmakefile will find it by searching for the
fftw-wisdom executable. An alternate FFTW3 installation can be specified by
setting an environment variable to the directory above the "include" and "lib"
directories:
$ export DFFT_FFTW_HOME=/path/to/fftw3
See the base GNUmakefile for a commented list of variables.
......@@ -257,6 +281,40 @@ ALCF/Mira/BGQ has incomplete support for C++11 and is unlikely to add
features in the remaining lifetime of the machine (2019-2020) so we use
C++11 features very sparingly.
===========================
TestFDfft (Fortran Example)
===========================
-----
Usage
-----
This test program provides an example interface for the Fortran wrappers
built around the C++ Distribution and Dfft classes. The structure
and usage of this program is identical to the C++ TestDfft driver. The
command line signature is
$ build/TestFDfft <n_repetitions> <ngx> [ngy ngz]
with the same interpretation as described above for TestDfft.
The Fortran wrappers are contained in the FDistribution and FDfft modules
which are built around the F2003 standard ISO_C_BINDING module. An
important point to make is that the Fortran wrappers interface with C
pointers to the Fortran arrays containing the data to be transformed. This
is achieved using the F2003 standard C_F_POINTER subroutine. In addition,
optimal memory alignment is obtained using the fftw_alloc_complex function.
The TestFDfft driver provides a good example of each of these procedures.
See further notes below regarding the usage of multidimensional arrays
within the Fortran interface.
--------------
Example Output
--------------
The output from TestFDfft will match that of TestDfft shown above including
the hexadecimal cast to check bit-wise numerical accuracy.
============================
CheckDecomposition (Utility)
============================
......@@ -310,3 +368,17 @@ for iteration through the grids values themselves, so the total number of
grid vertexes locally and globally should not be limited by 32-bit integer
size. This distribution code has been tested up to 16384^3 global grid and
on >~10^6 MPI-ranks.
-------------------------------
Fortran Multidimensional Arrays
-------------------------------
The linear storage in memory of multidimensional arrays differs between
that of C (row-major) and Fortran (column-major). The Fortran interface
provided here implicitly assumes that the one-dimensional memory storage
of the arrays to be transformed conforms with the C convention. The
returned transformed data is also arranged in row-major format. Hence, care
must be taken to ensure that data is arranged in this way when interfacing
with the Fortran wrappers. In general, this involves a transpose of data when
using a multidimensional Fortran array to store the 3D data in memory.
......@@ -88,7 +88,7 @@ program main
#ifdef _OPENMP
ierr = fftw_init_threads()
if (.not. ierr) then
if (ierr == 0) then
write(*,*) "fftw_init_threads() failed!"
call MPI_Abort(MPI_COMM_WORLD, ierr, ierr)
endif
......@@ -224,9 +224,9 @@ subroutine test
numg = 1.0*ng(1)*ng(2)*ng(3)
write(*,*)
write(*,*) "Hex representations of double precision floats"
write(*,fmt="(F18.3,A,Z)") zero, " = ", zero
write(*,fmt="(F18.3,A,Z)") one, " = ", one
write(*,fmt="(F18.3,A,Z)") numg, " = ", numg
write(*,fmt="(F18.3,A,Z18)") zero, " = ", zero
write(*,fmt="(F18.3,A,Z18)") one, " = ", one
write(*,fmt="(F18.3,A,Z18)") numg, " = ", numg
write(*,*)
endif
......@@ -286,7 +286,8 @@ subroutine assign_delta_function(dfft, a, n)
!! Determine local grid dimensions in r-space
call nlocalRspace(dfft, local_ng)
!! Fill in the delta function
!! Fill in the delta function.
!! NOTE: We are filling in one-dimensional memory using the row-major C convention.
local_indx = 1
do i = 1, local_ng(1)
global_i = local_ng(1)*self(1) + i
......@@ -294,11 +295,10 @@ subroutine assign_delta_function(dfft, a, n)
global_j = local_ng(2)*self(2) + j
do k = 1, local_ng(3)
global_k = local_ng(3)*self(3) + k
a(local_indx)%im = 0.
if (global_i == 1 .and. global_j == 1 .and. global_k == 1) then
a(local_indx)%re = 1.
a(local_indx) = cmplx(1.,0.)
else
a(local_indx)%re = 0.
a(local_indx) = cmplx(0.,0.)
endif
local_indx = local_indx + 1
enddo
......@@ -325,12 +325,12 @@ subroutine check_kspace(dfft, a)
nlocal = localSize(dfft)
LocalRealMin = a(1)%re ; LocalRealMax = a(1)%re
LocalImagMin = a(1)%im ; LocalImagMax = a(1)%im
LocalRealMin = real(a(2)) ; LocalRealMax = real(a(2))
LocalImagMin = aimag(a(2)) ; LocalImagMax = aimag(a(2))
do i = 1, nlocal
re = a(i)%re
im = a(i)%im
re = real(a(i))
im = aimag(a(i))
if (re < LocalRealMin) LocalRealMin = re
if (re > LocalRealMax) LocalRealMax = re
if (im < LocalImagMin) LocalImagMin = im
......@@ -348,8 +348,8 @@ subroutine check_kspace(dfft, a)
if (parRank == 0) then
write(*,*)
write(*,*) "k-space:"
write(*,fmt="(A,F18.3,F18.3,A,Z,Z,A)") "real in [", GlobalRealMin, GlobalRealMax, "] = [", GlobalRealMin, GlobalRealMax, "]"
write(*,fmt="(A,F18.3,F18.3,A,Z,Z,A)") "imag in [", GlobalImagMin, GlobalImagMax, "] = [", GlobalImagMin, GlobalImagMax, "]"
write(*,fmt="(A,F18.3,F18.3,A,Z18,Z18,A)") "real in [", GlobalRealMin, GlobalRealMax, "] = [", GlobalRealMin, GlobalRealMax, "]"
write(*,fmt="(A,F18.3,F18.3,A,Z18,Z18,A)") "imag in [", GlobalImagMin, GlobalImagMax, "] = [", GlobalImagMin, GlobalImagMax, "]"
write(*,*)
endif
......@@ -378,8 +378,8 @@ subroutine check_rspace(dfft, a)
call selfRspace(dfft, self)
call nlocalRspace(dfft, local_ng)
LocalRealMin = a(1)%re ; LocalRealMax = a(1)%re
LocalImagMin = a(1)%im ; LocalImagMax = a(1)%im
LocalRealMin = real(a(2)) ; LocalRealMax = real(a(2))
LocalImagMin = aimag(a(2)) ; LocalImagMax = aimag(a(2))
parComm = parentComm(dfft)
call MPI_Comm_rank(parComm, parRank, ierr)
......@@ -397,10 +397,11 @@ subroutine check_rspace(dfft, a)
do k = 1, local_ng(3)
global_k = local_ng(3)*self(3) + k
if (global_i == 1 .and. global_j == 1 .and. global_k == 1) then
write(*,fmt="(A,F18.3,F18.3,A,Z,Z,A)") "a[0,0,0] = ", a(local_indx)%re, a(local_indx)%im , "= (", a(local_indx)%re, a(local_indx)%im, ")"
write(*,fmt="(A,F18.3,F18.3,A,Z18,Z18,A)") "a[0,0,0] = ", real(a(local_indx)), aimag(a(local_indx)), &
"= (", real(a(local_indx)), aimag(a(local_indx)), ")"
else
re = a(i)%re
im = a(i)%im
re = real(a(local_indx))
im = aimag(a(local_indx))
if (re < LocalRealMin) LocalRealMin = re
if (re > LocalRealMax) LocalRealMax = re
if (im < LocalImagMin) LocalImagMin = im
......@@ -411,9 +412,14 @@ subroutine check_rspace(dfft, a)
enddo
enddo
call MPI_Allreduce(LocalRealMin, GlobalRealMin, 1, MPI_DOUBLE, MPI_MIN, parComm, ierr)
call MPI_Allreduce(LocalRealMax, GlobalRealMax, 1, MPI_DOUBLE, MPI_MAX, parComm, ierr)
call MPI_Allreduce(LocalImagMin, GlobalImagMin, 1, MPI_DOUBLE, MPI_MIN, parComm, ierr)
call MPI_Allreduce(LocalImagMax, GlobalImagMax, 1, MPI_DOUBLE, MPI_MAX, parComm, ierr)
if (parRank == 0) then
write(*,fmt="(A,F18.3,F18.3,A,Z,Z,A)") "real in [", GlobalRealMin, GlobalRealMax, "] = [", GlobalRealMin, GlobalRealMax, "]"
write(*,fmt="(A,F18.3,F18.3,A,Z,Z,A)") "imag in [", GlobalImagMin, GlobalImagMax, "] = [", GlobalImagMin, GlobalImagMax, "]"
write(*,fmt="(A,F18.3,F18.3,A,Z18,Z18,A)") "real in [", GlobalRealMin, GlobalRealMax, "] = [", GlobalRealMin, GlobalRealMax, "]"
write(*,fmt="(A,F18.3,F18.3,A,Z18,Z18,A)") "imag in [", GlobalImagMin, GlobalImagMax, "] = [", GlobalImagMin, GlobalImagMax, "]"
write(*,*)
endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment