Commit fd172a0a authored by Ron Rahaman's avatar Ron Rahaman
Browse files

Squashed '3rd_party/nek5000_gslib/' content from commit 88f90cb96

git-subtree-dir: 3rd_party/nek5000_gslib
git-subtree-split: 88f90cb96953527e3e833f8dbf2719273fc8346d
parents
# Travis CI configuration: each matrix entry builds one gslib test program
# with mpicc and runs it under mpiexec with $NP processes.
language: c

before_install:
  # Remember the checkout directory so the script phase can return to it.
  - export ROOT_DIR=`pwd`
  - sudo apt-get update -qq
  - sudo apt-get install -y libmpich-dev mpich

env:
  matrix:
    - TEST=crystal_test NP=2
    - TEST=findpts_el_2_test NP=2
    - TEST=findpts_el_2_test2 NP=2
    - TEST=findpts_el_3_test NP=2
    - TEST=findpts_el_3_test2 NP=2
    - TEST=findpts_local_test NP=2
    - TEST=findpts_test NP=2
    - TEST=findpts_test_ms NP=2
    - TEST=gs_test NP=2
    - TEST=gs_test_gop_nonblocking NP=2
    - TEST=gs_test_gop_blocking NP=2
    - TEST=gs_unique_test NP=2
    - TEST=lob_bnd_test NP=2
    - TEST=obbox_test NP=2
    - TEST=poly_test NP=2
    - TEST=sarray_sort_test NP=2
    - TEST=sarray_transfer_test NP=2
    - TEST=sort_test NP=2
    - TEST=sort_test2 NP=2

# Nothing to install beyond the packages fetched in before_install.
install: true

script:
  - cd $ROOT_DIR
  - make NBC=1 CC=mpicc tests/$TEST
  - mpiexec -np $NP ./tests/$TEST
Copyright (c) 2008-2019, UCHICAGO ARGONNE, LLC.
The UChicago Argonne, LLC as Operator of Argonne National
Laboratory holds copyright in the Software. The copyright holder
reserves all rights except those expressly granted to licensees,
and U.S. Government license rights.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the disclaimer below.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the disclaimer (as noted below)
in the documentation and/or other materials provided with the
distribution.
3. Neither the name of ANL nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Additional BSD Notice
---------------------
1. This notice is required to be provided under our contract with
the U.S. Department of Energy (DOE). This work was produced at
Argonne National Laboratory under Contract
No. DE-AC02-06CH11357 with the DOE.
2. Neither the United States Government nor UCHICAGO ARGONNE,
LLC nor any of their employees, makes any warranty,
express or implied, or assumes any liability or responsibility for the
accuracy, completeness, or usefulness of any information, apparatus,
product, or process disclosed, or represents that its use would not
infringe privately-owned rights.
3. Also, reference herein to any specific commercial products, process,
or services by trade name, trademark, manufacturer or otherwise does
not necessarily constitute or imply its endorsement, recommendation,
or favoring by the United States Government or UCHICAGO ARGONNE LLC.
The views and opinions of authors expressed
herein do not necessarily state or reflect those of the United States
Government or UCHICAGO ARGONNE, LLC, and shall
not be used for advertising or product endorsement purposes.
# ---------------------------------------------------------------------------
# User-tunable build options (override on the command line, e.g. `make MPI=0`).
#   MPI      non-zero: define MPI and default CC/FC to the MPI wrappers
#   ADDUS    non-zero: define UNDERSCORE
#   USREXIT  non-zero: define USE_USR_EXIT
#   NBC      non-zero: define USE_NBC
#   LIBNAME  name used for the library archive and in -l$(LIBNAME)
#   BLAS     0: define USE_NAIVE_BLAS; 1: define USE_CBLAS
#   DEBUG    non-zero: define GSLIB_DEBUG and add -g to CFLAGS
#   CPREFIX  value given to the PREFIX macro
#   FPREFIX  value given to the FPREFIX macro
# ---------------------------------------------------------------------------
MPI ?= 1
ADDUS ?= 1
USREXIT ?= 0
NBC ?= 0
LIBNAME ?= gs
BLAS ?= 0
DEBUG ?= 0
CFLAGS ?= -O2
FFLAGS ?= -O2
CPREFIX ?= gslib_
FPREFIX ?= fgslib_
# Source tree layout, all relative to the directory make is run from.
SRCROOT=.
TESTDIR=$(SRCROOT)/tests
FTESTDIR=$(TESTDIR)/fortran
SRCDIR=$(SRCROOT)/src
INCDIR=$(SRCROOT)/src
LIBDIR=$(SRCROOT)/lib
# Install into DESTDIR when it is set to something non-blank, otherwise into
# ./build.
ifneq (,$(strip $(DESTDIR)))
INSTALL_ROOT = $(DESTDIR)
else
INSTALL_ROOT = $(SRCROOT)/build
endif
# config.h is regenerated at parse time on EVERY make invocation: the file is
# first truncated here, then each enabled option below appends a guarded
# #define to it.  The same options are also collected in G as -D flags.
$(shell >config.h)
ifneq (0,$(MPI))
SN=MPI
G:=$(G) -D$(SN)
# Only replace CC/FC when they still hold make's built-in default, so values
# from the command line or the environment are respected.
ifeq ($(origin CC),default)
CC = mpicc
endif
ifeq ($(origin FC),default)
FC = mpif77
endif
$(shell printf "#ifndef ${SN}\n#define ${SN}\n#endif\n" >>config.h)
endif
ifneq (0,$(ADDUS))
SN=UNDERSCORE
G:=$(G) -D$(SN)
$(shell printf "#ifndef ${SN}\n#define ${SN}\n#endif\n" >>config.h)
endif
# GLOBAL_LONG_LONG, PREFIX and FPREFIX are defined unconditionally.
SN=GLOBAL_LONG_LONG
G:=$(G) -D$(SN)
$(shell printf "#ifndef ${SN}\n#define ${SN}\n#endif\n" >>config.h)
SN=PREFIX
G:=$(G) -D$(SN)=$(CPREFIX)
$(shell printf "#ifndef ${SN}\n#define ${SN} ${CPREFIX}\n#endif\n" >>config.h)
SN=FPREFIX
G:=$(G) -D$(SN)=$(FPREFIX)
$(shell printf "#ifndef ${SN}\n#define ${SN} ${FPREFIX}\n#endif\n" >>config.h)
# NOTE: USE_USR_EXIT, USE_NBC, USE_CBLAS and GSLIB_DEBUG are passed on the
# compiler command line only; they are not recorded in config.h.
ifneq (0,$(USREXIT))
G+=-DUSE_USR_EXIT
endif
ifneq (0,$(NBC))
G+=-DUSE_NBC
endif
ifeq (0,$(BLAS))
SN=USE_NAIVE_BLAS
G:=$(G) -D$(SN)
$(shell printf "#ifndef ${SN}\n#define ${SN}\n#endif\n" >>config.h)
endif
ifeq (1,$(BLAS))
G+=-DUSE_CBLAS
endif
ifneq (0,$(DEBUG))
G+=-DGSLIB_DEBUG
CFLAGS+=-g
endif
# Full compile commands used by the pattern rules: compiler, user flags,
# include path and the accumulated -D options.
CCCMD=$(CC) $(CFLAGS) -I$(INCDIR) $(G)
FCCMD=$(FC) $(FFLAGS) -I$(INCDIR) $(G)
# C test programs; each one is built from the .c file of the same name.
TESTS=$(TESTDIR)/sort_test $(TESTDIR)/sort_test2 $(TESTDIR)/sarray_sort_test \
$(TESTDIR)/comm_test $(TESTDIR)/crystal_test \
$(TESTDIR)/sarray_transfer_test $(TESTDIR)/gs_test \
$(TESTDIR)/gs_test_gop_blocking $(TESTDIR)/gs_test_gop_nonblocking \
$(TESTDIR)/gs_unique_test \
$(TESTDIR)/findpts_el_2_test \
$(TESTDIR)/findpts_el_2_test2 $(TESTDIR)/findpts_el_3_test \
$(TESTDIR)/findpts_el_3_test2 $(TESTDIR)/findpts_local_test \
$(TESTDIR)/findpts_test $(TESTDIR)/findpts_test_ms $(TESTDIR)/poly_test \
$(TESTDIR)/lob_bnd_test $(TESTDIR)/obbox_test
# Fortran test programs.
FTESTS=$(FTESTDIR)/f-igs
# Object groups that together make up the library archive.
GS=$(SRCDIR)/gs.o $(SRCDIR)/sort.o $(SRCDIR)/sarray_transfer.o \
$(SRCDIR)/sarray_sort.o $(SRCDIR)/gs_local.o $(SRCDIR)/fail.o \
$(SRCDIR)/crystal.o $(SRCDIR)/comm.o $(SRCDIR)/tensor.o
FWRAPPER=$(SRCDIR)/fcrystal.o $(SRCDIR)/findpts.o
INTP=$(SRCDIR)/findpts_local.o $(SRCDIR)/obbox.o $(SRCDIR)/poly.o \
$(SRCDIR)/lob_bnd.o $(SRCDIR)/findpts_el_3.o $(SRCDIR)/findpts_el_2.o
.PHONY: all lib install tests clean objects

# Default goal: build the static library and stage it under $(INSTALL_ROOT).
all : lib install

# Archive the library objects into $(SRCDIR)/lib$(LIBNAME).a.
# `lib` is phony, so this recipe runs on every invocation.
lib: $(GS) $(FWRAPPER) $(INTP)
	@$(AR) cr $(SRCDIR)/lib$(LIBNAME).a $?
	@ranlib $(SRCDIR)/lib$(LIBNAME).a

# Stage the archive, the public headers and the generated config.h.
# config.h is moved (not copied); it is recreated the next time make parses
# this file.  The headers are copied once (a duplicated cp line was removed).
install: lib
	@mkdir -p $(INSTALL_ROOT)/lib 2>/dev/null
	@cp -v $(SRCDIR)/lib$(LIBNAME).a $(INSTALL_ROOT)/lib 2>/dev/null
	@mkdir -p $(INSTALL_ROOT)/include 2>/dev/null
	@cp $(SRCDIR)/*.h $(INSTALL_ROOT)/include 2>/dev/null
	@mv config.h $(INSTALL_ROOT)/include 2>/dev/null

tests: $(TESTS)

clean: ; @$(RM) config.h $(SRCDIR)/*.o $(SRCDIR)/*.s $(SRCDIR)/*.a $(TESTDIR)/*.o $(FTESTDIR)/*.o $(TESTS)

# C tests compile and link against the INSTALLED headers and library, so the
# library must be built and staged first (order-only prerequisites).
$(TESTS): % : %.c | lib install
	$(CC) $(CFLAGS) -I$(INSTALL_ROOT)/include $< -o $@ -L$(INSTALL_ROOT)/lib -l$(LIBNAME) -lm $(LDFLAGS)

# Fortran tests link against the archive left in $(SRCDIR).
$(FTESTS): % : %.o | lib install
	$(FCCMD) $^ -o $@ -L$(SRCDIR) -l$(LIBNAME)

%.o: %.c ; $(CCCMD) -c $< -o $@
%.o: %.f ; $(FCCMD) -c $< -o $@
%.s: %.c ; $(CCCMD) -S $< -o $@

# NOTE(review): OBJECTS is not assigned anywhere in the visible part of this
# Makefile, so this target currently has no prerequisites - confirm intent.
objects: $(OBJECTS) ;
# GSLIB
[![Build Status](https://travis-ci.org/gslib/gslib.svg?branch=master)](https://travis-ci.org/gslib/gslib)
* Scalable Many-to-Many (neighborhood) gather-scatter collectives
* Robust GLL interpolation on hexahedral spectral element meshes
# Build Instructions
The build system relies on GNU Make with the `make` command. To compile gslib just run:
```
make CC=mpicc
```
Different make options are available (see Makefile).
# Applications
**\[1]&#160;[Nek5000](https://nek5000.mcs.anl.gov/)**: Nek5000 open-source, spectral element code.
**\[2]&#160;[CEED](http://ceed.exascaleproject.org/)**: Co-design center for Efficient Exascale Discretizations.
**\[3]&#160;[Nektar++](http://www.nektar.info)**: Nektar++ open-source spectral/hp element code.
**\[4]&#160;[Libparanumal](https://github.com/paranumal/libparanumal)**: Accelerated finite element flow solvers.
# Release v1.0.3
## Major Features and Improvements
* Added non-blocking gs_op (compile with NBC=1, crystal router is unsupported)
* Added local sort routines for float/double
* Added Fortran wrapper for gs_unique
* Added gs_hf2c to convert a Fortran handle into a C handle
## Backwards-Incompatible Changes
* Prefixed all functions by default (see make variables PREFIX and FPREFIX)
* Removed XXT and AMG solver from distribution
* Non-blocking MPI collectives (NBC) require MPI v3.0 or later
## Bug Fixes and Other Changes
## Thanks to our Contributors
We are also grateful to all who filed issues or helped resolve them, asked and answered questions, and were part of inspiring discussions.
#ifndef C99_H
#define C99_H

/* Compatibility shim for compilers that do not announce C99 support:
   when __STDC_VERSION__ is missing or older than 199901L, the C99 keywords
   `restrict` and `inline` are defined away so the sources still compile.
   NO_C99 is only a temporary marker and is removed again afterwards. */
#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L)
#  define NO_C99
#endif

#ifdef NO_C99
#  define restrict
#  define inline
#  undef NO_C99
#endif

#endif
#include <stddef.h> /* for size_t */
#include <stdlib.h> /* for exit */
#include <string.h> /* memcpy */
#include <limits.h> /* for gs identities */
#include <float.h> /* for gs identities */
#include "name.h"
#include "fail.h"
#include "types.h"
#include "tensor.h"
#include "gs_defs.h"
#include "gs_local.h"
#include "comm.h"
/* Global process id and process count; the defaults describe a
   single-process run.  Per the note in "comm.h" these exist strictly for
   diagnostic messages (fail.c). */
uint comm_gbl_id=0, comm_gbl_np=1;
/* Macros from "gs_defs.h"; presumably they emit the gs_identity_<T>[] and
   gs_dom_size[] tables that the routines in this file index - confirm
   against gs_defs.h. */
GS_DEFINE_IDENTITIES()
GS_DEFINE_DOM_SIZES()
/* Hand-written tree scan over the processes of `com`, built on the
   point-to-point wrappers from "comm.h".
     scan   : output area holding 2*vn elements of domain `dom`.  The first vn
              elements are initialised with gs_init_array and accumulate data
              received from the base of the enclosing range; the second vn
              elements ("red") start as a copy of v and accumulate the
              reduction.  Per the usage notes in "comm.h" they end up as the
              exclusive scan and the reduction over all processes.
     v      : this process's vn input elements (not modified).
     buffer : scratch area, also 2*vn elements.
   Every message is tagged with the sender's id. */
static void scan_imp(void *scan, const struct comm *com, gs_dom dom, gs_op op,
const void *v, uint vn, void *buffer)
{
comm_req req[2];
size_t vsize = vn*gs_dom_size[dom];
const uint id=com->id, np=com->np;
uint n = np, c=1, odd=0, base=0;
void *buf[2];
/* red: second half of the output area, holds the running reduction */
void *red = (char*)scan+vsize;
buf[0]=buffer,buf[1]=(char*)buffer+vsize;
/* Descend: halve the range [base,base+n) until it contains only this
   process.  `c` records (one bit per level) whether the upper half was taken,
   `odd` records the parity of n at each level; the upper half receives the
   extra element when n is odd. */
while(n>1) {
odd=(odd<<1)|(n&1);
c<<=1, n>>=1;
if(id>=base+n) c|=1, base+=n, n+=(odd&1);
}
gs_init_array(scan,vn,dom,op);
memcpy(red,v,vsize);
/* Ascend: undo one halving per iteration by popping bits from c and odd. */
while(n<np) {
if(c&1) n-=(odd&1), base-=n;
c>>=1, n<<=1, n+=(odd&1);
odd>>=1;
if(base==id) {
/* Base of the range: swap reductions with the base of the upper half
   (id+n/2) and fold the received one into red. */
comm_irecv(&req[0],com, buf[0],vsize, id+n/2,id+n/2);
comm_isend(&req[1],com, red ,vsize, id+n/2,id);
comm_wait(req,2);
gs_gather_array(red,buf[0],vn,dom,op);
} else {
/* Base of an upper half: swap with the range's base, receiving its
   reduction into the first half of `scan`, then stop ascending. */
comm_irecv(&req[0],com, scan,vsize, base,base);
comm_isend(&req[1],com, red ,vsize, base,id);
comm_wait(req,2);
break;
}
}
/* Descend again, starting from the level reached above. */
while(n>1) {
if(base==id) {
/* scan and red are contiguous, so one message of 2*vsize carries both. */
comm_send(com, scan ,2*vsize, id+n/2,id);
} else {
/* Fold the received first half into scan; the received second half
   replaces red. */
comm_recv(com, buffer,2*vsize, base,base);
gs_gather_array(scan,buf[0],vn,dom,op);
memcpy(red,buf[1],vsize);
}
odd=(odd<<1)|(n&1);
c<<=1, n>>=1;
if(id>=base+n) c|=1, base+=n, n+=(odd&1);
}
}
/* Hand-written tree allreduce over the processes of `com`, built on the
   blocking point-to-point wrappers from "comm.h".
     v   : vn elements of domain `dom`; input on entry, overwritten in place
           with the received/combined values.
     buf : scratch area of vn elements used for incoming partial results.
   Every message is tagged with the sender's id. */
static void allreduce_imp(const struct comm *com, gs_dom dom, gs_op op,
void *v, uint vn, void *buf)
{
size_t total_size = vn*gs_dom_size[dom];
const uint id=com->id, np=com->np;
uint n = np, c=1, odd=0, base=0;
/* Descend: halve the range [base,base+n) until it contains only this
   process.  `c` records (one bit per level) whether the upper half was taken,
   `odd` records the parity of n at each level. */
while(n>1) {
odd=(odd<<1)|(n&1);
c<<=1, n>>=1;
if(id>=base+n) c|=1, base+=n, n+=(odd&1);
}
/* Ascend (reduce): undo one halving per iteration. */
while(n<np) {
if(c&1) n-=(odd&1), base-=n;
c>>=1, n<<=1, n+=(odd&1);
odd>>=1;
if(base==id) {
/* Base of the range: combine the upper half's partial result into v. */
comm_recv(com, buf,total_size, id+n/2,id+n/2);
gs_gather_array(v,buf,vn, dom,op);
} else {
/* Base of an upper half: hand v to the range's base and stop ascending. */
comm_send(com, v,total_size, base,id);
break;
}
}
/* Descend (broadcast): each range's base forwards v to the base of its
   upper half, which overwrites its own v with it. */
while(n>1) {
if(base==id)
comm_send(com, v,total_size, id+n/2,id);
else
comm_recv(com, v,total_size, base,base);
odd=(odd<<1)|(n&1);
c<<=1, n>>=1;
if(id>=base+n) c|=1, base+=n, n+=(odd&1);
}
}
/* Public scan entry point; forwards every argument unchanged to scan_imp.
   See the comm_scan usage notes in "comm.h": `scan` and `buffer` each hold
   2*vn elements of domain `dom`, and `v` holds the vn input elements. */
void comm_scan(void *scan, const struct comm *com, gs_dom dom, gs_op op,
const void *v, uint vn, void *buffer)
{
scan_imp(scan, com,dom,op, v,vn, buffer);
}
/* Reduce the vn elements of v (domain `dom`) across all processes of `com`
   with operation `op`, leaving the result in v on return; `buf` is a scratch
   area of vn elements.

   With MPI defined: when both the domain and the operation map onto an MPI
   datatype / MPI_Op, MPI_Allreduce is used and its result is copied from buf
   back into v; any other combination goes through allreduce_imp.
   Without MPI the function returns without touching v.
   A call with vn==0 is a no-op. */
void comm_allreduce(const struct comm *com, gs_dom dom, gs_op op,
                    void *v, uint vn, void *buf)
{
  if(vn==0) return;
#ifdef MPI
  {
    MPI_Datatype mpitype;
    MPI_Op mpiop;
    int have_mpi_equiv = 1;   /* cleared when dom or op has no MPI mapping */

    switch(dom) {
      case gs_double: mpitype=MPI_DOUBLE; break;
      case gs_float:  mpitype=MPI_FLOAT;  break;
      case gs_int:    mpitype=MPI_INT;    break;
      case gs_long:   mpitype=MPI_LONG;   break;
      WHEN_LONG_LONG(case gs_long_long: mpitype=MPI_LONG_LONG; break;)
      default:        have_mpi_equiv=0;   break;
    }

    if(have_mpi_equiv) {
      switch(op) {
        case gs_add: mpiop=MPI_SUM;     break;
        case gs_mul: mpiop=MPI_PROD;    break;
        case gs_min: mpiop=MPI_MIN;     break;
        case gs_max: mpiop=MPI_MAX;     break;
        default:     have_mpi_equiv=0;  break;
      }
    }

    if(have_mpi_equiv) {
      MPI_Allreduce(v,buf,vn,mpitype,mpiop,com->c);
      memcpy(v,buf,vn*gs_dom_size[dom]);
    } else {
      allreduce_imp(com,dom,op, v,vn, buf);
    }
  }
#endif
}
/* Start an allreduce of the vn elements of v (domain `dom`, operation `op`)
   across all processes of `com`.

     req : receives the request to wait on.  With MPI defined it is always
           written: it holds the MPI_Iallreduce request on the non-blocking
           path and MPI_REQUEST_NULL on every path that completes (or does
           nothing) before returning, so the caller can wait on it
           unconditionally.
     v   : input elements.
     buf : receives the reduced values.  On the non-blocking path they are
           valid only after the request has completed.

   Paths (MPI defined):
     - dom/op both map onto MPI and USE_NBC is defined: MPI_Iallreduce(v -> buf).
     - dom/op both map onto MPI but USE_NBC is not defined: reported through
       fail().
     - otherwise: the blocking allreduce_imp is run.  It leaves its result in
       v, so the result is additionally copied into buf to match where the
       non-blocking path delivers it.
   Without MPI the function does nothing.  A call with vn==0 performs no
   reduction. */
void comm_iallreduce(comm_req *req, const struct comm *com, gs_dom dom, gs_op op,
                     void *v, uint vn, void *buf)
{
#ifdef MPI
  /* Never leave the caller's request indeterminate. */
  if(req) *req = MPI_REQUEST_NULL;
#endif
  if(vn==0) return;
#ifdef MPI
  {
    MPI_Datatype mpitype;
    MPI_Op mpiop;
    switch(dom) {
      case gs_double: mpitype=MPI_DOUBLE; break;
      case gs_float:  mpitype=MPI_FLOAT;  break;
      case gs_int:    mpitype=MPI_INT;    break;
      case gs_long:   mpitype=MPI_LONG;   break;
      WHEN_LONG_LONG(case gs_long_long: mpitype=MPI_LONG_LONG; break;)
      default:        goto comm_iallreduce_byhand;
    }
    switch(op) {
      case gs_add: mpiop=MPI_SUM;  break;
      case gs_mul: mpiop=MPI_PROD; break;
      case gs_min: mpiop=MPI_MIN;  break;
      case gs_max: mpiop=MPI_MAX;  break;
      default:     goto comm_iallreduce_byhand;
    }
#ifdef USE_NBC
    MPI_Iallreduce(v,buf,vn,mpitype,mpiop,com->c,req);
#else
    fail(1,"comm_iallreduce",__LINE__,"Invalid call to MPI_Iallreduce!\n");
#endif
    return;
  }
comm_iallreduce_byhand:
  /* Blocking fallback: result ends up in v; mirror it into buf so callers
     find it in the same place as after the non-blocking path. */
  allreduce_imp(com,dom,op, v,vn, buf);
  memcpy(buf,v,vn*gs_dom_size[dom]);
#endif
}
/* Dot product of the distributed vectors v and w: the local contribution of
   the n entries held by this process is computed with tensor_dot and then
   summed over all processes of `comm` via comm_allreduce. */
double comm_dot(const struct comm *comm, double *v, double *w, uint n)
{
  double local_sum;   /* local dot product, overwritten by the global sum */
  double scratch;     /* work space required by comm_allreduce */

  local_sum = tensor_dot(v,w,n);
  comm_allreduce(comm,gs_double,gs_add, &local_sum,1, &scratch);
  return local_sum;
}
/* T comm_reduce__T(const struct comm *comm, gs_op op, const T *in, uint n)

   Generated once per domain T by GS_FOR_EACH_DOMAIN(DEFINE_REDUCE).  Each
   generated function
     1. starts from accum = gs_identity_T[op],
     2. if n != 0, folds the n local inputs into accum with GS_DO_<op>
        (selected by a switch on op),
     3. combines accum across the processes of `comm` with comm_allreduce,
     4. returns accum.
   The n != 0 guard is required because WITH_OP is a do/while loop whose body
   runs once before `--n` is tested.  The helper macros are undefined again at
   the end of this section. */
#define SWITCH_OP_CASE(T,OP) case gs_##OP: WITH_OP(T,OP); break;
#define SWITCH_OP(T,op) do switch(op) { \
GS_FOR_EACH_OP(T,SWITCH_OP_CASE) case gs_op_n: break; } while(0)
#define WITH_OP(T,OP) \
do { T v = *in++; GS_DO_##OP(accum,v); } while(--n)
#define DEFINE_REDUCE(T) \
T PREFIXED_NAME(comm_reduce__##T)( \
const struct comm *comm, gs_op op, const T *in, uint n) \
{ \
T accum = gs_identity_##T[op], buf; \
if(n!=0) SWITCH_OP(T,op); \
comm_allreduce(comm,gs_##T,op, &accum,1, &buf); \
return accum; \
}
GS_FOR_EACH_DOMAIN(DEFINE_REDUCE)
#undef DEFINE_REDUCE
#undef WITH_OP
#undef SWITCH_OP
#undef SWITCH_OP_CASE
#ifndef COMM_H
#define COMM_H
/* requires:
<stddef.h> for size_t
<stdlib.h> for exit
"fail.h", "types.h"
"gs_defs.h" for comm_allreduce, comm_scan, comm_reduce_T
*/
#include <assert.h>
#include <string.h>
#if !defined(FAIL_H) || !defined(TYPES_H)
#warning "comm.h" requires "fail.h" and "types.h"
#endif
/*
When the preprocessor macro MPI is defined, defines (very) thin wrappers
for the handful of used MPI routines. Alternatively, when MPI is not defined,
these wrappers become dummy routines suitable for a single process run.
No code outside of "comm.h" and "comm.c" makes use of MPI at all.
Basic usage:
struct comm c;
comm_init(&c, MPI_COMM_WORLD); // initializes c using MPI_Comm_dup
comm_free(&c);
Very thin MPI wrappers: (see below for implementation)
comm_send,_recv,_isend,_irecv,_time,_barrier
Additionally, some reduction and scan routines are provided making use
of the definitions in "gs_defs.h" (provided this has been included first).
Example comm_allreduce usage:
double v[5], buf[5];
comm_allreduce(&c, gs_double,gs_add, v,5,buf);
// Computes the vector sum of v across all procs, using
// buf as a scratch area. Delegates to MPI_Allreduce if possible.
Example comm_scan usage:
long in[5], out[2][5], buf[2][5];
comm_scan(out, &c,gs_long,gs_add, in,5,buf);
// out[0] will be the vector sum of "in" across procs with ids
*strictly* less than this one (exclusive behavior),
and out[1] will be the vector sum across all procs, as would
be computed with comm_allreduce.
Note: differs from MPI_Scan which has inclusive behavior
Example comm_reduce_double, etc. usage:
T out, in[10];
out = comm_reduce_T(&c, gs_max, in, 10);
// out will equal the largest element of "in",
across all processors
// T can be "double", "float", "int", "long", "slong", "sint", etc.
as defined in "gs_defs.h"
*/
/* Communicator and request handle types.  With MPI they alias the MPI types;
   without MPI they are plain ints, and MPI_Fint is also provided as an int
   (presumably so code that names MPI_Fint still compiles - confirm). */
#ifdef MPI
#include <mpi.h>
typedef MPI_Comm comm_ext;
typedef MPI_Request comm_req;
#else
typedef int comm_ext;
typedef int comm_req;
typedef int MPI_Fint;
#endif
/* Route the externally visible names through PREFIXED_NAME. */
#define comm_allreduce PREFIXED_NAME(comm_allreduce )
#define comm_iallreduce PREFIXED_NAME(comm_iallreduce)
#define comm_scan PREFIXED_NAME(comm_scan )
#define comm_dot PREFIXED_NAME(comm_dot )
/* global id, np vars strictly for diagnostic messages (fail.c) */
/* The guard skips this section when comm_gbl_id is already defined as a
   macro. */
#ifndef comm_gbl_id
#define comm_gbl_id PREFIXED_NAME(comm_gbl_id)
#define comm_gbl_np PREFIXED_NAME(comm_gbl_np)
extern uint comm_gbl_id, comm_gbl_np;
#endif
/* Communicator handle passed to every comm_* routine. */
struct comm {
/* id: this process's id, np: process count; the tree algorithms in comm.c
   use id as the peer address and np as the size of the process range
   (presumably the rank and size of `c` - confirm in comm_init). */
uint id, np;
/* underlying communicator: MPI_Comm when MPI is defined, otherwise an int */
comm_ext c;
};
/* Forward declarations.  The `static` routines are the thin wrappers
   mentioned in the header comment above ("see below for implementation");
   their definitions are expected later in this header.  comm_dot is the only
   non-static routine declared here and is defined in "comm.c". */
static void comm_init(struct comm *c, comm_ext ce);
/* (macro) static void comm_init_check(struct comm *c, MPI_Fint ce, uint np); */
/* (macro) static void comm_dup(struct comm *d, const struct comm *s); */
static void comm_free(struct comm *c);
static double comm_time(void);
static void comm_barrier(const struct comm *c);
/* Point-to-point transfers: p/n are the data pointer and a size_t length,
   followed by the peer id (src/dst) and the message tag. */
static void comm_recv(const struct comm *c, void *p, size_t n,
uint src, int tag);
static void comm_send(const struct comm *c, void *p, size_t n,
uint dst, int tag);
/* Non-blocking variants; the request written to *req is later handed to
   comm_wait. */
static void comm_irecv(comm_req *req, const struct comm *c,
void *p, size_t n, uint src, int tag);
static void comm_isend(comm_req *req, const struct comm *c,
void *p, size_t n, uint dst, int tag);
/* Takes an array of n requests (comm.c calls it as comm_wait(req,2)). */
static void comm_wait(comm_req *req, int n);
static void comm_bcast(const struct comm *c, void *p, size_t n,
uint root);
double comm_dot(const struct comm *comm, double *v, double *w,
uint n);
static void comm_gather(const struct comm *c, void *out, size_t out_n,
void *in, size_t in_n, uint root);
#ifdef GS_DEFS_H
void comm_allreduce(const struct comm *com, gs_dom dom, gs_op op,
void *v, uint vn, void *buf);
void comm_iallreduce(comm_req *req, const struct comm *com, gs_dom dom, gs_op op,
void *v, uint vn, void *buf);
void comm_scan(void *scan, const struct comm *com, gs_dom dom, gs_op op,