Commit 35da9023 authored by April Novak's avatar April Novak
Browse files

Merge branch 'cardinal-update-1-19' into 'cardinal-update-1-13'

Cardinal Update 2020-1-19

See merge request !2
parents 66b51035 7388ee17
Pipeline #12320 passed with stages
in 9 minutes and 10 seconds
......@@ -38,7 +38,7 @@ set(OCCA_CXXFLAGS "-O2 -ftree-vectorize -funroll-loops -march=native -mtune=nati
CACHE STRING "C++ flags for OCCA JIT compile")
set(OCCA_TAG "ef7be8425e2460b1e07160b7a7e4dab41ccd510c" CACHE STRING "Git branch, tag, or hash for cloning OCCA")
set(NEK5000_TAG "a30afa09e90f3ca52916ae6816c8569412a3ca63" CACHE STRING "Git branch, tag, or hash for cloning Nek5000")
set(NEK5000_TAG "44f5904cd5c3622dd9d5fc6a3881ec7d4d498f18" CACHE STRING "Git branch, tag, or hash for cloning Nek5000")
set(HYPRE_VER "2.20.0" CACHE STRING "Version number of HYPRE to download")
set(USE_OCCA_MEM_BYTE_ALIGN "64" CACHE STRING "Memory allignment for OCCA kernels")
set(HYPRE_DIR "" CACHE STRING "Path to external HYPRE installation")
......@@ -175,6 +175,7 @@ include(config/blaslapack.cmake)
set(SRC
src/lib/nekrs.cpp
src/io/writeFld.cpp
src/io/utils.cpp
src/core/utils/mysort.cpp
src/core/utils/parallelSort.cpp
src/core/utils/occaHelpers.cpp
......
......@@ -5,7 +5,6 @@ set(ELLIPTIC_SOURCES
${ELLIPTIC_SOURCE_DIR}/ellipticBuildContinuousGalerkin.cpp
${ELLIPTIC_SOURCE_DIR}/ellipticBuildIpdg.cpp
${ELLIPTIC_SOURCE_DIR}/ellipticBuildJacobi.cpp
${ELLIPTIC_SOURCE_DIR}/ellipticHaloExchange.cpp
${ELLIPTIC_SOURCE_DIR}/ellipticKernelInfo.cpp
${ELLIPTIC_SOURCE_DIR}/ellipticBuildMultigridLevelFine.cpp
${ELLIPTIC_SOURCE_DIR}/ellipticBuildMultigridLevel.cpp
......
set(NEK5000_GS_VERSION "1.0.5")
set(NEK5000_GS_HASH "1b5b28de5b997c3b0893a3b4dcf5cee8614b9f27")
set(PARRSB_VERSION "0.4")
set(PARRSB_VERSION "0.6")
if (${NEK5000_PPLIST} MATCHES "PARRSB")
set(USE_PARRSB on)
......@@ -34,6 +34,12 @@ endif()
set(NEK5000_SOURCE_DIR ${nek5000_content_SOURCE_DIR})
if (USE_PARRSB)
install(FILES ${NEK5000_SOURCE_DIR}/core/PARALLEL.dprocmap DESTINATION ${NEK5000_SOURCE_DIR}/core RENAME "PARALLEL")
else()
install(FILES ${NEK5000_SOURCE_DIR}/core/PARALLEL.default DESTINATION ${NEK5000_SOURCE_DIR}/core RENAME "PARALLEL")
endif()
# blasLapack
# ==========
......@@ -110,7 +116,7 @@ ExternalProject_Add(
BUILD_COMMAND
${CMAKE_CURRENT_LIST_DIR}/run_nekconfig.sh
"CC=${CMAKE_C_COMPILER}"
"CFLAGS=${EXTERNAL_C_FLAGS}"
"NEK5000_SOURCE_DIRCFLAGS=${EXTERNAL_C_FLAGS}"
"FC=${CMAKE_Fortran_COMPILER}"
"FFLAGS=${EXTERNAL_Fortran_FLAGS}"
"NEK5000_SOURCE_DIR=${NEK5000_SOURCE_DIR}"
......
......@@ -12,7 +12,7 @@ static occa::memory o_scratch;
static cds_t* cdsSetup(ins_t* ins, mesh_t* mesh, setupAide options, occa::properties &kernelInfoH);
void nrsSetup(MPI_Comm comm, occa::device device, setupAide &options, int buildOnly, nrs_t *nrs)
void nrsSetup(MPI_Comm comm, occa::device device, setupAide &options, nrs_t *nrs)
{
nrs->options = options;
nrs->kernelInfo = new occa::properties();
......@@ -24,7 +24,9 @@ void nrsSetup(MPI_Comm comm, occa::device device, setupAide &options, int buildO
kernelInfo["include_paths"].asArray();
int N, cubN;
int buildOnly = 0;
string install_dir;
if(nrs->options.compareArgs("BUILD ONLY", "TRUE")) buildOnly = 1;
nrs->options.getArgs("POLYNOMIAL DEGREE", N);
nrs->options.getArgs("CUBATURE POLYNOMIAL DEGREE", cubN);
nrs->options.getArgs("NUMBER OF SCALARS", nrs->Nscalar);
......@@ -50,10 +52,13 @@ void nrsSetup(MPI_Comm comm, occa::device device, setupAide &options, int buildO
string casename;
nrs->options.getArgs("CASENAME", casename);
int err = 0;
int npTarget = size;
if (buildOnly) nrs->options.getArgs("NP TARGET", npTarget);
if (rank == 0) buildNekInterface(casename.c_str(), mymax(5, nrs->Nscalar), N, npTarget);
MPI_Barrier(comm);
if (rank == 0) err = buildNekInterface(casename.c_str(), mymax(5, nrs->Nscalar), N, npTarget);
MPI_Allreduce(MPI_IN_PLACE, &err, 1, MPI_INT, MPI_SUM, comm);
if (err) ABORT(EXIT_FAILURE);;
if (!buildOnly) {
nek_setup(comm, nrs->options, nrs);
nek_setic();
......@@ -77,7 +82,7 @@ void nrsSetup(MPI_Comm comm, occa::device device, setupAide &options, int buildO
if (nrs->cht && !nrs->options.compareArgs("SCALAR00 IS TEMPERATURE", "TRUE")) {
if (mesh->rank == 0) cout << "Conjugate heat transfer requires solving for temperature!\n";
EXIT(1);
ABORT(EXIT_FAILURE);;
}
{
......@@ -795,7 +800,7 @@ void nrsSetup(MPI_Comm comm, occa::device device, setupAide &options, int buildO
if(nrs->pSolver->levels[0] > mesh->N ||
nrs->pSolver->levels[nrs->pSolver->nLevels-1] < 1) {
if(mesh->rank == 0) printf("ERROR: Invalid multigrid coarsening!\n");
EXIT(1);
ABORT(EXIT_FAILURE);;
}
nrs->pOptions.setArgs("MULTIGRID COARSENING","CUSTOM");
} else if(nrs->pOptions.compareArgs("MULTIGRID DOWNWARD SMOOTHER","ASM") ||
......
......@@ -2,6 +2,6 @@
#define nekrs_inssetup_hpp_
#include "nrs.hpp"
void nrsSetup(MPI_Comm comm, occa::device device, setupAide &options, int buildOnly, nrs_t *nrs);
void nrsSetup(MPI_Comm comm, occa::device device, setupAide &options, nrs_t *nrs);
#endif
......@@ -68,7 +68,7 @@ string setupAide::readFile(string filename)
FILE* fh = fopen(filename.c_str(), "r");
if (fh == 0) {
printf("Failed to open: %s\n", filename.c_str());
exit(1);
ABORT(EXIT_FAILURE);
}
stat(filename.c_str(), &statbuf);
......
......@@ -37,6 +37,8 @@ SOFTWARE.
#include <sys/types.h>
#include <sys/stat.h>
#include "nrssys.hpp"
using std::stringstream;
using std::fstream;
using std::string;
......
......@@ -37,8 +37,6 @@
#include <cctype>
#include <sstream>
namespace
{
namespace inipp
{
namespace detail
......@@ -87,6 +85,7 @@ typedef enum
}
string_to_boolean_t;
namespace{
string_to_boolean_t string_to_boolean( const std::string s, bool strict = false )
{
const char* falses[] = { "false", "no", "0" };
......@@ -130,6 +129,7 @@ string_to_boolean_t string_to_boolean( const std::string s, bool strict = false
// The string was not recognized
return boolean_invalid;
}
}
template<class CharT>
class Ini
......@@ -315,6 +315,5 @@ private:
}
};
} // namespace inipp
} // namespace
#endif
/*
The MIT License (MIT)
Copyright (c) 2017 Tim Warburton, Noel Chalmers, Jesse Chan, Ali Karakus
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#include "elliptic.h"
void ellipticStartHaloExchange(elliptic_t* elliptic,
occa::memory &o_q,
int Nentries,
dfloat* sendBuffer,
dfloat* recvBuffer)
{
mesh_t* mesh = elliptic->mesh;
// count size of halo for this process
dlong haloBytes = mesh->totalHaloPairs * Nentries * sizeof(dfloat);
// extract halo on DEVICE
if(haloBytes) {
// make sure compute device is ready to perform halo extract
mesh->device.finish();
// switch to data stream
mesh->device.setStream(elliptic->dataStream);
// extract halo on data stream
// printf("CRASH DATA: %d, %d, %p, %p, %p\n", mesh->totalHaloPairs, Nentries, mesh->o_haloElementList.ptr(), o_q.ptr(), mesh->o_haloBuffer.ptr());
mesh->haloExtractKernel(mesh->totalHaloPairs,
Nentries,
mesh->o_haloElementList,
o_q,
mesh->o_haloBuffer);
// queue up async copy of halo on data stream
mesh->o_haloBuffer.copyTo(sendBuffer,"async: true");
mesh->device.setStream(elliptic->defaultStream);
}
}
void ellipticInterimHaloExchange(elliptic_t* elliptic,
occa::memory &o_q,
int Nentries,
dfloat* sendBuffer,
dfloat* recvBuffer)
{
mesh_t* mesh = elliptic->mesh;
// count size of halo for this process
dlong haloBytes = mesh->totalHaloPairs * Nentries * sizeof(dfloat);
// extract halo on DEVICE
if(haloBytes) {
// copy extracted halo to HOST
mesh->device.setStream(elliptic->dataStream);
// make sure async copy finished
mesh->device.finish();
// start halo exchange HOST<>HOST
meshHaloExchangeStart(mesh,
Nentries * sizeof(dfloat),
sendBuffer,
recvBuffer);
mesh->device.setStream(elliptic->defaultStream);
}
}
void ellipticEndHaloExchange(elliptic_t* elliptic,
occa::memory &o_q,
int Nentries,
dfloat* recvBuffer)
{
mesh_t* mesh = elliptic->mesh;
// count size of halo for this process
dlong haloBytes = mesh->totalHaloPairs * Nentries * sizeof(dfloat);
dlong haloOffset = mesh->Nelements * Nentries * sizeof(dfloat);
// extract halo on DEVICE
if(haloBytes) {
// finalize recv on HOST
meshHaloExchangeFinish(mesh);
// copy into halo zone of o_r HOST>DEVICE
mesh->device.setStream(elliptic->dataStream);
o_q.copyFrom(recvBuffer, haloBytes, haloOffset,"async: true");
mesh->device.finish();
mesh->device.setStream(elliptic->defaultStream);
mesh->device.finish();
}
}
......@@ -338,12 +338,11 @@ dfloat MGLevel::maxEigSmoothAx()
const dlong N = Nrows;
const dlong M = Ncols;
int k = 10;
hlong Nlocal = (hlong) Nrows;
hlong Ntotal = 0;
MPI_Allreduce(&Nlocal, &Ntotal, 1, MPI_HLONG, MPI_SUM, mesh->comm);
if(k > Ntotal) k = (int) Ntotal;
const int k = std::min((hlong) 20, Ntotal);
// do an arnoldi
......
......@@ -167,7 +167,7 @@ compute_element_lengths(ElementLengths* lengths, elliptic_t* elliptic)
std::cout << "Encountered length of zero in middle for element e = " << e << "!\n";
std::cout << "x,y,z = " << lengths->length_middle_x[e] << ", "
<< lengths->length_middle_y[e] << ", " << lengths->length_middle_z[e] << "\n";
exit(-1);
ABORT(EXIT_FAILURE);;
}
bool negative = false;
negative |= lengths->length_middle_x[e] < -tol;
......@@ -177,7 +177,7 @@ compute_element_lengths(ElementLengths* lengths, elliptic_t* elliptic)
std::cout << "Encountered negative length in middle for element e = " << e << "!\n";
std::cout << "x,y,z = " << lengths->length_middle_x[e] << ", "
<< lengths->length_middle_y[e] << ", " << lengths->length_middle_z[e] << "\n";
exit(-1);
ABORT(EXIT_FAILURE);;
}
}
......@@ -506,7 +506,7 @@ void compute_1d_matrices(
std::cout << "lbc = " << lbc << ", rbc = " << rbc << "\n";
for(int iface = 0; iface < 6; ++iface)
std::cout << "EToB[iface] = " << elliptic->EToB[6 * e + iface] << "\n";
exit(-1);
ABORT(EXIT_FAILURE);;
}
if(lbc > 0)
row_zero(S,nl,0);
......@@ -608,6 +608,9 @@ mesh_t* create_extended_mesh(elliptic_t* elliptic)
{
mesh_t* meshRoot = elliptic->mesh;
int buildOnly = 0;
if(elliptic->options.compareArgs("BUILD ONLY", "TRUE")) buildOnly = 1;
mesh_t* mesh = new mesh_t();
mesh->rank = meshRoot->rank;
mesh->size = meshRoot->size;
......@@ -618,28 +621,31 @@ mesh_t* create_extended_mesh(elliptic_t* elliptic)
mesh->Nelements = meshRoot->Nelements;
mesh->Nverts = meshRoot->Nverts;
mesh->Nfaces = meshRoot->Nfaces;
mesh->NfaceVertices = meshRoot->NfaceVertices;
mesh->Nnodes = meshRoot->Nnodes;
mesh->EX = (dfloat*) calloc(mesh->Nverts * mesh->Nelements, sizeof(dfloat));
mesh->EY = (dfloat*) calloc(mesh->Nverts * mesh->Nelements, sizeof(dfloat));
mesh->EZ = (dfloat*) calloc(mesh->Nverts * mesh->Nelements, sizeof(dfloat));
memcpy(mesh->EX, meshRoot->EX, mesh->Nverts * mesh->Nelements * sizeof(dfloat));
memcpy(mesh->EY, meshRoot->EY, mesh->Nverts * mesh->Nelements * sizeof(dfloat));
memcpy(mesh->EZ, meshRoot->EZ, mesh->Nverts * mesh->Nelements * sizeof(dfloat));
mesh->faceVertices = (int*) calloc(mesh->NfaceVertices * mesh->Nfaces, sizeof(int));
memcpy(mesh->faceVertices, meshRoot->faceVertices, mesh->NfaceVertices * mesh->Nfaces * sizeof(int));
mesh->EToV = (hlong*) calloc(mesh->Nverts * mesh->Nelements, sizeof(hlong));
memcpy(mesh->EToV, meshRoot->EToV, mesh->Nverts * mesh->Nelements * sizeof(hlong));
meshParallelConnect(mesh);
meshConnectBoundary(mesh);
meshLoadReferenceNodesHex3D(mesh, mesh->N, 1);
int buildOnly = 0;
if(elliptic->options.compareArgs("BUILD ONLY", "TRUE")) buildOnly = 1;
meshPhysicalNodesHex3D(mesh, buildOnly);
meshHaloSetup(mesh);
meshPhysicalNodesHex3D(mesh, buildOnly);
meshHaloPhysicalNodes(mesh);
meshConnectFaceNodes3D(mesh);
meshParallelConnectNodes(mesh, buildOnly);
mesh->ogs = ogsSetup(mesh->Nelements * mesh->Np, mesh->globalIds, mesh->comm, 1, mesh->device);
const int bigNum = 1E9;
......@@ -786,8 +792,8 @@ void MGLevel::build(
elliptic_t* pSolver)
{
if(elliptic->elementType != HEXAHEDRA) {
printf("ERROR: Unsupported elements type!");
exit(-1);
printf("ERROR: Unsupported element type!");
ABORT(EXIT_FAILURE);
}
overlap = false;
......
......@@ -70,7 +70,7 @@ void ellipticAx(elliptic_t* elliptic,
if(integrationType != 0)
printf("Precision level (%s) does not support integrationType %d\n", precision, integrationType);
}
exit(1);
ABORT(EXIT_FAILURE);
}
}
......@@ -113,7 +113,7 @@ void ellipticAx(elliptic_t* elliptic,
}
}
} else {
exit(1);
ABORT(EXIT_FAILURE);
}
return;
}
......
......@@ -37,8 +37,7 @@ void ellipticPreconditionerSetup(elliptic_t* elliptic, ogs_t* ogs, occa::propert
} else if(options.compareArgs("PRECONDITIONER", "SEMFEM")) {
//ellipticSEMFEMSetup(elliptic,precon);
printf("ERROR: SEMFEM does not work right now.\n");
exit(-1);
ABORT(EXIT_FAILURE);;
} else if(options.compareArgs("PRECONDITIONER", "JACOBI")) {
dfloat* invDiagA;
ellipticBuildJacobi(elliptic,&invDiagA);
......@@ -48,6 +47,6 @@ void ellipticPreconditionerSetup(elliptic_t* elliptic, ogs_t* ogs, occa::propert
free(invDiagA);
} else {
printf("ERROR: Unknown preconditioner!\n");
exit(-1);
ABORT(EXIT_FAILURE);
}
}
......@@ -219,10 +219,15 @@ void ResidualProjection::pre(occa::memory& o_r)
}
if(rank == 0 && verbose)
std::cout << "Residual projection : "
<< std::cout.precision(15)
<< "Prior Residual Norm = "
<< priorResidualNorm << ", "
<< "Post Residual Norm = "
<< postResidualNorm << ", "
<< ratio << "\n";
<< "Reduction Ratio = "
<< ratio << ", "
<< "Number Vectors = "
<< numVecsProjection
<< "\n";
}
void ResidualProjection::post(occa::memory& o_x)
......
......@@ -64,7 +64,7 @@ int ellipticSolve(elliptic_t* elliptic,
Niter = pcg (elliptic, o_r, o_x, tol, maxIter);
}else{
printf("NONBLOCKING Krylov solvers currently not supported!");
exit(1);
ABORT(EXIT_FAILURE);
/*
if(!options.compareArgs("KRYLOV SOLVER", "FLEXIBLE"))
Niter = nbpcg (elliptic, o_r, o_x, tol, maxIter);
......
......@@ -43,8 +43,7 @@ void ellipticSolveSetup(elliptic_t* elliptic, occa::properties kernelInfo)
if(mesh->rank == 0)
printf("ERROR: Block solver is implemented for C0-HEXAHEDRA with Jacobi preconditioner only\n");
MPI_Finalize();
exit(-1);
ABORT(EXIT_FAILURE);
}
if (options.compareArgs("COEFFICIENT","VARIABLE") && elliptic->elementType != HEXAHEDRA &&
......@@ -52,8 +51,7 @@ void ellipticSolveSetup(elliptic_t* elliptic, occa::properties kernelInfo)
if(mesh->rank == 0)
printf("ERROR: Varibale coefficient solver is implemented for C0-HEXAHEDRA only\n");
MPI_Finalize();
exit(-1);
ABORT(EXIT_FAILURE);
}
if (options.compareArgs("COEFFICIENT","VARIABLE")) {
......@@ -63,8 +61,8 @@ void ellipticSolveSetup(elliptic_t* elliptic, occa::properties kernelInfo)
if(mesh->rank == 0)
printf(
"ERROR: Varibale coefficient solver is implemented for constant multigrid preconditioner only\n");
MPI_Finalize();
exit(-1);
ABORT(EXIT_FAILURE);
}
}
......@@ -210,34 +208,6 @@ void ellipticSolveSetup(elliptic_t* elliptic, occa::properties kernelInfo)
elliptic->Nblock = Nblock;
elliptic->Nblock2 = Nblock2;
//fill geometric factors in halo
if(mesh->totalHaloPairs) {
dlong Nlocal = mesh->Nelements;
dlong Nhalo = mesh->totalHaloPairs;
size_t Nbytes = mesh->Nvgeo * sizeof(dfloat);
if (elliptic->elementType == QUADRILATERALS || elliptic->elementType == HEXAHEDRA) {
Nlocal *= mesh->Np;
Nhalo *= mesh->Np;
Nbytes *= mesh->Np;
}
dfloat* vgeoSendBuffer = (dfloat*) calloc(Nhalo * mesh->Nvgeo, sizeof(dfloat));
// import geometric factors from halo elements
mesh->vgeo = (dfloat*) realloc(mesh->vgeo, (Nlocal + Nhalo) * mesh->Nvgeo * sizeof(dfloat));
meshHaloExchange(mesh,
Nbytes,
mesh->vgeo,
vgeoSendBuffer,
mesh->vgeo + Nlocal * mesh->Nvgeo);
mesh->o_vgeo =
mesh->device.malloc((Nlocal + Nhalo) * mesh->Nvgeo * sizeof(dfloat), mesh->vgeo);
free(vgeoSendBuffer);
}
// count total number of elements
hlong NelementsLocal = mesh->Nelements;
hlong NelementsGlobal = 0;
......@@ -856,7 +826,7 @@ void ellipticSolveSetup(elliptic_t* elliptic, occa::properties kernelInfo)
if(elliptic->var_coeff || elliptic->blockSolver) {
printf(
"ERROR: TRILINEAR form is not implemented for varibale coefficient and block solver yet \n");
exit(-1);
ABORT(EXIT_FAILURE);
}
kernelName = "ellipticPartialAxTrilinear" + suffix;
}else {
......
/*
The MIT License (MIT)
Copyright (c) 2017 Tim Warburton, Noel Chalmers, Jesse Chan, Ali Karakus
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#include "elliptic.h"
// z = P*r
void ellipticThinOas(elliptic_t* elliptic, dfloat lambda, occa::memory &o_r, occa::memory &o_z)
{
mesh_t* mesh = elliptic->mesh;
setupAide options = elliptic->options;
int Np = mesh->Np;
int Nfp = mesh->Nfp;
int Nfaces = mesh->Nfaces;
dlong Nelements = mesh->Nelements;
dlong totalHaloPairs = mesh->totalHaloPairs;
dlong Ndata = mesh->Nfp * totalHaloPairs;
// extract halo for thin kernel
elliptic->oasHaloGetKernel(totalHaloPairs,
1, // number of fields
Np * (Nelements + totalHaloPairs), // field size including halo eelments
elliptic->o_oasHaloElementList,
elliptic->o_oasHaloGetNodeIds,
o_r,
elliptic->o_oasHaloBuffer);
// copy extracted halo to HOST
elliptic->o_oasHaloBuffer.copyTo(elliptic->oasSendBuffer, Ndata * sizeof(dfloat), 0);// zero offset
// start halo exchange
meshHaloExchangeStart(mesh,
mesh->Nfp * sizeof(dfloat),
elliptic->oasSendBuffer,
elliptic->oasRecvBuffer);
// finish halo exchange
meshHaloExchangeFinish(mesh);
// copy halo data to on device halo arary
elliptic->o_oasHaloBuffer.copyFrom(elliptic->oasRecvBuffer, Ndata * sizeof(dfloat), 0); // zero offset
// populate halo (with offset
elliptic->oasHaloPutKernel(totalHaloPairs,
1, // number of fields
Np * (Nelements + totalHaloPairs), // field size including halo eelments
elliptic->o_oasHaloElementList,
elliptic->o_oasHaloPutNodeIds,
elliptic->o_oasHaloBuffer,
o_r); // place incoming halo data into end buffer
// element-wise fast-directional-approximate-inverse