Commit b6847dbd authored by Stefan's avatar Stefan
Browse files

add more timers

parent e8125b16
......@@ -96,6 +96,16 @@ c-----------------------------------------------------------------------
include 'SIZE'
include 'TOTAL'
call rescale_x(xm1,-1.0,1.0)
call rescale_x(ym1,-1.0,1.0)
call rescale_x(zm1,-1.0,1.0)
do iel=1,nelt
do ifc=1,2*ndim
if (cbc(ifc,iel,1) .eq. 'v ') boundaryID(ifc,iel) = 1
enddo
enddo
return
end
c-----------------------------------------------------------------------
......
......@@ -24,6 +24,14 @@ SOFTWARE.
*/
/*
 * ady: in-place scaled reciprocal, y[i] <- alpha / y[i] for i in [0, N).
 *
 *   N     - number of entries to process
 *   alpha - scalar numerator
 *   y     - array that is read and then overwritten entry by entry
 *
 * No guard against zero entries of y is applied here.
 */
@kernel void ady(const dlong N,
                 const dfloat alpha,
                 @restrict dfloat *y){
  for(dlong i=0;i<N;++i;@tile(p_blockSize,@outer,@inner)){
    const dfloat denom = y[i];
    y[i] = alpha/denom;
  }
}
@kernel void axdy(const dlong N,
const dfloat alpha,
@restrict const dfloat *x,
......@@ -33,6 +41,31 @@ SOFTWARE.
y[n] = alpha*x[n]/y[n];
}
}
/*
 * aydx: in-place scaled division, y[i] <- (alpha * y[i]) / x[i] for i in [0, N).
 *
 *   N     - number of entries to process
 *   alpha - scalar factor applied to y before the division
 *   x     - read-only divisor array
 *   y     - array that is read and then overwritten entry by entry
 *
 * No guard against zero entries of x is applied here.
 */
@kernel void aydx(const dlong N,
                  const dfloat alpha,
                  @restrict const dfloat *x,
                  @restrict dfloat *y){
  for(dlong i=0;i<N;++i;@tile(p_blockSize,@outer,@inner)){
    // Multiply first, then divide: same evaluation order as alpha*y/x.
    const dfloat scaled = alpha*y[i];
    y[i] = scaled/x[i];
  }
}
/*
 * aydxMany: multi-field version of aydx. For every i in [0, N) and every
 * field f in [0, Nfields):
 *
 *   y[i + f*fieldOffset] <- (alpha * y[i + f*fieldOffset]) / x[i + mode*f*fieldOffset]
 *
 *   N           - number of entries per field
 *   Nfields     - number of fields stored in y
 *   fieldOffset - distance (in entries) between consecutive fields
 *   mode        - multiplier on the x field offset: with mode == 0 every field
 *                 divides by the same x[i]; with mode == 1 x is indexed
 *                 exactly like y
 *   alpha       - scalar factor applied to y before the division
 *   x           - read-only divisor array
 *   y           - array that is read and then overwritten entry by entry
 */
@kernel void aydxMany(const dlong N,
                      const dlong Nfields,
                      const dlong fieldOffset,
                      const dlong mode,
                      const dfloat alpha,
                      @restrict const dfloat *x,
                      @restrict dfloat *y){
  for(dlong i=0;i<N;++i;@tile(p_blockSize,@outer,@inner)){
    for(int f = 0; f < Nfields; ++f){
      const dlong yId = i + f * fieldOffset;
      const dlong xId = i + mode * f * fieldOffset;
      y[yId] = alpha*y[yId]/x[xId];
    }
  }
}
@kernel void axdyz(const dlong N,
const dfloat alpha,
......
......@@ -33,6 +33,22 @@ SOFTWARE.
y[n] = alpha*x[n]*y[n];
}
}
/*
 * axmyMany: multi-field scaled pointwise product. For every i in [0, N) and
 * every field f in [0, Nfields):
 *
 *   y[i + f*offset] <- alpha * x[i + mode*f*offset] * y[i + f*offset]
 *
 *   N       - number of entries per field
 *   Nfields - number of fields stored in y
 *   offset  - distance (in entries) between consecutive fields
 *   mode    - multiplier on the x field offset: with mode == 0 every field is
 *             multiplied by the same x[i]; with mode == 1 x is indexed
 *             exactly like y
 *   alpha   - scalar factor
 *   x       - read-only factor array
 *   y       - array that is read and then overwritten entry by entry
 */
@kernel void axmyMany(const dlong N,
                      const dlong Nfields,
                      const dlong offset,
                      const dlong mode,
                      const dfloat alpha,
                      @restrict const dfloat *x,
                      @restrict dfloat *y){
  for(dlong i=0;i<N;++i;@tile(p_blockSize,@outer,@inner)){
    for(int f = 0; f < Nfields; ++f){
      const dlong yId = i + f * offset;
      const dlong xId = i + mode * f * offset;
      y[yId] = alpha*x[xId]*y[yId];
    }
  }
}
@kernel void axmyz(const dlong N,
const dfloat alpha,
......
......@@ -49,3 +49,20 @@ SOFTWARE.
z[n] = alpha*x[n] + beta*y[n];
}
}
/*
 * axpbyzMany: multi-field linear combination. For every i in [0, N) and every
 * field f in [0, Nfields):
 *
 *   z[i + f*offset] <- alpha * x[i + f*offset] + beta * y[i + f*offset]
 *
 *   N       - number of entries per field
 *   Nfields - number of fields
 *   offset  - distance (in entries) between consecutive fields; the same
 *             offset is used for x, y and z
 *   alpha   - scalar factor on x
 *   x       - read-only input array
 *   beta    - scalar factor on y
 *   y       - read-only input array
 *   z       - output array, overwritten entry by entry
 */
@kernel void axpbyzMany(const dlong N,
                        const dlong Nfields,
                        const dlong offset,
                        const dfloat alpha,
                        @restrict const dfloat *x,
                        const dfloat beta,
                        @restrict const dfloat *y,
                        @restrict dfloat *z){
  for(dlong i=0;i<N;++i;@tile(p_blockSize,@outer,@inner)){
    for(int f = 0; f < Nfields; ++f){
      const dlong id = i + f * offset;
      z[id] = alpha*x[id] + beta*y[id];
    }
  }
}
\ No newline at end of file
......@@ -249,7 +249,7 @@ void nrsSetup(MPI_Comm comm, occa::device device, setupAide &options, nrs_t *nrs
if (mesh->rank == 0) cout << "done" << endl;
}
nrs->linAlg = new linAlg_t(mesh->device, nrs->kernelInfo, mesh->comm);
nrs->linAlg = new linAlg_t(mesh->device, kernelInfo, mesh->comm);
meshParallelGatherScatterSetup(mesh, nrs->Nlocal, mesh->globalIds, mesh->comm, 0);
oogs_mode oogsMode = OOGS_AUTO;
......
......@@ -53,11 +53,20 @@ void ellipticBuildContinuous(elliptic_t* elliptic,
ogs_t** ogs,
hlong* globalStarts)
{
mesh_t *mesh = elliptic->mesh;
MPI_Barrier(mesh->comm);
const double tStart = MPI_Wtime();
if(mesh->rank == 0) printf("Building full FEM matrix ... ");
fflush(stdout);
switch(elliptic->elementType) {
case HEXAHEDRA:
ellipticBuildContinuousHex3D(elliptic, A, nnz, ogs, globalStarts);
break;
}
MPI_Barrier(mesh->comm);
if(mesh->rank == 0) printf("done (%gs)\n", MPI_Wtime() - tStart);
}
void ellipticBuildContinuousHex3D(elliptic_t* elliptic,
......@@ -115,9 +124,6 @@ void ellipticBuildContinuousHex3D(elliptic_t* elliptic,
int* mask = (int*) calloc(mesh->Np * mesh->Nelements,sizeof(int));
for (dlong n = 0; n < elliptic->Nmasked; n++) mask[elliptic->maskIds[n]] = 1;
if(mesh->rank == 0) printf("Building full FEM matrix...");
fflush(stdout);
dlong cnt = 0;
for (dlong e = 0; e < mesh->Nelements; e++)
for (int nz = 0; nz < mesh->Nq; nz++)
......@@ -268,8 +274,6 @@ void ellipticBuildContinuousHex3D(elliptic_t* elliptic,
if (*nnz) cnt++;
*nnz = cnt;
if(mesh->rank == 0) printf("done.\n");
MPI_Barrier(mesh->comm);
MPI_Type_free(&MPI_NONZERO_T);
......
......@@ -78,6 +78,11 @@ void ellipticBuildJacobi(elliptic_t* elliptic, dfloat** invDiagA)
mesh_t* mesh = elliptic->mesh;
setupAide options = elliptic->options;
MPI_Barrier(mesh->comm);
const double tStart = MPI_Wtime();
if(mesh->rank == 0) printf("Building Jacobi ... ");
fflush(stdout);
// surface mass matrices MS = MM*LIFT
dfloat* MS = (dfloat*) calloc(mesh->Nfaces * mesh->Nfp * mesh->Nfp,sizeof(dfloat));
for (int f = 0; f < mesh->Nfaces; f++)
......@@ -156,9 +161,6 @@ void ellipticBuildJacobi(elliptic_t* elliptic, dfloat** invDiagA)
dfloat* diagA = (dfloat*) calloc(diagNnum, sizeof(dfloat));
if(mesh->rank == 0) printf("Building diagonal...");
fflush(stdout);
switch(elliptic->elementType) {
case HEXAHEDRA:
if(elliptic->blockSolver) {
......@@ -195,7 +197,8 @@ void ellipticBuildJacobi(elliptic_t* elliptic, dfloat** invDiagA)
}
}
if(mesh->rank == 0) printf("done.\n");
MPI_Barrier(mesh->comm);
if(mesh->rank == 0) printf("done (%gs)\n", MPI_Wtime() - tStart);
free(diagA);
free(MS);
......
......@@ -271,30 +271,11 @@ void ellipticSolveSetup(elliptic_t* elliptic, occa::properties kernelInfo)
if(mesh->rank == 0)
printf("allNeumann = %d \n", elliptic->allNeumann);
//set surface mass matrix for continuous boundary conditions
mesh->sMT = (dfloat*) calloc(mesh->Np * mesh->Nfaces * mesh->Nfp,sizeof(dfloat));
for (int n = 0; n < mesh->Np; n++)
for (int m = 0; m < mesh->Nfp * mesh->Nfaces; m++) {
dfloat MSnm = 0;
for (int i = 0; i < mesh->Np; i++)
MSnm += mesh->MM[n + i * mesh->Np] * mesh->LIFT[m + i * mesh->Nfp * mesh->Nfaces];
mesh->sMT[n + m * mesh->Np] = MSnm;
}
mesh->o_sMT =
mesh->device.malloc(mesh->Np * mesh->Nfaces * mesh->Nfp * sizeof(dfloat), mesh->sMT);
//copy boundary flags
elliptic->o_EToB = mesh->device.malloc(
mesh->Nelements * mesh->Nfaces * elliptic->Nfields * sizeof(int),
elliptic->EToB);
#if 0
if (mesh->rank == 0 && options.compareArgs("VERBOSE","TRUE"))
occa::setVerboseCompilation(true);
else
occa::setVerboseCompilation(false);
#endif
//setup an unmasked gs handle
int verbose = options.compareArgs("VERBOSE","TRUE") ? 1:0;
if(mesh->ogs == NULL) meshParallelGatherScatterSetup(mesh, Nlocal, mesh->globalIds, mesh->comm, verbose);
......@@ -383,9 +364,6 @@ void ellipticSolveSetup(elliptic_t* elliptic, occa::properties kernelInfo)
/*preconditioner setup */
elliptic->precon = new precon_t();
// kernelInfo["parser/" "automate-add-barriers"] = "disabled";
kernelInfo["defines/pfloat"] = pfloatString;
// set kernel name suffix
string suffix;
if(elliptic->elementType == HEXAHEDRA)
......@@ -393,6 +371,9 @@ void ellipticSolveSetup(elliptic_t* elliptic, occa::properties kernelInfo)
string filename, kernelName;
// kernelInfo["parser/" "automate-add-barriers"] = "disabled";
kernelInfo["defines/pfloat"] = pfloatString;
kernelInfo["defines/" "p_eNfields"] = elliptic->Nfields;
kernelInfo["defines/p_Nalign"] = USE_OCCA_MEM_BYTE_ALIGN;
kernelInfo["defines/" "p_blockSize"] = BLOCKSIZE;
......
......@@ -77,7 +77,7 @@ void coarseSolver::setup(
double settings[HYPRE_NPARAM+1];
settings[0] = 1; /* custom settings */
settings[1] = 10; /* coarsening */
settings[1] = 8; /* coarsening */
settings[2] = 6; /* interpolation */
settings[3] = 2; /* number of cycles */
settings[4] = 16; /* smoother for crs level */
......
......@@ -134,7 +134,6 @@ void meshFree(mesh_t* mesh)
if(mesh->LIFT) free(mesh->LIFT); // lift matrix
if(mesh->FMM) free(mesh->FMM); // Face Mass Matrix
if(mesh->sMT) free(mesh->sMT); // surface mass (MM*LIFT)^T
if(mesh->sgeo) free(mesh->sgeo);
......
......@@ -8,6 +8,10 @@
void meshNekReaderHex3D(int N, mesh_t* mesh)
{
MPI_Barrier(mesh->comm);
const double tStart = MPI_Wtime();
if(mesh->rank == 0) printf("loading mesh from nek ...\n"); fflush(stdout);
mesh->dim = 3;
mesh->Nverts = 8;
mesh->Nfaces = 2 * mesh->dim;
......@@ -115,4 +119,7 @@ void meshNekReaderHex3D(int N, mesh_t* mesh)
mesh->elementInfo[e] = 1; // solid
if(e < nekData.nelv ) mesh->elementInfo[e] = 0;
}
MPI_Barrier(mesh->comm);
if(mesh->rank == 0) printf("done (%gs)\n", MPI_Wtime() - tStart);
}
......@@ -81,6 +81,10 @@ void meshParallelConnect(mesh_t* mesh)
rank = mesh->rank;
size = mesh->size;
MPI_Barrier(mesh->comm);
const double tStart = MPI_Wtime();
if(mesh->rank == 0) printf("Building parallel face connectivity ... ");
// serial connectivity on each process
meshConnect(mesh);
......@@ -250,4 +254,7 @@ void meshParallelConnect(mesh_t* mesh)
MPI_Type_free(&MPI_PARALLELFACE_T);
free(sendFaces);
free(recvFaces);
MPI_Barrier(mesh->comm);
if(mesh->rank == 0) printf("done (%gs)\n", MPI_Wtime() - tStart);
}
......@@ -152,7 +152,7 @@ c-----------------------------------------------------------------------
call setupcomm(comm_in,newcomm,newcommg,path_in,session_in)
call iniproc()
etimes = dnekclock()
etimes = dnekclock_sync()
istep = 0
call initdim ! Initialize / set default values.
......@@ -255,6 +255,10 @@ c call findSYMOrient
p0thn = p0th
ntdump=0
etimeSetup = dnekclock_sync() - etimes
if(nio.eq.0) write(6,999) etimeSetup
999 format(' nek setup done in ', 1p1e13.4, ' s')
if(nio.eq.0) write(6,*)
call flush(6)
return
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment