Unverified Commit faeaa56d authored by Stefan K's avatar Stefan K Committed by GitHub
Browse files

compile blas -O0 + cp instead of patch + add cgflex (#617)

parent 98b47b65
......@@ -9,13 +9,13 @@ c Maximum number of elements (limited to 2**31/12, at least for now)
integer nelgt_max
parameter(nelgt_max = 178956970)
integer gllnid, gllel
integer*8 nvtot
integer nelg(0:ldimt1)
$ ,lglel(lelt)
$ ,gllel(lelg)
$ ,gllnid(lelg)
$ ,nelgv,nelgt
common /hcglb/ nvtot,nelg,lglel,gllel,gllnid,nelgv,nelgt
common /hcglb/ nvtot,nelg,lglel,nelgv,nelgt
logical ifgprnt
common /diagl/ ifgprnt
......
......@@ -192,7 +192,7 @@ C
NMXV = 1000
if (iftran) NMXV = 200
NMXH = NMXV ! not used anymore
NMXP = 1000
NMXP = 200
do ifield = 2,ldimt+1
NMXT(ifield-1) = 200
enddo
......
......@@ -341,6 +341,7 @@ c data iflag,if_hyb /.false. , .true. /
etime1 = dnekclock()
etime_p = 0.
divex = 0.
maxit = iter
iter = 0
m = lgmres
......@@ -363,7 +364,8 @@ c
call rzero(x_gmres,n)
outer = 0
do while (iconv.eq.0.and.iter.lt.500)
do while (iconv.eq.0)
outer = outer+1
if(iter.eq.0) then ! -1
......@@ -495,6 +497,7 @@ c enddo
66 format(i5,1p4e12.5,i8,' Divergence')
#ifndef FIXITER
if (iter+1.gt.maxit) goto 900
if (rnorm .lt. tolpss) goto 900 !converged
#else
if (iter.gt.param(151)-1) goto 900
......
......@@ -638,12 +638,22 @@ c
common /iterhm/ niterhm
character*4 name
c
if (ifsplit.and.name.eq.'PRES'.and.param(42).eq.0) then
n = lx1*ly1*lz1*nelv
call copy (x,f,n)
call hmh_gmres (x,h1,h2,mult,iter)
niterhm = iter
return
if (ifsplit.and.name.eq.'PRES') then
if (param(42).eq.0) then
n = lx1*ly1*lz1*nelv
call copy (x,f,n)
iter = maxit
call hmh_gmres (x,h1,h2,mult,iter)
niterhm = iter
return
elseif(param(42).eq.2) then
n = lx1*ly1*lz1*nelv
call copy (x,f,n)
iter = maxit
call hmh_flex_cg(x,h1,h2,mult,iter)
niterhm = iter
return
endif
endif
c ** zero out stuff for Lanczos eigenvalue estimator
......@@ -2143,3 +2153,122 @@ c Normally, we'd store this as a 2-vector: uf(2,...)
return
end
c-----------------------------------------------------------------------
subroutine hmh_flex_cg(res,h1,h2,wt,iter)
c Solve the Helmholtz equation by right-preconditioned
c GMRES iteration.
include 'SIZE'
include 'TOTAL'
include 'FDMH1'
include 'GMRES'
common /ctolpr/ divex
common /cprint/ ifprint
logical ifprint
real res (lx1*ly1*lz1*lelv)
real h1 (lx1,ly1,lz1,lelv)
real h2 (lx1,ly1,lz1,lelv)
real wt (lx1,ly1,lz1,lelv)
parameter (lt=lx1*ly1*lz1*lelt)
common /scrcg/ r(lt),z(lt),p(lt),w(lt)
common /scrmg/ r1(lt)
common /cgmres1/ y(lgmres)
common /ctmp0/ wk1(lgmres),wk2(lgmres)
real alpha, l, temp
integer outer
logical iflag,if_hyb
save iflag,if_hyb
c data iflag,if_hyb /.false. , .true. /
data iflag,if_hyb /.false. , .false. /
real norm_fac
save norm_fac
real*8 etime1,dnekclock
n = lx1*ly1*lz1*nelv
div0 = gamma_gmres(1)*norm_fac
etime1 = dnekclock()
etime_p = 0.
divex = 0.
maxit = iter
iter = 0
call chktcg1(tolps,res,h1,h2,pmask,vmult,1,1)
if (param(21).gt.0.and.tolps.gt.abs(param(21)))
$ tolps = abs(param(21))
if (istep.eq.0) tolps = 1.e-4
tolpss = tolps
iconv = 0
call copy (r,res,n) ! Residual
call rzero(r1 ,n) ! Lagged residual for flexible CG
call rzero(p,n) ! Search direction
call rzero(res,n) ! Solution vector
rho1 = 1
! ______
div0 = sqrt(glsc3(r,wt,r,n)/volvm1) ! gamma = \/ (r,r)
if (param(21).lt.0) tolpss=abs(param(21))*div0
do k=1,maxit
if (param(40).ge.0 .and. param(40).le.2) then
call h1mg_solve(z,r,if_hyb) ! z = M w
else if (param(40).eq.3) then
call fem_amg_solve(z,r)
endif
call sub2(r1,r,n)
rho0 = rho1
rho1 = glsc3(z,wt,r,n) ! Inner product weighted by multiplicity
rho2 = -glsc3(z,wt,r1,n) ! Inner product weighted by multiplicity
beta = rho2/rho0 ! Flexible GMRES
call copy(r1,r,n) ! Save prior residual
call add2s1(p,z,beta,n)
call ax(w,p,h1,h2,n) ! w = A p
den = glsc3(w,wt,p,n)
alpha = rho1/den
rnorm = 0.0
do i = 1,n
res(i) = res(i) + alpha*p(i)
r(i) = r(i) - alpha*w(i)
rnorm = rnorm + r(i)*r(i)*wt(i,1,1,1)
enddo
call gop(rnorm,temp,'+ ',1)
c rnorm = sqrt(glsc3(r,wt,r,n)/volvm1) ! gamma = \/ (r,r)
rnorm = sqrt(rnorm/volvm1) ! gamma = \/ (r,r)
ratio = rnorm/div0
iter=iter+1
if (ifprint.and.nio.eq.0)
$ write (6,66) iter,tolpss,rnorm,div0,ratio,istep
66 format(i5,1p4e12.5,i8,' Divergence')
if (rnorm .lt. tolpss) goto 900 !converged
enddo
900 iconv = 1
divex = rnorm
call ortho (res) ! Orthogonalize wrt null space, if present
etime1 = dnekclock()-etime1
if (nio.eq.0) write(6,9999) istep,iter,divex,div0,tolpss,etime_p,
& etime1,if_hyb
9999 format(4x,i7,' PRES cgflex',4x,i5,1p5e13.4,1x,l4)
if (outer.le.2) if_hyb = .false.
return
end
c-----------------------------------------------------------------------
......@@ -75,10 +75,10 @@ else
filters_cmt.o diagnostics.o artvisc.o
endif
DUMMY:= $(shell cp $S/PARALLEL_ $S/PARALLEL)
DUMMY:= $(shell cp $S/PARALLEL.default $S/PARALLEL)
ifeq ($(DPROCMAP),1)
CORE := ${CORE} dprocmap.o
DUMMY:= $(shell cd $S; patch < PARALLEL.patch)
DUMMY:= $(shell cp $S/PARALLEL.dprocmap $S/PARALLEL)
endif
ifneq ($(VISIT),0)
......
......@@ -12,7 +12,7 @@ function build_3rdparty() {
set -o pipefail
cd $SOURCE_ROOT_BLAS
FFLAGS="-O2 $FF77 $FFLAGS" ./install 2>&1 | tee -a $CASEDIR/build.log >/dev/null
FFLAGS="-O0 $FF77 $FFLAGS" ./install 2>&1 | tee -a $CASEDIR/build.log >/dev/null
cd $CASEDIR
cd $SOURCE_ROOT_GSLIB
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment