mirror of
https://code.it4i.cz/sccs/easyconfigs-it4i.git
synced 2025-04-17 04:00:49 +01:00
5912 lines
209 KiB
Fortran
5912 lines
209 KiB
Fortran
! {{{ ptcore
|
|
************************************************************************
|
|
subroutine ptcore(t,tt,fij,fab,co,first,last,dgroup,bsymmv,
|
|
$symmocc,iscrs,localcc,ui,esdi,et,iui,dfnb,ittend,
|
|
$ijij,ijia,ijab,ccsdalg,imo,lccoporder,ccmem,talg,
|
|
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg,
|
|
$iout,maxcor,imem,dcore,iimem,icore,wsize,gbasfile,ecc,eref,locnoc,
|
|
$mpi_rank,mpi_size,imove, mpi_ccsd,
|
|
$ccsdmkl, ptthreads, omp_max_threads, mkl_max_threads,
|
|
$ccsdrest, ptrest, bcast_comm, ccsd_communicator, emp2corr,
|
|
$lno, lnaf, eppl_correction, emp2, emp2full, master_rank,
|
|
$master_thread,lf12,ppl_calc)
|
|
************************************************************************
|
|
implicit none
|
|
#if defined(MPI)
|
|
include "mpif.h"
|
|
#endif
|
|
integer irecln,ibufln,nbasis,nocc,nvirt,dgroup,nirmax
|
|
integer bsymmv(nirmax+1),symmocc(nirmax),multpg(nirmax,nirmax)
|
|
integer iout,maxcor,imem,iimem,wsize,gbasfile,locnoc,vsize
|
|
real*8 dcore(*), ecc, eref, tfact, tscale(nocc), ets
|
|
integer icore(*), ccsd_communicator
|
|
integer a,i,j,nquad,iquad,nqmin, bcast_comm
|
|
integer ttind,ccmem(8)
|
|
real*8 tt(nvirt,nvirt,(nocc+1)*nocc/2)
|
|
real*8 t(nvirt,nocc)
|
|
real*8 fab(nvirt,nvirt),fij(nocc,nocc)
|
|
real*8 ui(nocc)
|
|
real*8 esdi, emp2corr, emp2, emp2full
|
|
|
|
character*8 ccsdalg,lccoporder
|
|
integer dfnb, ijij, ijia, ijab
|
|
integer itemp, imove, master_rank
|
|
integer mpi_rank, mpi_size, mpi_err
|
|
logical mpi_ccsd, lno, lnaf, master_thread,lf12
|
|
character*3 ccsdmkl
|
|
character*4 ccsdrest
|
|
logical ptrest,ppl_calc
|
|
integer ptthreads, omp_max_threads, mkl_max_threads
|
|
|
|
integer co(nbasis)
|
|
integer first(0:5,8,8,nbasis),last(0:5,8,8,nbasis)
|
|
integer abcistoremin,jlen,iIabIJ,ittl2,ittl3,ittend,ovmax
|
|
|
|
integer imem1,avmem,iscrs,dblalloc,iui
|
|
common/memcom/ imem1
|
|
C Vars for (T) corr
|
|
integer ifdo,ifdv,itnew,ittnew,iaijk,iabij,iv,iw
|
|
integer iabci,iscro,itrr,rrd,talgoccmin
|
|
c character*1 ison
|
|
character*4 localcc,talg
|
|
character*8 dfintran
|
|
character*16 laptol
|
|
integer iv1,iv2,iv3,iw1,iw2,iw3
|
|
integer imo,itmp
|
|
real*8 et,ltol
|
|
integer iss,ittl,it1,laplminmem,iblocki,nblocki,xyzomp
|
|
integer iapbc,illjab,illjpai,tmpmemdgemm,abciblmemmax
|
|
logical abci_inmem,lapl
|
|
c times
|
|
real*8 ttmp(6,10),times(6,0:100)
|
|
integer list(100),ii,imax
|
|
character*80 txt(0:100)
|
|
integer nit, omp_get_max_threads
|
|
|
|
real*8 ecc_ppl_scaled, ecc_ppl, eppl_correction
|
|
real*8 et_corrected
|
|
|
|
interface ! {{{
|
|
c {{{ interfaces for pointers
|
|
subroutine rpoint1d(egydim1,egydim2,dim1)
|
|
implicit none
|
|
integer dim1
|
|
real*8,target :: egydim1(dim1)
|
|
real*8, pointer :: egydim2(:)
|
|
end subroutine
|
|
c}}}
|
|
end interface ! }}}
|
|
|
|
#ifdef OMP
|
|
xyzomp = omp_get_max_threads()
|
|
#else
|
|
xyzomp = 1
|
|
#endif
|
|
|
|
times(1:6,0:100)=0.d0
|
|
et=0.d0
|
|
ets=0.d0
|
|
tfact=(emp2full-eref)/(emp2-eref)
|
|
|
|
C Calculate (T) correction
|
|
write(iout,*)
|
|
write(iout,*) 'Calculation of (T) correction...'
|
|
write(iout,*)
|
|
call flush(iout)
|
|
TIME0(ttmp)
|
|
TIME0(times(1,96))
|
|
|
|
ovmax=max(nocc,nvirt)
|
|
lapl=.false.
|
|
if (talg.eq.'lapl'.or.talg.eq.'topr'.or.talg.eq.'to '.or.
|
|
$ talg.eq.'lato') lapl=.true.
|
|
|
|
|
|
if(nocc.eq.0 .or. nvirt.eq.0) then
|
|
tfact=0.d0
|
|
goto 123
|
|
end if
|
|
c
|
|
TIME0(times)
|
|
C {{{ Preparing memory, fock, T1 and T2
|
|
|
|
c common part
|
|
call dbldealloc(iscrs) ! has to match the end of memory where talgmem is calculated @ccsdpt
|
|
ifdo=dblalloc(nocc)
|
|
ifdv=dblalloc(nvirt)
|
|
C Sort diagonals of the fock matrix
|
|
do i=1,nocc
|
|
dcore(ifdo+i-1)=fij(i,i)
|
|
enddo
|
|
do a=1,nvirt
|
|
dcore(ifdv+a-1)=fab(a,a)
|
|
enddo
|
|
C decompressing t2 amplitudes
|
|
itnew=dblalloc(nvirt*nocc)
|
|
call dcopy(nvirt*nocc,t,1,dcore(ittend),1)
|
|
call dcopy(nvirt*nocc,dcore(ittend),1,dcore(itnew),1)
|
|
ittnew=dblalloc(nvirt**2*nocc**2) ! must immediately follow itnew in memory
|
|
call dcopy(nvirt**2*(nocc+1)*nocc/2,tt,1,dcore(ittend),1) ! save tt, ittnew may overwrite tt
|
|
ttind=0
|
|
do j=1,nocc
|
|
do i=1,j
|
|
ttind=ttind+1
|
|
call dcopy(nvirt**2,dcore(ittend+(ttind-1)*nvirt**2),1,
|
|
$dcore(ittnew+nvirt**2*(i-1)+nvirt**2*nocc*(j-1)),1)
|
|
call tr(nvirt, dcore(ittend+(ttind-1)*nvirt**2),
|
|
$dcore(ittnew+nvirt**2*(j-1)+nvirt**2*nocc*(i-1)))
|
|
enddo
|
|
enddo
|
|
|
|
|
|
c allocate remaining arrays
|
|
c talg=virt {{{
|
|
if(talg .eq. 'virt') then
|
|
!{{{
|
|
illjpai=imem
|
|
iIabIJ=imem
|
|
illjab=imem
|
|
iss=imem
|
|
ittl=imem
|
|
ittl2=imem
|
|
ittl2=imem
|
|
ittl3=imem
|
|
it1=imem
|
|
iabci=imem
|
|
abci_inmem=.true.
|
|
iapbc=imem
|
|
nblocki=1
|
|
iblocki=nocc-1
|
|
!}}}
|
|
jlen=nocc
|
|
|
|
if(ccsdalg .eq. 'dfdirect') then
|
|
call reorderdfints(dcore(ijij), dcore(ijab),
|
|
$ nocc, nvirt, dfnb, dcore(imove))
|
|
end if
|
|
|
|
iaijk=dblalloc(nvirt*nocc**3)
|
|
iabij=dblalloc(nvirt**2*nocc*jlen) ! abij only for j=1 if lapl
|
|
|
|
avmem=maxcor-(imem-imem1) ! for talg=occ
|
|
itrr=dblalloc(nvirt**2*nocc**2) ! store also rr=tt^traspose for the full tt
|
|
rrd=nocc
|
|
|
|
if(localcc.ne.'off ')then
|
|
iv1=dblalloc(nocc**2)
|
|
iv2=dblalloc(nocc**2)
|
|
iv3=dblalloc(nocc**2)
|
|
iw1=dblalloc(nocc**2)
|
|
iw2=dblalloc(nocc**2)
|
|
iw3=dblalloc(nocc**2)
|
|
endif
|
|
|
|
iscro=dblalloc(nocc**3)
|
|
iw=dblalloc(nocc**3)
|
|
iv=dblalloc(wsize)
|
|
end if
|
|
c }}}
|
|
c talg=occ {{{
|
|
if(talg .eq. 'occ ') then
|
|
!{{{
|
|
iv1=1
|
|
iv2=1
|
|
iv3=1
|
|
iw1=1
|
|
iw2=1
|
|
iw3=1
|
|
iscro=1
|
|
nblocki=1
|
|
iblocki=nocc-1
|
|
illjpai=imem
|
|
iIabIJ=imem
|
|
illjab=imem
|
|
iss=imem
|
|
ittl=imem
|
|
ittl2=imem
|
|
ittl2=imem
|
|
ittl3=imem
|
|
it1=imem
|
|
iapbc=imem
|
|
iabci=imem
|
|
abci_inmem=.true.
|
|
!}}}
|
|
|
|
itemp = dblalloc(nocc*nvirt)
|
|
jlen=nocc
|
|
|
|
if(ccsdalg .eq. 'dfdirect') then
|
|
call reorderdfints(dcore(ijij), dcore(ijab),
|
|
$ nocc, nvirt, dfnb, dcore(imove))
|
|
end if
|
|
|
|
iaijk=dblalloc(nvirt*nocc**3)
|
|
if (ccsdalg.eq.'dfdirect') then
|
|
talgoccmin=ptthreads*(nvirt*nocc*max(nvirt,nocc) +
|
|
$ 3*nvirt**3+wsize) + nvirt**3 !minimal memory to be allocated besides abij: min trr, w,v, min abci
|
|
if (talgoccmin+nocc**2*nvirt**2 .gt. maxcor-(imem-imem1))
|
|
$ jlen=1 ! full abij does not fit in memory
|
|
if (jlen.eq.1) iIabIJ=dblalloc(nvirt**2*ptthreads)
|
|
endif
|
|
iabij=dblalloc(nvirt**2*nocc*jlen) ! abij only for j=1 if lapl
|
|
|
|
avmem=maxcor-(imem-imem1) ! for talg=occ
|
|
call rralloc(avmem,nocc,nvirt,itrr,rrd,
|
|
$ nvirt**3 + ptthreads*(3*nvirt**3+wsize))
|
|
|
|
iw=dblalloc(nvirt**3*ptthreads)
|
|
vsize=wsize*ptthreads
|
|
iv=dblalloc(vsize)
|
|
endif
|
|
c }}}
|
|
c talg=lapl {{{
|
|
if(lapl) then
|
|
!{{{
|
|
iv1=1
|
|
iv2=1
|
|
iv3=1
|
|
iw1=1
|
|
iw2=1
|
|
iw3=1
|
|
iscro=1
|
|
!}}}
|
|
|
|
itemp = dblalloc(nocc*nvirt)
|
|
jlen=nocc
|
|
|
|
if(lccoporder.eq.'trffirst' .or. ccsdalg .eq. 'disk ')
|
|
$ imo=dblalloc(nocc**2) ! occ basis trf mx to the GSL basis
|
|
if(ccsdalg .eq. 'disk ') then
|
|
ijij=dblalloc(dfnb*nocc**2)
|
|
ijia=dblalloc(dfnb*nocc*nvirt)
|
|
ijab=dblalloc(dfnb*nvirt**2)
|
|
itmp=dblalloc(dfnb*max(nvirt*(nvirt+1)/2,nocc*nvirt, ! tmp array for readdfints
|
|
$ nocc*(nocc+1)/2))
|
|
else
|
|
itmp = imem
|
|
end if
|
|
|
|
call readdfints(dcore(ijij),dcore(ijia),dcore(ijab),dcore(imo),
|
|
$ nocc,nvirt,dfnb,dcore(itmp),dcore(itmp),dcore(itmp),
|
|
$ localcc,ccsdalg,lccoporder,dcore(imove))
|
|
call dbldealloc(itmp)
|
|
|
|
iaijk=dblalloc(max(nvirt*nocc**3,dfnb*nocc*ovmax)) ! also jij and ljapi in trf2laplbasis
|
|
if(localcc.ne.'off ') jlen=1
|
|
illjpai=dblalloc(dfnb*nvirt*nocc)
|
|
iIabIJ=dblalloc(ovmax**2*ptthreads)
|
|
illjab=ijab ! warning lljab overwrites jab
|
|
iabij=dblalloc(nvirt**2*nocc*jlen) ! abij only for j=1 if lapl
|
|
|
|
iss=ittnew
|
|
ittl=dblalloc(nvirt*nocc*ovmax)! Laplace second type of T2(-,~,-,-k) for k=1
|
|
ittl2=dblalloc(nvirt**2*nocc)! Laplace second type of T2(-,~,-k,-) for k=1
|
|
ittl3=dblalloc(nvirt**2*ptthreads)! Laplace second type of T2(-,~,-i,-j) for fixed i and j
|
|
it1=dblalloc(nocc*ovmax) ! an other for t1
|
|
|
|
iw=dblalloc(
|
|
$ max(nvirt**3*ptthreads,dfnb*nocc**2,nocc**2*nvirt)) ! for W and for jij, labc, ofock1, ttijb, ttbij in trf2laplbasis
|
|
vsize=max(wsize*ptthreads,dfnb)
|
|
iv=dblalloc(vsize)
|
|
|
|
!minimum memory for blocked abci: abci block for a single occ index & abc1 & Apbc & rr for k=1
|
|
laplminmem = 2*nvirt**3 + nvirt**2*(dfnb+nocc*ptthreads)
|
|
abcistoremin = nvirt**3*nocc + nvirt**2*nocc*ptthreads ! store full abci & rr(a,b,i,1)
|
|
if(localcc .eq. 'off ') then
|
|
ccmem(7)=imem+abcistoremin-imem1
|
|
else
|
|
ccmem(7)=imem+min(laplminmem,abcistoremin)-imem1
|
|
end if
|
|
|
|
abci_inmem=.false.
|
|
iapbc=imem
|
|
nblocki=1
|
|
iblocki=nocc-1
|
|
avmem=maxcor-(imem-imem1) ! for talg=occ
|
|
if (avmem.gt.abcistoremin.or.nocc.eq.1) then ! store full abci & rr(a,b,i,1) & w and v; elseif does not work for nocc=1
|
|
abci_inmem=.true.
|
|
iabci=dblalloc(nvirt**3*nocc)
|
|
avmem=maxcor-(imem-imem1)
|
|
call rralloc(avmem,nocc,nvirt,itrr,rrd,
|
|
$ (nvirt**3+wsize)*ptthreads)
|
|
|
|
elseif(avmem.gt.laplminmem.and..not.abci_inmem
|
|
$ .and.nocc.gt.1) then! store only smaller block of abci, does not work for nocc=1
|
|
tmpmemdgemm=xyzomp*(2*nvirt**3+nvirt**2) ! extra unallocated space for dgemms
|
|
write(iout,"(f12.2,
|
|
$' MB more memory is required for more CPU efficient execution')")
|
|
$(abcistoremin+tmpmemdgemm+nvirt**2*nocc*(nocc-1)-avmem)
|
|
$*8.d0/(1024.d0**2)
|
|
write(iout, *)
|
|
laplminmem=laplminmem-(nvirt**2*nocc)*ptthreads ! minus the space for minimal itrr
|
|
call rralloc(avmem,nocc,nvirt,itrr,rrd,
|
|
$ laplminmem+tmpmemdgemm)
|
|
iapbc=dblalloc(dfnb*nvirt**2)
|
|
avmem=maxcor-(imem-imem1)
|
|
c determine block size, iblock and num of blocks, nblocki
|
|
abciblmemmax=avmem-!laplminmem+nvirt**2*nocc*ptthreads-
|
|
$ tmpmemdgemm ! v**2*o*ptthreads for rr
|
|
if (floor(dble(abciblmemmax)/dble(nvirt**3)) .lt. 2)
|
|
$ abciblmemmax=abciblmemmax+tmpmemdgemm ! not enough extra mem space for dgemms
|
|
iblocki=min(floor(dble(abciblmemmax)/dble(nvirt**3)), nocc) ! width of blocks, must be < nocc (abc1 is fixed at 1st position)
|
|
if (iblocki .lt. min(2, nocc)) then
|
|
write(iout,*)
|
|
$ 'Insufficient memory for Laplace (T) correction',
|
|
$ iblocki
|
|
call mrccend(1)
|
|
endif
|
|
c nblocki=floor(dble(nocc-1)/dble(iblocki)) ! number of blocks, -1 due to fixed abc1
|
|
c if (iblocki*nblocki.lt.nocc) nblocki=nblocki+1 ! +1 for the last (smaller) block if needed
|
|
iabci=dblalloc(nvirt**3*iblocki)
|
|
|
|
else
|
|
write(iout,*)'Insufficient memory!',
|
|
$ (laplminmem-avmem)*8.d0/1024**3,
|
|
$ 'GB more memory is required'
|
|
call mrccend(1)
|
|
endif
|
|
if (.not.abci_inmem .and. localcc.eq.'off ') then
|
|
write(iout,*)
|
|
$ 'Full abci list is stored in memory for canonical',
|
|
$ 'Laplace transformed (T), insufficient memory!'
|
|
call mrccend(1)
|
|
endif
|
|
|
|
c if (localcc.ne.'off ') then
|
|
c write(iout,'(f12.2," MB memory is utilized for (T)")')
|
|
c $ (imem-imem1)*8.d0/1024**2
|
|
c ccmem(8)=imem-imem1
|
|
c endif
|
|
endif
|
|
c }}}
|
|
c!{{{
|
|
c if(talg.eq.'occ ' .or. lapl) itemp = dblalloc(nocc*nvirt)
|
|
c jlen=nocc
|
|
c
|
|
c if (lapl) then
|
|
c if(lccoporder.eq.'trffirst'.or.ccsdalg .eq. 'disk ')
|
|
c $ imo=dblalloc(nocc**2) ! occ basis trf mx to the GSL basis
|
|
c if(ccsdalg .eq. 'disk ') then
|
|
c ijij=dblalloc(dfnb*nocc**2)
|
|
c ijia=dblalloc(dfnb*nocc*nvirt)
|
|
c ijab=dblalloc(dfnb*nvirt**2)
|
|
c itmp=dblalloc(dfnb*max(nvirt*(nvirt+1)/2,nocc*nvirt, ! tmp array for readdfints
|
|
c $ nocc*(nocc+1)/2))
|
|
c else
|
|
c itmp = imem
|
|
c end if
|
|
c call readdfints(dcore(ijij),dcore(ijia),dcore(ijab),dcore(imo),
|
|
c $nocc,nvirt,dfnb,dcore(itmp),dcore(itmp),dcore(itmp),localcc,
|
|
c $ccsdalg,lccoporder,dcore(imove))
|
|
c call dbldealloc(itmp)
|
|
c iaijk=dblalloc(max(nvirt*nocc**3,dfnb*nocc*ovmax)) ! also jij and ljapi in trf2laplbasis
|
|
c if(localcc.ne.'off ') jlen=1
|
|
c illjpai=dblalloc(dfnb*nvirt*nocc)
|
|
c iIabIJ=dblalloc(ovmax**2*ptthreads)
|
|
c illjab=ijab ! warning lljab overwrites jab
|
|
c else
|
|
c if(ccsdalg .eq. 'dfdirect') then
|
|
c call reorderdfints(dcore(ijij), dcore(ijab),
|
|
c $ nocc, nvirt, dfnb, dcore(imove))
|
|
c end if
|
|
c iaijk=dblalloc(nvirt*nocc**3)
|
|
c illjpai=imem
|
|
c iIabIJ=imem
|
|
c illjab=imem
|
|
c endif
|
|
c if (talg.eq.'occ '.and.ccsdalg.eq.'dfdirect') then
|
|
cc talgoccmin=nvirt**2*nocc+nvirt**3+wsize !minimal memory to be allocated besides abij: min trr, w,v, min abci
|
|
c talgoccmin=ptthreads*(nvirt*nocc*max(nvirt,nocc) +
|
|
c $ 3*nvirt**3+wsize) + nvirt**3 !minimal memory to be allocated besides abij: min trr, w,v, min abci
|
|
c if (talgoccmin+nocc**2*nvirt**2.gt.maxcor-(imem-imem1)) jlen=1 ! full abij does not fit in memory
|
|
c if (jlen.eq.1) iIabIJ=dblalloc(nvirt**2*ptthreads)
|
|
c endif
|
|
c iabij=dblalloc(nvirt**2*nocc*jlen) ! abij only for j=1 if lapl
|
|
c if (lapl) then
|
|
c iss=ittnew
|
|
c ittl=dblalloc(nvirt*nocc*ovmax)! Laplace second type of T2(-,~,-,-k) for k=1
|
|
c ittl2=dblalloc(nvirt**2*nocc)! Laplace second type of T2(-,~,-k,-) for k=1
|
|
c ittl3=dblalloc(nvirt**2*ptthreads)! Laplace second type of T2(-,~,-i,-j) for fixed i and j
|
|
c it1=dblalloc(nocc*ovmax) ! an other for t1
|
|
c else
|
|
c iss=imem
|
|
c ittl=imem
|
|
c ittl2=imem
|
|
c ittl2=imem
|
|
c endif
|
|
c!minimum memory for blocked abci: abci block for a single occ index & abc1 & w & v & Apbc & rr for k=1
|
|
c if (lapl) then
|
|
c laplminmem=(2+ptthreads)*nvirt**3+wsize*ptthreads+
|
|
c $ nvirt**2*(dfnb+nocc)
|
|
c abcistoremin=nvirt**3*(nocc+ptthreads)+wsize*ptthreads+
|
|
c $ nvirt**2*nocc ! store full abci & rr(a,b,i,1) & w and v
|
|
c if(localcc .eq. 'off ') then
|
|
c ccmem(7)=imem+abcistoremin-imem1
|
|
c else
|
|
c ccmem(7)=imem+min(laplminmem,abcistoremin)-imem1
|
|
c end if
|
|
c endif
|
|
c abci_inmem=.true.
|
|
c if (lapl) abci_inmem=.false.
|
|
c iapbc=imem
|
|
c nblocki=1
|
|
c iblocki=nocc-1
|
|
c avmem=maxcor-(imem-imem1) ! for talg=occ
|
|
c if (talg.eq.'virt') then
|
|
c itrr=dblalloc(nvirt**2*nocc**2) ! store also rr=tt^traspose for the full tt
|
|
c rrd=nocc
|
|
c elseif (talg.eq.'occ ') then
|
|
c call rralloc(avmem,nocc,nvirt,itrr,rrd,
|
|
c $ nvirt**3 + ptthreads*(3*nvirt**3+wsize))
|
|
c elseif (lapl) then
|
|
cc write(iout,*) 'WARNING restore if/elseif for iblocki test'
|
|
c if (avmem.gt.abcistoremin.or.nocc.eq.1) then ! store full abci & rr(a,b,i,1) & w and v; elseif does not work for nocc=1
|
|
cc if (.false.) then
|
|
c abci_inmem=.true.
|
|
c iabci=dblalloc(nvirt**3*nocc)
|
|
c avmem=maxcor-(imem-imem1)
|
|
c call rralloc(avmem,nocc,nvirt,itrr,rrd,
|
|
c $ (nvirt**3+wsize)*ptthreads)
|
|
c elseif(avmem.gt.laplminmem.and..not.abci_inmem
|
|
c $ .and.nocc.gt.1) then! store only smaller block of abci, does not work for nocc=1
|
|
cc elseif(.true.) then
|
|
c tmpmemdgemm=xyzomp*(2*nvirt**3+nvirt**2) ! extra unallocated space for dgemms
|
|
c write(iout,"(f12.2,
|
|
c $' MB more memory is required for more CPU efficient execution')")
|
|
c $ (abcistoremin+tmpmemdgemm+nvirt**2*nocc*(nocc-1)-avmem)
|
|
c $ *8.d0/1024**2
|
|
c laplminmem=laplminmem-(nvirt**2*nocc)*ptthreads ! minus the space for minimal itrr
|
|
c call rralloc(avmem,nocc,nvirt,itrr,rrd,
|
|
c $ laplminmem+tmpmemdgemm)
|
|
c avmem=maxcor-(imem-imem1)
|
|
cc determine block size, iblock and num of blocks, nblocki
|
|
c abciblmemmax=avmem-laplminmem+nvirt**2*nocc*ptthreads-
|
|
c $ tmpmemdgemm ! v**2*o*ptthreads for rr
|
|
c if (floor(dble(abciblmemmax)/dble(nvirt**3)).lt.
|
|
c $ (1.d0+ptthreads))
|
|
c $ abciblmemmax=abciblmemmax+tmpmemdgemm ! not enough extra mem space for dgemms
|
|
c iblocki=min(floor(dble(abciblmemmax)/dble(nvirt**3)),
|
|
c $ max(nocc-1,1)) ! width of blocks, must be < nocc (abc1 is fixed at 1st position)
|
|
cc write(*,*) 'tmpmemdgemm,abciblmemmax,avmem,laplminmem,iblocki'! dbprt xxx
|
|
cc write(*,*) tmpmemdgemm,abciblmemmax,avmem,laplminmem,iblocki ! dbprt xxx
|
|
cc write(*,*) dble(abciblmemmax)/dble(nvirt**3) ! dbprt xxx
|
|
cc flush(6)
|
|
c if (iblocki+1.lt.2) then
|
|
c write(iout,*)'Insufficient memory for Laplace (T) correction'
|
|
c $ ,iblocki
|
|
c call mrccend(1)
|
|
c endif
|
|
c nblocki=floor(dble(nocc-1)/dble(iblocki)) ! number of blocks, -1 due to fixed abc1
|
|
c if (1+iblocki*nblocki.lt.nocc) nblocki=nblocki+1 ! +1 for the last (smaller) block if needed
|
|
c iapbc=dblalloc(dfnb*nvirt**2)
|
|
c iabci=dblalloc(nvirt**3*(iblocki+1)) ! +1: also abc1 is kept in this array
|
|
c else
|
|
c write(iout,*)'Insufficient memory!',
|
|
c $(laplminmem-avmem)*8.d0/1024**3,'GB more memory is required'
|
|
c call mrccend(1)
|
|
c endif
|
|
c endif ! talg.eq.
|
|
cc write(*,*) 'full abci,min',(imem-imem1+nvirt**3*nocc)*8./1024**3,
|
|
cc $laplminmem*8.d0/1024**3
|
|
cc write(*,*) 'nblocki,iblocki,nocc',nblocki,iblocki
|
|
cc write(*,*) 'abci_inmem',abci_inmem,nocc
|
|
c if (.not.abci_inmem.and.localcc.eq.'off ') then
|
|
c write(iout,*)'Full abci list is stored in memory for canonical',
|
|
c $ 'Laplace transformed (T), insufficient memory!'
|
|
c call mrccend(1)
|
|
c endif
|
|
c if(localcc.ne.'off '.and.talg.eq.'virt')then
|
|
c iv1=dblalloc(nocc**2)
|
|
c iv2=dblalloc(nocc**2)
|
|
c iv3=dblalloc(nocc**2)
|
|
c iw1=dblalloc(nocc**2)
|
|
c iw2=dblalloc(nocc**2)
|
|
c iw3=dblalloc(nocc**2)
|
|
c else
|
|
c iv1=1
|
|
c iv2=1
|
|
c iv3=1
|
|
c iw1=1
|
|
c iw2=1
|
|
c iw3=1
|
|
c endif
|
|
c if(talg.eq.'virt')then
|
|
c iscro=dblalloc(nocc**3)
|
|
c elseif(talg.eq.'lapl'.or.talg.eq.'occ '.or.lapl) then
|
|
c iscro=1
|
|
c endif
|
|
c if(talg.eq.'virt')then
|
|
c iw=dblalloc(nocc**3)
|
|
c iv=dblalloc(wsize)
|
|
c elseif(talg.eq.'lapl'.or.talg.eq.'occ '.or.lapl) then
|
|
c if (talg.eq.'occ ') then
|
|
c iw=dblalloc(nvirt**3*ptthreads)
|
|
c else if (talg.eq.'lapl'.or.lapl) then
|
|
c iw=dblalloc(
|
|
c $max(nvirt**3*ptthreads,dfnb*nocc**2,nocc**2*nvirt)) ! for W and for jij, labc, ofock1, ttijb, ttbij in trf2laplbasis
|
|
c end if
|
|
c if (talg.eq.'lapl'.or.lapl) then
|
|
c vsize=max(wsize*ptthreads,dfnb)
|
|
c else if(talg .eq. 'occ ') then
|
|
c vsize=wsize*ptthreads
|
|
c else if(talg .eq. 'virt') then
|
|
c vsize=wsize
|
|
c end if
|
|
c iv=dblalloc(vsize)
|
|
c endif
|
|
c if (localcc.ne.'off '.and.talg.eq.'lapl') then
|
|
c write(iout,'(f12.2," MB memory is utilized for (T)")')
|
|
c $ (imem-imem1)*8.d0/1024**2
|
|
c ccmem(8)=imem-imem1
|
|
c endif
|
|
cc }}}!}}}
|
|
TIMEADD(times(1,24),times)
|
|
c
|
|
c evaluate (T) energy
|
|
if(talg.eq.'virt')then
|
|
call ttsort(dcore(ittnew),dcore(iabij),nocc,nvirt)
|
|
call dcopy(nocc*nvirt,dcore(itnew),1,dcore(iabij),1)
|
|
call gtrans(dcore(iabij),dcore(itnew),nvirt,nocc) !transpose t1
|
|
c if(localcc.eq.'off ')then
|
|
C Calculate (T) correction, virtual loops out
|
|
call localbtcorr(dcore(itnew),dcore(ittnew),dcore(ifdo),
|
|
$ dcore(ifdv),dcore(iaijk),dcore(iabij),dcore(iscro),
|
|
$ dcore(iw),dcore(iv),bsymmv,symmocc,dgroup,first,last,co,
|
|
$ localcc,dcore(iv1),dcore(iv2),dcore(iv3),dcore(iw1),
|
|
$ dcore(iw2),dcore(iw3),ui,et,times,dcore(itrr),ccsdalg,
|
|
$ dcore(ijij),dcore(ijia),dcore(ijab),dfnb,irecln,ibufln,
|
|
$ nbasis,nocc,nvirt,nirmax,multpg,imem,iout,maxcor,dcore)
|
|
c elseif(lapl) then
|
|
c if (talg.eq.'lapl'.or.talg.eq.'lato') then
|
|
cC Get Laplace quadrature
|
|
c call getkey('laptol',6,laptol,16)
|
|
c read(laptol,*) ltol
|
|
c nqmin=1
|
|
c if (ltol.lt.1.d0) nqmin=floor(-dlog10(ltol))+1
|
|
c write(iout,"(' Laplace threshold, nquadmin:',es10.2,i4)")
|
|
c $ ltol,nqmin
|
|
c call laplace((nvirt*nocc)**3,nquad,dcore(imem),dcore(ifdo),
|
|
c $dcore(ifdv),nvirt,nocc,dcore(ifdo),dcore(ifdv),0,0,3,3,ltol,
|
|
c $gbasfile,iout,dcore(imem),nqmin,0.d0,0,dcore(imem))
|
|
c iquad=dblalloc(nquad*(nvirt+nocc))
|
|
c write(iout,"(1x,a29,i4)")'Number of quadrature points: ',nquad
|
|
c else
|
|
c iquad=imem
|
|
c nquad=1
|
|
c endif
|
|
cC Calculate local (T) correction
|
|
c call localbtcorr_lap(dcore(itnew),dcore(ittnew),dcore(ifdo),
|
|
c $dcore(ifdv),dcore(iaijk),dcore(iabij),dcore(iw),dcore(iv),
|
|
c $bsymmv,symmocc,dgroup,first,last,co,imem,dcore,
|
|
c $et,nquad,dcore(iquad),dcore(it1),dcore(iabci),dcore(ittl),
|
|
c $dcore(iss),dcore(itrr),rrd,abci_inmem,times,dcore(imo),
|
|
c $dcore(ijij),dcore(ijia),dcore(ijab),dfnb,localcc,talg,
|
|
c $dcore(illjab),dcore(illjpai),dcore(iapbc),nblocki,iblocki,
|
|
c $dcore(iv),dcore(iIabIJ),dcore(ittl2),dcore(ittl3),ccsdalg,
|
|
c $dcore(ijia),
|
|
c $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg,iout,
|
|
c $mpi_rank,master_thread,mpi_size,mpi_ccsd,ccsdmkl,ptthreads,omp_max_threads,
|
|
c $ccsd_communicator)
|
|
elseif(talg.eq.'occ ' .or. lapl) then
|
|
if (talg.eq.'lapl'.or.talg.eq.'lato') then
|
|
C Get Laplace quadrature
|
|
call getkey('laptol',6,laptol,16)
|
|
read(laptol,*) ltol
|
|
nqmin=1
|
|
if (ltol.lt.1.d0) nqmin=floor(-dlog10(ltol))+1
|
|
write(iout,"(' Laplace threshold, nquadmin:',es10.2,i4)")
|
|
$ ltol,nqmin
|
|
call laplace((nvirt*nocc)**3,nquad,dcore(imem),dcore(ifdo),
|
|
$dcore(ifdv),nvirt,nocc,dcore(ifdo),dcore(ifdv),0,0,3,3,ltol,
|
|
$gbasfile,iout,dcore(imem),nqmin,0.d0,0,dcore(imem))
|
|
iquad=dblalloc(nquad*(nvirt+nocc))
|
|
write(iout,"(1x,a29,i4)")'Number of quadrature points: ',
|
|
$ nquad
|
|
ccmem(7) = ccmem(7) + nquad*(nvirt+nocc)
|
|
c write(iout,'(f12.2,
|
|
c $ " MB is the minimal memory requirement for (T)")')
|
|
c $ ccmem(7)*8.d0/1024**2
|
|
c write(iout,'(f12.2," MB memory is utilized for (T)")')
|
|
c $ (imem-imem1)*8.d0/1024**2
|
|
ccmem(8)=imem-imem1
|
|
else
|
|
iquad=imem
|
|
nquad=1
|
|
endif
|
|
C Calculate (T) correction, occupied loops outside
|
|
call btcorr(dcore(itnew),dcore(ittnew),dcore(ifdo),
|
|
$ dcore(ifdv),dcore(iaijk),dcore(iabij),dcore(iw),
|
|
$ dcore(iv),bsymmv,symmocc,dgroup,first,last,co,et,times,
|
|
$ dcore(itrr),rrd,ccsdalg,dcore(ijij),dcore(ijia),
|
|
$ dcore(ijab),dfnb,jlen,dcore(iIabIJ),nquad,dcore(iquad),
|
|
$ dcore(it1),dcore(iabci),dcore(iabci+nvirt**3),
|
|
$ dcore(ittl),dcore(iss),abci_inmem,dcore(imo),localcc,
|
|
$ talg,dcore(illjab),dcore(illjpai),dcore(iapbc),nblocki,
|
|
$ iblocki,dcore(iv),dcore(ittl2),dcore(ittl3), lapl,irecln,
|
|
$ ibufln,nbasis,nocc,nvirt,nirmax,multpg, iout,maxcor,imem,
|
|
$ dcore,iimem,icore,dcore(itemp),mpi_rank,mpi_size,
|
|
$ mpi_ccsd,ccsdmkl, ptthreads, omp_max_threads,
|
|
$ mkl_max_threads, ccsdrest, ptrest, bcast_comm,
|
|
$ ccsd_communicator, lno, lnaf, ets, master_rank,
|
|
$ master_thread,lf12,tfact)
|
|
call dbldealloc(itemp)
|
|
endif
|
|
|
|
123 continue
|
|
|
|
if(localcc.ne.'off ')then
|
|
write(iout,*)
|
|
write(iout,'(" (T) correction [au]: ",f22.12)') et
|
|
write(iout,'(" CCSD(T) correlation energy [au]:",f22.12)')
|
|
$ et+esdi
|
|
write(iout,*)
|
|
call prtenergc('CCSD(T) ', et+esdi, eref, locnoc)
|
|
else
|
|
call getkey('dfintran',8,dfintran,8)
|
|
write(iout,*)
|
|
if(lf12) then
|
|
write(iout,'(" (T) correction [au]: ",
|
|
$ f22.12)') et
|
|
write(iout,'(" (T*) correction [au]: ",
|
|
$ f22.12)') et*tfact
|
|
write(iout,'(" (T+) correction [au]: ",
|
|
$ f22.12)') ets
|
|
write(iout,'(" CCSD(F12*)(T) correlation energy [au]:",
|
|
$ f22.12)') et+ecc-eref
|
|
write(iout,'(" CCSD(F12*)(T*) correlation energy [au]:",
|
|
$ f22.12)') tfact*et+ecc-eref
|
|
write(iout,'(" CCSD(F12*)(T+) correlation energy [au]:",
|
|
$ f22.12)') ets+ecc-eref
|
|
if((lno .or. lnaf) .and. dfintran.ne.'ovirt ') then
|
|
write(iout,'(" CCSD(F12*)(T) corr. energy + MP2 [au]:",
|
|
$ f22.12)') et+ecc-eref+emp2corr
|
|
write(iout,'(" CCSD(F12*)(T*) corr. energy + MP2 [au]:",
|
|
$ f22.12)') tfact*et+ecc-eref+emp2corr
|
|
write(iout,'(" CCSD(F12*)(T+) corr. energy + MP2 [au]:",
|
|
$ f22.12)') ets+ecc-eref+emp2corr
|
|
endif
|
|
write(iout,'(" Total CCSD(F12*)(T) energy [au]: ",
|
|
$ f22.12)') et+ecc
|
|
write(iout,'(" Total CCSD(F12*)(T*) energy [au]: ",
|
|
$ f22.12)') tfact*et+ecc
|
|
write(iout,'(" Total CCSD(F12*)(T+) energy [au]: ",
|
|
$ f22.12)') ets+ecc
|
|
if((lno .or. lnaf) .and. dfintran.ne.'ovirt ') then
|
|
write(iout,'(" Total CCSD(F12*)(T) energy + MP2 [au]:",
|
|
$ f22.12)') et+ecc+emp2corr
|
|
write(iout,'(" Total CCSD(F12*)(T*) energy + MP2 [au]:",
|
|
$ f22.12)') tfact*et+ecc+emp2corr
|
|
write(iout,'(" Total CCSD(F12*)(T+) energy + MP2 [au]:",
|
|
$ f22.12)') ets+ecc+emp2corr
|
|
endif
|
|
et=ets
|
|
et_corrected = et + ecc
|
|
if((lno .or. lnaf) .and. dfintran.ne.'ovirt ')
|
|
$ et_corrected = et_corrected+emp2corr
|
|
else
|
|
write(iout,
|
|
$ '(" (T) correction [au]: ",
|
|
$ f22.12)') et
|
|
|
|
if((lno .or. lnaf) .and. dfintran.ne."ovirt ") then
|
|
write(iout,
|
|
$ '(" (T*) correction [au]: ",
|
|
$ f22.12)') et*tfact
|
|
write(iout,
|
|
$ '(" (T+) correction [au]: ",
|
|
$ f22.12)') ets
|
|
end if
|
|
|
|
write(iout,
|
|
$ '(" CCSD(T) correlation energy [au]: ",
|
|
$ f22.12)') et+ecc-eref
|
|
if((lno .or. lnaf) .and. dfintran.ne."ovirt ") write(iout,
|
|
$ '(" CCSD(T) correlation energy + MP2 correction [au]:",
|
|
$ f22.12)') et+ecc-eref+emp2corr
|
|
if(ppl_calc) then !(lno .or. lnaf) .and. dfintran.ne."ovirt ") then
|
|
write(iout,
|
|
$ '(" CCSD(T+) correlation en. + MP2 + PPL corr. [au]: ",
|
|
$ f22.12)') ets+ecc-eref+emp2corr+eppl_correction
|
|
else if((lno .or. lnaf) .and. dfintran.ne."ovirt ") then
|
|
write(iout,
|
|
$ '(" CCSD(T+) correlation en. + MP2 correction [au]: ",
|
|
$ f22.12)') ets+ecc-eref+emp2corr
|
|
end if
|
|
|
|
write(iout,
|
|
$ '(" Total CCSD(T) energy [au]: ",
|
|
$ f22.12)') et+ecc
|
|
et_corrected = et + ecc
|
|
|
|
if((lno .or. lnaf) .and. dfintran.ne."ovirt ")
|
|
$ write(iout,
|
|
$ '(" Total CCSD(T) energy + MP2 correction [au]: ",
|
|
$ f22.12)') et+ecc+emp2corr
|
|
et_corrected = et + ecc + emp2corr
|
|
|
|
if(ppl_calc) then !(lno .or. lnaf) .and. dfintran.ne."ovirt ") then
|
|
write(iout,
|
|
$ '(" Total CCSD(T+) energy + MP2 + PPL corr. [au]: ",
|
|
$ f22.12)') ets+ecc+emp2corr+eppl_correction
|
|
et_corrected = ets+ecc+emp2corr+eppl_correction
|
|
else if((lno .or. lnaf) .and. dfintran.ne."ovirt ") then
|
|
write(iout,
|
|
$ '(" Total CCSD(T+) energy + MP2 correction [au]: ",
|
|
$ f22.12)') ets+ecc+emp2corr
|
|
et_corrected = ets+ecc+emp2corr
|
|
endif
|
|
write(iout,*)
|
|
c et_corrected = et + ecc - eref
|
|
end if ! lf12
|
|
|
|
c call prtenergc('CCSD(T) ', et+ecc-eref, eref, locnoc)
|
|
call prtenergc('CCSD(T) ',
|
|
c $ et+ecc-eref+emp2corr+eppl_correction, eref, locnoc)
|
|
$ et_corrected-eref, eref, locnoc)
|
|
endif
|
|
call flush(iout)
|
|
TIMEADD(times(1,23),ttmp)
|
|
c timing data
|
|
# if defined(MPI)
|
|
if(mpi_ccsd .and. mpi_size .ne. 1)
|
|
$ call MPI_Barrier(ccsd_communicator, mpi_err)
|
|
#endif
|
|
if (WRITE_TIMES) then
|
|
times(:,12:20) = times(:,12:20) / ptthreads
|
|
times(:,27:28) = times(:,27:28) / ptthreads
|
|
times(:,37:55) = times(:,37:55) / ptthreads
|
|
times(:,57:64) = times(:,57:64) / ptthreads
|
|
write(iout,*)
|
|
write(iout,"(1x,70('*'))")
|
|
write(iout,'(a94)') ' Time of manipulations:' //
|
|
$ '1 it. CPU/Wall [min], tot CPU/Wall [min], CPU/Wall ratio, tot %'
|
|
write(iout,*)
|
|
txt(12)='" total abci in (T) "'
|
|
txt(13)='" i1=i2 total abci*T2 in (T) "'
|
|
txt(14)='" i1=i2 total ijka*T2 in (T) "'
|
|
txt(15)='" i1=i3 total abci*T2 in (T) "'
|
|
txt(16)='" i1=i3 total ijka*T2 in (T) "'
|
|
txt(17)='" not = total abci*T2 in (T) "'
|
|
txt(18)='" not = total ijka*T2 in (T) "'
|
|
txt(19)='" total V constr. in (T) "'
|
|
txt(20)='" total E evaluation in (T) "'
|
|
txt(21)='" total abci*T2 in (T) "'
|
|
txt(22)='" total ijka*T2 in (T) "'
|
|
txt(23)='" total (T) correction "'
|
|
txt(24)='" T2 "'
|
|
txt(25)='" abck "'
|
|
txt(26)='" ptindices "'
|
|
txt(27)='" W & V "'
|
|
txt(28)='" communication "'
|
|
txt(29)='" test_iprobe "'
|
|
txt(30)='" cancel_after while "'
|
|
txt(31)='" isend_recv "'
|
|
times(3:4,21)=times(3:4,13)+times(3:4,15)+times(3:4,17)
|
|
times(3:4,22)=times(3:4,14)+times(3:4,16)+times(3:4,18)
|
|
imax=31
|
|
list(1:imax)=(/(i,i=1,imax)/)
|
|
do ii=12, 31 ! 1,imax
|
|
i=list(ii)
|
|
ttmp(1,1)=times(4,6)
|
|
if (i.ge.12) then
|
|
nit=1
|
|
ttmp(1,1)=times(4,23)
|
|
end if
|
|
write(txt(0),*) trim(txt(i))
|
|
write(iout,"(" // txt(0) // ",1x,7f10.3)")
|
|
$ times(3, i)/nit/60.d0, times(4, i)/nit/60.d0,
|
|
$ times(3, i)/60.d0, times(4, i)/60.d0,
|
|
$ times(3, i)/times(4, i), times(4, i)/ttmp(1, 1)*100.d0
|
|
enddo
|
|
c (T) fine grain timing
|
|
ttmp(1,1)=times(4,23)
|
|
txt(37)='" abci*T2 i = j 37 b <ba|di>*Tkjcd "'
|
|
txt(38)='" abci*T2 i = j 38 c <ca|di>*Tjkbd "'
|
|
txt(39)='" abci*T2 i = j 39 a <ac|dk>*Tijbd "'
|
|
txt(40)='" abci*T2 j = k 40 c <ca|di>*Tjkbd "'
|
|
txt(41)='" abci*T2 j = k 41 a <ac|dk>*Tijbd "'
|
|
txt(42)='" abci*T2 j = k 42 e <cb|dj>*Tikad "'
|
|
txt(43)='" abci*T2 not = 43 a <ac|dk>*Tijbd "'
|
|
txt(44)='" abci*T2 not = 44 b <ba|di>*Tkjcd "'
|
|
txt(45)='" abci*T2 not = 45 c <ca|di>*Tjkbd "'
|
|
txt(46)='" abci*T2 not = 46 d <bc|dk>*Tijad "'
|
|
txt(47)='" abci*T2 not = 47 e <cb|dj>*Tikad "'
|
|
txt(48)='" abci*T2 not = 48 f <ab|dj>*Tkicd "'
|
|
|
|
txt(49)='" ijka*T2 not = 49 perm "'
|
|
txt(50)='" ijka*T2 not = 50 a <cj|kl>*Tilab "'
|
|
txt(51)='" ijka*T2 not = 51 b <bk|jl>*Tilac "'
|
|
txt(52)='" ijka*T2 not = 52 c <bi|jl>*Tklac "'
|
|
txt(53)='" ijka*T2 not = 53 d <ak|il>*Tjlbc "'
|
|
txt(54)='" ijka*T2 not = 54 e <aj|il>*Tklcb "'
|
|
txt(55)='" ijka*T2 not = 55 f <ci|kl>*Tjlba "'
|
|
txt(56)='" ijka*T2 not = 56 sum "'
|
|
txt(57)='" ijka*T2 i = j 57 b <bk|jl>*Tilac "'
|
|
txt(58)='" ijka*T2 i = j 58 c <bi|jl>*Tklac "'
|
|
txt(59)='" ijka*T2 i = j 59 a <cj|kl>*Tilab "'
|
|
txt(60)='" ijka*T2 j = k 60 b <bk|jl>*Tilac "'
|
|
txt(61)='" ijka*T2 j = k 61 c <bi|jl>*Tklac "'
|
|
txt(62)='" ijka*T2 j = k 62 d <ak|il>*Tjlbc "'
|
|
txt(63)='" ijka*T2 i = j 63 perm "'
|
|
txt(64)='" ijka*T2 j = k 64 perm "'
|
|
c do ii=0,9
|
|
c i=40+ii
|
|
cc write(txt(i),"(" // txt(99) // ",1x,1i3)") ii+1
|
|
c if (ii.le.8) times(3:4,49)=times(3:4,49)+times(3:4,i)
|
|
c write(txt(0),*) trim(txt(i))
|
|
c write(iout,"(" // txt(0) // ",1x,7f10.3)")
|
|
c $ times(3,i)/nit/60.d0,times(4,i)/nit/60.d0,
|
|
c $ times(3,i)/60.d0,times(4,i)/60.d0,times(3,i)/times(4,i),
|
|
c $ times(4,i)/ttmp(1,1)*100.d0
|
|
c enddo
|
|
times(3:4,56)=0.d0
|
|
do ii=-13,14
|
|
i=50+ii
|
|
if(i.eq.56) cycle
|
|
if (ii.le.5) times(3:4,56)=times(3:4,56)+times(3:4,i)
|
|
write(txt(0),*) trim(txt(i))
|
|
write(iout,"(" // txt(0) // ",1x,7f10.3)")
|
|
$ times(3, i)/nit/60.d0, times(4, i)/nit/60.d0,
|
|
$ times(3, i)/60.d0, times(4, i)/60.d0,
|
|
$ times(3, i)/times(4, i), times(4, i)/ttmp(1, 1)*100.d0
|
|
enddo
|
|
endif ! timing data
|
|
|
|
if(localcc.ne.'off '.and.talg.eq.'virt')then
|
|
open(722,file='iabc')
|
|
close(722,status="delete")
|
|
endif
|
|
c call dbldealloc(iui)
|
|
|
|
return
|
|
end subroutine !}}}
|
|
c
|
|
c {{{ localbtcorr
|
|
************************************************************************
|
|
subroutine localbtcorr(t,tt,fdo,fdv,ijka,ijab,scro,w,v,
|
|
$bsymmv,symmocc,dgroup,first,last,co,localcc,
|
|
$v1,v2,v3,w1,w2,w3,ui,et,times,rr,ccsdalg,jij,jia,jab,dfnb,
|
|
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg,imem,iout,
|
|
$maxcor,dcore)
|
|
************************************************************************
|
|
implicit none
|
|
integer irecln,ibufln,nbasis,nocc,nvirt,dgroup,nirmax,imem,iout
|
|
integer bsymmv(nirmax+1),symmocc(nirmax),multpg(nirmax,nirmax)
|
|
integer maxcor
|
|
real*8 dcore(*)
|
|
integer first(0:5,8,8,nbasis),last(0:5,8,8,nbasis)
|
|
integer co(nbasis)
|
|
real*8 t(nocc,nvirt),tt(nocc,nocc,nvirt,nvirt)
|
|
real*8 rr(nocc,nocc,nvirt,nvirt)
|
|
real*8 fdv(nvirt),fdo(nocc)
|
|
real*8 et,sum,y,z
|
|
integer a,b,c,i,j,k
|
|
real*8 w(nocc,nocc,nocc),v(nocc,nocc,nocc)
|
|
real*8 delta,delta1,delta2,delta3
|
|
real*8 fcb,fcba,fkj,fkji
|
|
real*8 ijab(nocc,nocc,nvirt,nvirt)
|
|
real*8 ijka(nocc,nocc,nocc,nvirt)
|
|
real*8 scro(nocc,nocc,nocc)
|
|
real*8 w1(nocc,nocc),w2(nocc,nocc),w3(nocc,nocc)
|
|
real*8 v1(nocc,nocc),v2(nocc,nocc),v3(nocc,nocc)
|
|
real*8 ui(nocc),pr,prold,dcba
|
|
cnp real*8 cut !NP
|
|
logical notend
|
|
integer ind,n,avmem,la,lb,astart,istart,prk,cba
|
|
integer imem1
|
|
common/memcom/imem1
|
|
character*4 localcc
|
|
character*8 ccsdalg
|
|
integer dfnb, ic
|
|
real*8 jij(dfnb, nocc, nocc), jia(nocc, nvirt, dfnb)
|
|
real*8 jab(dfnb, nvirt, nvirt)
|
|
real*8 scr(nvirt, nocc), scr1o(nocc), scr1v(nvirt)
|
|
real*8, pointer :: iab_c(:,:,:,:), ibc_ofa(:,:,:), ica_ofb(:,:,:)
|
|
C !NP
|
|
real*8 times(6,0:100)
|
|
|
|
interface
|
|
c {{{ interfaces for pointers
|
|
subroutine rpoint3d(egydim,haromdim,dim1,dim2,dim3)
|
|
implicit none
|
|
integer dim1,dim2,dim3
|
|
real*8,target :: egydim(dim1,dim2,dim3)
|
|
real*8, pointer :: haromdim(:,:,:)
|
|
end subroutine
|
|
subroutine rpoint4d(egydim,haromdim,dim1,dim2,dim3,dim4)
|
|
implicit none
|
|
integer dim1,dim2,dim3,dim4
|
|
real*8,target :: egydim(dim1,dim2,dim3,dim4)
|
|
real*8, pointer :: haromdim(:,:,:,:)
|
|
end subroutine
|
|
c}}}
|
|
end interface
|
|
cnp character*8 tvirtcut,cscr !NP
|
|
cnp call getkey('tvirtcut',8,tvirtcut,8)
|
|
cnp read(tvirtcut,*) cut !NP
|
|
cnp cut=cut/100.d0
|
|
cnp if (cut.lt.1.d0) then
|
|
cnp write(cscr,'(f8.1)') (1.d0-cut)*100.d0
|
|
cnp write(iout,*)
|
|
cnp write(iout,*) 'Highest lying' // trim(cscr) // '% of virtual MOs'
|
|
cnp $ // ' are excluded from (T) correction'
|
|
cnp write(iout,*)
|
|
cnp endif
|
|
!NP
|
|
c
|
|
|
|
cc TIME0(times)
|
|
c transpose T2 to R
|
|
do a=1,nvirt
|
|
do b=1,nvirt
|
|
call dcopy(nocc**2,tt(1:nocc,1:nocc,a,b),1,
|
|
$ rr(1:nocc,1:nocc,b,a),1)
|
|
enddo
|
|
enddo
|
|
if(ccsdalg .eq. 'disk ') then
|
|
call ijabread(ijab,bsymmv,symmocc,dgroup,first,last,co,v,
|
|
$ irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
|
|
call ijkaread(ijka,bsymmv,symmocc,dgroup,first,last,co,v,
|
|
$ irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
|
|
call sortabci(bsymmv,symmocc,dgroup,co,first,last,v,
|
|
$ maxcor,imem,imem1,dcore,nocc,nvirt,nbasis,iout,nirmax,
|
|
$ irecln,ibufln,multpg)
|
|
else if(ccsdalg .eq. 'dfdirect') then
|
|
! (ia|jb) = <ij|ab>
|
|
call dgemm('n', 't', nocc*nvirt, nocc*nvirt, dfnb,
|
|
$ 1.d0, jia, nocc*nvirt, jia, nocc*nvirt,
|
|
$ 0.d0, ijab, nocc*nvirt) ! store (ia|jb) in ijab
|
|
! iajb -> ijab
|
|
do b = 1, nvirt
|
|
do i = 1, nocc
|
|
call dcopy(nvirt*nocc, ijab(i, 1, 1, b), nocc,
|
|
$ scr, 1)
|
|
|
|
do j = 1, nocc
|
|
ijab(i, j, 1:nvirt, b) = scr(1:nvirt, j)
|
|
end do
|
|
end do
|
|
end do
|
|
|
|
! (ik|ja) = <ij|ka>
|
|
call dgemm('t', 't', nocc**2, nocc*nvirt, dfnb,
|
|
$ 1.d0, jij, dfnb, jia, nocc*nvirt,
|
|
$ 0.d0, ijka, nocc**2)
|
|
! ikja -> ijka
|
|
do a = 1, nvirt
|
|
do i = 1, nocc
|
|
do k = 1, nocc
|
|
scr1o(k+1:nocc) = ijka(i, k+1:nocc, k, a)
|
|
ijka(i, k+1:nocc, k, a) = ijka(i, k, k+1:nocc, a)
|
|
ijka(i, k, k+1:nocc, a) = scr1o(k+1:nocc)
|
|
end do
|
|
end do
|
|
end do
|
|
end if
|
|
cc TIMEADD(times(1,12),times)
|
|
C Construct W Eq.4
|
|
et=0.d0
|
|
ind=1
|
|
notend=.true.
|
|
prk=0
|
|
pr=0.d0
|
|
prold=0.d0
|
|
c all cba combination:
|
|
cba=0
|
|
do k=1,nvirt
|
|
do i=k+1,nvirt ! j=k
|
|
cba=cba+1
|
|
enddo
|
|
do j=k+1,nvirt ! j>k
|
|
do i=j,nvirt
|
|
cba=cba+1
|
|
enddo
|
|
enddo
|
|
enddo
|
|
c dnvirt=dble(nvirt)
|
|
dcba=dble(cba)
|
|
|
|
do while(notend)
|
|
avmem=maxcor-(imem-imem1)
|
|
if(avmem.ge.3*nvirt**2*nocc)then
|
|
n=(avmem-modulo(avmem,nvirt**2*nocc))/(nvirt**2*nocc)
|
|
else
|
|
write(iout,*)'Insufficient memory for <ia|bc> block!'
|
|
call mrccend(1)
|
|
endif
|
|
c
|
|
if (ind.eq.1) then
|
|
write(iout,'(f12.2," MB memory is utilized for (T)")')
|
|
$ (imem+n*nvirt**2*nocc-imem1)*8.d0/1024**2
|
|
write(iout,'(" 1% done.")')
|
|
endif
|
|
c
|
|
if(ind+n-1.ge.nvirt)then
|
|
notend=.false.
|
|
n=nvirt-ind+1
|
|
endif
|
|
if(ind+n-1.ne.nvirt)then
|
|
n=n-2
|
|
endif
|
|
call rpoint4d(dcore(imem), iab_c, nocc, nvirt, nvirt, n)
|
|
if (notend) then
|
|
call rpoint3d(dcore(imem+nocc*nvirt**2*n), ica_ofb,
|
|
$ nocc, nvirt, nvirt)
|
|
call rpoint3d(dcore(imem+nocc*nvirt**2*(n+1)), ibc_ofa,
|
|
$ nocc, nvirt, nvirt)
|
|
endif
|
|
cc TIME0(times)
|
|
if(ccsdalg .eq. 'disk ') then
|
|
c call iabcread(n,ind,dcore(imem),v,nocc,nvirt,irecln,ibufln)
|
|
call iabcread(n,ind,iab_c,v,nocc,nvirt,irecln,ibufln)
|
|
else if(ccsdalg .eq. 'dfdirect') then
|
|
! (ib|ac) = <ia|bc>
|
|
call dgemm('n', 'n', nocc*nvirt, nvirt*n, dfnb,
|
|
$ 1.d0, jia, nocc*nvirt, jab(1, 1, ind), dfnb,
|
|
$ 0.d0, iab_c, nocc*nvirt)
|
|
! ibac -> iabc
|
|
do c = 1, n
|
|
do i = 1, nocc
|
|
do b = 1, nvirt
|
|
scr1v(b+1:nvirt) = iab_c(i, b+1:nvirt, b, c)
|
|
iab_c(i,b+1:nvirt,b,c) = iab_c(i,b,b+1:nvirt,c)
|
|
iab_c(i, b, b+1:nvirt, c) = scr1v(b+1:nvirt)
|
|
end do
|
|
end do
|
|
end do
|
|
end if
|
|
cc TIMEADD(times(1,12),times)
|
|
c
|
|
do c=ind,ind+n-1
|
|
pr=dble(prk)/dcba
|
|
if(pr.ge.prold+0.1d0.and.pr.ne.1.d0)then
|
|
prold=pr
|
|
write(iout,'(i4,"% done.")')nint(pr*100)
|
|
endif
|
|
cnp if (c.gt.cut*nvirt) cycle !NP since a>=b>=c; a,b,c>cut*nvirt
|
|
do b=c,nvirt
|
|
if(b.gt.ind+n-1)then
|
|
lb=n+1
|
|
cc TIME0(times)
|
|
if(ccsdalg .eq. 'disk ') then
|
|
c call iabcread(1,b,dcore(imem+(lb-1)*nocc*nvirt**2),v,
|
|
c $ nocc,nvirt,irecln,ibufln)
|
|
call iabcread(1,b,ica_ofb,v,nocc,nvirt,irecln,ibufln)
|
|
else
|
|
! (ia|cb) = <ic|ab>
|
|
call dgemm('n', 'n', nocc*nvirt, nvirt, dfnb,
|
|
$ 1.d0, jia, nocc*nvirt, jab(1, 1, b), dfnb,
|
|
$ 0.d0, ica_ofb, nocc*nvirt)
|
|
! iacb -> icab
|
|
do i = 1, nocc
|
|
do a = 1, nvirt
|
|
scr1v(a+1:nvirt) = ica_ofb(i, a+1:nvirt, a)
|
|
ica_ofb(i, a+1:nvirt, a) = ica_ofb(i, a, a+1:nvirt)
|
|
ica_ofb(i, a, a+1:nvirt) = scr1v(a+1:nvirt)
|
|
end do
|
|
end do
|
|
end if
|
|
cc TIMEADD(times(1,12),times)
|
|
else
|
|
lb=b-ind+1
|
|
endif
|
|
delta1=1.d0
|
|
if(b.eq.c)delta1=delta1+1.d0
|
|
fcb=fdv(c)+fdv(b)
|
|
if(b.eq.c)then !a=b=c => no contribution!
|
|
astart=b+1
|
|
else
|
|
astart=b
|
|
endif
|
|
cnp if (b.gt.cut*nvirt) cycle !NP since a>=b; b,c>cut*nvirt
|
|
do a=astart,nvirt
|
|
c do a=b,nvirt
|
|
la=a-ind+1
|
|
if(a.gt.ind+n-1.and.a.ne.b)then
|
|
la=n+2
|
|
cc TIME0(times)
|
|
if(ccsdalg .eq. 'disk ') then
|
|
c call iabcread(1,a,dcore(imem+(la-1)*nocc*nvirt**2),v,nocc,nvirt,irecln,ibufln)
|
|
call iabcread(1,a,ibc_ofa,v,nocc,nvirt,irecln,ibufln)
|
|
else
|
|
! (ic|ba) = <ib|ca>
|
|
call dgemm('n', 'n', nocc*nvirt, nvirt, dfnb,
|
|
$ 1.d0, jia, nocc*nvirt, jab(1, 1, a), dfnb,
|
|
$ 0.d0, ibc_ofa, nocc*nvirt)
|
|
! icba -> ibca
|
|
do i = 1, nocc
|
|
do ic = 1, nvirt
|
|
scr1v(ic+1:nvirt) = ibc_ofa(i, ic, ic+1:nvirt)
|
|
ibc_ofa(i, ic, ic+1:nvirt) =
|
|
$ ibc_ofa(i, ic+1:nvirt, ic)
|
|
ibc_ofa(i, ic+1:nvirt, ic) = scr1v(ic+1:nvirt)
|
|
end do
|
|
end do
|
|
end if
|
|
cc TIMEADD(times(1,12),times)
|
|
endif
|
|
if(a.gt.ind+n-1.and.a.eq.b)then
|
|
la=lb
|
|
endif
|
|
delta=delta1
|
|
if(a.eq.b)delta=delta+1.d0
|
|
fcba=fcb+fdv(a)
|
|
c
|
|
c {{{ inactive part: build w with loops
|
|
c do k=1,nocc
|
|
c do j=1,nocc
|
|
c do i=1,nocc
|
|
c sum=0.d0
|
|
c sum2=0.d0
|
|
c do d=1,nvirt
|
|
c sum=sum+iabc(i,d,a,b)*tt(k,j,c,d)
|
|
c sum=sum+iabc(i,d,a,c)*tt(j,k,b,d)
|
|
c sum=sum+iabc(k,d,c,a)*tt(j,i,b,d)
|
|
|
|
c sum=sum+iabc(k,d,c,b)*tt(i,j,a,d)
|
|
c sum=sum+iabc(j,d,b,c)*tt(i,k,a,d)
|
|
c sum=sum+iabc(j,d,b,a)*tt(k,i,c,d)
|
|
c enddo
|
|
c do l=1,nocc
|
|
c sum=sum-ijka(j,k,l,c)*tt(i,l,a,b)
|
|
c sum=sum-ijka(k,j,l,b)*tt(i,l,a,c)
|
|
c sum=sum-ijka(i,j,l,b)*tt(k,l,c,a)
|
|
|
|
c sum=sum-ijka(j,i,l,a)*tt(k,l,c,b)
|
|
c sum=sum-ijka(k,i,l,a)*tt(j,l,b,c)
|
|
c sum=sum-ijka(i,k,l,c)*tt(j,l,b,a)
|
|
c enddo
|
|
c w(i,j,k)=sum
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
c }}}
|
|
IF(a.eq.b)THEN
|
|
c {{{ a.eq.b OR b.eq.c
|
|
cc TIME0(times)
|
|
call dgemm('n','t',nocc,nocc**2,nvirt,1.d0,dcore(imem+
|
|
$(a-1)*nocc*nvirt+(la-1)*nocc*nvirt**2),nocc,
|
|
$tt(1,1,1,c),nocc**2,0.d0,v,nocc)
|
|
|
|
call dgemm('n','t',nocc**2,nocc,nvirt,1.d0,tt(1,1,1,a),nocc**2,
|
|
$dcore(imem+
|
|
$(a-1)*nocc*nvirt+(c-ind)*nocc*nvirt**2),nocc,0.d0,scro,nocc**2)
|
|
do k=1,nocc
|
|
call daxpy(nocc**2,1.d0,scro(k,1,1),nocc,v(1,1,k),1)
|
|
enddo
|
|
|
|
call dgemm('n','t',nocc**2,nocc,nvirt,1.d0,tt(1,1,1,a),nocc**2,
|
|
$dcore(imem+
|
|
$(c-1)*nocc*nvirt+(la-1)*nocc*nvirt**2),nocc,1.d0,v,nocc**2)
|
|
cc TIMEADD(times(1,13),times)
|
|
cc TIME0(times)
|
|
|
|
C 2.tag
|
|
call dgemm('n','t',nocc,nocc**2,nocc,-1.d0,tt(1,1,a,a),nocc,
|
|
$ijka(1,1,1,c),nocc**2,1.d0,v,nocc)
|
|
call dgemm('n','n',nocc,nocc**2,nocc,-1.d0,tt(1,1,a,c),nocc,
|
|
$ijka(1,1,1,a),nocc,1.d0,v,nocc)
|
|
call dgemm('n','t',nocc**2,nocc,nocc,-1.d0,ijka(1,1,1,a),nocc**2,
|
|
$tt(1,1,c,a),nocc,1.d0,v,nocc**2)
|
|
|
|
call dcopy(nocc**3,v,1,w,1)
|
|
do k=1,nocc
|
|
call tradd(nocc,1.d0,v(1,1,k),w(1,1,k))
|
|
enddo
|
|
cc TIMEADD(times(1,14),times)
|
|
ELSE IF(b.eq.c)THEN
|
|
cc TIME0(times)
|
|
call dgemm('n','t',nocc,nocc**2,nvirt,1.d0,dcore(imem+
|
|
$(a-1)*nocc*nvirt+(lb-1)*nocc*nvirt**2),nocc,
|
|
$tt(1,1,1,b),nocc**2,0.d0,v,nocc)
|
|
|
|
call dgemm('n','t',nocc**2,nocc,nvirt,1.d0,tt(1,1,1,b),nocc**2,
|
|
$dcore(imem+
|
|
$(b-1)*nocc*nvirt+(la-1)*nocc*nvirt**2),nocc,1.d0,v,nocc**2)
|
|
|
|
call dgemm('n','t',nocc**2,nocc,nvirt,1.d0,tt(1,1,1,a),nocc**2,
|
|
$dcore(imem+
|
|
$(b-1)*nocc*nvirt+(lb-1)*nocc*nvirt**2),nocc,0.d0,scro,nocc**2)
|
|
do k=1,nocc
|
|
call daxpy(nocc**2,1.d0,scro(k,1,1),nocc,v(1,1,k),1)
|
|
enddo
|
|
cc TIMEADD(times(1,15),times)
|
|
cc TIME0(times)
|
|
|
|
C 2.tag
|
|
call dgemm('n','t',nocc,nocc**2,nocc,-1.d0,tt(1,1,a,b),nocc,
|
|
$ijka(1,1,1,b),nocc**2,1.d0,v,nocc)
|
|
call dgemm('n','t',nocc**2,nocc,nocc,-1.d0,ijka(1,1,1,b),nocc**2,
|
|
$tt(1,1,b,a),nocc,1.d0,v,nocc**2)
|
|
call dgemm('t','n',nocc**2,nocc,nocc,-1.d0,
|
|
$ijka(1,1,1,a),nocc,tt(1,1,b,b),nocc,1.d0,v,nocc**2)
|
|
|
|
call dcopy(nocc**3,v,1,w,1)
|
|
do k=1,nocc
|
|
do j=1,nocc
|
|
c do i=1,nocc
|
|
c w(i,j,k)=w(i,j,k)+v(i,k,j)
|
|
c enddo
|
|
call daxpy(nocc,1.d0,v(j,k,1),nocc**2,w(j,1,k),nocc)
|
|
enddo
|
|
enddo
|
|
cc TIMEADD(times(1,16),times)
|
|
c }}}
|
|
ELSE
|
|
c {{{ first 6 term abci*T2
|
|
cc TIME0(times(1,99))
|
|
c a)
|
|
cc TIME0(times)
|
|
call dgemm('n','t',nocc,nocc**2,nvirt,1.d0,dcore(imem+
|
|
$(a-1)*nocc*nvirt+(lb-1)*nocc*nvirt**2),nocc,
|
|
$tt(1,1,1,c),nocc**2,0.d0,w,nocc)
|
|
cc TIMEADD(times(1,40),times)
|
|
c d)
|
|
cc TIME0(times)
|
|
call dgemm('n','t',nocc**2,nocc,nvirt,1.d0,tt(1,1,1,b),nocc**2,
|
|
$dcore(imem+
|
|
$(c-1)*nocc*nvirt+(la-1)*nocc*nvirt**2),nocc,1.d0,w,nocc**2)
|
|
cc TIMEADD(times(1,44),times)
|
|
c f)
|
|
cc TIME0(times)
|
|
call dgemm('n','t',nocc**2,nocc,nvirt,1.d0,tt(1,1,1,a),nocc**2,
|
|
$dcore(imem+
|
|
$(b-1)*nocc*nvirt+(c-ind)*nocc*nvirt**2),nocc,0.d0,scro,nocc**2)
|
|
cc TIMEADD(times(1,47),times)
|
|
c old do k=1,nocc
|
|
c old call daxpy(nocc**2,1.d0,scro(k,1,1),nocc,w(1,1,k),1)
|
|
c old enddo
|
|
c c) <jd|ba>*R(ki,dc)=<jd|ba>*T(ki,cd)
|
|
cc TIME0(times)
|
|
call dgemm('n','t',nocc**2,nocc,nvirt,1.d0,rr(1,1,1,c),nocc**2,
|
|
$dcore(imem+
|
|
$(b-1)*nocc*nvirt+(la-1)*nocc*nvirt**2),nocc,1.d0,scro,nocc**2)
|
|
cc TIMEADD(times(1,42),times)
|
|
cccc c)
|
|
c old call dgemm('n','t',nocc**2,nocc,nvirt,1.d0,tt(1,1,1,c),nocc**2,
|
|
c old $dcore(imem+
|
|
c old $(b-1)*nocc*nvirt+(la-1)*nocc*nvirt**2),nocc,1.d0,scro,nocc**2)
|
|
c old do k=1,nocc
|
|
c old do j=1,nocc
|
|
c old call daxpy(nocc,1.d0,scro(j,k,1),nocc**2,w(j,1,k),nocc)
|
|
c old enddo
|
|
c old enddo
|
|
c b) <id|ac>*R(jk,db)=<id|ac>*T(jk,bd)
|
|
cc TIME0(times)
|
|
call dgemm('n','t',nocc,nocc**2,nvirt,1.d0,dcore(imem+
|
|
$(a-1)*nocc*nvirt+(c-ind)*nocc*nvirt**2),nocc,
|
|
$rr(1,1,1,b),nocc**2,1.d0,w,nocc)
|
|
cc TIMEADD(times(1,41),times)
|
|
c e) R(ij,da)*<kd|bc>=T(ij,ad)*<kd|bc>
|
|
cc TIME0(times)
|
|
call dgemm('n','t',nocc**2,nocc,nvirt,1.d0,rr(1,1,1,a),nocc**2,
|
|
$dcore(imem+
|
|
$(c-1)*nocc*nvirt+(lb-1)*nocc*nvirt**2),nocc,1.d0,w,nocc**2)
|
|
cc TIMEADD(times(1,45),times)
|
|
cc TIMEADD(times(1,17),times(1,99))
|
|
c old do k=1,nocc
|
|
c old call tradd(nocc,1.d0,scro(1,1,k),w(1,1,k))
|
|
c old enddo
|
|
c }}}
|
|
c
|
|
c {{{ second 6 term ijka*T2
|
|
c b)
|
|
cc TIME0(times)
|
|
call dgemm('n','n',nocc,nocc**2,nocc,-1.d0,tt(1,1,a,c),nocc,
|
|
$ijka(1,1,1,b),nocc,1.d0,w,nocc)
|
|
cc TIMEADD(times(1,51),times)
|
|
c c)
|
|
cc TIME0(times)
|
|
call dgemm('n','n',nocc**2,nocc,nocc,-1.d0,ijka(1,1,1,b),nocc**2,
|
|
$tt(1,1,a,c),nocc,1.d0,w,nocc**2)
|
|
cc TIMEADD(times(1,52),times)
|
|
c a)
|
|
cc TIME0(times)
|
|
call dgemm('n','t',nocc,nocc**2,nocc,-1.d0,tt(1,1,a,b),nocc,
|
|
$ijka(1,1,1,c),nocc**2,1.d0,w,nocc)
|
|
cc TIMEADD(times(1,50),times)
|
|
c f)
|
|
cc TIME0(times)
|
|
call dgemm('t','n',nocc**2,nocc,nocc,-1.d0,ijka(1,1,1,c),nocc,
|
|
$tt(1,1,a,b),nocc,1.d0,scro,nocc**2)
|
|
cc TIMEADD(times(1,55),times)
|
|
c e) <ki|la>*T(lj,cb)
|
|
cc TIME0(times)
|
|
call dgemm('n','n',nocc**2,nocc,nocc,-1.d0,ijka(1,1,1,a),nocc**2,
|
|
$tt(1,1,c,b),nocc,1.d0,scro,nocc**2)
|
|
cc TIMEADD(times(1,54),times)
|
|
c d) T(kl,cb)*<li|ja>
|
|
cc TIME0(times)
|
|
call dgemm('n','n',nocc,nocc**2,nocc,-1.d0,tt(1,1,c,b),nocc,
|
|
$ijka(1,1,1,a),nocc,1.d0,scro,nocc)
|
|
cc TIMEADD(times(1,53),times)
|
|
cc TIME0(times)
|
|
do k=1,nocc
|
|
call daxpy(nocc**2,1.d0,scro(k,1,1),nocc,w(1,1,k),1)
|
|
enddo
|
|
cc TIMEADD(times(1,48),times)
|
|
c old do k=1,nocc
|
|
c old call dgemv('t',nocc,nocc**2,-1.d0,ijka(1,1,1,a),nocc,
|
|
c old $tt(1,k,b,c),1,1.d0,w(1,1,k),1)
|
|
c old call dgemm('t','t',nocc,nocc,nocc,-1.d0,ijka(1,1,k,a),nocc,
|
|
c old $tt(1,1,b,c),nocc,1.d0,w(1,1,k),nocc)
|
|
c old do l=1,nocc
|
|
c old call dger(nocc,nocc,-1.d0,ijka(1,k,l,c),1,
|
|
c old $tt(1,l,b,a),1,w(1,1,k),nocc)
|
|
c old enddo
|
|
c old enddo
|
|
cc TIMEADD(times(1,18),times(1,99))
|
|
c }}}
|
|
ENDIF
|
|
C Construct V Eq.5
|
|
cc TIME0(times)
|
|
c call dcopy(nocc**3,w,1,v,1)
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
C$OMP& PRIVATE(i)
|
|
do i=1,nocc
|
|
v(i,1:nocc,1:nocc)=w(i,1:nocc,1:nocc)
|
|
$ +t(i,a)*ijab(1:nocc,1:nocc,b,c)
|
|
enddo
|
|
C$OMP END PARALLEL DO
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
C$OMP& PRIVATE(j)
|
|
do j=1,nocc
|
|
v(1:nocc,j,1:nocc)=v(1:nocc,j,1:nocc)
|
|
$ +t(j,b)*ijab(1:nocc,1:nocc,a,c)
|
|
enddo
|
|
C$OMP END PARALLEL DO
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
C$OMP& PRIVATE(k)
|
|
do k=1,nocc
|
|
v(1:nocc,1:nocc,k)=v(1:nocc,1:nocc,k)
|
|
$ +t(k,c)*ijab(1:nocc,1:nocc,a,b)
|
|
enddo
|
|
C$OMP END PARALLEL DO
|
|
cc TIMEADD(times(1,19),times)
|
|
C Calculate Energy
|
|
c {{{ canonical
|
|
sum=0.d0
|
|
IF(localcc.eq.'off ')THEN
|
|
cc TIME0(times)
|
|
IF(a.eq.b.or.b.eq.c)THEN
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
C$OMP& PRIVATE(i,j,k,delta3,fkj,istart,delta2,fkji,y)
|
|
C$OMP& REDUCTION(+:sum)
|
|
do k=1,nocc
|
|
do j=k,nocc
|
|
delta3=1.d0
|
|
if(j.eq.k)delta3=delta3+1
|
|
fkj=fdo(k)+fdo(j)
|
|
if(j.eq.k)then
|
|
istart=j+1
|
|
else
|
|
istart=j
|
|
endif
|
|
do i=istart,nocc
|
|
delta2=delta3
|
|
if(i.eq.j)delta2=delta2+1
|
|
fkji=fkj+fdo(i)
|
|
|
|
y=v(i,j,k)+v(k,i,j)+v(j,k,i)
|
|
C I
|
|
sum=sum+(
|
|
$-(w(i,j,k)+w(k,i,j)+w(j,k,i))*(v(i,j,k)+v(k,i,j)+v(j,k,i))
|
|
|
|
$+(w(i,j,k)*v(i,j,k)
|
|
$+w(k,i,j)*v(k,i,j)
|
|
$+w(j,k,i)*v(j,k,i))*3.d0
|
|
|
|
$)/((fkji-fcba)*delta2)
|
|
|
|
enddo
|
|
enddo
|
|
enddo
|
|
C$OMP END PARALLEL DO
|
|
sum=2.d0*sum
|
|
ELSE ! .not.(a.eq.b.or.b.eq.c)
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
C$OMP& PRIVATE(i,j,k,delta3,fkj,istart,delta2,fkji,y,z)
|
|
C$OMP& REDUCTION(+:sum)
|
|
do k=1,nocc
|
|
do j=k,nocc
|
|
delta3=1.d0
|
|
if(j.eq.k)delta3=delta3+1
|
|
fkj=fdo(k)+fdo(j)
|
|
if(j.eq.k)then
|
|
istart=j+1
|
|
else
|
|
istart=j
|
|
endif
|
|
do i=istart,nocc
|
|
c do i=j,nocc
|
|
delta2=delta3
|
|
if(i.eq.j)delta2=delta2+1
|
|
fkji=fkj+fdo(i)
|
|
|
|
y=v(i,j,k)+v(k,i,j)+v(j,k,i)
|
|
z=v(i,k,j)+v(j,i,k)+v(k,j,i)
|
|
C I
|
|
sum=sum+(
|
|
$+(w(i,j,k)+w(k,i,j)+w(j,k,i))*(y-2.d0*z)
|
|
$+(w(i,k,j)+w(j,i,k)+w(k,j,i))*(z-2.d0*y)
|
|
|
|
$+(w(i,j,k)*v(i,j,k)
|
|
$+w(k,i,j)*v(k,i,j)
|
|
$+w(j,k,i)*v(j,k,i)
|
|
$+w(i,k,j)*v(i,k,j)
|
|
$+w(j,i,k)*v(j,i,k)
|
|
$+w(k,j,i)*v(k,j,i))*3.d0
|
|
|
|
$)/((fkji-fcba)*delta2)
|
|
|
|
enddo
|
|
enddo
|
|
enddo
|
|
C$OMP END PARALLEL DO
|
|
ENDIF
|
|
cc TIMEADD(times(1,20),times)
|
|
c }}}
|
|
ELSE ! localcc
|
|
c {{{ localcc energy
|
|
C Denominator in W!
|
|
do k=1,nocc
|
|
do j=1,nocc
|
|
fkj=fdo(k)+fdo(j)
|
|
do i=1,nocc
|
|
fkji=fkj+fdo(i)
|
|
w(i,j,k)=w(i,j,k)/(fkji-fcba)
|
|
c to compare with laplace
|
|
c w(i,j,k)=-w(i,j,k)/dsqrt(-fkji+fcba)
|
|
c v(i,j,k)=v(i,j,k)/dsqrt(-fkji+fcba)
|
|
enddo
|
|
enddo
|
|
enddo
|
|
c do k=1,nocc
|
|
C transforming the indices
|
|
call dgemv('t',nocc,nocc**2,1.d0,v,nocc,ui,1,0.d0,v1,1)
|
|
call dgemv('t',nocc,nocc**2,1.d0,w,nocc,ui,1,0.d0,w1,1)
|
|
call dgemv('n',nocc**2,nocc,1.d0,v,nocc**2,ui,1,0.d0,v3,1)
|
|
call dgemv('n',nocc**2,nocc,1.d0,w,nocc**2,ui,1,0.d0,w3,1)
|
|
do j=1,nocc
|
|
call dgemv('n',nocc,nocc,1.d0,v(1,1,j),nocc,ui,1,0.d0,
|
|
$v2(1,j),1)
|
|
call dgemv('n',nocc,nocc,1.d0,w(1,1,j),nocc,ui,1,0.d0,
|
|
$w2(1,j),1)
|
|
enddo
|
|
c
|
|
IF(a.eq.b)THEN
|
|
do j=1,nocc
|
|
sum=sum
|
|
$+ (w3(j,j)-w1(j,j))*(v3(j,j)-v1(j,j))
|
|
|
|
do i=j+1,nocc
|
|
sum=sum
|
|
$+ w3(i,j)*(2.d0*v3(i,j)-v1(i,j)-v2(j,i))
|
|
$+ w2(j,i)*(v2(j,i)-v3(i,j))
|
|
$+ w1(i,j)*(v1(i,j)-v3(i,j))
|
|
enddo
|
|
enddo
|
|
sum=2.d0*sum
|
|
|
|
ELSE IF(b.eq.c)THEN
|
|
do j=1,nocc
|
|
sum=sum
|
|
$+ (w3(j,j)-w1(j,j))*(v3(j,j)-v1(j,j))
|
|
|
|
do i=j+1,nocc
|
|
sum=sum
|
|
$+ w1(i,j)*(2.d0*v1(i,j)-v3(i,j)-v2(j,i))
|
|
$+ w2(j,i)*(v2(j,i)-v1(i,j))
|
|
$+ w3(i,j)*(v3(i,j)-v1(i,j))
|
|
enddo
|
|
enddo
|
|
sum=2.d0*sum
|
|
|
|
ELSE
|
|
do j=1,nocc
|
|
c if(i.eq.j)then
|
|
c y=v(k,j,j)+v(j,k,j)+v(j,j,k)
|
|
c sum=sum
|
|
c $+ w(j,j,k)*(3.d0*v(j,j,k)-y)
|
|
c $+ w(j,k,j)*(3.d0*v(j,k,j)-y)
|
|
c $+ w(k,j,j)*(3.d0*v(k,j,j)-y)
|
|
y=v1(j,j)+v2(j,j)+v3(j,j)
|
|
sum=sum
|
|
$+ w3(j,j)*(3.d0*v3(j,j)-y)
|
|
$+ w2(j,j)*(3.d0*v2(j,j)-y)
|
|
$+ w1(j,j)*(3.d0*v1(j,j)-y)
|
|
c endif
|
|
|
|
do i=j+1,nocc
|
|
c if(i.gt.j)then
|
|
c sum=sum+
|
|
c $ 2.d0/3.d0*
|
|
c $ (w(i,j,k)+w(j,k,i)+w(k,i,j)-w(i,k,j)-w(j,i,k)-w(k,j,i))*
|
|
c $ (v(i,j,k)+v(k,i,j)+v(j,k,i)-v(i,k,j)-v(j,i,k)-v(k,j,i))
|
|
|
|
c $+ w(i,j,k)*(2.d0*v(i,j,k)-v(i,k,j)-v(k,j,i))
|
|
c $+ w(j,k,i)*(2.d0*v(j,k,i)-v(k,j,i)-v(j,i,k))
|
|
c $+ w(k,i,j)*(2.d0*v(k,i,j)-v(j,i,k)-v(i,k,j))
|
|
c $+ w(i,k,j)*(2.d0*v(i,k,j)-v(i,j,k)-v(k,i,j))
|
|
c $+ w(j,i,k)*(2.d0*v(j,i,k)-v(k,i,j)-v(j,k,i))
|
|
c $+ w(k,j,i)*(2.d0*v(k,j,i)-v(j,k,i)-v(i,j,k))
|
|
sum=sum+
|
|
$ 2.d0/3.d0*
|
|
$ (w3(i,j)+w2(j,i)+w1(i,j)-w2(i,j)-w3(j,i)-w1(j,i))*
|
|
$ (v3(i,j)+v1(i,j)+v2(j,i)-v2(i,j)-v3(j,i)-v1(j,i))
|
|
|
|
$+ w3(i,j)*(2.d0*v3(i,j)-v2(i,j)-v1(j,i))
|
|
$+ w2(j,i)*(2.d0*v2(j,i)-v1(j,i)-v3(j,i))
|
|
$+ w1(i,j)*(2.d0*v1(i,j)-v3(j,i)-v2(i,j))
|
|
$+ w2(i,j)*(2.d0*v2(i,j)-v3(i,j)-v1(i,j))
|
|
$+ w3(j,i)*(2.d0*v3(j,i)-v1(i,j)-v2(j,i))
|
|
$+ w1(j,i)*(2.d0*v1(j,i)-v2(j,i)-v3(i,j))
|
|
c endif
|
|
enddo
|
|
enddo
|
|
c enddo
|
|
ENDIF
|
|
c }}}
|
|
ENDIF
|
|
et=et+sum/delta
|
|
enddo
|
|
prk=prk+nvirt-astart+1
|
|
enddo
|
|
enddo
|
|
ind=ind+n
|
|
enddo
|
|
write(iout,'(" 100% done.")')
|
|
if(localcc.eq.'off ') et=2.d0*et
|
|
c write(6,'(f17.12)')et
|
|
c call timer
|
|
return
|
|
end subroutine!}}}
|
|
c
|
|
cC {{{ laplace_localbtcorr
|
|
c************************************************************************
|
|
c subroutine localbtcorr_lap(t1,tt,fdo,fdv,aijk,abij,w,v,
|
|
c $bsymmv,symmocc,dgroup,first,last,co,imem,dcore,
|
|
c $et,nquad,quad,t,abci,ttl,ss,rr,rrd,abci_inmem,times,cmo,
|
|
c $jij,jia,jab,dfnb,localcc,talg,lljab,lljpai,apbc,nblocki,iblocki,
|
|
c $tmpvsize,IabIJ,ttl2,ttl3,ccsdalg,jai,
|
|
c $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg,iout,
|
|
c $mpi_rank,mpi_size,mpi_ccsd,ccsdmkl,ptthreads,omp_max_threads,
|
|
c $ccsd_communicator,master_thread)
|
|
c************************************************************************
|
|
c implicit none
|
|
cc for mpi
|
|
c logical mpi_ccsd
|
|
c#if defined(MPI)
|
|
c include "mpif.h"
|
|
c integer comm_scr, stat(MPI_STATUS_SIZE)
|
|
c#endif
|
|
c integer mpi_rank, mpi_size, mpi_err, request, npr
|
|
c integer find, lind, nrank, bsize, kk, prk_sent
|
|
c integer icount(0:nocc),kjinum(nocc), ccsd_communicator
|
|
c integer nummes, max_threads,jfrstm1,jpullback,actblocknn
|
|
c logical flag,ptdone
|
|
cc end mpi
|
|
c character*3 ccsdmkl
|
|
c integer ptthreads,omp_max_threads,imem,jinrr
|
|
c real*8 dcore(*)
|
|
c integer irecln,ibufln,nbasis,nocc,nvirt,dgroup,nirmax,iout
|
|
c integer bsymmv(nirmax+1),symmocc(nirmax),multpg(nirmax,nirmax)
|
|
c integer first(0:5,8,8,nbasis),last(0:5,8,8,nbasis),nquad,iquad
|
|
c integer co(nbasis),rrd,dfnb
|
|
c integer a,b,c,i,j,k,kmax,jj
|
|
c real*8 t(nvirt,nocc),t1(nvirt,nocc),tt(nvirt,nvirt,nocc,nocc)
|
|
c real*8 fdv(nvirt),fdo(nocc),quad(nvirt+nocc,nquad),fi,fj,fij,fbij
|
|
c real*8 ttl(nvirt,nvirt,nocc,1),ss(nvirt,nvirt,nocc,nocc)
|
|
c real*8 ttl2(nvirt,nvirt,1,nocc),ttl3(nvirt,nvirt),fb
|
|
c real*8 rr(nvirt,nvirt,nocc,rrd)
|
|
c real*8 et,sum,y,z,fc
|
|
c real*8 w(nvirt,nvirt,nvirt),v(nvirt,nvirt,nvirt)
|
|
c real*8 pr,prold,IabIJ(nvirt,nvirt)
|
|
c real*8 delta,delta1,delta2,delta3,dkji
|
|
c real*8 fcb,fcba,fkj,fkji,tmpvsize(*)
|
|
c real*8 abij(nvirt,nvirt,nocc,*) !<12|12>
|
|
c real*8 aijk(nvirt,nocc,nocc,nocc) !<12|12>
|
|
c real*8 abci(nvirt,nvirt,nvirt,*),cmo(nocc,nocc)
|
|
c real*8 jij(dfnb,nocc,nocc),jab(dfnb,nvirt,nvirt)
|
|
c real*8 jia(nocc,nvirt,dfnb),lljab(dfnb,nvirt,nvirt)
|
|
c real*8 lljpai(dfnb,nvirt,nocc),apbc(dfnb,nvirt,nvirt),rdummy
|
|
c logical notend,abci_inmem,denom,abijinmem,master_thread
|
|
c integer ind,n,astart,istart,li,lj,kinrr,prk,kji,ljj,jst
|
|
c integer nblocki,iblocki,actblock,actblocki
|
|
c character*4 localcc,talg
|
|
c
|
|
c character*8 ccsdalg
|
|
c real*8 scr(nocc, nvirt), jai(nvirt, nocc, dfnb)
|
|
c real*8 scr1v(nvirt), scr1o(nocc)
|
|
c
|
|
c integer nind ! dummy
|
|
c integer indices(nocc) ! dummy
|
|
cc
|
|
c real*8 times(6,0:100),fact
|
|
cc real*8 lt1(nvirt,nocc),scrt(nvirt,nvirt,nocc,nocc)
|
|
cc real*8 cpabci(nvirt,nvirt,nvirt,nocc),scr2(nvirt,nocc,nocc,nocc)
|
|
c logical can,iinbl
|
|
c real*8 epair(nocc,nocc)
|
|
c
|
|
c ptthreads=1 !!!!!!!!!!
|
|
c
|
|
c nummes = 0
|
|
c#if defined(MPI)
|
|
c comm_scr = 0
|
|
c if(localcc.eq.'off '.and.mpi_size.ne.1) then
|
|
c write(iout,*)'talg=lapl is not implemented with MPI&localcc=off'
|
|
c call mrccend(1)
|
|
c endif
|
|
c#endif
|
|
c abijinmem=.true.
|
|
c if (localcc.ne.'off ') abijinmem=.false.
|
|
cc
|
|
c if(localcc .eq. 'off ') then
|
|
c ! jai contains Jia, transpose it
|
|
c do k = 1, dfnb
|
|
c call dcopy(nvirt*nocc, jai(1, 1, k), 1, scr, 1)
|
|
c
|
|
c do i = 1, nocc
|
|
c call dcopy(nvirt, scr(i, 1), nocc, jai(1, i, k), 1)
|
|
c end do
|
|
c end do
|
|
c end if
|
|
c
|
|
c epair=0.d0
|
|
cc open(112,file='tripenergies')
|
|
cc
|
|
c if (.false.) then
|
|
c if (talg.eq.'topr') then
|
|
c write(*,*) 'warning cmo=I test'
|
|
c cmo=0.d0
|
|
c do i=1,nocc
|
|
c cmo(i,i)=1.d0
|
|
c enddo
|
|
c endif
|
|
c endif
|
|
cc write(*,*) 'rrd=nocc, warning',rrd
|
|
cc rrd=nocc
|
|
c can=.false.
|
|
c if (can) then
|
|
c write(*,*) 'warning quad=1'
|
|
c quad=1.d0
|
|
c localcc='off '
|
|
c endif
|
|
c if (localcc.eq.'off '.and..not.abci_inmem) then
|
|
c write(iout,*)'Full abci list is stored in memory for canonical',
|
|
c $'Laplace transformed (T), insufficient memory, should not be here'
|
|
c call mrccend(1)
|
|
c endif
|
|
cc write(*,*) 'warning can=T'
|
|
cc can=.true.
|
|
cc start
|
|
c et=0.d0
|
|
c quad=dsqrt(quad)
|
|
c prk=0
|
|
c pr=0.d0
|
|
c prold=0.d0
|
|
c denom=talg.eq.'topr'.or.talg.eq.'to '
|
|
c fact=1.d0
|
|
c kmax=1
|
|
c if(localcc.eq.'off ') kmax=nocc
|
|
cc all kji combination:
|
|
c icount(0:nocc) = 0
|
|
c kji=0
|
|
c npr=max(0,nocc-iblocki-1) ! max number of j for which j is outside the block and Ajk is needed
|
|
c do k=1,kmax
|
|
c do i=k+1,nocc ! j=k
|
|
c kji=kji+1
|
|
c enddo
|
|
c kjinum(1) = kji
|
|
c icount(1) = (6 * (nvirt**4 + nvirt**3*nocc) + ! T3
|
|
c $ 35 * nvirt**3) * kji + nvirt**3*dfnb + ! energy and <ab|cj>
|
|
c $ nvirt**3*dfnb*2 + npr*nvirt**4 ! Ajk, Akj and J*A
|
|
c do j=k+1,nocc ! j>k
|
|
c do i=j,nocc
|
|
c kji=kji+1
|
|
c enddo
|
|
c kjinum(j) = kji
|
|
c icount(j) = (6 * (nvirt**4 + nvirt**3*nocc) + ! T3
|
|
c $ 35 * nvirt**3) * kji + nvirt**3*dfnb*j + ! energy and <ab|cj>
|
|
c $ 2*nvirt**3*dfnb+nvirt**4*j*(max(npr-j-1,0))/2 ! sum_{jj=1}^j extra cost of Ajk, Akj and J*A
|
|
c enddo ! j
|
|
c enddo ! k
|
|
c dkji=dble(kji*nquad)
|
|
cc
|
|
c find = 1 ! will be first j index of mpi thread
|
|
c lind = nocc ! will be last j index of mpi thread
|
|
c notend=.true.
|
|
c#if defined(MPI)
|
|
c if(mpi_ccsd) then
|
|
c call ptindices_old(icount, nocc, mpi_rank, mpi_size, find,lind,
|
|
c $ notend, max_threads)
|
|
c
|
|
c if(mpi_size .gt. 1 .and. mpi_rank .eq. mpi_size-1) then
|
|
c if(mpi_size .ne. max_threads) then
|
|
c write(6, '(" Warning: only ", i3,
|
|
c $ " MPI threads were utilized for (T)"/)') max_threads
|
|
c endif
|
|
c endif
|
|
c
|
|
c if(mpi_size .gt. 1) then
|
|
c if(mpi_rank .eq. 0 .and. mpi_size .gt. 1) then
|
|
c write(iout, '(" Cost function")')
|
|
c write(iout, '(" j cost tot cost of k ")')
|
|
c do k = 1, nocc
|
|
c write(*, '(i6," ",2es13.6)') k, dble(icount(k)),
|
|
c $ dble(icount(k)-icount(k-1))
|
|
c end do
|
|
c write(iout, '(" Allocation:")')
|
|
c write(iout, '(" Rank Indices")')
|
|
c end if
|
|
c call flush(iout)
|
|
c call sleep_f(1)
|
|
c call MPI_Barrier(ccsd_communicator, mpi_err)
|
|
c
|
|
c write(*, '(i6,i7,"-",i2,i6,"%")') mpi_rank, find, lind,
|
|
c $ max(nint(dble((icount(lind)-icount(find-1)))/icount(nocc)*100),0)
|
|
c call flush(iout)
|
|
c call sleep_f(1)
|
|
c call MPI_Barrier(ccsd_communicator, mpi_err)
|
|
c end if
|
|
c endif
|
|
c#endif
|
|
c jfrstm1=max(1,find-1) ! first occ index in the stored (ab|ci) array minus 1
|
|
c jpullback=max(0,find-2) ! helps to set the staring occ index of the stored (ab|ci) array
|
|
c write(iout,'(" 1% done.")')
|
|
cc
|
|
c if (notend) then
|
|
c notend=.false.
|
|
c do iquad=1,nquad
|
|
cc multiply with Laplace factors
|
|
c if(localcc.eq.'off ')then
|
|
cc test canonical if(.false.) then
|
|
cc {{{ localcc.eq.'off ' case: FOR TEST PURPOSES, not efficient
|
|
c do i=1,nocc
|
|
c fi=quad(i,iquad)
|
|
c t(1:nvirt,i)=t1(1:nvirt,i)*quad(nocc+1:nocc+nvirt,iquad)*fi
|
|
c enddo
|
|
cc lt1=t
|
|
cc call dgemm('n','n',nvirt,nocc,nocc,1.d0,lt1,nvirt,cmo,nocc,
|
|
cc $ 0.d0,t,nvirt)
|
|
cC read integrals to memory
|
|
c if(ccsdalg .eq. 'disk ') then
|
|
c call abijread(abij,bsymmv,symmocc,dgroup,first,last,co,v,
|
|
c $ irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
|
|
c call aijkread(aijk,bsymmv,symmocc,dgroup,first,last,co,v,
|
|
c $ irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
|
|
c else
|
|
c ! (ai|bj) = <ab|ij>
|
|
c call dgemm('n', 't', nvirt*nocc, nvirt*nocc, dfnb,
|
|
c $ 1.d0, jai, nvirt*nocc, jai, nvirt*nocc,
|
|
c $ 0.d0, abij, nvirt*nocc)
|
|
c ! aibj -> abij
|
|
c do j = 1, nocc
|
|
c do a=1,nvirt
|
|
c call dcopy(nocc*nvirt, abij(a, 1, 1, j), nvirt, scr,1)
|
|
c
|
|
c do b=1,nvirt
|
|
c abij(a, b, 1:nocc, j) = scr(1:nocc, b)
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
c
|
|
c ! (aj|ik) = <ai|jk>
|
|
c call dgemm('n', 'n', nvirt*nocc, nocc**2, dfnb,
|
|
c $ 1.d0, jai, nvirt*nocc, jij, dfnb,
|
|
c $ 0.d0, aijk, nvirt*nocc)
|
|
c ! ajik -> aijk
|
|
c do k = 1, nocc
|
|
c do a = 1, nvirt
|
|
c do j = 1, nocc
|
|
c scr1o(j+1:nocc) = aijk(a, j+1:nocc, j, k)
|
|
c aijk(a, j+1:nocc, j, k) = aijk(a, j, j+1:nocc, k)
|
|
c aijk(a, j, j+1:nocc, k) = scr1o(j+1:nocc)
|
|
c enddo
|
|
c enddo
|
|
c end do
|
|
c end if
|
|
cC$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
cC$OMP& PRIVATE(j,fj,i,fij,b,fbij)
|
|
c do j=1,nocc
|
|
c fj=quad(j,iquad)
|
|
c do i=1,nocc
|
|
c fij=quad(i,iquad)*fj
|
|
c do b=1,nvirt
|
|
c fbij=quad(nocc+b,iquad)*fij
|
|
c abij(1:nvirt,b,i,j)=abij(1:nvirt,b,i,j)*
|
|
c $ quad(nocc+1:nocc+nvirt,iquad)*fbij
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
cC$OMP END PARALLEL DO
|
|
c do j=1,nocc
|
|
c fj=quad(j,iquad)
|
|
c if (iquad.gt.1) fj=fj/quad(j,iquad-1)
|
|
c do i=1,nocc
|
|
c fij=quad(i,iquad)*fj
|
|
c if (iquad.gt.1) fij=fij/quad(i,iquad-1)
|
|
c do b=1,nvirt
|
|
c do a=1,nvirt
|
|
c fbij=fij*quad(nocc+b,iquad)*quad(nocc+a,iquad)
|
|
c if (iquad.gt.1)
|
|
c $ fbij=fbij/quad(nocc+a,iquad-1)/quad(nocc+b,iquad-1)
|
|
c ss(a,b,i,j)= tt(a,b,i,j)*fbij ! ss is in tt
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
c if(ccsdalg .eq. 'disk ') then
|
|
cc call abciread(1,nvirt,abci,bsymmv,symmocc,dgroup,co,
|
|
cc $first,last,v,
|
|
cc $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
|
|
c call abciread(1,nocc,abci,bsymmv,symmocc,dgroup,co,
|
|
c $ first,last,v,irecln,ibufln,nbasis,nocc,nvirt,
|
|
c $ nirmax,multpg)
|
|
c else
|
|
c ! (ac|bi) = <ab|ci>
|
|
c call dgemm('t', 't', nvirt**2, nvirt*nocc, dfnb,
|
|
c $ 1.d0, jab, dfnb, jai, nvirt*nocc,
|
|
c $ 0.d0, abci, nvirt**2)
|
|
c ! acbi -> abci
|
|
c do i = 1, nocc
|
|
c do a = 1, nvirt
|
|
c do b = 1, nvirt
|
|
c scr1v(b+1:nvirt) = abci(a, b+1:nvirt, b, i)
|
|
c abci(a, b+1:nvirt, b, i) = abci(a, b, b+1:nvirt, i)
|
|
c abci(a, b, b+1:nvirt, i) = scr1v(b+1:nvirt)
|
|
c end do
|
|
c end do
|
|
c end do
|
|
c end if
|
|
c do c=1,nvirt
|
|
c fc=quad(nocc+c,iquad)
|
|
c do b=1,nvirt
|
|
c fj=quad(nocc+b,iquad)*fc
|
|
c do a=1,nvirt
|
|
c fij=quad(nocc+a,iquad)*fj
|
|
c do i=1,nocc
|
|
c abci(a,b,c,i)=abci(a,b,c,i)*quad(i,iquad)*fij
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
c do j=1,nocc
|
|
c fj=quad(j,iquad)
|
|
c do i=1,nocc
|
|
c fij=quad(i,iquad)*fj
|
|
c do k=1,nocc
|
|
c do a=1,nvirt
|
|
c aijk(a,k,i,j)=aijk(a,k,i,j)*fij*
|
|
c $ quad(nocc+a,iquad)/quad(k,iquad)
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
cc }}}
|
|
c else ! if localcc=on
|
|
cc write(*,*) 'warning kmax'
|
|
cc kmax=nocc
|
|
cc multiply with Laplace factors and trf to the Mayer basis: (parts of) T2, abci, abij and aijk
|
|
c call trf2laplbasis(tt,ttl,jij,jab,jia,nocc,nvirt,dfnb,abij,
|
|
c $abci,aijk,quad,iquad,nquad,cmo,aijk,lljpai,lljab,rr,rr,w,w,aijk,v,
|
|
c $t,t,t1,talg,fdo,v,w,abci_inmem,ttl2,w,w,IabIJ)
|
|
c endif
|
|
c actblock=0
|
|
c if (find.gt.1) actblock=1
|
|
cC
|
|
cc transpose tt(a,b,i,k) to rr(b,a,i,k) for all k
|
|
c if (rrd.eq.nocc) then
|
|
c do k=1,nocc
|
|
c do i=1,nocc
|
|
c call tr(nvirt,ss(1,1,i,k),rr(1,1,i,k))
|
|
c enddo
|
|
c enddo
|
|
c endif
|
|
c ind=1
|
|
c n=nocc
|
|
cc
|
|
c do k=ind,ind+n-1
|
|
c if (k.gt.kmax) exit
|
|
cc transpose tt(a,b,i,k) to rr(b,a,i) for k
|
|
c if (rrd.eq.1) then
|
|
c do i=1,nocc
|
|
c call tr(nvirt,ss(1,1,i,k),rr(1,1,i,1))
|
|
c enddo
|
|
c kinrr=1
|
|
c else
|
|
c kinrr=k
|
|
c endif
|
|
cc {{{ localcc.eq.'off ' case: FOR TEST PURPOSES, not efficient
|
|
c if(localcc.eq.'off ')then
|
|
c if (talg.eq.'lapl'.or.talg.eq.'lato') then
|
|
cc make ttl(-,~,i,k) & ttl2(-,~,k,i) from ss(-,-,-.-)
|
|
c do i=1,nocc
|
|
c do b=1,nvirt
|
|
c fb=quad(nocc+b,iquad)**2
|
|
c ttl (1:nvirt,b,i,1)=tt(1:nvirt,b,i,k)/fb
|
|
c ttl2(1:nvirt,b,1,i)=tt(1:nvirt,b,k,i)/fb
|
|
c enddo
|
|
c enddo
|
|
c endif
|
|
c endif
|
|
cc }}}
|
|
cc
|
|
c find = k
|
|
c do j=find,min(lind,nocc)
|
|
c jinrr=j !!!!!!!!!!!
|
|
c TIME0(times(1,98))
|
|
c if(mpi_rank .eq. mpi_size-1) then
|
|
c pr=dble(prk)/dkji !progress counter
|
|
c else
|
|
c pr = dble(prk) / ((kjinum(lind) - kjinum(find-1))*nquad)
|
|
c end if
|
|
c if(pr.ge.prold+0.2d0.and.pr.ne.1.d0)then
|
|
c prold=pr
|
|
c if(mpi_rank .ne. 0)write(iout,'(i4,"% done.")')nint(pr*100)! last rank prints correct percentage
|
|
c if(mpi_rank .eq. mpi_size-1)
|
|
c $ write(6, '(i4, "% done.")') nint(pr * 100) ! for master
|
|
c endif
|
|
c actblocknn=max(actblock-1,0) ! make the actblock-1 index non-negative
|
|
c lj=j
|
|
cc {{{ if (.not.abci_inmem)
|
|
c if (.not.abci_inmem) then
|
|
c if (j.gt.1) lj=j-actblocknn*iblocki-jpullback ! position of j in abci array, if find>1 lj starts from 2, hence the -max(0,lind-2)
|
|
c if (j.eq.find.or.
|
|
c $ (j.ne.jfrstm1+1.and. ! j=2 is done if find=1, do not assembly it if actblock=1
|
|
c $ j.eq.jfrstm1+1+actblocknn*iblocki)) then! make (ab|cj) for the entire actual block if j= 1st index of the actual block
|
|
c jst=j
|
|
c if (j.eq.1) jst=2
|
|
c do jj=jst,min(jst+iblocki-1,nocc)
|
|
c call dgemm('t','n',nvirt**2,nvirt,dfnb,1.d0,lljab,dfnb,
|
|
c $lljpai(1,1,jj),dfnb,0.d0,w,nvirt**2) ! (a,b|c,jj)=lljab(\bar a \bar b | P)^T lljpai(P | \bar c \bar jj)
|
|
c ljj=jj-actblocknn*iblocki-jpullback ! position of jj in abci array
|
|
c if (actblock.eq.0) ljj=jj
|
|
c do b=1,nvirt
|
|
c do c=1,nvirt
|
|
c abci(1:nvirt,c,b,ljj)=w(1:nvirt,b,c) ! <a,b|c,j>= (a,c|b,j)
|
|
c enddo
|
|
c enddo
|
|
c enddo ! jj
|
|
c endif
|
|
c if (jfrstm1+actblock*iblocki.lt.nocc) then ! if not last block -> (ab|ci) will be missed for i>jfrstm1+actblocki*iblocki -> contruct Ajk(P,b,c)
|
|
cc Ajk(P,c,b)=lljab(P,\bar c,\bar d)*ttl(\bar b,\tilde d,\bar j,\bar k)^T
|
|
c call dgemm('n','t',dfnb*nvirt,nvirt,nvirt,1.d0,
|
|
c $ lljab,dfnb*nvirt,ttl(1,1,j,k),nvirt,0.d0,apbc,dfnb*nvirt)
|
|
c if (j.ne.k) then ! j=k is taken care of inside wabc in the j=k case
|
|
cc Ajk(P,b,c)=Ajk(P,c,b)
|
|
c do b=1,nvirt
|
|
c do c=b+1,nvirt
|
|
c tmpvsize(1:dfnb)=apbc(1:dfnb,c,b)
|
|
c apbc(1:dfnb,c,b)=apbc(1:dfnb,b,c)
|
|
c apbc(1:dfnb,b,c)=tmpvsize(1:dfnb)
|
|
c enddo
|
|
c enddo
|
|
cc Ajk(P,b,c)=Ajk(P,b,c)+lljab(P,\bar b,\bar d)*ttl(\bar c,\tilde d,\bar k,\bar j)^T
|
|
c call dgemm('n','t',dfnb*nvirt,nvirt,nvirt,1.d0,
|
|
c $ lljab,dfnb*nvirt,ttl2(1,1,k,j),nvirt,1.d0,apbc,dfnb*nvirt)
|
|
c endif ! j.ne.k
|
|
c endif ! not last block
|
|
c endif
|
|
cc }}}
|
|
c delta2=1.d0
|
|
c if(j.eq.k)delta2=delta2+1
|
|
c fkj=fdo(k)+fdo(j)
|
|
c if(j.eq.k)then !i=j=k => no contribution!
|
|
c istart=j+1
|
|
c else
|
|
c istart=j
|
|
c endif
|
|
c do i=istart,nocc
|
|
c iinbl=.true.
|
|
c li=i-ind+1
|
|
cc {{{ if (.not.abci_inmem)
|
|
cc if(i.gt.ind+n-1.and.i.ne.j)then
|
|
cc li=n+2
|
|
cc if (.not.abci_inmem.and.localcc.eq.'off ')
|
|
cc $ call abciread(i,1,dcore(imem+(li-1)*nvirt**3),
|
|
cc $bsymmv,symmocc,dgroup,co,first,last,v,
|
|
cc $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
|
|
cc endif
|
|
cc if(i.gt.ind+n-1.and.i.eq.j)then
|
|
cc li=lj
|
|
cc endif
|
|
c if (.not.abci_inmem) then
|
|
c actblocki=actblock
|
|
c if (j.eq.1) actblocki=1
|
|
c if (i.le.jfrstm1+actblocki*iblocki) then ! if i is still in the actual block, i.e. i <= the index of the last stored (ab|ci)
|
|
c li=i-max(actblocki-1,0)*iblocki-jpullback ! no j=1 & i=1 contribution, i>1
|
|
c else ! i.eq.j is in the other branch
|
|
c if (j.ne.k) then ! i.ne.j must be fulfilled
|
|
c call dgemm('t','n',nvirt,nvirt**2,dfnb,1.d0,
|
|
c $ lljpai(1,1,i),dfnb,apbc,dfnb,0.d0,w,nvirt) ! w(a,b,c)= lljpai(P,\bar a,\bar I) Ajk(P,b,c)
|
|
c elseif (j.eq.k) then
|
|
c call dgemm('t','n',nvirt,nvirt**2,dfnb,1.d0,
|
|
c $ lljpai(1,1,i),dfnb,apbc,dfnb,0.d0,v,nvirt) ! v(a,c,b)= lljpai(P,\bar a,\bar I) Ajk(P,c,b)
|
|
c endif
|
|
c iinbl=.false.
|
|
c endif
|
|
c endif ! .not.abci_inmem
|
|
cc }}}
|
|
c delta3=delta2
|
|
c if(i.eq.j)delta3=delta3+1
|
|
c fkji=fkj+fdo(i)
|
|
c if (talg.eq.'lapl'.or.talg.eq.'lato') then
|
|
cc make ttl3(-,~,i,j) & ttl4(-,~,j,i) from ss(-,-,-,-)
|
|
c do b=1,nvirt
|
|
c fb=quad(nocc+b,iquad)**2
|
|
c ttl3(1:nvirt,b)=ss(1:nvirt,b,i,j)/fb
|
|
c IabIJ(1:nvirt,b)=ss(1:nvirt,b,j,i)/fb ! ttl4
|
|
c enddo
|
|
c else
|
|
c call dcopy(nvirt**2,ss(1,1,i,j),1,ttl3,1)
|
|
c call dcopy(nvirt**2,ss(1,1,j,i),1,IabIJ,1) ! ttl4
|
|
c endif
|
|
cc compute W and V for all abc indices
|
|
c call wabc(nocc,nvirt,i,j,k,imem,dcore,t,ttl,rr,rrd,
|
|
c $ abci(1,1,1,li), abci(1,1,1,lj), abci(1,1,1,k-ind+1),
|
|
c $kinrr,jinrr,w,v,abij,aijk,times,'lapl',ss,iinbl,localcc,IabIJ,
|
|
c $lljpai,dfnb,ttl2,ttl3,IabIJ,1,abijinmem,rdummy,ccsdmkl,ptthreads,
|
|
c $omp_max_threads)
|
|
cc call wabc_old(nocc,nvirt,i,j,k,1,abci,t,ttl,rr,rrd,li,lj,ind,
|
|
cc $kinrr,w,v,abij,aijk,times,'lapl',ss,iinbl,localcc,IabIJ,lljpai,
|
|
cc $dfnb,ttl2,ttl3,IabIJ,1,abijinmem,rdummy)
|
|
cc {{{ w_abc debug with loops
|
|
cc if (.false.) then
|
|
cc write(*,*) 'loops'
|
|
cc do a=1,nvirt
|
|
cc do b=1,nvirt
|
|
cc do c=1,nvirt
|
|
cc sum=0.d0
|
|
cc do d=1,nvirt
|
|
cc sum=sum+abci(c,b,d,j)*ttl(a,d,i,k) !e
|
|
cc sum=sum+abci(a,b,d,j)*ttl2(c,d,k,i) !f
|
|
cc sum=sum+abci(b,c,d,k)*ttl3(a,d) !d
|
|
cc sum=sum+abci(a,c,d,k)*IabIJ(b,d) !a
|
|
cc sum=sum+abci(b,a,d,i)*ttl2(c,d,k,j) !b
|
|
cc sum=sum+abci(c,a,d,i)*ttl(b,d,j,k) !c
|
|
cc enddo
|
|
cc do l=1,nocc
|
|
cc sum=sum-aijk(b,l,j,i)*ss(a,c,l,k) !c
|
|
cc sum=sum-aijk(b,l,j,k)*ss(c,a,l,i) !b
|
|
cc sum=sum-aijk(c,l,k,j)*ss(b,a,l,i) !a
|
|
cc sum=sum-aijk(a,l,i,k)*ss(c,b,l,j) !d
|
|
cc sum=sum-aijk(c,l,k,i)*ss(a,b,l,j) !f
|
|
cc sum=sum-aijk(a,l,i,j)*ss(b,c,l,k) !e
|
|
cc enddo
|
|
ccc w(a,b,c)=sum
|
|
ccc write(*,'(6i4,es14.6)') k,j,i,a,b,c,sum
|
|
cc enddo
|
|
cc enddo
|
|
cc enddo
|
|
ccC calculating V
|
|
cc call dcopy(nvirt**3,w,1,v,1)
|
|
ccC$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
ccC$OMP& PRIVATE(a)
|
|
cc do a=1,nvirt
|
|
cc v(a,1:nvirt,1:nvirt)=v(a,1:nvirt,1:nvirt)
|
|
cc $ +t(a,i)*abij(1:nvirt,1:nvirt,j,k)
|
|
cc enddo
|
|
ccC$OMP END PARALLEL DO
|
|
ccC$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
ccC$OMP& PRIVATE(b)
|
|
cc do b=1,nvirt
|
|
cc v(1:nvirt,b,1:nvirt)=v(1:nvirt,b,1:nvirt)
|
|
cc $ +t(b,j)*abij(1:nvirt,1:nvirt,i,k)
|
|
cc enddo
|
|
ccC$OMP END PARALLEL DO
|
|
ccC$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
ccC$OMP& PRIVATE(c)
|
|
cc do c=1,nvirt
|
|
cc v(1:nvirt,1:nvirt,c)=v(1:nvirt,1:nvirt,c)
|
|
cc $ +t(c,k)*abij(1:nvirt,1:nvirt,i,j)
|
|
cc enddo
|
|
ccC$OMP END PARALLEL DO
|
|
cc do a=1,nvirt
|
|
cc do b=1,nvirt
|
|
cc do c=1,nvirt
|
|
cc write(*,'(6i4,2es14.6)') k,j,i,a,b,c,w(a,b,c),v(a,b,c)
|
|
cc enddo
|
|
cc enddo
|
|
cc enddo
|
|
cc endif
|
|
cc }}}
|
|
cC calculating energy
|
|
c if(localcc.eq.'off ')then
|
|
cc test canonical if(.true.) then
|
|
cc write(*,*) 'warning can energy'
|
|
cc {{{ canonical occ E formula
|
|
c IF(i.eq.j.or.j.eq.k)THEN
|
|
c sum=0.d0
|
|
cC$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
cC$OMP& PRIVATE(a,b,c,delta1,fcb,astart,delta,fcba)
|
|
cC$OMP& REDUCTION(+:sum)
|
|
c do c=1,nvirt
|
|
c do b=c,nvirt
|
|
c delta1=1.d0
|
|
c if(b.eq.c)delta1=delta1+1.d0
|
|
c fcb=fdv(c)+fdv(b)
|
|
c if(b.eq.c)then
|
|
c astart=b+1
|
|
c else
|
|
c astart=b
|
|
c endif
|
|
c do a=astart,nvirt
|
|
c delta=delta1
|
|
c if(a.eq.b)delta=delta+1.d0
|
|
c fcba=fcb+fdv(a)
|
|
c if (can) fact=(fkji-fcba)
|
|
cc y=v(a,b,c)+v(b,c,a)+v(c,a,b)
|
|
cc w1=w(a,b,c)+w(b,c,a)+w(c,a,b)
|
|
c sum=sum+(
|
|
cc $y*w1
|
|
c $-(v(a,b,c)+v(b,c,a)+v(c,a,b))*(w(a,b,c)+w(b,c,a)+w(c,a,b))
|
|
c
|
|
c $+(w(a,b,c)*v(a,b,c)
|
|
c $+w(b,c,a)*v(b,c,a)
|
|
c $+w(c,a,b)*v(c,a,b))*3.d0
|
|
c
|
|
cc $)/((fkji-fcba)*delta) ! canonical
|
|
c $)/(delta*fact)
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
cC$OMP END PARALLEL DO
|
|
c et=et-4.d0*sum/delta3 ! 2* in E expr and 2* from symmetry of i.eq.j...
|
|
c ELSE
|
|
c sum=0.d0
|
|
cC$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
cC$OMP& PRIVATE(a,b,c,delta1,fcb,astart,delta,fcba,y,z)
|
|
cC$OMP& REDUCTION(+:sum)
|
|
c do c=1,nvirt
|
|
c do b=c,nvirt
|
|
c delta1=1.d0
|
|
c if(b.eq.c)delta1=delta1+1.d0
|
|
c fcb=fdv(c)+fdv(b)
|
|
c if(b.eq.c)then
|
|
c astart=b+1
|
|
c else
|
|
c astart=b
|
|
c endif
|
|
c do a=astart,nvirt
|
|
cc do a=b,nvirt
|
|
c delta=delta1
|
|
c if(a.eq.b)delta=delta+1.d0
|
|
c fcba=fcb+fdv(a)
|
|
c if (can) fact=(fkji-fcba)
|
|
c y=v(a,b,c)+v(b,c,a)+v(c,a,b)
|
|
c z=v(a,c,b)+v(b,a,c)+v(c,b,a)
|
|
c sum=sum+(
|
|
c $+(w(a,b,c)+w(b,c,a)+w(c,a,b))*(y-2.d0*z)
|
|
c $+(w(a,c,b)+w(b,a,c)+w(c,b,a))*(z-2.d0*y)
|
|
c
|
|
c $+(w(a,b,c)*v(a,b,c)
|
|
c $+w(b,c,a)*v(b,c,a)
|
|
c $+w(c,a,b)*v(c,a,b)
|
|
c $+w(a,c,b)*v(a,c,b)
|
|
c $+w(b,a,c)*v(b,a,c)
|
|
c $+w(c,b,a)*v(c,b,a))*3.d0
|
|
c
|
|
cc $)/((fkji-fcba)*delta) ! canonical
|
|
c $)/(delta*fact)
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
cC$OMP END PARALLEL DO
|
|
c et=et-2.d0*sum/delta3
|
|
c ENDIF
|
|
cc write(*,'(4i4,2es16.6)') iquad,k,j,i,et,2.d0*sum/delta3
|
|
cc }}}
|
|
c else !if(localcc.ne.'off ')then
|
|
cc {{{ talg=occ local E expression
|
|
c sum=0.d0
|
|
cC$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
cC$OMP& PRIVATE(a,b,c,astart,y,delta1,delta)
|
|
cC$OMP& REDUCTION(+:sum)
|
|
c do c=1,nvirt
|
|
c do b=c,nvirt
|
|
c delta1=1.d0
|
|
c if(b.eq.c)delta1=delta1+1.d0
|
|
c if(b.eq.c)then
|
|
c astart=b+1
|
|
c else
|
|
c astart=b
|
|
c endif
|
|
c fcb=fdv(c)+fdv(b)
|
|
c do a=astart,nvirt
|
|
c delta=delta1
|
|
c if(a.eq.b)delta=delta+1.d0
|
|
c if (denom) then
|
|
c fcba=fcb+fdv(a)
|
|
c fact=fcba-fkji ! - due to other - in et=et-sum
|
|
c endif
|
|
c if (i.eq.j) then
|
|
c y=v(a,b,c)+v(a,c,b)+v(b,c,a)
|
|
c sum=sum+(
|
|
c $ w(a,b,c)*(3.d0*v(a,b,c)-y)+
|
|
c $ w(a,c,b)*(3.d0*v(a,c,b)-y)+
|
|
c $ w(b,c,a)*(3.d0*v(b,c,a)-y)
|
|
c $ )/(delta*fact)
|
|
cc re $ )/(3.d0*delta)
|
|
c else ! i>j
|
|
c sum=sum+(
|
|
c $ 2.d0/3.d0*
|
|
c $ (w(a,b,c)+w(b,c,a)+w(c,a,b)-w(a,c,b)-w(b,a,c)-w(c,b,a))*
|
|
c $ (v(a,b,c)+v(b,c,a)+v(c,a,b)-v(a,c,b)-v(b,a,c)-v(c,b,a))
|
|
cc
|
|
c $ +w(a,b,c)*(2.d0*v(a,b,c)-v(a,c,b)-v(c,b,a))
|
|
c $ +w(b,c,a)*(2.d0*v(b,c,a)-v(a,c,b)-v(b,a,c))
|
|
c $ +w(c,a,b)*(2.d0*v(c,a,b)-v(b,a,c)-v(c,b,a))
|
|
c $ +w(a,c,b)*(2.d0*v(a,c,b)-v(b,c,a)-v(a,b,c))
|
|
c $ +w(b,a,c)*(2.d0*v(b,a,c)-v(c,a,b)-v(b,c,a))
|
|
c $ +w(c,b,a)*(2.d0*v(c,b,a)-v(a,b,c)-v(c,a,b))
|
|
cc
|
|
c $)/(delta*fact)
|
|
c endif ! i.eq.j or i>j
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
cC$OMP END PARALLEL DO
|
|
c et=et-sum
|
|
c epair(j,i)=epair(j,i)-sum
|
|
cc }}}
|
|
c endif ! localcc.eq.'off '
|
|
ccc TIMEADD(times(1,20),times)
|
|
c enddo !i
|
|
c prk=prk+nocc-istart+1
|
|
c#if defined(MPI)
|
|
c if(mpi_rank .ne. mpi_size-1) then
|
|
c flag = .true.
|
|
c if(nummes .ne. 0) then
|
|
c call MPI_Test(request, flag, stat, mpi_err)
|
|
c end if
|
|
c
|
|
c if(.not.flag) then
|
|
c call MPI_Cancel(request, mpi_err)
|
|
c else
|
|
c comm_scr = 0
|
|
c end if
|
|
c
|
|
c comm_scr = comm_scr + prk - prk_sent
|
|
c prk_sent = prk
|
|
c
|
|
c call MPI_Isend(comm_scr, 1, MPI_INTEGER_MRCC_MRCC, mpi_size-1,
|
|
c $ 0, ccsd_communicator, request, mpi_err)
|
|
c nummes=nummes+1
|
|
c else if(mpi_ccsd) then ! last rank receives if ccsd is running MPI parallel
|
|
c flag = .true.
|
|
c do while(flag)
|
|
c call MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG,
|
|
c $ ccsd_communicator, flag, stat, mpi_err)
|
|
c
|
|
c if(flag) then
|
|
c call MPI_Recv(comm_scr, 1, MPI_INTEGER_MRCC_MRCC,
|
|
c $ stat(MPI_SOURCE), stat(MPI_TAG),
|
|
c $ ccsd_communicator,stat, mpi_err)
|
|
c prk = prk + comm_scr
|
|
c end if
|
|
c enddo
|
|
c end if
|
|
c#endif
|
|
c if (.not.abci_inmem.and.j.eq.jfrstm1+actblock*iblocki)
|
|
c $ actblock=actblock+1 ! step to the next block, j=1 is in block 0
|
|
c TIMEADD(times(1,97),times(1,98))
|
|
cc if (iquad.eq.1) then
|
|
cc write(*, "('Rank ',i3,' done with j',i4,' of q',i3,
|
|
cc $' CPU [min]:',f10.5,2x,'Wall [min]:',7f10.5)")
|
|
cc $ mpi_rank,j,iquad,times(1,97)/60.d0,times(2,97)/60.d0 ; flush(6)
|
|
cc elseif(iquad.eq.nquad) then
|
|
cc write(*, "('Rank ',i3,' done with j of all q ',i4,
|
|
cc $' CPU [min]:',f10.5,2x,'Wall [min]:',7f10.5)")
|
|
cc $ mpi_rank,j,times(3,97)/60.d0,times(4,97)/60.d0 ; flush(6)
|
|
cc else
|
|
cc endif
|
|
c enddo !j
|
|
c enddo !k
|
|
c ind=ind+n
|
|
c enddo ! iquad
|
|
c endif ! (notend) for blocks
|
|
cc
|
|
c#if defined(MPI)
|
|
c if(mpi_ccsd) then
|
|
c call MPI_Ibarrier(ccsd_communicator, request, mpi_err)
|
|
c TIMEADD(times(1,95),times(1,96))
|
|
cc if(mpi_size .gt. 1) then
|
|
cc write(*, "('Rank ',i3,' done with (T) ',
|
|
cc $'CPU [min]:',f10.3,2x,'Wall [min]:',7f10.3)")
|
|
cc $ mpi_rank,times(1,95)/60.d0,times(2,95)/60.d0 ; flush(6)
|
|
cc end if
|
|
c if(mpi_rank .eq. mpi_size-1) then
|
|
c ptdone = .false.
|
|
c do while(.not.ptdone)
|
|
c flag = .true.
|
|
c do while(flag)
|
|
c call MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG,
|
|
c $ ccsd_communicator, flag, stat, mpi_err)
|
|
c if(flag) then
|
|
c call MPI_Recv(comm_scr, 1, MPI_INTEGER_MRCC,
|
|
c $ stat(MPI_SOURCE), stat(MPI_TAG),
|
|
c $ ccsd_communicator, stat, mpi_err)
|
|
c prk = prk + comm_scr
|
|
c end if
|
|
c enddo
|
|
c
|
|
c pr=dble(prk)/dkji !progress counter
|
|
c if(pr.ge.prold+0.1d0.and.pr.ne.1.d0) then
|
|
c prold=pr
|
|
c write(iout,'(i4,"% done.")')nint(pr*100)
|
|
c endif
|
|
c
|
|
c call MPI_Test(request, ptdone, MPI_STATUS_IGNORE,
|
|
c $ mpi_err)
|
|
c end do
|
|
c end if
|
|
c
|
|
ccc call MPI_Barrier(ccsd_communicator, mpi_err)
|
|
ccc write(*,*) "rank:",mpi_rank,et
|
|
ccc call MPI_Barrier(ccsd_communicator, mpi_err)
|
|
cc if(mpi_rank .eq. 0) then
|
|
cc call MPI_Reduce(MPI_IN_PLACE, et, 1, MPI_DOUBLE_PRECISION,
|
|
cc $ MPI_SUM, 0, ccsd_communicator, request, mpi_err)
|
|
cc else
|
|
cc call MPI_Reduce(et, et, 1, MPI_DOUBLE_PRECISION,
|
|
cc $ MPI_SUM, 0, ccsd_communicator, request, mpi_err)
|
|
cc end if
|
|
c call MPI_Allreduce(MPI_IN_PLACE, et, 1, MPI_DOUBLE_PRECISION,
|
|
c $ MPI_SUM, ccsd_communicator, mpi_err)
|
|
c end if
|
|
c#endif
|
|
c if (can) et=et/nquad
|
|
cc write(6,'(f17.12)')et
|
|
cc call timer
|
|
cc print energy contribution of index triples ijk
|
|
cc do j=1,nocc
|
|
cc do i=1,nocc
|
|
cc if (i.lt.j) epair(j,i)=epair(i,j)
|
|
cc et2=et
|
|
cc if (et2.lt.1.d-15) et2=et2+1.d-15
|
|
cc write(112,'(2i5,9es20.12)') j,i,epair(j,i),epair(j,i)/et2,
|
|
cc $ fdo(i)+fdo(j)+fdo(1)
|
|
cc enddo
|
|
cc enddo
|
|
cc close(112)
|
|
c return
|
|
c end subroutine
|
|
cC }}}
|
|
c
|
|
c {{{ localbtcorr2 with loops
|
|
************************************************************************
|
|
subroutine localbtcorr2(t,tt,fdo,fdv,ijka,abij,
|
|
C nincs megkotes k-ra!
|
|
$w,v,bsymmv,symmocc,dgroup,first,last,co,ico,
|
|
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
|
|
************************************************************************
|
|
implicit none
|
|
integer irecln,ibufln,nbasis,nocc,nvirt,dgroup,nirmax
|
|
integer bsymmv(nirmax+1),symmocc(nirmax),multpg(nirmax,nirmax)
|
|
integer first(0:5,8,8,nbasis),last(0:5,8,8,nbasis)
|
|
integer co(nbasis),ico(nbasis)
|
|
real*8 t(nvirt,nocc),tt(nocc,nocc,nvirt,nvirt)
|
|
real*8 fdv(nvirt),fdo(nocc)
|
|
real*8 et,sum,sum2,y
|
|
integer a,b,c,d,i,j,k,l,p,q,r,s
|
|
real*8,allocatable:: vint(:,:,:,:)
|
|
real*8,allocatable:: w2(:,:,:,:,:,:)
|
|
real*8,allocatable:: v2(:,:,:,:,:,:)
|
|
c real*8,allocatable:: w(:,:,:)
|
|
c real*8,allocatable:: v(:,:,:)
|
|
real*8 w(nocc,nocc,nocc),v(nocc,nocc,nocc)
|
|
real*8 delta
|
|
real*8 iabc(nocc,nvirt,nvirt,nvirt)
|
|
real*8 abij(nvirt,nvirt,nocc,nocc)
|
|
real*8 ijka(nocc,nocc,nocc,nvirt)
|
|
integer imem1
|
|
common/memcom/imem1
|
|
allocate(vint(nbasis,nbasis,nbasis,nbasis))
|
|
allocate(w2(nvirt,nvirt,nvirt,nocc,nocc,nocc))
|
|
allocate(v2(nvirt,nvirt,nvirt,nocc,nocc,nocc))
|
|
c allocate(w(nocc,nocc,nocc))
|
|
c allocate(v(nocc,nocc,nocc))
|
|
c Read integral list
|
|
c read(inp,*) nbasis,nocc
|
|
read(555,*)
|
|
read(555,*) (i,j=1,nbasis)
|
|
read(555,*)
|
|
|
|
read(555,*) sum,p,q,r,s
|
|
do while(r.ne.0)
|
|
vint(p,q,r,s)=sum
|
|
vint(q,p,r,s)=sum
|
|
vint(p,q,s,r)=sum
|
|
vint(q,p,s,r)=sum
|
|
vint(r,s,p,q)=sum
|
|
vint(r,s,q,p)=sum
|
|
vint(s,r,p,q)=sum
|
|
vint(s,r,q,p)=sum
|
|
read(555,*) sum,p,q,r,s
|
|
enddo
|
|
|
|
c if(maxcor-(imem-imem1).lt.nvirt**3)then
|
|
c else
|
|
c write(iout,*)'Insufficient memory!'
|
|
c call mrccend(1)
|
|
c endif
|
|
call abijread(abij,bsymmv,symmocc,dgroup,first,last,co,w,
|
|
$ irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
|
|
call ijkaread(ijka,bsymmv,symmocc,dgroup,first,last,co,w,
|
|
$ irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
|
|
c call abci4read(1,nocc,iabc,bsymmv,symmocc,dgroup,co,ico,
|
|
c $first,last,w)
|
|
c call sortabci(bsymmv,symmocc,dgroup,co,ico,first,last,w,
|
|
c $maxcor,imem,imem1,dcore,nocc,nvirt,nbasis,iout,nirmax,
|
|
c $irecln,ibufln,multpg)
|
|
c write(6,*)'d'
|
|
c call flush(6)
|
|
c call iabcread(nvirt,1,iabc,w,nocc,nvirt,irecln,ibufln)
|
|
do i=1,nocc
|
|
do c=1,nvirt
|
|
do b=1,nvirt
|
|
do a=1,nvirt
|
|
iabc(i,a,b,c)=vint(c+nocc,a+nocc,b+nocc,i)
|
|
c if(iabc(i,a,b,c).ne.iabc2(i,a,b,c))then
|
|
c write(6,'(4i3,2f14.10)')i,a,b,c,
|
|
c $iabc(i,a,b,c),iabc2(i,a,b,c)
|
|
c endif
|
|
enddo
|
|
enddo
|
|
enddo
|
|
enddo
|
|
|
|
c do j=1,nocc
|
|
c do i=1,nocc
|
|
c do b=1,nvirt
|
|
c do a=1,nvirt
|
|
c abij(a,b,i,j)=vint(a+nocc,i,b+nocc,j)
|
|
c if(abij(a,b,i,j).ne.abij2(a,b,i,j))then
|
|
c write(6,'(4i3,2f14.10)')a,b,i,j,
|
|
c $abij(a,b,i,j),abij2(a,b,i,j)
|
|
c endif
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
c do k=1,nocc
|
|
c do j=1,nocc
|
|
c do i=1,nocc
|
|
c do a=1,nvirt
|
|
c ijka(i,j,k,a)=vint(i,k,j,a+nocc)
|
|
c if(ijka(i,j,k,a).ne.ijka2(i,j,k,a))then
|
|
c write(6,'(4i3,2f14.10)')i,j,k,a,
|
|
c $ijka(i,j,k,a),ijka2(i,j,k,a)
|
|
c endif
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
|
|
C Construct W Eq.4
|
|
|
|
et=0.d0
|
|
do c=1,nvirt
|
|
do b=c,nvirt
|
|
do a=b,nvirt
|
|
do k=1,nocc
|
|
do j=1,nocc
|
|
do i=1,nocc
|
|
sum=0.d0
|
|
sum2=0.d0
|
|
do d=1,nvirt
|
|
sum=sum+iabc(i,d,a,b)*tt(k,j,c,d)
|
|
sum=sum+iabc(i,d,a,c)*tt(j,k,b,d)
|
|
sum=sum+iabc(k,d,c,a)*tt(j,i,b,d)
|
|
|
|
sum=sum+iabc(k,d,c,b)*tt(i,j,a,d)
|
|
sum=sum+iabc(j,d,b,c)*tt(i,k,a,d)
|
|
sum=sum+iabc(j,d,b,a)*tt(k,i,c,d)
|
|
enddo
|
|
do l=1,nocc
|
|
sum=sum-ijka(j,k,l,c)*tt(i,l,a,b)
|
|
sum=sum-ijka(k,j,l,b)*tt(i,l,a,c)
|
|
sum=sum-ijka(i,j,l,b)*tt(k,l,c,a)
|
|
|
|
sum=sum-ijka(j,i,l,a)*tt(k,l,c,b)
|
|
sum=sum-ijka(k,i,l,a)*tt(j,l,b,c)
|
|
sum=sum-ijka(i,k,l,c)*tt(j,l,b,a)
|
|
enddo
|
|
w2(a,b,c,i,j,k)=sum
|
|
w(i,j,k)=sum
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
|
|
C Construct V Eq.5
|
|
|
|
c do k=1,nocc
|
|
c do j=1,nocc
|
|
c do i=1,nocc
|
|
c do c=1,nvirt
|
|
c do b=1,nvirt
|
|
c do a=1,nvirt
|
|
v(i,j,k)=w(i,j,k)+
|
|
$abij(b,c,j,k)*t(a,i)+
|
|
$abij(a,c,i,k)*t(b,j)+
|
|
$abij(a,b,i,j)*t(c,k)
|
|
C denominator into W
|
|
w(i,j,k)=w(i,j,k)/(fdo(i)+fdo(j)+fdo(k)-fdv(a)-fdv(b)-fdv(c))
|
|
enddo
|
|
enddo
|
|
enddo
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
C Calculate Energy
|
|
c et=0.d0
|
|
c do c=1,nvirt
|
|
c do b=c,nvirt
|
|
c do a=b,nvirt
|
|
sum=0.d0
|
|
delta=1.d0
|
|
if(a.eq.b)delta=delta+1.d0
|
|
if(c.eq.b)delta=delta+1.d0
|
|
do k=1,nocc
|
|
do j=1,nocc
|
|
c sum2=0.d0
|
|
c if(i.eq.j)then
|
|
y=v(k,j,j)+v(j,k,j)+v(j,j,k)
|
|
sum=sum
|
|
$+ w(j,j,k)*(3.d0*v(j,j,k)-y)
|
|
$+ w(j,k,j)*(3.d0*v(j,k,j)-y)
|
|
$+ w(k,j,j)*(3.d0*v(k,j,j)-y)
|
|
c endif
|
|
c sum=sum+sum2/(fdo(j)+fdo(j)+fdo(k)-fdv(a)-fdv(b)-fdv(c))
|
|
c sum=sum+sum2
|
|
|
|
do i=j+1,nocc
|
|
c sum2=0.d0
|
|
c if(i.gt.j)then
|
|
sum=sum+
|
|
$ 2.d0/3.d0*
|
|
$ (w(i,j,k)+w(j,k,i)+w(k,i,j)-w(i,k,j)-w(j,i,k)-w(k,j,i))*
|
|
$ (v(i,j,k)+v(k,i,j)+v(j,k,i)-v(i,k,j)-v(j,i,k)-v(k,j,i))
|
|
|
|
$+ w(i,j,k)*(2.d0*v(i,j,k)-v(i,k,j)-v(k,j,i))
|
|
$+ w(j,k,i)*(2.d0*v(j,k,i)-v(k,j,i)-v(j,i,k))
|
|
$+ w(k,i,j)*(2.d0*v(k,i,j)-v(j,i,k)-v(i,k,j))
|
|
$+ w(i,k,j)*(2.d0*v(i,k,j)-v(i,j,k)-v(k,i,j))
|
|
$+ w(j,i,k)*(2.d0*v(j,i,k)-v(k,i,j)-v(j,k,i))
|
|
$+ w(k,j,i)*(2.d0*v(k,j,i)-v(j,k,i)-v(i,j,k))
|
|
|
|
c endif
|
|
|
|
c sum=sum+sum2/(fdo(i)+fdo(j)+fdo(k)-fdv(a)-fdv(b)-fdv(c))
|
|
c sum=sum+sum2
|
|
enddo
|
|
enddo
|
|
enddo
|
|
et=et+sum/delta
|
|
enddo
|
|
enddo
|
|
enddo
|
|
c et=et/3.d0
|
|
write(6,'("(T)en: ",f17.12)')et
|
|
return
|
|
|
|
end subroutine
|
|
C }}}
|
|
c
|
|
c {{{ btcorr
|
|
************************************************************************
|
|
subroutine btcorr(t0,tt,fdo,fdv,aijk,abij,wa,va,
|
|
$bsymmv,symmocc,dgroup,first,last,co,et,times,rr,rrd,
|
|
$ccsdalg,jij,jai0,jab0,dfnb,jlen,Iabija,
|
|
$nquad, quad, tl, abc1, abci, ttla, ss, abci_inmem, cmo, localcc,
|
|
$talg,lljab, lljpai, Apbc, nblocki, iblocki, tmpvsize, ttl2a,ttl3a,
|
|
$lapl,irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg,
|
|
$iout,maxcor,imem,dcore,iimem,icore,scr,mpi_rank,mpi_size,mpi_ccsd,
|
|
$ccsdmkl, ptthreads, omp_max_threads, mkl_max_threads,
|
|
$ccsdrest, ptrest, bcast_comm,ccsd_communicator, lno, lnaf, ets,
|
|
$master_rank,master_thread,lf12,tfact)
|
|
************************************************************************
|
|
#if defined(MPI)
|
|
c use mpi
|
|
c use counter
|
|
use manager
|
|
#endif
|
|
implicit none
|
|
#if defined(MPI)
|
|
include "mpif.h"
|
|
integer stat(MPI_STATUS_SIZE), bcast_comm
|
|
integer :: comm_scr
|
|
#else
|
|
integer stat, bcast_comm, MPI_STATUS_IGNORE, comm_scr
|
|
#endif
|
|
integer ptthreads, omp_max_threads, mkl_max_threads
|
|
integer dfnb, master_rank
|
|
integer irecln,ibufln,nbasis,nocc,nvirt,dgroup,nirmax
|
|
integer bsymmv(nirmax+1),symmocc(nirmax),multpg(nirmax,nirmax)
|
|
integer iout, maxcor, imem, kjiind
|
|
real*8 dcore(*)
|
|
real*8 quad(nvirt+nocc, nquad), tl(nvirt, nocc)
|
|
real*8 abci(nvirt, nvirt, nvirt, *),
|
|
$ abc1(nvirt, nvirt, nvirt)
|
|
real*8 cmo(nocc, nocc)
|
|
real*8 ttla(nvirt,nvirt,nocc,1)
|
|
real*8 ss(nvirt,nvirt,nocc,nocc),fb
|
|
real*8 ttl2a(nvirt,nvirt,1,nocc),
|
|
$ ttl3a(nvirt,nvirt,ptthreads)
|
|
real*8 lljab(dfnb,nvirt,nvirt)
|
|
real*8 tmpvsize(*)
|
|
real*8 lljpai(dfnb,nvirt,nocc),Apbc(dfnb,nvirt,nvirt)
|
|
integer icore(*), iimem
|
|
integer nquad, nblocki, iblocki
|
|
real*8, pointer :: ttl(:,:,:,:), ttl2(:,:,:,:), ttl3(:,:),
|
|
$ ttl4(:,:)
|
|
c
|
|
character*3 ccsdmkl
|
|
character*4 ccsdrest, localcc, talg
|
|
logical mpi_ccsd, ptrest, abci_inmem, master_thread, lf12
|
|
integer mpi_rank, mpi_size, mpi_err, request
|
|
integer npr
|
|
integer blocknumber, bsize, kk, prk_sent
|
|
real*8 jij(dfnb, nocc, nocc), jab0(dfnb, nvirt, nvirt)
|
|
real*8 jai0(nvirt, nocc, dfnb) ! originally in iap order
|
|
character*8 ccsdalg
|
|
real*8 scr(nocc, nvirt), scr1o(nocc), scr1v(nvirt)
|
|
real*8, pointer :: abc_ofi(:,:,:), abc_ofj(:,:,:), abc_ofk(:,:,:)
|
|
real*8, pointer :: abc_k(:,:,:,:)
|
|
logical flag,ptdone, lapl, new_k, iinbl, localccXp
|
|
integer nummes, jmax, iquad
|
|
real*8, pointer :: t(:,:), jai(:,:,:), jab(:,:,:)
|
|
|
|
integer rrd,jlen
|
|
real*8 t0(nvirt,nocc),tt(nvirt,nvirt,nocc,nocc),fi,fj,fij,fbij
|
|
real*8 rr(nvirt,nvirt,nocc,rrd,*)
|
|
real*8, pointer :: rrp(:,:,:,:)
|
|
real*8 fdv(nvirt),fdo(nocc), fc
|
|
real*8 et,esum,delta,delta1,delta2,delta3,y,z,esum2,w1,w2
|
|
real*8 fkj,fkji,fcb,fcba,deltaij,fact,et2,epair(nocc,nocc)
|
|
integer a,b,c,i,j,k, k2, j2, i2
|
|
real*8 wa(nvirt,nvirt,nvirt,*),va(nvirt,nvirt,nvirt,*)
|
|
real*8, pointer :: w(:,:,:), v(:,:,:)
|
|
real*8 abij(nvirt,nvirt,nocc,jlen) !<12|12>
|
|
real*8 aijk(nvirt,nocc,nocc,nocc) !<12|12>
|
|
real*8 pr,prold,dkji
|
|
real*8 IabIJa(nvirt, nvirt, ptthreads)
|
|
real*8, pointer :: IabIJ(:,:)
|
|
integer co(nbasis)
|
|
integer first(0:5,8,8,nbasis),last(0:5,8,8,nbasis)
|
|
logical notend,abijinmem, abckinmem, abcjinmem, abciinmem, rstex
|
|
logical kdone, jdone(nocc), kjdone
|
|
logical*1 :: kjidone
|
|
integer ind,n,li, ttd
|
|
integer imem1,istart,astart,kinrr,prk,kji, jinrr
|
|
common/memcom/imem1
|
|
|
|
c debug
|
|
real*8 times(6,0:100), times0(2)
|
|
c real*8 kjinum(nocc)
|
|
|
|
! MPI declaration{{{
|
|
integer lk, nn
|
|
#ifdef MPI
|
|
integer :: counter_own
|
|
integer, pointer :: requests(:), ijk_indices(:), toSend(:)
|
|
integer, pointer :: requestsAll(:)
|
|
real*8, pointer :: energies(:)
|
|
#else
|
|
integer :: counter_own
|
|
integer :: requests, ijk_indices, toSend
|
|
integer :: requestsAll
|
|
real*8 :: energies
|
|
#endif
|
|
integer :: ccsd_communicator
|
|
integer :: counter_k, counter_win
|
|
integer :: ifree, reqptr, nReqs, nReqsAll
|
|
integer ijk, reqused, nReqs0, findex, lindex
|
|
integer ijk3(3*ptthreads), req3(3*ptthreads), toSend3(3*ptthreads)
|
|
real*8 en3(3*ptthreads)!}}}
|
|
|
|
|
|
c real*8, pointer :: w1(:), w2(:), x(:), y1(:), z1(:)
|
|
integer iok
|
|
real*8 tmp, denom, ei(nocc)
|
|
|
|
integer omp_get_num_threads, omp_get_thread_num
|
|
integer nthreade, mkl_threads, istat,
|
|
$ mkl_set_num_threads_local
|
|
integer :: omp_num_threads, omp_thread_num
|
|
|
|
integer dblalloc, kmax
|
|
integer iabci, iabcj, iabck
|
|
integer count_k
|
|
|
|
integer iijk, iene, ireq, itos, ijk_tot, intalloc, ijk_index
|
|
integer mpi_blocklen(2), mpi_displ(2), mpi_types(2)
|
|
integer mpi_type_e
|
|
|
|
real*8, pointer :: qfact(:)
|
|
integer iqfact
|
|
real*8 ecabs, emp2f12, tfact, ets
|
|
logical lno, lnaf
|
|
|
|
interface!{{{
|
|
c {{{ interfaces for pointers
|
|
subroutine rpoint1d(egydim,haromdim,dim1)
|
|
implicit none
|
|
integer dim1
|
|
real*8,target :: egydim(dim1)
|
|
real*8, pointer :: haromdim(:)
|
|
end subroutine
|
|
subroutine rpoint2d(egydim,haromdim,dim1,dim2)
|
|
implicit none
|
|
integer dim1,dim2
|
|
real*8,target :: egydim(dim1,dim2)
|
|
real*8, pointer :: haromdim(:,:)
|
|
end subroutine
|
|
subroutine rpoint3d(egydim,haromdim,dim1,dim2,dim3)
|
|
implicit none
|
|
integer dim1,dim2,dim3
|
|
real*8,target :: egydim(dim1,dim2,dim3)
|
|
real*8, pointer :: haromdim(:,:,:)
|
|
end subroutine
|
|
subroutine rpoint4d(egydim,haromdim,dim1,dim2,dim3,dim4)
|
|
implicit none
|
|
integer dim1,dim2,dim3,dim4
|
|
real*8,target :: egydim(dim1,dim2,dim3,dim4)
|
|
real*8, pointer :: haromdim(:,:,:,:)
|
|
end subroutine
|
|
subroutine ipoint1d(egydim1,egydim2,dim1)
|
|
implicit none
|
|
integer dim1
|
|
integer, target :: egydim1(dim1)
|
|
integer, pointer :: egydim2(:)
|
|
end subroutine
|
|
c}}}
|
|
subroutine get_abci(i, abc_ofi, bsymmv, symmocc, dgroup,
|
|
$ co, first, last, va, irecln, ibufln, nbasis, nocc, nvirt,
|
|
$ nirmax, multpg, ccsdalg, dfnb, jab, jai, dcore, imem, n,
|
|
$ addr, ccsdmkl)
|
|
implicit none
|
|
integer :: i, nvirt, nocc, nbasis, dfnb, imem, n, addr
|
|
integer :: irecln, ibufln, nirmax, dgroup
|
|
integer :: bsymmv(nirmax+1),symmocc(nirmax),multpg(nirmax,nirmax)
|
|
real*8 :: va(nvirt,nvirt,nvirt,*)
|
|
real*8 :: dcore(*)
|
|
integer :: co(nbasis)
|
|
integer :: first(0:5,8,8,nbasis),last(0:5,8,8,nbasis)
|
|
real*8, pointer :: abc_ofi(:, :, :)
|
|
real*8 :: jai(nvirt, nocc, dfnb), jab(dfnb, nvirt, nvirt)
|
|
character*8 :: ccsdalg
|
|
character*3 :: ccsdmkl
|
|
end subroutine
|
|
end interface!}}}
|
|
|
|
|
|
if((lno.or.lnaf.or.lf12).and.trim(localcc).eq.'off') then
|
|
iqfact = dblalloc(nocc)
|
|
call rpoint1d(dcore(iqfact), qfact, nocc)
|
|
open(800, file = 'F12INTE', form = 'unformatted')
|
|
rewind(800)
|
|
read(800) ecabs, emp2f12
|
|
read(800) tfact, qfact
|
|
close(800)
|
|
end if
|
|
#ifdef MPI
|
|
c initialize arrays for MPI restart!{{{
|
|
if(ccsdrest .ne. 'off') then
|
|
ifree = maxcor - (imem-imem1)
|
|
|
|
ijk_tot = ijk_index(nocc-1, nocc, nocc, nocc)
|
|
if(ifree .lt. ijk_tot * 4) then
|
|
write(*, '(a, i3)') ' Insufficient memory on rank ',
|
|
$ mpi_rank
|
|
call mrccend(1)
|
|
end if
|
|
nReqs = ijk_tot
|
|
nReqs0 = ijk_tot
|
|
nReqsAll = ijk_tot
|
|
|
|
iijk = intalloc(ijk_tot)
|
|
iene = dblalloc(ijk_tot)
|
|
ireq = intalloc(ijk_tot)
|
|
itos = intalloc(ijk_tot)
|
|
call ipoint1d(icore(iijk), ijk_indices, nReqs)
|
|
call rpoint1d(dcore(iene), energies, nReqs)
|
|
call ipoint1d(icore(ireq), requestsAll, nReqs)
|
|
call ipoint1d(icore(ireq), requests, nReqs)
|
|
call ipoint1d(icore(itos), toSend, nReqs)
|
|
|
|
requests = MPI_REQUEST_NULL
|
|
|
|
c if(ifree .ge. 4*3*ptthreads) then
|
|
c ifree = min(ifree, 4096)
|
|
c nReqs = ifree/3 !4
|
|
c
|
|
c call ipoint1d(icore(iimem+n*nvirt**3), requests, nReqs)
|
|
c call ipoint1d(icore(iimem+n*nvirt**3+nReqs), ijk_indices,
|
|
c $ nReqs)
|
|
c call rpoint1d(dcore(iimem+n*nvirt**3+2*nReqs), energies,
|
|
c $ nReqs)
|
|
c
|
|
c else
|
|
c nReqs = 3*ptthreads
|
|
c
|
|
c call ipoint1d(req3, requests, nReqs)
|
|
c call ipoint1d(ijk3, ijk_indices, nReqs)
|
|
c call rpoint1d(en3, energies, nReqs)
|
|
c end if
|
|
c
|
|
c nReqs0 = nReqs
|
|
c nReqs = nReqs / ptthreads
|
|
c requests = MPI_REQUEST_NULL
|
|
end if!}}}
|
|
#endif
|
|
|
|
! initialization {{{
|
|
reqused = 0
|
|
nummes = 0
|
|
#if defined(MPI)
|
|
comm_scr = 0
|
|
prk_sent = 0
|
|
#endif
|
|
|
|
et=0.d0
|
|
ets=0.d0
|
|
et2=0.d0
|
|
ind=1
|
|
notend=.true.
|
|
prk=0
|
|
pr=0.d0
|
|
prold=0.d0
|
|
epair=0.d0
|
|
ei = 0.d0
|
|
|
|
lapl = talg.eq.'lapl' .or. talg.eq.'topr' .or. talg.eq.'to '
|
|
$ .or. talg.eq.'lato'
|
|
if(.not.lapl) then
|
|
nquad = 1
|
|
ttd = nocc
|
|
else
|
|
ttd = 1
|
|
end if
|
|
|
|
if(ptthreads .eq. 1) then
|
|
nthreade = omp_max_threads
|
|
else
|
|
nthreade = omp_max_threads / ptthreads
|
|
end if
|
|
if(ccsdmkl .eq. 'thr') then
|
|
mkl_threads = mkl_max_threads / ptthreads
|
|
else
|
|
mkl_threads = 1
|
|
end if
|
|
|
|
if(lapl) then ! .and. localcc.ne.'off ') then
|
|
|
|
call rpoint3d(abc1, abc_ofk, nvirt, nvirt, nvirt)
|
|
|
|
call rpoint3d(abci, abc_ofj, nvirt, nvirt, nvirt)
|
|
|
|
if(iblocki+1 .lt. nocc) then
|
|
n = iblocki - 2 ! 1 for abc_ofj
|
|
nn = n + 2 ! abcl's, abc1 and abcj
|
|
call rpoint4d(abci(1, 1, 1, 2), abc_k,
|
|
$ nvirt, nvirt, nvirt, n)
|
|
else
|
|
n = nocc
|
|
nn = n
|
|
call rpoint4d(abc1, abc_k, nvirt, nvirt, nvirt, nocc)
|
|
end if
|
|
|
|
else
|
|
n = blocknumber(nvirt**3, 1, nocc, 'y', 1, bsize,
|
|
$ 'Insufficient memory for <ab|ci> block! ',
|
|
$ maxcor - (imem-imem1))
|
|
nn = n
|
|
if(n .lt. min(nocc, 1 + ptthreads * 2)) then
|
|
write(iout, *) 'Error: insufficient memory in btcorr'
|
|
call mrccend(1)
|
|
end if
|
|
|
|
if(n .ne. nocc) then
|
|
c abckinmem = .true.
|
|
c abcjinmem = .true.
|
|
c abciinmem = .true.
|
|
c if(n .lt. 1 + 2 * ptthreads) then
|
|
c if(n .lt. 1 + ptthreads) then
|
|
c if(n .lt. 1) then
|
|
c abckinmem = .false.
|
|
c else
|
|
c n = n - 1
|
|
c call rpoint3d(dcore(imem+n*nvirt**3), abc_ofk,
|
|
c $ nvirt, nvirt, nvirt)
|
|
c end if
|
|
c abcjinmem = .false.
|
|
c else
|
|
c n = n - 1 - ptthreads
|
|
c call rpoint3d(dcore(imem+n*nvirt**3), abc_ofk,
|
|
c $ nvirt, nvirt, nvirt)
|
|
c call rpoint3d(dcore(imem+(n+1)*nvirt**3), abc_ofj,
|
|
c $ nvirt, nvirt, nvirt)
|
|
c end if
|
|
c abciinmem = .false.
|
|
c else
|
|
n = n - 1 - 2 * ptthreads
|
|
|
|
iabck = dblalloc(nvirt**3)
|
|
call rpoint3d(dcore(iabck), abc_ofk, nvirt, nvirt, nvirt)
|
|
|
|
iabcj = dblalloc(ptthreads * nvirt**3)
|
|
call rpoint3d(dcore(iabcj), abc_ofj, nvirt, nvirt, nvirt)
|
|
|
|
iabci = dblalloc(ptthreads * nvirt**3)
|
|
call rpoint3d(dcore(iabci), abc_ofi, nvirt, nvirt, nvirt)
|
|
|
|
if(n .gt. 0)
|
|
$ call rpoint4d(dcore(imem), abc_k, nvirt, nvirt, nvirt, n)
|
|
|
|
c end if
|
|
|
|
else
|
|
call rpoint4d(dcore(imem), abc_k, nvirt, nvirt, nvirt, nocc)
|
|
end if
|
|
end if
|
|
|
|
c all kji combination:
|
|
npr = nocc - n
|
|
kji = 0
|
|
if(localcc.ne.'off ' .and. talg.eq.'lapl') then
|
|
kmax = 1
|
|
else
|
|
kmax = nocc
|
|
end if
|
|
do k = 1, kmax
|
|
do i = k+1, nocc ! j = k
|
|
kji = kji + 1
|
|
end do
|
|
do j = k+1, nocc ! j>k
|
|
do i = j, nocc
|
|
kji = kji + 1
|
|
end do
|
|
end do
|
|
enddo
|
|
dkji = dble(kji) * nquad
|
|
|
|
#if defined(MPI)
|
|
if(mpi_ccsd) then
|
|
if(mpi_size .gt. nocc .and. master_thread) then !mpi_rank .eq. 0) then
|
|
write(6, '(" Warning: only ", i3,
|
|
$ " MPI threads were utilized for (T)"/)') nocc
|
|
end if
|
|
|
|
if(mpi_size .gt. 1) then
|
|
c call create_counter(ccsd_communicator, counter_k)
|
|
call create_manager(ccsd_communicator, master_rank)
|
|
call allocate_counter(ccsd_communicator, count_k)
|
|
end if
|
|
end if
|
|
#endif
|
|
|
|
if(ccsdalg.eq.'dfdirect' .and. localcc.eq.'off ') then
|
|
! jai contains Jia, transpose it
|
|
do k = 1, dfnb
|
|
call dcopy(nvirt*nocc, jai0(1, 1, k), 1, scr, 1)
|
|
|
|
do i = 1, nocc
|
|
call dcopy(nvirt, scr(i, 1), nocc, jai0(1, i, k), 1)
|
|
end do
|
|
end do
|
|
end if
|
|
|
|
if(lapl) then
|
|
quad = dsqrt(quad)
|
|
call rpoint2d(tl, t, nvirt, nocc)
|
|
call rpoint4d(ttla, ttl, nvirt, nvirt, nocc, 1)
|
|
call rpoint4d(ttl2a, ttl2, nvirt, nvirt, 1, nocc)
|
|
c call rpoint2d(ttl3a, ttl3, nvirt, nvirt)
|
|
c call rpoint2d(IabIJ, ttl4, nvirt, nvirt)
|
|
else
|
|
call rpoint2d(t0, t, nvirt, nocc)
|
|
call rpoint4d(tt, ttl, nvirt, nvirt, nocc, nocc)
|
|
call rpoint4d(tt, ttl2, nvirt, nvirt, nocc, nocc)
|
|
c call rpoint2d(tt0(1:nvirt, 1:nvirt, 1, 1), ttl3,
|
|
c nvirt, nvirt)
|
|
c call rpoint2d(tt0(1:nvirt, 1:nvirt, 1, 1), ttl4,
|
|
c nvirt, nvirt)
|
|
end if
|
|
if(lapl .and. localcc.ne.'off ') then
|
|
call rpoint3d(lljpai, jai, nvirt, nocc, dfnb)
|
|
call rpoint3d(lljab, jab, dfnb, nvirt, nvirt)
|
|
|
|
else
|
|
call rpoint3d(jai0, jai, nvirt, nocc, dfnb)
|
|
call rpoint3d(jab0, jab, dfnb, nvirt, nvirt)
|
|
end if
|
|
|
|
if(lapl .and. localcc.ne.'off ' .and. ccsdalg.eq.'disk ')
|
|
$ ccsdalg = 'dfdirect'
|
|
|
|
abijinmem=.true.
|
|
if (jlen.eq.1.and.(ccsdalg.eq.'dfdirect'.or.
|
|
$ localcc.ne.'off '))abijinmem=.false.
|
|
!}}}
|
|
|
|
c create and open restart file!{{{
|
|
if(ccsdrest .ne. 'off') then
|
|
inquire(file = 'pt.rst', exist = rstex)
|
|
c if(mpi_rank .eq. 0 .and. (.not. ptrest .or. .not.rstex))
|
|
if(master_thread .and. (.not. ptrest .or. .not.rstex))
|
|
$ call zero_rst_pt(nocc)
|
|
|
|
#ifdef MPI
|
|
call bcast_file('pt.rst', bcast_comm)
|
|
if(mpi_size .gt. 1) call MPI_Barrier(ccsd_communicator,mpi_err)
|
|
#endif
|
|
open(unit = 725, file = 'pt.rst', access = 'stream')
|
|
end if!}}}
|
|
|
|
write(iout, *)
|
|
if(talg .eq. 'occ ') then
|
|
write(iout,'(f12.2," MB memory is utilized for (T)"/)')
|
|
$ (imem+n*nvirt**3-imem1)*8.d0/1024**2
|
|
else
|
|
write(iout,'(f12.2," MB memory is utilized for (T)"/)')
|
|
$ (imem-imem1)*8.d0/1024**2
|
|
end if
|
|
c write(iout,'(" 1% done.")')
|
|
call progress_bar(iout, 0.d0, .true.)
|
|
|
|
do iquad = 1, nquad
|
|
new_k = .true.
|
|
if(lapl) then
|
|
c multiply with Laplace factors
|
|
if(localcc.eq.'off ')then
|
|
c {{{ localcc.eq.'off ' case: FOR TEST PURPOSES, not efficient
|
|
do i=1,nocc
|
|
fi=quad(i,iquad)
|
|
t(1:nvirt,i)=t0(1:nvirt,i)*quad(nocc+1:nocc+nvirt,iquad)*fi
|
|
enddo
|
|
c lt1=t
|
|
c call dgemm('n','n',nvirt,nocc,nocc,1.d0,lt1,nvirt,cmo,nocc,
|
|
c $ 0.d0,t,nvirt)
|
|
C read integrals to memory
|
|
if(ccsdalg .eq. 'disk ') then
|
|
call abijread(abij,bsymmv,symmocc,dgroup,first,last,co,va,
|
|
$ irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
|
|
call aijkread(aijk,bsymmv,symmocc,dgroup,first,last,co,va,
|
|
$ irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
|
|
else
|
|
! (ai|bj) = <ab|ij>
|
|
call dgemm('n', 't', nvirt*nocc, nvirt*nocc, dfnb,
|
|
$ 1.d0, jai, nvirt*nocc, jai, nvirt*nocc,
|
|
$ 0.d0, abij, nvirt*nocc)
|
|
! aibj -> abij
|
|
do j = 1, nocc
|
|
do a = 1, nvirt
|
|
call dcopy(nocc*nvirt, abij(a, 1, 1, j), nvirt, scr,1)
|
|
|
|
do b = 1, nvirt
|
|
abij(a, b, 1:nocc, j) = scr(1:nocc, b)
|
|
end do
|
|
end do
|
|
end do
|
|
|
|
! (aj|ik) = <ai|jk>
|
|
call dgemm('n', 'n', nvirt*nocc, nocc**2, dfnb,
|
|
$ 1.d0, jai, nvirt*nocc, jij, dfnb,
|
|
$ 0.d0, aijk, nvirt*nocc)
|
|
! ajik -> aijk
|
|
do k = 1, nocc
|
|
do a = 1, nvirt
|
|
do j = 1, nocc
|
|
scr1o(j+1:nocc) = aijk(a, j+1:nocc, j, k)
|
|
aijk(a, j+1:nocc, j, k) = aijk(a, j, j+1:nocc, k)
|
|
aijk(a, j, j+1:nocc, k) = scr1o(j+1:nocc)
|
|
end do
|
|
end do
|
|
end do
|
|
end if
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
C$OMP& PRIVATE(j,fj,i,fij,b,fbij)
|
|
do j=1,nocc
|
|
fj=quad(j,iquad)
|
|
do i=1,nocc
|
|
fij=quad(i,iquad)*fj
|
|
do b=1,nvirt
|
|
fbij=quad(nocc+b,iquad)*fij
|
|
abij(1:nvirt,b,i,j)=abij(1:nvirt,b,i,j)*
|
|
$ quad(nocc+1:nocc+nvirt,iquad)*fbij
|
|
enddo
|
|
enddo
|
|
enddo
|
|
C$OMP END PARALLEL DO
|
|
do j=1,nocc
|
|
fj=quad(j,iquad)
|
|
if (iquad.gt.1) fj=fj/quad(j,iquad-1)
|
|
do i=1,nocc
|
|
fij=quad(i,iquad)*fj
|
|
if (iquad.gt.1) fij=fij/quad(i,iquad-1)
|
|
do b=1,nvirt
|
|
do a=1,nvirt
|
|
fbij=fij*quad(nocc+b,iquad)*quad(nocc+a,iquad)
|
|
if (iquad.gt.1)
|
|
$ fbij=fbij/quad(nocc+a,iquad-1)/quad(nocc+b,iquad-1)
|
|
tt(a,b,i,j)= tt(a,b,i,j)*fbij ! ss is in tt
|
|
enddo
|
|
enddo
|
|
enddo
|
|
enddo
|
|
if(ccsdalg .eq. 'disk ') then
|
|
c call abciread(1,nvirt,abci,bsymmv,symmocc,dgroup,co,
|
|
c $first,last,v,
|
|
c $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
|
|
call abciread(1,nocc,abc_k,bsymmv,symmocc,dgroup,co,
|
|
$ first,last,va,irecln,ibufln,nbasis,nocc,nvirt,
|
|
$ nirmax,multpg)
|
|
else
|
|
! (ac|bi) = <ab|ci>
|
|
call dgemm('t', 't', nvirt**2, nvirt*nocc, dfnb,
|
|
$ 1.d0, jab, dfnb, jai, nvirt*nocc,
|
|
$ 0.d0, abc_k, nvirt**2)
|
|
! acbi -> abci
|
|
do i = 1, nocc
|
|
do a = 1, nvirt
|
|
do b = 1, nvirt
|
|
scr1v(b+1:nvirt) = abc_k(a, b+1:nvirt, b, i)
|
|
abc_k(a,b+1:nvirt,b,i) = abc_k(a,b,b+1:nvirt,i)
|
|
abc_k(a, b, b+1:nvirt, i) = scr1v(b+1:nvirt)
|
|
end do
|
|
end do
|
|
end do
|
|
end if
|
|
do c=1,nvirt
|
|
fc=quad(nocc+c,iquad)
|
|
do b=1,nvirt
|
|
fj=quad(nocc+b,iquad)*fc
|
|
do a=1,nvirt
|
|
fij=quad(nocc+a,iquad)*fj
|
|
do i=1,nocc
|
|
abc_k(a,b,c,i)=abc_k(a,b,c,i)*quad(i,iquad)*fij
|
|
enddo
|
|
enddo
|
|
enddo
|
|
enddo
|
|
do j=1,nocc
|
|
fj=quad(j,iquad)
|
|
do i=1,nocc
|
|
fij=quad(i,iquad)*fj
|
|
do k=1,nocc
|
|
do a=1,nvirt
|
|
aijk(a,k,i,j)=aijk(a,k,i,j)*fij*
|
|
$ quad(nocc+a,iquad)/quad(k,iquad)
|
|
enddo
|
|
enddo
|
|
enddo
|
|
enddo
|
|
c }}}
|
|
|
|
else ! if localcc=on
|
|
c multiply with Laplace factors and trf to the Mayer basis: (parts of) T2, abci, abij and aijk
|
|
call trf2laplbasis(tt, ttl, jij, jab0, jai0, nocc, nvirt,
|
|
$ dfnb, abij, abc_ofk, aijk, quad, iquad, nquad, cmo,
|
|
$ aijk, jai, jab, rr, rr, wa, wa, aijk, va, t, t, t0, talg,
|
|
$ fdo, va, wa, abci_inmem, ttl2, wa, wa, IabIJa)
|
|
|
|
! Jpai -> Jaip
|
|
call gentrans(jai, 1, dfnb, nvirt*nocc, nvirt*nocc*dfnb,
|
|
$ tmp, dcore(imem), (dfnb+nvirt*nocc)/2, iok)
|
|
if(iok .ne. 0) then
|
|
write(*,*) 'Error while reordering Jai in btcorr'
|
|
call mrccend(1)
|
|
end if
|
|
end if
|
|
endif ! lapl
|
|
c
|
|
c transe tt(a,b,i,k) to rr(b,a,i,k) for all k
|
|
TIME0(times)
|
|
if (rrd.eq.nocc) then
|
|
do k=1,nocc
|
|
do i=1,nocc
|
|
call tr(nvirt,tt(1,1,i,k),rr(1,1,i,k,1))
|
|
enddo
|
|
enddo
|
|
endif
|
|
TIMEADD(times(1,25),times)
|
|
|
|
if(talg .eq. 'occ ') then
|
|
C get <ab|ij> and <ai|jk> integrals !{{{
|
|
if(ccsdalg .eq. 'disk ') then
|
|
if(abijinmem)
|
|
$ call abijread(abij,bsymmv,symmocc,dgroup,first,last,co,
|
|
$ va,irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
|
|
|
|
call aijkread(aijk,bsymmv,symmocc,dgroup,first,last,co,
|
|
$ va,irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
|
|
|
|
else if(ccsdalg .eq. 'dfdirect') then
|
|
! (ai|bj) = <ab|ij>
|
|
if (abijinmem) then
|
|
call dgemm('n', 't', nvirt*nocc, nvirt*nocc, dfnb,
|
|
$ 1.d0, jai, nvirt*nocc, jai, nvirt*nocc,
|
|
$ 0.d0, abij, nvirt*nocc)
|
|
! aibj -> abij
|
|
do j = 1, nocc
|
|
do a = 1, nvirt
|
|
call dcopy(nocc*nvirt,abij(a,1,1,j),nvirt,scr,1)
|
|
|
|
do b = 1, nvirt
|
|
abij(a, b, 1:nocc, j) = scr(1:nocc, b)
|
|
end do
|
|
end do
|
|
end do
|
|
endif ! abijinmem
|
|
|
|
! (aj|ik) = <ai|jk>
|
|
call dgemm('n', 'n', nvirt*nocc, nocc**2, dfnb,
|
|
$ 1.d0, jai, nvirt*nocc, jij, dfnb,
|
|
$ 0.d0, aijk, nvirt*nocc)
|
|
! ajik -> aijk
|
|
do k = 1, nocc
|
|
do a = 1, nvirt
|
|
do j = 1, nocc
|
|
scr1o(j+1:nocc) = aijk(a, j+1:nocc, j, k)
|
|
aijk(a,j+1:nocc,j,k) = aijk(a,j,j+1:nocc,k)
|
|
aijk(a, j, j+1:nocc, k) = scr1o(j+1:nocc)
|
|
end do
|
|
end do
|
|
end do
|
|
end if !}}}
|
|
end if
|
|
|
|
TIME0(times)
|
|
if (.not.(lapl .and. localcc.eq.'off ') .and. n.ge.1) then
|
|
c get <ab|ci> integrals !{{{
|
|
if(ccsdalg .eq. 'disk ') then
|
|
call abciread(nocc-n+1, n, abc_k, bsymmv, symmocc, dgroup,
|
|
$ co, first, last, va, irecln, ibufln, nbasis, nocc,
|
|
$ nvirt, nirmax, multpg)
|
|
|
|
else if(ccsdalg .eq. 'dfdirect') then
|
|
! (ac|bk) = <ab|ck>
|
|
call dgemm('t', 't', nvirt**2, nvirt*n, dfnb,
|
|
$ 1.d0, jab, dfnb, jai(1, nocc-n+1, 1), nvirt*nocc,
|
|
$ 0.d0, abc_k, nvirt**2)
|
|
|
|
! acbk -> abck
|
|
do k = 1, n
|
|
do a = 1, nvirt
|
|
do b = 1, nvirt
|
|
scr1v(b+1:nvirt) = abc_k(a, b+1:nvirt, b, k)
|
|
abc_k(a, b+1:nvirt, b, k) =
|
|
$ abc_k(a, b, b+1:nvirt, k)
|
|
abc_k(a, b, b+1:nvirt, k) = scr1v(b+1:nvirt)
|
|
end do
|
|
end do
|
|
end do
|
|
end if
|
|
end if !}}}
|
|
TIMEADD(times(1,12),times)
|
|
|
|
kk = 0 ! kk=k for canonical, kk=j for local
|
|
do ! loop over index k for canonical or index j for local calculation
|
|
TIME0(times(1,99))
|
|
|
|
#if defined(MPI)
|
|
if(mpi_size .gt. 1) then
|
|
call get_and_increment_counter(counter_own, 1,
|
|
$ count_k)
|
|
kk = counter_own
|
|
if(k .gt. nocc+mpi_size .or. k .le. 0) then
|
|
write(*, '("WARNING: Counter in rank ", i4,
|
|
$ " out of range in (T): ", i25)') mpi_rank, kk
|
|
c call free_manager(ccsd_communicator, counter_k)
|
|
call deallocate_counter(ccsd_communicator, count_k)
|
|
call free_manager(ccsd_communicator)
|
|
call mrccend(1)
|
|
end if
|
|
else
|
|
#endif
|
|
kk = kk + 1
|
|
#if defined(MPI)
|
|
end if
|
|
#endif
|
|
if(kk .gt. nocc) exit
|
|
|
|
if(localcc .eq. 'off ') then
|
|
k = kk
|
|
else
|
|
k = 1
|
|
end if
|
|
|
|
if(ptrest) then
|
|
kdone = .true.
|
|
do j = k, nocc
|
|
if(j .eq. k) then
|
|
istart = j + 1
|
|
else
|
|
istart = j
|
|
endif
|
|
kjdone = .true.
|
|
do i = istart, nocc
|
|
call read_rst_pt(k, j, i, nocc, kjidone, esum)
|
|
if(kjidone) then
|
|
et = et + 2.d0 * esum
|
|
if(lno.or.lnaf.or.lf12.and.trim(localcc).eq.'off')
|
|
$ ets=ets+2*esum*(qfact(i)+qfact(j)+qfact(k))/3
|
|
prk = prk + 1
|
|
else
|
|
kdone = .false.
|
|
kjdone = .false.
|
|
end if
|
|
end do
|
|
jdone(j) = kjdone
|
|
end do
|
|
if(kdone) goto 1235
|
|
end if
|
|
|
|
ind = nocc - n + 1
|
|
|
|
if(new_k) then
|
|
c get <ab|ck> integrals for one k !{{{
|
|
TIME0(times)
|
|
if(k .ge. ind) then
|
|
lk = k - ind + 1
|
|
call rpoint3d(abc_k(1:nvirt, 1:nvirt, 1:nvirt, k - ind + 1),
|
|
$ abc_ofk, nvirt, nvirt, nvirt)
|
|
else !if(abckinmem) then
|
|
if(lapl) then ! .and. localcc.ne.'off ') then
|
|
call rpoint3d(abc1, abc_ofk, nvirt, nvirt, nvirt)
|
|
else
|
|
call rpoint3d(dcore(iabck), abc_ofk, nvirt, nvirt, nvirt)
|
|
end if
|
|
|
|
c if(ccsdalg .eq. 'disk ') then
|
|
c call abciread(k, 1, abc_ofk, bsymmv, symmocc, dgroup,
|
|
c $ co, first, last, va, irecln, ibufln, nbasis,
|
|
c $ nocc, nvirt, nirmax, multpg)
|
|
c
|
|
c else if(ccsdalg .eq. 'dfdirect') then
|
|
c ! (ac|bk) = <ab|ck>
|
|
c call dgemm('t', 't', nvirt**2, nvirt, dfnb,
|
|
c $ 1.d0, jab, dfnb, jai(1, k, 1), nvirt*nocc,
|
|
c $ 0.d0, abc_ofk, nvirt**2)
|
|
c
|
|
c ! acbk -> abck
|
|
c do a = 1, nvirt
|
|
c do b = 1, nvirt
|
|
c scr1v(b+1:nvirt) = abc_ofk(a, b+1:nvirt, b)
|
|
c abc_ofk(a, b+1:nvirt, b) =
|
|
c $ abc_ofk(a, b, b+1:nvirt)
|
|
c abc_ofk(a, b, b+1:nvirt) = scr1v(b+1:nvirt)
|
|
c end do
|
|
c end do
|
|
c
|
|
c end if
|
|
c write(*,*) "assemble abck",k
|
|
call get_abci(k, abc_ofk, bsymmv, symmocc, dgroup,
|
|
$ co, first, last, va, irecln, ibufln, nbasis, nocc,
|
|
$ nvirt, nirmax, multpg, ccsdalg, dfnb, jab, jai, dcore,
|
|
$ imem, n, iabck, ccsdmkl) !iabck not correct
|
|
end if
|
|
TIMEADD(times(1,12),times)!}}}
|
|
c
|
|
c transpose tt(a,b,i,k) to rr(b,a,i) for k
|
|
if(ccsdmkl .eq. 'thr') then
|
|
if (rrd.eq.1) then
|
|
do i=1,nocc
|
|
call tr(nvirt,tt(1,1,i,k),rr(1,1,i,1,1))
|
|
enddo
|
|
kinrr = 1
|
|
else
|
|
kinrr = k
|
|
endif
|
|
end if
|
|
c
|
|
if (.not.abijinmem .and. localcc.eq.'off ') then
|
|
c assembly <ab|jk> for k in abj order, full <ab|jk> did not fit in memory
|
|
do j=k,nocc
|
|
call dgemm('n','t',nvirt,nvirt,dfnb,
|
|
$ 1.d0,jai(1,j,1), nvirt*nocc,jai(1,k,1),nvirt*nocc,
|
|
$ 0.d0,abij(1,1,j,1),nvirt)
|
|
enddo
|
|
endif
|
|
|
|
c {{{ localcc.eq.'off ' case: FOR TEST PURPOSES, not efficient
|
|
if(localcc.eq.'off ' .and. lapl) then
|
|
if (talg.eq.'lapl'.or.talg.eq.'lato') then
|
|
c make ttl(-,~,i,k) & ttl2(-,~,k,i) from ss(-,-,-.-)
|
|
do i=1,nocc
|
|
do b=1,nvirt
|
|
fb=quad(nocc+b,iquad)**2
|
|
ttl (1:nvirt,b,i,1)=tt(1:nvirt,b,i,k)/fb
|
|
ttl2(1:nvirt,b,1,i)=tt(1:nvirt,b,k,i)/fb
|
|
enddo
|
|
enddo
|
|
endif
|
|
end if
|
|
c }}}
|
|
|
|
if(localcc .ne. 'off ') new_k = .false.
|
|
end if
|
|
|
|
if(localcc.ne.'off ') then
|
|
jmax = kk
|
|
else
|
|
jmax = nocc
|
|
end if
|
|
c
|
|
#if defined(OMP)
|
|
if(ptthreads .gt. 1) call omp_set_max_active_levels(2)
|
|
#endif
|
|
c$OMP PARALLEL DEFAULT(NONE) REDUCTION(+:et,et2,ets)
|
|
c$OMP& SHARED(dkji,prold,iout,ind,abc_k,ccsdrest,nReqs0,lf12,
|
|
c$OMP& requestsAll,omp_max_threads,ptthreads,ccsdmkl,rrd,nvirt,nocc,
|
|
c$OMP& IabIJa,lapl,ttl3a,tt,k,jmax,mpi_rank,mpi_size,pr,prk,iquad,quad,
|
|
c$OMP& iabci,ptrest,fdo,abc_ofk,n,imem,jai,ccsdalg,dfnb,jab,iabcj,
|
|
c$OMP& bsymmv,symmocc,dgroup,co,first,last,t,irecln,ibufln,nbasis,
|
|
c$OMP& nirmax,multpg,kinrr,abij,aijk,times,talg,ttl,ttl2,abijinmem,
|
|
c$OMP& localcc,ccsd_communicator,MPI_STATUS_IGNORE,ttd,kk,fdv,stat,
|
|
c$OMP& abci_inmem,tmpvsize,Apbc,jij,prk_sent,nummes,epair,request,flag,
|
|
c$OMP& mkl_max_threads,jdone,master_thread,master_rank,requests,
|
|
c$OMP& ijk_indices,energies,toSend,nReqsAll,
|
|
c$OMP& nReqs,lno,lnaf,qfact,ei) !,mpi_blocklen,mpi_displ,mpi_types,
|
|
cc$OMP& mpi_type_e)
|
|
c$OMP& PRIVATE(rrp,j,abc_ofj,scr1v,jinrr,esum,c,b,delta1,delta2,fkj,
|
|
c$OMP& istart,i,li,abc_ofi,delta3,fkji,fcb,astart,delta,fcba,y,z,w,v,
|
|
c$OMP& times0,kjiind,kjidone,k2,j2,i2,ijk,reqptr,deltaij,
|
|
c$OMP& mpi_err,omp_num_threads,omp_thread_num,IabIJ,
|
|
c$OMP& ttl3,ttl4,fb,istat,findex,lindex,denom,iinbl,esum2)
|
|
c$OMP& FIRSTPRIVATE(nthreade,mkl_threads,comm_scr)
|
|
c$OMP& NUM_THREADS(ptthreads)
|
|
|
|
c initialization of thread private variables!{{{
|
|
#ifdef OMP
|
|
omp_thread_num = omp_get_thread_num()
|
|
omp_num_threads = omp_get_num_threads()
|
|
#else
|
|
omp_thread_num = 0
|
|
omp_num_threads = 1
|
|
#endif
|
|
#ifdef MPI
|
|
c initialize arrays for MPI restart!{{{
|
|
c if(ccsdrest .ne. 'off ') then
|
|
cc if(omp_thread_num .eq. 0) then
|
|
cc findex = 1
|
|
cc lindex = nReqs0/omp_num_threads
|
|
cc
|
|
cc nReqsAll = nReqs0
|
|
cc nReqs = nReqs0/omp_num_threads
|
|
cc
|
|
cc call ipoint1d(requestsAll, requests, nReqsAll)
|
|
cc
|
|
cc else
|
|
c findex = omp_thread_num*nReqs0/omp_num_threads+1
|
|
c lindex = min((omp_thread_num+1)*nReqs0/omp_num_threads,
|
|
c $ nReqs0)
|
|
cc lindex = nReqs0
|
|
c
|
|
cc nReqs = nReqs0*(omp_num_threads-1)/
|
|
cc $ omp_num_threads
|
|
cc nReqs = min(nReqs0/omp_num_threads
|
|
cc $ nReqs0-omp_thread_num*nReqs0/omp_num_threads)
|
|
c nReqs = lindex - findex +1
|
|
c nReqsAll = nReqs
|
|
cc write(*,*) 'majom ', findex, lindex, nReqs
|
|
c
|
|
c call ipoint1d(requestsAll(findex:lindex), requests,
|
|
c $ nReqs)
|
|
c call ipoint1d(ijk_indices(findex:lindex), ijk_indices,
|
|
c $ nReqs)
|
|
c call rpoint1d(energies(findex:lindex), energies,
|
|
c $ nReqs)
|
|
cc call ipoint1d(toSend(findex:lindex), toSend, nReqs)
|
|
cc end if
|
|
c end if
|
|
|
|
cc$OMP SINGLE
|
|
c call MPI_Get_address(energies, addr_e, mpi_err)
|
|
c mpi_blocklen(1) = 1
|
|
c mpi_displ(1) = 0
|
|
c mpi_types(1) = MPI_DOUBLE_PRECISION
|
|
c call MPI_Get_address(ijk_indices, mpi_displ(2), mpi_err)
|
|
c mpi_blocklen(2) = 1
|
|
c mpi_displ(2) = mpi_displ(2) - addr_e
|
|
c mpi_types(2) = MPI_INTEGER
|
|
c call MPI_Type_create_struct(2, mpi_blocklen,
|
|
c $ mpi_displ, mpi_types, mpi_type_e, mpi_err)
|
|
c call MPI_Type_commit(mpi_type_e, mpi_err)
|
|
cc$OMP END SINGLE
|
|
#else
|
|
nReqsAll = 1
|
|
nReqs = 1 !}}}
|
|
#endif
|
|
|
|
if(omp_thread_num .lt.
|
|
$ mod(omp_max_threads, ptthreads)) then
|
|
if(ccsdmkl .eq. 'thr') mkl_threads = mkl_threads + 1
|
|
if(ptthreads .ne. 1) nthreade = nthreade + 1
|
|
end if
|
|
#ifdef MKL
|
|
istat = mkl_set_num_threads_local(int(mkl_threads,4))
|
|
#endif
|
|
|
|
if(rrd.eq.1 .and. ccsdmkl.eq.'seq' .and. localcc.eq.'off ')then
|
|
call rpoint4d(rr(1, 1, 1, 1, omp_thread_num+1),
|
|
$ rrp, nvirt, nvirt, nocc, rrd)
|
|
else
|
|
call rpoint4d(rr, rrp, nvirt, nvirt, nocc, rrd)
|
|
end if
|
|
|
|
call rpoint3d(wa(1, 1, 1, omp_thread_num+1),
|
|
$ w, nvirt, nvirt, nvirt)
|
|
call rpoint3d(va(1, 1, 1, omp_thread_num+1),
|
|
$ v, nvirt, nvirt, nvirt)
|
|
|
|
call rpoint2d(IabIJa(1, 1, omp_thread_num+1), IabIJ,
|
|
$ nvirt, nvirt)
|
|
|
|
if(lapl) then
|
|
call rpoint2d(ttl3a(1, 1, omp_thread_num+1), ttl3,
|
|
$ nvirt, nvirt)
|
|
call rpoint2d(IabIJ, ttl4, nvirt, nvirt)
|
|
c call rpoint2d(IabIJa(1, 1, omp_thread_num+1), ttl4,
|
|
c $ nvirt, nvirt)
|
|
|
|
else
|
|
call rpoint2d(tt(1:nvirt,1:nvirt,1,1), ttl3, nvirt, nvirt)
|
|
call rpoint2d(tt(1:nvirt,1:nvirt,1,1), ttl4, nvirt, nvirt)
|
|
|
|
end if
|
|
!}}}
|
|
|
|
do j = kk, jmax
|
|
|
|
if(omp_thread_num .eq. 0) then
|
|
if(master_thread) then !mpi_rank .eq. 0) then
|
|
pr = dble(prk) / dkji
|
|
else
|
|
pr = dble(prk) / dkji * mpi_size
|
|
end if
|
|
if(pr .ge. prold+0.1d0 .and. pr.lt.1.d0) then
|
|
prold = pr
|
|
c write(iout, '(i4, "% done.")') nint(pr * 100)
|
|
endif
|
|
|
|
call progress_bar(iout, pr, .false.)
|
|
end if
|
|
|
|
if(ptrest .and. jdone(j)) goto 1234
|
|
|
|
c assemble <ab|cj> {{{
|
|
if(j .ge. ind) then
|
|
call rpoint3d(abc_k(1:nvirt, 1:nvirt, 1:nvirt, j-ind+1),
|
|
$ abc_ofj, nvirt, nvirt, nvirt)
|
|
|
|
else if(j .eq. k) then
|
|
call rpoint3d(abc_ofk, abc_ofj, nvirt, nvirt, nvirt)
|
|
|
|
else if(j .ne. k) then ! .and. abcjinmem) then
|
|
TIME0(times0)
|
|
if(localcc .eq. 'off ') then
|
|
call rpoint3d(dcore(iabcj + omp_thread_num*nvirt**3),
|
|
$ abc_ofj, nvirt, nvirt, nvirt)
|
|
c if(ccsdalg .eq. 'disk ') then
|
|
c call abciread(j, 1, abc_ofj, bsymmv, symmocc, dgroup,
|
|
c $ co, first, last, v, irecln, ibufln, nbasis,
|
|
c $ nocc, nvirt, nirmax, multpg)
|
|
c
|
|
c else
|
|
c ! (ac|bj) = <ab|cj>
|
|
c call dgemm_wabc('t', 't', nvirt**2, nvirt, dfnb,
|
|
c $ 1.d0, jab, dfnb, jai(1, j, 1), nvirt*nocc,
|
|
c $ 0.d0, abc_ofj, nvirt**2, ccsdmkl, 1, nvirt)
|
|
c
|
|
c ! acbj -> abcj
|
|
c do a = 1, nvirt
|
|
c do b = 1, nvirt
|
|
c scr1v(b+1:nvirt) = abc_ofj(a, b+1:nvirt, b)
|
|
c abc_ofj(a, b+1:nvirt, b) =
|
|
c $ abc_ofj(a, b, b+1:nvirt)
|
|
c abc_ofj(a, b, b+1:nvirt) = scr1v(b+1:nvirt)
|
|
c end do
|
|
c end do
|
|
c end if
|
|
c write(*,*) "assemble abcj",j
|
|
call get_abci(j, abc_ofj, bsymmv, symmocc, dgroup,
|
|
$ co, first, last, v, irecln, ibufln, nbasis,
|
|
$ nocc, nvirt, nirmax, multpg, ccsdalg, dfnb,
|
|
$ jab, jai, dcore, imem, n, iabcj +
|
|
$ omp_thread_num * nvirt**3,
|
|
$ ccsdmkl)
|
|
else
|
|
call rpoint3d(abci, abc_ofj, nvirt, nvirt, nvirt)
|
|
c$OMP SINGLE
|
|
#if defined(OMP)
|
|
if(ptthreads .gt. 1) call omp_set_max_active_levels(3)
|
|
#endif
|
|
#ifdef MKL
|
|
istat = mkl_set_num_threads_local(int(omp_max_threads,4))
|
|
#endif
|
|
call get_abci(j, abc_ofj, bsymmv, symmocc, dgroup,
|
|
$ co, first, last, va, irecln, ibufln, nbasis,
|
|
$ nocc, nvirt, nirmax, multpg, ccsdalg, dfnb,
|
|
$ jab, jai, dcore, imem, n, iabcj,
|
|
$ ccsdmkl)
|
|
#ifdef MKL
|
|
istat = mkl_set_num_threads_local(int(mkl_threads,4))
|
|
#endif
|
|
#if defined(OMP)
|
|
if(ptthreads .gt. 1) call omp_set_max_active_levels(2)
|
|
#endif
|
|
c$OMP END SINGLE
|
|
end if
|
|
TIMEADD(times(1,12),times0)
|
|
end if ! }}}
|
|
|
|
! Apbc for localcc small mem!{{{
|
|
if(.not.abci_inmem .and. lapl) then !localcc.ne.'off ') then
|
|
c$OMP SINGLE
|
|
#if defined(OMP)
|
|
if(ptthreads .gt. 1) call omp_set_max_active_levels(3)
|
|
#endif
|
|
#ifdef MKL
|
|
istat = mkl_set_num_threads_local(int(omp_max_threads,4))
|
|
#endif
|
|
call dgemm('n', 't', dfnb*nvirt, nvirt, nvirt,
|
|
$ 1.d0, jab, dfnb*nvirt, ttl(1,1,j,k), nvirt,
|
|
$ 0.d0, Apbc, dfnb*nvirt)
|
|
if (j .ne. k) then ! j=k is taken care of inside wabc in the j=k case
|
|
c Ajk(P, b, c)=Ajk(P, c, b)
|
|
do b = 1, nvirt
|
|
do c = b+1, nvirt
|
|
tmpvsize(1:dfnb) = Apbc(1:dfnb, c, b)
|
|
Apbc(1:dfnb, c, b) = Apbc(1:dfnb, b, c)
|
|
Apbc(1:dfnb, b, c) = tmpvsize(1:dfnb)
|
|
end do
|
|
end do
|
|
c Ajk(P, b, c)=Ajk(P, b, c)+lljab(P, \bar b, \bar d)*ttl(\bar c, \tilde d, \bar k, \bar j)^T
|
|
call dgemm('n', 't', dfnb*nvirt, nvirt, nvirt,
|
|
$ 1.d0, jab, dfnb*nvirt, ttl2(1, 1, k, j), nvirt,
|
|
$ 1.d0, Apbc, dfnb*nvirt)
|
|
end if
|
|
#ifdef MKL
|
|
istat = mkl_set_num_threads_local(int(mkl_threads,4))
|
|
#endif
|
|
#if defined(OMP)
|
|
if(ptthreads .gt. 1) call omp_set_max_active_levels(2)
|
|
#endif
|
|
c$OMP END SINGLE
|
|
end if!}}}
|
|
|
|
if(ccsdmkl.eq.'seq' .and. rrd.eq.1) then
|
|
if(localcc .eq. 'off ') then
|
|
do i=1,nocc
|
|
call tr(nvirt, tt(1,1,i,j), rrp(:,:,i,1))
|
|
enddo
|
|
else
|
|
c$OMP DO SCHEDULE(DYNAMIC)
|
|
do i=1,nocc
|
|
call tr(nvirt, tt(1,1,i,j), rrp(:,:,i,1))
|
|
enddo
|
|
c$OMP END DO
|
|
end if
|
|
jinrr = 1
|
|
else
|
|
jinrr = j
|
|
end if
|
|
|
|
delta2 = 1.d0
|
|
fkj = fdo(k) + fdo(j)
|
|
if(j .eq. k) then !i=j=k => no contribution!
|
|
istart = j + 1
|
|
delta2 = delta2 + 1.d0
|
|
|
|
else
|
|
istart = j
|
|
endif
|
|
c$OMP DO
|
|
c$OMP& SCHEDULE(DYNAMIC)
|
|
cc$OMP& REDUCTION(+:et,ets)
|
|
do i=istart,nocc
|
|
if(ptrest) then
|
|
call read_rst_pt(k, j, i, nocc, kjidone, esum)
|
|
if(kjidone) then
|
|
c et = et + 2.d0 * esum
|
|
goto 123
|
|
end if
|
|
end if
|
|
c assemble <ab|ci> {{{
|
|
iinbl = .true.
|
|
if(i .ge. ind) then
|
|
call rpoint3d(abc_k(1:nvirt,1:nvirt,1:nvirt,i-ind+1),
|
|
$ abc_ofi, nvirt, nvirt, nvirt)
|
|
else if(i .eq. j) then ! .and. abciinmem)then
|
|
call rpoint3d(abc_ofj, abc_ofi, nvirt, nvirt, nvirt)
|
|
else if(localcc .eq. 'off ') then ! canonical small mem
|
|
TIME0(times0)
|
|
|
|
call rpoint3d(dcore(iabci +
|
|
$ omp_thread_num*nvirt**3), abc_ofi,
|
|
$ nvirt, nvirt, nvirt)
|
|
c if(ccsdalg .eq. 'disk ') then
|
|
c call abciread(i, 1, abc_ofi, bsymmv, symmocc,
|
|
c $ dgroup, co, first, last, v, irecln, ibufln,
|
|
c $ nbasis, nocc, nvirt, nirmax, multpg)
|
|
c
|
|
c else
|
|
c ! (ac|bi) = <ab|ci>
|
|
c call dgemm_wabc('t', 't', nvirt**2, nvirt, dfnb,
|
|
c $ 1.d0, jab, dfnb, jai(1, i, 1), nvirt*nocc,
|
|
c $ 0.d0, abc_ofi, nvirt**2, ccsdmkl, 1, nvirt)
|
|
c
|
|
c ! acbi -> abci
|
|
c do a = 1, nvirt
|
|
c do b = 1, nvirt
|
|
c scr1v(b+1:nvirt) = abc_ofi(a, b+1:nvirt, b)
|
|
c abc_ofi(a, b+1:nvirt, b) =
|
|
c $ abc_ofi(a, b, b+1:nvirt)
|
|
c abc_ofi(a, b, b+1:nvirt) = scr1v(b+1:nvirt)
|
|
c end do
|
|
c end do
|
|
c end if
|
|
c write(*,*) "assemble abci",i
|
|
call get_abci(i, abc_ofi, bsymmv, symmocc, dgroup,
|
|
$ co, first, last, v, irecln, ibufln, nbasis,
|
|
$ nocc, nvirt, nirmax, multpg, ccsdalg, dfnb,
|
|
$ jab, jai, dcore, imem, n, iabci +
|
|
$ omp_thread_num*nvirt**3,
|
|
$ ccsdmkl)
|
|
TIMEADD(times(1,12),times0)
|
|
else if(localcc .ne. 'off ') then
|
|
iinbl = .false.
|
|
call rpoint3d(abc_k(1:nvirt,1:nvirt,1:nvirt,1),
|
|
$ abc_ofi, nvirt, nvirt, nvirt)
|
|
end if ! }}}
|
|
|
|
delta3 = delta2
|
|
if(i .eq. j) delta3 = delta3 + 1.d0
|
|
deltaij = 1.d0
|
|
if(i.eq.j) deltaij = deltaij + 1
|
|
fkji = fkj + fdo(i)
|
|
|
|
c {{{ localcc.eq.'off ' case: FOR TEST PURPOSES, not efficient
|
|
if (talg.eq.'lapl'.or.talg.eq.'lato') then
|
|
c make ttl3(-,~,i,j) & ttl4(-,~,j,i) from ss(-,-,-,-)
|
|
do b=1,nvirt
|
|
fb=quad(nocc+b,iquad)**2
|
|
ttl3(1:nvirt,b)=tt(1:nvirt,b,i,j)/fb
|
|
IabIJ(1:nvirt,b)=tt(1:nvirt,b,j,i)/fb ! ttl4
|
|
enddo
|
|
else if(lapl) then
|
|
call dcopy(nvirt**2,tt(1,1,i,j),1,ttl3,1)
|
|
call dcopy(nvirt**2,tt(1,1,j,i),1,IabIJ,1) ! ttl4
|
|
end if!}}}
|
|
|
|
c compute W and V for all abc indices
|
|
TIME0(times0)
|
|
|
|
call wabc(nocc, nvirt, i, j, k, imem, dcore, t, ttl, rrp,
|
|
$ rrd, abc_ofi, abc_ofj, abc_ofk, kinrr, jinrr, w, v,
|
|
$ abij, aijk, times, talg, tt, iinbl, localcc,
|
|
$ IabIJ, jai, dfnb, ttl2, ttl3, ttl4, ttd, Apbc,
|
|
$ abijinmem, jai, ccsdmkl, ptthreads, omp_max_threads) !,
|
|
c $ abckinmem, abcjinmem, abciinmem)
|
|
TIMEADD(times(1,27),times0)
|
|
|
|
C calculating energy
|
|
TIME0(times0)
|
|
if(localcc .eq. 'off ') then !{{{
|
|
IF(i.eq.j.or.j.eq.k)THEN
|
|
esum=0.d0
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(NONE)
|
|
C$OMP& SHARED(nvirt,fdv,fkji,w,v,talg)
|
|
C$OMP& PRIVATE(a,b,c,delta1,fcb,astart,delta,fcba,denom)
|
|
C$OMP& REDUCTION(+:esum) NUM_THREADS(nthreade)
|
|
do c=1,nvirt
|
|
do b=c,nvirt
|
|
delta1=1.d0
|
|
if(b.eq.c)delta1=delta1+1.d0
|
|
fcb=fdv(c)+fdv(b)
|
|
if(b.eq.c)then
|
|
astart=b+1
|
|
else
|
|
astart=b
|
|
endif
|
|
do a=astart,nvirt
|
|
delta=delta1
|
|
if(a.eq.b)delta=delta+1.d0
|
|
fcba=fcb+fdv(a)
|
|
if(talg.eq.'occ ') then
|
|
denom = (fkji-fcba)*delta
|
|
else
|
|
denom = -delta
|
|
end if
|
|
esum=esum+(
|
|
$-(v(a,b,c)+v(b,c,a)+v(c,a,b))*(w(a,b,c)+w(b,c,a)+w(c,a,b))
|
|
|
|
$+(w(a,b,c)*v(a,b,c)
|
|
$+w(b,c,a)*v(b,c,a)
|
|
$+w(c,a,b)*v(c,a,b))*3.d0
|
|
|
|
$)/denom
|
|
enddo
|
|
enddo
|
|
enddo
|
|
C$OMP END PARALLEL DO
|
|
fb = 4.d0*esum/delta3
|
|
ei(j) = ei(j) + fb
|
|
et=et+fb !4.d0*esum/delta3
|
|
if(lno.or.lnaf.or.lf12.and.trim(localcc).eq.'off')
|
|
$ ets = ets + fb*(qfact(i)+qfact(j)+qfact(k))/3
|
|
if(ccsdrest .ne. 'off') call saveToRst(nReqs, requests,
|
|
$ ijk_indices, energies, toSend, 2.d0*esum/delta3,
|
|
$ k, j, i, nocc, ccsd_communicator, nReqsAll,
|
|
$ ccsd_communicator, master_rank) !, mpi_type_e)
|
|
ELSE
|
|
c ! w1, w2, x, y1, z1 elore {{{
|
|
c call rpoint1d(dcore(imem+(n+2)*nvirt**3), w1,
|
|
c $ nvirt*(nvirt+1)/2-1)
|
|
c call rpoint1d(dcore(imem+(n+2)*nvirt**3+nvirt*(nvirt+1)/2-1),
|
|
c $ w2, nvirt*(nvirt+1)/2-1)
|
|
c call rpoint1d(dcore(imem+(n+2)*nvirt**3+
|
|
c $ 2*(nvirt*(nvirt+1)/2-1)), x, nvirt*(nvirt+1)/2-1)
|
|
c call rpoint1d(dcore(imem+(n+2)*nvirt**3+
|
|
c $ 3*(nvirt*(nvirt+1)/2-1)), y1, nvirt*(nvirt+1)/2-1)
|
|
c call rpoint1d(dcore(imem+(n+2)*nvirt**3+
|
|
c $ 4*(nvirt*(nvirt+1)/2-1)), z1, nvirt*(nvirt+1)/2-1)
|
|
c
|
|
c esum = 0.d0
|
|
c do c = 1, nvirt-1 ! a=b=c => no contribution
|
|
c TIME0(times(1,94))
|
|
ccc$OMP PARALLEL DO DEFAULT(PRIVATE)
|
|
ccc$OMP& SHARED(w1,w2,x,y1,z1,w,v,fdv,nvirt,c)
|
|
cc$OMP PARALLEL DO DEFAULT(SHARED)
|
|
cc$OMP& PRIVATE(b,fcb,astart,delta1,ind1,ind2,delta,fcba,abc)
|
|
cc$OMP& SCHEDULE(DYNAMIC)
|
|
c do b = c, nvirt
|
|
c fcb = fdv(c) + fdv(b)
|
|
c if(b .eq. c) then
|
|
c astart = b + 1
|
|
c delta1 = 2.d0
|
|
c else
|
|
c astart = b
|
|
c delta1 = 1.d0
|
|
c endif
|
|
c
|
|
c ind1 = (nvirt - c + 1) * (nvirt - c + 2) / 2 -
|
|
c $ (nvirt - b + 1) * (nvirt - b + 2) / 2
|
|
c if(b .eq. c) ind1 = ind1 + 1
|
|
c ind2 = ind1 + nvirt - astart
|
|
c
|
|
c w1(ind1:ind2) = w(astart:nvirt,b,c)
|
|
c $ - 2.0 * w(b,astart:nvirt,c)
|
|
c w2(ind1:ind2) = w(b,astart:nvirt,c)
|
|
c $ - 2.0 * w(astart:nvirt,b,c)
|
|
c x(ind1:ind2) = w(astart:nvirt,b,c) * v(astart:nvirt,b,c)
|
|
c $ + w(b,astart:nvirt,c) * v(b,astart:nvirt,c)
|
|
c y1(ind1:ind2) = v(astart:nvirt,b,c)
|
|
c z1(ind1:ind2) = v(b,astart:nvirt,c)
|
|
c
|
|
c w1(ind1:ind2) = w1(ind1:ind2) + w(c,astart:nvirt,b)
|
|
c $ - 2.0 * w(astart:nvirt,c,b)
|
|
c w2(ind1:ind2) = w2(ind1:ind2) + w(astart:nvirt,c,b)
|
|
c $ - 2.0 * w(c,astart:nvirt,b)
|
|
c x(ind1:ind2) = x(ind1:ind2) +
|
|
c $ w(c,astart:nvirt,b) * v(c,astart:nvirt,b)
|
|
c $ + w(astart:nvirt,c,b) * v(astart:nvirt,c,b)
|
|
c y1(ind1:ind2) = y1(ind1:ind2) + v(c,astart:nvirt,b)
|
|
c z1(ind1:ind2) = z1(ind1:ind2) + v(astart:nvirt,c,b)
|
|
c
|
|
c w1(ind1:ind2) = w1(ind1:ind2) + w(b,c,astart:nvirt)
|
|
c $ - 2.0 * w(c,b,astart:nvirt)
|
|
c w2(ind1:ind2) = w2(ind1:ind2) + w(c,b,astart:nvirt)
|
|
c $ - 2.0 * w(b,c,astart:nvirt)
|
|
c x(ind1:ind2) = x(ind1:ind2) +
|
|
c $ w(b,c,astart:nvirt) * v(b,c,astart:nvirt)
|
|
c $ + w(c,b,astart:nvirt) * v(c,b,astart:nvirt)
|
|
c y1(ind1:ind2) = y1(ind1:ind2) + v(b,c,astart:nvirt)
|
|
c z1(ind1:ind2) = z1(ind1:ind2) + v(c,b,astart:nvirt)
|
|
c
|
|
cc w1(ind1:ind2) = w(astart:nvirt,b,c)
|
|
cc w2(ind1:ind2) = - 2.0 * w(astart:nvirt,b,c)
|
|
cc y1(ind1:ind2) = v(astart:nvirt,b,c)
|
|
cc x(ind1:ind2) = w(astart:nvirt,b,c) * v(astart:nvirt,b,c)
|
|
cc
|
|
cc w1(ind1:ind2) = w1(ind1:ind2) - 2.0 * w(b,astart:nvirt,c)
|
|
cc w2(ind1:ind2) = w2(ind1:ind2) + w(b,astart:nvirt,c)
|
|
cc z1(ind1:ind2) = v(b,astart:nvirt,c)
|
|
cc x(ind1:ind2) = x(ind1:ind2) +
|
|
cc $ w(b,astart:nvirt,c) * v(b,astart:nvirt,c)
|
|
cc
|
|
cc
|
|
cc w1(ind1:ind2) = w1(ind1:ind2) + w(c,astart:nvirt,b)
|
|
cc w2(ind1:ind2) = w2(ind1:ind2) - 2.0 * w(c,astart:nvirt,b)
|
|
cc y1(ind1:ind2) = y1(ind1:ind2) + v(c,astart:nvirt,b)
|
|
cc x(ind1:ind2) = x(ind1:ind2) +
|
|
cc $ w(c,astart:nvirt,b) * v(c,astart:nvirt,b)
|
|
cc
|
|
cc w1(ind1:ind2) = w1(ind1:ind2) - 2.0 * w(astart:nvirt,c,b)
|
|
cc w2(ind1:ind2) = w2(ind1:ind2) + w(astart:nvirt,c,b)
|
|
cc z1(ind1:ind2) = z1(ind1:ind2) + v(astart:nvirt,c,b)
|
|
cc x(ind1:ind2) = x(ind1:ind2) +
|
|
cc $ w(astart:nvirt,c,b) * v(astart:nvirt,c,b)
|
|
cc
|
|
cc
|
|
cc w1(ind1:ind2) = w1(ind1:ind2) + w(b,c,astart:nvirt)
|
|
cc w2(ind1:ind2) = w2(ind1:ind2) - 2.0 * w(b,c,astart:nvirt)
|
|
cc y1(ind1:ind2) = y1(ind1:ind2) + v(b,c,astart:nvirt)
|
|
cc x(ind1:ind2) = x(ind1:ind2) +
|
|
cc $ w(b,c,astart:nvirt) * v(b,c,astart:nvirt)
|
|
cc
|
|
cc w1(ind1:ind2) = w1(ind1:ind2) - 2.0 * w(c,b,astart:nvirt)
|
|
cc w2(ind1:ind2) = w2(ind1:ind2) + w(c,b,astart:nvirt)
|
|
cc z1(ind1:ind2) = z1(ind1:ind2) + v(c,b,astart:nvirt)
|
|
cc x(ind1:ind2) = x(ind1:ind2) +
|
|
cc $ w(c,b,astart:nvirt) * v(c,b,astart:nvirt)
|
|
c
|
|
c
|
|
c ! abc = sum_k=v-b^v-c+1 (k) - 1
|
|
c abc = ind1 - 1
|
|
cc write(*,'(7i3)') c, b, ind1, ind2, astart, nvirt, abc
|
|
cc abc = (nvirt - c + 1) * (nvirt - c + 2) / 2 -
|
|
cc $ (nvirt - b) * (nvirt - b + 1) / 2 - 1
|
|
c do a = astart, nvirt
|
|
c if(a .eq. b) then
|
|
c delta = delta1 + 1.d0
|
|
c else
|
|
c delta = delta1
|
|
c end if
|
|
c fcba = fcb + fdv(a)
|
|
c abc = abc + 1
|
|
c x(abc) = x(abc) / (fkji - fcba) / delta
|
|
c y1(abc) = y1(abc) / (fkji - fcba) / delta
|
|
c z1(abc) = z1(abc) / (fkji - fcba) / delta
|
|
c enddo
|
|
c enddo
|
|
cc$OMP END PARALLEL DO
|
|
c TIMEADD(times(1,65),times(1,94))
|
|
cc write(*,*) 'c:',c
|
|
cc write(*,*) 'x', x
|
|
cc write(*,*) 'y', y1
|
|
cc write(*,*) 'z', z1
|
|
cc write(*,*) 'm:',m, abc
|
|
c
|
|
c m = (nvirt - c + 1) * (nvirt - c + 2) / 2 - 1
|
|
cc write(*,*) "m=",m
|
|
c
|
|
c db = 1024
|
|
c TIME0(times(1,94))
|
|
cc$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(l) REDUCTION(+:esum)
|
|
cc$OMP& SCHEDULE(DYNAMIC)
|
|
c do l = 1, ceiling(m/dble(db))
|
|
c db1 = min(m-(l-1)*db, db)
|
|
c esum=esum+ddot(db1,y1((l-1)*db+1),1,w1((l-1)*db+1),1)+
|
|
c $ ddot(db1,z1((l-1)*db+1),1,w2((l-1)*db+1),1)+
|
|
c $ 3.d0 * sum(x((l-1)*db+1:min(l*db,m)))
|
|
c end do
|
|
cc$OMP END PARALLEL DO
|
|
c TIMEADD(times(1,66),times(1,94))
|
|
c enddo !}}}
|
|
|
|
esum=0.d0
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(NONE)
|
|
C$OMP& SHARED(nvirt,fdv,fkji,w,v,talg)
|
|
C$OMP& PRIVATE(a,b,c,astart,y,delta1,delta,fcb,fcba,z,denom)
|
|
C$OMP& REDUCTION(+:esum) NUM_THREADS(nthreade)
|
|
do c=1,nvirt
|
|
do b=c,nvirt
|
|
delta1=1.d0
|
|
if(b.eq.c)delta1=delta1+1.d0
|
|
fcb=fdv(c)+fdv(b)
|
|
if(b.eq.c)then
|
|
astart=b+1
|
|
else
|
|
astart=b
|
|
endif
|
|
do a=astart,nvirt
|
|
delta=delta1
|
|
if(a.eq.b)delta=delta+1.d0
|
|
fcba=fcb+fdv(a)
|
|
y=v(a,b,c)+v(b,c,a)+v(c,a,b)
|
|
z=v(a,c,b)+v(b,a,c)+v(c,b,a)
|
|
if(talg .eq. 'occ ') then
|
|
denom = (fkji-fcba)*delta
|
|
else
|
|
denom = -delta
|
|
end if
|
|
esum=esum+(
|
|
$+(w(a,b,c)+w(b,c,a)+w(c,a,b))*(y-2.d0*z)
|
|
$+(w(a,c,b)+w(b,a,c)+w(c,b,a))*(z-2.d0*y)
|
|
|
|
$+(w(a,b,c)*v(a,b,c)
|
|
$+w(b,c,a)*v(b,c,a)
|
|
$+w(c,a,b)*v(c,a,b)
|
|
$+w(a,c,b)*v(a,c,b)
|
|
$+w(b,a,c)*v(b,a,c)
|
|
$+w(c,b,a)*v(c,b,a))*3.d0
|
|
|
|
$)/denom
|
|
enddo
|
|
enddo
|
|
enddo
|
|
C$OMP END PARALLEL DO
|
|
fb = 2.d0*esum/delta3
|
|
ei(i) = ei(i) + fb
|
|
et=et+fb !2.d0*esum/delta3
|
|
if(lno.or.lnaf.or.lf12.and.trim(localcc).eq.'off')
|
|
$ ets = ets + fb*(qfact(i)+qfact(j)+qfact(k))/3
|
|
if(ccsdrest .ne. 'off') call saveToRst(nReqs, requests,
|
|
$ ijk_indices, energies, toSend, esum/delta3,
|
|
$ k, j, i, nocc, ccsd_communicator, nReqsAll,
|
|
$ ccsd_communicator, master_rank) !, mpi_type_e)
|
|
ENDIF ! }}}
|
|
else ! talg=occ local E expression {{{
|
|
if (localccXp(localcc,2022)) then
|
|
c if (.true.) then
|
|
esum=0.d0
|
|
esum2=0.d0
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
C$OMP& PRIVATE(a,b,c,astart,y,delta1,delta,fcb,fcba,fact,w1,w2,z)
|
|
C$OMP& REDUCTION(+:esum,esum2) NUM_THREADS(nthreade)
|
|
do c=1,nvirt
|
|
do b=c,nvirt
|
|
delta1=1.d0
|
|
if(b.eq.c)delta1=delta1+1.d0
|
|
if(b.eq.c)then
|
|
astart=b+1
|
|
else
|
|
astart=b
|
|
endif
|
|
fcb=fdv(c)+fdv(b)
|
|
do a=astart,nvirt
|
|
delta=delta1
|
|
if(a.eq.b)delta=delta+1.d0
|
|
if (talg.eq.'topr'.or.talg.eq.'to ') then
|
|
fcba=fcb+fdv(a)
|
|
fact=fcba-fkji ! - due to other '- sign' in et=et-sum
|
|
else
|
|
fact = 1.d0
|
|
endif
|
|
w1=w(a,b,c)+w(b,c,a)+w(c,a,b)
|
|
w2=w(a,c,b)+w(c,b,a)+w(b,a,c)
|
|
y=v(a,b,c)+v(b,c,a)+v(c,a,b)
|
|
z=v(a,c,b)+v(b,a,c)+v(c,b,a)
|
|
esum=esum+(3.d0-delta)*(3.d0-deltaij)*(
|
|
$ w1*(y-2.d0*z)+w2*(z-2.d0*y)+3.d0*(
|
|
$ +w(a,b,c)*v(a,b,c)+w(b,c,a)*v(b,c,a)
|
|
$ +w(c,a,b)*v(c,a,b)+w(a,c,b)*v(a,c,b)
|
|
$ +w(b,a,c)*v(b,a,c)+w(c,b,a)*v(c,b,a))
|
|
$ )/(fact*6.d0)
|
|
c old expression
|
|
if (i.eq.j) then
|
|
y=v(a,b,c)+v(a,c,b)+v(b,c,a)
|
|
esum2=esum2+(
|
|
$ w(a,b,c)*(3.d0*v(a,b,c)-y)+
|
|
$ w(a,c,b)*(3.d0*v(a,c,b)-y)+
|
|
$ w(b,c,a)*(3.d0*v(b,c,a)-y)
|
|
$ )/(delta*fact)
|
|
else ! i>j
|
|
esum2=esum2+(
|
|
$ 2.d0/3.d0*
|
|
$ (w(a,b,c)+w(b,c,a)+w(c,a,b)-w(a,c,b)-w(b,a,c)-w(c,b,a))*
|
|
$ (v(a,b,c)+v(b,c,a)+v(c,a,b)-v(a,c,b)-v(b,a,c)-v(c,b,a))
|
|
c
|
|
$ + w(a,b,c)*(2.d0*v(a,b,c)-v(a,c,b)-v(c,b,a))
|
|
$ + w(b,c,a)*(2.d0*v(b,c,a)-v(a,c,b)-v(b,a,c))
|
|
$ + w(c,a,b)*(2.d0*v(c,a,b)-v(b,a,c)-v(c,b,a))
|
|
$ + w(a,c,b)*(2.d0*v(a,c,b)-v(b,c,a)-v(a,b,c))
|
|
$ + w(b,a,c)*(2.d0*v(b,a,c)-v(c,a,b)-v(b,c,a))
|
|
$ + w(c,b,a)*(2.d0*v(c,b,a)-v(a,b,c)-v(c,a,b))
|
|
c
|
|
$ )/(delta*fact)
|
|
endif ! i.eq.j or i>j
|
|
enddo
|
|
enddo
|
|
enddo
|
|
C$OMP END PARALLEL DO
|
|
et2=et2-esum2
|
|
else
|
|
c {{{ laplace lno-(t) energy expression < 2022
|
|
esum=0.d0
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
C$OMP& PRIVATE(a,b,c,astart,y,delta1,delta,fcb,fcba,fact)
|
|
C$OMP& REDUCTION(+:esum) NUM_THREADS(nthreade)
|
|
do c=1,nvirt
|
|
do b=c,nvirt
|
|
delta1=1.d0
|
|
if(b.eq.c)delta1=delta1+1.d0
|
|
if(b.eq.c)then
|
|
astart=b+1
|
|
else
|
|
astart=b
|
|
endif
|
|
fcb=fdv(c)+fdv(b)
|
|
do a=astart,nvirt
|
|
delta=delta1
|
|
if(a.eq.b)delta=delta+1.d0
|
|
if (talg.eq.'topr'.or.talg.eq.'to ') then
|
|
fcba=fcb+fdv(a)
|
|
fact=fcba-fkji ! - due to other - in et=et-sum
|
|
else
|
|
fact = 1.d0
|
|
endif
|
|
c old expression
|
|
if (i.eq.j) then
|
|
y=v(a,b,c)+v(a,c,b)+v(b,c,a)
|
|
esum=esum+(
|
|
$ w(a,b,c)*(3.d0*v(a,b,c)-y)+
|
|
$ w(a,c,b)*(3.d0*v(a,c,b)-y)+
|
|
$ w(b,c,a)*(3.d0*v(b,c,a)-y)
|
|
$ )/(delta*fact)
|
|
else ! i>j
|
|
esum=esum+(
|
|
$ 2.d0/3.d0*
|
|
$ (w(a,b,c)+w(b,c,a)+w(c,a,b)-w(a,c,b)-w(b,a,c)-w(c,b,a))*
|
|
$ (v(a,b,c)+v(b,c,a)+v(c,a,b)-v(a,c,b)-v(b,a,c)-v(c,b,a))
|
|
c
|
|
$ + w(a,b,c)*(2.d0*v(a,b,c)-v(a,c,b)-v(c,b,a))
|
|
$ + w(b,c,a)*(2.d0*v(b,c,a)-v(a,c,b)-v(b,a,c))
|
|
$ + w(c,a,b)*(2.d0*v(c,a,b)-v(b,a,c)-v(c,b,a))
|
|
$ + w(a,c,b)*(2.d0*v(a,c,b)-v(b,c,a)-v(a,b,c))
|
|
$ + w(b,a,c)*(2.d0*v(b,a,c)-v(c,a,b)-v(b,c,a))
|
|
$ + w(c,b,a)*(2.d0*v(c,b,a)-v(a,b,c)-v(c,a,b))
|
|
c
|
|
$ )/(delta*fact)
|
|
endif ! i.eq.j or i>j
|
|
enddo
|
|
enddo
|
|
enddo
|
|
C$OMP END PARALLEL DO
|
|
endif
|
|
et=et-esum
|
|
epair(j,i)=epair(j,i)-esum
|
|
c }}}
|
|
end if ! }}}
|
|
123 continue
|
|
#ifdef MPI
|
|
if(mpi_size .gt. 1 .and. master_thread) then !mpi_rank .eq. 0) then
|
|
#ifdef OMP
|
|
if(omp_thread_num .eq. 0) then
|
|
#endif
|
|
do
|
|
call MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG,
|
|
$ ccsd_communicator, flag, stat, mpi_err)
|
|
if(.not.flag) exit
|
|
|
|
if(stat(MPI_TAG) .eq. 0) then
|
|
call MPI_Recv(comm_scr, 1, MPI_INTEGER_MRCC,
|
|
$ stat(MPI_SOURCE), stat(MPI_TAG),
|
|
$ ccsd_communicator, MPI_STATUS_IGNORE,
|
|
$ mpi_err)
|
|
prk = prk + comm_scr
|
|
|
|
else if(ccsdrest .ne. 'off ' .and.
|
|
$ stat(MPI_TAG) .eq. 1) then
|
|
c call MPI_Recv(esum, 1, MPI_DOUBLE_PRECISION,
|
|
c $ stat(MPI_SOURCE), stat(MPI_TAG),
|
|
c $ ccsd_communicator, MPI_STATUS_IGNORE,
|
|
c $ mpi_err)
|
|
c call MPI_Recv(energies, 1, mpi_type_e,
|
|
c $ stat(MPI_SOURCE), 1,
|
|
c $ ccsd_communicator, MPI_STATUS_IGNORE,
|
|
c $ mpi_err)
|
|
c write(*,*) 'call write_rst_pt1 comm ',ccsd_communicator
|
|
c call MPI_Recv(ijk_indices, 1, MPI_INTEGER_MRCC,
|
|
call MPI_Recv(ijk, 1, MPI_INTEGER_MRCC,
|
|
$ stat(MPI_SOURCE), 1,
|
|
$ ccsd_communicator, MPI_STATUS_IGNORE,
|
|
$ mpi_err)
|
|
c call MPI_Recv(energies, 1, MPI_DOUBLE_PRECISION,
|
|
call MPI_Recv(esum, 1, MPI_DOUBLE_PRECISION,
|
|
$ stat(MPI_SOURCE), 1,
|
|
$ ccsd_communicator, MPI_STATUS_IGNORE,
|
|
$ mpi_err)
|
|
c ijk = ijk_indices(1)
|
|
c esum = energies(1)
|
|
c write(*,*) 'call write_rst_pt1 with ',ijk, esum
|
|
c if(ijk.le.0) write(*,*) 'error, ', ijk, ' from ', stat(MPI_SOURCE)
|
|
call write_rst_pt(ijk, esum)
|
|
end if
|
|
end do
|
|
#ifdef OMP
|
|
end if
|
|
#endif
|
|
end if
|
|
#endif
|
|
TIMEADD(times(1,20),times0)
|
|
enddo ! i
|
|
c$OMP END DO NOWAIT
|
|
c$OMP SINGLE
|
|
prk = prk + nocc-istart+1
|
|
c$OMP END SINGLE NOWAIT
|
|
1234 continue
|
|
TIME0(times0)
|
|
c communicate progress {{{
|
|
#if defined(MPI)
|
|
if(.not.master_thread) then
|
|
if(omp_thread_num .eq. 0) then
|
|
flag = .true.
|
|
if(nummes .ne. 0) then
|
|
call MPI_Test(request, flag, stat, mpi_err)
|
|
end if
|
|
|
|
if(flag) then
|
|
comm_scr = 0
|
|
end if
|
|
|
|
comm_scr = comm_scr + prk - prk_sent
|
|
prk_sent = prk
|
|
|
|
if(flag) then
|
|
call MPI_Isend(comm_scr, 1, MPI_INTEGER_MRCC,
|
|
$ master_rank,0, ccsd_communicator, request, mpi_err)
|
|
nummes = nummes + 1
|
|
end if
|
|
end if
|
|
|
|
c else if(mpi_size .gt. 1) then ! first rank receives if ccsd is running MPI parallel
|
|
c flag = .true.
|
|
c do while(flag)
|
|
c call MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG,
|
|
c $ ccsd_communicator, flag, stat, mpi_err)
|
|
c
|
|
c if(flag) then
|
|
c call MPI_Recv(comm_scr, 1, MPI_INTEGER_MRCC,
|
|
c $ stat(MPI_SOURCE), stat(MPI_TAG),
|
|
c $ ccsd_communicator, stat, mpi_err)
|
|
c prk = prk + comm_scr
|
|
c end if
|
|
c end do
|
|
end if
|
|
#endif
|
|
c }}}
|
|
TIMEADD(times(1,28),times0)
|
|
|
|
end do ! j
|
|
#ifdef MKL
|
|
istat = mkl_set_num_threads_local(int(mkl_max_threads,4))
|
|
#endif
|
|
c$OMP END PARALLEL
|
|
1235 continue
|
|
c communicate progress {{{
|
|
#if defined(MPI)
|
|
c if(mpi_rank .ne. 0) then
|
|
if(.not.master_thread) then
|
|
if(omp_thread_num .eq. 0) then
|
|
flag = .true.
|
|
if(nummes .ne. 0) then
|
|
call MPI_Test(request, flag, stat, mpi_err)
|
|
end if
|
|
|
|
if(flag) then
|
|
comm_scr = 0
|
|
end if
|
|
|
|
comm_scr = comm_scr + prk - prk_sent
|
|
prk_sent = prk
|
|
|
|
if(flag) then
|
|
call MPI_Isend(comm_scr, 1, MPI_INTEGER_MRCC,
|
|
$ master_rank,0, ccsd_communicator, request, mpi_err)
|
|
nummes = nummes + 1
|
|
end if
|
|
end if
|
|
|
|
c else if(mpi_size .gt. 1) then ! first rank receives if ccsd is running MPI parallel
|
|
c flag = .true.
|
|
c do while(flag)
|
|
c call MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG,
|
|
c $ ccsd_communicator, flag, stat, mpi_err)
|
|
c
|
|
c if(flag) then
|
|
c call MPI_Recv(comm_scr, 1, MPI_INTEGER_MRCC,
|
|
c $ stat(MPI_SOURCE), stat(MPI_TAG),
|
|
c $ ccsd_communicator, stat, mpi_err)
|
|
c prk = prk + comm_scr
|
|
c end if
|
|
c end do
|
|
end if
|
|
#endif
|
|
c }}}
|
|
#ifdef MKL
|
|
call mkl_set_num_threads(mkl_max_threads)
|
|
!$OMP PARALLEL NUM_THREADS(ptthreads)
|
|
istat = mkl_set_num_threads_local(int(mkl_max_threads,4))
|
|
!$OMP END PARALLEL
|
|
#endif
|
|
#if defined(OMP)
|
|
call omp_set_max_active_levels(1)
|
|
#endif
|
|
TIMEADD(times(1,98),times(1,99))
|
|
if(WRITE_TIMES)
|
|
$ write(*, "('Rank ',i3,' done with k ',i3,
|
|
$ ' CPU [min]:',f10.5,2x,'Wall [min]:',7f10.5)")
|
|
$ mpi_rank,kk,times(1,98)/60.d0,times(2,98)/60.d0
|
|
end do ! k, kk is incremented
|
|
|
|
#ifdef MPI
|
|
if(nquad .ne. 1 .and. mpi_size .gt. 1)
|
|
$ call reset_counter(ccsd_communicator, count_k,
|
|
$ ccsd_communicator)
|
|
#endif
|
|
|
|
end do ! for q
|
|
|
|
TIME0(times)
|
|
TIME0(times(1,93))
|
|
#if defined(MPI)
|
|
if(ccsdrest .ne. "off ") nReqs = nReqs0
|
|
if(mpi_rank .ne. 0 .and. ccsdrest .ne. 'off ') then
|
|
call MPI_Waitall(nReqs0,requestsAll,MPI_STATUSES_IGNORE,mpi_err)
|
|
end if
|
|
if(mpi_size .gt. 1) then
|
|
call MPI_Ibarrier(ccsd_communicator, request, mpi_err)
|
|
|
|
if(mpi_size.gt.1 .and. WRITE_TIMES) then
|
|
write(*, "('Rank ',i3,' done with (T) ',
|
|
$ 'CPU [min]:',f10.3,2x,'Wall [min]:',7f10.3)")
|
|
$ mpi_rank,times(1,95)/60.d0,times(2,95)/60.d0
|
|
end if
|
|
|
|
c if(mpi_rank .eq. 0 .and. mpi_size .gt. 1) then
|
|
if(master_thread) then
|
|
call MPI_Ibarrier(ccsd_communicator, request, mpi_err)
|
|
ptdone = .false.
|
|
do while(.not.ptdone)
|
|
do !nm = 1, 10
|
|
call MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG,
|
|
$ ccsd_communicator, flag, stat, mpi_err)
|
|
if(.not.flag) exit
|
|
|
|
if(stat(MPI_TAG) .eq. 0) then
|
|
! receive ijk's done and print progress counter
|
|
call MPI_Recv(comm_scr, 1, MPI_INTEGER_MRCC,
|
|
$ stat(MPI_SOURCE), stat(MPI_TAG),
|
|
$ ccsd_communicator, MPI_STATUS_IGNORE,
|
|
$ mpi_err)
|
|
prk = prk + comm_scr
|
|
|
|
pr = dble(prk) / dkji
|
|
if(pr .ge. prold+0.1d0 .and. pr.lt.1.d0) then
|
|
prold = pr
|
|
c write(iout, '(i4, "% done.")') nint(pr * 100)
|
|
endif
|
|
|
|
call progress_bar(iout, pr, .false.)
|
|
|
|
else if(stat(MPI_TAG) .eq. 1) then !ccsdrest .ne. 'off ') then
|
|
! receive and save energy contribution from tasks
|
|
c call MPI_Recv(esum, 1, MPI_DOUBLE_PRECISION,
|
|
c $ stat(MPI_SOURCE), stat(MPI_TAG),
|
|
c $ ccsd_communicator, MPI_STATUS_IGNORE,
|
|
c $ mpi_err)
|
|
c call MPI_Recv(energies, 1, mpi_type_e,
|
|
c $ stat(MPI_SOURCE), 1,
|
|
c $ ccsd_communicator, MPI_STATUS_IGNORE,
|
|
c $ mpi_err)
|
|
c write(*,*) 'call write_rst_pt2 comm ',ccsd_communicator
|
|
call MPI_Recv(ijk, 1, MPI_INTEGER_MRCC,
|
|
$ stat(MPI_SOURCE), 1,
|
|
$ ccsd_communicator, MPI_STATUS_IGNORE,
|
|
$ mpi_err)
|
|
call MPI_Recv(esum, 1, MPI_DOUBLE_PRECISION,
|
|
$ stat(MPI_SOURCE), 1,
|
|
$ ccsd_communicator, MPI_STATUS_IGNORE,
|
|
$ mpi_err)
|
|
c ijk = ijk_indices(1)
|
|
c esum = energies(1)
|
|
c call write_rst_pt(stat(MPI_TAG), esum)
|
|
c write(*,*) 'call write_rst_pt2 with ',ijk, esum
|
|
call write_rst_pt(ijk, esum)
|
|
else
|
|
write(iout, '(a,i6)')
|
|
$ ' ERROR: Master recevived tag ',
|
|
$ stat(MPI_TAG)
|
|
call mrccend(1)
|
|
end if
|
|
|
|
end do
|
|
|
|
! other tasks done with (T)?
|
|
call MPI_Test(request, ptdone, MPI_STATUS_IGNORE,
|
|
$ mpi_err)
|
|
end do
|
|
else
|
|
if(.not.master_thread .and. ccsdrest .ne. 'off ') then
|
|
c call MPI_Waitall(nReqs0,requestsAll,MPI_STATUSES_IGNORE,
|
|
call MPI_Waitall(nReqs,requests,MPI_STATUSES_IGNORE,
|
|
$ mpi_err)
|
|
end if
|
|
call MPI_Ibarrier(ccsd_communicator, request, mpi_err)
|
|
call MPI_Wait(request, MPI_STATUS_IGNORE, mpi_err)
|
|
c call MPI_Barrier(ccsd_communicator, mpi_err)
|
|
end if
|
|
|
|
c call free_manager(ccsd_communicator, counter_k)
|
|
call deallocate_counter(ccsd_communicator, count_k)
|
|
call free_manager(ccsd_communicator)
|
|
|
|
call MPI_Allreduce(MPI_IN_PLACE, et, 1, MPI_DOUBLE_PRECISION,
|
|
$ MPI_SUM, ccsd_communicator, mpi_err)
|
|
call MPI_Allreduce(MPI_IN_PLACE, ets, 1, MPI_DOUBLE_PRECISION,
|
|
$ MPI_SUM, ccsd_communicator, mpi_err)
|
|
end if
|
|
#endif
|
|
TIMEADD(times(1,30),times)
|
|
TIMEADD(times(1,28),times)
|
|
|
|
c write(iout,'(" 100% done.")')
|
|
call progress_bar(iout, 1.d0, .false.)
|
|
if (localccXp(localcc,2022).and.master_thread)
|
|
$write(iout,'(a12,2es17.8)') 'old/new (T)',et2,et
|
|
|
|
if(ccsdrest .ne. 'off') close(725)
|
|
|
|
c write(iout,*)
|
|
c write(iout,'(" CCSD(T) correlation energy [au]:",f22.12)')et
|
|
c write(iout,'(" CCSD(T) total energy [au]: ",f22.12)')et+ecc
|
|
c write(iout,*)
|
|
c call prtenergc('CCSD(T) ',et+ecc,eref,locnoc)
|
|
|
|
return
|
|
end subroutine!}}}
|
|
c
|
|
C {{{ trf2laplbasis
|
|
************************************************************************
|
|
subroutine trf2laplbasis(tt,ttl,jij,jab,jia,nocc,nvirt,dfnb,
|
|
$abij,abci,aijk,quad,iquad,nquad,cmo,ljapi,lljpai,lljab,
|
|
$lajik,laibj,labc,lljij,ljij,lcmo,ilcmo,t,t1,talg,fdo,ofock,ofock1,
|
|
$abci_inmem,ttl2,ttijb,ttbij,qm1q)
|
|
************************************************************************
|
|
c multiply with Laplace factors and trf to the Mayer basis: T2 ,abci, abij and aijk
|
|
************************************************************************
|
|
implicit none
|
|
integer i,j,a,b,nocc,nvirt,dfnb,iquad,c,k,nquad,imax,jlen
|
|
real*8 jij(dfnb,nocc,nocc)
|
|
real*8 jia(nocc,nvirt,dfnb),cmo(nocc,nocc)
|
|
real*8 lcmo(nocc,nocc),ilcmo(nocc,nocc)
|
|
real*8 jab(dfnb,nvirt,nvirt),quad(nvirt+nocc,nquad)
|
|
real*8 ttl(nvirt,nvirt,nocc,1)
|
|
real*8 ttl2(nvirt,nvirt,1,nocc)
|
|
real*8 tt(nvirt,nvirt,nocc,nocc),abci(nvirt,nvirt,nvirt,*)
|
|
real*8 abij(nvirt,nvirt,nocc,*),aijk(nvirt,nocc,nocc,nocc)
|
|
real*8 fa,fb,fb2,t1(nvirt,nocc),t(nvirt,nocc),s
|
|
real*8 ljapi(nvirt,dfnb,nocc),qm1q(nocc,nocc)
|
|
real*8 lljab(dfnb,nvirt,nvirt),lajik(nvirt,nocc,nocc)
|
|
real*8 ttijb(nocc,nocc,nvirt),ttbij(nvirt,nocc,nocc)
|
|
real*8 laibj(nvirt,nocc,nvirt,*),labc(nvirt,nvirt,nvirt)
|
|
real*8 lljij(nocc,dfnb,nocc),ljij(nocc,dfnb,nocc)
|
|
real*8 lljpai(dfnb,nvirt,nocc),fdo(nocc),ofock(nocc,nocc)
|
|
real*8 ofock1(nocc,nocc)
|
|
character*4 talg
|
|
logical lapl,abci_inmem
|
|
c
|
|
lapl=talg.eq.'lapl'.or.talg.eq.'lato'
|
|
c 0) Laplace the canonical index of the occ mo trf mx:
|
|
if (lapl) then
|
|
do i=1,nocc
|
|
lcmo(i,1:nocc)=cmo(i,1:nocc)*quad(i,iquad) ! for barred, in v
|
|
enddo
|
|
do i=1,nocc
|
|
ilcmo(i,1:nocc)=cmo(i,1:nocc)/quad(i,iquad) ! for tilded, in t
|
|
enddo
|
|
else ! sc or T0
|
|
lcmo=cmo
|
|
ilcmo=cmo
|
|
endif
|
|
c 1) Laplace and trf Jij: pij->\tilde j p i -> \tilde j p \bar i
|
|
call dgemm('t','t',nocc,dfnb*nocc,nocc,1.d0,ilcmo,nocc,jij,
|
|
$dfnb*nocc,0.d0,ljij,nocc) ! in aijk
|
|
call dgemm('n','n',nocc*dfnb,nocc,nocc,1.d0,ljij,dfnb*nocc,
|
|
$lcmo,nocc,0.d0,lljij,nocc*dfnb) ! in w
|
|
c 2) Laplace and trf Jai: iap -> ap\bar i -> p\bar a \bar i
|
|
call dgemm('t','n',nvirt*dfnb,nocc,nocc,1.d0,jia,nocc,lcmo,nocc,
|
|
$0.d0,ljapi,nvirt*dfnb) ! in aijk
|
|
if (lapl) then
|
|
do i=1,nocc
|
|
do a=1,nvirt
|
|
lljpai(1:dfnb,a,i)=ljapi(a,1:dfnb,i)*quad(nocc+a,iquad)
|
|
enddo
|
|
enddo
|
|
else ! sc or T0
|
|
do i=1,nocc
|
|
call gtrans(ljapi(1,1,i),lljpai(1,1,i),nvirt,dfnb)
|
|
enddo
|
|
endif
|
|
c 3) assembly (aj|ik) and store <ai|jk>
|
|
do k=1,nocc
|
|
call dgemm('t','t',nvirt*nocc,nocc,dfnb,1.d0,lljpai,
|
|
$dfnb,lljij(1,1,k),nocc,0.d0,lajik,nvirt*nocc) ! in rr
|
|
do j=1,nocc
|
|
do i=1,nocc
|
|
aijk(1:nvirt,j,i,k)=lajik(1:nvirt,i,j)
|
|
enddo
|
|
enddo
|
|
enddo
|
|
c 4) assembly (ai|bj) and store <ab|ij> for j=1
|
|
jlen=1 ! localcc=off case is handled outside
|
|
call dgemm('t','n',nvirt*nocc,nvirt*jlen,dfnb,1.d0,lljpai,dfnb,
|
|
$lljpai,dfnb,0.d0,laibj,nvirt*nocc) ! in rr
|
|
do j=1,jlen
|
|
do b=1,nvirt
|
|
do i=1,nocc
|
|
abij(1:nvirt,b,i,j)=laibj(1:nvirt,i,b,j)
|
|
enddo
|
|
enddo
|
|
enddo
|
|
c 5) Laplace Jab -> p \bar a \bar b
|
|
if (lapl) then
|
|
do a=1,nvirt
|
|
fa=quad(nocc+a,iquad)
|
|
if (iquad.gt.1) fa=fa/quad(nocc+a,iquad-1) ! jab is owerwritten with lljab of the previous Laplace quadrature point
|
|
do b=1,nvirt
|
|
fb=quad(nocc+b,iquad)
|
|
if (iquad.gt.1) fb=fb/quad(nocc+b,iquad-1)! jab is owerwritten with lljab of the previous Laplace quadrature point
|
|
lljab(1:dfnb,b,a)=jab(1:dfnb,b,a)*fa*fb
|
|
enddo
|
|
enddo
|
|
else
|
|
lljab=jab
|
|
endif
|
|
c 6) assembly (ac|bi) for a single i and store <ab|ci> only IF entire <ab|ci> fits into memory
|
|
c ELSE save <ab|c1>
|
|
imax=nocc
|
|
if (.not.abci_inmem) imax=1
|
|
do i=1,imax
|
|
call dgemm('t','n',nvirt**2,nvirt,dfnb,1.d0,lljab,dfnb,
|
|
$lljpai(1,1,i),dfnb,0.d0,labc,nvirt**2) ! labc in w
|
|
do b=1,nvirt
|
|
do c=1,nvirt
|
|
abci(1:nvirt,b,c,i)=labc(1:nvirt,c,b)
|
|
enddo
|
|
enddo
|
|
enddo
|
|
c for 7) qm1q = cmo^-1 / quad(iquad-1) * lcmo: iquad-1 GSL basis to iquad GSL basis trf mx
|
|
if (iquad.gt.1) then
|
|
do i=1,nocc
|
|
do j=1,nocc
|
|
s=0.d0
|
|
do k=1,nocc
|
|
s=s+cmo(k,i)*lcmo(k,j)/quad(k,iquad-1)
|
|
enddo
|
|
qm1q(i,j)=s ! in IabIJ
|
|
enddo
|
|
enddo
|
|
elseif (iquad.eq.1) then
|
|
qm1q=lcmo ! for both lapl and sc/T0
|
|
endif
|
|
c 7) Laplace and trf T2
|
|
do a=1,nvirt
|
|
if (lapl) then
|
|
fa=quad(nocc+a,iquad)
|
|
if (iquad.gt.1) fa=fa/quad(nocc+a,iquad-1) ! tt is owerwritten with the previos Laplace quadrature point
|
|
else
|
|
fa=1.d0
|
|
endif
|
|
c a) tt(a,b,i,j)->tt_ijb(i,j,b) of \bar a
|
|
do j=1,nocc
|
|
do i=1,nocc
|
|
do b=1,nvirt
|
|
ttijb(i,j,b)=tt(a,b,i,j)
|
|
enddo
|
|
enddo
|
|
enddo
|
|
c b) tt_ijb(i,j,b) -> fa*tt_jbi(j,b,\bar i)
|
|
call dgemm('t','n',nvirt*nocc,nocc,nocc,fa,ttijb,nocc,
|
|
$qm1q,nocc,0.d0,lajik,nvirt*nocc) ! in rr
|
|
c c) tt_jbi(j,b,\bar i) -> tt_bij(b,\bar i,\bar j)
|
|
call dgemm('t','n',nvirt*nocc,nocc,nocc,1.d0,lajik,nocc,
|
|
$qm1q,nocc,0.d0,ttbij,nocc*nvirt) ! in w
|
|
c d) tt_bij(b,\bar i,\bar j) -> ss(\bar a,\bar b,\bar i,\bar j) & ttl(\bar a,\tilde b,\bar i,1) & ttl2(\bar a,\tilde b,1,\bar i)
|
|
do b=1,nvirt
|
|
if (lapl) then
|
|
fb=quad(nocc+b,iquad)
|
|
if (iquad.gt.1) fb=fb/quad(nocc+b,iquad-1) ! tt is owerwritten with the previous Laplace quadrature point
|
|
fb2=quad(nocc+b,iquad)**2
|
|
else
|
|
fb=1.d0
|
|
fb2=1.d0
|
|
endif
|
|
tt(a,b,1:nocc,1:nocc)=ttbij(b,1:nocc,1:nocc)*fb ! this will be ss, same mem space
|
|
ttl(a,b,1:nocc,1)=tt(a,b,1:nocc,1)/fb2
|
|
ttl2(a,b,1,1:nocc)=tt(a,b,1,1:nocc)/fb2
|
|
enddo !b
|
|
enddo !a
|
|
c 8) Laplace and trf T1
|
|
call dgemm('n','n',nvirt,nocc,nocc,1.d0,t1,nvirt,lcmo,nocc,
|
|
$0.d0,t,nvirt)
|
|
if (lapl) then
|
|
do a=1,nvirt
|
|
t(a,1:nocc)=t(a,1:nocc)*quad(nocc+a,iquad)
|
|
enddo
|
|
endif
|
|
c 9) compute semi-canonical orbital energies
|
|
c write(*,*) 'warning mk Fock in Mayer basis in lapl too'
|
|
if (talg.eq.'topr'.or.talg.eq.'to ') then
|
|
call dcopy(nocc**2,cmo,1,ofock1,1)
|
|
do i=1,nocc
|
|
c call dscal(nocc,fdo(i),ofock1(1,i),1)
|
|
call dscal(nocc,fdo(i),ofock1(i,1),nocc) ! in w
|
|
enddo
|
|
call dgemm('t','n',nocc,nocc,nocc,1.d0,cmo,nocc,ofock1,nocc,
|
|
$0.d0,ofock,nocc)
|
|
do i=1,nocc
|
|
fdo(i)=ofock(i,i) ! in v
|
|
enddo
|
|
endif
|
|
c
|
|
return
|
|
end subroutine trf2laplbasis
|
|
c }}}
|
|
|
|
C {{{ wabc
|
|
************************************************************************
|
|
subroutine wabc(nocc,nvirt,i,j,k,imem,dcore,t,tt,rr,rrd,
|
|
$abc_ofi, abc_ofj, abc_ofk, kinrr,
|
|
$jinrr,w,v,abij,aijk,times,talg,ss,iinbl,localcc,IabIJ,lljpai,dfnb,
|
|
$tt2,tt3,tt4,ttd,Apbc,abijinmem,jai,ccsdmkl,ptthreads,
|
|
$omp_max_threads) !,
|
|
c $abckinmem, abcjinmem, abciinmem,n,bcymv,symocc,dgroup,co,
|
|
c $first,last,va,irecln,ibufln,nbasis,nirmax,multpg,ccsdalg,jab,jai)
|
|
************************************************************************
|
|
c called if talg=occ or talg=lapl
|
|
c build W and V matrices of the (T) for all a,b,c and a given i,j(,k) index set
|
|
c if talg=occ ss=tt=tt2=tt3; ttd=nocc
|
|
c if talg=lapl ss.ne.tt due to different Laplace factors & ttd=1
|
|
c tt(-,~,-,k) in tt, tt(-,~,k,-) in tt2, tt(-,~,i,j) in tt3, tt(-,~,j,i) in tt4
|
|
************************************************************************
|
|
implicit none
|
|
character*8 ccsdalg
|
|
integer a,b,c,i,j,k,rrd,imem,kinrr,nocc,nvirt,l,dfnb
|
|
integer ttd,ti,tj,tk,intk, jinrr
|
|
real*8 abc_ofi(nvirt, nvirt, nvirt)
|
|
real*8 abc_ofj(nvirt, nvirt, nvirt)
|
|
real*8 abc_ofk(nvirt, nvirt, nvirt)
|
|
real*8 t(nvirt,nocc),tt(nvirt,nvirt,nocc,ttd)
|
|
real*8 rr(nvirt,nvirt,nocc,rrd),ss(nvirt,nvirt,nocc,nocc)
|
|
real*8 tt2(nvirt,nvirt,ttd,nocc)
|
|
real*8 tt4(nvirt,nvirt,ttd,ttd),tt3(nvirt,nvirt,ttd,ttd)
|
|
real*8 w(nvirt,nvirt,nvirt),v(nvirt,nvirt,nvirt)
|
|
real*8 abij(nvirt,nvirt,nocc,*) !<12|12>, only for j=1 if laplace + localcc
|
|
real*8 aijk(nvirt,nocc,nocc,nocc) !<12|12>
|
|
real*8 IabIJ(nvirt,nvirt),lljpai(dfnb,nvirt,nocc)
|
|
real*8 times(6,0:100),dcore(*),jai(nvirt,nocc,dfnb)
|
|
real*8 Apbc(dfnb, nvirt, nvirt)
|
|
character*4 talg,localcc
|
|
logical lapl,iinbl,abijinmem
|
|
|
|
real*8 timep(6, 3)
|
|
real*8 times0(6, 2)
|
|
character*3 ccsdmkl
|
|
integer ptthreads, omp_max_threads
|
|
integer nthread
|
|
integer omp_get_thread_num
|
|
c logical abckinmem, abcjinmem, abciinmem
|
|
real*8, pointer :: rr_i(:,:,:)
|
|
|
|
interface
|
|
c {{{ interfaces for pointers
|
|
subroutine rpoint3d(egydim,haromdim,dim1,dim2,dim3)
|
|
implicit none
|
|
integer dim1,dim2,dim3
|
|
real*8,target :: egydim(dim1,dim2,dim3)
|
|
real*8, pointer :: haromdim(:,:,:)
|
|
end subroutine
|
|
c}}}
|
|
subroutine get_abci(i, abc_ofi, bsymmv, symmocc, dgroup,
|
|
$ co, first, last, va, irecln, ibufln, nbasis, nocc, nvirt,
|
|
$ nirmax, multpg, ccsdalg, dfnb, jab, jai, dcore, imem, n,
|
|
$ addr, ccsdmkl)
|
|
implicit none
|
|
integer :: i, nvirt, nocc, nbasis, dfnb, imem, n, addr
|
|
integer :: irecln, ibufln, nirmax, dgroup
|
|
integer :: bsymmv(nirmax+1),symmocc(nirmax),multpg(nirmax,nirmax)
|
|
real*8 :: va(nvirt,nvirt,nvirt,*)
|
|
real*8 :: dcore(*)
|
|
integer :: co(nbasis)
|
|
integer :: first(0:5,8,8,nbasis),last(0:5,8,8,nbasis)
|
|
real*8, pointer :: abc_ofi(:, :, :)
|
|
real*8 :: jai(nvirt, nocc, dfnb), jab(dfnb, nvirt, nvirt)
|
|
character*8 :: ccsdalg
|
|
character*3 :: ccsdmkl
|
|
end subroutine
|
|
end interface
|
|
#if defined(OMP)
|
|
|
|
c if(ccsdmkl .eq. 'thr') then
|
|
c nthread = 1
|
|
c else
|
|
nthread = omp_max_threads / ptthreads
|
|
if(omp_get_thread_num()+1 .le. mod(omp_max_threads, ptthreads))
|
|
$ nthread = nthread + 1
|
|
c end if
|
|
#endif
|
|
|
|
lapl=.false.
|
|
if (talg.eq.'lapl'.or.talg.eq.'lato') lapl=.true.
|
|
if (lapl) then
|
|
ti=1 ! position in tt3/tt4
|
|
tj=1 ! position in tt3/tt4
|
|
tk=1 ! position in tt/tt2
|
|
else
|
|
ti=i
|
|
tj=j
|
|
tk=k
|
|
endif
|
|
intk=k ! localcc=off.and.(talg=occ/lapl)
|
|
if (.not.abijinmem) intk=1 ! position of k index in abij if abij is not stored in memory
|
|
C III a)
|
|
IF(i.eq.j)THEN
|
|
c {{{ (i.eq.j)
|
|
TIME0(times0(1,2))
|
|
c collect to v(b,a,c), then w(a,b,c)=v(b,a,c)+v(a,b,c)
|
|
c f) and b) (b here) I_ba,di*T_cd,kj^T
|
|
TIME0(times0)
|
|
call dgemm_wabc('n', 't', nvirt**2, nvirt, nvirt,
|
|
$ 1.d0, abc_ofj, nvirt**2, tt2(1, 1, tk, j), nvirt,
|
|
$ 0.d0, v, nvirt**2, ccsdmkl, 1, nvirt)
|
|
TIMEADD(times(1,37),times0)
|
|
|
|
c c) and e) (c here ! I symmetry) T_bd,jk*I_da,ci
|
|
TIME0(times0)
|
|
call dgemm_wabc('n', 'n', nvirt, nvirt**2, nvirt,
|
|
$ 1.d0, tt(1, 1, j, tk), nvirt, abc_ofj, nvirt,
|
|
$ 1.d0, v, nvirt, ccsdmkl, 3, nvirt)
|
|
TIMEADD(times(1,38),times0)
|
|
|
|
cc a) and d) (a here) T_bd,ji*I_ac,dk
|
|
TIME0(times0)
|
|
call dgemm_wabc('n', 't', nvirt, nvirt**2, nvirt,
|
|
$ 1.d0, tt4(1, 1, tj, ti), nvirt, abc_ofk, nvirt**2,
|
|
$ 1.d0, v, nvirt, ccsdmkl, 3, nvirt)
|
|
TIMEADD(times(1,39),times0)
|
|
TIMEADD(times(1,13),times0(1,2))
|
|
|
|
C 2. tag
|
|
TIME0(times0(1,2))
|
|
c b) and d) (b here) I_bl,jk*R_ac,li
|
|
if (rrd.eq.1) then
|
|
TIME0(times0)
|
|
do l=1,nocc
|
|
call tr(nvirt,ss(1,1,l,i),w(1,1,l)) ! transpose a and b of tt(a,b,l)_i to w
|
|
enddo
|
|
call dgemm_wabc('n', 't', nvirt, nvirt**2, nocc,
|
|
$ -1.d0, aijk(1, 1, j, k), nvirt, w, nvirt**2,
|
|
$ 1.d0, v, nvirt, ccsdmkl, 3, nvirt)
|
|
TIMEADD(times(1,57),times0)
|
|
|
|
elseif (rrd.eq.nocc) then
|
|
TIME0(times0)
|
|
call dgemm_wabc('n', 't', nvirt, nvirt**2, nocc,
|
|
$ -1.d0, aijk(1, 1, j, k), nvirt, rr(1, 1, 1, i), nvirt**2,
|
|
$ 1.d0, v, nvirt, ccsdmkl, 3, nvirt)
|
|
TIMEADD(times(1,57),times0)
|
|
endif
|
|
|
|
c a) and f) (a here) T_ba,li*I_cl,kj
|
|
TIME0(times0)
|
|
call dgemm_wabc('n', 't', nvirt**2, nvirt, nocc,
|
|
$ -1.d0, ss(1, 1, 1, i), nvirt**2, aijk(1, 1, k, j), nvirt,
|
|
$ 1.d0, v, nvirt**2, ccsdmkl, 1, nvirt)
|
|
TIMEADD(times(1,59),times0)
|
|
|
|
c c) and e) (c here) I_bl,jk*T_ac,lk
|
|
if(ccsdmkl .eq. 'thr') then
|
|
TIME0(times0)
|
|
call dgemm('n', 't', nvirt, nvirt**2, nocc,
|
|
$ -1.d0, aijk(1, 1, j, i), nvirt, ss(1, 1, 1, k), nvirt**2,
|
|
$ 1.d0, v, nvirt)
|
|
TIMEADD(times(1,58),times0)
|
|
|
|
TIME0(times0)
|
|
call dcopy(nvirt**3,v,1,w,1)
|
|
do c=1,nvirt
|
|
call tradd(nvirt,1.d0,v(1,1,c),w(1,1,c))
|
|
enddo
|
|
TIMEADD(times(1,63),times0)
|
|
|
|
else
|
|
TIME0(times0)
|
|
c$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(b, c, timep) SCHEDULE(DYNAMIC)
|
|
c$OMP& NUM_THREADS(nthread)
|
|
do c = 1, nvirt
|
|
c TIME0(timep(1,3))
|
|
call dgemm('n', 't', nvirt, nvirt, nocc,
|
|
$ -1.d0, aijk(1,1,j,i), nvirt, ss(1,c,1,k), nvirt**2,
|
|
$ 1.d0, v(1, 1, c), nvirt)
|
|
|
|
! v(:,:,c) should be in cache
|
|
do b = 1, nvirt
|
|
w(1:nvirt, b, c) = v(1:nvirt, b, c) + v(b, 1:nvirt, c)
|
|
end do
|
|
end do
|
|
c$OMP END PARALLEL DO
|
|
TIMEADD(times(1,58),times0)
|
|
end if
|
|
TIMEADD(times(1,14),times0(1,2))
|
|
c }}}
|
|
ELSE IF(j.eq.k)THEN !i.ne.j!!!
|
|
c {{{ (j.eq.k)
|
|
TIME0(times0(1,2))
|
|
c collect to v(a,c,b), then w(a,b,c)=v(a,c,b)+v(a,b,c)
|
|
c b) and c) (c here, I symmetry) I_da,ci^T * T_bd,jk^T
|
|
TIME0(times0)
|
|
if (iinbl .or. localcc.eq.'off ') then
|
|
call dgemm_wabc('t', 't', nvirt**2, nvirt, nvirt,
|
|
$ 1.d0, abc_ofi, nvirt, tt(1, 1, j, tk), nvirt,
|
|
$ 0.d0, v, nvirt**2, ccsdmkl, 1, nvirt)
|
|
|
|
else
|
|
call dgemm_wabc('n', 'n', nvirt, nvirt**2, dfnb,
|
|
$ 1.d0, jai(1, i, 1), nvirt*nocc, Apbc, dfnb,
|
|
$ 0.d0, v, nvirt, ccsdmkl, 3, nvirt) ! v(a, c, b)= lljpai(P, \bar a, \bar I) Ajk(P, c, b)
|
|
|
|
end if
|
|
TIMEADD(times(1,40),times0)
|
|
|
|
c a) and f) (a here) I_ac,dk * T_bd,ji^T
|
|
TIME0(times0)
|
|
call dgemm_wabc('n', 't', nvirt**2, nvirt, nvirt,
|
|
$ 1.d0, abc_ofj, nvirt**2, tt4(1, 1, tj, ti), nvirt,
|
|
$ 1.d0, v, nvirt**2, ccsdmkl, 1, nvirt)
|
|
TIMEADD(times(1,41),times0)
|
|
|
|
c e) and d) (e here) T_ad,ik * I_cb,dj^T
|
|
TIME0(times0)
|
|
call dgemm_wabc('n', 't', nvirt, nvirt**2, nvirt,
|
|
$ 1.d0, tt(1, 1, i, tk), nvirt, abc_ofj, nvirt**2,
|
|
$ 1.d0, v, nvirt, ccsdmkl, 3, nvirt)
|
|
TIMEADD(times(1,42),times0)
|
|
TIMEADD(times(1,15),times0(1,2))
|
|
|
|
C 2. tag
|
|
TIME0(times0(1,2))
|
|
c a) and b), b: rr(a,c,l,i)*<bl|jk>^T -> v(a,c,b) !NP
|
|
if (rrd.eq.1) then
|
|
TIME0(times0)
|
|
do l=1,nocc
|
|
call tr(nvirt,ss(1,1,l,i),w(1,1,l)) ! transpose a and b of tt(a,b,l)_i to w
|
|
enddo
|
|
call dgemm_wabc('n', 't', nvirt**2, nvirt, nocc,
|
|
$ -1.d0, w, nvirt**2, aijk(1, 1, j, k), nvirt,
|
|
$ 1.d0, v, nvirt**2, ccsdmkl, 1, nvirt)
|
|
TIMEADD(times(1,60),times0)
|
|
|
|
elseif (rrd.eq.nocc) then
|
|
TIME0(times0)
|
|
call dgemm_wabc('n', 't', nvirt**2, nvirt, nocc,
|
|
$ -1.d0, rr(1, 1, 1, i), nvirt**2, aijk(1, 1, j, k), nvirt,
|
|
$ 1.d0, v, nvirt**2, ccsdmkl, 1, nvirt)
|
|
TIMEADD(times(1,60),times0)
|
|
endif
|
|
|
|
c c) and f) (c here) T_ac,lk * I_bl,ji^T
|
|
TIME0(times0)
|
|
call dgemm_wabc('n', 't', nvirt**2, nvirt, nocc,
|
|
$ -1.d0, ss(1, 1, 1, k), nvirt**2, aijk(1, 1, j, i), nvirt,
|
|
$ 1.d0, v, nvirt**2, ccsdmkl, 1, nvirt)
|
|
TIMEADD(times(1,61),times0)
|
|
|
|
c d) and e) (d here) I_al,ik * T_cb,lj^T
|
|
if(ccsdmkl .eq. 'thr') then
|
|
TIME0(times0)
|
|
call dgemm('n', 't', nvirt, nvirt**2, nocc,
|
|
$ -1.d0, aijk(1,1,i,k), nvirt, ss(1,1,1,j), nvirt**2,
|
|
$ 1.d0, v, nvirt)
|
|
TIMEADD(times(1,62),times0)
|
|
c
|
|
TIME0(times0)
|
|
call dcopy(nvirt**3,v,1,w,1)
|
|
do c=1,nvirt
|
|
do b=1,nvirt
|
|
call daxpy(nvirt,1.d0,v(b,c,1),nvirt**2,w(b,1,c),nvirt)
|
|
enddo
|
|
enddo
|
|
TIMEADD(times(1,64),times0)
|
|
|
|
else
|
|
TIME0(times0)
|
|
call dfillzero(w, nvirt**3)
|
|
c$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(a, b, c, timep)
|
|
c$OMP& SCHEDULE(DYNAMIC)
|
|
c$OMP& NUM_THREADS(nthread)
|
|
do b = 1, nvirt
|
|
call dgemm('n', 't', nvirt, nvirt, nocc,
|
|
$ -1.d0, aijk(1,1,i,k), nvirt, ss(1,b,1,j), nvirt**2,
|
|
$ 1.d0, v(1, 1, b), nvirt)
|
|
|
|
c$OMP CRITICAL
|
|
call daxpy(nvirt**2, 1.d0, v(1, 1, b), 1, w(1, 1, b), 1)
|
|
do c = 1, nvirt
|
|
call daxpy(nvirt, 1.d0, v(1, c, b), 1, w(1, b, c), 1)
|
|
end do
|
|
c$OMP END CRITICAL
|
|
end do
|
|
c$OMP END PARALLEL DO
|
|
TIMEADD(times(1,62),times0)
|
|
end if
|
|
TIMEADD(times(1,16),times0(1,2))
|
|
c }}}
|
|
ELSE
|
|
c {{{ 6 <ab|ci>*T2 terms
|
|
TIME0(times0)
|
|
c e)
|
|
TIME0(times0(1,2))
|
|
call dgemm_wabc('n', 'n', nvirt, nvirt**2, nvirt,
|
|
$ 1.d0, tt(1, 1, i, tk), nvirt, abc_ofj, nvirt,
|
|
$ 0.d0, w, nvirt, ccsdmkl, 3, nvirt)
|
|
TIMEADD(times(1,47),times0(1,2))
|
|
|
|
c f)
|
|
TIME0(times0(1,2))
|
|
call dgemm_wabc('n', 't', nvirt**2, nvirt, nvirt,
|
|
$ 1.d0, abc_ofj, nvirt**2, tt2(1, 1, tk, i), nvirt,
|
|
$ 1.d0, w, nvirt**2, ccsdmkl, 1, nvirt)
|
|
TIMEADD(times(1,48),times0(1,2))
|
|
|
|
c d)
|
|
TIME0(times0(1,2))
|
|
call dgemm_wabc('n', 't', nvirt, nvirt**2, nvirt,
|
|
$ 1.d0, tt3(1, 1, ti, tj), nvirt, abc_ofk, nvirt**2,
|
|
$ 1.d0, w, nvirt, ccsdmkl, 3, nvirt)
|
|
TIMEADD(times(1,46),times0(1,2))
|
|
c a)
|
|
TIME0(times0(1,2))
|
|
if(ccsdmkl .eq. 'thr') then
|
|
call dgemm('t', 't', nvirt**2, nvirt, nvirt,
|
|
$ 1.d0, abc_ofk, nvirt, tt4(1, 1, tj, ti), nvirt,
|
|
$ 0.d0, v, nvirt**2) ! start v(c, a, b)
|
|
else
|
|
call dgemm_wabc('n', 't', nvirt**2, nvirt, nvirt,
|
|
$ 1.d0, abc_ofk, nvirt**2, tt4(1,1,tj,ti), nvirt,
|
|
$ 1.d0, w, nvirt, ccsdmkl, 2, nvirt)
|
|
end if
|
|
TIMEADD(times(1,43),times0(1,2))
|
|
c b)
|
|
if (iinbl .or. localcc.eq.'off ') then
|
|
TIME0(times0(1,2))
|
|
if(ccsdmkl .eq. 'thr') then
|
|
call dgemm('n', 'n', nvirt, nvirt**2, nvirt,
|
|
$ 1.d0, tt2(1, 1, tk, j), nvirt, abc_ofi, nvirt,
|
|
$ 1.d0, v, nvirt)
|
|
else
|
|
call dgemm_wabc('t', 't', nvirt**2, nvirt, nvirt,
|
|
$ 1.d0, abc_ofi, nvirt, tt2(1, 1, tk, j), nvirt,
|
|
$ 1.d0, w, nvirt**2, ccsdmkl, 1, nvirt)
|
|
end if
|
|
TIMEADD(times(1,44),times0(1,2))
|
|
c c)
|
|
TIME0(times0(1,2))
|
|
if(ccsdmkl .eq. 'thr') then
|
|
call dgemm('n', 't', nvirt**2, nvirt, nvirt,
|
|
$ 1.d0, abc_ofi, nvirt**2, tt(1, 1, j, tk), nvirt,
|
|
$ 1.d0, v, nvirt**2)
|
|
else
|
|
call dgemm_wabc('t', 't', nvirt**2, nvirt, nvirt,
|
|
$ 1.d0, abc_ofi, nvirt, tt(1, 1, j, tk), nvirt,
|
|
$ 1.d0, w, nvirt, ccsdmkl, 2, nvirt)
|
|
end if
|
|
TIMEADD(times(1,45),times0(1,2))
|
|
else
|
|
call dgemm_wabc('n', 'n', nvirt, nvirt**2, dfnb,
|
|
$ 1.d0, jai(1, i, 1), nvirt*nocc, Apbc, dfnb,
|
|
$ 1.d0, w, nvirt, ccsdmkl, 3, nvirt)
|
|
endif ! iinbl
|
|
TIMEADD(times(1,17),times0)
|
|
c }}}
|
|
c
|
|
c {{{ 6 <ai|jk>*T2 terms
|
|
TIME0(times0)
|
|
if(ccsdmkl .eq. 'thr') then
|
|
c b)
|
|
TIME0(times0(1,2))
|
|
call dgemm('n', 't', nvirt**2, nvirt, nocc,
|
|
$ -1.d0, ss(1, 1, 1, i), nvirt**2, aijk(1, 1, j, k), nvirt,
|
|
$ 1.d0, v, nvirt**2)
|
|
TIMEADD(times(1,51),times0(1,2))
|
|
|
|
c c)
|
|
TIME0(times0(1,2))
|
|
call dgemm('n', 't', nvirt**2, nvirt, nocc,
|
|
$ -1.d0, rr(1,1,1,kinrr), nvirt**2, aijk(1,1,j,i), nvirt,
|
|
$ 1.d0, v, nvirt**2)
|
|
TIMEADD(times(1,52),times0(1,2))
|
|
|
|
c v(c,a,b) +> w(a,b,c)
|
|
TIME0(times0(1,2))
|
|
do c=1,nvirt
|
|
call daxpy(nvirt**2,1.d0,v(c,1,1),nvirt,w(1,1,c),1)
|
|
enddo
|
|
TIMEADD(times(1,49),times0(1,2))
|
|
|
|
else
|
|
c c)
|
|
TIME0(times0(1,2))
|
|
call dgemm_wabc('n', 't', nvirt**2, nvirt, nocc,
|
|
$ -1.d0, ss(1,1,1,k), nvirt**2, aijk(1,1,j,i), nvirt,
|
|
$ 1.d0, w, nvirt, ccsdmkl, 2, nvirt)
|
|
TIMEADD(times(1,52),times0(1,2))
|
|
|
|
end if
|
|
|
|
if (rrd.eq.1 .and. ccsdmkl.eq.'thr') then
|
|
c a)
|
|
TIME0(times0(1,2))
|
|
call dgemm('n', 't', nvirt, nvirt**2, nocc,
|
|
$ -1.d0, aijk(1, 1, k, j), nvirt, ss(1, 1, 1, i), nvirt**2,
|
|
$ 0.d0, v, nvirt) ! start v(c, b, a)
|
|
TIMEADD(times(1,50),times0(1,2))
|
|
c d)
|
|
TIME0(times0(1,2))
|
|
call dgemm('n', 't', nvirt**2, nvirt, nocc,
|
|
$ -1.d0, ss(1, 1, 1, j), nvirt**2, aijk(1, 1, i, k), nvirt,
|
|
$ 1.d0, v, nvirt**2)
|
|
TIMEADD(times(1,53),times0(1,2))
|
|
|
|
c v(c,b,a) +> w(a,b,c)
|
|
TIME0(times0(1,2))
|
|
do c=1,nvirt
|
|
do b=1,nvirt
|
|
call daxpy(nvirt,1.d0,v(c,b,1),nvirt**2,w(1,b,c),1)
|
|
enddo
|
|
enddo
|
|
TIMEADD(times(1,47),times0(1,2))
|
|
|
|
else
|
|
if(rrd .eq. 1) then
|
|
do l=1,nocc
|
|
call tr(nvirt,ss(1,1,l,i),v(1,1,l)) ! transpose a and b of tt(a,b,l)_i to v
|
|
enddo
|
|
|
|
call rpoint3d(v, rr_i, nvirt, nvirt, nocc)
|
|
|
|
else
|
|
call rpoint3d(rr(1, 1, 1, i), rr_i, nvirt, nvirt, nocc)
|
|
end if
|
|
|
|
c a)
|
|
TIME0(times0(1,2))
|
|
call dgemm_wabc('n', 't', nvirt**2, nvirt, nocc,
|
|
$ -1.d0, rr_i, nvirt**2, aijk(1, 1, k, j), nvirt,
|
|
$ 1.d0, w, nvirt**2, ccsdmkl, 1, nvirt)
|
|
TIMEADD(times(1,50),times0(1,2))
|
|
|
|
c d)
|
|
TIME0(times0(1,2))
|
|
call dgemm_wabc('n', 't', nvirt, nvirt**2, nocc,
|
|
$ -1.d0, aijk(1, 1, i, k), nvirt,
|
|
$ rr(1, 1, 1, jinrr), nvirt**2,
|
|
$ 1.d0, w, nvirt, ccsdmkl, 3, nvirt)
|
|
TIMEADD(times(1,53),times0(1,2))
|
|
|
|
c b)
|
|
TIME0(times0(1,2))
|
|
if(ccsdmkl.eq.'seq') then
|
|
call dgemm_wabc('n', 't', nvirt**2, nvirt, nocc,
|
|
$ -1.d0, rr_i, nvirt**2, aijk(1, 1, j, k), nvirt,
|
|
$ 1.d0, w, nvirt, ccsdmkl, 2, nvirt)
|
|
end if
|
|
TIMEADD(times(1,51),times0(1,2))
|
|
endif
|
|
|
|
c f)
|
|
TIME0(times0(1,2))
|
|
call dgemm_wabc('n', 't', nvirt**2, nvirt, nocc,
|
|
$ -1.d0, ss(1,1,1,j), nvirt**2, aijk(1,1,k,i), nvirt,
|
|
$ 1.d0, w, nvirt**2, ccsdmkl, 1, nvirt)
|
|
TIMEADD(times(1,55),times0(1,2))
|
|
|
|
c e)
|
|
TIME0(times0(1,2))
|
|
call dgemm_wabc('n', 't', nvirt, nvirt**2, nocc,
|
|
$ -1.d0, aijk(1,1,i,j), nvirt, ss(1,1,1,k), nvirt**2,
|
|
$ 1.d0, w, nvirt, ccsdmkl, 3, nvirt)
|
|
TIMEADD(times(1,54),times0(1,2))
|
|
TIMEADD(times(1,18),times0)
|
|
c }}}
|
|
ENDIF
|
|
C calculating V {{{
|
|
TIME0(times0(1,2))
|
|
call dcopy(nvirt**3,w,1,v,1)
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
C$OMP& PRIVATE(c)
|
|
c$OMP& NUM_THREADS(nthread)
|
|
do c = 1, nvirt
|
|
call dger(nvirt, nvirt,
|
|
$ 1.d0, t(1,i), 1, abij(1,c,j,intk), 1, v(1,1,c), nvirt)
|
|
end do
|
|
C$OMP END PARALLEL DO
|
|
c
|
|
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
C$OMP& PRIVATE(c)
|
|
c$OMP& NUM_THREADS(nthread)
|
|
do c = 1, nvirt
|
|
call dger(nvirt, nvirt,
|
|
$ 1.d0, abij(1,c,i,intk), 1, t(1,j), 1, v(1,1,c), nvirt)
|
|
end do
|
|
C$OMP END PARALLEL DO
|
|
|
|
if (.not.abijinmem.and.j.ne.1) then
|
|
call dgemm('n', 't', nvirt, nvirt, dfnb,
|
|
$ 1.d0, jai(1,i,1), nvirt*nocc, jai(1,j,1), nvirt*nocc,
|
|
$ 0.d0, IabIJ, nvirt) ! assembly the missing abij(.,.,i,j)
|
|
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
C$OMP& PRIVATE(b)
|
|
c$OMP& NUM_THREADS(nthread)
|
|
do b = 1, nvirt
|
|
call dger(nvirt, nvirt,
|
|
$ 1.d0, Iabij(1,b), 1, t(1,k), 1, v(1,b,1), nvirt**2)
|
|
end do
|
|
C$OMP END PARALLEL DO
|
|
|
|
else ! abijinmem.or.j.eq.1
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
C$OMP& PRIVATE(b)
|
|
c$OMP& NUM_THREADS(nthread)
|
|
do b = 1, nvirt
|
|
call dger(nvirt, nvirt,
|
|
$ 1.d0, abij(1,b,i,j), 1, t(1,k), 1, v(1,b,1), nvirt**2)
|
|
end do
|
|
C$OMP END PARALLEL DO
|
|
endif
|
|
TIMEADD(times(1,19),times0(1,2)) ! }}}
|
|
return
|
|
end subroutine wabc
|
|
c }}}
|
|
c
|
|
************************************************************************
|
|
subroutine ptindices(icount, nocc, mpi_rank, mpi_size,
|
|
$ master_thread, find, lind,
|
|
$ notend, max_threads, nind, inds) ! {{{
|
|
************************************************************************
|
|
implicit none
|
|
c include "mpif.h"
|
|
integer, intent(in) :: mpi_rank, mpi_size, nocc
|
|
integer :: find, lind, max_threads
|
|
logical :: notend, master_thread
|
|
integer, dimension(0:nocc) :: icount
|
|
|
|
integer :: k
|
|
|
|
type indices
|
|
integer :: num = 0
|
|
integer, allocatable, dimension(:) :: val
|
|
end type
|
|
type(indices), dimension(mpi_size) :: ind
|
|
integer :: isum, i
|
|
integer, dimension(mpi_size) :: nums
|
|
|
|
integer, dimension(nocc) :: inds
|
|
integer :: nind
|
|
|
|
isum = icount(nocc)
|
|
do k = nocc, 1, -1
|
|
icount(k) = icount(k) - icount(k-1)
|
|
end do
|
|
|
|
nums = 0
|
|
do i = 1, mpi_size
|
|
allocate(ind(i)%val(nocc))
|
|
ind(i)%val = 0
|
|
end do
|
|
|
|
do i = 1, nocc
|
|
k = minloc(nums, 1)
|
|
nums(k) = nums(k) + icount(i)
|
|
ind(k)%num = ind(k)%num + 1
|
|
ind(k)%val(ind(k)%num) = i
|
|
end do
|
|
|
|
c if(mpi_rank .eq. 0 .and. mpi_size .gt. 1) then
|
|
if(master_thread .and. mpi_size .gt. 1) then
|
|
write(*, '("Allocation:")')
|
|
do i = 1, mpi_size
|
|
write(*, '(i4, " -- ")', advance = 'no') i
|
|
write(*, '(100(10i4, :, /, 8x))', advance = 'no')
|
|
$ (ind(i)%val(k), k = 1, ind(i)%num)
|
|
write(*, '(f12.2, "%")') dble(nums(i)) / isum * 100
|
|
end do
|
|
end if
|
|
|
|
nind = ind(mpi_rank+1)%num
|
|
inds(1:nocc) = ind(mpi_rank+1)%val(1:nocc)
|
|
|
|
do i = 1, mpi_size
|
|
deallocate(ind(i)%val)
|
|
end do
|
|
|
|
end subroutine ! }}}
|
|
|
|
|
|
************************************************************************
|
|
subroutine ptindices_old(icount, nocc, mpi_rank, mpi_size, find,
|
|
$ lind, notend, max_threads)
|
|
************************************************************************
|
|
implicit none
|
|
integer, intent(in) :: mpi_rank, mpi_size, nocc
|
|
integer :: find, lind, max_threads
|
|
logical :: notend
|
|
|
|
integer, dimension(0:nocc) :: icount
|
|
|
|
integer :: nrank, k
|
|
real*8 :: avgcount
|
|
|
|
nrank = 0
|
|
k = 1
|
|
avgcount = dble(icount(nocc)) / mpi_size ! average number of k's to process
|
|
do while(k .le. nocc)
|
|
if(icount(k)-icount(find-1) .ge. avgcount) then
|
|
! hit next rank's final index, check if previous k is more favourable
|
|
if(abs(icount(k-1) - icount(find-1) - avgcount) .lt.
|
|
$ abs(icount(k) - icount(find-1) - avgcount)) then
|
|
if(k .ne. find) then
|
|
k = k - 1
|
|
end if
|
|
end if
|
|
if(nrank .eq. mpi_rank) then
|
|
! found last index
|
|
if(nrank .eq. mpi_size-1) then
|
|
! last thread calculates all remaining indices
|
|
lind = nocc
|
|
max_threads = mpi_size
|
|
else
|
|
lind = k
|
|
end if
|
|
exit
|
|
else
|
|
! next rank's first index
|
|
nrank = nrank + 1
|
|
find = k + 1
|
|
end if
|
|
end if
|
|
if(k .eq. nocc) then
|
|
max_threads = nrank + 1
|
|
if(nrank .eq. mpi_rank) then
|
|
lind = nocc
|
|
else
|
|
! too many processes
|
|
notend = .false.
|
|
end if
|
|
end if
|
|
|
|
k = k + 1
|
|
end do
|
|
|
|
end subroutine
|
|
|
|
C {{{ wabc
|
|
************************************************************************
|
|
subroutine wabc_old(nocc,nvirt,i,j,k,imem,dcore,t,tt,rr,rrd,li,lj,
|
|
$ind,kinrr,w,v,abij,aijk,times,talg,ss,iinbl,localcc,IabIJ,lljpai,
|
|
$dfnb,tt2,tt3,tt4,ttd,abijinmem,jai)
|
|
************************************************************************
|
|
c called if talg=occ of talg=lapl
|
|
c build W and V matrices of the (T) for all a,b,c and a given i,j(,k) index set
|
|
c if talg=occ ss=tt=tt2=tt3; ttd=nocc
|
|
c if talg=lapl ss.ne.tt due to different Laplace factors & ttd=1
|
|
c tt(-,~,-,k) in tt, tt(-,~,k,-) in tt2, tt(-,~,i,j) in tt3, tt(-,~,j,i) in tt4
|
|
************************************************************************
|
|
implicit none
|
|
integer a,b,c,i,j,k,rrd,ind,li,lj,imem,kinrr,nocc,nvirt,l,dfnb
|
|
integer ttd,ti,tj,tk,intk
|
|
real*8 t(nvirt,nocc),tt(nvirt,nvirt,nocc,ttd)
|
|
real*8 rr(nvirt,nvirt,nocc,rrd),ss(nvirt,nvirt,nocc,nocc)
|
|
real*8 tt2(nvirt,nvirt,ttd,nocc)
|
|
real*8 tt4(nvirt,nvirt,ttd,ttd),tt3(nvirt,nvirt,ttd,ttd)
|
|
real*8 w(nvirt,nvirt,nvirt),v(nvirt,nvirt,nvirt)
|
|
real*8 abij(nvirt,nvirt,nocc,*) !<12|12>, only for j=1 if laplace + localcc
|
|
real*8 aijk(nvirt,nocc,nocc,nocc) !<12|12>
|
|
real*8 IabIJ(nvirt,nvirt),lljpai(dfnb,nvirt,nocc)
|
|
real*8 times(6,0:100),dcore(*),fact,jai(nvirt,nocc,dfnb)
|
|
character*4 talg,localcc
|
|
logical lapl,iinbl,abijinmem
|
|
lapl=.false.
|
|
if (talg.eq.'lapl'.or.talg.eq.'lato') lapl=.true.
|
|
fact=0.d0
|
|
if (.not.iinbl) fact=1.d0
|
|
if (lapl) then
|
|
ti=1 ! position in tt3/tt4
|
|
tj=1 ! position in tt3/tt4
|
|
tk=1 ! position in tt/tt2
|
|
else
|
|
ti=i
|
|
tj=j
|
|
tk=k
|
|
endif
|
|
intk=k ! localcc=off.and.(talg=occ/lapl)
|
|
if (.not.abijinmem) intk=1 ! position of k index in abij if abij is not stored in memory
|
|
C III a)
|
|
IF(i.eq.j)THEN
|
|
c {{{ (i.eq.j)
|
|
c TIME0(times)
|
|
c collect to v(b,a,c), then w(a,b,c)=v(b,a,c)+v(a,b,c)
|
|
c f) and b) (b here) I_ba,di*T_cd,kj^T
|
|
call dgemm('n','t',nvirt**2,nvirt,nvirt,1.d0,
|
|
$dcore(imem+(lj-1)*nvirt**3),nvirt**2,tt2(1,1,tk,j),
|
|
$nvirt,0.d0,v,nvirt**2)
|
|
c c) and e) (c here ! I symmetry) T_bd,jk*I_da,ci
|
|
call dgemm('n','n',nvirt,nvirt**2,nvirt,1.d0,
|
|
$tt(1,1,j,tk),nvirt,dcore(imem+(lj-1)*nvirt**3),
|
|
$nvirt,1.d0,v,nvirt)
|
|
c a) and d) (a here) T_bd,ji*I_ac,dk
|
|
call dgemm('n','t',nvirt,nvirt**2,nvirt,1.d0,
|
|
$tt4(1,1,tj,ti),nvirt,dcore(imem+(k-ind)*nvirt**3),
|
|
$nvirt**2,1.d0,v,nvirt)
|
|
c TIMEADD(times(1,13),times)
|
|
C 2. tag
|
|
c TIME0(times)
|
|
c b) and d) (b here) I_bl,jk*R_ac,li
|
|
if (rrd.eq.1) then
|
|
do l=1,nocc
|
|
call tr(nvirt,ss(1,1,l,i),w(1,1,l)) ! transpose a and b of tt(a,b,l)_i to w
|
|
enddo
|
|
call dgemm('n','t',nvirt,nvirt**2,nocc,-1.d0,aijk(1,1,j,k),nvirt,
|
|
$w,nvirt**2,1.d0,v,nvirt)
|
|
elseif (rrd.eq.nocc) then
|
|
call dgemm('n','t',nvirt,nvirt**2,nocc,-1.d0,aijk(1,1,j,k),nvirt,
|
|
$rr(1,1,1,i),nvirt**2,1.d0,v,nvirt)
|
|
endif
|
|
c c) and e) (c here) I_bl,jk*T_ac,lk
|
|
call dgemm('n','t',nvirt,nvirt**2,nocc,-1.d0,aijk(1,1,j,i),nvirt,
|
|
$ss(1,1,1,k),nvirt**2,1.d0,v,nvirt)
|
|
c a) and f) (a here) T_ba,li*I_cl,kj
|
|
call dgemm('n','t',nvirt**2,nvirt,nocc,-1.d0,ss(1,1,1,i),nvirt**2,
|
|
$aijk(1,1,k,j),nvirt,1.d0,v,nvirt**2)
|
|
c
|
|
call dcopy(nvirt**3,v,1,w,1)
|
|
do c=1,nvirt
|
|
call tradd(nvirt,1.d0,v(1,1,c),w(1,1,c))
|
|
enddo
|
|
c TIMEADD(times(1,14),times)
|
|
c }}}
|
|
ELSE IF(j.eq.k)THEN !i.ne.j!!!
|
|
c {{{ (j.eq.k)
|
|
c TIME0(times)
|
|
c collect to v(a,c,b), then w(a,b,c)=v(a,c,b)+v(a,b,c)
|
|
c b) and c) (c here, I symmetry) I_da,ci^T * T_bd,jk^T
|
|
if (iinbl) call dgemm('t','t',nvirt**2,nvirt,nvirt,1.d0,
|
|
$dcore(imem+(li-1)*nvirt**3),nvirt,tt(1,1,j,tk),
|
|
$nvirt,0.d0,v,nvirt**2)
|
|
c a) and f) (a here) I_ac,dk * T_bd,ji^T
|
|
call dgemm('n','t',nvirt**2,nvirt,nvirt,1.d0,
|
|
$dcore(imem+(lj-1)*nvirt**3),nvirt**2,tt4(1,1,tj,ti),
|
|
$nvirt,1.d0,v,nvirt**2)
|
|
c e) and d) (e here) T_ad,ik * I_cb,dj^T
|
|
call dgemm('n','t',nvirt,nvirt**2,nvirt,1.d0,
|
|
$tt(1,1,i,tk),nvirt,dcore(imem+(lj-1)*nvirt**3),
|
|
$nvirt**2,1.d0,v,nvirt)
|
|
c TIMEADD(times(1,15),times)
|
|
C 2. tag
|
|
c TIME0(times)
|
|
c a) and b), b: rr(a,c,l,i)*<bl|jk>^T -> v(a,c,b) !NP
|
|
if (rrd.eq.1) then
|
|
do l=1,nocc
|
|
call tr(nvirt,ss(1,1,l,i),w(1,1,l)) ! transpose a and b of tt(a,b,l)_i to w
|
|
enddo
|
|
call dgemm('n','t',nvirt**2,nvirt,nocc,-1.d0,w,nvirt**2,
|
|
$aijk(1,1,j,k),nvirt,1.d0,v,nvirt**2)
|
|
elseif (rrd.eq.nocc) then
|
|
call dgemm('n','t',nvirt**2,nvirt,nocc,-1.d0,rr(1,1,1,i),nvirt**2,
|
|
$aijk(1,1,j,k),nvirt,1.d0,v,nvirt**2)
|
|
endif
|
|
c c) and f) (c here) T_ac,lk * I_bl,ji^T
|
|
call dgemm('n','t',nvirt**2,nvirt,nocc,-1.d0,ss(1,1,1,k),nvirt**2,
|
|
$aijk(1,1,j,i),nvirt,1.d0,v,nvirt**2)
|
|
c d) and e) (d here) I_al,ik * T_cb,lj^T
|
|
call dgemm('n','t',nvirt,nvirt**2,nocc,-1.d0,aijk(1,1,i,k),nvirt,
|
|
$ss(1,1,1,j),nvirt**2,1.d0,v,nvirt)
|
|
c
|
|
call dcopy(nvirt**3,v,1,w,1)
|
|
do c=1,nvirt
|
|
do b=1,nvirt
|
|
call daxpy(nvirt,1.d0,v(b,c,1),nvirt**2,w(b,1,c),nvirt)
|
|
enddo
|
|
enddo
|
|
c TIMEADD(times(1,16),times)
|
|
c }}}
|
|
ELSE
|
|
c {{{ 6 <ab|ci>*T2 terms
|
|
c TIME0(times(1,99))
|
|
c e)
|
|
c TIME0(times)
|
|
call dgemm('n','n',nvirt,nvirt**2,nvirt,1.d0,
|
|
$tt(1,1,i,tk),nvirt,dcore(imem+(lj-1)*nvirt**3),nvirt,fact,w,nvirt)
|
|
c TIMEADD(times(1,45),times)
|
|
c f)
|
|
c TIME0(times)
|
|
if (lapl) then
|
|
call dgemm('n','t',nvirt**2,nvirt,nvirt,1.d0,
|
|
$dcore(imem+(lj-1)*nvirt**3),nvirt**2,
|
|
$tt2(1,1,tk,i),nvirt,1.d0,w,nvirt**2)
|
|
else
|
|
call dgemm('n','n',nvirt**2,nvirt,nvirt,1.d0,
|
|
$dcore(imem+(lj-1)*nvirt**3),nvirt**2,
|
|
$tt(1,1,i,tk),nvirt,1.d0,w,nvirt**2)
|
|
endif
|
|
c TIMEADD(times(1,46),times)
|
|
c d)
|
|
c TIME0(times)
|
|
call dgemm('n','t',nvirt,nvirt**2,nvirt,1.d0,
|
|
$tt3(1,1,ti,tj),nvirt,dcore(imem+(k-ind)*nvirt**3),
|
|
$nvirt**2,1.d0,w,nvirt)
|
|
c TIMEADD(times(1,44),times)
|
|
c a)
|
|
c TIME0(times)
|
|
if (lapl) then
|
|
call dgemm('t','t',nvirt**2,nvirt,nvirt,1.d0,
|
|
$dcore(imem+(k-ind)*nvirt**3),nvirt,tt4(1,1,tj,ti),
|
|
$nvirt,0.d0,v,nvirt**2) ! start v(c,a,b)
|
|
else
|
|
call dgemm('t','n',nvirt**2,nvirt,nvirt,1.d0,
|
|
$dcore(imem+(k-ind)*nvirt**3),nvirt,tt3(1,1,ti,tj),
|
|
$nvirt,0.d0,v,nvirt**2) ! start v(c,a,b)
|
|
endif
|
|
c TIMEADD(times(1,40),times)
|
|
c b)
|
|
c TIME0(times)
|
|
if (iinbl) then
|
|
call dgemm('n','n',nvirt,nvirt**2,nvirt,1.d0,tt2(1,1,tk,j),
|
|
$nvirt,dcore(imem+(li-1)*nvirt**3),nvirt,1.d0,v,nvirt)
|
|
c TIMEADD(times(1,42),times)
|
|
c c)
|
|
c TIME0(times)
|
|
if (lapl) then
|
|
call dgemm('n','t',nvirt**2,nvirt,nvirt,1.d0,
|
|
$dcore(imem+(li-1)*nvirt**3),nvirt**2,
|
|
$tt(1,1,j,tk),nvirt,1.d0,v,nvirt**2)
|
|
else
|
|
call dgemm('n','n',nvirt**2,nvirt,nvirt,1.d0,
|
|
$dcore(imem+(li-1)*nvirt**3),nvirt**2,
|
|
$tt2(1,1,tk,j),nvirt,1.d0,v,nvirt**2)
|
|
endif
|
|
endif ! iinbl
|
|
c TIMEADD(times(1,43),times)
|
|
c TIMEADD(times(1,17),times(1,99))
|
|
c }}}
|
|
c
|
|
c {{{ 6 <ai|jk>*T2 terms
|
|
c TIME0(times(1,99))
|
|
c b)
|
|
c TIME0(times)
|
|
call dgemm('n','t',nvirt**2,nvirt,nocc,-1.d0,ss(1,1,1,i),nvirt**2,
|
|
$aijk(1,1,j,k),nvirt,1.d0,v,nvirt**2)
|
|
c TIMEADD(times(1,51),times)
|
|
c c)
|
|
c TIME0(times)
|
|
call dgemm('n','t',nvirt**2,nvirt,nocc,-1.d0,rr(1,1,1,kinrr),
|
|
$nvirt**2,aijk(1,1,j,i),nvirt,1.d0,v,nvirt**2)
|
|
c TIMEADD(times(1,52),times)
|
|
c v(c,a,b) +> w(a,b,c)
|
|
c TIME0(times)
|
|
do c=1,nvirt
|
|
call daxpy(nvirt**2,1.d0,v(c,1,1),nvirt,w(1,1,c),1)
|
|
enddo
|
|
c TIMEADD(times(1,41),times)
|
|
if (rrd.eq.1) then
|
|
c a)
|
|
c TIME0(times)
|
|
call dgemm('n','t',nvirt,nvirt**2,nocc,-1.d0,aijk(1,1,k,j),nvirt,
|
|
$ ss(1,1,1,i),nvirt**2,0.d0,v,nvirt) ! start v(c,b,a)
|
|
c TIMEADD(times(1,50),times)
|
|
c d)
|
|
c TIME0(times)
|
|
call dgemm('n','t',nvirt**2,nvirt,nocc,-1.d0,ss(1,1,1,j),nvirt**2,
|
|
$aijk(1,1,i,k),nvirt,1.d0,v,nvirt**2)
|
|
c TIMEADD(times(1,53),times)
|
|
c v(c,b,a) +> w(a,b,c)
|
|
c TIME0(times)
|
|
do c=1,nvirt
|
|
do b=1,nvirt
|
|
call daxpy(nvirt,1.d0,v(c,b,1),nvirt**2,w(1,b,c),1)
|
|
enddo
|
|
enddo
|
|
c TIMEADD(times(1,47),times)
|
|
elseif(rrd.eq.nocc) then
|
|
c a)
|
|
c TIME0(times)
|
|
call dgemm('n','t',nvirt**2,nvirt,nocc,-1.d0,rr(1,1,1,i),nvirt**2,
|
|
$aijk(1,1,k,j),nvirt,1.d0,w,nvirt**2)
|
|
c TIMEADD(times(1,50),times)
|
|
c d)
|
|
c TIME0(times)
|
|
call dgemm('n','t',nvirt,nvirt**2,nocc,-1.d0,aijk(1,1,i,k),nvirt,
|
|
$ rr(1,1,1,j),nvirt**2,1.d0,w,nvirt)
|
|
c TIMEADD(times(1,53),times)
|
|
endif
|
|
c f)
|
|
c TIME0(times)
|
|
call dgemm('n','t',nvirt**2,nvirt,nocc,-1.d0,ss(1,1,1,j),nvirt**2,
|
|
$aijk(1,1,k,i),nvirt,1.d0,w,nvirt**2)
|
|
c TIMEADD(times(1,55),times)
|
|
c e)
|
|
c TIME0(times)
|
|
call dgemm('n','t',nvirt,nvirt**2,nocc,-1.d0,aijk(1,1,i,j),nvirt,
|
|
$ss(1,1,1,k),nvirt**2,1.d0,w,nvirt)
|
|
c TIMEADD(times(1,54),times)
|
|
c TIMEADD(times(1,18),times(1,99))
|
|
c }}}
|
|
ENDIF
|
|
C calculating V
|
|
c TIME0(times)
|
|
c call dcopy(nvirt**3,w,1,v,1)
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
C$OMP& PRIVATE(a)
|
|
do a=1,nvirt
|
|
v(a,1:nvirt,1:nvirt)=w(a,1:nvirt,1:nvirt)
|
|
$ +t(a,i)*abij(1:nvirt,1:nvirt,j,intk)
|
|
enddo
|
|
C$OMP END PARALLEL DO
|
|
c
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
C$OMP& PRIVATE(b)
|
|
do b=1,nvirt
|
|
v(1:nvirt,b,1:nvirt)=v(1:nvirt,b,1:nvirt)
|
|
$ +t(b,j)*abij(1:nvirt,1:nvirt,i,intk)
|
|
enddo
|
|
C$OMP END PARALLEL DO
|
|
c
|
|
if (.not.abijinmem.and.j.ne.1) then
|
|
if (lapl.and.localcc.ne.'off ') then ! abij(.,.,i,j) is not available
|
|
call dgemm('t','n',nvirt,nvirt,dfnb,1.d0,lljpai(1,1,i),dfnb,
|
|
$ lljpai(1,1,j),dfnb,0.d0,IabIJ,nvirt) ! assembly the missing abij(.,.,i,j)
|
|
else ! talg=occ , jlen=1
|
|
call dgemm('n','t',nvirt,nvirt,dfnb,1.d0,jai(1,i,1),nvirt*nocc,
|
|
$ jai(1,j,1),nvirt*nocc,0.d0,IabIJ,nvirt) ! assembly the missing abij(.,.,i,j)
|
|
endif
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
C$OMP& PRIVATE(c)
|
|
do c=1,nvirt
|
|
v(1:nvirt,1:nvirt,c)=v(1:nvirt,1:nvirt,c)
|
|
$ +t(c,k)*Iabij(1:nvirt,1:nvirt)
|
|
enddo
|
|
C$OMP END PARALLEL DO
|
|
else ! abijinmem.or.j.eq.1
|
|
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
|
|
C$OMP& PRIVATE(c)
|
|
do c=1,nvirt
|
|
v(1:nvirt,1:nvirt,c)=v(1:nvirt,1:nvirt,c)
|
|
$ +t(c,k)*abij(1:nvirt,1:nvirt,i,j)
|
|
enddo
|
|
C$OMP END PARALLEL DO
|
|
endif
|
|
c do a=1,nvirt
|
|
c do b=1,nvirt
|
|
c do c=1,nvirt
|
|
c write(*,'(6i4,2es14.6)') k,j,i,a,b,c,w(a,b,c),v(a,b,c)
|
|
c enddo
|
|
c enddo
|
|
c enddo
|
|
return
|
|
end subroutine
|
|
c }}}
|
|
|
|
************************************************************************
|
|
subroutine dgemm_wabc(tra, trb, ndim1, ndim2, ndim3,
|
|
$ f1, a, lda, b, ldb, f2, c, ldc, ccsdmkl, v, d)
|
|
************************************************************************* ! {{{
|
|
c v = 0 : c(i, j, k) = a(i, j, l) * b(l, k) or a(i, l) * b(l, j, k)
|
|
c v = 1 : for j: c(i, j, k) = a(i, j, l) * b(l, k)
|
|
c v = 2 : for k: c(i, j, k) = a(i, k, l) * b(l, j)
|
|
c v = 3 : for k: c(i, j, k) = a(i, l) * b(l, j, k)
|
|
implicit none
|
|
|
|
integer ::v, d, ndim1, ndim2, ndim3, lda, ldb, ldc
|
|
real*8 :: a(lda, *), b(ldb, *), c(ldc, *), f1, f2
|
|
character :: tra, trb
|
|
character(len = 3) :: ccsdmkl
|
|
|
|
integer :: l
|
|
|
|
integer :: nthread, ptthreads
|
|
integer :: omp_max_threads, ccsdthreads, mkl_max_threads
|
|
common /threads/ ccsdthreads, ptthreads, omp_max_threads,
|
|
$ mkl_max_threads
|
|
|
|
integer omp_get_thread_num, nthreads
|
|
|
|
nthreads = omp_max_threads / ptthreads
|
|
#ifdef OMP
|
|
if(omp_get_thread_num()+1 .le. mod(omp_max_threads, ptthreads))
|
|
$ nthreads = nthreads + 1
|
|
#endif
|
|
|
|
c#ifdef MKL
|
|
c if(ccsdmkl .eq. 'seq') call mkl_set_num_threads(1)
|
|
c#endif
|
|
|
|
if(ccsdmkl .eq. 'thr' .or. v .eq. 0) then
|
|
call dgemm(tra, trb, ndim1, ndim2, ndim3,
|
|
$ f1, a, lda, b, ldb,
|
|
$ f2, c, ldc)
|
|
|
|
else if(v .eq. 1) then
|
|
if(tra .eq. 'n' .or. tra .eq. 'N') then
|
|
c$OMP PARALLEL DO SCHEDULE(DYNAMIC) DEFAULT(SHARED) PRIVATE(l)
|
|
c$OMP& NUM_THREADS(nthreads)
|
|
do l = 1, d
|
|
call dgemm(tra, trb, ndim1/d, ndim2, ndim3,
|
|
$ f1, a(ndim1/d*(l-1)+1, 1), lda, b, ldb,
|
|
$ f2, c(ndim1/d*(l-1)+1, 1), ldc)
|
|
end do
|
|
c$OMP END PARALLEL DO
|
|
|
|
else
|
|
c$OMP PARALLEL DO SCHEDULE(DYNAMIC) DEFAULT(SHARED) PRIVATE(l)
|
|
c$OMP& NUM_THREADS(nthreads)
|
|
do l = 1, d
|
|
call dgemm(tra, trb, ndim1/d, ndim2, ndim3,
|
|
$ f1, a(1, ndim1/d*(l-1)+1), lda, b, ldb,
|
|
$ f2, c(ndim1/d*(l-1)+1, 1), ldc)
|
|
end do
|
|
c$OMP END PARALLEL DO
|
|
end if
|
|
|
|
else if(v .eq. 2) then
|
|
if(tra .eq. 'n' .or. tra .eq. 'N') then
|
|
c$OMP PARALLEL DO SCHEDULE(DYNAMIC) DEFAULT(SHARED) PRIVATE(l)
|
|
c$OMP& NUM_THREADS(nthreads)
|
|
do l = 1, d
|
|
call dgemm(tra, trb, ndim1/d, ndim2, ndim3,
|
|
$ f1, a(ndim1/d*(l-1)+1, 1), lda, b, ldb,
|
|
$ f2, c(1, ndim2*(l-1)+1), ldc)
|
|
end do
|
|
c$OMP END PARALLEL DO
|
|
|
|
else
|
|
c$OMP PARALLEL DO SCHEDULE(DYNAMIC) DEFAULT(SHARED) PRIVATE(l)
|
|
c$OMP& NUM_THREADS(nthreads)
|
|
do l = 1, d
|
|
call dgemm(tra, trb, ndim1/d, ndim2, ndim3,
|
|
$ f1, a(1, ndim1/d*(l-1)+1), lda, b, ldb,
|
|
$ f2, c(1, ndim2*(l-1)+1), ldc)
|
|
end do
|
|
c$OMP END PARALLEL DO
|
|
end if
|
|
|
|
else if(v .eq. 3) then
|
|
if(trb .eq. 'n' .or. trb .eq. 'N') then
|
|
c$OMP PARALLEL DO SCHEDULE(DYNAMIC) DEFAULT(SHARED) PRIVATE(l)
|
|
c$OMP& NUM_THREADS(nthreads)
|
|
do l = 1, d
|
|
call dgemm(tra, trb, ndim1, ndim2/d, ndim3,
|
|
$ f1, a, lda, b(1, ndim2/d*(l-1)+1), ldb,
|
|
$ f2, c(1, ndim2/d*(l-1)+1), ldc)
|
|
end do
|
|
c$OMP END PARALLEL DO
|
|
|
|
else
|
|
c$OMP PARALLEL DO SCHEDULE(DYNAMIC) DEFAULT(SHARED) PRIVATE(l)
|
|
c$OMP& NUM_THREADS(nthreads)
|
|
do l = 1, d
|
|
call dgemm(tra, trb, ndim1, ndim2/d, ndim3,
|
|
$ f1, a, lda, b(ndim2/d*(l-1)+1, 1), ldb,
|
|
$ f2, c(1, ndim2/d*(l-1)+1), ldc)
|
|
end do
|
|
c$OMP END PARALLEL DO
|
|
end if
|
|
|
|
end if
|
|
|
|
c#ifdef MKL
|
|
c if(ccsdmkl .eq. 'seq') call mkl_set_num_threads(nthread)
|
|
c#endif
|
|
|
|
end subroutine ! }}}
|
|
|
|
************************************************************************
|
|
subroutine get_abci(i, abc_ofi, bsymmv, symmocc, dgroup,
|
|
$ co, first, last, va, irecln, ibufln, nbasis, nocc, nvirt,
|
|
$ nirmax, multpg, ccsdalg, dfnb, jab, jai, dcore, imem, n,
|
|
$ addr, ccsdmkl)
|
|
************************************************************************
|
|
implicit none
|
|
integer :: i, nvirt, nocc, nbasis, dfnb, imem, n, addr
|
|
integer :: irecln, ibufln, nirmax, dgroup
|
|
integer :: bsymmv(nirmax+1),symmocc(nirmax),multpg(nirmax,nirmax)
|
|
real*8 :: va(nvirt,nvirt,nvirt,*)
|
|
real*8 :: dcore(*)
|
|
integer :: co(nbasis)
|
|
integer :: first(0:5,8,8,nbasis),last(0:5,8,8,nbasis)
|
|
real*8, pointer :: abc_ofi(:, :, :)
|
|
real*8 :: jai(nvirt, nocc, dfnb), jab(dfnb, nvirt, nvirt)
|
|
character*8 :: ccsdalg
|
|
character*3 :: ccsdmkl
|
|
|
|
integer :: a, b
|
|
real*8 :: scr1v(nvirt)
|
|
|
|
c call rpoint3d(dcore(addr), abc_ofi, nvirt, nvirt, nvirt)
|
|
c write(*,*) "after pointer"
|
|
|
|
if(ccsdalg .eq. 'disk ') then
|
|
call abciread(i, 1, abc_ofi, bsymmv, symmocc, dgroup,
|
|
$ co, first, last, va, irecln, ibufln, nbasis,
|
|
$ nocc, nvirt, nirmax, multpg)
|
|
|
|
else if(ccsdalg .eq. 'dfdirect') then
|
|
! (ac|bi) = <ab|ci>
|
|
call dgemm_wabc('t', 't', nvirt**2, nvirt, dfnb,
|
|
$ 1.d0, jab, dfnb, jai(1, i, 1), nvirt*nocc,
|
|
$ 0.d0, abc_ofi, nvirt**2, ccsdmkl, 1, nvirt)
|
|
|
|
! acbi -> abci
|
|
do a = 1, nvirt
|
|
do b = 1, nvirt
|
|
scr1v(b+1:nvirt) = abc_ofi(a, b+1:nvirt, b)
|
|
abc_ofi(a, b+1:nvirt, b) =
|
|
$ abc_ofi(a, b, b+1:nvirt)
|
|
abc_ofi(a, b, b+1:nvirt) = scr1v(b+1:nvirt)
|
|
end do
|
|
end do
|
|
|
|
end if
|
|
|
|
end subroutine
|
|
C
|
|
************************************************************************
|
|
subroutine ttsort(tt,ttscr,nocc,nvirt)
|
|
************************************************************************
|
|
implicit none
|
|
integer nocc, nvirt
|
|
real*8 tt(nocc,nocc,nvirt,nvirt)
|
|
real*8 ttscr(nvirt,nvirt,nocc,nocc)
|
|
c real*8,allocatable :: ttscr(:,:,:,:)
|
|
integer i,j
|
|
c allocate(ttscr(nvirt,nvirt,nocc,nocc))
|
|
call dcopy(nvirt**2*nocc**2,tt,1,ttscr,1)
|
|
do j=1,nocc
|
|
do i=1,nocc
|
|
call dcopy(nvirt**2,ttscr(1,1,i,j),1,tt(i,j,1,1),nocc**2)
|
|
enddo
|
|
enddo
|
|
return
|
|
end subroutine
|
|
C
|
|
************************************************************************
|
|
subroutine sortabci(bsymmv,symmocc,dgroup,co,
|
|
$first,last,w,
|
|
$maxcor,imem,imem1,dcore,nocc,nvirt,nbasis,iout,nirmax,
|
|
$irecln,ibufln,multpg)
|
|
************************************************************************
|
|
implicit none
|
|
integer maxcor, imem, imem1
|
|
real*8 dcore(*)
|
|
integer nocc, nvirt, nbasis
|
|
integer iout
|
|
integer nirmax, dgroup
|
|
integer bsymmv(nirmax+1),symmocc(nirmax),multpg(nirmax,nirmax)
|
|
integer irecln, ibufln
|
|
|
|
integer co(nbasis)
|
|
integer first(0:5,8,8,nbasis),last(0:5,8,8,nbasis)
|
|
real*8 w(*)
|
|
integer imem2
|
|
integer n,nw,ind,ind2,avmem
|
|
logical notend,notend2
|
|
ind=1
|
|
notend=.true.
|
|
do while(notend)
|
|
C calculate free memory for old & new list
|
|
if(maxcor-(imem-imem1).ge.nocc*nvirt**2+nvirt**3)then
|
|
c avmem=maxcor-(imem-imem1)+max(nocc**3,ibufln)+nocc**3-nvirt**3 !SzL bug if ibufln>V**3-O**3
|
|
c imem2=imem-max(nocc**3,ibufln)-nocc**3+nvirt**3 !SzL
|
|
avmem=maxcor-(imem-imem1) !NP
|
|
imem2=imem
|
|
n=(avmem-nocc*nvirt**2-modulo(avmem-nocc*nvirt**2,nvirt**3))/
|
|
$nvirt**3
|
|
nw=(avmem-n*nvirt**3-modulo(avmem-n*nvirt**3,nocc*nvirt**2))/
|
|
$(nocc*nvirt**2)
|
|
else
|
|
write(iout,*)'Insufficient memory!'
|
|
call mrccend(1)
|
|
endif
|
|
|
|
if(ind+n-1.ge.nocc)then
|
|
notend=.false.
|
|
n=nocc-ind+1
|
|
nw=(avmem-n*nvirt**3-modulo(avmem-n*nvirt**3,nocc*nvirt**2))/
|
|
$(nocc*nvirt**2)
|
|
endif
|
|
C read old list
|
|
call abciread(ind,n,dcore(imem2),bsymmv,symmocc,dgroup,co,
|
|
$first,last,w,
|
|
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
|
|
C read&write new list
|
|
ind2=1
|
|
notend2=.true.
|
|
do while(notend2)
|
|
if(ind2+nw-1.ge.nvirt)then
|
|
notend2=.false.
|
|
nw=nvirt-ind2+1
|
|
endif
|
|
if(ind.ne.1)then
|
|
call iabcread(nw,ind2,dcore(imem2+n*nvirt**3),w,
|
|
$ nocc,nvirt,irecln,ibufln)
|
|
endif
|
|
call atpakol(n,nw,ind,ind2,dcore(imem2),
|
|
$dcore(imem2+n*nvirt**3),nvirt,nocc)
|
|
call iabcwrite(nw,ind2,dcore(imem2+n*nvirt**3),w,
|
|
$ nocc,nvirt,irecln,ibufln)
|
|
|
|
ind2=ind2+nw
|
|
enddo
|
|
|
|
ind=ind+n
|
|
enddo
|
|
|
|
return
|
|
end subroutine
|
|
c
|
|
C {{{ rralloc
|
|
************************************************************************
|
|
subroutine rralloc(avmem,nocc,nvirt,itrr,rrd,restofarrays)
|
|
************************************************************************
|
|
c determine and allocate the size of the rr array storing the transpose of T2
|
|
************************************************************************
|
|
implicit none
|
|
integer avmem,nocc,nvirt,restofarrays,dblalloc,itrr,rrd
|
|
integer ccsdthreads, ptthreads, omp_max_threads, mkl_max_threads
|
|
common /threads/ ccsdthreads, ptthreads, omp_max_threads,
|
|
$ mkl_max_threads
|
|
c
|
|
if (avmem-restofarrays.ge.nvirt**2*nocc**2) then
|
|
itrr=dblalloc(nvirt**2*nocc**2) ! store also rr=tt^traspose for the full tt
|
|
rrd=nocc
|
|
else
|
|
itrr=dblalloc(nvirt*nocc*max(nvirt,nocc)*ptthreads) ! v*o**2 for lajik
|
|
rrd=1
|
|
endif
|
|
c
|
|
return
|
|
end subroutine
|
|
c }}}
|
|
|
|
************************************************************************
|
|
subroutine progress_bar(iout, pr, first_call)
|
|
************************************************************************
|
|
c Prints progress bar based on pr containing the value of the progress normalized to unity.
|
|
implicit none
|
|
real*8, intent(in) :: pr
|
|
integer, intent(in) :: iout
|
|
logical, intent(in) :: first_call
|
|
|
|
integer, save :: pr_prev
|
|
integer :: pr_current, i
|
|
logical :: last_call
|
|
|
|
integer, parameter :: length = 42
|
|
character(len = length) ::
|
|
$ bar = ' |---------|---------|---------|---------|'
|
|
|
|
if(first_call) then
|
|
write(iout, '(a44)')
|
|
$ ' 0% 25% 50% 75% 100%'
|
|
pr_prev = 2
|
|
write(iout, '(a2)', advance = 'no') bar(1:pr_prev)
|
|
flush(iout)
|
|
end if
|
|
|
|
pr_current = min(nint(pr / 0.025d0), 40) + 2
|
|
|
|
last_call = pr_current.eq.length .and. pr_prev.ne.length
|
|
|
|
do i = pr_prev+1, pr_current
|
|
write(iout, '(a1)', advance = 'no') bar(i:i)
|
|
flush(iout)
|
|
end do
|
|
|
|
if(last_call) then
|
|
write(iout, *)
|
|
flush(iout)
|
|
end if
|
|
|
|
pr_prev = max(pr_current, pr_prev)
|
|
|
|
end subroutine
|