2024-07-25 10:27:17 +02:00

5517 lines
178 KiB
Fortran

#ifdef TIMING
#define TIME0(a) call time0(a)
#define TIMEADD(a, b) call timeadd(a, b)
#define WRITE_TIMES .true.
#else
#define TIME0(a) !call time0(a)
#define TIMEADD(a, b) !call timeadd(a, b)
#define WRITE_TIMES .false.
#endif
************************************************************************
subroutine ccsdpt(ijij,ijia,ijab,iabj,igsl,ccmem,
$imem,iimem,maxcor,dcore,icore,iout,
$irecln,ibufln,nirmax,multpg,diisfile,errfile,tfile,
$tinfo,ifcfile,gbasfile,ifltln,mpi_ccsd,master_rank,bcast_comm,
$inbcast,master_thread,ccsd_communicator,c,cc,is,iss,leom)
************************************************************************
use omp_lib
implicit none
#if defined(MPI)
include "mpif.h"
#endif
integer maxcor,imem,iimem,wsize,locnoc
integer diisfile,errfile,tfile,tinfo
integer ifcfile,gbasfile,ifltln
integer icore(*),iout,ccsd_communicator
real*8 dcore(*),eref,emp2,ecc,tol
integer irecln,ibufln,nbasis,nocc,nvirt,dgroup,nirmax,naobasis
integer bsymmv(nirmax+1),symmocc(nirmax),multpg(nirmax,nirmax)
integer i,dblalloc,intalloc,it,itt,iv,ivv,iscrs
integer ibmat,ibvec,iinvbmat,icvec
integer ibeta,ix,irr,ivvend
integer ifai,ifab,ifij,is,iss
integer ih,iui,iint1
integer iico,ico,ifirst,ilast
integer ibuffer, iend
integer imem1,ccmem(8)
integer dfnb,ccmaxit,mxmem,ittend,nbset
integer, allocatable :: nbfread(:)
integer talgoccmem,talgvirtmem,talgmem
common/memcom/imem1
integer ccsdthreads, ptthreads
character*3 ccsdmkl
character*4 ccsdth, ptth, ccsdrest, ovirt
character*4 cctol,localcc,talg,dfbasis_cor
character*8 dfintran,ccmxit,ccsdalg,lccoporder, csrc
character*16 calctype,tprint,rank_ch
character*16 naf, met, eps
integer deglev,nc, ios, is1, is2, is3
real*8 esdi,tmp, emp2full, emp2corr
real*8 et,nenergy,ibuftmp(ibufln)
logical localcc15p, master_thread, lno, lnaf, leom, lpert
integer ijij, ijai, ijia, ijab, ifia, iabj , igsl
integer ifij0, ifai0, ifia0, ifab0, ifock
integer ihij, ihai, ihia, ihab
integer memreq,memreq1, memreq2, memreq3
integer mra2, mrbc2, mrd2, mreg2, mrtrf, mren, mrri
integer inorm, jlen
logical ptrest, ccsd_converged
integer to_abij
integer tmemmin,tlape,talglaplmem, laplminmem, abcistoremin, max
integer dhyb
real*8 cmp2os, cmp2ss, cmp3
c real*8 c(nvirt, nocc), cc(nvirt, nvirt, nocc*(nocc+1)/2)
c real*8 c(:, :), cc(:, :, :)
real*8 c, cc, ecc_ppl_scaled,ecc_ppl, eppl_correction
integer ifij_c1, ifab_c1, ifia_c1, ifai_c1
integer ijai_c1, ijab_c1, ijij_c1
integer master_rank, bcast_comm
logical inbcast, ppl_calc, lmp3
c integer OMP_GET_NUM_THREADS, OMP_GET_MAX_THREADS
integer MKL_GET_MAX_THREADS
c INTEGER(KIND=OMP_PROC_BIND_KIND) omp_get_proc_bind
integer omp_max_threads, mkl_max_threads
common /threads/ ccsdthreads, ptthreads, omp_max_threads,
$ mkl_max_threads
save it, itt, iv, ivv, ivvend
save iui, iico, ico, ifirst, ilast
save iscrs, ifij, ifab, ittend, inorm, ibmat, ibvec, ifock
save ih, iinvbmat, icvec, ifai, iint1, ibeta, ix, irr, ibuffer
save iend
character*32 outfilename, output_folder
integer iout2
common /outunit/ iout2, outfilename, output_folder
! MPI
character*128 ifacename
logical mpi_ccsd, flag,lf12
integer mpi_rank, mpi_size, mpi_err
#if defined (MPI)
integer name_len
character*(MPI_MAX_PROCESSOR_NAME) cpu_name
c character*8 rank_ch
if(mpi_ccsd) then
if(ccsd_communicator .eq. MPI_COMM_NULL) return
call MPI_Initialized(flag, mpi_err)
if(.not. flag) call MPI_Init(mpi_err)
call MPI_Comm_rank(ccsd_communicator, mpi_rank, mpi_err)
call MPI_Comm_size(ccsd_communicator, mpi_size, mpi_err)
c if(mpi_size .eq. 1) call MPI_Finalize(mpi_err)
! broadcast input files
call bcast_file('KEYWD', bcast_comm)
call bcast_file('VARS', bcast_comm)
call bcast_file('55', bcast_comm)
call MPI_Barrier(ccsd_communicator, mpi_err)
if(!mpi_size .ne. 1 .or.
$ ccsd_communicator .ne. MPI_COMM_WORLD) then
call MPI_Get_processor_name(cpu_name, name_len, mpi_err)
write(rank_ch, '(i8)') mpi_rank
if(.not.leom .and. .not.master_thread) then
write(*,"(' ccsd rank ', a4, ' running on ', a20)")
$ trim(adjustl(rank_ch)), cpu_name
else
write(*,"(' ccsd rank ', a4, ' running on ', a20,
$ ', master')")
$ trim(adjustl(rank_ch)), cpu_name
end if
call MPI_Barrier(ccsd_communicator, mpi_err)
call sleep_f(1)
end if
if(.not.leom .and. master_thread) write(iout, *)
else
#endif
mpi_rank = 0
mpi_size = 1
master_rank = 0
bcast_comm = 0 !MPI_COMM_NULL (other MPI implementations?)
inbcast = .true.
master_thread = .true.
#if defined(MPI)
end if
#endif
#if defined(OMP)
omp_max_threads = OMP_GET_MAX_THREADS()
#ifdef Intel
# if __INTEL_COMPILER < 1910 || defined(ONEDPL_VERSION_MAJOR)
call omp_set_nested(.true.)
# endif
#else
call omp_set_nested(.true.)
#endif
call omp_set_max_active_levels(1)
#ifdef MKL
mkl_max_threads = MKL_GET_MAX_THREADS()
call mkl_set_dynamic(0)
#else
mkl_max_threads = omp_max_threads
#endif
#else
omp_max_threads = 1
mkl_max_threads = 1
#endif
c ibind = omp_get_proc_bind()
C Allocate memory (if not allocated already)
call memalloc
C Read keywords
call getkey('calc',4,calctype,16)
call getkey('ccsdalg',7,ccsdalg,8)
call getkey('lccoporder',10,lccoporder,8)
if (lccoporder.eq.'lccfirst') call mrccini_from_subroutine ! initialize when mrccini is not executed in main
call getkey('dfintran',8,dfintran,8)
call getkey('talg',4,talg,4)
call getkey('dfbasis_cor',11,dfbasis_cor,4)
lpert = trim(calctype).eq.'mp2' .or. trim(calctype).eq.'mp3'
if(trim(calctype).eq.'mp3') then
lmp3 = .true.
else
lmp3 = .false.
end if
if(leom .or. trim(calctype).eq.'mp3') then
ccmaxit = 1
else
call getkey('ccmaxit',7,ccmxit,8)
read(ccmxit,*) ccmaxit
end if
call uppercase(calctype,calctype,16)
if(.not.leom) then
write(iout, '(/" ")', advance = 'no')
if (ccsdalg .eq. 'dfdirect')
$ write(iout, '("Direct ")', advance = 'no')
if (dfbasis_cor .ne. 'none')
$ write(iout, '("DF-")', advance = 'no')
write(iout, '(2a)') trim(adjustl(calctype)) // ' calculation'
write(iout,*)
end if
lf12=.false.
if(trim(calctype).eq.'CCSD(T)-F12') then
calctype='CCSD(T) '
lf12=.true.
else if(trim(calctype).eq.'CCSD-F12') then
calctype='CCSD '
lf12=.true.
endif
#if defined (MPI)
if(lf12) then
call bcast_file('F12INTE', bcast_comm)
call bcast_file('F12INT1', bcast_comm)
end if
#endif
call getkey('cctol',5,cctol,4)
read(cctol,*) i
tol=10.d0**(-i)
if(.not.leom .and. trim(calctype).ne.'mp3') write(iout,2015) tol
2015 format(' Convergence criterion: ',1pe8.1)
call getkey('localcc',7,localcc,4)
if(localcc.ne.'off '.and.
$ master_thread) then !no ifcfile write for (group) slave threads if localcc & 2 layer MPI
locnoc=3
else
locnoc=0
endif
call getkey('naf_cor',7,naf,16)
c call getkey('eps',3,eps,16)
call getkey('ovirt',5,ovirt,4) !RZ
call getkey('tprint',6,tprint,16)
call getkey('ccsdmkl',7,ccsdmkl,3)
call getkey('ccsdrest',8,ccsdrest,4)
if(talg.eq.'lapl' .and. ccsdrest.ne.'off ') then
write(iout,*) 'ERROR: Laplace (T) correction can only be calcul
$ated with ccsdrest=off!'
call mrccend(1)
end if
emp2full = 0.d0
lno = ovirt.eq.'ppl ' .or. ovirt.eq.'mp2 ' .or. ovirt.eq.'osv'
ppl_calc = ovirt .eq. 'ppl '
lnaf = naf .ne. 'off '
if((lno .or. lnaf) .and. dfintran.ne.'ovirt ' .and.
$ localcc.eq.'off ') then
if(master_thread) then
open(ifcfile, file = "iface", status = "old")
read(ifcfile, *)
met = " "
do while(met .ne. "MP2 ")
read(ifcfile, 7596, iostat = ios)
$ csrc, met, is1, is2, is3, emp2full
if(ios .ne. 0) then
write(iout, *)
$ "Error: MP2 energy not found in file iface!"
call mrccend(1)
end if
end do
close(ifcfile)
c if(eps .ne. " ") lno = .true.
c write(*,*) "MP2_0: ",emp2full
end if
end if
7596 format(a8,1x,a15,1x,3(i2,1x),1pe23.15)
#if defined(OMP)
call getkey('ccsdthreads',11,ccsdth,4)
read(ccsdth,*) ccsdthreads
if(ccsdthreads .gt. omp_max_threads) then
write(iout,'(/"WARNING: ccsdthreads greater than the maximal nu
$mber of OpenMP threads")')
call mrccend(1)
end if
if(ccsdthreads .gt. mkl_max_threads) then
write(iout, '(/"WARNING: ccsdthreads greater than the maximal n
$umber of MKL threads")')
write(iout, '(" using number of OpenMP threads instead")')
mkl_max_threads = omp_max_threads
end if
call getkey('ptthreads',9,ptth,4)
read(ptth,*) ptthreads
c if(mpi_rank .eq. 0) ptthreads = 1
if(calctype.eq.'CCSD(T) ' .and.
$ ptthreads .gt. omp_max_threads) then
write(iout,'(/"WARNING: ptthreads greater than the maximal numb
$er of OpenMP threads")')
call mrccend(1)
end if
if(ptthreads .gt. mkl_max_threads .and.
$ ccsdthreads .le. mkl_max_threads) then
write(iout, '(/"WARNING: ptthreads greater than the maximal num
$ber of MKL threads")')
write(iout, '(" using number of OpenMP threads instead")')
mkl_max_threads = omp_max_threads
end if
#else
ccsdthreads = 1
ptthreads = 1
#endif
call getvar('dhyb ',dhyb)
if(dhyb .eq. 3) then
call getvar('cmp2s ',cmp2os)
call getvar('cmp2t ',cmp2ss)
call getvar('cmp3 ',cmp3)
end if
call getvar('ncore ',nc)
if (localcc.ne.'off ') nc=0 ! ncore is meaningless for domains in local CC
if(.not.leom) write(iout,*)
c open(minpfile,file='MINP')
ifacename='iface'
c#if defined (MPI)
c if (localcc.eq.'2022') then ! not needed if all LISs have dedicated folders
c write(rank_ch, '(i8)') mpi_rank ! ??? mpi_rank not ok, not the same in ldrpa mpi and ccsd mpi
c ifacename='iface.' // trim(adjustl(rank_ch))
c endif
c#endif
call getvar('nbset ',nbset)
allocate(nbfread(nbset))
if(master_thread)
$ open(ifcfile,status='unknown',file=ifacename,position='append')
c open(555,file='fort.555')
open(555,file='55') !RZ.
rewind 555
if(dfbasis_cor .eq. 'none') then
dfnb = 1
read(555, *)
else if (localcc15p(localcc)) then
read(555,*) (dfnb,i=1,4), dfnb
else
call getvar('nbf ',nbfread)
naobasis=nbfread(1)
if(naf.eq.' ' .or. dfintran.eq.'ovirt ' .or.
$ localcc .eq. '2013') then
dfnb=nbfread(3)
read(555,*) !!!!!
else
read(555,*) (dfnb,i=1,4), dfnb
endif
endif
c pontcsop dim
read(555,*) dgroup
c a palyak szimmetria szerint rendezve; a szimmetriak hatarait mutatja meg
read(555,*) bsymmv(1:dgroup+1)
c adott szimmhez tartozo betoltott palyak szama
read(555,*) symmocc(1:dgroup)
nocc=0
do i=1,dgroup
nocc=nocc+symmocc(i)
enddo
nbasis=bsymmv(dgroup+1)-1
nvirt=nbasis-nocc
if (localcc15p(localcc)) naobasis=nbasis ! warning: dummy initialization!!!
wsize=max(nocc,nvirt)**3+ibufln
if(nocc.eq.0 .or. nvirt.eq.0) goto 123
close(555,status='keep')
if(.not.leom) then
if (dfbasis_cor.eq.'none') then
write(iout,"(' Number of occupied/virtual orbitals:',2i6)")
$ nocc,nvirt
c if (dfnb.eq.0) dfnb=1
else
write(iout,
$ "(' Number of occupied/virtual/auxiliary orbitals:', 3i6)")
$ nocc,nvirt,dfnb
endif
endif
!NP
c construct input file 56 and 2e integrals <pq|rs> from J_pq density fitted integrals
if (localcc15p(localcc).and..not.(ccsdalg .eq. 'dfdirect')) then ! TODO: localcc=2022
#ifdef MPI
if(mpi_size .gt. 1) then
call bcast_file('DFINT_IJ', bcast_comm)
call bcast_file('DFINT_AI', bcast_comm)
call bcast_file('DFINT_AB', bcast_comm)
if(lnaf) then
call bcast_file('DFINT_AI_NONAF', bcast_comm)
call bcast_file('iface', bcast_comm)
end if
end if
#endif
mxmem=maxcor-(imem-imem1)
tmp=0.d0
write(iout,*) 'Assembly of four-center integrals'
write(iout,*)
call jpq2pqrs(nocc,nvirt,dfnb,dcore,irecln,ibufln,ibuftmp,mxmem
$ ,imem,tmp,'ccsd')
endif
#ifdef MPI
if ((localcc15p(localcc) .and. naf.ne.'off ') ! TODO: localcc=2022
$ .and. (lccoporder.eq.'trffirst' .or. ccsdalg.eq.'disk ')
$ .and. mpi_size.gt.1) call bcast_file('ajb', bcast_comm)
#endif
!NP
eppl_correction = 0.d0
C Partitioning memory
allocation: if(.not.leom) then
iui=dblalloc(nocc)
iico=intalloc(nbasis)
ico=intalloc(nbasis)
ifirst=intalloc(6*8*8*nbasis)
ilast=intalloc(6*8*8*nbasis)
if(ccsdalg .eq. 'dfdirect'.and.lccoporder.eq.'trffirst') then
ijij = dblalloc(dfnb*nocc**2)
ijia = dblalloc(dfnb*nocc*nvirt)
ijab = dblalloc(dfnb*nvirt**2)
endif
c
c minimum memory requirement for canconical talg=occ/virt route
talgmem=nocc+nvirt+nvirt*nocc+nvirt**2*nocc**2+nvirt*nocc**3 ! common part: fdo+fdv+itnew+ittnew+iaijk
if(ccsdalg .eq. 'disk') then
to_abij = nvirt**2*nocc**2
else
to_abij = nvirt**2*nocc
end if
talgoccmem=talgmem + to_abij + ! abij
$ ptthreads*nvirt*nocc*max(nvirt,nocc) + ! trr
$ nvirt**3 + ptthreads*(nvirt**2+3*nvirt**3+wsize) ! IabIJ,abck,w,abci,abcj,v
c $ ptthreads*(nvirt**2+nvirt**3+wsize) ! IabIJ,abck,w,abci,abcj,v
talgvirtmem=talgmem+2*nvirt**2*nocc**2+6*nocc**2+2*nocc**3+ ! abij,trr,iv1-iw3,scro,w
$ wsize+3*nvirt**2*nocc ! v+abci block
if(ccsdalg .eq. 'disk') then
tlape = nocc**2 + dfnb*(nvirt**2+ nvirt*nocc+nocc**2)
else if(lccoporder .eq. 'trffirst') then
tlape = nocc**2
else
tlape = 0
end if
laplminmem=(2+ptthreads)*nvirt**3+wsize*ptthreads+
$ nvirt**2*(dfnb+nocc*ptthreads)
abcistoremin=nvirt**3*(nocc+ptthreads)+wsize*ptthreads+
$ nvirt**2*nocc*ptthreads ! store full abci & rr(a,b,i,1) & w and v
talgmem = talgmem - nvirt*nocc**3 +
$ max(nvirt*nocc**3, dfnb*nocc*max(nvirt, nocc)) ! aijk
if(localcc .eq. 'off ') then
laplminmem=abcistoremin
else
laplminmem=min(laplminmem,abcistoremin)
end if
if(localcc .eq. 'off ') then
jlen = nocc
else
jlen = 1
end if
talglaplmem=talgmem + tlape + laplminmem +
c $ max(nvirt*nocc**3, dfnb*nocc*max(nocc, nvirt)) + ! aijk
$ dfnb*nvirt*nocc + max(nocc, nvirt)**2*ptthreads + ! lljpai, IabIJ
$ nvirt*nocc*max(nvirt, nocc) + ! ttl
$ nvirt**2*(nocc*(1+jlen)+ptthreads) + max(nocc, nvirt)*nocc ! ttl2, abij, ttl3, t1
if(talg .eq. 'occ ') then
tmemmin = talgoccmem
else if(talg .eq. 'virt') then
tmemmin = talgvirtmem
else if(talg .eq. 'lapl') then
tmemmin = talglaplmem
end if
if(.not.leom .and. calctype.eq.'CCSD(T) ') then
c if (localcc.eq.'off ') then
write(iout,'(f10.2,
$ " MB is the minimal memory requirement for (T)")')
$ (tmemmin+(imem-imem1))*8.d0/1024**2
if(maxcor-(imem-imem1).lt.tmemmin) then
write(iout,*)"Insufficient memory for (T) correction"
c call mrccend(1)
c else
c if(talg.eq.'occ '.and.maxcor-(imem-imem1).lt.talgoccmem) then
c talg='virt'
c write(iout,*)"Warning: Insufficient memory for talg=occ,
c $ switching to talg=virt"
c write(iout,'(f10.2,
c $" MB is the minimal memory requirement for the faster talg=occ")')
c $ (talgoccmem+(imem-imem1))*8.d0/1024**2
c call flush(iout)
c endif
c endif
endif
endif
c
iscrs=dblalloc(max(nvirt*(nvirt+1),nocc**2,
$ nocc*nvirt*ccsdthreads))
c warning: do not change the position of ifij, ifab, it, itt to stay consistent with the "Preparing memory" part of (T)
ifij=dblalloc(nocc**2)
ifab=dblalloc(nvirt**2)
it=dblalloc(nocc*nvirt)
c itt=dblalloc(nvirt**2*(nocc+1)*nocc/2) ! has to be here so that it is not overwritten by ittnew in (T)
! for tt or (+)tt and (-)tt
itt=dblalloc(nvirt*(nvirt+1)*(nocc+1)*nocc/2+1) ! has to be here so that it is not overwritten by ittnew in (T)
ittend=itt+nvirt**2*(nocc+1)*nocc/2 ! stores tt or (+)tt and (-)tt; +1 for MPI communication
inorm = itt+nvirt**2*(nocc+1)*nocc/2
ibmat=dblalloc(ccmaxit**2)
ibvec=dblalloc(ccmaxit**2)
iinvbmat=dblalloc((ccmaxit+1)**2)
icvec=dblalloc(ccmaxit)
ifock = dblalloc(nbasis**2)
C
ih=dblalloc(nbasis**2)
ifai=dblalloc(nvirt*nocc)
if(localcc.ne.'off ')then
iint1=dblalloc(nocc**2)
else
iint1=1
endif
ibeta=dblalloc(nocc**2)
ix=dblalloc(nvirt**2)
irr=dblalloc(nvirt*nocc)
ibuffer=dblalloc(ibufln)
iv=dblalloc(nvirt*nocc)
ivv=dblalloc(nvirt**2*(nocc+1)*nocc/2)
ivvend=ivv+nvirt**2*(nocc+1)*nocc/2
iend = iv
else allocation
imem = iend
iv = is
ivv = iss
end if allocation
C
call readinfo(nbasis,nocc,dgroup,bsymmv,symmocc,
$icore(iico),icore(ico),dcore(ih),nenergy,icore(ifirst),
$icore(ilast),dcore(iui),deglev,ccsdalg,nirmax)
C
123 continue
if(ccsdalg .eq. 'disk ') then
ifij0 = imem
ifia0 = imem
ifai0 = imem
ifab0 = imem
ijij = imem
ijia = imem
ijai = imem
ijab = imem
ifia = imem
! 15a
memreq1 = (imem-imem1)+nvirt**2*nocc**2+nvirt**3
! 2g
memreq2 = (imem-imem1)+nvirt**2*nocc**2+nocc**4+nocc**3*(nocc+1)/2
! 4
memreq3 = (imem-imem1)+nocc**3*nvirt+nvirt**3+nvirt**2*(nvirt+1)/2
$ +nocc*(nocc+1)/2*(nocc*nvirt+nvirt*(nvirt+1))
memreq = max(memreq1, memreq2, memreq3)
if(.not.leom) write(iout,'(f10.2,
$ " MB is the minimal memory requirement for CCSD")')
$ memreq*8.d0/1024**2
call ccsdcore(dcore(it),dcore(itt),dcore(iv),dcore(ivv),
$ dcore(iscrs),dcore(imem),dcore(ih),
$ dcore(ibmat),dcore(ibvec),dcore(iinvbmat),dcore(icvec),
$ dcore(ibeta),dcore(ix),dcore(irr),
$ dcore(ifij),dcore(ifab),dcore(ifai),
$ icore(ico),icore(ifirst),icore(ilast),
$ dgroup,bsymmv,symmocc,dcore(ibuffer),
$ calctype,localcc,dcore(iui),dcore(iint1),esdi,
$ dfintran,ccmaxit,tprint,nc,ccsdalg,nenergy,
$ irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg,
$ maxcor,imem,dcore,ifltln,iout,diisfile,errfile,
$ tfile,tinfo,eref,ecc,emp2,tol,locnoc,mpi_rank,mpi_size,
$ mpi_ccsd,ccsdmkl,ccsdthreads,omp_max_threads,
$ ccsdrest,ptrest, ccsd_converged, ccsd_communicator,
$ emp2full,lno,lnaf,emp2corr,master_thread, lpert)
else if(ccsdalg .eq. 'dfdirect') then
ihij = dblalloc(nocc**2)
ihia = dblalloc(nocc*nvirt)
ihai = dblalloc(nocc*nvirt)
ihab = dblalloc(nvirt**2)
ijai = dblalloc(dfnb*nocc*nvirt)
ifai0 = dblalloc(nocc*nvirt)
ifij0 = dblalloc(nocc**2)
ifab0 = dblalloc(nvirt**2)
ifia = dblalloc(nvirt*nocc)
if(leom) then
ifij_c1 = dblalloc(nocc**2)
ifab_c1 = dblalloc(nvirt**2)
ifia_c1 = dblalloc(nvirt*nocc)
ifai_c1 = dblalloc(nvirt*nocc)
ijai_c1 = dblalloc(dfnb*nocc*nvirt)
ijab_c1 = dblalloc(dfnb*nvirt**2)
ijij_c1 = dblalloc(dfnb*nocc**2)
else
ifij_c1 = imem
ifab_c1 = imem
ifia_c1 = imem
ifai_c1 = imem
ijai_c1 = imem
ijab_c1 = imem
ijij_c1 = imem
end if
mra2 = (imem-imem1) + 2*nvirt*(nvirt+1) + nocc*(nocc+1)/2
#if defined(OMP)
$ + omp_get_max_threads() * nvirt**2
#endif
mrbc2 = (imem-imem1) + nocc**3*(nocc+1)/2 +
$ (3 + 1) * ccsdthreads * nvirt**2*nocc +
$ nvirt*nocc*dfnb + nvirt**2
mrd2 = (imem-imem1) + nvirt**2 +
$ (3 +1) * ccsdthreads * nvirt**2*nocc
mreg2 = (imem-imem1) + 2 * nvirt*nocc*dfnb
mrtrf = (imem-imem1) + dfnb*max(nocc**2, nocc*nvirt)
mren = (imem-imem1) + 2 * nocc**2*nvirt
mrri = (imem-imem1) + dfnb*nocc*(nocc+1)/2 ! to read integrals
memreq = max(mra2, mrbc2, mrd2, mreg2, mrtrf, mren, mrri)
if(.not.leom .and. .not.lmp3) write(iout,'(f10.2,
$ " MB is the minimal memory requirement for CCSD")')
$ memreq*8.d0/1024**2
ccmem(5)=memreq
call dfccsdcore(dcore(it),dcore(itt),dcore(iv),dcore(ivv),
$ dcore(iscrs),iscrs,
$ dcore(ijij),dcore(ijai),dcore(ijia),dcore(ijab),
$ dcore(ih),
$ dcore(ifij0),dcore(ifai0),dcore(ifab0),dcore(ifock),
$ dcore(ibmat),dcore(iinvbmat),
$ dcore(ibeta),dcore(ix),
$ dcore(ifij),dcore(ifab),dcore(ifai),dcore(ifia),
$ localcc,dcore(iui),
$ dcore(iint1),esdi,dfintran,ccmaxit,tprint,nc,dfnb,
$ ccsdalg,lccoporder,dcore(iabj),ccmem,
$ nbasis,nocc,nvirt,iout,ifltln,naobasis,imem,
$ maxcor,dcore,icore,eref,emp2,ecc,diisfile,errfile,tol,
$ locnoc,
$ mpi_rank,mpi_size,dcore(inorm),master_rank, master_thread,
$ mpi_ccsd,
$ ccsdmkl, ccsdthreads, omp_max_threads, mkl_max_threads,
$ ccsdrest, ptrest, ccsd_converged, ccsd_communicator,
$ emp2full,lno,lnaf,emp2corr, c, cc, leom,
$ dcore(ifia_c1), dcore(ifij_c1), dcore(ifab_c1),
$ dcore(ifai_c1),
$ dcore(ijij_c1), dcore(ijab_c1), dcore(ijai_c1),
$ eppl_correction, ppl_calc, lmp3, calctype,lf12,
$ cmp2os, cmp2ss, cmp3, dhyb, lpert, nbfread)
else
call unknown('ccsdalg', 7)
end if
C
if(calctype.eq.'CCSD(T) ') then ! .and. ccsd_converged) then
if(ccsdalg .eq. 'dfdirect') then
call dcopy(nocc**2, dcore(ifij0), 1, dcore(ifij), 1)
call dcopy(nvirt**2, dcore(ifab0), 1, dcore(ifab), 1)
end if
call ptcore(dcore(it),dcore(itt),dcore(ifij),dcore(ifab),
$ icore(ico),icore(ifirst),icore(ilast),dgroup,bsymmv,symmocc,
$ iscrs,localcc,dcore(iui),esdi,et,iui,dfnb,ittend,ijij,ijia,
$ ijab,ccsdalg,igsl,lccoporder,ccmem,talg,
$ irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg,
$ iout,maxcor,imem,dcore,iimem,icore,wsize,gbasfile,ecc,eref,
$ locnoc,mpi_rank,mpi_size,imem,mpi_ccsd,
$ ccsdmkl, ptthreads, omp_max_threads, mkl_max_threads,
$ ccsdrest, ptrest, bcast_comm, ccsd_communicator,emp2corr,
$ lno,lnaf,eppl_correction,emp2,emp2full,master_rank,
$ master_thread,lf12,ppl_calc)
end if
c if(localcc.ne.'off ') then
call dbldealloc(iui)
c else
c call dbldealloc(ivvend)
c end if
if(master_thread) close(ifcfile,status='keep')
#if defined(MPI)
if(mpi_ccsd .and. mpi_size .ne. 1.and.localcc.eq.'off ') then ! TODO: localcc
if(master_thread) then
call gather_file('ccsd', ccsd_communicator)
else
call gather_file(outfilename, ccsd_communicator)
call delete_file(outfilename, .true.)
call delete_file('KEYWD', inbcast)
call delete_file('VARS', inbcast)
call delete_file('55', inbcast)
if(lf12) then
call delete_file('F12INTE', inbcast)
call delete_file('F12INT1', inbcast)
end if
end if
end if
#endif
if(ccsd_converged .and. inbcast) then !master_thread) then
c call delete_file('ccsd.rst', .true.)
c call delete_file('pt.rst', .true.)
call delete_file('fort.18', .true.)
call delete_file('fort.19', .true.)
end if
end subroutine
************************************************************************
subroutine ccsdcore(t,tt,v,vv,scrs,vscr,h,bmat,bvec,invbmat,cvec,
$beta,x,rr,fij,fab,fai,co,first,last,dgroup,bsymmv,symmocc,buffer,
$calctype,localcc,ui,int1,esdi,dfintran,ccmaxit,tprint,nc,ccsdalg,
$nenergy,
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg,maxcor,imem,dcore,
$ifltln,iout,diisfile,errfile,tfile,tinfo,eref,ecc,emp2,tol,locnoc,
$mpi_rank,mpi_size, mpi_ccsd,ccsdmkl,ccsdthreads,omp_max_threads,
$ccsdrest,ptrest, ccsd_converged, ccsd_communicator, emp2full,
$lno,lnaf,emp2corr,master_thread,lpert)
************************************************************************
implicit none
integer irecln,ibufln,nbasis,nocc,nvirt,dgroup,nirmax
integer bsymmv(nirmax+1),symmocc(nirmax),multpg(nirmax,nirmax)
integer maxcor, imem, ifltln,iout,diisfile,errfile,tfile,tinfo
real*8 dcore(*)
real*8 eref,ecc,emp2,tol
integer a,b,c,i,j,k,l,n
integer ttind,ttind2,ind,id,nc
integer ccmaxit,locnoc, nit0, ccsd_communicator
real*8 norm,nenergy
real*8 bmat(ccmaxit,ccmaxit),invbmat(ccmaxit+1,ccmaxit+1)
real*8 cvec(ccmaxit),bvec(ccmaxit**2)
real*8 tt(nvirt,nvirt,(nocc+1)*nocc/2)
real*8 vv(nvirt,nvirt,(nocc+1)*nocc/2)
real*8 scrs(nvirt,nvirt),vscr(*)
real*8 beta(nocc,nocc),x(nvirt,nvirt)
real*8 t(nvirt,nocc),rr(nvirt,nocc),v(nvirt,nocc)
real*8 fab(nvirt,nvirt),fai(nvirt,nocc),fij(nocc,nocc)
real*8 h(nbasis,nbasis),ui(nocc)
real*8 eccold,esdi, emp2full, emp2corr
integer mpi_rank, mpi_size
integer ccsdthreads, omp_max_threads
character*3 ccsdmkl
character*4 ccsdrest
integer co(nbasis)
integer first(0:5,8,8,nbasis),last(0:5,8,8,nbasis)
integer recbounds(nvirt,2)
real*8 buffer(ibufln)
integer imem1,avmem,dblalloc
common/memcom/ imem1
logical notend,lene, master_thread, lpert
c character*1 ison
character*4 localcc,dfbasis_cor,ovirt
character*8 dfintran
character*16 calctype,tprint
integer itmp
real*8 int1(nocc,nocc),tprtol
integer isrpa
logical ltpr,localcc15p
c times
real*8 ttmp(6,10),times(6,0:100)
character*80 txt(0:100)
real*8, pointer :: alpha_ijkl(:,:,:,:),cc_abIJ(:,:,:),ck_IJKL(:,:)
real*8, pointer :: ks_aklj(:,:,:,:), tk_iklj(:,:,:,:)
real*8, pointer :: ks_ak_ofIJ(:,:)
real*8, pointer :: alpha_klIJ(:,:,:), cc_abkl(:,:,:,:)
real*8, pointer :: kdo_akij(:,:,:,:), kk_ablk(:,:,:,:)
real*8, pointer :: abcj_abcj(:,:,:,:), kk_abik(:,:,:)
real*8, pointer :: ccx(:,:)
real*8, pointer :: jj_abkj(:,:,:), ccp(:,:), ccm(:,:)
real*8, pointer :: abcix(:,:,:), nmmat(:,:,:)
real*8, pointer :: nmmat2(:,:)
real*8, pointer :: ll_abkl(:,:,:,:)
real*8, pointer :: ccpr_bcl_ofj(:,:,:), ll_ablk(:,:,:,:)
real*8, pointer :: tt_abij(:,:,:,:), ll_klab(:,:,:,:)
real*8, pointer :: cc_labi(:,:,:,:), cc_abk_ofj(:,:,:)
real*8, pointer :: betac_abj_ofi(:,:,:)
real*8, pointer :: z_bakj(:,:,:,:)
real*8, pointer :: y_bakj(:,:,:,:), tztr(:,:)
real*8, pointer :: ks_laK_J(:,:,:), tt_ack_ofi(:,:,:)
real*8, pointer :: ccbar_bcl_ofj(:,:,:)
real*8, pointer :: tz(:,:)
real*8, pointer :: ls_lakj(:,:,:,:), ttpr(:,:,:), ccpr(:,:)
real*8, pointer :: abic_baic(:,:,:,:), tmp(:), aibc_baic(:,:,:,:)
real*8, pointer :: aibc_abic(:,:,:,:), tks_baK_J(:,:,:)
real*8, pointer :: ls_alki(:,:,:,:), ccij(:,:)
real*8, pointer :: tj_iakj(:,:,:)
real*8 fac,eccss,eccos,eccs,eccscs,eccoss,eccsss,et1
integer ialpha, iccabij, ickijkl, iksaklj, itkijkl, iksakofij
integer ialphaklij, iccabkl, ikdoakij, illabkl
integer ittabij, iccabkofj, izbakj, itz, itztr
integer iccbarbclofj, iybakj, ittackofi
integer ittpr, iccprbclofj, iccpr, iccx, inmmat, iccp, iccm
integer ijjabkj
integer indmin, indmax
integer ccx_size, nmmat_size, kssize, ccpm_size
real*8 calcnorm
integer blocknumber, bsize, maxmemalloc
character*8 ccsdalg
real*8 rdummy(1) ! for energycalc instead of Jai and Jia
integer idummy, iscrs2
logical mpi_ccsd, rstex, ptrest, ccsd_converged, lno, lnaf
integer nit
logical lfock
interface
c {{{ interfaces for pointers
subroutine rpoint1d(egydim1,egydim2,dim1)
implicit none
integer dim1
real*8,target :: egydim1(dim1)
real*8, pointer :: egydim2(:)
end subroutine
subroutine rpoint2d(egydim,haromdim,dim1,dim2)
implicit none
integer dim1,dim2
real*8,target :: egydim(dim1,dim2)
real*8, pointer :: haromdim(:,:)
end subroutine
subroutine rpoint3d(egydim,haromdim,dim1,dim2,dim3)
implicit none
integer dim1,dim2,dim3
real*8,target :: egydim(dim1,dim2,dim3)
real*8, pointer :: haromdim(:,:,:)
end subroutine
subroutine rpoint4d(egydim,haromdim,dim1,dim2,dim3,dim4)
implicit none
integer dim1,dim2,dim3,dim4
real*8,target :: egydim(dim1,dim2,dim3,dim4)
real*8, pointer :: haromdim(:,:,:,:)
end subroutine
c}}}
end interface
emp2corr = 0.d0
call getkey('ovirt',5,ovirt,4) !RZ
lfock=ovirt.eq.'off '.and.localcc.ne.'2013' !RZ
times(1:6,0:100)=0.d0
c
if(tprint.ne.'off ') then
ltpr=.true.
read(tprint,*) tprtol
else
ltpr=.false.
endif
iscrs2 = dblalloc(nocc*nvirt)
c
c call getarg(1,ison)
c ison='y'
call rbwrt(recbounds,nvirt,ibufln)
c {{{ ccsd
nit0 = 0
nit=0
id=0
c if (ison.eq.'y') write(*,*) 'start pre iter'
c call stopper(id,ison)
c call timer
if(maxcor-(imem-imem1) .ge. nvirt**2*nocc**2) then
call dfillzero(dcore(imem),nvirt**2*nocc**2)
call llextract(dcore(imem),bsymmv,symmocc,dgroup,first,last,
$ co,buffer,
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg,ifltln)
else
call memerr(nvirt**2*nocc**2, maxcor-(imem-imem1),
$ 'Insufficient memory to construct L! ')
endif
C Construct l, Eq. 19
if(maxcor-(imem-imem1).ge.nvirt*nocc**3)then
call dfillzero(dcore(imem),nvirt*nocc**3)
call lsextract(dcore(imem),bsymmv,symmocc,dgroup,co,
$ first,last,buffer,
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg,ifltln)
else
call memerr(nvirt*nocc**3, maxcor-(imem-imem1),
$ 'Insufficient memory to construct l! ')
endif
c write(*,*) 'Construct l, Eq. 19'
c call timer
c decompress K
if(maxcor-(imem-imem1) .ge. nvirt*2*nocc**2) then
call rpoint4d(dcore(imem), kk_ablk, nvirt, nvirt, nocc, nocc)
call kklkread(kk_ablk, bsymmv, symmocc, dgroup,
$ first, last, co, buffer, 'n',
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
call kklkwrite(kk_ablk, nocc, nvirt, ifltln)
else
call memerr(nvirt**2*nocc**2, maxcor-(imem-imem1),
$ 'Insufficient memory for K! ')
end if
C B3)
if(maxcor-(imem-imem1).ge.nocc**4)then
call fockextract(h,fij,fai,fab,bsymmv,symmocc,co,first,
$ last,dgroup,dcore(imem),buffer,localcc,dfintran,
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
else
call memerr(nocc**4, maxcor-(imem-imem1),
$ 'Insufficient memory for Fock matrix! ')
endif
c if (ison.eq.'y') write(*,*) 'fock B3'
c call stopper(id,ison)
c call timer
C Calculate the Hartree-Fock energy {{{
call getkey('dfbasis_cor',11,dfbasis_cor,4)
if(localcc.eq.'off '.and.dfbasis_cor.ne.'none') then
call getvar('eref ',eref)
elseif(localcc15p(localcc)) then
eref=0.d0
else
if(lfock)then !RZ
call getvar('eref ',eref)
else !RZ
eref=nenergy
do i=1,nocc
eref=eref+h(i,i)+fij(i,i)
enddo
endif !RZ
endif
if(.not.localcc15p(localcc)) then
write(iout,*)
write(iout,"(' Reference energy [au]: ',f25.12)")
$ eref
endif ! }}}
C Initialize t's {{{
call vvinit(vv,bsymmv,symmocc,dgroup,first,last,co,buffer,
$irecln,ibufln,nocc,nvirt,nbasis,nirmax,multpg)
do j = 1, nocc
do i = 1, j
ttind = (j-1) * j / 2 + i
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
C$OMP& PRIVATE(a,b)
do b = 1, nvirt
do a = 1, nvirt
tt(a, b, ttind) = -vv(a, b, ttind) /
$ (fab(a, a) + fab(b, b) - fij(i, i) - fij(j, j))
end do
end do
C$OMP END PARALLEL DO
end do
do a = 1, nvirt
t(a, j) = -fai(a, j) / (fab(a, a) - fij(j, j))
end do
end do ! }}}
nit=0
call energycalc(t,tt,fai,nit,eref,0.d0,
$ scrs,localcc,ccsdalg,scrs, !NP
$maxcor,imem,dcore,ifltln,iout,nvirt,nocc,idummy,ecc,scrs,
$dcore(iscrs2), mpi_rank, mpi_size,mpi_ccsd, times,
$ccsd_communicator,emp2full,lno,lnaf,emp2corr,eccs,eccss,eccos,
$eccscs,eccoss,eccsss,.true.,master_thread,.false., vv, vv, et1)
if(localcc.ne.'off ') then
call localenergycalc(t,tt,fai,nit,scrs,int1,esdi,ui,
$ ccsdalg,idummy,scrs,scrs,
$nocc, nvirt, dcore, maxcor, imem, iout,ifltln,scrs,dcore(iscrs2))
if(nocc.eq.0 .or. nvirt.eq.0) goto 234
c write(iout,*)
c write(iout,'(" CCSD correlation energy [au]: ",f22.12)')esdi
c write(iout,'(" CCSD total energy [au]: ",f22.12)')
c $esdi+eref
c write(iout,*)
call prtenergc('MP2 ',esdi,eref,locnoc)
else
call prtenergc('MP2 ',ecc,eref,locnoc)
endif
if(trim(calctype).eq.'MP2') Return
c if(.false.) then
c if(.true.) then
c calctype='SOSEX '
c localcc='sose'
C testing rpa calculation
if(localcc.eq.'drpa'.or.calctype.eq.'DRPA '.or.
$ localcc.eq.'rpa '.or.calctype.eq.'RPA '.or.
$ localcc.eq.'sose'.or.calctype.eq.'SOSEX ')then
c IF(.true.)then
C Save MP2 amplitudes for initial CCSD amplitudes
call twrite(t, tfile, irecln, tinfo, ibufln, nvirt, nocc)
call system('mv tfile tfile.mp2')
call system('mv tinfofile tinfofile.mp2')
C Calculate dRPA energy
call rpa_twrite(tt,tfile,tinfo,irecln,ibufln,nocc,nvirt)
C kell valami keyword az RPA-hoz!!
isrpa=0
if(localcc.eq.'rpa '.or.calctype.eq.'RPA ') isrpa=1
call rpacorecc(nvirt,nocc,tt,dcore(imem),vv,fab,fij,eref,tol,
$ccmaxit,vscr,bmat,bvec,invbmat,cvec,iout,ecc,scrs,localcc,int1,
$ui,esdi,isrpa,bsymmv,symmocc,dgroup,first,last,co,buffer,
$ibufln,dcore,imem,diisfile,errfile,tfile,tinfo,ifltln,ibufln,
$irecln,nbasis,nirmax,multpg,locnoc)
if(localcc.eq.'sose'.or.calctype.eq.'SOSEX ')then
if(localcc.ne.'off ')then
call localenergycalc(t,tt,fai,nit,scrs,int1,esdi,ui,
$ ccsdalg,idummy,scrs,scrs, nocc, nvirt, dcore,
$ maxcor, imem, iout, ifltln, scrs, dcore(iscrs2))
esdi=0.5d0*esdi
write(iout,'(" SOSEX correlation energy [au]: ",f22.12)')
$esdi
write(iout,*)
call prtenergc('SOSEX ',esdi,eref,locnoc)
else
norm = calcnorm(nvirt,nocc,fai,vv)
call energycalc(t,tt,fai,nit,eref,
$ norm,scrs,localcc,ccsdalg,scrs, !NP
$maxcor,imem,dcore,ifltln,iout,nvirt,nocc,idummy,ecc,scrs,
$dcore(iscrs2), mpi_rank, mpi_size,mpi_ccsd, times,
$ccsd_communicator,emp2full,lno,lnaf,emp2corr,eccs,eccss,eccos,
$eccscs,eccoss,eccsss,.false.,master_thread,.false., vv, vv, et1)
ecc=0.5d0*(ecc-eref)+eref
write(iout,'(" SOSEX correlation energy [au]: ",f22.12)')
$ecc-eref
write(iout,'(" Total SOSEX energy [au]: ",f22.12)')
$ecc
write(iout,*)
call prtenergc('SOSEX ',ecc,eref,locnoc)
endif
endif
write(iout,*)
C Restore MP2 amplitudes for initial CCSD amplitudes
call system('mv tfile.mp2 tfile')
call system('mv tinfofile.mp2 tinfofile')
call ttread(t,'n', ibufln, irecln, tfile, tinfo)
endif
c call twrite(t, tfile, irecln, tinfo, ibufln, nvirt, nocc)
c call stopper(id,ison)
emp2=ecc
if(nocc.eq.0 .or. nvirt.eq.0) goto 234
C Starting iterations
write(iout,*) 'Starting CCSD iterations...'
write(iout,*)
cc call timer
nit=0
if (.not.localcc15p(localcc)) then
write(iout,'(21x,"Residual norm",10x,"Energy")')
else
write(iout,'(21x,"Residual norm",7x,"Correlation Energy")')
endif
if(ccsdrest .eq. 'ccsd') then
c if(mpi_rank .eq. master_rank) then !0) then
if(master_thread) then
inquire(file = 'ccsd.rst', exist = rstex)
if(rstex) call read_rst_ccsd(nit0, ecc, eccold, norm, lene,
$ t, tt, nocc, nvirt)
nit0=0
write(iout, "(' Iteration', i3, 2f22.12)") nit, norm, ecc
call flush(iout)
end if
else
norm=1000.d0*tol
eccold=emp2+10.d0*tol
lene=.true.
end if
c TIME0(times(1,99))
do while(
$ (norm / 10.d0 .gt. tol .or.
$ abs(eccold - ecc) .gt. tol .or.
$ lene) .and.
$ (nit .lt. ccmaxit))
lene=abs(eccold-ecc).gt.tol
eccold=ecc
nit=nit+1
c TIME0(ttmp)
id=0
c write(6,*)
c call stopper(id,ison)
C 1) a), b) c)
c TIME0(times)
call dcopy(nvirt*nocc, fai, 1, v, 1)
call dcopy(nvirt*nocc, fai, 1, rr, 1)
call dcopy(nocc**2, fij, 1, beta, 1)
call dgemm('t', 'n', nocc, nocc, nvirt,
$ 1.d0, v, nvirt, t, nvirt,
$ 1.d0, beta, nocc)
c TIMEADD(times(1,1),times)
c if (ison.eq.'y') write(*,*) '1) a), b) c)'
c call stopper(id,ison)
c call timer
C 2) a), b)
c TIME0(times)
if(maxcor-(imem-imem1).ge.nocc**4) then
ialpha = dblalloc(nocc**4)
call rpoint4d(dcore(ialpha), alpha_ijkl,
$ nocc, nocc, nocc, nocc)
! read <ij|kl> into alpha and K into V
call list2abread(alpha_ijkl, vv, bsymmv, symmocc, dgroup,
$ first, last, co, buffer,
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
else
call memerr(nocc**4, maxcor - (imem-imem1),
$ 'Insufficient memory for alpha_ijkl! ')
endif
c if (ison.eq.'y') write(*,*) '2) a), b) '
c call stopper(id,ison)
c call timer
C 2) c)
c TIME0(times)
if(maxcor-(imem-imem1).ge.nvirt**2*nocc*(nocc+1)/2) then
iccabij = dblalloc(nvirt**2*nocc*(nocc+1)/2)
call rpoint3d(dcore(iccabij), cc_abIJ,
$ nvirt, nvirt, nocc*(nocc+1)/2)
else
call memerr(nvirt**2*nocc*(nocc+1)/2,maxcor - (imem-imem1),
$ 'Insufficient memory for cc_abIJ! ')
end if
if(maxcor-(imem-imem1).ge.nocc**2*(nocc+1)**2/4) then
ickijkl = dblalloc(nocc**2*(nocc+1)**2/4)
call rpoint2d(dcore(ickijkl), ck_IJKL,
$ nocc*(nocc+1)/2, nocc*(nocc+1)/2)
else
call memerr(nocc**2*(nocc+1)**2/4, maxcor - (imem-imem1),
$ 'Insufficient memory for ck_IJKL! ')
end if
! C^{ji}_{ab} = C^{ij}_{ba} for i <= j
do j = 1, nocc
do i = 1, j
ttind = j * (j - 1) / 2 + i
call ccijtrcalc(cc_abIJ(:, :, ttind), nvirt, t(1,i), t(1,j),
$ tt(1, 1, ttind))
enddo
enddo
! C^{ji}_{ab} * K^{kl}_{ab} -> alpha_ijlk, alpha_jikl
call dgemm('t', 'n', nocc*(nocc+1)/2, nocc*(nocc+1)/2, nvirt**2,
$ 1.d0, cc_abIJ, nvirt**2, vv, nvirt**2,
$ 0.d0, ck_IJKL, nocc*(nocc+1)/2)
do l = 1, nocc
do k = 1, l - 1
ttind2 = (l - 1) * l / 2 + k
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
C$OMP& PRIVATE(i,j,ttind)
do j = 1, nocc
do i = 1, j - 1
ttind = (j - 1) * j / 2 + i
alpha_ijkl(i,j,l,k) = alpha_ijkl(i,j,l,k) +
$ ck_IJKL(ttind, ttind2)
alpha_ijkl(j,i,k,l) = alpha_ijkl(j,i,k,l) +
$ ck_IJKL(ttind, ttind2)
end do
end do
C$OMP END PARALLEL DO
end do
end do
! C^{ij}_{ab}
do j = 1, nocc
do i = 1, j
ttind = j * (j - 1) / 2 + i
call ccijcalc(cc_abIJ(:, :, ttind), nvirt, t(1, i), t(1, j),
$ tt(1, 1, ttind))
enddo
enddo
! C^{ij}_{ab} * K^{lk}_{ab} -> alpha_ijkl, alpha_jilk
call dgemm('t', 'n', nocc*(nocc+1)/2, nocc*(nocc+1)/2, nvirt**2,
$ 1.d0, cc_abIJ, nvirt**2, vv, nvirt**2,
$ 0.d0, ck_IJKL, nocc*(nocc+1)/2)
! divide C^{ii}*K^{kk} by 2, because these elements will be added twice to alpha
do i = 1, nocc
ttind = (i + 1) * i / 2
do k = 1, nocc
ttind2 = (k + 1) * k / 2
ck_IJKL(ttind, ttind2) = ck_IJKL(ttind, ttind2) / 2.d0
end do
end do
do l = 1, nocc
do k = 1, l
ttind2 = (l - 1) * l / 2 + k
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
C$OMP& PRIVATE(i,j,ttind)
do j = 1, nocc
do i = 1, j
ttind = (j - 1) * j / 2 + i
alpha_ijkl(i,j,k,l) = alpha_ijkl(i,j,k,l) +
$ ck_IJKL(ttind, ttind2)
alpha_ijkl(j,i,l,k) = alpha_ijkl(j,i,l,k) +
$ ck_IJKL(ttind, ttind2)
end do
end do
C$OMP END PARALLEL DO
end do
end do
maxmemalloc = imem - imem1
call dbldealloc(iccabij)
c TIMEADD(times(1,2),times)
c if (ison.eq.'y') write(*,*) '2) c) '
c call stopper(id,ison)
c call timer
C 2) d), e)
c TIME0(times)
if(maxcor-(imem-imem1) .ge. nvirt*nocc**3)then
iksaklj = dblalloc(nvirt*nocc**3)
call rpoint4d(dcore(iksaklj), ks_aklj, nvirt, nocc, nocc, nocc)
! read k^{klj}
call ksread(ks_aklj, bsymmv, symmocc, dgroup, first, last,
$ co, buffer,
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
else
call memerr(nvirt*nocc**3, maxcor - (imem-imem1),
$ 'Insufficient memory for ks_aklj! ')
endif
if(maxcor-(imem-imem1).ge.nocc**4)then
itkijkl = dblalloc(nocc**4)
call rpoint4d(dcore(itkijkl), tk_iklj,
$ nocc, nocc, nocc, nocc)
else
call memerr(nocc**4, maxcor - (imem-imem1),
$ 'Insufficient memory for tk_iklj! ')
end if
! t^i*k^{klj} -> alpha_{ijkl}, alpha_{jilk}
call dgemm('t', 'n', nocc, nocc**3, nvirt,
$ 1.d0, t, nvirt, ks_aklj, nvirt,
$ 0.d0, tk_iklj, nocc)
do l = 1, nocc
do k = 1, nocc
do j = 1, nocc
alpha_ijkl(1:nocc,j,k,l) = alpha_ijkl(1:nocc,j,k,l) +
$ tk_iklj(1:nocc,k,l,j)
alpha_ijkl(j,1:nocc,l,k) = alpha_ijkl(j,1:nocc,l,k) +
$ tk_iklj(1:nocc,k,l,j)
end do
end do
end do
maxmemalloc = max(maxmemalloc, imem - imem1)
call dbldealloc(itkijkl)
c TIMEADD(times(1,3),times)
c if (ison.eq.'y') write(*,*) '2) d), e)'
c call stopper(id,ison)
c call timer
C 2) f)
c TIME0(times)
if(maxcor-(imem-imem1).ge.nvirt*nocc)then
iksakofij = dblalloc(nvirt*nocc)
call rpoint2d(dcore(iksakofij), ks_ak_ofIJ, nvirt, nocc)
else
call memerr(nocc*nvirt, maxcor - (imem-imem1),
$ 'Insufficient memory for ks_ak_ofIJ! ')
endif
do j = 1, nocc
do i = 1, j
ttind = (j-1) * j / 2 + i
! k^{ijk}*t^k -> V^{ij}
do k = 1, nocc
ks_ak_ofIJ(1:nvirt, k) = ks_aklj(1:nvirt, i, j, k)
end do
call dgemm('n', 't', nvirt, nvirt, nocc,
$ -1.d0, ks_ak_ofIJ, nvirt, t, nvirt,
$ 1.d0, vv(1, 1, ttind), nvirt)
! t^k*k^{jik} -> V^{ij}
do k = 1, nocc
ks_ak_ofIJ(1:nvirt, k) = ks_aklj(1:nvirt, j, i, k)
end do
call dgemm('n', 't', nvirt, nvirt, nocc,
$ -1.d0, t, nvirt, ks_ak_ofIJ, nvirt,
$ 1.d0, vv(1, 1, ttind), nvirt)
enddo
enddo
c TIMEADD(times(1,4),times)
maxmemalloc = max(maxmemalloc, imem - imem1)
call dbldealloc(iksaklj)
c if (ison.eq.'y') write(*,*) '2) f)'
c call stopper(id,ison)
C 2) g)
c TIME0(times)
if(maxcor-(imem-imem1) .ge. nocc**3*(nocc+1)/2) then
ialphaklij = dblalloc(nocc**3*(nocc+1)/2)
call rpoint3d(dcore(ialphaklij), alpha_klIJ,
$ nocc, nocc, nocc*(nocc+1)/2)
else
call memerr(nocc**3*(nocc+1)/2, maxcor - (imem-imem1),
$ 'Insufficient memory for alpha_klIJ! ')
endif
! sort alpha: ijkl -> klIJ
do j = 1, nocc
do i = 1, j
ttind = j * (j - 1) / 2 + i
do l = 1, nocc
alpha_klIJ(1:nocc, l, ttind) =
$ alpha_ijkl(i, j, 1:nocc, l)
end do
end do
end do
if(maxcor-(imem-imem1) .ge. nvirt**2*nocc**2) then
iccabkl = dblalloc(nvirt**2*nocc**2)
call rpoint4d(dcore(iccabkl), cc_abkl,
$ nvirt, nvirt, nocc, nocc)
else
call memerr(nvirt**2*nocc**2, maxcor - (imem-imem1),
$ 'Insufficient memory for cc_abkl! ')
endif
! generate C^{ij} for all i,j
do j = 1, nocc
call calcccfori(t, tt, nocc, nvirt, cc_abkl(:,:,:,j), j, 'n')
enddo
! sum_{kl} {C^{kl} * alpha_{klIJ}} -> V^{IJ}
call dgemm('n', 'n', nvirt**2, nocc*(nocc+1)/2, nocc**2,
$ 1.d0, cc_abkl, nvirt**2, alpha_klIJ, nocc**2,
$ 1.d0, vv, nvirt**2)
maxmemalloc = max(maxmemalloc, imem - imem1)
call dbldealloc(ialpha)
c TIMEADD(times(1,5),times)
c if (ison.eq.'y') write(*,*) '2) g)'
c call stopper(id,ison)
c call timer
C 3) a), b
c TIME0(times)
ccpm_size = (nvirt+1)*nvirt*(nocc+1)*nocc/4
nmmat_size = (nvirt+1)*nvirt*(nocc+1)*nocc/4
if(maxcor-(imem-imem1) .ge. 2 * ccpm_size) then ! TODO: ccpm+nmmat???
iccx = dblalloc(ccpm_size)
call rpoint2d(dcore(iccx), ccx,
$ (nvirt+1)*nvirt/2, (nocc+1)*nocc/2)
else
call memerr(2*ccpm_size, maxcor-(imem-imem1),
$ 'Insufficient memory for ccx! ')
end if
call rpoint2d(dcore(imem1+maxcor - nmmat_size),
$ nmmat2, (nvirt+1)*nvirt/2, (nocc+1)*nocc/2)
call crccx('a', t, tt, ccx, nvirt, nocc)
call xpplcalc('a', nmmat2, dcore(imem), ccx, recbounds, times,
$ nocc, nvirt, maxcor, imem, iout, ibufln, irecln)
c if (ison.eq.'y') write(*,*) 'ppl a',nit,times(3,3),times(4,3)
call xmatextr('a', nmmat2, vv, nvirt, nocc)
call crccx('s', t, tt, ccx, nvirt, nocc)
call xpplcalc('s', nmmat2, dcore(imem), ccx, recbounds, times,
$ nocc, nvirt, maxcor, imem, iout, ibufln, irecln)
c if (ison.eq.'y') write(*,*) 'ppl s',nit,times(3,3),times(4,3)
call xmatextr('s', nmmat2, vv, nvirt, nocc)
maxmemalloc = max(maxmemalloc, imem - imem1 + nmmat_size)
call dbldealloc(iccx)
c TIMEADD(times(1,6),times)
c if (ison.eq.'y') write(*,*) '3) a), b) ppl'
c call timer
c if(ison.eq.'y')write(6,*)'ppl'
c call stopper(id,ison)
C 4) a)
if(maxcor - (imem-imem1) .ge. nvirt*nocc**3) then
ikdoakij = dblalloc(nvirt*nocc**3)
call rpoint4d(dcore(ikdoakij), kdo_akij, nvirt, nocc, nocc,
$ nocc)
call dfillzero(kdo_akij, nvirt*nocc**3)
else
call memerr(nvirt*nocc**3,
$ maxcor - (imem-imem1),
$ 'Insufficient memory for kdo_akij! ')
endif
ccx_size = nocc*(nocc+1)/2 * nvirt*(nvirt+1)/2
nmmat_size = nocc*(nocc+1)/2 * nvirt * nocc
if(maxcor - (imem-imem1) .ge. 2 * ccx_size + nmmat_size) then ! C(+), C(-) and N or M
c C(+) and C(-) to the beginning of the free memory
iccp = dblalloc(ccx_size)
call rpoint2d(dcore(iccp),
$ ccp, nvirt*(nvirt+1)/2, nocc*(nocc+1)/2)
call crccx('s', t, tt, ccp, nvirt, nocc)
iccm = dblalloc(ccx_size)
call rpoint2d(dcore(iccm),
$ ccm, nvirt*(nvirt+1)/2, nocc*(nocc+1)/2)
call crccx('a', t, tt, ccm, nvirt, nocc)
c N and M after C(-)
inmmat = dblalloc(nmmat_size)
call rpoint3d(dcore(inmmat),
$ nmmat, nocc*(nocc+1)/2, nvirt, nocc)
else
call memerr(ccx_size + nmmat_size, maxcor - (imem-imem1),
$ 'Insufficient memory for C(+-) and N/M! ')
endif
c if (ison.eq.'y') write(*,*) 'C for 4/a/III'
c call timer
C 4) a)
c call stopper(id,ison)
c if(ison.eq.'y')write(6,*)'abci'
cc TIME0(times)
ind=1
notend=.true.
n = blocknumber(nvirt**3+nvirt**2*(nvirt+1)/2, 1, nocc, 'y', ind,
$ bsize, 'Insufficient memory for one <ab|ci> blk!',
$ maxcor-(imem-imem1))
do while(notend) ! for abci
c TIME0(times)
if(ind + n - 1 .ge. nocc)then
notend = .false.
n = nocc - ind + 1
endif
call rpoint4d(dcore(imem), abcj_abcj, nvirt, nvirt, nvirt, n)
cc TIME0(times(1,99))
! read n blocks of abci
call abciread(ind, n, abcj_abcj, bsymmv, symmocc,
$ dgroup, co, first, last, buffer,
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
cc TIMEADD(times(1,11),times(1,99))
c TIMEADD(times(1,7),times)
c if (ison.eq.'y') write(*,*) '4) b) abci I/O'
c call stopper(id,ison)
c 4 b)
C II term (2.62)
c TIME0(times)
! <ab|cj> * t_c^i -> KDV^{ij}
do j = ind, ind+n-1
ttind = (j - 1) * j / 2 + 1
call dgemm('n', 'n', nvirt**2, j, nvirt,
$ 1.d0, abcj_abcj(1, 1, 1, j-ind+1), nvirt**2, t, nvirt,
$ 1.d0, vv(1, 1, ttind), nvirt**2)
enddo
c TIMEADD(times(1,8),times)
c if (ison.eq.'y') write(*,*) '4) b) '
c call stopper(id,ison)
C 5) a) III of 2.33
c TIME0(times)
! <bc|ak> * C_{bc}^{ij} -> kd^{kij}, analogous to 3) (<ab|cd> * C_{cd}^{ij})
call rpoint3d(dcore(imem+nvirt**3*n), ! store <ab||ck> and <ab|||ck> after the integrals
$ abcix, nvirt*(nvirt+1)/2, nvirt, n)
call abckccalc('a', nocc, nvirt, n, ind, abcj_abcj,
$ ccm, abcix, kdo_akij, nmmat)
call abckccalc('s', nocc, nvirt, n, ind, abcj_abcj,
$ ccp, abcix, kdo_akij, nmmat)
c TIMEADD(times(1,10),times)
ind = ind + n
enddo ! while for abci
maxmemalloc = max(maxmemalloc,
$ imem-imem1+nvirt**2*(nvirt+(nvirt+1)/2)*n)
call dbldealloc(iccp)
c if (ison.eq.'y') write(*,*) '5) a) III'
c if(ison.eq.'y')write(6,*)'abciend'
c call stopper(id,ison)
cc TIMEADD(times(1,8),times)
C 5) b) I
c TIME0(times)
call rpoint3d(dcore(imem), kk_abik, nvirt, nvirt,
$ nocc*(nocc+1)/2)
! read n blocks of K (i>=k)
call kklkread(kk_abik, bsymmv, symmocc, dgroup,
$ first, last, co, buffer, 'y',
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
! <ab|ik> * t_b^j -> kd^{kij}; <a,b|ik>^T * t_b^j -> kd^{ikj}
do k = 1, nocc
do i = k, nocc
! sum_{j=nocc-(k-1)+1)^{nocc} {j} + (i-k+1)
ttind = (2*nocc-k+2) * (k-1) / 2 + i-k+1
call dgemm('n', 'n', nvirt, nocc, nvirt,
$ 1.d0, kk_abik(1, 1, ttind), nvirt, t, nvirt,
$ 1.d0, kdo_akij(1, k, i, 1), nvirt*nocc**2)
if(i.ne.k) then
call dgemm('t', 'n', nvirt, nocc, nvirt,
$ 1.d0, kk_abik(1, 1, ttind), nvirt, t, nvirt,
$ 1.d0, kdo_akij(1, i, k, 1), nvirt*nocc**2)
endif
enddo
enddo
c TIMEADD(times(1,11),times)
C 5) c) II
c TIME0(times)
ind = 1
notend = .true.
do while(notend) ! for J
n = blocknumber(nvirt**2 + nocc*nvirt, nocc, nocc, 'n', ind,
$ bsize, 'Insufficient memory for one block of J! ',
$ maxcor-(imem-imem1))
if(n+ind-1 .ge. nocc) notend = .false.
ijjabkj = dblalloc(nvirt**2 * (2*nocc-2*ind-n+3)*n/2)
call rpoint3d(dcore(ijjabkj), jj_abkj, nvirt, nvirt,
$ (2*nocc-2*ind-n+3)*n/2)
! read n blocks of J
call jj1read(ind, n, jj_abkj, bsymmv, symmocc, dgroup,
$ co, first, last, buffer,
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
call rpoint3d(dcore(imem), tj_iakj, nocc, nvirt,
$ (2*nocc-2*ind-n+3)*n/2)
call dgemm('t', 'n', nocc, nvirt*(2*nocc-2*ind-n+3)*n/2, nvirt,
$ 1.d0, t, nvirt, jj_abkj, nvirt,
$ 0.d0, tj_iakj, nocc)
maxmemalloc = max(maxmemalloc, imem - imem1)
call dbldealloc(ijjabkj)
! (a,b|kj)^T * t_b^i -> kd^{kij}; (ab|kj) * t_b^j -> kd^{jik}
do j = ind, ind+n-1
do k = j, nocc
! sum_{l=nocc-(j-1)+1)^{nocc-ind+1} {l} + (k-j+1)
ttind = ((j-ind)*(2*nocc-ind-j+3)/2+k-j+1)
do i = 1, nocc
kdo_akij(1:nvirt, k, i, j) =
$ kdo_akij(1:nvirt, k, i, j) +
$ tj_iakj(i, 1:nvirt, ttind)
if(j .ne. k)
$ kdo_akij(1:nvirt, j, i, k) =
$ kdo_akij(1:nvirt, j, i, k) +
$ tj_iakj(i, 1:nvirt, ttind)
end do
enddo
enddo
ind = ind + n
end do ! while for J
c TIMEADD(times(1,12),times)
c if (ison.eq.'y') write(*,*) '5) b)'
c call stopper(id,ison)
c call timer
C 5) d)
c TIME0(times)
! 2 kd^{kik} - kd^{kki} -> v^i
do j=1,nocc
do k=1,nocc
call daxpy(nvirt, 2.d0, kdo_akij(1, k, j, k), 1, v(1, j), 1)
call daxpy(nvirt,-1.d0, kdo_akij(1, k, k, j), 1, v(1, j), 1)
enddo
end do
c TIMEADD(times(1,13),times)
c 5) e)
c TIME0(times)
! - kd^{kij} * t^k -> V^{ij}; - t^k * kd^{kji} -> V^{ij}
do j = 1, nocc
do i = 1, j
ttind = j * (j-1) / 2 + i
call dgemm('n', 't', nvirt, nvirt, nocc,
$ -1.d0, kdo_akij(1, 1, i, j), nvirt, t, nvirt,
$ 1.d0, vv(1, 1, ttind), nvirt)
call dgemm('n', 't', nvirt, nvirt, nocc,
$ -1.d0, t, nvirt, kdo_akij(1, 1, j, i), nvirt,
$ 1.d0, vv(1, 1, ttind), nvirt)
end do
enddo
c TIMEADD(times(1,14),times)
maxmemalloc = max(maxmemalloc, imem - imem1)
call dbldealloc(ikdoakij)
c if (ison.eq.'y') write(*,*) '5) d) e)'
c call timer
c call stopper(id,ison)
C 6) a)
c TIME0(times)
! sort tt: abIJ -> abji
if(maxcor - (imem-imem1) .ge. nvirt**2*nocc**2) then
ittabij = dblalloc(nvirt**2*nocc**2)
call rpoint4d(dcore(ittabij), tt_abij, nvirt, nvirt, nocc,
$ nocc)
do j = 1, nocc
do i = 1, nocc
indmin = min(i,j)
indmax = max(i,j)
ttind = indmax * (indmax-1) / 2 + indmin
do b = 1, nvirt
if(i .le . j) then
tt_abij(1:nvirt, b, j, i) = tt(1:nvirt, b, ttind)
else
tt_abij(1:nvirt, b, j, i) = tt(b, 1:nvirt, ttind)
end if
end do
end do
end do
else
call memerr(nvirt**2*nocc**2, maxcor - (imem-imem1),
$ 'Insufficient memory for T! ')
end if
ind = 1
notend = .true.
n = blocknumber(nvirt*nocc, nocc, nocc, 'y', ind, bsize,
$ 'Insufficient memory for one block of ls!',
$ maxcor-(imem-imem1))
do while(notend) ! for l
if(ind+n-1 .ge. nocc)then
notend = .false.
n = nocc-ind+1
endif
call rpoint4d(dcore(imem), ls_alki, nvirt, nocc, nocc, n)
! read n blocks of l
call lsread(ls_alki, ind, n, nocc, nvirt, ifltln)
c 6) b)
! T^{lk} * l^{lki} -> s^i
call dgemm('n', 'n', nvirt, n, nvirt*nocc**2,
$ -1.d0, tt_abij, nvirt, ls_alki, nvirt*nocc**2,
$ 1.d0, v(1, ind), nvirt)
c 6) c)
! t^l * l^{lki} -> beta_{ki}
call dgemv('t', nocc*nvirt, nocc*n,
$ 1.d0, ls_alki, nocc*nvirt, t, 1,
$ 1.d0, beta(1, ind), 1)
ind = ind + n
end do ! while for l
maxmemalloc = max(maxmemalloc, imem - imem1)
call dbldealloc(ittabij)
c TIMEADD(times(1,15),times)
c if (ison.eq.'y') write(*,*) '6) a-c'
c call timer
c call stopper(id,ison)
C 7) a)
c call dfillzero(x,nvirt**2)
itmp = dblalloc(nvirt**2)
call rpoint2d(dcore(itmp), ccij, nvirt, nvirt)
ind=1
notend=.true.
n = blocknumber(2 * nvirt**2*nocc, 1, nocc, 'y', ind, bsize,
$ 'Insufficient memory for one block of ll!', ! halve the block size to leave space for tt_abij and cc_labi
$ maxcor-(imem-imem1))
illabkl = dblalloc(n * nvirt**2*nocc)
call rpoint4d(dcore(illabkl), ll_abkl, nvirt, nvirt, nocc, n)
ittabij = dblalloc(n * nvirt**2*nocc)
call rpoint4d(dcore(ittabij), tt_abij, nvirt, nvirt, nocc, n)
do while(notend) ! for L
c TIME0(times)
if(ind+n-1 .ge. nocc)then
notend = .false.
n = nocc-ind+1
endif
C 7) b)
! read n blocks of L
call ll2read(ind, n, ll_abkl, 'abkl',nocc,nvirt,ifltln)
C 7) c)
! L^{kl} * t^l -> r^k
do l = ind, ind+n-1
do k = 1, nocc
call dgemv('n', nvirt, nvirt,
$ 1.d0, ll_abkl(1, 1, k, l-ind+1), nvirt, t(1, l), 1,
$ 1.d0, rr(1, k), 1)
enddo
enddo
c TIMEADD(times(1,17),times)
C 7) e)
c calculate -A
c TIME0(times)
! T_{cb}^{lk} -> T_{bc}^{kl} for l's read
do l = ind, ind+n-1
do k = 1, nocc
indmin = min(l, k)
indmax = max(l, k)
ttind = (indmax-1) * indmax / 2 + indmin
if(l .le. k) then
call tr(nvirt, tt(1,1,ttind), tt_abij(:,:,k,l-ind+1))
else
call dcopy(nvirt**2, tt(1, 1, ttind), 1,
$ tt_abij(1, 1, k, l-ind+1), 1)
end if
end do
end do
! - L^{kl} * T^{kl} -> X
if(ind .eq. 1) then
fac = 0.d0
else
fac = 1.d0
end if
call dgemm('n', 't', nvirt, nvirt, nvirt*nocc*n,
$ -1.d0, ll_abkl, nvirt, tt_abij, nvirt,
$ fac, x, nvirt)
c TIMEADD(times(1,18),times)
c 7) d)
c TIME0(times)
call rpoint4d(tt_abij, ll_klab, nocc, n, nvirt, nvirt)
! sort L: abkl -> klab for l read
do b = 1, nvirt
do a = 1, nvirt
do l = 1, n
ll_klab(1:nocc, l, a, b) =
$ ll_abkl(a, b, 1:nocc, l)
end do
end do
end do
call rpoint4d(ll_abkl, cc_labi, n, nvirt, nvirt, nocc)
! sort C: abli -> labi for all i and l read
do i = 1, nocc
do l = ind, ind+n-1
indmax = max(i, l)
indmin = min(i, l)
ttind = indmax * (indmax-1) / 2 + indmin
if(i .le. l) then
call ccijcalc(ccij, nvirt, t(1, i), t(1, l),
$ tt(1, 1, ttind))
do b = 1, nvirt
cc_labi(l-ind+1, 1:nvirt, b, i) = ccij(1:nvirt, b)
end do
else
call ccijtrcalc(ccij, nvirt, t(1, l), t(1, i),
$ tt(1, 1, ttind))
do b = 1, nvirt
cc_labi(l-ind+1, 1:nvirt, b, i) = ccij(1:nvirt, b)
end do
end if
end do
end do
! tr(L^{kl}_{ab} * C^l_{ab}^i -> beta_{ki}
call dgemm('n', 'n', nocc, nocc, n*nvirt**2,
$ 1.d0, ll_klab, nocc, cc_labi, n*nvirt**2,
$ 1.d0, beta, nocc)
c TIMEADD(times(1,19),times)
ind = ind + n
end do ! while for L
maxmemalloc = max(maxmemalloc, imem - imem1)
call dbldealloc(itmp)
c if (ison.eq.'y') write(*,*) '7) a-e'
c call stopper(id,ison)
c call timer
C check
C 8) a)
c TIME0(times)
! h - A^T -> scrs
call tr(nvirt, x, scrs)
do a=1,nvirt
do b=1,nvirt
scrs(a,b)=scrs(a,b)+h(a+nocc,b+nocc)
enddo
enddo
c TIMEADD(times(1,20),times)
C 8) b)
c TIME0(times)
! (h - A^T) * t^i -> v^i
call dgemm('n', 'n', nvirt, nocc, nvirt,
$ 1.d0, scrs, nvirt, t, nvirt,
$ 1.d0, v, nvirt)
c TIMEADD(times(1,21),times)
C 8) c)
c TIME0(times)
! s^i * t^j^T -> V^{ij}; t^i * s^j^T -> V^{ij}
do j = 1, nocc
do i = 1, j
ttind = (j-1) * j / 2 + i
call dger(nvirt, nvirt,
$ 1.d0, v(1, i), 1, t(1, j), 1,
$ vv(1, 1, ttind), nvirt)
call dger(nvirt, nvirt,
$ 1.d0, t(1, i), 1, v(1, j), 1,
$ vv(1, 1, ttind), nvirt)
end do
end do
c TIMEADD(times(1,22),times)
c if (ison.eq.'y') write(*,*) '8) a-c'
c call timer
c call stopper(id,ison)
C 9) b)
c TIME0(times)
! t^k * beta_{ki} -> v^i
call dgemm('n', 'n', nvirt, nocc, nocc,
$ -1.d0, t, nvirt, beta, nocc,
$ 1.d0, v, nvirt)
c TIMEADD(times(1,24),times)
c if (ison.eq.'y') write(*,*) '9) a-b'
c call timer
c call stopper(id,ison)
C 10)
c TIME0(times)
if(maxcor - (imem-imem1) .ge. nvirt**2*nocc) then
iccabkofj = dblalloc(nvirt**2*nocc)
call rpoint3d(dcore(iccabkofj), cc_abk_ofj, nvirt, nvirt, nocc)
else
call memerr(nvirt**2*nocc, maxcor - (imem-imem1),
$ 'Insufficient memory for cc_abk_ofj! ')
end if
! C_{ab}^{kj} * beta_{ki} -> V^{ij}
do j = 1, nocc
call calcccfori(t, tt, nocc, nvirt, cc_abk_ofj, j, 'n')
call dgemm('n', 'n', nvirt**2, j, nocc,
$ -1.d0, cc_abk_ofj, nvirt**2, beta, nocc,
$ 1.d0, vv(1, 1, j*(j-1)/2+1), nvirt**2)
end do
call rpoint3d(dcore(imem), betac_abj_ofi, nvirt, nvirt, nocc)
! C_{ba}^{ki} * beta_{kj} -> V^{ij}
do i = 1, nocc
call calcccfori(t, tt, nocc, nvirt, cc_abk_ofj, i, 't')
call dgemm('n', 'n', nvirt**2, nocc-i+1, nocc,
$ -1.d0, cc_abk_ofj, nvirt**2, beta(1, i), nocc,
$ 0.d0, betac_abj_ofi, nvirt**2)
do j = i, nocc
ttind = j * (j-1) / 2 + i
do b = 1, nvirt
vv(1:nvirt, b, ttind) = vv(1:nvirt, b, ttind) +
$ betac_abj_ofi(1:nvirt, b, j-i+1)
end do
end do
end do
maxmemalloc = max(maxmemalloc, imem - imem1)
call dbldealloc(iccabkofj)
c TIMEADD(times(1,25),times)
c if (ison.eq.'y') write(*,*) '10)'
c call stopper(id,ison)
c call timer
C 11)
c TIME0(times)
call daxpy(nvirt**2, 1.d0, fab, 1, x, 1)
c TIMEADD(times(1,26),times)
C 12)
c TIME0(times)
! - r^k * t^k^T -> X
call dgemm('n', 't', nvirt, nvirt, nocc,
$ -1.d0, rr, nvirt, t, nvirt,
$ 1.d0, x, nvirt)
c TIMEADD(times(1,27),times)
c if (ison.eq.'y') write(*,*) '11-12)'
c call timer
c call stopper(id,ison)
c if(ison.eq.'y')write(6,*)'abci'
cc TIME0(times)
C 13) a)
c calculate the transpose of Z^{kj} (see 14))
c TIME0(times)
if(maxcor - (imem-imem1) .ge. nocc**2*nvirt**2) then
izbakj = dblalloc(nocc**2*nvirt**2)
call rpoint4d(dcore(izbakj), z_bakj, nvirt, nvirt, nocc, nocc)
else
call memerr(nocc**2*nvirt**2, maxcor - (imem-imem1),
$ 'Insufficient memory for z_bakj! ')
end if
c TIME0(times)
ind = 1
notend = .true.
n = blocknumber(nvirt**2, nvirt, nocc, 'y', ind, bsize,
$ 'Insufficient mem for one <ab|ci> block! ',
$ maxcor-(imem-imem1))
do while(notend)
if(ind+n-1 .ge. nocc) then
notend = .false.
n = nocc-ind+1
end if
call rpoint4d(dcore(imem), aibc_abic, nvirt, nvirt, n, nvirt)
cc TIME0(times(1,99))
call abci2read(ind, n, aibc_abic, bsymmv, symmocc, dgroup,
$ co, first, last, buffer,
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
cc TIMEADD(times(1,11),times(1,99))
c calculate the symmetric JE^{kj} matrices
call dgemm('n', 'n', nvirt**2*n, nocc, nvirt,
$ 1.d0, aibc_abic, nvirt**2*n, t, nvirt,
$ 0.d0, z_bakj(1, 1, ind, 1), nvirt**2*nocc)
ind=ind+n
end do ! while for abci
c TIMEADD(times(1,30),times)
c if (ison.eq.'y') write(*,*) '13 a)'
c call stopper(id,ison)
C 13) c)
! 2 * JE^{kk} -> X
do k=1,nocc
call daxpy(nvirt**2, 2.d0, z_bakj(1, 1, k, k), 1, x, 1)
enddo
C 13) d)
c read J and add it to Z
c J^{kj} is symmetric to the permutation of the virtual indices
c TIME0(times)
call jjtoz(z_bakj,bsymmv,symmocc,dgroup,first,last,co,buffer,
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
c TIMEADD(times(1,29),times)
c if (ison.eq.'y') write(*,*) '13 b-d)'
c call stopper(id,ison)
C 13) e)
c K
avmem=maxcor-(imem-imem1)
if(avmem .ge. nvirt**2*nocc) then
iccbarbclofj = dblalloc(nvirt**2*nocc)
call rpoint3d(dcore(iccbarbclofj), ccbar_bcl_ofj, nvirt, nvirt,
$ nocc)
else
call memerr(nvirt**2*nocc, avmem,
$ 'Insufficient memory for ccbar_cbl_ofj! ')
end if
ind = 1
notend = .true.
n = blocknumber(nvirt**2, nocc, nocc, 'y', ind, bsize,
$ 'Insufficient memory for one block of K! ',
$ maxcor-(imem-imem1))
do while(notend)
if(ind+n-1 .ge. nocc) then
notend = .false.
n = nocc-ind+1
end if
call rpoint4d(dcore(imem), kk_ablk,
$ nvirt, nvirt, nocc, n)
! read n blocks of K
call kklk2read(ind, n, kk_ablk, nocc, nvirt, ifltln)
do j = 1, nocc
! generate Cbar^{lj}^T's for j
do l = 1, j
ttind = j * (j-1) / 2 + l
call coijcalc(ccbar_bcl_ofj(:, :, l), nvirt,
$ t(1, l), t(1, j), tt(1, 1, ttind))
end do
do l = j+1, nocc
ttind = l * (l-1) / 2 + j
call coijtrcalc(ccbar_bcl_ofj(:, :, l), nvirt,
$ t(1, j), t(1, l), tt(1, 1, ttind))
end do
! Cbar^{lj}^T * K^{lk} -> Z_{ba}^{kj}
do k = ind, ind+n-1
call dgemm('n', 't', nvirt, nvirt, nvirt*nocc,
$ -1.d0, ccbar_bcl_ofj, nvirt,
$ kk_ablk(1, 1, 1, k-ind+1), nvirt,
$ 1.d0, z_bakj(1, 1, k, j), nvirt)
end do
end do
ind = ind + n
end do ! while for K
maxmemalloc = max(maxmemalloc, imem - imem1)
call dbldealloc(iccbarbclofj)
c TIMEADD(times(1,28),times)
c if (ison.eq.'y') write(*,*) '13 e) III'
c call timer
c call stopper(id,ison)
c ks
c TIME0(times)
ind=1
notend=.true.
do while(notend)
n = blocknumber(nvirt*nocc + nvirt**2, nocc, nocc, 'n', ind,
$ kssize, 'Insufficient memory for one block of ks!',
$ maxcor-(imem-imem1))
kssize = kssize / (nvirt*nocc+nvirt**2)
if(n+ind-1 .ge. nocc) then
notend = .false.
c n = nocc - ind + 1
end if
call rpoint3d(dcore(imem), ks_laK_J, nocc, nvirt, kssize)
call rpoint3d(dcore(imem+nocc*nvirt*kssize), tks_baK_J,
$ nvirt, nvirt, kssize)
! read n blocks of k as k^l_a^{KJ}
call ks2read(ind,n,kssize,ks_laK_J,
$ bsymmv,symmocc,dgroup,first,last,co,buffer,
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
! t_b^l * k^l_a^{KJ}
call dgemm('n', 'n', nvirt, nvirt*kssize, nocc,
$ -1.d0, t, nvirt, ks_laK_J, nocc,
$ 0.d0, tks_baK_J, nvirt)
c $ 1.d0, z_bakj(1, 1, 1, ind), nvirt)
! tks_baK_J -> Z^{k,j}, Z^{j,k}
ttind = 0
do j = ind, ind+n-1
ttind = ttind + 1
call daxpy(nvirt**2*(nocc-j+1),
$ 1.d0, tks_baK_J(1, 1, ttind), 1,
$ z_bakj(1, 1, j, j), 1)
do k = j + 1, nocc
ttind = ttind + 1
call daxpy(nvirt**2, 1.d0, tks_baK_J(1, 1, ttind), 1,
$ z_bakj(1, 1, j, k), 1)
end do
end do
ind = ind + n
end do ! while for k
c TIMEADD(times(1,31),times)
c if (ison.eq.'y') write(*,*) '13 e) IV'
c call stopper(id,ison)
C 14)
c TIME0(times)
if(maxcor - (imem-imem1) .ge. nvirt**2*nocc + 2 * nvirt**2) then
ittackofi = dblalloc(nvirt**2*nocc)
call rpoint3d(dcore(ittackofi), tt_ack_ofi, nvirt, nvirt, nocc)
itz = dblalloc(nvirt**2)
call rpoint2d(dcore(itz), tz, nvirt, nvirt)
itztr = dblalloc(nvirt**2)
call rpoint2d(dcore(itztr), tztr, nvirt, nvirt)
else
call memerr(nvirt**2*nocc + 2*nvirt**2, maxcor - (imem-imem1),
$ 'Insufficient memory for tztr and tz! ')
end if
do i = 1, nocc
! T^{k,i} k<=i
call dcopy(nvirt**2*i, tt(1, 1, i*(i-1)/2+1), 1, tt_ack_ofi, 1)
! T^{k,i} k>i
do k = i+1, nocc
ttind = k * (k-1) / 2 + i
call tr(nvirt, tt(1, 1, ttind), tt_ack_ofi(:, :, k))
end do
do j = 1, nocc
! T{ac}_^{ki} * Z_{bc}^{kj}
call dgemm('n', 't', nvirt, nvirt, nvirt*nocc,
$ 1.d0, tt_ack_ofi, nvirt, z_bakj(1, 1, 1, j), nvirt,
$ 0.d0, tz, nvirt)
! -1/2 T^{ki} * Z^{kj} - (T^{ki} * Z^{kj})^T -> V^{ij}
if(i .lt. j) then
! G^{ij}
do b = 1, nvirt
tztr(1:nvirt,b) = -0.5d0*tz(1:nvirt,b) - tz(b,1:nvirt)
end do
ttind = j * (j-1) / 2 + i
call daxpy(nvirt**2, 1.d0, tztr, 1, vv(1, 1, ttind), 1)
else if(j .lt. i) then
! G^{ji}^T
do b = 1, nvirt
tztr(1:nvirt,b) = -tz(1:nvirt,b) - 0.5d0*tz(b,1:nvirt)
end do
ttind = i * (i-1) / 2 + j
call daxpy(nvirt**2, 1.d0, tztr, 1, vv(1, 1, ttind), 1)
else if(i .eq. j) then
! G^{ij} + G^{ji}^T
do b = 1, nvirt
tztr(1:nvirt, b) = -1.5d0 * tz(1:nvirt, b) -
$ 1.5d0 * tz(b, 1:nvirt)
end do
ttind = i * (i+1) / 2
call daxpy(nvirt**2, 1.d0, tztr, 1, vv(1, 1, ttind), 1)
end if
end do
end do
c TIMEADD(times(1,32),times)
maxmemalloc = max(maxmemalloc, imem - imem1)
call dbldealloc(izbakj)
c if (ison.eq.'y') write(*,*) '14)'
c call timer
c call stopper(id,ison)
c if(ison.eq.'y')write(6,*)'abci'
cc TIME0(times)
C 15) a)
c calculate the transpose of Y^{kj} (see 16))
if(maxcor - (imem+nvirt-imem1) .ge. nocc**2*nvirt**2) then
iybakj = dblalloc(nocc**2*nvirt**2)
call rpoint4d(dcore(iybakj), y_bakj, nvirt, nvirt, nocc, nocc)
itmp = dblalloc(nvirt)
call rpoint1d(dcore(itmp), tmp, nvirt)
else
call memerr(nocc**2*nvirt**2+nvirt, maxcor - (imem-imem1),
$ 'Insufficient memory for y_bakj! ')
end if
ind=1
notend=.true.
n = blocknumber(nvirt**2, nvirt, nocc, 'y', ind, bsize,
$ 'Insufficient memory for one abci block! ',
$ maxcor-(imem-imem1))
do while(notend)
c TIME0(times)
if(ind+n-1.ge.nocc)then
notend=.false.
n=nocc-ind+1
endif
call rpoint4d(dcore(imem), abic_baic, nvirt, nvirt, n, nvirt)
call rpoint4d(dcore(imem), aibc_baic, nvirt, nvirt, n, nvirt)
cc TIME0(times(1,99))
call abci3read(ind,n,abic_baic,
$ bsymmv,symmocc,dgroup,co,first,last,buffer,
$irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
cc TIMEADD(times(1,11),times(1,99))
! KE
call dgemm('n', 'n', nvirt**2*n, nocc, nvirt,
$ 1.d0, abic_baic, nvirt**2*n, t, nvirt,
$ 0.d0, y_bakj(1, 1, ind, 1), nvirt**2*nocc)
c TIMEADD(times(1,33),times)
C 15) b)
c TIME0(times)
! KE^{kk} -> X
do k = ind, ind + n - 1
call tradd(nvirt, -1.d0, y_bakj(:, :, k, k), x)
enddo
c TIMEADD(times(1,34),times)
c 4) b) I
c TIME0(times)
! <ab|cj> * t_c^i from KDV
do j = 1, nocc
do i = ind, min(j, ind+n-1)
ttind = j * (j-1) / 2 + i
do a = 1, nvirt
vv(a, 1:nvirt, ttind) = vv(a, 1:nvirt, ttind) +
$ y_bakj(1:nvirt, a, i, j)
end do
end do
end do
c TIMEADD(times(1,9),times)
c 15) c) JE
c TIME0(times)
! sort abci for JE
do i = 1, n
do b = 1, nvirt
do c = 1, nvirt-1
tmp(c+1:nvirt) = abic_baic(b, c+1:nvirt, i, c)
aibc_baic(b, c+1:nvirt, i, c) =
$ abic_baic(b, c, i, c+1:nvirt)
aibc_baic(b, c, i, c+1:nvirt) = tmp(c+1:nvirt)
end do
end do
end do
! JE
call dgemm('n', 'n', nvirt**2*n, nocc, nvirt,
$ -0.5d0, aibc_baic, nvirt**2*n, t, nvirt,
$ 1.d0, y_bakj(1, 1, ind, 1), nvirt**2*nocc)
c TIMEADD(times(1,35),times)
ind=ind+n
end do ! while for abci
maxmemalloc = max(maxmemalloc, imem - imem1)
call dbldealloc(itmp)
c if (ison.eq.'y') write(*,*) '15) a'
cc TIMEADD(times(1,10),times)
c call stopper(id,ison)
C Construct JE, Eq. 22
C 15) c)
c TIME0(times)
call jjtoy(y_bakj,bsymmv,symmocc,dgroup,first,last,co,
$ buffer,irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
call kktoy(y_bakj,bsymmv,symmocc,dgroup,first,last,co, buffer,
$ irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
c TIMEADD(times(1,35),times)
C 15) d)
c f^k * adj(t^j)
c TIME0(times)
do j=1,nocc
do k=1,nocc
call dger(nvirt, nvirt, 1.d0, t(1, j), 1, fai(1, k), 1,
$ y_bakj(1, 1, k, j), nvirt)
enddo
enddo
c TIMEADD(times(1,36),times)
c 1/2 L^{kl} (2 Cbar^{lj - Cbar^{jl}})
c TIME0(times)
ind=1
notend=.true.
if(maxcor - (imem-imem1) .ge. nvirt**2*nocc + nvirt**2) then
iccprbclofj = dblalloc(nvirt**2*nocc)
call rpoint3d(dcore(iccprbclofj), ccpr_bcl_ofj, nvirt, nvirt,
$ nocc)
iccpr = dblalloc(nvirt**2)
call rpoint2d(dcore(iccpr), ccpr, nvirt, nvirt)
else
call memerr(nvirt**2*nocc + nvirt**2, maxcor-(imem-imem1),
$ 'Insufficient memory for ccpr_bcl_ofj! ')
endif
n = blocknumber(nvirt**2, nocc, nocc, 'y', ind, bsize,
$ 'Insufficient memory for one block of ll! ',
$ maxcor-(imem-imem1))
do while(notend)
call rpoint4d(dcore(imem), ll_ablk, nvirt, nvirt, nocc, n)
if(ind+n-1.ge.nocc)then
notend=.false.
n=nocc-ind+1
endif
call ll2read(ind, n, ll_ablk, 'ablk',nocc,nvirt,ifltln)
do j = 1, nocc
do l = 1, j-1
ttind = j * (j-1) / 2 + l
call coijtrcalc(ccpr,nvirt,t(1,l),t(1,j),tt(1,1,ttind))
call dcopy(nvirt**2, ccpr, 1, ccpr_bcl_ofj(1, 1, l), 1)
call tradd(nvirt, -0.5d0, ccpr, ccpr_bcl_ofj(:, :, l))
end do
ttind = j * (j+1) / 2
call coijcalc(ccpr_bcl_ofj(:, :, j), nvirt,
$ t(1, j), t(1, j), tt(1, 1, ttind))
call dscal(nvirt**2, 0.5d0, ccpr_bcl_ofj(1, 1, j), 1)
do l = j+1, nocc
ttind = l * (l-1) / 2 + j
call coijtrcalc(ccpr,nvirt,t(1,j),t(1,l),tt(1,1,ttind))
call dcopy(nvirt**2, ccpr, 1, ccpr_bcl_ofj(1, 1, l), 1)
call dscal(nvirt**2, -0.5d0, ccpr_bcl_ofj(1, 1, l), 1)
call tradd(nvirt, 1.d0, ccpr, ccpr_bcl_ofj(:, :, l))
end do
do k = ind, ind+n-1
call dgemm('n', 't', nvirt, nvirt, nvirt*nocc,
$ 1.d0, ccpr_bcl_ofj, nvirt,
$ ll_ablk(1, 1, 1, k-ind+1), nvirt,
$ 1.d0, y_bakj(1, 1, k, j), nvirt)
end do
end do
ind=ind+n
end do ! while for L
c TIMEADD(times(1,37),times)
maxmemalloc = max(maxmemalloc, imem - imem1)
call dbldealloc(iccprbclofj)
c -1/2 sum_l{l^{lkj} adj(t^l)}
c TIME0(times)
ind=1
notend=.true.
n = blocknumber(nvirt**2, nocc, nocc, 'y', ind, bsize,
$ 'Insufficient memory for one block of ls!',
$ maxcor-(imem-imem1))
do while(notend)
call rpoint4d(dcore(imem), ls_lakj, nocc, nvirt, nocc, n)
if(ind+n-1.ge.nocc)then
notend=.false.
n=nocc-ind+1
endif
call ls2read(ind, n, ls_lakj, nocc, nvirt, ifltln)
call dgemm('n', 'n', nvirt, nvirt*nocc*n, nocc,
$ -0.5d0, t, nvirt, ls_lakj, nocc,
$ 1.d0, y_bakj(1, 1, 1, ind), nvirt)
ind=ind+n
end do ! while for l
c TIMEADD(times(1,38),times)
c if (ison.eq.'y') write(*,*) '15) b-d'
c call stopper(id,ison)
c if(ison.eq.'y')write(6,*)'16), 17)'
C 16)
if(maxcor-(imem-imem1) .ge. nvirt**2*nocc)then
ittpr = dblalloc(nvirt**2*nocc)
call rpoint3d(dcore(ittpr), ttpr, nvirt, nvirt, nocc)
else
call memerr(nvirt**2*nocc, maxcor-(imem-imem1),
$ 'Insufficient memory for ttpr! ')
endif
do i = 1, nocc
c TIME0(times)
! 2 * T^{ik} - T^{ki}
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
C$OMP& PRIVATE(k,l,indmin,indmax,ttind)
do k=1,nocc
indmin = min(i,k)
indmax = max(i,k)
ttind = indmax * (indmax-1) / 2 + indmin
if(i. le. k) then
do l = 1, nvirt
ttpr(1:nvirt, l, k) =
$ 2.d0 * tt(1:nvirt,l,ttind) - tt(l,1:nvirt,ttind)
end do
else
do l = 1, nvirt
ttpr(1:nvirt, l, k) =
$ 2.d0 * tt(l,1:nvirt,ttind) - tt(1:nvirt,l,ttind)
end do
end if
enddo
C$OMP END PARALLEL DO
c G^{i,j}
do j = i, nocc
ttind = j * (j-1) / 2 + i
call dgemm('n', 't', nvirt, nvirt, nvirt*nocc,
$ 1.d0, ttpr, nvirt, y_bakj(1, 1, 1, j), nvirt,
$ 1.d0, vv(1, 1, ttind), nvirt)
end do
c G^{j,i}^T
do j = 1, i
ttind = i * (i-1) / 2 + j
call dgemm('n', 't', nvirt, nvirt, nvirt*nocc,
$ 1.d0, y_bakj(1, 1, 1, j), nvirt, ttpr, nvirt,
$ 1.d0, vv(1, 1, ttind), nvirt)
end do
c TIMEADD(times(1,39),times)
cC 9) a)
! (2 * T^{ik} - T^{ki}) *r^k -> v^i
c TIME0(times)
call dgemv('n', nvirt, nvirt*nocc,
$ 1.d0, ttpr, nvirt, rr, 1, 1.d0, v(1, i), 1)
c TIMEADD(times(1,23),times)
end do
maxmemalloc = max(maxmemalloc, imem - imem1)
call dbldealloc(iybakj)
c 17)
c TIME0(times)
! T^{ij} * X -> V^{ij}
do j = 1, nocc
do i = 1, j
ttind2 = (j-1) * j / 2 + i
call dgemm('n', 'n', nvirt, nvirt, nvirt,
$ 1.d0, tt(1, 1, ttind2), nvirt, x, nvirt,
$ 1.d0, vv(1, 1, ttind2), nvirt)
enddo
enddo
c adj(G^{j,i})
! X * T^{ji} -> V^{ij}
call dgemm('t', 'n', nvirt, nvirt*nocc*(nocc+1)/2, nvirt,
$ 1.d0, x, nvirt, tt, nvirt,
$ 1.d0, vv, nvirt)
c TIMEADD(times(1,40),times)
c if (ison.eq.'y') write(*,*) '16-17)'
c call timer
c call stopper(id,ison)
C Construct D, Eq. 27, including E, Eq. 21
C Construct C overbar, Eq. 37
C Construct JE, Eq. 22
C Construct KE, Eq. 23
C Construct Z, Eq. 36
C Construct Y, Eq. 35
C Construct A Eq. 33
C Construct r Eq. 31
C Construct X Eq. 34
C Construct KD Eq. 20
C Construct s Eq. 32
C Construct beta Eq. 30
C Construct alpha Eq. 29
C Construct G, Eq. 28
C Construct v Eq. 24
C Construct V, Eq. 25
! calculate norm before updating amplitudes
norm = calcnorm(nvirt, nocc, v, vv)
C Update cluster amplitudes
do j = 1, nocc
do i = 1, j
ttind = (j-1) * j / 2 + i
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
C$OMP& PRIVATE(a,b)
do b = 1, nvirt
do a = 1, nvirt
vv(a, b, ttind) = vv(a, b, ttind) /
$ (fab(a, a) + fab(b, b) - fij(i, i) - fij(j, j))
tt(a, b, ttind) = tt(a, b, ttind) - vv(a, b, ttind)
end do
end do
C$OMP END PARALLEL DO
end do
do a = 1, nvirt
v(a, j) = v(a, j) / (fab(a, a) - fij(j, j))
t(a, j) = t(a, j) - v(a, j)
end do
end do
c if (ison.eq.'y') write(*,*) 'update)'
c call timer
c call stopper(id,ison)
c call getarg(2,dextr)
c if(dextr.eq.'y') then
C DIIS extrapolation
call diis(nit+nit0,nocc*nvirt+nvirt**2*nocc*(nocc+1)/2,t,v,ccmaxit
$ ,diisfile,errfile,ifltln,bmat,invbmat)
c if (ison.eq.'y') write(*,*) 'diis)'
c call timer
c call stopper(id,ison)
c call timer
c endif
C Construct C, Eq. 26
C Calculate CCSD Energy
c call largest(nocc,nvirt,vv,v,tt,t,dcore(imem+nvirt**2*(nocc+1)*
c $nocc/2),dcore(imem+nvirt**2*(nocc+1)*nocc/2),icore(iimem),iout)
call energycalc(t,tt,fai,nit,eref,norm,
$ scrs,localcc,ccsdalg,scrs,
$ maxcor,imem,dcore,ifltln,iout,nvirt,nocc,idummy,ecc,scrs,
$ dcore(iscrs2), mpi_rank, mpi_size,mpi_ccsd, times,
$ ccsd_communicator,emp2full,lno,lnaf,emp2corr,eccs,eccss,eccos,
$eccscs,eccoss,eccsss,.false.,master_thread,.false., vv, vv, et1)
cc TIMEADD(times(1,6),ttmp)
c if (ison.eq.'y') write(*,*) 'energy)'
c call stopper(id,ison)
c call timer
if(ccsdrest .ne. 'off ') then
call write_rst_ccsd(nit, ecc, eccold, norm, lene, t, tt,
$ nocc, nvirt)
end if
enddo ! iteration }}}
c TIMEADD(times(1,50),times(1, 99))
if(localcc.ne.'off ') then
call localenergycalc(t,tt,fai,nit,scrs,int1,esdi,ui,
$ ccsdalg,idummy,rdummy,rdummy,nocc, nvirt, dcore, maxcor,
$ imem, iout, ifltln,scrs,dcore(iscrs2))
endif
C
c write(6,*)norm,dabs(eccold-ecc),tol
write(iout,*)
if(norm/10.gt.tol.or.dabs(eccold-ecc).gt.tol) then
ccsd_converged = .false.
write(iout,
$"(' Convergence not achieved in',i3,' iterations!')") nit
if(localcc.ne.'off ')call mrccend(1)
else
ccsd_converged = .true.
write(iout,"(' CCSD iteration has converged in',i3,' steps.')")
$nit
endif
234 continue
inquire(file = 'pt.rst', exist = rstex)
ptrest = (ccsdrest .eq. 'ccsd') .and. (nit .eq. 0) .and. rstex
write(iout,'(f12.2," MB memory was utilized for CCSD")')
$ maxmemalloc*8.d0/1024**2
c analyse T1 & T2 amplitudes
if(nocc.ne.0 .and. nvirt.ne.0)
$ call tprt(ltpr,nc,nocc,nvirt,t,tt,tprtol,iout,scrs)
if(localcc.ne.'off ')then
write(iout,*)
write(iout,'(" T1 contribution [au]: ",f22.12)')et1
write(iout,'(" CCSD correlation energy [au]: ",f22.12)')esdi
write(iout,*)
c write(iout,'(" CCSD total energy [au]: ",f22.12)')
c $esdi+eref
call prtenergc('CCSD ',esdi,eref,locnoc)
else
write(iout,*)
write(iout,
$ '(" T1 contribution [au]: ",f22.12)')et1
write(iout,
$ '(" CCSD correlation energy [au]: ",f22.12)')
$ ecc-eref
write(iout,
$ '(" Total CCSD energy [au]: ",f22.12)')
$ ecc
if((lno .or. lnaf) .and. dfintran.ne.'ovirt ')
$ write(iout,
$ '(" Total CCSD energy with correction [au]: ",f22.12)')
$ ecc + emp2corr
write(iout,*)
c call prtenergc('CCSD ',ecc,eref,locnoc)
call prtenergc('CCSD ',ecc+emp2corr,eref,locnoc)
endif
call flush(iout)
C }}} end of (local)CCSD calculation
C
c timing data {{{
if (.false.) then
write(iout,*)
write(iout,"(1x,70('*'))")
write(iout,'(a94)') ' Time of manipulations:' //
$ '1 it. CPU/Wall [min], tot CPU/Wall [min], CPU/Wall ratio, tot %'
write(iout,*)
txt(1)= '" 1 "'
txt(2)= '" 2 abc "'
txt(3)= '" 2 de "'
txt(4)= '" 2 f "'
txt(5)= '" 2 g "'
txt(6)= '" 3 "'
txt(7)= '" 4 a "'
txt(8)= '" 4 b II "'
txt(9)= '" 4 b I "'
txt(10)='" 5 a "'
txt(11)='" 5 b "'
txt(12)='" 5 c "'
txt(13)='" 5 d "'
txt(14)='" 5 e "'
txt(15)='" 6 abc "'
txt(16)='" "'
txt(17)='" 7 bc "'
txt(18)='" 7 e "'
txt(19)='" 7 d "'
txt(20)='" 8 a "'
txt(21)='" 8 b "'
txt(22)='" 8 c "'
txt(23)='" 9 a "'
txt(24)='" 9 b "'
txt(25)='" 10 "'
txt(26)='" 11 "'
txt(27)='" 12 "'
txt(28)='" 13 e K "'
txt(29)='" 13 d "'
txt(30)='" 13 ac "'
txt(31)='" 13 e k "'
txt(32)='" 14 "'
txt(33)='" 15 a "'
txt(34)='" 15 b "'
txt(35)='" 15 c "'
txt(36)='" 15 d f "'
txt(37)='" 15 d L "'
txt(38)='" 15 d l "'
txt(39)='" 16 "'
txt(40)='" 17 "'
txt(41)='" 5 b teljes K "'
do i=1,40
if(i.eq.16) cycle
ttmp(1,1)=times(4,50)
write(txt(0),*) trim(txt(i))
write(iout,"(" // txt(0) // ",1x,7f10.3)")
$ times(3,i)/nit/60.d0,times(4,i)/nit/60.d0,
c $ times(5,i),times(6,i),
$ times(3,i)/60.d0,times(4,i)/60.d0,times(3,i)/times(4,i),
$ times(4,i)/ttmp(1,1)*100.d0
enddo
endif ! timing data }}}
C delete tfile,llfile,lsfile,diisfile,errfile {{{
open(tfile,file='tfile')
close(tfile,status="delete")
open(703,file='lsfile')
close(703,status="delete")
open(704,file='llfile')
close(704,status="delete")
open(710,file='kkfile')
close(710,status="delete")
open(diisfile,file='fort.18')
close(diisfile,status="delete")
open(errfile,file='fort.19')
close(errfile,status="delete")
if(localcc.ne.'off ')then
open(722,file='iabc')
close(722,status="delete")
endif
call system('rm -f *infofile')
C }}}
return
end subroutine
************************************************************************
subroutine atpakol(n,nw,ind,ind2,abci,iabc,nvirt,nocc)
************************************************************************
c rearrange: <a_all b_all | c_block i_block> to < i_block a_all | b_all c_block >
implicit none
integer nvirt, nocc
integer n,nw,ind,ind2
integer i,c
real*8 abci(nvirt,nvirt,nvirt,n)
real*8 iabc(nocc,nvirt,nvirt,nw)
do i=1,n
do c=ind2,nw+ind2-1
call dcopy(nvirt**2,abci(1,1,c,i),1,
$iabc(i+ind-1,1,1,c-ind2+1),nocc)
enddo
enddo
return
end subroutine
C
************************************************************************
subroutine rpa_diis
$(nact,nmax,pvec,evec,ndmax,bmat,bvec,invbmat,cvec,norm,
$diisfile,errfile,tfile,tinfo,ifltln,ibufln,irecln,nvirt,nocc)
************************************************************************
* DIIS extrapolation for the dRPA iteration *
************************************************************************
implicit none
integer diisfile, errfile, tfile, tinfo
integer ifltln, ibufln, irecln
integer nvirt, nocc
integer nact,iact,i,j,nmax,ndmax
integer ipiv(ndmax)
real*8 bmat(ndmax,ndmax),bvec(ndmax**2),invbmat(ndmax,ndmax),ddot
real*8 cvec(ndmax),sum,sum1,evec(nmax),pvec(nmax),norm,dnrm2
C
open(diisfile,file='diisfile',status='unknown',access='direct',
$recl=nmax*ifltln)
open(errfile,file='errfile',status='unknown',access='direct',
$recl=nmax*ifltln)
C
c rewind(tfile)
c if(nact.eq.1) then
c call dfillzero(evec,nmax)
c else
c read(tfile) evec
call rpa_ttread(evec,'d',nvirt,nocc,ibufln,irecln,tfile,tinfo)
c endif
C Save amplitudes and calculate new error vector
write(diisfile,rec=nact) pvec
call daxpy(nmax,-1.d0,evec,1,pvec,1)
norm=dnrm2(nmax,pvec,1)
write(errfile,rec=nact) pvec
C Update the scalar product matrix (B matrix)
do iact=1,nact
read(errfile,rec=iact) evec
sum=ddot(nmax,pvec,1,evec,1)
bmat(nact,iact)=sum
bmat(iact,nact)=sum
enddo
C Invert B matrix
do j=1,nact
do i=1,j
bvec(i+(j-1)*j/2)=bmat(i,j)
enddo
enddo
call dsptrf('U',nact,bvec,ipiv,invbmat,i)
call dsptri('U',nact,bvec,ipiv,invbmat,i)
do j=1,nact
do i=1,j
invbmat(i,j)=bvec(i+(j-1)*j/2)
invbmat(j,i)=invbmat(i,j)
enddo
enddo
C Denominator
sum=0.d0
do i=1,nact
do j=1,nact
sum=sum+invbmat(i,j)
enddo
enddo
sum=1.d0/sum
C Coefficients
do i=1,nact
sum1=0.d0
do j=1,nact
sum1=sum1+invbmat(i,j)
enddo
cvec(i)=sum*sum1
enddo
C New amplitudes
call dfillzero(pvec,nmax)
do iact=1,nact
sum=cvec(iact)
read(diisfile,rec=iact) evec
call daxpy(nmax,sum,evec,1,pvec,1)
enddo
C Save new amplitudes
c rewind(tfile)
c write(tfile) pvec
call rpa_twrite(pvec,tfile,tinfo,irecln,ibufln,nocc,nvirt)
C
close(diisfile)
close(errfile)
C
return
end subroutine
C
************************************************************************
subroutine tradd(n,alpha,mat,newmat)
************************************************************************
* transpose and add constant times a square matrix to another matrix *
************************************************************************
integer n,i
real*8 mat(n,n),newmat(n,n),alpha
do i=1,n
call daxpy(n,alpha,mat(i,1),n,newmat(1,i),1)
enddo
return
end subroutine
************************************************************************
subroutine tr(n,mat,newmat)
************************************************************************
* transpose a square matrix to a scratch matrix *
************************************************************************
integer n,i
real*8 mat(n,n),newmat(n,n)
do i=1,n
call dcopy(n,mat(i,1),n,newmat(1,i),1)
enddo
return
end subroutine
************************************************************************
subroutine ccijtrcalc(scrs,nvirt,ti,tj,ttij)
************************************************************************
* calculates the cc(:,:,i,j)' matrix to a scratch matrix *
************************************************************************
implicit none
integer nvirt
real*8 scrs(nvirt,nvirt)
real*8 ttij(nvirt,nvirt)
real*8 ti(nvirt)
real*8 tj(nvirt)
call tr(nvirt,ttij,scrs)
call dger(nvirt,nvirt,1.d0,tj,1,ti,1,scrs,nvirt)
return
end subroutine
************************************************************************
subroutine ccijcalc(scrs,nvirt,ti,tj,ttij)
************************************************************************
* calculates the cc(:,:,i,j) matrix to a scratch matrix *
************************************************************************
implicit none
integer nvirt
real*8 scrs(nvirt,nvirt)
real*8 ttij(nvirt,nvirt)
real*8 ti(nvirt)
real*8 tj(nvirt)
call dcopy(nvirt**2,ttij,1,scrs,1)
call dger(nvirt,nvirt,1.d0,ti,1,tj,1,scrs,nvirt)
return
end subroutine
************************************************************************
subroutine coijcalc(scrs,nvirt,ti,tj,ttij)
************************************************************************
* calculates the co(:,:,i,j) matrix to a scratch matrix *
************************************************************************
implicit none
integer nvirt
real*8 scrs(nvirt,nvirt)
real*8 ttij(nvirt,nvirt)
real*8 ti(nvirt)
real*8 tj(nvirt)
call dcopy(nvirt**2,ttij,1,scrs,1)
call dscal(nvirt**2,0.5d0,scrs,1)
call dger(nvirt,nvirt,1.d0,ti,1,tj,1,scrs,nvirt)
return
end subroutine
************************************************************************
subroutine coijtrcalc(scrs,nvirt,ti,tj,ttij)
************************************************************************
* calculates the co(:,:,i,j) matrix to a scratch matrix *
************************************************************************
implicit none
integer nvirt
real*8 scrs(nvirt,nvirt)
real*8 ttij(nvirt,nvirt)
real*8 ti(nvirt)
real*8 tj(nvirt)
call tr(nvirt,ttij,scrs)
call dscal(nvirt**2,0.5d0,scrs,1)
call dger(nvirt,nvirt,1.d0,tj,1,ti,1,scrs,nvirt)
return
end subroutine
***********************************************************************
subroutine xpplcalc(x,xmat,tmp,ccx,recbounds,times,nocc,nvirt,
$ maxcor,imem,iout,ibufln,irecln)
***********************************************************************
implicit none
integer ibufln, irecln
integer maxcor, imem, iout
integer nocc, nvirt
integer ind,n
integer a,b,c,nn,nm
character*1 x
logical notend,notfull
integer avmem,imem1,recbounds(nvirt,2)
common/memcom/ imem1
real*8 tmp(*),dbeta
real*8 ccx((nvirt+1)*nvirt/2,(nocc+1)*nocc/2)
real*8 xmat((nvirt+1)*nvirt/2,(nocc+1)*nocc/2)
real*8 times(6,0:100)
ind=1
notend=.true.
c if(nit.eq.1)then
c write(6,'(" a b c nn nm nn-a+1 nn-b+1 n ind")')
c endif
do while(notend)
notfull=.true.
avmem=maxcor-(imem-imem1)-(nvirt+1)*nvirt*(nocc+1)*nocc/4
c write(6,'("avmem: ",i10)')avmem
if(avmem.ge.(nvirt-ind+1)**2*(nvirt-ind+2)/2)then
n=1
a=(nvirt-ind+1)*(nvirt-ind+2)/2
do while(notfull.and.notend)
if(n+ind-1.eq.nvirt)then
notend=.false.
else
b=(nvirt-(ind+n)+1)*(nvirt-(ind+n)+2)/2
c=a-b
if(a*c.le.avmem)then
n=n+1
c write(6,'(3i5)')a,c,n
endif
b=(nvirt-(ind+n)+1)*(nvirt-(ind+n)+2)/2
c=a-b
if(a*c.gt.avmem)then
n=n-1
notfull=.false.
endif
endif
enddo
else
write(iout,*)'Insufficient memory!'
call mrccend(1)
endif
if(ind+n-1.ge.nvirt)then
notend=.false.
n=nvirt-ind+1
else
notend=.true.
endif
c write(6,'("in ppl: beg,end,num,tot:",9i5)')ind,ind+n-1,n,nvirt
a=(nvirt-ind+1)*(nvirt-ind+2)/2
b=(nvirt-(ind+n)+1)*(nvirt-(ind+n)+2)/2
c=a-b
nn=nvirt*(nvirt+1)/2
nm=nocc*(nocc+1)/2
cc TIME0(times)
call readxlist(x,ind,n,tmp,a,c,recbounds,nocc,nvirt,
$ ibufln,irecln)
cc TIMEADD(times(1,4),times)
c if(nit.eq.1)then
c write(6,'(9i5)')a,b,c,nn,nm,nn-a+1,nn-b+1,n,ind
c endif
cc TIME0(times)
if(ind.eq.1)then
dbeta=0.d0
else
dbeta=1.d0
endif
if(ind+n-1.eq.nvirt)then
call dsymm('l', 'l', a, nm,
$ 1.d0, tmp, a, ccx(nn-a+1,1), nn,
$ dbeta, xmat(nn-a+1,1), nn)
else
call dsymm('l', 'l', a-b, nm,
$ 1.d0, tmp, a, ccx(nn-a+1,1), nn,
$ dbeta, xmat(nn-a+1,1), nn)
call dgemm('n', 'n', b, nm, c,
$ 1.d0, tmp(c+1), a, ccx(nn-a+1,1), nn,
$ dbeta, xmat(nn-b+1,1),nn)
call dgemm('t', 'n', c, nm, b,
$ 1.d0, tmp(c+1), a, ccx(nn-b+1,1), nn,
$ 1.d0, xmat(nn-a+1,1), nn)
endif
cc TIMEADD(times(1,5),times)
ind=ind+n
enddo
return
end subroutine
*********************************************************************
subroutine twrite(t, tfile, irecln, tinfo, ibufln, nvirt, nocc)
*********************************************************************
implicit none
integer tfile, irecln, tinfo, ibufln, nvirt, nocc
real*8 t(nvirt*nocc+nvirt**2*(nocc+1)*nocc/2),buffer(ibufln)
integer i,lastrec,lastrecsize
call dfillzero(buffer,ibufln)
open(tfile,file='tfile',access='direct',recl=irecln,
$status='unknown')
lastrecsize=modulo(nvirt*nocc+nvirt**2*(nocc+1)*nocc/2,
$ibufln)
if(lastrecsize.eq.0)lastrecsize=ibufln
lastrec=(nvirt*nocc+nvirt**2*(nocc+1)*nocc/2-lastrecsize)/ibufln+1
do i=1,lastrec-1
write(tfile,rec=i)t((i-1)*ibufln+1:i*ibufln)
enddo
call dcopy(lastrecsize,t((lastrec-1)*ibufln+1),1,buffer,1)
write(tfile,rec=lastrec)buffer(1:ibufln)
open(tinfo,file='tinfofile')
rewind(tinfo)
write(tinfo,*)lastrec,lastrecsize
c close(vfile)
close(tfile)
close(tinfo)
return
end subroutine
*********************************************************************
subroutine rpa_twrite(t,tfile,tinfo,irecln,ibufln,nocc,nvirt)
*********************************************************************
implicit none
integer tfile, tinfo, irecln, ibufln
integer nocc, nvirt
real*8 t(nvirt**2*(nocc+1)*nocc/2),buffer(ibufln)
integer i,lastrec,lastrecsize
call dfillzero(buffer,ibufln)
open(tfile,file='tfile',access='direct',recl=irecln,
$status='unknown')
lastrecsize=modulo(nvirt**2*(nocc+1)*nocc/2,
$ibufln)
if(lastrecsize.eq.0)lastrecsize=ibufln
lastrec=(nvirt**2*(nocc+1)*nocc/2-lastrecsize)/ibufln+1
do i=1,lastrec-1
write(tfile,rec=i)t((i-1)*ibufln+1:i*ibufln)
enddo
call dcopy(lastrecsize,t((lastrec-1)*ibufln+1),1,buffer,1)
write(tfile,rec=lastrec)buffer(1:ibufln)
open(tinfo,file='tinfofile')
rewind(tinfo)
write(tinfo,*)lastrec,lastrecsize
c close(vfile)
close(tfile)
close(tinfo)
return
end subroutine
*********************************************************************
subroutine rpa_ttread(t,d,nvirt,nocc,ibufln,irecln,tfile,tinfo)
*********************************************************************
implicit none
integer ibufln, irecln, tfile, tinfo
integer nvirt, nocc
real*8 t(nvirt**2*(nocc+1)*nocc/2),buffer(ibufln)
integer i,lastrec,lastrecsize
character*1 d
open(tinfo,file='tinfofile')
rewind(tinfo)
read(tinfo,*)lastrec,lastrecsize
open(tfile,file='tfile',access='direct',recl=irecln,
$status='old')
do i=1,lastrec-1
read(tfile,rec=i)t((i-1)*ibufln+1:i*ibufln)
enddo
read(tfile,rec=lastrec)buffer(1:ibufln)
call dcopy(lastrecsize,buffer,1,t((lastrec-1)*ibufln+1),1)
if(d.eq.'d')close(tfile,status="delete")
if(d.eq.'n')close(tfile)
close(tinfo)
return
end subroutine
*********************************************************************
subroutine ttread(t,d, ibufln, irecln, tfile, tinfo)
*********************************************************************
implicit none
integer ibufln, irecln, tfile, tinfo
real*8 t(*),buffer(ibufln)
integer i, lastrec, lastrecsize
character*1 d
open(tinfo,file='tinfofile')
rewind(tinfo)
read(tinfo,*)lastrec,lastrecsize
open(tfile,file='tfile',access='direct',recl=irecln,
$status='old')
do i=1,lastrec-1
read(tfile,rec=i)t((i-1)*ibufln+1:i*ibufln)
enddo
read(tfile,rec=lastrec)buffer(1:ibufln)
call dcopy(lastrecsize,buffer,1,t((lastrec-1)*ibufln+1),1)
if(d.eq.'d')close(tfile,status="delete")
if(d.eq.'n')close(tfile)
close(tinfo)
return
end subroutine
*********************************************************************
subroutine readinfo(nbasis,nocc,dgroup,bsymmv,symmocc,
$ico,co,h,NENERGY,first,last,ui,deglev,ccsdalg,nirmax)
*********************************************************************
implicit none
integer nirmax
integer bsymmv(nirmax+1),symmocc(nirmax)
integer nocc,nbasis,dgroup
integer co(nbasis),ico(nbasis)
real*8 h(nbasis,nbasis)
integer first(0:5,8,8,nbasis),last(0:5,8,8,nbasis)
integer i,j,k,ii,jj
integer bsymm,csymm,isymm,xsymm
real*8 NENERGY
integer deglev,actocc
real*8 ui(nocc)
character*8 ccsdalg
c
ii=1
do isymm=1,dgroup
do j=bsymmv(isymm),bsymmv(isymm)+symmocc(isymm)-1
co(j)=ii
ico(ii)=isymm
ii=ii+1
enddo
enddo
do isymm=1,dgroup
do j=bsymmv(isymm)+symmocc(isymm),bsymmv(isymm+1)-1
co(j)=ii
ico(ii)=isymm
ii=ii+1
enddo
enddo
c open(555,file='fort.555')
open(555,file='55') !RZ.
rewind 555
read(555,*)
read(555,*)
read(555,*)
read(555,*)actocc,deglev
read(555,*)ui(1:nocc)
C Reading the Fock-matrix!!
do xsymm=1,dgroup
do i=bsymmv(xsymm),bsymmv(xsymm+1)-1
do j=bsymmv(xsymm),bsymmv(xsymm+1)-1
read(555,*) h(i,j)
h(j,i)=h(i,j)
c write(6,'(2i3,f14.10)')i,j,h(i,j)
c write(917,'(F19.12,2I5)')h(i,j),i,j
enddo
enddo
enddo
read(555,*) NENERGY
close(555,status='keep')
c
! open(56,file='fort.56')
first=0
last=0
bsymm=1
if(ccsdalg .ne. 'dfdirect') then
open(56,file='56') !R.Z
rewind 56
do while(bsymm.ne.0)
read(56,*) jj,bsymm,csymm,i,j,k
c write(916,'(6I5)')jj,bsymm,csymm,i,j,k
c jj: milyen tipusu integral
c b/csymm: 2./3. indexre a szimm
c i: utolso index erteke
c j,k: mettol meddig kell olvasni (rekordot)
c if(bsymm.eq.0)exit
if(bsymm.ne.0)then
first(jj,bsymm,csymm,i)=j
last(jj,bsymm,csymm,i)=k
endif
enddo
close(56)
end if
return
end subroutine
************************************************************************
subroutine localenergycalc(t,tt,fai,nn,
$scrs,int1,esdi,ui,ccsdalg,dfnb,jia,abj,
$nocc, nvirt, dcore, maxcor, imem, iout,ifltln,kk_jk_ofab,t1t)
C Calculate i-th local energy part
************************************************************************
implicit none
integer nocc, nvirt
integer maxcor, imem, iout, ifltln
real*8 dcore(*)
real*8 t(nvirt,nocc),tt(nvirt,nvirt,(nocc+1)*nocc/2)
real*8 scrs(nvirt,nvirt),fai(nvirt,nocc)
real*8 int1(nocc,nocc),ui(nocc)
real*8 ddot,esdi,abj(nvirt,nvirt,nocc)
logical notend,localcc15p,diskints4cc
integer ind,n,j,k,l,ttind,nn
integer imem1,pos
character*4 localcc !NP
character*16 naf !NP
common/memcom/imem1
integer dblalloc
integer blocknumber, bsize
real*8, pointer :: ll_kaj_b(:,:,:,:), cc_laj_b(:,:,:,:),abofj(:)
real*8 kk_jk_ofab(nocc, nocc)
integer illkajb, icclajb
integer a, b
character*8 ccsdalg,lccoporder
integer dfnb
real*8 jia(nocc, nvirt, dfnb)
real*8 t1t(nocc, nvirt)
interface
c {{{ interfaces for pointers
subroutine rpoint1d(egydim1,egydim2,dim1)
implicit none
integer dim1
real*8,target :: egydim1(dim1)
real*8, pointer :: egydim2(:)
end subroutine
subroutine rpoint4d(egydim,haromdim,dim1,dim2,dim3,dim4)
implicit none
integer dim1,dim2,dim3,dim4
real*8,target :: egydim(dim1,dim2,dim3,dim4)
real*8, pointer :: haromdim(:,:,:,:)
end subroutine
c}}}
end interface
call getkey('localcc',7,localcc,4)
call getkey('naf_cor',7,naf,16)
call getkey('lccoporder',10,lccoporder,8)
diskints4cc=lccoporder.eq.'trffirst'.or.ccsdalg.eq.'disk '
call dgemm('t','n',nocc,nocc,nvirt,2.d0,
$fai,nvirt,t,nvirt,0.d0,int1,nocc)
if (localcc15p(localcc).and.naf.ne.'off ') then
c contribution of the f*t1 type term to dE^CCSD_i
call dgemv('n',nocc,nocc,1.d0,int1,nocc,ui,1,0.d0,dcore(imem),1)
esdi=ddot(nocc,ui,1,dcore(imem),1)
endif
if (.not.localcc15p(localcc).or.naf.eq.'off ') then !NP
c {{{ E^CCSD is computed with density fitted integrals and amplitudes
if(ccsdalg .ne. 'dfdirect') then
ind=1
notend=.true.
n = blocknumber(nvirt**2, nocc, nocc, 'y', ind, bsize,
$ 'Insufficient memory! ',
$ maxcor-(imem-imem1))
do while(notend)
c if(maxcor-(imem-imem1).ge.nvirt**2*nocc)then
c n=(maxcor-(imem-imem1)-modulo(maxcor-(imem-imem1),
c $nvirt**2*nocc))/(nvirt**2*nocc)
c else
c write(iout,*)'Insufficient memory!'
c call mrccend(1)
c endif
if(ind+n-1.ge.nocc)then
notend=.false.
n=nocc-ind+1
endif
call ll2read(ind,n,dcore(imem), 'abkl',nocc,nvirt,ifltln)
do j=ind,ind+n-1
do k=1,nocc
do l=1,nocc
if(l.le.j)then
ttind=(j-1)*j/2+l
call ccijcalc(scrs,nvirt,t(1,l),t(1,j),
$ tt(1,1,ttind))
int1(k,l)=int1(k,l)+ddot(nvirt**2,
$ dcore(imem+(k-1)*nvirt**2+(j-ind)*nvirt**2*nocc),1,scrs,1)
endif
if(l.gt.j)then
ttind=(l-1)*l/2+j
call ccijtrcalc(scrs,nvirt,t(1,j),t(1,l),
$ tt(1,1,ttind))
int1(k,l)=int1(k,l)+ddot(nvirt**2,
$ dcore(imem+(k-1)*nvirt**2+(j-ind)*nvirt**2*nocc),1,scrs,1)
endif
c do a=1,nvirt
c do b=1,nvirt
c write(*,'(4i3,f22.16)') j,l,a,b,
c $ scrs(a,b)
c end do
c end do
enddo
enddo
enddo
ind=ind+n
enddo
else ! ccsdalg .eq. 'dfdirect'
n = blocknumber(2*nvirt*nocc, nocc, nvirt, 'y', 1, bsize,
$ 'Insufficient memory in localenergycalc! ',
$ maxcor-(imem-imem1))
do a = 1, nvirt
call dcopy(nocc, t(a, 1), nvirt, t1t(1, a), 1)
end do
illkajb = dblalloc(nocc**2*nvirt*n)
call rpoint4d(dcore(illkajb), ll_kaj_b, nocc, nvirt, nocc, n)
icclajb = dblalloc(nocc**2*nvirt*n)
call rpoint4d(dcore(icclajb), cc_laj_b, nocc, nvirt, nocc, n)
ind = 1
notend = .true.
do while(notend)
if(ind+n-1 .ge. nvirt) then
notend = .false.
n = nvirt-ind+1
end if
! Labkj
call dgemm('n', 't', nocc*nvirt, nocc*n, dfnb,
$ 1.d0, jia, nocc*nvirt, jia(1, ind, 1), nocc*nvirt,
$ 0.d0, ll_kaj_b, nocc*nvirt)
do b = 1, n
do a = 1, nvirt
do k = 1, nocc
call dcopy(nocc, ll_kaj_b(k, a, 1, b), nocc*nvirt,
$ kk_jk_ofab(1, k), 1)
end do
do k = 1, nocc
ll_kaj_b(k, a, 1:nocc, b) =
$ 2.d0 * ll_kaj_b(k, a, 1:nocc, b)
$ - kk_jk_ofab(k, 1:nocc)
end do
end do
end do
! Cablj
do j = 1, nocc
do b = ind, ind+n-1
do l = 1, j
ttind = j * (j-1) / 2 + l
cc_laj_b(l, 1:nvirt, j, b-ind+1) =
$ tt(1:nvirt, b, ttind)
end do
do l = j+1, nocc
ttind = l * (l-1) / 2 + j
cc_laj_b(l, 1:nvirt, j, b-ind+1) =
$ tt(b, 1:nvirt, ttind)
end do
end do
end do
call dger(nocc*nvirt, nocc*n,
$ 1.d0, t1t, 1, t1t(1, ind), 1,
$ cc_laj_b, nocc*nvirt)
c do j=1,nocc
c do l=1,nocc
c do a=1,nvirt
c do b=ind,ind+n-1
c write(*,'(4i3,f22.16)') j,l,a,b,cc_laj_b(l,a,j,b)
c end do
c end do
c end do
c end do
call dgemm('n', 't', nocc, nocc, nvirt*nocc*n,
$ 1.d0, ll_kaj_b, nocc, cc_laj_b, nocc,
$ 1.d0, int1, nocc)
ind = ind + n
end do
call dbldealloc(illkajb)
end if
c esdi= !!??? Ui'*INT1*Ui
call dgemv('n',nocc,nocc,1.d0,int1,nocc,ui,1,0.d0,scrs,1)
esdi=ddot(nocc,ui,1,scrs,1)
c }}}
c
elseif(localcc15p(localcc).and.naf.ne.'off ') then
c E^CCSD is computed with amplitudes assembled in the NAF basis and integrals assembled in the total aux basis
if(maxcor-(imem-imem1).lt.2*nvirt**2) then ! for L_ab and transformed C_ab
write(iout,*)'Insufficient memory!'
call mrccend(1)
endif
if (diskints4cc) then
pos=imem+nvirt**2
open(700,file='ajb',form='unformatted')
rewind(700)
call rpoint1d(dcore(pos), abofj, nvirt**2)
endif
c db
c write(*,*) 'ajb in ccsd.f', imem, pos
c db
do j=1,nocc
call dfillzero(dcore(imem),nvirt**2)
if (diskints4cc) then
read(700) abofj(1:nvirt**2)
else
call rpoint1d(abj(1,1,j), abofj, nvirt**2)
endif
c db
c write(*,*) 'abofj for j',j
c write(*,'(1000f12.6)') abofj(1:nvirt**2)
c db
do l=1,nocc
if(l.le.j)then
ttind=(j-1)*j/2+l
call ccijcalc(scrs,nvirt,t(1,l),t(1,j),
$ tt(1,1,ttind)) ! get C^ab_lj for a given lj
elseif(l.gt.j)then
ttind=(l-1)*l/2+j
call ccijtrcalc(scrs,nvirt,t(1,j),t(1,l),
$ tt(1,1,ttind)) ! get C^ab_lj for a given lj
endif
call daxpy(nvirt**2,ui(l),scrs,1,dcore(imem),1) ! transform l canonical to I localized orb index
enddo !l
esdi=esdi+ddot(nvirt**2,abofj,1,dcore(imem),1) ! sum_ab L^ab_Ij * C^ab_Ij for all j
enddo !j
c db
c write(*,*) 'localE, t1 +t2', esdi , nocc, nvirt
c db
if (diskints4cc) close(700)
endif
if(nn.eq.0)then
write(iout,"(
$ ' LMP2 domain correlation energy contribution [au]:'
$ ,f22.12)")esdi
endif
return
end subroutine
************************************************************************
subroutine energycalc(t,tt,fai,nn,er,norm,scrs,localcc,ccsdalg,
$jia,maxcor,imem,dcore,ifltln,iout,nvirt,nocc,dfnb,ecc,kk_ij_ofab,
$t1t,mpi_rank,mpi_size,mpi_ccsd, times, ccsd_communicator,emp2full,
$lno,lnaf,emp2corr,eccs,eccss,eccos, eccscs, eccoss, eccsss, lpert,
$master_thread, lf12,f12int1, f12int2, et1)
************************************************************************
*
* nn: >0 iteration number, calc. CCSD energy
* 0 MP2 energy calculation
* -1 CCSD energy calculation for NAF correction
* -2 does not print energy (PPL contribution with t=0)
* -3 - -9: CCSD energy (contributions with printing)
*
implicit none
#if defined(MPI)
include "mpif.h"
#endif
integer maxcor, imem, ifltln, iout
integer mpi_rank, mpi_size, mpi_err
integer nvirt, nocc, dfnb, ccsd_communicator
real*8 dcore(*)
real*8 times(6,0:100), eccss, eccos, ecabs,emp2f12,ef12,tmp
real*8 t(nvirt,nocc),tt(nvirt,nvirt,(nocc+1)*nocc/2)
real*8 scrs(nvirt,nvirt)
real*8 fai(nvirt,nocc)
real*8 f12int1(nvirt, nocc)
real*8 f12int2(nvirt, nvirt, nocc*(nocc+1)/2)
real*8 ddot,norm,er
logical notend,localcc15p,lf12
real*8 emp2full, emp2corr
logical lno,lnaf,lpert,master_thread
integer ind,n,i,j,ttind,nn
integer imem1
character*4 localcc
character*8 dfintran
character*9 nonaf
common/memcom/imem1
real*8, pointer :: qf(:,:), qfact(:)
real*8 eccs, tfact
real*8 jia(nocc, nvirt, dfnb)
integer blocknumber, bsize
character*8 ccsdalg
integer dblalloc
integer a, b, find, lind, dhyb
logical mpi_ccsd, lscale
integer illiajb, ittiajb
real*8, pointer :: lliaj_b(:,:,:,:)
real*8, pointer :: ttiaj_b(:,:,:,:)
real*8 kk_ij_ofab(nocc, nocc), t1t(nocc, nvirt)
real*8 ecc, eccscs, eccoss, eccsss, et1
integer iqfact, iqf
real*8 ediag(nocc)
interface
c {{{ interfaces for pointers
subroutine rpoint1d(egydim,haromdim,dim1)
implicit none
integer dim1
real*8,target :: egydim(dim1)
real*8, pointer :: haromdim(:)
end subroutine
subroutine rpoint2d(egydim,haromdim,dim1,dim2)
implicit none
integer dim1,dim2
real*8,target :: egydim(dim1,dim2)
real*8, pointer :: haromdim(:,:)
end subroutine
subroutine rpoint4d(egydim,haromdim,dim1,dim2,dim3,dim4)
implicit none
integer dim1,dim2,dim3,dim4
real*8,target :: egydim(dim1,dim2,dim3,dim4)
real*8, pointer :: haromdim(:,:,:,:)
end subroutine
c}}}
end interface
ecc = 0.d0
eccss = 0.d0
eccos = 0.d0
eccs = 0.d0
eccscs = 0.d0
eccoss = 0.d0
eccsss = 0.d0
ediag = 0.d0
call getvar('dhyb ',dhyb)
C Calculate energy
if(ccsdalg .eq. 'disk ') then ! {{{
ecc=er+2.d0*ddot(nvirt*nocc,fai,1,t,1)
et1 = ecc - er
if(lpert .and. dhyb.ge.1) ecc = er
ind=1
notend=.true.
n = blocknumber(nvirt**2, nocc, nocc, 'y', ind, bsize,
$ 'Insufficient memory! ',
$ maxcor-(imem-imem1))
do while(notend)
if(ind+n-1 .ge. nocc) then
notend = .false.
n = nocc-ind+1
end if
call ll2read(ind, n, dcore(imem), 'abkl',nocc,nvirt,ifltln)
do j = ind, ind+n-1
do i = 1, nocc
if(i .le. j) then
ttind = (j-1) * j / 2 + i
if (lpert) then ! quadratic t1 term is not needed for MP2
call dcopy(nvirt**2,tt(1, 1, ttind),1,scrs,1)
else
call ccijcalc(scrs, nvirt, t(1, i), t(1, j),
$ tt(1, 1, ttind))
endif
ecc = ecc + ddot(nvirt**2,
$ dcore(imem+(i-1)*nvirt**2+
$ (j-ind)*nvirt**2*nocc), 1, scrs, 1)
endif
if(i .gt. j) then
ttind = (i-1) * i / 2 + j
if (lpert) then ! quadratic t1 term is not needed for MP2
call tr(nvirt,tt(1, 1, ttind),scrs)
else
call ccijtrcalc(scrs, nvirt, t(1, j), t(1, i),
$ tt(1, 1, ttind))
endif
ecc = ecc + ddot(nvirt**2,
$ dcore(imem+(i-1)*nvirt**2+
$ (j-ind)*nvirt**2*nocc), 1, scrs, 1)
end if
end do
end do
ind = ind + n
enddo ! }}}
else if(ccsdalg .eq. 'dfdirect') then ! {{{
lscale=(lno.or.lnaf).and.trim(localcc).eq.'off'.and..not.lf12
if(lscale) then
iqfact=dblalloc(nocc)
call rpoint1d(dcore(iqfact), qfact, nocc)
iqf=dblalloc(nvirt*nocc)
call rpoint2d(dcore(iqf), qf, nvirt, nocc)
open(800,file='F12INTE',form='unformatted')
read(800) ecabs,emp2f12
read(800) tfact,qfact
else
iqfact=imem
iqf=imem
call rpoint2d(fai, qf, nvirt, nocc)
endif
TIME0(times(1,95))
if(master_thread) then !mpi_rank .eq. 0) then
c ecc = er + 2.d0 * ddot(nvirt*nocc, fai, 1, t, 1)
if(lscale) then
do i = 1, nocc
qf(:, i) = qfact(i) * fai(:, i)
end do
eccs = 2.d0 * ddot(nvirt*nocc, qf, 1, t, 1)
end if
ecc = er + 2.d0 * ddot(nvirt*nocc, fai, 1, t, 1)
else
ecc = 0
end if
et1 = ecc - er
if(lpert .and. dhyb.ge.1) ecc = er
TIMEADD(times(1, 45), times(1, 95))
if(nocc.eq.0 .or. nvirt.eq.0) goto 123
n = blocknumber(2*nocc**2*nvirt, 1, nvirt, 'y', 1, bsize,
$ 'Insufficient memory for one block of K! ',
$ maxcor-(imem-imem1))
illiajb = dblalloc(nocc**2*nvirt*n)
call rpoint4d(dcore(illiajb), lliaj_b, nocc, nvirt, nocc, n)
ittiajb = dblalloc(nocc**2*nvirt*n)
call rpoint4d(dcore(ittiajb), ttiaj_b, nocc, nvirt, nocc, n)
TIME0(times(1,95))
do i = 1, nocc
call dcopy(nvirt, t(1, i), 1, t1t(i, 1), nocc)
end do
TIMEADD(times(1, 46), times(1, 95))
call blockindices(find, lind, nvirt, n, mpi_rank, mpi_size,
$ notend, 1)
ind = find
do while(notend)
if(ind+n-1 .ge. lind) then !nvirt) then
n = lind - ind + 1 !nvirt - ind + 1
notend = .false.
end if
TIME0(times(1,95))
! ttiaj_b
do j = 1, nocc
c$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,ttind,b) SCHEDULE(DYNAMIC)
do i = 1, j
ttind = j * (j-1) / 2 + i
do b = ind, ind+n-1
ttiaj_b(i, 1:nvirt, j, b-ind+1) =
$ tt(1:nvirt, b, ttind)
end do
end do
c$OMP END PARALLEL DO
c$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,ttind,b) SCHEDULE(DYNAMIC)
do i = j+1, nocc
ttind = i * (i-1) / 2 + j
do b = ind, ind+n-1
ttiaj_b(i, 1:nvirt, j, b-ind+1) =
$ tt(b, 1:nvirt, ttind)
end do
end do
c$OMP END PARALLEL DO
end do
TIMEADD(times(1, 49), times(1, 95))
if (.not.lpert) then ! quadratic t1 term is not needed for MP2
TIME0(times(1,95))
call dger(nocc*nvirt, nocc*n,
$ 1.d0, t1t, 1, t1t(1, ind), 1, ttiaj_b, nocc*nvirt)
TIMEADD(times(1, 50), times(1, 95))
endif
TIME0(times(1,95))
call dgemm('n', 't', nocc*nvirt, nocc*n, dfnb,
$ 1.d0, jia, nocc*nvirt, jia(1, ind, 1), nocc*nvirt,
$ 0.d0, lliaj_b, nocc*nvirt)
TIMEADD(times(1, 47), times(1, 95))
TIME0(times(1,95))
c ediag = 0.d0
c do i = 1, nocc
c do a = 1, nvirt
c do b = 1, n
c ediag(i) = ediag(i) +
c $ lliaj_b(i,a,i,b) * ttiaj_b(i,a,i,b)
c end do
c end do
c end do
c write(*,*) 'CCSD oppsite spin diagonal', sum(ediag)
c write(*,'(f20.12)') ediag
eccos = ddot(nvirt*nocc**2*n, lliaj_b, 1, ttiaj_b, 1)
eccss = -eccos
ecc = ecc + eccos
TIMEADD(times(1, 51), times(1, 95))
TIME0(times(1,95))
do b = ind, ind+n-1
do a = 1, nvirt
do j = 1, nocc
kk_ij_ofab(1:nocc, j) =
$ lliaj_b(1:nocc, a, j, b-ind+1)
end do
do j = 1, nocc
lliaj_b(1:nocc, a, j, b-ind+1) =
c $ lliaj_b(1:nocc, a, j, b-ind+1) -
$ 2.d0 * lliaj_b(1:nocc, a, j, b-ind+1) -
$ kk_ij_ofab(j, 1:nocc)
end do
end do
end do
TIMEADD(times(1, 48), times(1, 95))
TIME0(times(1,95))
eccss = eccss +
$ ddot(nvirt*nocc**2*n, lliaj_b, 1, ttiaj_b, 1)
ecc = ecc + eccss
do i = 1, nocc
do j = 1, nocc
do a = 1, nvirt
do b = 1, n
c$OMP ATOMIC
ediag(i) = ediag(i) +
$ lliaj_b(i,a,j,b) * ttiaj_b(i,a,j,b)
end do
end do
end do
end do
TIMEADD(times(1, 51), times(1, 95))
if(lscale) then
c q-scaled total energy
do b = 1, n
do j = 1, nocc
do a = 1, nvirt
do i = 1, nocc
lliaj_b(i, a, j, b) =
$ (qfact(i) + qfact(j)) / 2 *
$ lliaj_b(i, a, j, b)
end do
end do
end do
end do
eccs = eccs +
$ ddot(nvirt*nocc**2*n, lliaj_b, 1, ttiaj_b, 1)
! q-scs innen {{{
read(800) qfact
TIME0(times(1,95))
call dgemm('n', 't', nocc*nvirt, nocc*n, dfnb,
$ 1.d0, jia, nocc*nvirt, jia(1, ind, 1), nocc*nvirt,
$ 0.d0, lliaj_b, nocc*nvirt)
TIMEADD(times(1, 47), times(1, 95))
do b = 1, n
do j = 1, nocc
do a = 1, nvirt
do i = 1, nocc
lliaj_b(i, a, j, b) =
$ (qfact(i) + qfact(j)) / 2 *
$ lliaj_b(i, a, j, b)
end do
end do
end do
end do
TIME0(times(1,95))
eccoss = ddot(nvirt*nocc**2*n, lliaj_b, 1, ttiaj_b, 1)
c eccsss = -eccoss
eccscs = eccscs + eccoss
TIMEADD(times(1, 51), times(1, 95))
TIME0(times(1,95))
read(800) qfact
call dgemm('n', 't', nocc*nvirt, nocc*n, dfnb,
$ 1.d0, jia, nocc*nvirt, jia(1, ind, 1), nocc*nvirt,
$ 0.d0, lliaj_b, nocc*nvirt)
do b = ind, ind+n-1
do a = 1, nvirt
do j = 1, nocc
kk_ij_ofab(1:nocc, j) =
$ lliaj_b(1:nocc, a, j, b-ind+1)
end do
do j = 1, nocc
lliaj_b(1:nocc, a, j, b-ind+1) =
c $ lliaj_b(1:nocc, a, j, b-ind+1) -
$ lliaj_b(1:nocc, a, j, b-ind+1) -
$ kk_ij_ofab(j, 1:nocc)
end do
end do
end do
do b = 1, n
do j = 1, nocc
do a = 1, nvirt
do i = 1, nocc
lliaj_b(i, a, j, b) =
$ (qfact(i) + qfact(j)) / 2 *
$ lliaj_b(i, a, j, b)
end do
end do
end do
end do
TIMEADD(times(1, 48), times(1, 95))
TIME0(times(1,95))
eccsss = eccsss +
$ ddot(nvirt*nocc**2*n, lliaj_b, 1, ttiaj_b, 1)
eccscs = eccscs + eccsss
TIMEADD(times(1, 51), times(1, 95))
rewind(800)
! q-scs eddig }}}
rewind(800)
end if
ind = ind + n
end do
c if(nn.ne.-2) then
c open(799, file='ccsd_ei')
c write(799,'(f20.14)') ediag
cc write(*,*) 'ccsd energy ',sum(ediag)
c close(799)
c else
c open(799, file='ppl_ei')
c write(799,'(f20.14)') ediag
cc write(*,*) 'ccsd energy ',sum(ediag)
c close(799)
c end if
c
call dbldealloc(illiajb)
C F12 energy contribution
if(lf12 .and. (nn.gt.0 .or. nn.eq.-1) .and. mpi_rank.eq.0) then
ef12 = 0.d0
rewind(800)
read(800) ecabs, emp2f12
read(800)
read(800) f12int2
ef12 = ef12 + ddot(nvirt**2*(nocc+1)*nocc/2,f12int2,1,tt,1)
read(800) f12int2
tmp = 0.d0
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
C$OMP& PRIVATE(a,b,i,j,ttind)
C$OMP& REDUCTION(+:tmp)
do j = 1, nocc
do i = 1, j
ttind = (j-1) * j / 2 + i
do b = 1, nvirt
do a = 1, nvirt
tmp = tmp + f12int2(a, b, ttind)*t(a,i)*t(b,j)
end do
end do
end do
end do
C$OMP END PARALLEL DO
ef12 = ef12 + tmp
read(800) f12int1
ef12 = ef12 + ddot(nvirt*nocc, f12int1, 1, t, 1)
ecc = ecc + emp2f12 + 2.d0 * ef12
endif
C F12 end
123 continue
TIME0(times(1,95))
#if defined(MPI)
if(mpi_ccsd .and. mpi_size .ne. 1) then
call MPI_Allreduce(MPI_IN_PLACE, eccss, 1,
$ MPI_DOUBLE_PRECISION, MPI_SUM, ccsd_communicator,
$ mpi_err)
call MPI_Allreduce(MPI_IN_PLACE, eccos, 1,
$ MPI_DOUBLE_PRECISION, MPI_SUM, ccsd_communicator,
$ mpi_err)
call MPI_Allreduce(MPI_IN_PLACE, ecc, 1,
$ MPI_DOUBLE_PRECISION, MPI_SUM, ccsd_communicator,
$ mpi_err)
end if
#endif
TIMEADD(times(1, 52), times(1, 95))
end if ! }}}
if(nn.eq.0)then
if (localcc15p(localcc)) then
write(iout,*)
write(iout,"(' MP2 correlation energy [au]: ',f21.12)")
$ ecc
else
write(iout,"(' T1 contribution [au]: ',f21.12)")
$ et1
if(ccsdalg .eq. 'dfdirect') then
write(iout,"(' Same spin MP2 energy [au]: ',f21.12)")
$ eccss
write(iout,"(' Opposite spin MP2 energy [au]:',f21.12)")
$ eccos
end if
write(iout,"(' MP2 correlation energy [au]: ',f21.12)")
$ ecc-er
write(iout,"(' Total MP2 energy [au]: ',f21.12)")
$ ecc
call getkey('dfintran', 8, dfintran, 8)
if(dfintran.ne.'ovirt ') then
if(lno .or. lnaf) then
emp2corr = emp2full - ecc
write(iout,
$ "(' MP2 correction [au]: ',f28.12)")
$ emp2corr
end if
c if(lno .and. lnaf) then
c write(iout,
c $ "(' Correction for NOs&NAFs [au]: ',f22.12)")
c $ emp2corr
c else if(lno) then
c write(iout,
c $ "(' Correction for NOs [au]: ',f22.12)")
c $ emp2corr
c else if(lnaf) then
c write(iout,
c $ "(' Correction for NAFs [au]: ',f22.12)")
c $ emp2corr
c end if
end if
endif
write(iout,*)
else if(nn .eq. -1) then
c write(iout,
c $ "(/' CCSD energy with MO integrals [au]: ',
c $ f22.12)")ecc
else if(nn .gt. 0) then
write(iout,"(' Iteration',i3,2f22.12)")nn,norm,ecc
call flush(iout)
else if(nn .eq. -3) then
write(iout,"(' rest_eg ',f22.12)") ecc
else if(nn .eq. -4) then
write(iout,"(' rest_s ',f22.12)") ecc
else if(nn .eq. -5) then
write(iout,"(' rest_mp3 ',f22.12)") ecc
else if(nn .eq. -6) then
write(iout,"(' rest_nl6 ',f22.12)") ecc
else if(nn .eq. -7) then
write(iout,"(' rest_v ',f22.12)") ecc
else if(nn .eq. -8) then
write(iout,"(' rest_a2 ',f22.12)") ecc
else if(nn .eq. -9) then
write(iout,"(' rest_egnl',f22.12)") ecc
endif
return
end subroutine
************************************************************************
function calcnorm(nvirt, nocc, v, vv)
************************************************************************
implicit none
real*8 calcnorm
integer nvirt, nocc
real*8 v(nvirt,nocc),vv(nvirt,nvirt,(nocc+1)*nocc/2)
real*8 dnrm2
calcnorm = dnrm2(nvirt**2*nocc*(nocc+1)/2,vv,1) +
$ dnrm2(nvirt*nocc,v,1)
end function
*********************************************************************
subroutine crccx(x,t,tt,ccx,nvirt,nocc)
*********************************************************************
implicit none
integer nvirt,nocc
integer i,j,c,d
integer ijind,cdind
character*1 x
real*8 tt(nvirt,nvirt,(nocc+1)*nocc/2)
real*8 t(nvirt,nocc)
real*8 ccx((nvirt+1)*nvirt/2,(nocc+1)*nocc/2)
integer dmax
real*8 fac
if(x .eq. 's') then
dmax = nvirt
fac = 1.d0
else ! if(x .eq. 'a')
dmax = nvirt - 1
fac = -1.d0
end if
do j = 1, nocc
do i = 1, j
ijind = j * (j-1) / 2 + i
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
C$OMP& PRIVATE(c,d,cdind)
do d = 1, dmax
do c = d+1, nvirt
cdind = (d-1) * (2*nvirt-d+2) / 2 + c-d+1
ccx(cdind, ijind) =
$ 0.5d0 * (tt(c, d, ijind) + t(c, i) * t(d, j) +
$ fac * (tt(d, c, ijind) + t(d, i) * t(c, j)))
end do
end do
C$OMP END PARALLEL DO
end do
end do
! diagonal elements
if(x .eq. 's')then
do j = 1, nocc
do i = 1, j
ijind = j * (j-1) / 2 + i
do d = 1, nvirt
cdind = (d-1) * (2*nvirt-d+2) / 2 + 1
ccx(cdind, ijind) =
$ 0.5d0 * (tt(d, d, ijind) + t(d, i) * t(d, j))
end do
end do
end do
else ! if(x .eq. 'a')
do j = 1, nocc
do i = 1, j
ijind = j * (j-1) / 2 + i
do d = 1, nvirt
cdind = (d-1) * (2*nvirt-d+2) / 2 + 1
ccx(cdind, ijind) = 0.d0
end do
end do
end do
end if
return
end subroutine
*********************************************************************
subroutine readxlist(x,ind,n,tmp,na,nc,recbounds,nocc,nvirt,
$ ibufln,irecln)
*********************************************************************
implicit none
integer nocc, nvirt
integer ibufln, irecln
integer c,d,ii,jj,jj0,ll
integer na,nc,nd,nn,nm
integer abind,cdind
integer ind,n
real*8 tmp(na,nc),buffer(ibufln)
character*1 x
character*6 fname
integer recbounds(nvirt,2)
if(x.eq.'s')ll=0
if(x.eq.'a')ll=1
if(x.eq.'s')fname='abcd.s'
if(x.eq.'a')fname='abcd.a'
abind=1
cdind=1
if(ind.eq.1)then
jj0=1
else
jj0=recbounds(ind-1,1)+1
endif
jj=jj0
OPEN(599+ll,file=fname,access='direct',recl=irecln,STATUS='OLD')
c if(nit.eq.1)then
c if(x.eq.'s')fname='abcd.s.info'
c if(x.eq.'a')fname='abcd.a.info'
c open(410+ll,file=fname)
c rewind 410+ll
c endif
nn=nvirt*(nvirt+1)/2
nd=nn-na
nm=nocc*(nocc+1)/2
ii=0
cdind=nn-na
c if(nit.eq.1)write(6,'(6i5)')nn,na,nb,nc,ind,n
do d=ind,ind+n-1
c do jj=jj0,recbounds(d,1)
c read(599+ll,rec=jj)buffer(1:ibufln)
c enddo
c ii=0
c wind=1
c do c=d,nvirt
c cdind=cdind+1
c ii=ii+1
c call dcopy(nn-cdind+1,w(wind),1,
c $tmp(cdind-(nn-na),cdind-(nn-na)),1)
c wind=wind+nn-cdind+1
c enddo
do c=d,nvirt
cdind=cdind+1
do abind=cdind,nn
ii=ii+1
if(ii.eq.1)then
read(599+ll,rec=jj)buffer(1:ibufln)
endif
tmp(abind-(nn-na),cdind-(nn-na))=buffer(ii)
c if(nit.eq.1.and.abs(buffer(ii)).gt.10.d0)
c $write(410+ll,'(f20.3,3i6)')buffer(ii),abind,c,d
if(ii.eq.ibufln.or.(jj.eq.recbounds(d,1).and.ii.eq.
$recbounds(d,2)))then
jj=jj+1
ii=0
endif
enddo
enddo
enddo
c if ind+n-1.eq.nvirt the lower triangle is not needed!
CLOSE(599+ll)
c CLOSE(410+ll)
return
end subroutine
*********************************************************************
subroutine xmatextr(x,xmat,kdv,nvirt,nocc)
*********************************************************************
implicit none
integer nvirt,nocc
integer a,b
integer ijind,abind
real*8 xmat((nvirt+1)*nvirt/2,(nocc+1)*nocc/2)
real*8 kdv(nvirt,nvirt,(nocc+1)*nocc/2)
character*1 x
if(x.eq.'s')then
do ijind=1,(nocc+1)*nocc/2
kdv(1,1,ijind)=kdv(1,1,ijind)+xmat(1,ijind)
abind=1
do b=1,nvirt-1
abind=abind+1
do a=b+1,nvirt
kdv(a,b,ijind)=kdv(a,b,ijind)+xmat(abind,ijind)
kdv(b,a,ijind)=kdv(b,a,ijind)+xmat(abind,ijind)
abind=abind+1
enddo
kdv(b+1,b+1,ijind)=kdv(b+1,b+1,ijind)+xmat(abind,ijind)
enddo
enddo
endif
if(x.eq.'a')then
do ijind=1,(nocc+1)*nocc/2
abind=1
do b=1,nvirt-1
abind=abind+1
do a=b+1,nvirt
kdv(a,b,ijind)=kdv(a,b,ijind)+xmat(abind,ijind)
kdv(b,a,ijind)=kdv(b,a,ijind)-xmat(abind,ijind)
abind=abind+1
enddo
enddo
enddo
endif
return
end subroutine
*********************************************************************
* timer call *
*********************************************************************
subroutine stopper(id,ison)
*********************************************************************
implicit none
real*8 tim, timold, wct, wctold
integer id
character*1 ison
save timold, wctold
if(ison.eq.'y')then
timold=tim
wctold=wct
tim=1.d-15
wct=1.d-15
call mtime(tim,wct)
write(*,"(' CPU:',f9.5,
$' Wall:',f14.5,' xCPU:',f9.5,' xWall:',2f9.5)")
$tim/60.d0,wct/60.d0,tim-timold,wct-wctold,
$ (tim-timold)/(wct-wctold)
id=id+1
endif
return
end subroutine
*********************************************************************
subroutine rbwrt(recbounds,nvirt,ibufln)
*********************************************************************
implicit none
integer nvirt, ibufln
integer recbounds(nvirt,2)
integer d,n
n=nvirt
c rewind 906
do d=1,nvirt
recbounds(d,2)=modulo(((n-d+1)**2+1)*(n-d+1)/2,ibufln)
if(recbounds(d,2).eq.0)recbounds(d,2)=ibufln
enddo
recbounds(1,1)=((n**2+1)*n/2-recbounds(1,2))/ibufln+1
c write(906,*)recbounds(1,1:2)
do d=2,nvirt
recbounds(d,1)=recbounds(d-1,1)+(((n-d+1)**2+1)*(n-d+1)/2-
$recbounds(d,2))/ibufln+1
c write(906,*)recbounds(d,1:2)
enddo
return
end subroutine
************************************************************************
subroutine rpacorecc(nvirt,nocc,ttt,tp1,vvv,fab,fij,eref,tol,maxit
$,vscr,bmat,bvec,invbmat,cvec,iout,erpa,scrs,localcc,int1,ui,esdi,
$isrpa,bsymmv,symmocc,dgroup,first,last,co,w,wsize,dcore,imem,
$diisfile,errfile,tfile,tinfo,ifltln,ibufln,irecln,nbasis,nirmax,
$multpg,locnoc)
************************************************************************
implicit none
integer diisfile,errfile,tfile,tinfo,ifltln,ibufln,irecln
integer nvirt,nocc,a,b,i,j,k,l,lk,kj,il,ij,lj
integer nit,ndim,maxit,nn,iout, nbasis,nirmax
integer multpg(nirmax,nirmax),locnoc
real*8 ttt(nvirt,nvirt,nocc*(nocc+1)/2),fij(nocc,nocc),erpa,eref
real*8 tp1(nvirt,nvirt,nocc*(nocc+1)/2),scrs(nvirt,nvirt),alpha
real*8 vvv(nvirt,nvirt,nocc*(nocc+1)/2),fab(nvirt,nvirt),erpaold
real*8 bmat(maxit,maxit),invbmat(maxit,maxit),ddot,tol
real*8 cvec(maxit),bvec(maxit**2),vscr(*),esdi,norm
real*8 int1(nocc,nocc),ui(nocc),coeff
character*4 localcc
integer isrpa,wsize
integer first(0:5,8,8,nvirt+nocc),last(0:5,8,8,nvirt+nocc)
integer co(nvirt+nocc)
integer bsymmv(nocc+1),symmocc(nocc)
integer dgroup,imem
real*8 dcore(1)
real*8 w(wsize)
ndim=nocc*nvirt
nn=nvirt**2*nocc*(nocc+1)/2
IF(isrpa.eq.1)THEN
write(iout,'(" RPA calculation")')
write(iout,*)
C Construct B (=L with i.le.j)
call llread(dcore(imem), nocc, nvirt, ifltln)
call bbextract(dcore(imem),dcore(imem+nvirt**2*nocc**2)
$ ,nocc,nvirt,ibufln,irecln)
C Construct A
call aaextract(dcore(imem),dcore(imem+nvirt**2*nocc**2),
$ bsymmv,symmocc,dgroup,first,last,co,w,
$ irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg,ifltln)
ELSE
write(iout,'(" dRPA calculation")')
write(iout,*)
ENDIF
C Starting iteration
erpa=0.d0
erpaold=1.d0
nit=0
norm=tol+1.d0
write(iout,'(21x,"Error vector norm",6x,"Energy")')
do while((dabs(erpa-erpaold).gt.tol.or.
$norm/10.gt.tol).and.nit.lt.maxit)
erpaold=erpa
nit=nit+1
C Calculate 1+T
call dcopy(nn,ttt,1,tp1,1)
IF(isrpa.eq.1)THEN
ELSE
do i=1,nocc
ij=i*(i+1)/2
do a=1,nvirt
tp1(a,a,ij)=1.d0+tp1(a,a,ij)
enddo
enddo
ENDIF
C Update T
IF(isrpa.eq.1)THEN
C Read B (L)
call bbread(vvv,nocc,nvirt,ibufln,irecln)
C Copy B to Tnew
call dcopy(nn,vvv,1,ttt,1)
coeff=1.d0
ELSE
call dfillzero(ttt,nn)
coeff=2.d0
ENDIF
do j=1,nocc
do l=1,nocc
C (1)
call dfillzero(scrs,nvirt**2)
do k=1,nocc
if(k.ge.l.and.k.le.j) then
kj=(j-1)*j/2+k
lk=(k-1)*k/2+l
call dgemm('n','n',nvirt,nvirt,nvirt,coeff,
$vvv(1,1,lk),nvirt,tp1(1,1,kj),nvirt,1.d0,scrs,nvirt)
endif
if(k.ge.l.and.k.gt.j) then
kj=(k-1)*k/2+j
lk=(k-1)*k/2+l
call dgemm('n','t',nvirt,nvirt,nvirt,coeff,
$vvv(1,1,lk),nvirt,tp1(1,1,kj),nvirt,1.d0,scrs,nvirt)
endif
if(k.lt.l.and.k.le.j) then
kj=(j-1)*j/2+k
lk=(l-1)*l/2+k
call dgemm('t','n',nvirt,nvirt,nvirt,coeff,
$vvv(1,1,lk),nvirt,tp1(1,1,kj),nvirt,1.d0,scrs,nvirt)
endif
if(k.lt.l.and.k.gt.j) then
kj=(k-1)*k/2+j
lk=(l-1)*l/2+k
call dgemm('t','t',nvirt,nvirt,nvirt,coeff,
$vvv(1,1,lk),nvirt,tp1(1,1,kj),nvirt,1.d0,scrs,nvirt)
endif
enddo
C (2)
do i=1,j
if(i.le.l) then
il=(l-1)*l/2+i
ij=(j-1)*j/2+i
call dgemm('n','n',nvirt,nvirt,nvirt,1.d0,
$tp1(1,1,il),nvirt,scrs,nvirt,1.d0,ttt(1,1,ij),nvirt)
endif
if(i.gt.l) then
il=(i-1)*i/2+l
ij=(j-1)*j/2+i
call dgemm('t','n',nvirt,nvirt,nvirt,1.d0,
$tp1(1,1,il),nvirt,scrs,nvirt,1.d0,ttt(1,1,ij),nvirt)
endif
enddo
enddo
enddo
c write(6,*)'B+TBT'
c do j=1,nocc
c do i=1,j
c ij=(j-1)*j/2+i
c do b=1,nvirt
c do a=1,nvirt
c write(6,'(4i3,e16.8)')a,b,i,j,ttt(a,b,ij)
c enddo
c enddo
c enddo
c enddo
IF(isrpa.eq.1)THEN
C Read A
call aaread(vvv,nocc,nvirt,ibufln,irecln)
c write(6,*)'A'
c do j=1,nocc
c do i=1,j
c ij=(j-1)*j/2+i
c do b=1,nvirt
c do a=1,nvirt
c write(6,'(4i3,e16.8)')a,b,i,j,vvv(a,b,ij)
c enddo
c enddo
c enddo
c enddo
C Calculate AT+TA
do j=1,nocc
do i=1,j
ij=(j-1)*j/2+i
do l=1,nocc
if(j.le.l)then
lj=(l-1)*l/2+j
if(i.le.l) then
il=(l-1)*l/2+i
call dgemm('n','t',nvirt,nvirt,nvirt,1.d0,
$vvv(1,1,il),nvirt,tp1(1,1,lj),nvirt,1.d0,ttt(1,1,ij),nvirt)
call dgemm('n','t',nvirt,nvirt,nvirt,1.d0,
$tp1(1,1,il),nvirt,vvv(1,1,lj),nvirt,1.d0,ttt(1,1,ij),nvirt)
endif
if(i.gt.l) then
il=(i-1)*i/2+l
call dgemm('t','t',nvirt,nvirt,nvirt,1.d0,
$vvv(1,1,il),nvirt,tp1(1,1,lj),nvirt,1.d0,ttt(1,1,ij),nvirt)
call dgemm('t','t',nvirt,nvirt,nvirt,1.d0,
$tp1(1,1,il),nvirt,vvv(1,1,lj),nvirt,1.d0,ttt(1,1,ij),nvirt)
endif
endif
if(j.gt.l)then
lj=(j-1)*j/2+l
if(i.le.l) then
il=(l-1)*l/2+i
call dgemm('n','n',nvirt,nvirt,nvirt,1.d0,
$vvv(1,1,il),nvirt,tp1(1,1,lj),nvirt,1.d0,ttt(1,1,ij),nvirt)
call dgemm('n','n',nvirt,nvirt,nvirt,1.d0,
$tp1(1,1,il),nvirt,vvv(1,1,lj),nvirt,1.d0,ttt(1,1,ij),nvirt)
endif
if(i.gt.l) then
il=(i-1)*i/2+l
call dgemm('t','n',nvirt,nvirt,nvirt,1.d0,
$vvv(1,1,il),nvirt,tp1(1,1,lj),nvirt,1.d0,ttt(1,1,ij),nvirt)
call dgemm('t','n',nvirt,nvirt,nvirt,1.d0,
$tp1(1,1,il),nvirt,vvv(1,1,lj),nvirt,1.d0,ttt(1,1,ij),nvirt)
endif
endif
enddo
enddo
enddo
C ITT
c write(6,*)'+AT+TA'
c do j=1,nocc
c do i=1,j
c ij=(j-1)*j/2+i
c do b=1,nvirt
c do a=1,nvirt
c write(6,'(4i3,e16.8)')a,b,i,j,ttt(a,b,ij)
c enddo
c enddo
c enddo
c enddo
ENDIF
do j=1,nocc
do i=1,j
ij=(j-1)*j/2+i
do b=1,nvirt
do a=1,nvirt
ttt(a,b,ij)=ttt(a,b,ij)/(fij(i,i)+fij(j,j)-fab(a,a)-fab(b,b))
enddo
enddo
enddo
enddo
IF(isrpa.eq.1)THEN
c call dscal(nn,0.5d0,ttt,1)
ENDIF
c write(6,*)'T new'
c do j=1,nocc
c do i=1,j
c ij=(j-1)*j/2+i
c do b=1,nvirt
c do a=1,nvirt
c write(6,'(4i3,e16.8)')a,b,i,j,ttt(a,b,ij)
c enddo
c enddo
c enddo
c enddo
C DIIS extrapolation
call rpa_diis(nit,nn,ttt,vscr,maxit,bmat,bvec,invbmat,cvec,
$ norm,diisfile,errfile,tfile,tinfo,ifltln,ibufln,irecln,
$ nvirt,nocc)
C Read K
call vvinit(vvv,bsymmv,symmocc,dgroup,first,last,co,w,
$irecln,ibufln,nocc,nvirt,nbasis,nirmax,multpg)
C Calculate energy
erpa=0.d0
do j=1,nocc
do i=1,j
ij=(j-1)*j/2+i
c IF(isrpa.eq.1)THEN
c if(i.eq.j)then
c alpha=0.5d0
c else
c alpha=1.d0
c endif
c ELSE
if(i.eq.j)then
alpha=1.d0
else
alpha=2.d0
endif
c ENDIF
erpa=erpa+alpha*ddot(nvirt**2,ttt(1,1,ij),1,vvv(1,1,ij),1)
enddo
enddo
c erpa=ddot(nn,vvv,1,ttt,1)
c write(6,*)
c write(6,"(1x,70('='))")
c write(6,"(' Iteration',i3,' Energy: ',2f20.12)")
write(iout,"(' Iteration',i3,2f22.12)") nit,norm,erpa+eref
c write(6,*)
enddo
C Calculate local energy contribution
if(localcc.eq.'drpa'.or.localcc.eq.'rpa ')then
call dfillzero(int1,nocc**2)
do j=1,nocc
do k=1,nocc
do l=1,nocc
if(l.le.j.and.k.le.j)then
lj=(j-1)*j/2+l
kj=(j-1)*j/2+k
int1(l,k)=int1(l,k)+ddot(nvirt**2,
$ttt(1,1,lj),1,vvv(1,1,kj),1)
endif
if(l.gt.j.and.k.gt.j)then
lj=(l-1)*l/2+j
kj=(k-1)*k/2+j
int1(l,k)=int1(l,k)+ddot(nvirt**2,
$ttt(1,1,lj),1,vvv(1,1,kj),1)
endif
if(l.gt.j.and.k.le.j)then
lj=(l-1)*l/2+j
kj=(j-1)*j/2+k
call tr(nvirt,ttt(1,1,lj),scrs)
int1(l,k)=int1(l,k)+ddot(nvirt**2,
$scrs,1,vvv(1,1,kj),1)
endif
if(l.le.j.and.k.gt.j)then
lj=(j-1)*j/2+l
kj=(k-1)*k/2+j
call tr(nvirt,vvv(1,1,kj),scrs)
int1(l,k)=int1(l,k)+ddot(nvirt**2,
$scrs,1,ttt(1,1,lj),1)
endif
enddo
enddo
enddo
c esdi= !!??? Ui'*INT1*Ui
call dgemv('n',nocc,nocc,1.d0,int1,nocc,ui,1,0.d0,scrs,1)
esdi=ddot(nocc,ui,1,scrs,1)
endif
write(iout,*)
if(dabs(erpa-erpaold).gt.tol.or.norm/10.gt.tol) then
write(iout,
$"(' Convergence not achieved in',i3,' iterations!')") nit
call mrccend(0)
else
IF(isrpa.eq.1)THEN
write(iout,"(' RPA iteration has converged in',i3,' steps.')")
$nit
ELSE
write(iout,"(' dRPA iteration has converged in',i3,' steps.')")
$nit
ENDIF
write(iout,*)
if(localcc.ne.'off ')then
IF(isrpa.eq.1)THEN
write(iout,'(" RPA correlation energy [au]: ",f22.12)')
$esdi
write(iout,*)
if(localcc.eq.'rpa ')
$ call prtenergc('RPA ',esdi,eref,locnoc)
else
write(iout,'(" dRPA correlation energy [au]: ",f22.12)')
$esdi
write(iout,*)
if(localcc.eq.'drpa')
$ call prtenergc('dRPA ',esdi,eref,locnoc)
endif
else
IF(isrpa.eq.1)THEN
write(iout,'(" RPA correlation energy [au]: ",f22.12)')
$erpa
write(iout,'(" Total RPA energy [au]: ",f22.12)')
$erpa+eref
ELSE
write(iout,'(" dRPA correlation energy [au]: ",f22.12)')
$erpa
write(iout,'(" Total dRPA energy [au]: ",f22.12)')
$erpa+eref
ENDIF
write(iout,*)
if(localcc.eq.'drpa')
$ call prtenergc('dRPA ',erpa,eref,locnoc)
endif
write(iout,*)
endif
C
IF(isrpa.eq.1)THEN
open(714,file='bbfile')
close(714,status='delete')
open(715,file='aafile')
close(715,status='delete')
ENDIF
return
end subroutine
C {{{ readdfints
************************************************************************
subroutine readdfints(jij,jai,jab,cmo,nocc,nvirt,dfnb,sjij,sjab,
$jai2,localcc,ccsdalg,lccoporder,move)
************************************************************************
c read and expand 3 center DF integrals in the can MO basis
************************************************************************
implicit none
integer i,j,a,b,nocc,nvirt,ij,ab,dfnb
real*8 jij(dfnb,nocc,nocc),sjij(dfnb,nocc*(nocc+1)/2)
real*8 jai(nocc,nvirt,dfnb),cmo(nocc,nocc),jai2(nvirt,nocc,dfnb)
real*8 jab(dfnb,nvirt,nvirt),sjab(dfnb,nvirt*(nvirt+1)/2)
real*8 move((dfnb+nvirt**2)/2)
character*4 localcc
character*8 ccsdalg,lccoporder
if(ccsdalg .eq. 'disk ') then
c
c read J_ij from DFINT_IJ in (P,ij) order, upper triangle format for ij indices
open(111,file='DFINT_IJ',form='unformatted')
read(111) sjij
close(111)
ij=0
do j=1,nocc
do i=1,j
ij=ij+1
jij(1:dfnb,i,j)=sjij(1:dfnb,ij)
jij(1:dfnb,j,i)=sjij(1:dfnb,ij)
enddo
enddo
c read J_ab from DFINT_AB in (P,ab) order, upper triangle format for ab indices
open(111,file='DFINT_AB',form='unformatted')
read(111) sjab
close(111)
ab=0
do b=1,nvirt
do a=1,b
ab=ab+1
jab(1:dfnb,a,b)=sjab(1:dfnb,ab)
jab(1:dfnb,b,a)=sjab(1:dfnb,ab)
enddo
enddo
if (localcc.ne.'off ') then
c read J_ai from DFINT_AI in J(i,a,P) order
open(111,file='DFINT_AI',form='unformatted')
read(111) jai
close(111)
else
c read J_ai from DFINT_AI in J(a,i,P) order and transpose
open(111,file='DFINT_AI',form='unformatted')
read(111) jai2
close(111)
do i=1,nocc
do a=1,nvirt
call dcopy(dfnb,jai2(a,i,1:dfnb),1,jai(i,a,1:dfnb),1)
enddo
enddo
endif
else if(ccsdalg .eq. 'dfdirect') then
! reorder indices of DF integrals
call reorderdfints(jij, jab, nocc, nvirt, dfnb, move)
end if
if (localcc.ne.'off ') then
c read occ MO basis of Laplace (T)
if (lccoporder.eq.'trffirst'.or.ccsdalg .eq. 'disk ') then
open(111,file='laplbas',form='unformatted')
read(111) cmo
close(111)
c else : already in memory @ igsl position
endif
else
cmo=0.d0
do i=1,nocc
cmo(i,i)=1.d0
enddo
end if
c
return
end subroutine readdfints
c }}}
c
C {{{ tprt
************************************************************************
subroutine tprt(ltpr,ncore,nocc,nvirt,t,tt,tprtol,iout,t1t1)
************************************************************************
c print CCSD amplitude diagnostics
************************************************************************
implicit none
integer ncore,nocc,nvirt,i,j,a,b,iout,ij,iisyev
real*8 tprtol,t(nvirt,nocc),ddot,t1max,t2max,t1t1(nocc,nocc)
real*8 tt(nvirt,nvirt,(nocc+1)*nocc/2),norm,mrtol
logical ltpr
character*40 str40
character*6 str6
integer*4 isyev
equivalence(isyev,iisyev) !For Intel
c in stack
real*8 eigval(nocc),scr(20*nocc)
mrtol=0.15d0
c
t1max=0.d0
t2max=0.d0
do i=1,nocc
do a=1,nvirt
if (dabs(t(a,i)).gt.dabs(t1max)) t1max=t(a,i)
do j=1,i
ij=j*(j-1)/2+i
do b=1,nvirt
if (dabs(tt(b,a,ij)).gt.dabs(t2max)) t2max=tt(b,a,ij)
enddo
enddo
enddo
enddo
c T1 diagnostic
norm=ddot(nocc*nvirt,t,1,t,1)
c D1 diagnostic
call dsyrk('u','t',nocc,nvirt,1.d0,t,nvirt,0.d0,t1t1,nocc)
call dsyev('N','U',nocc,t1t1,nocc,eigval,scr,20*nocc,isyev)
if(isyev.ne.0) then
write(iout,*) 'Fatal error at the D1 diagnostic evaluation'
eigval(nocc)=0.d0
c call mrccend(1)
endif
write(iout,*)
write(iout,"(' T1 diagnostic: ',es14.3,12x,
$'D1 diagnostic: ',es14.3)")
$dsqrt(norm/(nocc*2.d0)),dsqrt(eigval(nocc))
c
if (ltpr) then
write(iout,*) 'Dominant cluster amplitudes'
write(iout,"(' Printing threshold: ',1pe9.2)") tprtol
write(iout,*)
do i=1,nocc
do a=1,nvirt
if (dabs(t(a,i)).gt.tprtol) then
write(str6,'(i6)') i+ncore
str40= ' ' // trim(adjustl(str6)) // ' -> '
write(str6,'(i6)') a+nocc+ncore
str40=trim(str40) // ' ' // trim(adjustl(str6))
write(iout,"(1es10.3,a40)") t(a,i), str40
endif
enddo
enddo
do i=1,nocc
do j=1,i
ij=(i-1)*i/2+j
do a=1,nvirt
do b=1,nvirt
if (dabs(tt(b,a,ij)).gt.tprtol) then
write(str6,'(i6)') j+ncore
str40= ' ' // trim(adjustl(str6))
write(str6,'(i6)') i+ncore
str40=trim(str40) // ' ' // trim(adjustl(str6)) // ' -> '
write(str6,'(i6)') b+nocc+ncore
str40=trim(str40) // ' ' // trim(adjustl(str6))
write(str6,'(i6)') a+nocc+ncore
str40=trim(str40) // ' ' // trim(adjustl(str6))
write(iout,"(1es10.3,a40)") tt(b,a,ij), str40
endif
enddo
enddo
enddo
enddo
endif
c
if (dabs(t1max).gt.mrtol.or.dabs(t2max).gt.mrtol.or.ltpr) then
write(iout,*)
if (.not.ltpr)write(iout,*)'Warning: large CCSD amplitude found'
write(iout,"(' Largest T1 amplitude: ',1es9.2)") t1max
write(iout,"(' Largest T2 amplitude: ',1es9.2)") t2max
endif
c
return
end subroutine tprt
c }}}
c
C {{{ largest
************************************************************************
subroutine largest(nocc,nvirt,vv,v,tt,t,scr,scr2,map,iout)
************************************************************************
c print largest components of the input arrays
************************************************************************
implicit none
integer nocc,nvirt,i,j,a,b,map(nvirt**2*(nocc+1)*nocc/2),d,ij,k
integer iout
real*8 vv(nvirt,nvirt,(nocc+1)*nocc/2),v(nvirt,nocc)
real*8 tt(nvirt,nvirt,(nocc+1)*nocc/2),t(nvirt,nocc)
real*8 scr(nvirt,nvirt,(nocc+1)*nocc/2),scr2(nvirt,nocc)
c
write(iout,*)
write(iout,*) 'Largest residuals'
d=nvirt**2*(nocc+1)*nocc/2
scr(1:nvirt,1:nvirt,1:(nocc+1)*nocc/2)=
$ dabs(vv(1:nvirt,1:nvirt,1:(nocc+1)*nocc/2))
call qsortd(map,d,scr)
do k=1,min(10,d)
a=mod(map(d-k+1),nvirt)
if (a.eq.0) a=nvirt
b=mod(map(d-k+1),nvirt**2)
if (b.eq.0) then
if (a.ne.nvirt) then
write(iout,*) 'sg wrong'
call mrccend(1)
end if
if (a.eq.nvirt) b=nvirt
else
b=(b-a)/nvirt+1
endif
ij=(map(d-k+1)-a-(b-1)*nvirt)/nvirt**2+1
do i=1,nocc
if (i*(i-1)/2.ge.ij) exit
enddo
i=i-1
j=ij-i*(i-1)/2
write(iout,'(i10,5i6,2es16.8)')
$ map(d-k+1),a,b,i,j,ij,vv(a,b,ij),tt(a,b,ij)
enddo
c singles
d=nvirt*nocc
scr2(1:nvirt,1:nocc)=dabs(v(1:nvirt,1:nocc))
call qsortd(map,d,scr2)
do k=1,min(10,d)
a=mod(map(d-k+1),nvirt)
if (a.eq.0) a=nvirt
i=(map(d-k+1)-a)/nvirt+1
write(iout,'(3i9,2es16.8)') map(d-k+1),a,i,v(a,i),t(a,i)
enddo
write(iout,*)
write(iout,*) 'Largest amplitudes'
d=nvirt**2*(nocc+1)*nocc/2
scr(1:nvirt,1:nvirt,1:(nocc+1)*nocc/2)=
$ dabs(tt(1:nvirt,1:nvirt,1:(nocc+1)*nocc/2))
call qsortd(map,d,scr)
do k=1,min(10,d)
a=mod(map(d-k+1),nvirt)
if (a.eq.0) a=nvirt
b=mod(map(d-k+1),nvirt**2)
if (b.eq.0) then
if (a.ne.nvirt) then
write(iout,*) 'sg wrong'
call mrccend(1)
end if
if (a.eq.nvirt) b=nvirt
else
b=(b-a)/nvirt+1
endif
ij=(map(d-k+1)-a-(b-1)*nvirt)/nvirt**2+1
do i=1,nocc
if (i*(i-1)/2.ge.ij) exit
enddo
i=i-1
j=ij-i*(i-1)/2
write(iout,'(i10,5i6,2es16.8)')
$map(d-k+1),a,b,i,j,ij,vv(a,b,ij),tt(a,b,ij)
enddo
c singles
d=nvirt*nocc
scr2(1:nvirt,1:nocc)=dabs(t(1:nvirt,1:nocc))
call qsortd(map,d,scr2)
do k=1,min(10,d)
a=mod(map(d-k+1),nvirt)
if (a.eq.0) a=nvirt
i=(map(d-k+1)-a)/nvirt+1
write(iout,'(3i9,2es16.8)') map(d-k+1),a,i,v(a,i),t(a,i)
enddo
write(iout,*)
c
return
end subroutine
c }}}
c
************************************************************************
subroutine abcixcalc(x, abcix, abci_list, nvirt, n)
************************************************************************
c Generates <ab||ci> = <ab|ci> - <ba|ci> or <ab|||ci> = <ab|ci> + <ba|ci>
implicit none
character x
real*8 abcix(nvirt*(nvirt+1)/2, nvirt, n)
real*8 abci_list(nvirt, nvirt, nvirt, n)
integer nvirt, n
real*8 fac
integer ind, a, b, c, i
if(x .eq. 's') then
fac = 1.d0
else if(x .eq. 'a') then
fac = -1.d0
end if
do i = 1, n
do c = 1, nvirt
ind = 0
do b = 1, nvirt
do a = b, nvirt
ind = ind + 1
abcix(ind, c, i) = abci_list(a, b, c, i) +
$ fac * abci_list(b, a, c, i)
end do
end do
end do
end do
end subroutine
************************************************************************
subroutine abckccalc(x, nocc, nvirt, n, ind, abcj_abcj,
$ ccx, abcix, kdo_akij, nmmat)
************************************************************************
implicit none
character x
integer n, nvirt, nocc, ind
real*8 abcj_abcj(nvirt, nvirt, nvirt, n)
real*8 ccx(nvirt*(nvirt+1)/2, nocc*(nocc+1)/2)
real*8 nmmat(nocc*(nocc+1)/2, nvirt, nocc)
real*8 abcix(nvirt*(nvirt+1)/2, nvirt, n)
real*8 kdo_akij(nvirt, nocc, nocc, nocc)
integer ttind, i, j, k
real*8 fac
c call crccx(x, t, tt, ccx, nvirt, nocc)
! generate <ab||ci> or <ab|||ci> in abcix
call abcixcalc(x, abcix, abcj_abcj, nvirt, n)
! abcix * C(+/-) -> M/N
call dgemm('t', 'n', nocc*(nocc+1)/2, nvirt*n, nvirt*(nvirt+1)/2,
$ 1.d0, ccx, nvirt*(nvirt+1)/2, abcix, nvirt*(nvirt+1)/2,
$ 0.d0, nmmat(1, 1, ind), nocc*(nocc+1)/2)
! sort N/M into kdo
if(x .eq. 's') then
fac = 1.d0
else if(x .eq. 'a') then
fac = -1.d0
end if
do j = 1, nocc
do i = 1, j
! diagonal elements of N are zero
if(x.eq.'a' .and. i.eq.j) cycle
ttind = j * (j-1) / 2 + i
do k = ind, ind+n-1
if(x .eq. 'a') then
! first call
kdo_akij(1:nvirt,k,i,j) = nmmat(ttind, 1:nvirt, k)
else
! N already in kdo
kdo_akij(1:nvirt,k,i,j) = kdo_akij(1:nvirt,k,i,j) +
$ nmmat(ttind, 1:nvirt, k)
end if
if(i .ne. j) kdo_akij(1:nvirt, k, j, i) =
$ kdo_akij(1:nvirt, k, j, i) +
$ fac * nmmat(ttind, 1:nvirt, k)
end do
end do
end do
end subroutine
************************************************************************
subroutine calcccfori(t, tt, nocc, nvirt, cci, i, tr)
************************************************************************
implicit none
real*8 t(nvirt, nocc), tt(nvirt, nvirt, nocc*(nocc+1)/2)
integer nocc, nvirt, i
real*8 cci(nvirt, nvirt, nocc)
character tr
integer j, ttind
do j = 1, nocc
if(tr .eq. 'n') then
if(j .le. i) then
ttind = i * (i-1) / 2 + j
call ccijcalc(cci(1, 1, j), nvirt, t(1, j), t(1, i),
$ tt(1, 1, ttind))
else
ttind = j * (j-1) / 2 + i
call ccijtrcalc(cci(1, 1, j), nvirt, t(1, i), t(1, j),
$ tt(1, 1, ttind))
end if
else if(tr .eq. 't') then
if(i .le. j) then
ttind = j * (j-1) / 2 + i
call ccijcalc(cci(1, 1, j), nvirt, t(1, i), t(1, j),
$ tt(1, 1, ttind))
else
ttind = i * (i-1) / 2 + j
call ccijtrcalc(cci(1, 1, j), nvirt, t(1, j), t(1, i),
$ tt(1, 1, ttind))
end if
end if
end do
end subroutine
************************************************************************
function blocknumber(firstdim, seconddim, lastdim,
$ constant, ind, bsize, message, avmem)
************************************************************************
c firstdim: size of the first two dimensions (first two indices of the integrals)
c seconddim: second dimension, may vary with the value of the last index
c lastdim: size of the last (fourth) dimension
c constant: whether the second dimension is constant
c ind: first block to read
implicit none
integer blocknumber, bsize, avmem
integer firstdim, seconddim, lastdim, ind
character constant
character*40 message
integer n
integer blocksize
logical notfull
bsize = avmem
if(constant .eq. 'y') then
c block size is constant
blocksize = firstdim * seconddim
if(blocksize .ne. 0) then
n = (avmem - mod(avmem, blocksize)) / blocksize
else
n = 0
end if
bsize = n * blocksize
else
c block size not constant
notfull = .true.
n = 0
do while(notfull .and. (ind + n .le. lastdim))
blocksize = firstdim * (seconddim - ind - n + 1)
if(avmem .ge. blocksize) then
avmem = avmem - blocksize
n = n + 1
else
notfull = .false.
endif
enddo
bsize = bsize - avmem
end if
! insufficient memory for one block
if(n.le.0 .and. lastdim.ne.0)
$ call memerr(blocksize, avmem, message)
! all remaining blocks fit into the memory
if(ind + n .gt. lastdim) n = lastdim - ind + 1
blocknumber = n
end function
#if defined(MPI)
************************************************************************
subroutine sum_v(v2, v1, v_len, v_type) ! tt1 = tt1 + tt2 {{{
************************************************************************
c implicit none
c integer v_len, v_type
c type(v_type) :: v1(*), v2(*)
c
c integer i
c
c do i = 1, v_len
c v1(i) = v1(i) + v2(i)
c end do
use, intrinsic :: iso_c_binding, only : c_ptr, c_f_pointer
c use mpi_f08
c#include <mpif.h>
#if defined(MPI)
include "mpif.h"
#endif
type(c_ptr), value :: v1, v2
integer v_len
c type(MPI_Datatype) :: v_type
integer v_type
real*8, pointer :: v1_r(:), v2_r(:)
integer(kind = MPI_ADDRESS_KIND) lb, sizeof_double, sizeof_vtype
integer mpi_err
c integer num
c data num/0/
c save num
c num=num+1
call MPI_Type_get_extent(MPI_DOUBLE_PRECISION, lb,
$ sizeof_double, mpi_err)
call MPI_Type_get_extent(v_type, lb,
$ sizeof_vtype, mpi_err)
c write(*,*) "sizes",num,v_len,sizeof_vtype / sizeof_double,
c $ v_len * sizeof_vtype / sizeof_double
c sum real*8 arrays
call c_f_pointer(v1, v1_r,
$ (/ v_len * sizeof_vtype / sizeof_double /) )
call c_f_pointer(v2, v2_r,
$ (/ v_len * sizeof_vtype / sizeof_double /) )
v1_r = v1_r + v2_r
end subroutine ! }}}
subroutine prk_communicate(mpi_rank,mpi_size,nummes,comm_scr,
$prk,prk_sent,mpi_ccsd, ccsd_communicator) ! {{{
************************************************************************
implicit none
include "mpif.h"
integer, intent(in) :: mpi_rank, mpi_size, ccsd_communicator
integer :: request, mpi_err, comm_scr,nummes
integer :: stat(MPI_STATUS_SIZE), prk, prk_sent
logical :: flag, mpi_ccsd
if(mpi_ccsd) then
if(mpi_rank .ne. mpi_size-1) then ! TODO: use master_rank or master_thread
flag = .true.
if(nummes .ne. 0) then
call MPI_Test(request, flag, stat, mpi_err)
end if
if(.not.flag) then
call MPI_Cancel(request, mpi_err)
else
comm_scr = 0
end if
comm_scr = comm_scr + prk - prk_sent
prk_sent = prk
call MPI_Isend(comm_scr, 1, MPI_INTEGER_MRCC, mpi_size-1,
$ 0, ccsd_communicator, request, mpi_err)
nummes=nummes+1
else if(mpi_ccsd) then ! last rank receives if ccsd is running MPI parallel
flag = .true.
do while(flag)
call MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG,
$ ccsd_communicator, flag, stat, mpi_err)
if(flag) then
call MPI_Recv(comm_scr, 1, MPI_INTEGER_MRCC,
$ stat(MPI_SOURCE), stat(MPI_TAG),
$ ccsd_communicator, stat, mpi_err)
prk = prk + comm_scr
end if
end do
end if
endif ! mpi_ccsd
return
end subroutine ! }}}
************************************************************************
subroutine prk_communicate_fin(mpi_rank,mpi_size,
$prk,prold,dkji,iout,et,mpi_ccsd, ccsd_communicator) ! {{{
************************************************************************
implicit none
include "mpif.h"
integer, intent(in) :: mpi_rank, mpi_size, ccsd_communicator
integer :: request, mpi_err, comm_scr, iout
integer :: stat(MPI_STATUS_SIZE), prk,prold,nint
logical :: flag, mpi_ccsd, ptdone
real*8 :: dkji,et,pr
if(mpi_ccsd) then
call MPI_Ibarrier(ccsd_communicator, request, mpi_err)
if(mpi_rank .eq. mpi_size-1) then ! TODO: use master_rank or master_thread
ptdone = .false.
do while(.not.ptdone)
flag = .true.
do while(flag)
call MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG,
$ ccsd_communicator, flag, stat, mpi_err)
if(flag) then
call MPI_Recv(comm_scr, 1, MPI_INTEGER_MRCC,
$ stat(MPI_SOURCE), stat(MPI_TAG),
$ ccsd_communicator, stat, mpi_err)
prk = prk + comm_scr
end if
end do
pr=dble(prk)/dkji !progress counter
if(pr.ge.prold+0.1d0.and.pr.ne.1.d0) then
prold=pr
write(iout,'(i4,"% done.")')nint(pr*100)
endif
call MPI_Test(request, ptdone, MPI_STATUS_IGNORE,
$ mpi_err)
end do
end if
cc call MPI_Barrier(ccsd_communicator, mpi_err)
cc write(*,*) "rank:",mpi_rank,et
cc call MPI_Barrier(ccsd_communicator, mpi_err)
c if(mpi_rank .eq. 0) then
c call MPI_Reduce(MPI_IN_PLACE, et, 1, MPI_DOUBLE_PRECISION,
c $ MPI_SUM, 0, ccsd_communicator, request, mpi_err)
c else
c call MPI_Reduce(et, et, 1, MPI_DOUBLE_PRECISION,
c $ MPI_SUM, 0, ccsd_communicator, request, mpi_err)
c end if
call MPI_Allreduce(MPI_IN_PLACE, et, 1, MPI_DOUBLE_PRECISION,
$ MPI_SUM, ccsd_communicator, mpi_err)
endif ! mpi_ccsd
return
end subroutine ! }}}
#endif
c restart {{{
************************************************************************
subroutine write_rst_ccsd(nit, ecc, eccold, norm, lene, t, tt,
$ nocc, nvirt)
************************************************************************
implicit none
integer :: nit, nocc, nvirt
real*8 :: ecc, eccold, norm
real*8 :: t(nvirt, nocc), tt(nvirt, nvirt, nocc*(nocc+1)/2)
logical :: lene
integer*8, parameter :: version = 1
integer*8 :: noccw, nvirtw
! convert from (possibly) 32-bit to 64-bit
noccw = nocc
nvirtw = nvirt
open(unit = 725, file = 'ccsd.rst', access = 'stream')
write(725) version
write(725) noccw, nvirtw
write(725) nit
write(725) ecc, eccold, norm, lene
write(725) t, tt
close(725)
c write(*,*) ecc, eccold, norm, lene
end subroutine
************************************************************************
subroutine read_rst_ccsd(nit, ecc, eccold, norm, lene, t, tt,
$ nocc, nvirt)
************************************************************************
implicit none
integer :: nit, nocc, nvirt
real*8 :: ecc, eccold, norm
real*8 :: t(nvirt, nocc), tt(nvirt, nvirt, nocc*(nocc+1)/2)
logical :: lene
integer*8 :: version
integer*8 :: noccr, nvirtr
logical rstex
inquire(file = 'ccsd.rst', exist = rstex)
if(.not. rstex) return
open(unit = 725, file = 'ccsd.rst', access = 'stream')
read(725) version
if(version .ne. 1) then
write(*, *) 'ERROR: Unknown version of restart file.'
call mrccend(1)
end if
read(725) noccr, nvirtr
if(noccr .ne. nocc .or. nvirtr .ne. nvirt) then
write(*, *) 'ERROR: Wrong restart file.'
call mrccend(1)
end if
read(725) nit
read(725) ecc, eccold, norm, lene
read(725) t, tt
close(725)
end subroutine
************************************************************************
subroutine zero_rst_pt(nocc)
************************************************************************
implicit none
integer :: nocc
integer :: k, j, i, kji
logical*1 :: done
open(unit = 725, file = 'pt.rst', access = 'stream')
kji = 0
done = .false.
do k = 1, nocc
do j = k, nocc
do i = j, nocc
if(k .eq. j .and. j .eq. i) cycle
write(725) done, 0.d0
kji = kji + 1
end do
end do
end do
close(725)
end subroutine
************************************************************************
subroutine read_rst_pt(k, j, i, nocc, done, et)
************************************************************************
implicit none
integer :: nocc
integer :: k, j, i
logical*1 :: done
real*8 :: et
integer :: kji
integer :: ijk_index
kji = ijk_index(k, j, i, nocc)
c$OMP CRITICAL
read(725, pos = 9 * (kji-1) + 1) done
if(done) then
read(725, pos = 9 * (kji-1) + 2) et
else
et = 0.d0
end if
c$OMP END CRITICAL
end subroutine
************************************************************************
subroutine write_rst_pt(kji, et)
************************************************************************
implicit none
integer :: kji
real*8 :: et
logical*1 :: done
done = .true.
c write(*,*) kji, 9 * (kji-1) + 1
c$OMP CRITICAL
write(725, pos = 9 * (kji-1) + 1) done, et
c$OMP END CRITICAL
end subroutine
************************************************************************
function ijk_index(k, j, i, nocc)
************************************************************************
implicit none
integer :: nocc
integer :: k, j, i
integer :: ijk_index
integer :: kji, k2, j2, i2
kji = 0
do k2 = 1, k-1
do j2 = k2, nocc
do i2 = j2, nocc
if(k2 .eq. j2 .and. j2 .eq. i2) cycle
kji = kji + 1
end do
end do
end do
do j2 = k, j-1
do i2 = j2, nocc
if(k .eq. j2 .and. j2 .eq. i2) cycle
kji = kji + 1
end do
end do
do i2 = j, i
if(k .eq. j .and. j .eq. i2) cycle
kji = kji + 1
end do
c write(*,*) k, j, i, kji
ijk_index = kji
end function
************************************************************************
subroutine saveToRst(nReqs, requests, ijk_indices, energies,
$ toSend, en, k, j, i, nocc, world_small, nReqsAll,
$ ccsd_communicator, master_rank) !, mpi_type)
************************************************************************
implicit none
integer nReqs, k, j, i, nocc, world_small, nReqsAll !, mpi_type
real*8 en
integer ijk_index, reqptr, ii, ijk, ccsd_communicator
integer mpi_rank, mpi_err, omp_get_thread_num, master_rank
logical flag
integer tag
save tag
#ifdef MPI
#include "mpif.h"
integer requests(nReqsAll),ijk_indices(nReqsAll),toSend(nReqsAll)
real*8 energies(nReqsAll)
#else
integer requests,ijk_indices,toSend
real*8 energies
#endif
#ifdef MPI
call MPI_Comm_rank(ccsd_communicator, mpi_rank, mpi_err)
#else
mpi_rank = 0
#endif
c if(mpi_rank .eq. 0) then
if(mpi_rank .eq. master_rank) then
ijk = ijk_index(k, j, i, nocc)
c write(*,*) 'writing ', ijk, en
call write_rst_pt(ijk, en)
#ifdef MPI
else
#ifdef OMP
if(omp_get_thread_num() .eq. 0) then
#endif
! send everything
do ii = 1, nReqsAll
if(toSend(ii) .eq. 1) then
call MPI_Isend(ijk_indices(ii), 1, MPI_INTEGER_MRCC,
$ master_rank, 1, world_small,
$ requests(ii), mpi_err)
call MPI_Request_free(requests(ii), mpi_err)
call MPI_Isend(energies(ii), 1, MPI_DOUBLE_PRECISION,
$ master_rank, 1, world_small,
$ requests(ii), mpi_err)
toSend(ii) = 0
end if
end do
! check all pending communication
do ii = 1, nReqsAll
call MPI_Test(requests(ii), flag,
$ MPI_STATUS_IGNORE, mpi_err)
end do
! find a free slot
reqptr = ijk_index(k, j, i, nocc)
! send energy
energies(reqptr) = en
ijk_indices(reqptr) = ijk_index(k, j, i, nocc)
call MPI_Isend(ijk_indices(reqptr), 1, MPI_INTEGER_MRCC,
$ master_rank,
$ 1, world_small, requests(reqptr),
$ mpi_err)
call MPI_Request_free(requests(reqptr), mpi_err)
call MPI_Isend(energies(reqptr), 1, MPI_DOUBLE_PRECISION,
$ master_rank,
$ 1, world_small, requests(reqptr),
$ mpi_err)
#ifdef OMP
else
c$OMP CRITICAL
reqptr = ijk_index(k, j, i, nocc)
toSend(reqptr) = 2
energies(reqptr) = en
ijk_indices(reqptr) = ijk_index(k, j, i, nocc)
toSend(reqptr) = 1
c$OMP END CRITICAL
end if
#endif
#endif
end if
end subroutine
c }}}
#include "ccsd_io.f"
#include "ccsd_dfdirect.f"
#include "ccsd_pt.f"