easyconfigs-it4i/m/MRCC/mrcc_files/ccsd.f

#ifdef TIMING
#define TIME0(a) call time0(a)
#define TIMEADD(a, b) call timeadd(a, b)
#define WRITE_TIMES .true.
#else
#define TIME0(a) !call time0(a)
#define TIMEADD(a, b) !call timeadd(a, b)
#define WRITE_TIMES .false.
#endif

************************************************************************
      subroutine ccsdpt(ijij,ijia,ijab,iabj,igsl,ccmem,
     $imem,iimem,maxcor,dcore,icore,iout,
     $irecln,ibufln,nirmax,multpg,diisfile,errfile,tfile,
     $tinfo,ifcfile,gbasfile,ifltln,mpi_ccsd,master_rank,bcast_comm,
     $inbcast,master_thread,ccsd_communicator,c,cc,is,iss,leom)
************************************************************************
      use omp_lib
      implicit none
#if defined(MPI)
      include "mpif.h"
#endif
      integer maxcor,imem,iimem,wsize,locnoc
      integer diisfile,errfile,tfile,tinfo
      integer ifcfile,gbasfile,ifltln
      integer icore(*),iout,ccsd_communicator
      real*8 dcore(*),eref,emp2,ecc,tol
      integer irecln,ibufln,nbasis,nocc,nvirt,dgroup,nirmax,naobasis
      integer bsymmv(nirmax+1),symmocc(nirmax),multpg(nirmax,nirmax)
      integer i,dblalloc,intalloc,it,itt,iv,ivv,iscrs
      integer ibmat,ibvec,iinvbmat,icvec
      integer ibeta,ix,irr,ivvend
      integer ifai,ifab,ifij,is,iss
      integer ih,iui,iint1
      integer iico,ico,ifirst,ilast
      integer ibuffer, iend
      integer imem1,ccmem(8)
      integer dfnb,ccmaxit,mxmem,ittend,nbset
      integer, allocatable :: nbfread(:)
      integer talgoccmem,talgvirtmem,talgmem
      common/memcom/imem1
      integer ccsdthreads, ptthreads
      character*3 ccsdmkl
      character*4 ccsdth, ptth, ccsdrest, ovirt
      character*4 cctol,localcc,talg,dfbasis_cor
      character*8 dfintran,ccmxit,ccsdalg,lccoporder, csrc
      character*16 calctype,tprint,rank_ch
      character*16 naf, met, eps
      integer deglev,nc, ios, is1, is2, is3
      real*8  esdi,tmp, emp2full, emp2corr
      real*8  et,nenergy,ibuftmp(ibufln)
      logical localcc15p, master_thread, lno, lnaf, leom, lpert

      integer ijij, ijai, ijia, ijab, ifia, iabj , igsl
      integer ifij0, ifai0, ifia0, ifab0, ifock
      integer ihij, ihai, ihia, ihab

      integer memreq,memreq1, memreq2, memreq3
      integer mra2, mrbc2, mrd2, mreg2, mrtrf, mren, mrri
      integer inorm, jlen
      logical ptrest, ccsd_converged
      integer to_abij
      integer tmemmin,tlape,talglaplmem, laplminmem, abcistoremin, max

      integer dhyb
      real*8 cmp2os, cmp2ss, cmp3

c      real*8 c(nvirt, nocc), cc(nvirt, nvirt, nocc*(nocc+1)/2)
c      real*8 c(:, :), cc(:, :, :)
      real*8 c, cc, ecc_ppl_scaled,ecc_ppl, eppl_correction

       integer ifij_c1, ifab_c1, ifia_c1, ifai_c1
       integer ijai_c1, ijab_c1, ijij_c1

      integer master_rank, bcast_comm
      logical inbcast, ppl_calc, lmp3
c      integer OMP_GET_NUM_THREADS, OMP_GET_MAX_THREADS
      integer MKL_GET_MAX_THREADS
c      INTEGER(KIND=OMP_PROC_BIND_KIND) omp_get_proc_bind

      integer omp_max_threads, mkl_max_threads
      common /threads/ ccsdthreads, ptthreads, omp_max_threads,
     $      mkl_max_threads
      save it, itt, iv, ivv, ivvend
      save iui, iico, ico, ifirst, ilast
      save iscrs, ifij, ifab, ittend, inorm, ibmat, ibvec, ifock
      save ih, iinvbmat, icvec, ifai, iint1, ibeta, ix, irr, ibuffer
      save iend

      character*32 outfilename, output_folder
      integer iout2
      common /outunit/ iout2, outfilename, output_folder

! MPI
      character*128 ifacename
      logical mpi_ccsd, flag,lf12
      integer mpi_rank, mpi_size, mpi_err


#if defined (MPI)
      integer name_len
      character*(MPI_MAX_PROCESSOR_NAME) cpu_name
c      character*8 rank_ch

      if(mpi_ccsd) then
         if(ccsd_communicator .eq. MPI_COMM_NULL) return

         call MPI_Initialized(flag, mpi_err)
         if(.not. flag) call MPI_Init(mpi_err)

         call MPI_Comm_rank(ccsd_communicator, mpi_rank, mpi_err)
         call MPI_Comm_size(ccsd_communicator, mpi_size, mpi_err)

c         if(mpi_size .eq. 1) call MPI_Finalize(mpi_err)

         ! broadcast input files
         call bcast_file('KEYWD', bcast_comm)
         call bcast_file('VARS', bcast_comm)
         call bcast_file('55', bcast_comm)
         call MPI_Barrier(ccsd_communicator, mpi_err)

         if(!mpi_size .ne. 1 .or.
     $         ccsd_communicator .ne. MPI_COMM_WORLD) then
               call MPI_Get_processor_name(cpu_name, name_len, mpi_err)

            write(rank_ch, '(i8)') mpi_rank
            if(.not.leom .and. .not.master_thread) then
               write(*,"(' ccsd rank ', a4, ' running on ', a20)")
     $               trim(adjustl(rank_ch)), cpu_name
            else
               write(*,"(' ccsd rank ', a4, ' running on ', a20,
     $               ', master')")
     $               trim(adjustl(rank_ch)), cpu_name
            end if
            call MPI_Barrier(ccsd_communicator, mpi_err)
            call sleep_f(1)
         end if

         if(.not.leom .and. master_thread) write(iout, *)
      else
#endif
         mpi_rank = 0
         mpi_size = 1
         master_rank = 0
         bcast_comm = 0 !MPI_COMM_NULL (other MPI implementations?)
         inbcast = .true.
         master_thread = .true.
#if defined(MPI)
      end if
#endif

#if defined(OMP)
      omp_max_threads = OMP_GET_MAX_THREADS()
#ifdef Intel
#  if __INTEL_COMPILER < 1910 || defined(ONEDPL_VERSION_MAJOR)
      call omp_set_nested(.true.)
#  endif
#else
      call omp_set_nested(.true.)
#endif
      call omp_set_max_active_levels(1)
#ifdef MKL
      mkl_max_threads = MKL_GET_MAX_THREADS()
      call mkl_set_dynamic(0)
#else
      mkl_max_threads = omp_max_threads
#endif
#else
      omp_max_threads = 1
      mkl_max_threads = 1
#endif
c      ibind = omp_get_proc_bind()

C Allocate memory (if not allocated already)
      call memalloc

C Read keywords
      call getkey('calc',4,calctype,16)
      call getkey('ccsdalg',7,ccsdalg,8)
      call getkey('lccoporder',10,lccoporder,8)
      if (lccoporder.eq.'lccfirst') call mrccini_from_subroutine ! initialize when mrccini is not executed in main
      call getkey('dfintran',8,dfintran,8)
      call getkey('talg',4,talg,4)
      call getkey('dfbasis_cor',11,dfbasis_cor,4)
      lpert = trim(calctype).eq.'mp2' .or. trim(calctype).eq.'mp3'
      if(trim(calctype).eq.'mp3') then
         lmp3 = .true.
      else
         lmp3 = .false.
      end if
      if(leom .or. trim(calctype).eq.'mp3') then
         ccmaxit = 1
      else
         call getkey('ccmaxit',7,ccmxit,8)
         read(ccmxit,*) ccmaxit
      end if
      call uppercase(calctype,calctype,16)
      if(.not.leom) then
         write(iout, '(/" ")', advance = 'no')
         if (ccsdalg .eq. 'dfdirect')
     $         write(iout, '("Direct ")', advance = 'no')
         if (dfbasis_cor .ne. 'none')
     $         write(iout, '("DF-")', advance = 'no')
         write(iout, '(2a)') trim(adjustl(calctype)) // ' calculation'
         write(iout,*)
      end if
      lf12=.false.
      if(trim(calctype).eq.'CCSD(T)-F12') then
         calctype='CCSD(T)         '
         lf12=.true.
      else if(trim(calctype).eq.'CCSD-F12') then
         calctype='CCSD            '
         lf12=.true.
      endif
#if defined (MPI)
      if(lf12) then
         call bcast_file('F12INTE', bcast_comm)
         call bcast_file('F12INT1', bcast_comm)
      end if
#endif
      call getkey('cctol',5,cctol,4)
      read(cctol,*) i
      tol=10.d0**(-i)
      if(.not.leom .and. trim(calctype).ne.'mp3') write(iout,2015) tol
2015  format(' Convergence criterion: ',1pe8.1)
      call getkey('localcc',7,localcc,4)
      if(localcc.ne.'off '.and.
     $   master_thread) then !no ifcfile write for (group) slave threads if localcc & 2 layer MPI
         locnoc=3
      else
         locnoc=0
      endif
      call getkey('naf_cor',7,naf,16)
c      call getkey('eps',3,eps,16)
      call getkey('ovirt',5,ovirt,4)  !RZ
      call getkey('tprint',6,tprint,16)
      call getkey('ccsdmkl',7,ccsdmkl,3)
      call getkey('ccsdrest',8,ccsdrest,4)
      if(talg.eq.'lapl' .and. ccsdrest.ne.'off ') then
         write(iout,*) 'ERROR: Laplace (T) correction can only be calcul
     $ated with ccsdrest=off!'
         call mrccend(1)
      end if

      emp2full = 0.d0
      lno = ovirt.eq.'ppl ' .or. ovirt.eq.'mp2 ' .or. ovirt.eq.'osv'
      ppl_calc = ovirt .eq. 'ppl '
      lnaf = naf .ne. 'off             '
      if((lno .or. lnaf) .and. dfintran.ne.'ovirt   ' .and.
     $      localcc.eq.'off ') then
         if(master_thread) then
            open(ifcfile, file = "iface", status = "old")
            read(ifcfile, *)
            met = "               "
            do while(met .ne. "MP2            ")
               read(ifcfile, 7596, iostat = ios)
     $               csrc, met, is1, is2, is3, emp2full
               if(ios .ne. 0) then
                  write(iout, *)
     $                  "Error: MP2 energy not found in file iface!"
                  call mrccend(1)
               end if
            end do
            close(ifcfile)
c            if(eps .ne. "                ") lno  = .true.

c            write(*,*) "MP2_0: ",emp2full
         end if
      end if
7596  format(a8,1x,a15,1x,3(i2,1x),1pe23.15)
#if defined(OMP)
      call getkey('ccsdthreads',11,ccsdth,4)
      read(ccsdth,*) ccsdthreads
      if(ccsdthreads .gt. omp_max_threads) then
         write(iout,'(/"WARNING: ccsdthreads greater than the maximal nu
     $mber of OpenMP threads")')
         call mrccend(1)
      end if
      if(ccsdthreads .gt. mkl_max_threads) then
         write(iout, '(/"WARNING: ccsdthreads greater than the maximal n
     $umber of MKL threads")')
         write(iout, '("   using number of OpenMP threads instead")')
         mkl_max_threads = omp_max_threads
      end if
      call getkey('ptthreads',9,ptth,4)
      read(ptth,*) ptthreads
c      if(mpi_rank .eq. 0) ptthreads = 1
      if(calctype.eq.'CCSD(T)         ' .and.
     $      ptthreads .gt. omp_max_threads) then
         write(iout,'(/"WARNING: ptthreads greater than the maximal numb
     $er of OpenMP threads")')
         call mrccend(1)
      end if
      if(ptthreads .gt. mkl_max_threads .and.
     $      ccsdthreads .le. mkl_max_threads) then
         write(iout, '(/"WARNING: ptthreads greater than the maximal num
     $ber of MKL threads")')
         write(iout, '("   using number of OpenMP threads instead")')
         mkl_max_threads = omp_max_threads
      end if
#else
      ccsdthreads = 1
      ptthreads = 1
#endif

      call getvar('dhyb      ',dhyb)
      if(dhyb .eq. 3) then
         call getvar('cmp2s     ',cmp2os)
         call getvar('cmp2t     ',cmp2ss)
         call getvar('cmp3      ',cmp3)
      end if
      call getvar('ncore     ',nc)
      if (localcc.ne.'off ') nc=0 ! ncore is meaningless for domains in local CC
      if(.not.leom) write(iout,*)
c     open(minpfile,file='MINP')
      ifacename='iface'
c#if defined (MPI)
c       if (localcc.eq.'2022') then ! not needed if all LISs have dedicated folders
c         write(rank_ch, '(i8)') mpi_rank ! ??? mpi_rank not ok, not the same in ldrpa mpi and ccsd mpi
c         ifacename='iface.' // trim(adjustl(rank_ch))
c       endif
c#endif
      call getvar('nbset     ',nbset)
      allocate(nbfread(nbset))
      if(master_thread)
     $ open(ifcfile,status='unknown',file=ifacename,position='append')
c      open(555,file='fort.555')
      open(555,file='55') !RZ.
      rewind 555
      if(dfbasis_cor .eq. 'none') then
         dfnb = 1
         read(555, *)
      else if (localcc15p(localcc)) then
         read(555,*) (dfnb,i=1,4), dfnb
      else
        call getvar('nbf       ',nbfread)
        naobasis=nbfread(1)
         if(naf.eq.'                ' .or. dfintran.eq.'ovirt   ' .or.
     $         localcc .eq. '2013') then
            dfnb=nbfread(3)
            read(555,*) !!!!!
         else
            read(555,*) (dfnb,i=1,4), dfnb
         endif
      endif
c pontcsop dim
      read(555,*) dgroup
c a palyak szimmetria szerint rendezve; a szimmetriak hatarait mutatja meg
      read(555,*) bsymmv(1:dgroup+1)
c adott szimmhez tartozo betoltott palyak szama
      read(555,*) symmocc(1:dgroup)

      nocc=0
      do i=1,dgroup
       nocc=nocc+symmocc(i)
      enddo
      nbasis=bsymmv(dgroup+1)-1
      nvirt=nbasis-nocc
      if (localcc15p(localcc)) naobasis=nbasis ! warning: dummy initialization!!!
      wsize=max(nocc,nvirt)**3+ibufln

      if(nocc.eq.0 .or. nvirt.eq.0) goto 123

      close(555,status='keep')

      if(.not.leom) then
         if (dfbasis_cor.eq.'none') then
           write(iout,"(' Number of occupied/virtual orbitals:',2i6)")
     $      nocc,nvirt
c           if (dfnb.eq.0) dfnb=1
         else
           write(iout,
     $      "(' Number of occupied/virtual/auxiliary orbitals:', 3i6)")
     $      nocc,nvirt,dfnb
         endif
      endif

!NP
c construct input file 56 and 2e integrals <pq|rs> from J_pq density fitted integrals
      if (localcc15p(localcc).and..not.(ccsdalg .eq. 'dfdirect')) then ! TODO: localcc=2022
#ifdef MPI
         if(mpi_size .gt. 1) then
            call bcast_file('DFINT_IJ', bcast_comm)
            call bcast_file('DFINT_AI', bcast_comm)
            call bcast_file('DFINT_AB', bcast_comm)
            if(lnaf) then
               call bcast_file('DFINT_AI_NONAF', bcast_comm)
               call bcast_file('iface', bcast_comm)
            end if
         end if
#endif
         mxmem=maxcor-(imem-imem1)
         tmp=0.d0
         write(iout,*) 'Assembly of four-center integrals'
         write(iout,*)
         call jpq2pqrs(nocc,nvirt,dfnb,dcore,irecln,ibufln,ibuftmp,mxmem
     $        ,imem,tmp,'ccsd')
      endif
#ifdef MPI
      if ((localcc15p(localcc) .and. naf.ne.'off             ') ! TODO: localcc=2022
     $      .and. (lccoporder.eq.'trffirst' .or. ccsdalg.eq.'disk    ')
     $      .and. mpi_size.gt.1) call bcast_file('ajb', bcast_comm)
#endif
!NP

      eppl_correction = 0.d0

C Partitioning memory

      allocation: if(.not.leom) then
      iui=dblalloc(nocc)
      iico=intalloc(nbasis)
      ico=intalloc(nbasis)
      ifirst=intalloc(6*8*8*nbasis)
      ilast=intalloc(6*8*8*nbasis)
      if(ccsdalg .eq. 'dfdirect'.and.lccoporder.eq.'trffirst') then
         ijij = dblalloc(dfnb*nocc**2)
         ijia = dblalloc(dfnb*nocc*nvirt)
         ijab = dblalloc(dfnb*nvirt**2)
      endif
c
c minimum memory requirement for canconical talg=occ/virt route
      talgmem=nocc+nvirt+nvirt*nocc+nvirt**2*nocc**2+nvirt*nocc**3  ! common part: fdo+fdv+itnew+ittnew+iaijk
      if(ccsdalg .eq. 'disk') then
         to_abij = nvirt**2*nocc**2
      else
         to_abij = nvirt**2*nocc
      end if
      talgoccmem=talgmem + to_abij +                                ! abij
     $      ptthreads*nvirt*nocc*max(nvirt,nocc) +                  ! trr
     $      nvirt**3 + ptthreads*(nvirt**2+3*nvirt**3+wsize)        ! IabIJ,abck,w,abci,abcj,v
c     $      ptthreads*(nvirt**2+nvirt**3+wsize)        ! IabIJ,abck,w,abci,abcj,v
      talgvirtmem=talgmem+2*nvirt**2*nocc**2+6*nocc**2+2*nocc**3+   ! abij,trr,iv1-iw3,scro,w
     $                    wsize+3*nvirt**2*nocc                     ! v+abci block

      if(ccsdalg .eq. 'disk') then
         tlape = nocc**2 + dfnb*(nvirt**2+ nvirt*nocc+nocc**2)
      else if(lccoporder .eq. 'trffirst') then
         tlape = nocc**2
      else
         tlape = 0
      end if
      laplminmem=(2+ptthreads)*nvirt**3+wsize*ptthreads+
     $      nvirt**2*(dfnb+nocc*ptthreads)
      abcistoremin=nvirt**3*(nocc+ptthreads)+wsize*ptthreads+
     $      nvirt**2*nocc*ptthreads ! store full abci & rr(a,b,i,1) & w and v
      talgmem = talgmem - nvirt*nocc**3 +
     $      max(nvirt*nocc**3, dfnb*nocc*max(nvirt, nocc)) ! aijk
      if(localcc .eq. 'off ') then
         laplminmem=abcistoremin
      else
         laplminmem=min(laplminmem,abcistoremin)
      end if
      if(localcc .eq. 'off ') then
         jlen = nocc
      else
         jlen = 1
      end if
      talglaplmem=talgmem + tlape + laplminmem +
c     $      max(nvirt*nocc**3, dfnb*nocc*max(nocc, nvirt)) +   ! aijk
     $      dfnb*nvirt*nocc + max(nocc, nvirt)**2*ptthreads +  ! lljpai, IabIJ
     $      nvirt*nocc*max(nvirt, nocc) + ! ttl
     $      nvirt**2*(nocc*(1+jlen)+ptthreads) + max(nocc, nvirt)*nocc ! ttl2, abij, ttl3, t1

      if(talg .eq. 'occ ') then
         tmemmin = talgoccmem
      else if(talg .eq. 'virt') then
         tmemmin = talgvirtmem
      else if(talg .eq. 'lapl') then
         tmemmin = talglaplmem
      end if
      if(.not.leom .and. calctype.eq.'CCSD(T)         ') then
c       if (localcc.eq.'off     ') then
         write(iout,'(f10.2,
     $        " MB is the minimal memory requirement for (T)")')
     $        (tmemmin+(imem-imem1))*8.d0/1024**2
         if(maxcor-(imem-imem1).lt.tmemmin) then
            write(iout,*)"Insufficient memory for (T) correction"
c            call mrccend(1)
c         else
c          if(talg.eq.'occ '.and.maxcor-(imem-imem1).lt.talgoccmem) then
c            talg='virt'
c            write(iout,*)"Warning: Insufficient memory for talg=occ,
c     $  switching to talg=virt"
c            write(iout,'(f10.2,
c     $" MB is the minimal memory requirement for the faster talg=occ")')
c     $         (talgoccmem+(imem-imem1))*8.d0/1024**2
c            call flush(iout)
c          endif
c         endif
         endif
      endif
c
      iscrs=dblalloc(max(nvirt*(nvirt+1),nocc**2,
     $      nocc*nvirt*ccsdthreads))
c warning: do not change the position of ifij, ifab, it, itt to stay consistent with the "Preparing memory" part of (T)
      ifij=dblalloc(nocc**2)
      ifab=dblalloc(nvirt**2)
      it=dblalloc(nocc*nvirt)
c      itt=dblalloc(nvirt**2*(nocc+1)*nocc/2) ! has to be here so that it is not overwritten by ittnew in (T)
      ! for tt or (+)tt and (-)tt
      itt=dblalloc(nvirt*(nvirt+1)*(nocc+1)*nocc/2+1) ! has to be here so that it is not overwritten by ittnew in (T)
      ittend=itt+nvirt**2*(nocc+1)*nocc/2             ! stores tt or (+)tt and (-)tt;  +1 for MPI communication
      inorm = itt+nvirt**2*(nocc+1)*nocc/2

      ibmat=dblalloc(ccmaxit**2)
      ibvec=dblalloc(ccmaxit**2)
      iinvbmat=dblalloc((ccmaxit+1)**2)
      icvec=dblalloc(ccmaxit)
      ifock = dblalloc(nbasis**2)
C
      ih=dblalloc(nbasis**2)
      ifai=dblalloc(nvirt*nocc)
      if(localcc.ne.'off ')then
         iint1=dblalloc(nocc**2)
      else
         iint1=1
      endif

      ibeta=dblalloc(nocc**2)
      ix=dblalloc(nvirt**2)
      irr=dblalloc(nvirt*nocc)
      ibuffer=dblalloc(ibufln)

      iv=dblalloc(nvirt*nocc)
      ivv=dblalloc(nvirt**2*(nocc+1)*nocc/2)
      ivvend=ivv+nvirt**2*(nocc+1)*nocc/2

      iend = iv

      else allocation
         imem = iend
         iv = is
         ivv = iss

      end if allocation
C

      call readinfo(nbasis,nocc,dgroup,bsymmv,symmocc,
     $icore(iico),icore(ico),dcore(ih),nenergy,icore(ifirst),
     $icore(ilast),dcore(iui),deglev,ccsdalg,nirmax)
C
 123  continue
      if(ccsdalg .eq. 'disk    ') then
         ifij0 = imem
         ifia0 = imem
         ifai0 = imem
         ifab0 = imem
         ijij = imem
         ijia = imem
         ijai = imem
         ijab = imem
         ifia = imem

      ! 15a
      memreq1 = (imem-imem1)+nvirt**2*nocc**2+nvirt**3
      ! 2g
      memreq2 = (imem-imem1)+nvirt**2*nocc**2+nocc**4+nocc**3*(nocc+1)/2
      ! 4
      memreq3 = (imem-imem1)+nocc**3*nvirt+nvirt**3+nvirt**2*(nvirt+1)/2
     $      +nocc*(nocc+1)/2*(nocc*nvirt+nvirt*(nvirt+1))
      memreq = max(memreq1, memreq2, memreq3)
      if(.not.leom) write(iout,'(f10.2,
     $ " MB is the minimal memory requirement for CCSD")')
     $ memreq*8.d0/1024**2

         call ccsdcore(dcore(it),dcore(itt),dcore(iv),dcore(ivv),
     $         dcore(iscrs),dcore(imem),dcore(ih),
     $         dcore(ibmat),dcore(ibvec),dcore(iinvbmat),dcore(icvec),
     $         dcore(ibeta),dcore(ix),dcore(irr),
     $         dcore(ifij),dcore(ifab),dcore(ifai),
     $         icore(ico),icore(ifirst),icore(ilast),
     $         dgroup,bsymmv,symmocc,dcore(ibuffer),
     $         calctype,localcc,dcore(iui),dcore(iint1),esdi,
     $         dfintran,ccmaxit,tprint,nc,ccsdalg,nenergy,
     $         irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg,
     $         maxcor,imem,dcore,ifltln,iout,diisfile,errfile,
     $         tfile,tinfo,eref,ecc,emp2,tol,locnoc,mpi_rank,mpi_size,
     $         mpi_ccsd,ccsdmkl,ccsdthreads,omp_max_threads,
     $         ccsdrest,ptrest, ccsd_converged, ccsd_communicator,
     $         emp2full,lno,lnaf,emp2corr,master_thread, lpert)

      else if(ccsdalg .eq. 'dfdirect') then
         ihij = dblalloc(nocc**2)
         ihia = dblalloc(nocc*nvirt)
         ihai = dblalloc(nocc*nvirt)
         ihab = dblalloc(nvirt**2)
         ijai = dblalloc(dfnb*nocc*nvirt)
         ifai0 = dblalloc(nocc*nvirt)
         ifij0 = dblalloc(nocc**2)
         ifab0 = dblalloc(nvirt**2)
         ifia = dblalloc(nvirt*nocc)
         if(leom) then
            ifij_c1 = dblalloc(nocc**2)
            ifab_c1 = dblalloc(nvirt**2)
            ifia_c1 = dblalloc(nvirt*nocc)
            ifai_c1 = dblalloc(nvirt*nocc)
            ijai_c1 = dblalloc(dfnb*nocc*nvirt)
            ijab_c1 = dblalloc(dfnb*nvirt**2)
            ijij_c1 = dblalloc(dfnb*nocc**2)
         else
            ifij_c1 = imem
            ifab_c1 = imem
            ifia_c1 = imem
            ifai_c1 = imem
            ijai_c1 = imem
            ijab_c1 = imem
            ijij_c1 = imem
         end if

         mra2 = (imem-imem1) + 2*nvirt*(nvirt+1) + nocc*(nocc+1)/2
#if defined(OMP)
     $       + omp_get_max_threads() * nvirt**2
#endif
         mrbc2 = (imem-imem1) + nocc**3*(nocc+1)/2 +
     $         (3 + 1) * ccsdthreads * nvirt**2*nocc +
     $         nvirt*nocc*dfnb + nvirt**2
         mrd2 = (imem-imem1) + nvirt**2 +
     $         (3 +1) *  ccsdthreads * nvirt**2*nocc
         mreg2 = (imem-imem1) + 2 * nvirt*nocc*dfnb
         mrtrf = (imem-imem1) + dfnb*max(nocc**2, nocc*nvirt)
         mren = (imem-imem1) + 2 * nocc**2*nvirt
         mrri = (imem-imem1) + dfnb*nocc*(nocc+1)/2 ! to read integrals
         memreq = max(mra2, mrbc2, mrd2, mreg2, mrtrf, mren, mrri)
         if(.not.leom .and. .not.lmp3) write(iout,'(f10.2,
     $         " MB is the minimal memory requirement for CCSD")')
     $         memreq*8.d0/1024**2
         ccmem(5)=memreq

         call dfccsdcore(dcore(it),dcore(itt),dcore(iv),dcore(ivv),
     $        dcore(iscrs),iscrs,
     $        dcore(ijij),dcore(ijai),dcore(ijia),dcore(ijab),
     $        dcore(ih),
     $        dcore(ifij0),dcore(ifai0),dcore(ifab0),dcore(ifock),
     $        dcore(ibmat),dcore(iinvbmat),
     $        dcore(ibeta),dcore(ix),
     $        dcore(ifij),dcore(ifab),dcore(ifai),dcore(ifia),
     $        localcc,dcore(iui),
     $        dcore(iint1),esdi,dfintran,ccmaxit,tprint,nc,dfnb,
     $        ccsdalg,lccoporder,dcore(iabj),ccmem,
     $        nbasis,nocc,nvirt,iout,ifltln,naobasis,imem,
     $        maxcor,dcore,icore,eref,emp2,ecc,diisfile,errfile,tol,
     $        locnoc,
     $        mpi_rank,mpi_size,dcore(inorm),master_rank, master_thread,
     $        mpi_ccsd,
     $        ccsdmkl, ccsdthreads, omp_max_threads, mkl_max_threads,
     $        ccsdrest, ptrest, ccsd_converged, ccsd_communicator,
     $        emp2full,lno,lnaf,emp2corr, c, cc, leom,
     $        dcore(ifia_c1), dcore(ifij_c1), dcore(ifab_c1),
     $        dcore(ifai_c1),
     $        dcore(ijij_c1), dcore(ijab_c1), dcore(ijai_c1),
     $        eppl_correction, ppl_calc, lmp3, calctype,lf12,
     $        cmp2os, cmp2ss, cmp3, dhyb, lpert, nbfread)


      else
         call unknown('ccsdalg', 7)
      end if
C
      if(calctype.eq.'CCSD(T)         ') then ! .and. ccsd_converged) then
         if(ccsdalg .eq. 'dfdirect') then
            call dcopy(nocc**2, dcore(ifij0), 1, dcore(ifij), 1)
            call dcopy(nvirt**2, dcore(ifab0), 1, dcore(ifab), 1)
         end if
         call ptcore(dcore(it),dcore(itt),dcore(ifij),dcore(ifab),
     $      icore(ico),icore(ifirst),icore(ilast),dgroup,bsymmv,symmocc,
     $      iscrs,localcc,dcore(iui),esdi,et,iui,dfnb,ittend,ijij,ijia,
     $      ijab,ccsdalg,igsl,lccoporder,ccmem,talg,
     $      irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg,
     $      iout,maxcor,imem,dcore,iimem,icore,wsize,gbasfile,ecc,eref,
     $      locnoc,mpi_rank,mpi_size,imem,mpi_ccsd,
     $      ccsdmkl, ptthreads, omp_max_threads, mkl_max_threads,
     $      ccsdrest, ptrest, bcast_comm, ccsd_communicator,emp2corr,
     $      lno,lnaf,eppl_correction,emp2,emp2full,master_rank,
     $      master_thread,lf12,ppl_calc)
      end if

c      if(localcc.ne.'off ') then
         call dbldealloc(iui)
c      else
c         call dbldealloc(ivvend)
c      end if

      if(master_thread) close(ifcfile,status='keep')
#if defined(MPI)
      if(mpi_ccsd .and. mpi_size .ne. 1.and.localcc.eq.'off ') then ! TODO: localcc
         if(master_thread) then
            call gather_file('ccsd', ccsd_communicator)
         else
            call gather_file(outfilename, ccsd_communicator)

            call delete_file(outfilename, .true.)
            call delete_file('KEYWD', inbcast)
            call delete_file('VARS', inbcast)
            call delete_file('55', inbcast)
            if(lf12) then
               call delete_file('F12INTE', inbcast)
               call delete_file('F12INT1', inbcast)
            end if
         end if
      end if
#endif
      if(ccsd_converged .and. inbcast) then !master_thread) then
c         call delete_file('ccsd.rst', .true.)
c         call delete_file('pt.rst', .true.)
         call delete_file('fort.18', .true.)
         call delete_file('fort.19', .true.)
      end if
      end subroutine

************************************************************************
      subroutine ccsdcore(t,tt,v,vv,scrs,vscr,h,bmat,bvec,invbmat,cvec,
     $beta,x,rr,fij,fab,fai,co,first,last,dgroup,bsymmv,symmocc,buffer,
     $calctype,localcc,ui,int1,esdi,dfintran,ccmaxit,tprint,nc,ccsdalg,
     $nenergy,
     $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg,maxcor,imem,dcore,
     $ifltln,iout,diisfile,errfile,tfile,tinfo,eref,ecc,emp2,tol,locnoc,
     $mpi_rank,mpi_size, mpi_ccsd,ccsdmkl,ccsdthreads,omp_max_threads,
     $ccsdrest,ptrest, ccsd_converged, ccsd_communicator, emp2full,
     $lno,lnaf,emp2corr,master_thread,lpert)
************************************************************************
      implicit none
      integer irecln,ibufln,nbasis,nocc,nvirt,dgroup,nirmax
      integer bsymmv(nirmax+1),symmocc(nirmax),multpg(nirmax,nirmax)
      integer maxcor, imem, ifltln,iout,diisfile,errfile,tfile,tinfo
      real*8 dcore(*)
      real*8 eref,ecc,emp2,tol
      integer a,b,c,i,j,k,l,n
      integer ttind,ttind2,ind,id,nc
      integer ccmaxit,locnoc, nit0, ccsd_communicator
      real*8 norm,nenergy
      real*8 bmat(ccmaxit,ccmaxit),invbmat(ccmaxit+1,ccmaxit+1)
      real*8 cvec(ccmaxit),bvec(ccmaxit**2)
      real*8 tt(nvirt,nvirt,(nocc+1)*nocc/2)
      real*8 vv(nvirt,nvirt,(nocc+1)*nocc/2)
      real*8 scrs(nvirt,nvirt),vscr(*)
      real*8 beta(nocc,nocc),x(nvirt,nvirt)
      real*8 t(nvirt,nocc),rr(nvirt,nocc),v(nvirt,nocc)
      real*8 fab(nvirt,nvirt),fai(nvirt,nocc),fij(nocc,nocc)
      real*8 h(nbasis,nbasis),ui(nocc)
      real*8 eccold,esdi, emp2full, emp2corr

      integer mpi_rank, mpi_size
      integer ccsdthreads, omp_max_threads
      character*3 ccsdmkl
      character*4 ccsdrest

      integer co(nbasis)
      integer first(0:5,8,8,nbasis),last(0:5,8,8,nbasis)
      integer recbounds(nvirt,2)

      real*8 buffer(ibufln)

      integer imem1,avmem,dblalloc
      common/memcom/ imem1
      logical notend,lene, master_thread, lpert
c      character*1 ison
      character*4 localcc,dfbasis_cor,ovirt
      character*8 dfintran
      character*16 calctype,tprint
      integer itmp
      real*8 int1(nocc,nocc),tprtol
      integer isrpa
      logical ltpr,localcc15p
c times
      real*8 ttmp(6,10),times(6,0:100)
      character*80 txt(0:100)

      real*8, pointer :: alpha_ijkl(:,:,:,:),cc_abIJ(:,:,:),ck_IJKL(:,:)
      real*8, pointer :: ks_aklj(:,:,:,:), tk_iklj(:,:,:,:)
      real*8, pointer :: ks_ak_ofIJ(:,:)
      real*8, pointer :: alpha_klIJ(:,:,:), cc_abkl(:,:,:,:)
      real*8, pointer :: kdo_akij(:,:,:,:), kk_ablk(:,:,:,:)
      real*8, pointer :: abcj_abcj(:,:,:,:), kk_abik(:,:,:)
      real*8, pointer :: ccx(:,:)
      real*8, pointer :: jj_abkj(:,:,:), ccp(:,:), ccm(:,:)
      real*8, pointer :: abcix(:,:,:), nmmat(:,:,:)
      real*8, pointer :: nmmat2(:,:)
      real*8, pointer :: ll_abkl(:,:,:,:)
      real*8, pointer :: ccpr_bcl_ofj(:,:,:), ll_ablk(:,:,:,:)
      real*8, pointer :: tt_abij(:,:,:,:), ll_klab(:,:,:,:)
      real*8, pointer :: cc_labi(:,:,:,:), cc_abk_ofj(:,:,:)
      real*8, pointer :: betac_abj_ofi(:,:,:)
      real*8, pointer :: z_bakj(:,:,:,:)
      real*8, pointer :: y_bakj(:,:,:,:), tztr(:,:)
      real*8, pointer :: ks_laK_J(:,:,:), tt_ack_ofi(:,:,:)
      real*8, pointer :: ccbar_bcl_ofj(:,:,:)
      real*8, pointer :: tz(:,:)
      real*8, pointer :: ls_lakj(:,:,:,:), ttpr(:,:,:), ccpr(:,:)
      real*8, pointer :: abic_baic(:,:,:,:), tmp(:), aibc_baic(:,:,:,:)
      real*8, pointer :: aibc_abic(:,:,:,:), tks_baK_J(:,:,:)
      real*8, pointer :: ls_alki(:,:,:,:), ccij(:,:)
      real*8, pointer :: tj_iakj(:,:,:)
      real*8 fac,eccss,eccos,eccs,eccscs,eccoss,eccsss,et1
      integer ialpha, iccabij, ickijkl, iksaklj, itkijkl, iksakofij
      integer ialphaklij, iccabkl, ikdoakij, illabkl
      integer ittabij, iccabkofj, izbakj, itz, itztr
      integer iccbarbclofj, iybakj, ittackofi
      integer ittpr, iccprbclofj, iccpr, iccx, inmmat, iccp, iccm
      integer ijjabkj
      integer indmin, indmax
      integer ccx_size, nmmat_size, kssize, ccpm_size
      real*8 calcnorm
      integer blocknumber, bsize, maxmemalloc
      character*8 ccsdalg
      real*8 rdummy(1) ! for energycalc instead of Jai and Jia
      integer idummy, iscrs2
      logical mpi_ccsd, rstex, ptrest, ccsd_converged, lno, lnaf

      integer nit
      logical lfock

      interface
c {{{ interfaces for pointers
      subroutine rpoint1d(egydim1,egydim2,dim1)
         implicit none
         integer dim1
         real*8,target :: egydim1(dim1)
         real*8, pointer :: egydim2(:)
      end subroutine
      subroutine rpoint2d(egydim,haromdim,dim1,dim2)
         implicit none
         integer dim1,dim2
         real*8,target :: egydim(dim1,dim2)
         real*8, pointer :: haromdim(:,:)
      end subroutine
      subroutine rpoint3d(egydim,haromdim,dim1,dim2,dim3)
         implicit none
         integer dim1,dim2,dim3
         real*8,target :: egydim(dim1,dim2,dim3)
         real*8, pointer :: haromdim(:,:,:)
      end subroutine
      subroutine rpoint4d(egydim,haromdim,dim1,dim2,dim3,dim4)
         implicit none
         integer dim1,dim2,dim3,dim4
         real*8,target :: egydim(dim1,dim2,dim3,dim4)
         real*8, pointer :: haromdim(:,:,:,:)
      end subroutine
c}}}
      end interface

      emp2corr = 0.d0

      call getkey('ovirt',5,ovirt,4)  !RZ
      lfock=ovirt.eq.'off '.and.localcc.ne.'2013'           !RZ
      times(1:6,0:100)=0.d0
c
      if(tprint.ne.'off             ') then
         ltpr=.true.
         read(tprint,*) tprtol
      else
         ltpr=.false.
      endif

      iscrs2 = dblalloc(nocc*nvirt)
c
c      call getarg(1,ison)
c      ison='y'

      call rbwrt(recbounds,nvirt,ibufln)

c {{{ ccsd
      nit0 = 0
      nit=0
      id=0
c      if (ison.eq.'y') write(*,*) 'start pre iter'
c      call stopper(id,ison)
c      call timer

      if(maxcor-(imem-imem1) .ge. nvirt**2*nocc**2) then
         call dfillzero(dcore(imem),nvirt**2*nocc**2)
         call llextract(dcore(imem),bsymmv,symmocc,dgroup,first,last,
     $         co,buffer,
     $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg,ifltln)
      else
         call memerr(nvirt**2*nocc**2, maxcor-(imem-imem1),
     $         'Insufficient memory to construct L!     ')
      endif

C Construct l, Eq. 19

      if(maxcor-(imem-imem1).ge.nvirt*nocc**3)then
         call dfillzero(dcore(imem),nvirt*nocc**3)
         call lsextract(dcore(imem),bsymmv,symmocc,dgroup,co,
     $        first,last,buffer,
     $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg,ifltln)
      else
         call memerr(nvirt*nocc**3, maxcor-(imem-imem1),
     $         'Insufficient memory to construct l!     ')
      endif
c      write(*,*) 'Construct l, Eq. 19'
c      call timer

c decompress K
      if(maxcor-(imem-imem1) .ge. nvirt*2*nocc**2) then
         call rpoint4d(dcore(imem), kk_ablk, nvirt, nvirt, nocc, nocc)
         call kklkread(kk_ablk, bsymmv, symmocc, dgroup,
     $         first, last, co, buffer, 'n',
     $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
         call kklkwrite(kk_ablk, nocc, nvirt, ifltln)
      else
         call memerr(nvirt**2*nocc**2, maxcor-(imem-imem1),
     $         'Insufficient memory for K!              ')
      end if

C B3)
      if(maxcor-(imem-imem1).ge.nocc**4)then
         call fockextract(h,fij,fai,fab,bsymmv,symmocc,co,first,
     $         last,dgroup,dcore(imem),buffer,localcc,dfintran,
     $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
      else
         call memerr(nocc**4, maxcor-(imem-imem1),
     $         'Insufficient memory for Fock matrix!   ')
      endif

c      if (ison.eq.'y') write(*,*) 'fock B3'
c      call stopper(id,ison)
c      call timer
C Calculate the Hartree-Fock energy {{{
      call getkey('dfbasis_cor',11,dfbasis_cor,4)
      if(localcc.eq.'off '.and.dfbasis_cor.ne.'none') then
         call getvar('eref      ',eref)
      elseif(localcc15p(localcc)) then
         eref=0.d0
      else
       if(lfock)then !RZ
         call getvar('eref      ',eref)
       else !RZ
         eref=nenergy
         do i=1,nocc
            eref=eref+h(i,i)+fij(i,i)
         enddo
       endif !RZ
      endif
      if(.not.localcc15p(localcc)) then
         write(iout,*)
         write(iout,"(' Reference energy [au]:    ',f25.12)")
     $         eref
      endif ! }}}

C Initialize t's {{{
      call vvinit(vv,bsymmv,symmocc,dgroup,first,last,co,buffer,
     $irecln,ibufln,nocc,nvirt,nbasis,nirmax,multpg)
      do j = 1, nocc
         do i = 1, j
            ttind = (j-1) * j / 2 + i
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
C$OMP& PRIVATE(a,b)
            do b = 1, nvirt
               do a = 1, nvirt
                  tt(a, b, ttind) = -vv(a, b, ttind) /
     $                  (fab(a, a) + fab(b, b) - fij(i, i) - fij(j, j))
               end do
            end do
C$OMP END PARALLEL DO
         end do
         do a = 1, nvirt
            t(a, j) = -fai(a, j) / (fab(a, a) - fij(j, j))
         end do
      end do ! }}}

      nit=0
      call energycalc(t,tt,fai,nit,eref,0.d0,
     $      scrs,localcc,ccsdalg,scrs, !NP
     $maxcor,imem,dcore,ifltln,iout,nvirt,nocc,idummy,ecc,scrs,
     $dcore(iscrs2), mpi_rank, mpi_size,mpi_ccsd, times,
     $ccsd_communicator,emp2full,lno,lnaf,emp2corr,eccs,eccss,eccos,
     $eccscs,eccoss,eccsss,.true.,master_thread,.false., vv, vv, et1)
      if(localcc.ne.'off ') then
      call localenergycalc(t,tt,fai,nit,scrs,int1,esdi,ui,
     $      ccsdalg,idummy,scrs,scrs,
     $nocc, nvirt, dcore, maxcor, imem, iout,ifltln,scrs,dcore(iscrs2))

      if(nocc.eq.0 .or. nvirt.eq.0) goto 234

c     write(iout,*)
c     write(iout,'(" CCSD correlation energy [au]:   ",f22.12)')esdi
c     write(iout,'(" CCSD total energy [au]:         ",f22.12)')
c    $esdi+eref
c     write(iout,*)
         call prtenergc('MP2            ',esdi,eref,locnoc)
      else
         call prtenergc('MP2            ',ecc,eref,locnoc)
      endif
      if(trim(calctype).eq.'MP2') Return
c      if(.false.) then
c      if(.true.) then
c      calctype='SOSEX           '
c      localcc='sose'
C testing rpa calculation
      if(localcc.eq.'drpa'.or.calctype.eq.'DRPA            '.or.
     $    localcc.eq.'rpa '.or.calctype.eq.'RPA             '.or.
     $    localcc.eq.'sose'.or.calctype.eq.'SOSEX           ')then
c      IF(.true.)then
C Save MP2 amplitudes for initial CCSD amplitudes
         call twrite(t, tfile, irecln, tinfo, ibufln, nvirt, nocc)
         call system('mv tfile tfile.mp2')
         call system('mv tinfofile tinfofile.mp2')
C Calculate dRPA energy
         call rpa_twrite(tt,tfile,tinfo,irecln,ibufln,nocc,nvirt)
C kell valami keyword az RPA-hoz!!
         isrpa=0
         if(localcc.eq.'rpa '.or.calctype.eq.'RPA             ') isrpa=1
         call rpacorecc(nvirt,nocc,tt,dcore(imem),vv,fab,fij,eref,tol,
     $ccmaxit,vscr,bmat,bvec,invbmat,cvec,iout,ecc,scrs,localcc,int1,
     $ui,esdi,isrpa,bsymmv,symmocc,dgroup,first,last,co,buffer,
     $ibufln,dcore,imem,diisfile,errfile,tfile,tinfo,ifltln,ibufln,
     $irecln,nbasis,nirmax,multpg,locnoc)
         if(localcc.eq.'sose'.or.calctype.eq.'SOSEX           ')then
          if(localcc.ne.'off ')then
            call localenergycalc(t,tt,fai,nit,scrs,int1,esdi,ui,
     $            ccsdalg,idummy,scrs,scrs, nocc, nvirt, dcore,
     $         maxcor, imem, iout, ifltln, scrs, dcore(iscrs2))
            esdi=0.5d0*esdi
            write(iout,'(" SOSEX correlation energy [au]:  ",f22.12)')
     $esdi
            write(iout,*)
            call prtenergc('SOSEX          ',esdi,eref,locnoc)
          else
            norm = calcnorm(nvirt,nocc,fai,vv)
            call energycalc(t,tt,fai,nit,eref,
     $            norm,scrs,localcc,ccsdalg,scrs, !NP
     $maxcor,imem,dcore,ifltln,iout,nvirt,nocc,idummy,ecc,scrs,
     $dcore(iscrs2), mpi_rank, mpi_size,mpi_ccsd, times,
     $ccsd_communicator,emp2full,lno,lnaf,emp2corr,eccs,eccss,eccos,
     $eccscs,eccoss,eccsss,.false.,master_thread,.false., vv, vv, et1)
            ecc=0.5d0*(ecc-eref)+eref
            write(iout,'(" SOSEX correlation energy [au]:  ",f22.12)')
     $ecc-eref
            write(iout,'(" Total SOSEX energy [au]:        ",f22.12)')
     $ecc
            write(iout,*)
            call prtenergc('SOSEX          ',ecc,eref,locnoc)
          endif
         endif
         write(iout,*)

C Restore MP2 amplitudes for initial CCSD amplitudes
      call system('mv tfile.mp2 tfile')
      call system('mv tinfofile.mp2 tinfofile')
      call ttread(t,'n', ibufln, irecln, tfile, tinfo)
       endif
c      call twrite(t, tfile, irecln, tinfo, ibufln, nvirt, nocc)
c      call stopper(id,ison)
      emp2=ecc

      if(nocc.eq.0 .or. nvirt.eq.0) goto 234

C Starting iterations
      write(iout,*) 'Starting CCSD iterations...'
      write(iout,*)
cc     call timer
      nit=0
      if (.not.localcc15p(localcc)) then
        write(iout,'(21x,"Residual norm",10x,"Energy")')
      else
        write(iout,'(21x,"Residual norm",7x,"Correlation Energy")')
      endif

      if(ccsdrest .eq. 'ccsd') then
c         if(mpi_rank .eq. master_rank) then !0) then
         if(master_thread) then
            inquire(file = 'ccsd.rst', exist = rstex)
            if(rstex) call read_rst_ccsd(nit0, ecc, eccold, norm, lene,
     $            t, tt, nocc, nvirt)
            nit0=0
            write(iout, "(' Iteration', i3, 2f22.12)") nit, norm, ecc
            call flush(iout)
         end if
      else
         norm=1000.d0*tol
         eccold=emp2+10.d0*tol
         lene=.true.
      end if

c      TIME0(times(1,99))
      do while(
     $      (norm / 10.d0 .gt. tol .or.
     $      abs(eccold - ecc) .gt. tol .or.
     $      lene) .and.
     $      (nit .lt. ccmaxit))
      lene=abs(eccold-ecc).gt.tol
      eccold=ecc
      nit=nit+1
c      TIME0(ttmp)

      id=0
c     write(6,*)
c      call stopper(id,ison)

C 1) a), b) c)
c      TIME0(times)
      call dcopy(nvirt*nocc, fai, 1, v, 1)
      call dcopy(nvirt*nocc, fai, 1, rr, 1)
      call dcopy(nocc**2, fij, 1, beta, 1)
      call dgemm('t', 'n', nocc, nocc, nvirt,
     $      1.d0, v, nvirt, t, nvirt,
     $      1.d0, beta, nocc)
c      TIMEADD(times(1,1),times)

c      if (ison.eq.'y') write(*,*) '1) a), b) c)'
c      call stopper(id,ison)
c      call timer
C 2) a), b)
c      TIME0(times)
      if(maxcor-(imem-imem1).ge.nocc**4) then
         ialpha = dblalloc(nocc**4)
         call rpoint4d(dcore(ialpha), alpha_ijkl,
     $         nocc, nocc, nocc, nocc)

         ! read <ij|kl> into alpha and K into V
         call list2abread(alpha_ijkl, vv, bsymmv, symmocc, dgroup,
     $         first, last, co, buffer,
     $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
      else
         call memerr(nocc**4, maxcor - (imem-imem1),
     $         'Insufficient memory for alpha_ijkl!     ')
      endif

c      if (ison.eq.'y') write(*,*) '2) a), b) '
c      call stopper(id,ison)
c      call timer

C 2)  c)
c      TIME0(times)
      if(maxcor-(imem-imem1).ge.nvirt**2*nocc*(nocc+1)/2) then
         iccabij = dblalloc(nvirt**2*nocc*(nocc+1)/2)
         call rpoint3d(dcore(iccabij), cc_abIJ,
     $         nvirt, nvirt, nocc*(nocc+1)/2)
      else
         call memerr(nvirt**2*nocc*(nocc+1)/2,maxcor - (imem-imem1),
     $        'Insufficient memory for cc_abIJ!        ')
      end if

      if(maxcor-(imem-imem1).ge.nocc**2*(nocc+1)**2/4) then
         ickijkl = dblalloc(nocc**2*(nocc+1)**2/4)
         call rpoint2d(dcore(ickijkl), ck_IJKL,
     $        nocc*(nocc+1)/2, nocc*(nocc+1)/2)
      else
         call memerr(nocc**2*(nocc+1)**2/4, maxcor - (imem-imem1),
     $        'Insufficient memory for ck_IJKL!        ')
      end if

      ! C^{ji}_{ab} = C^{ij}_{ba} for i <= j
      do j = 1, nocc
        do i = 1, j
          ttind = j * (j - 1) / 2 + i
          call ccijtrcalc(cc_abIJ(:, :, ttind), nvirt, t(1,i), t(1,j),
     $         tt(1, 1, ttind))
        enddo
      enddo

      ! C^{ji}_{ab} * K^{kl}_{ab} -> alpha_ijlk, alpha_jikl
      call dgemm('t', 'n', nocc*(nocc+1)/2, nocc*(nocc+1)/2, nvirt**2,
     $      1.d0, cc_abIJ, nvirt**2, vv, nvirt**2,
     $      0.d0, ck_IJKL, nocc*(nocc+1)/2)

      do l = 1, nocc
         do k = 1, l - 1
            ttind2 = (l - 1) * l / 2 + k
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
C$OMP& PRIVATE(i,j,ttind)
            do j = 1, nocc
               do i = 1, j - 1
                  ttind = (j - 1) * j / 2 + i
                  alpha_ijkl(i,j,l,k) = alpha_ijkl(i,j,l,k) +
     $                  ck_IJKL(ttind, ttind2)

                  alpha_ijkl(j,i,k,l) = alpha_ijkl(j,i,k,l) +
     $                  ck_IJKL(ttind, ttind2)
               end do
            end do
C$OMP END PARALLEL DO
         end do
      end do

      ! C^{ij}_{ab}
      do j = 1, nocc
        do i = 1, j
           ttind = j * (j - 1) / 2 + i
           call ccijcalc(cc_abIJ(:, :, ttind), nvirt, t(1, i), t(1, j),
     $         tt(1, 1, ttind))
        enddo
      enddo

      ! C^{ij}_{ab} * K^{lk}_{ab} -> alpha_ijkl, alpha_jilk
      call dgemm('t', 'n', nocc*(nocc+1)/2, nocc*(nocc+1)/2, nvirt**2,
     $      1.d0, cc_abIJ, nvirt**2, vv, nvirt**2,
     $      0.d0, ck_IJKL, nocc*(nocc+1)/2)

      ! divide C^{ii}*K^{kk} by 2, because these elements will be added twice to alpha
      do i = 1, nocc
         ttind = (i + 1) * i / 2
         do k = 1, nocc
            ttind2 = (k + 1) * k / 2
            ck_IJKL(ttind, ttind2) = ck_IJKL(ttind, ttind2) / 2.d0
         end do
      end do

      do l = 1, nocc
         do k = 1, l
            ttind2 = (l - 1) * l / 2 + k
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
C$OMP& PRIVATE(i,j,ttind)
            do j = 1, nocc
               do i = 1, j
                  ttind = (j - 1) * j / 2 + i
                  alpha_ijkl(i,j,k,l) = alpha_ijkl(i,j,k,l) +
     $                  ck_IJKL(ttind, ttind2)

                  alpha_ijkl(j,i,l,k) = alpha_ijkl(j,i,l,k) +
     $                  ck_IJKL(ttind, ttind2)
               end do
            end do
C$OMP END PARALLEL DO
         end do
      end do

      maxmemalloc = imem - imem1
      call dbldealloc(iccabij)

c      TIMEADD(times(1,2),times)

c      if (ison.eq.'y') write(*,*) '2) c) '
c      call stopper(id,ison)
c      call timer

C 2)  d), e)
c      TIME0(times)
      if(maxcor-(imem-imem1) .ge. nvirt*nocc**3)then
         iksaklj = dblalloc(nvirt*nocc**3)
         call rpoint4d(dcore(iksaklj), ks_aklj, nvirt, nocc, nocc, nocc)

         ! read k^{klj}
         call ksread(ks_aklj, bsymmv, symmocc, dgroup, first, last,
     $         co, buffer,
     $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
      else
         call memerr(nvirt*nocc**3, maxcor - (imem-imem1),
     $         'Insufficient memory for ks_aklj!                ')
      endif

      if(maxcor-(imem-imem1).ge.nocc**4)then
         itkijkl = dblalloc(nocc**4)
         call rpoint4d(dcore(itkijkl), tk_iklj,
     $         nocc, nocc, nocc, nocc)
      else
         call memerr(nocc**4, maxcor - (imem-imem1),
     $         'Insufficient memory for tk_iklj!                ')
      end if

      ! t^i*k^{klj} -> alpha_{ijkl}, alpha_{jilk}
      call dgemm('t', 'n', nocc, nocc**3, nvirt,
     $      1.d0, t, nvirt, ks_aklj, nvirt,
     $      0.d0, tk_iklj, nocc)

      do l = 1, nocc
         do k = 1, nocc
            do j = 1, nocc
               alpha_ijkl(1:nocc,j,k,l) = alpha_ijkl(1:nocc,j,k,l) +
     $               tk_iklj(1:nocc,k,l,j)
               alpha_ijkl(j,1:nocc,l,k) = alpha_ijkl(j,1:nocc,l,k) +
     $               tk_iklj(1:nocc,k,l,j)
            end do
         end do
      end do

      maxmemalloc = max(maxmemalloc, imem - imem1)
      call dbldealloc(itkijkl)

c      TIMEADD(times(1,3),times)

c      if (ison.eq.'y') write(*,*) '2) d), e)'
c      call stopper(id,ison)
c      call timer

C 2)  f)
c      TIME0(times)
      if(maxcor-(imem-imem1).ge.nvirt*nocc)then
         iksakofij = dblalloc(nvirt*nocc)
         call rpoint2d(dcore(iksakofij), ks_ak_ofIJ, nvirt, nocc)
      else
         call memerr(nocc*nvirt, maxcor - (imem-imem1),
     $         'Insufficient memory for ks_ak_ofIJ!     ')
      endif

      do j = 1, nocc
         do i = 1, j
            ttind = (j-1) * j / 2 + i

            ! k^{ijk}*t^k -> V^{ij}
            do k = 1, nocc
               ks_ak_ofIJ(1:nvirt, k) = ks_aklj(1:nvirt, i, j, k)
            end do
            call dgemm('n', 't', nvirt, nvirt, nocc,
     $            -1.d0, ks_ak_ofIJ, nvirt, t, nvirt,
     $            1.d0, vv(1, 1, ttind), nvirt)

            ! t^k*k^{jik} -> V^{ij}
            do k = 1, nocc
               ks_ak_ofIJ(1:nvirt, k) = ks_aklj(1:nvirt, j, i, k)
            end do
            call dgemm('n', 't', nvirt, nvirt, nocc,
     $            -1.d0, t, nvirt, ks_ak_ofIJ, nvirt,
     $            1.d0, vv(1, 1, ttind), nvirt)
         enddo
      enddo

c      TIMEADD(times(1,4),times)

      maxmemalloc = max(maxmemalloc, imem - imem1)
      call dbldealloc(iksaklj)

c      if (ison.eq.'y') write(*,*) '2) f)'
c      call stopper(id,ison)

C 2)  g)
c      TIME0(times)
      if(maxcor-(imem-imem1) .ge. nocc**3*(nocc+1)/2) then
         ialphaklij = dblalloc(nocc**3*(nocc+1)/2)
         call rpoint3d(dcore(ialphaklij), alpha_klIJ,
     $         nocc, nocc, nocc*(nocc+1)/2)
      else
         call memerr(nocc**3*(nocc+1)/2, maxcor - (imem-imem1),
     $         'Insufficient memory for alpha_klIJ!     ')
      endif

      ! sort alpha: ijkl -> klIJ
      do j = 1, nocc
         do i = 1, j
            ttind = j * (j - 1) / 2 + i
            do l = 1, nocc
               alpha_klIJ(1:nocc, l, ttind) =
     $               alpha_ijkl(i, j, 1:nocc, l)
            end do
         end do
      end do

      if(maxcor-(imem-imem1) .ge. nvirt**2*nocc**2) then
         iccabkl = dblalloc(nvirt**2*nocc**2)
         call rpoint4d(dcore(iccabkl), cc_abkl,
     $         nvirt, nvirt, nocc, nocc)
      else
         call memerr(nvirt**2*nocc**2, maxcor - (imem-imem1),
     $         'Insufficient memory for cc_abkl!        ')
      endif

      ! generate C^{ij} for all i,j
      do j = 1, nocc
         call calcccfori(t, tt, nocc, nvirt, cc_abkl(:,:,:,j), j, 'n')
      enddo

      ! sum_{kl} {C^{kl} * alpha_{klIJ}} -> V^{IJ}
      call dgemm('n', 'n', nvirt**2, nocc*(nocc+1)/2, nocc**2,
     $      1.d0, cc_abkl, nvirt**2, alpha_klIJ, nocc**2,
     $      1.d0, vv, nvirt**2)

      maxmemalloc = max(maxmemalloc, imem - imem1)
      call dbldealloc(ialpha)

c      TIMEADD(times(1,5),times)

c      if (ison.eq.'y') write(*,*) '2) g)'
c      call stopper(id,ison)
c      call timer
C 3) a), b
c      TIME0(times)

      ccpm_size = (nvirt+1)*nvirt*(nocc+1)*nocc/4
      nmmat_size = (nvirt+1)*nvirt*(nocc+1)*nocc/4
      if(maxcor-(imem-imem1) .ge. 2 * ccpm_size) then ! TODO: ccpm+nmmat???
         iccx = dblalloc(ccpm_size)
         call rpoint2d(dcore(iccx), ccx,
     $         (nvirt+1)*nvirt/2, (nocc+1)*nocc/2)
      else
         call memerr(2*ccpm_size, maxcor-(imem-imem1),
     $         'Insufficient memory for ccx!            ')
      end if
      call rpoint2d(dcore(imem1+maxcor - nmmat_size),
     $      nmmat2, (nvirt+1)*nvirt/2, (nocc+1)*nocc/2)

      call crccx('a', t, tt, ccx, nvirt, nocc)
      call xpplcalc('a', nmmat2, dcore(imem), ccx, recbounds, times,
     $      nocc, nvirt, maxcor, imem, iout, ibufln, irecln)
c      if (ison.eq.'y') write(*,*) 'ppl a',nit,times(3,3),times(4,3)
      call xmatextr('a', nmmat2, vv, nvirt, nocc)

      call crccx('s', t, tt, ccx, nvirt, nocc)
      call xpplcalc('s', nmmat2, dcore(imem), ccx, recbounds, times,
     $      nocc, nvirt, maxcor, imem, iout, ibufln, irecln)
c      if (ison.eq.'y') write(*,*) 'ppl s',nit,times(3,3),times(4,3)
      call xmatextr('s', nmmat2, vv, nvirt, nocc)

      maxmemalloc = max(maxmemalloc, imem - imem1 + nmmat_size)

      call dbldealloc(iccx)

c      TIMEADD(times(1,6),times)

c      if (ison.eq.'y')       write(*,*) '3) a), b) ppl'
c      call timer

c      if(ison.eq.'y')write(6,*)'ppl'
c      call stopper(id,ison)

C 4) a)
      if(maxcor - (imem-imem1) .ge. nvirt*nocc**3) then
         ikdoakij = dblalloc(nvirt*nocc**3)
         call rpoint4d(dcore(ikdoakij), kdo_akij, nvirt, nocc, nocc,
     $         nocc)
         call dfillzero(kdo_akij, nvirt*nocc**3)
      else
         call memerr(nvirt*nocc**3,
     $         maxcor - (imem-imem1),
     $         'Insufficient memory for kdo_akij!       ')
      endif

      ccx_size = nocc*(nocc+1)/2 * nvirt*(nvirt+1)/2
      nmmat_size = nocc*(nocc+1)/2 * nvirt * nocc
      if(maxcor - (imem-imem1) .ge. 2 * ccx_size + nmmat_size) then ! C(+), C(-) and N or M

c C(+) and C(-) to the beginning of the free memory
         iccp = dblalloc(ccx_size)
         call rpoint2d(dcore(iccp),
     $         ccp, nvirt*(nvirt+1)/2, nocc*(nocc+1)/2)
         call crccx('s', t, tt, ccp, nvirt, nocc)

         iccm = dblalloc(ccx_size)
         call rpoint2d(dcore(iccm),
     $         ccm, nvirt*(nvirt+1)/2, nocc*(nocc+1)/2)
         call crccx('a', t, tt, ccm, nvirt, nocc)

c N and M after C(-)
         inmmat = dblalloc(nmmat_size)
         call rpoint3d(dcore(inmmat),
     $         nmmat, nocc*(nocc+1)/2, nvirt, nocc)
      else
         call memerr(ccx_size + nmmat_size, maxcor - (imem-imem1),
     $         'Insufficient memory for C(+-) and N/M!  ')
      endif

c      if (ison.eq.'y')  write(*,*) 'C for 4/a/III'
c      call timer
C 4) a)
c      call stopper(id,ison)
c      if(ison.eq.'y')write(6,*)'abci'
cc      TIME0(times)

      ind=1
      notend=.true.

      n = blocknumber(nvirt**3+nvirt**2*(nvirt+1)/2, 1, nocc, 'y', ind,
     $         bsize, 'Insufficient memory for one <ab|ci> blk!',
     $      maxcor-(imem-imem1))

      do while(notend) ! for abci
c      TIME0(times)
         if(ind + n - 1 .ge. nocc)then
            notend = .false.
            n = nocc - ind + 1
         endif

         call rpoint4d(dcore(imem), abcj_abcj, nvirt, nvirt, nvirt, n)

cc         TIME0(times(1,99))
         ! read n blocks of abci
         call abciread(ind, n, abcj_abcj, bsymmv, symmocc,
     $         dgroup, co, first, last, buffer,
     $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
cc         TIMEADD(times(1,11),times(1,99))

c      TIMEADD(times(1,7),times)

c         if (ison.eq.'y') write(*,*) '4) b) abci I/O'
c         call stopper(id,ison)
c 4 b)
C II term (2.62)
c      TIME0(times)
         ! <ab|cj> * t_c^i -> KDV^{ij}
         do j = ind, ind+n-1
            ttind = (j - 1) * j / 2 + 1
            call dgemm('n', 'n', nvirt**2, j, nvirt,
     $            1.d0, abcj_abcj(1, 1, 1, j-ind+1), nvirt**2, t, nvirt,
     $            1.d0, vv(1, 1, ttind), nvirt**2)
         enddo

c      TIMEADD(times(1,8),times)

c         if (ison.eq.'y') write(*,*) '4) b) '
c         call stopper(id,ison)
C 5) a) III of 2.33
c      TIME0(times)
         ! <bc|ak> * C_{bc}^{ij} -> kd^{kij}, analogous to 3) (<ab|cd> * C_{cd}^{ij})
         call rpoint3d(dcore(imem+nvirt**3*n),  ! store <ab||ck> and <ab|||ck> after the integrals
     $         abcix, nvirt*(nvirt+1)/2, nvirt, n)

         call abckccalc('a', nocc, nvirt, n, ind, abcj_abcj,
     $         ccm, abcix, kdo_akij, nmmat)
         call abckccalc('s', nocc, nvirt, n, ind, abcj_abcj,
     $         ccp, abcix, kdo_akij, nmmat)
c      TIMEADD(times(1,10),times)

         ind = ind + n
      enddo ! while for abci

      maxmemalloc = max(maxmemalloc,
     $      imem-imem1+nvirt**2*(nvirt+(nvirt+1)/2)*n)

      call dbldealloc(iccp)

c      if (ison.eq.'y') write(*,*) '5) a) III'
c      if(ison.eq.'y')write(6,*)'abciend'
c      call stopper(id,ison)
cc      TIMEADD(times(1,8),times)

C 5) b) I
c      TIME0(times)
      call rpoint3d(dcore(imem), kk_abik, nvirt, nvirt,
     $      nocc*(nocc+1)/2)

      ! read n blocks of K (i>=k)
      call kklkread(kk_abik, bsymmv, symmocc, dgroup,
     $   first, last, co, buffer, 'y',
     $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)

      ! <ab|ik> * t_b^j -> kd^{kij}; <a,b|ik>^T * t_b^j -> kd^{ikj}
      do k = 1, nocc
         do i = k, nocc
            ! sum_{j=nocc-(k-1)+1)^{nocc} {j} + (i-k+1)
            ttind = (2*nocc-k+2) * (k-1) / 2 + i-k+1

            call dgemm('n', 'n', nvirt, nocc, nvirt,
     $      1.d0, kk_abik(1, 1, ttind), nvirt, t, nvirt,
     $      1.d0, kdo_akij(1, k, i, 1), nvirt*nocc**2)

            if(i.ne.k) then
               call dgemm('t', 'n', nvirt, nocc, nvirt,
     $         1.d0, kk_abik(1, 1, ttind), nvirt, t, nvirt,
     $         1.d0, kdo_akij(1, i, k, 1), nvirt*nocc**2)
            endif
         enddo
      enddo

c      TIMEADD(times(1,11),times)


C 5) c) II
c      TIME0(times)
      ind = 1
      notend = .true.

      do while(notend) ! for J
         n = blocknumber(nvirt**2 + nocc*nvirt, nocc, nocc, 'n', ind,
     $        bsize, 'Insufficient memory for one block of J! ',
     $      maxcor-(imem-imem1))

         if(n+ind-1 .ge. nocc) notend = .false.

         ijjabkj = dblalloc(nvirt**2 * (2*nocc-2*ind-n+3)*n/2)
         call rpoint3d(dcore(ijjabkj), jj_abkj, nvirt, nvirt,
     $         (2*nocc-2*ind-n+3)*n/2)

         ! read n blocks of J
         call jj1read(ind, n, jj_abkj, bsymmv, symmocc, dgroup,
     $         co, first, last, buffer,
     $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)

         call rpoint3d(dcore(imem), tj_iakj, nocc, nvirt,
     $         (2*nocc-2*ind-n+3)*n/2)

         call dgemm('t', 'n', nocc, nvirt*(2*nocc-2*ind-n+3)*n/2, nvirt,
     $         1.d0, t, nvirt, jj_abkj, nvirt,
     $         0.d0, tj_iakj, nocc)

      maxmemalloc = max(maxmemalloc, imem - imem1)
         call dbldealloc(ijjabkj)

         ! (a,b|kj)^T * t_b^i -> kd^{kij}; (ab|kj) * t_b^j -> kd^{jik}
         do j = ind, ind+n-1
            do k = j, nocc
               ! sum_{l=nocc-(j-1)+1)^{nocc-ind+1} {l} + (k-j+1)
               ttind = ((j-ind)*(2*nocc-ind-j+3)/2+k-j+1)
               do i = 1, nocc

                  kdo_akij(1:nvirt, k, i, j) =
     $                  kdo_akij(1:nvirt, k, i, j) +
     $                  tj_iakj(i, 1:nvirt, ttind)

                  if(j .ne. k)
     $                  kdo_akij(1:nvirt, j, i, k) =
     $                  kdo_akij(1:nvirt, j, i, k) +
     $                  tj_iakj(i, 1:nvirt, ttind)
               end do
            enddo
         enddo

         ind = ind + n
      end do ! while for J
c      TIMEADD(times(1,12),times)

c      if (ison.eq.'y') write(*,*) '5) b)'
c      call stopper(id,ison)
c      call timer

C 5) d)
c      TIME0(times)
      ! 2 kd^{kik} - kd^{kki} -> v^i
      do j=1,nocc
         do k=1,nocc
            call daxpy(nvirt, 2.d0, kdo_akij(1, k, j, k), 1, v(1, j), 1)
            call daxpy(nvirt,-1.d0, kdo_akij(1, k, k, j), 1, v(1, j), 1)
         enddo
      end do
c      TIMEADD(times(1,13),times)

c 5) e)
c      TIME0(times)
      ! - kd^{kij} * t^k -> V^{ij}; - t^k * kd^{kji} -> V^{ij}
      do j = 1, nocc
         do i = 1, j
            ttind = j * (j-1) / 2 + i
            call dgemm('n', 't', nvirt, nvirt, nocc,
     $            -1.d0, kdo_akij(1, 1, i, j), nvirt, t, nvirt,
     $            1.d0, vv(1, 1, ttind), nvirt)
            call dgemm('n', 't', nvirt, nvirt, nocc,
     $            -1.d0, t, nvirt, kdo_akij(1, 1, j, i), nvirt,
     $            1.d0, vv(1, 1, ttind), nvirt)
         end do
      enddo
c      TIMEADD(times(1,14),times)

      maxmemalloc = max(maxmemalloc, imem - imem1)
      call dbldealloc(ikdoakij)

c      if (ison.eq.'y') write(*,*) '5) d) e)'
c      call timer

c      call stopper(id,ison)
C 6) a)
c      TIME0(times)

      ! sort tt: abIJ -> abji
      if(maxcor - (imem-imem1) .ge. nvirt**2*nocc**2) then
         ittabij = dblalloc(nvirt**2*nocc**2)
         call rpoint4d(dcore(ittabij), tt_abij, nvirt, nvirt, nocc,
     $         nocc)

         do j = 1, nocc
            do i = 1, nocc
               indmin = min(i,j)
               indmax = max(i,j)
               ttind = indmax * (indmax-1) / 2 + indmin
               do b = 1, nvirt
                  if(i .le . j) then
                     tt_abij(1:nvirt, b, j, i) = tt(1:nvirt, b, ttind)
                  else
                     tt_abij(1:nvirt, b, j, i) = tt(b, 1:nvirt, ttind)
                  end if
               end do
            end do
         end do
      else
         call memerr(nvirt**2*nocc**2, maxcor - (imem-imem1),
     $         'Insufficient memory for T!              ')
      end if

      ind = 1
      notend = .true.

      n = blocknumber(nvirt*nocc, nocc, nocc, 'y', ind, bsize,
     $      'Insufficient memory for one block of ls!',
     $      maxcor-(imem-imem1))

      do while(notend) ! for l

         if(ind+n-1 .ge. nocc)then
            notend = .false.
            n = nocc-ind+1
         endif

         call rpoint4d(dcore(imem), ls_alki, nvirt, nocc, nocc, n)

         ! read n blocks of l
         call lsread(ls_alki, ind, n, nocc, nvirt, ifltln)

c 6) b)
         ! T^{lk} * l^{lki} -> s^i
         call dgemm('n', 'n', nvirt, n, nvirt*nocc**2,
     $         -1.d0, tt_abij, nvirt, ls_alki, nvirt*nocc**2,
     $         1.d0, v(1, ind), nvirt)

c 6) c)
         ! t^l * l^{lki} -> beta_{ki}
         call dgemv('t', nocc*nvirt, nocc*n,
     $         1.d0, ls_alki, nocc*nvirt, t, 1,
     $         1.d0, beta(1, ind), 1)

         ind = ind + n
      end do ! while for l

      maxmemalloc = max(maxmemalloc, imem - imem1)
      call dbldealloc(ittabij)
c      TIMEADD(times(1,15),times)

c      if (ison.eq.'y') write(*,*) '6) a-c'
c      call timer

c      call stopper(id,ison)
C 7) a)
c      call dfillzero(x,nvirt**2)

      itmp = dblalloc(nvirt**2)
      call rpoint2d(dcore(itmp), ccij, nvirt, nvirt)

      ind=1
      notend=.true.

      n = blocknumber(2 * nvirt**2*nocc, 1, nocc, 'y', ind, bsize,
     $      'Insufficient memory for one block of ll!',  ! halve the block size to leave space for tt_abij and cc_labi
     $      maxcor-(imem-imem1))

      illabkl = dblalloc(n * nvirt**2*nocc)
      call rpoint4d(dcore(illabkl), ll_abkl, nvirt, nvirt, nocc, n)

      ittabij = dblalloc(n * nvirt**2*nocc)
      call rpoint4d(dcore(ittabij), tt_abij, nvirt, nvirt, nocc, n)

      do while(notend) ! for L

c      TIME0(times)
         if(ind+n-1 .ge. nocc)then
            notend = .false.
            n = nocc-ind+1
         endif

C 7) b)
         ! read n blocks of L
         call ll2read(ind, n, ll_abkl, 'abkl',nocc,nvirt,ifltln)

C 7) c)
         ! L^{kl} * t^l -> r^k
         do l = ind, ind+n-1
            do k = 1, nocc
               call dgemv('n', nvirt, nvirt,
     $               1.d0, ll_abkl(1, 1, k, l-ind+1), nvirt, t(1, l), 1,
     $               1.d0, rr(1, k), 1)
            enddo
         enddo
c      TIMEADD(times(1,17),times)

C 7) e)
c calculate -A
c      TIME0(times)
         ! T_{cb}^{lk} -> T_{bc}^{kl} for l's read
         do l = ind, ind+n-1
            do k = 1, nocc
               indmin = min(l, k)
               indmax = max(l, k)
               ttind = (indmax-1) * indmax / 2 + indmin

               if(l .le. k) then
                  call tr(nvirt, tt(1,1,ttind), tt_abij(:,:,k,l-ind+1))
               else
                  call dcopy(nvirt**2, tt(1, 1, ttind), 1,
     $                  tt_abij(1, 1, k, l-ind+1), 1)
               end if
            end do
         end do

         ! - L^{kl} * T^{kl} -> X
         if(ind .eq. 1) then
            fac = 0.d0
         else
            fac = 1.d0
         end if

         call dgemm('n', 't', nvirt, nvirt, nvirt*nocc*n,
     $         -1.d0, ll_abkl, nvirt, tt_abij, nvirt,
     $         fac, x, nvirt)
c      TIMEADD(times(1,18),times)

c 7) d)
c      TIME0(times)
         call rpoint4d(tt_abij, ll_klab, nocc, n, nvirt, nvirt)

         ! sort L: abkl -> klab for l read
         do b = 1, nvirt
            do a = 1, nvirt
               do l = 1, n
                  ll_klab(1:nocc, l, a, b) =
     $                  ll_abkl(a, b, 1:nocc, l)
               end do
            end do
         end do

         call rpoint4d(ll_abkl, cc_labi, n, nvirt, nvirt, nocc)

         ! sort C: abli -> labi for all i and l read
         do i = 1, nocc
            do l = ind, ind+n-1
               indmax = max(i, l)
               indmin = min(i, l)
               ttind = indmax * (indmax-1) / 2 + indmin

               if(i .le. l) then
                  call ccijcalc(ccij, nvirt, t(1, i), t(1, l),
     $                  tt(1, 1, ttind))

                  do b = 1, nvirt
                     cc_labi(l-ind+1, 1:nvirt, b, i) = ccij(1:nvirt, b)
                  end do
               else
                  call ccijtrcalc(ccij, nvirt, t(1, l), t(1, i),
     $                  tt(1, 1, ttind))

                  do b = 1, nvirt
                     cc_labi(l-ind+1, 1:nvirt, b, i) = ccij(1:nvirt, b)
                  end do
               end if
            end do
         end do

         ! tr(L^{kl}_{ab} * C^l_{ab}^i -> beta_{ki}
         call dgemm('n', 'n', nocc, nocc, n*nvirt**2,
     $         1.d0, ll_klab, nocc, cc_labi, n*nvirt**2,
     $         1.d0, beta, nocc)
c      TIMEADD(times(1,19),times)

         ind = ind + n
      end do ! while for L
      maxmemalloc = max(maxmemalloc, imem - imem1)
      call dbldealloc(itmp)

c      if (ison.eq.'y') write(*,*) '7) a-e'
c      call stopper(id,ison)
c      call timer
C check
C 8) a)
c      TIME0(times)
      ! h - A^T -> scrs
      call tr(nvirt, x, scrs)
      do a=1,nvirt
         do b=1,nvirt
            scrs(a,b)=scrs(a,b)+h(a+nocc,b+nocc)
         enddo
      enddo
c      TIMEADD(times(1,20),times)


C 8) b)
c      TIME0(times)
      ! (h - A^T) * t^i -> v^i
      call dgemm('n', 'n', nvirt, nocc, nvirt,
     $      1.d0, scrs, nvirt, t, nvirt,
     $      1.d0, v, nvirt)
c      TIMEADD(times(1,21),times)


C 8) c)
c      TIME0(times)
      ! s^i * t^j^T -> V^{ij}; t^i * s^j^T -> V^{ij}
      do j = 1, nocc
        do i = 1, j
          ttind = (j-1) * j / 2 + i
          call dger(nvirt, nvirt,
     $           1.d0, v(1, i), 1, t(1, j), 1,
     $           vv(1, 1, ttind), nvirt)
          call dger(nvirt, nvirt,
     $           1.d0, t(1, i), 1, v(1, j), 1,
     $           vv(1, 1, ttind), nvirt)
        end do
      end do
c      TIMEADD(times(1,22),times)

c       if (ison.eq.'y') write(*,*) '8) a-c'
c      call timer

c      call stopper(id,ison)

C 9) b)
c      TIME0(times)
      ! t^k * beta_{ki} -> v^i
      call dgemm('n', 'n', nvirt, nocc, nocc,
     $      -1.d0, t, nvirt, beta, nocc,
     $      1.d0, v, nvirt)
c      TIMEADD(times(1,24),times)

c       if (ison.eq.'y') write(*,*) '9) a-b'
c      call timer

c      call stopper(id,ison)
C 10)
c      TIME0(times)
      if(maxcor - (imem-imem1) .ge. nvirt**2*nocc) then
         iccabkofj = dblalloc(nvirt**2*nocc)
         call rpoint3d(dcore(iccabkofj), cc_abk_ofj, nvirt, nvirt, nocc)
      else
         call memerr(nvirt**2*nocc, maxcor - (imem-imem1),
     $         'Insufficient memory for cc_abk_ofj!     ')
      end if

      ! C_{ab}^{kj} * beta_{ki} -> V^{ij}
      do j = 1, nocc
         call calcccfori(t, tt, nocc, nvirt, cc_abk_ofj, j, 'n')

         call dgemm('n', 'n', nvirt**2, j, nocc,
     $         -1.d0, cc_abk_ofj, nvirt**2, beta, nocc,
     $         1.d0, vv(1, 1, j*(j-1)/2+1), nvirt**2)
      end do

      call rpoint3d(dcore(imem), betac_abj_ofi, nvirt, nvirt, nocc)

      ! C_{ba}^{ki} * beta_{kj} -> V^{ij}
      do i = 1, nocc
         call calcccfori(t, tt, nocc, nvirt, cc_abk_ofj, i, 't')

         call dgemm('n', 'n', nvirt**2, nocc-i+1, nocc,
     $         -1.d0, cc_abk_ofj, nvirt**2, beta(1, i), nocc,
     $         0.d0, betac_abj_ofi, nvirt**2)

         do j = i, nocc
            ttind = j * (j-1) / 2 + i
            do b = 1, nvirt
               vv(1:nvirt, b, ttind) = vv(1:nvirt, b, ttind) +
     $               betac_abj_ofi(1:nvirt, b, j-i+1)
            end do
         end do
      end do

      maxmemalloc = max(maxmemalloc, imem - imem1)
      call dbldealloc(iccabkofj)
c      TIMEADD(times(1,25),times)

c      if (ison.eq.'y') write(*,*) '10)'
c      call stopper(id,ison)
c      call timer

C 11)
c      TIME0(times)
      call daxpy(nvirt**2, 1.d0, fab, 1, x, 1)
c      TIMEADD(times(1,26),times)


C 12)
c      TIME0(times)
      ! - r^k * t^k^T -> X
      call dgemm('n', 't', nvirt, nvirt, nocc,
     $      -1.d0, rr, nvirt, t, nvirt,
     $      1.d0, x, nvirt)
c      TIMEADD(times(1,27),times)

c      if (ison.eq.'y') write(*,*) '11-12)'
c      call timer

c      call stopper(id,ison)
c      if(ison.eq.'y')write(6,*)'abci'
cc      TIME0(times)

C 13) a)
c calculate the transpose of Z^{kj} (see 14))
c      TIME0(times)
      if(maxcor - (imem-imem1) .ge. nocc**2*nvirt**2) then
         izbakj = dblalloc(nocc**2*nvirt**2)
         call rpoint4d(dcore(izbakj), z_bakj, nvirt, nvirt, nocc, nocc)
      else
         call memerr(nocc**2*nvirt**2, maxcor - (imem-imem1),
     $         'Insufficient memory for z_bakj!         ')
      end if

c      TIME0(times)

      ind = 1
      notend = .true.

      n = blocknumber(nvirt**2, nvirt, nocc, 'y', ind, bsize,
     $      'Insufficient mem for one <ab|ci> block! ',
     $      maxcor-(imem-imem1))

      do while(notend)

         if(ind+n-1 .ge. nocc) then
            notend = .false.
            n = nocc-ind+1
         end if

         call rpoint4d(dcore(imem), aibc_abic, nvirt, nvirt, n, nvirt)

cc      TIME0(times(1,99))
         call abci2read(ind, n, aibc_abic, bsymmv, symmocc, dgroup,
     $         co, first, last, buffer,
     $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
cc      TIMEADD(times(1,11),times(1,99))

c calculate the symmetric JE^{kj} matrices
         call dgemm('n', 'n', nvirt**2*n, nocc, nvirt,
     $         1.d0, aibc_abic, nvirt**2*n, t, nvirt,
     $         0.d0, z_bakj(1, 1, ind, 1), nvirt**2*nocc)

         ind=ind+n
      end do ! while for abci

c      TIMEADD(times(1,30),times)

c      if (ison.eq.'y') write(*,*) '13 a)'
c      call stopper(id,ison)

C 13) c)
      ! 2 * JE^{kk} -> X
      do k=1,nocc
         call daxpy(nvirt**2, 2.d0, z_bakj(1, 1, k, k), 1, x, 1)
      enddo

C 13) d)
c read J and add it to Z
c J^{kj} is symmetric to the permutation of the virtual indices
c      TIME0(times)
      call jjtoz(z_bakj,bsymmv,symmocc,dgroup,first,last,co,buffer,
     $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
c      TIMEADD(times(1,29),times)


c      if (ison.eq.'y') write(*,*) '13 b-d)'
c      call stopper(id,ison)

C 13) e)
c K
      avmem=maxcor-(imem-imem1)
      if(avmem .ge. nvirt**2*nocc) then
         iccbarbclofj = dblalloc(nvirt**2*nocc)
         call rpoint3d(dcore(iccbarbclofj), ccbar_bcl_ofj, nvirt, nvirt,
     $         nocc)
      else
         call memerr(nvirt**2*nocc, avmem,
     $       'Insufficient memory for ccbar_cbl_ofj!  ')
      end if

      ind = 1
      notend = .true.

      n = blocknumber(nvirt**2, nocc, nocc, 'y', ind, bsize,
     $      'Insufficient memory for one block of K! ',
     $      maxcor-(imem-imem1))

      do while(notend)

         if(ind+n-1 .ge. nocc) then
            notend = .false.
            n = nocc-ind+1
         end if

         call rpoint4d(dcore(imem), kk_ablk,
     $         nvirt, nvirt, nocc, n)

         ! read n blocks of K
         call kklk2read(ind, n, kk_ablk, nocc, nvirt, ifltln)

         do j = 1, nocc
            ! generate Cbar^{lj}^T's for j
            do l = 1, j
               ttind = j * (j-1) / 2 + l
               call coijcalc(ccbar_bcl_ofj(:, :, l), nvirt,
     $               t(1, l), t(1, j), tt(1, 1, ttind))
            end do

            do l = j+1, nocc
               ttind = l * (l-1) / 2 + j
               call coijtrcalc(ccbar_bcl_ofj(:, :, l), nvirt,
     $               t(1, j), t(1, l), tt(1, 1, ttind))
            end do

            ! Cbar^{lj}^T * K^{lk} -> Z_{ba}^{kj}
            do k = ind, ind+n-1
               call dgemm('n', 't', nvirt, nvirt, nvirt*nocc,
     $               -1.d0, ccbar_bcl_ofj, nvirt,
     $               kk_ablk(1, 1, 1, k-ind+1), nvirt,
     $               1.d0, z_bakj(1, 1, k, j), nvirt)
            end do
         end do

         ind = ind + n
      end do ! while for K

      maxmemalloc = max(maxmemalloc, imem - imem1)
      call dbldealloc(iccbarbclofj)
c      TIMEADD(times(1,28),times)

c      if (ison.eq.'y') write(*,*) '13 e) III'
c     call timer

c      call stopper(id,ison)

c ks
c      TIME0(times)
      ind=1
      notend=.true.

      do while(notend)
         n = blocknumber(nvirt*nocc + nvirt**2, nocc, nocc, 'n', ind,
     $       kssize, 'Insufficient memory for one block of ks!',
     $      maxcor-(imem-imem1))
         kssize = kssize / (nvirt*nocc+nvirt**2)

         if(n+ind-1 .ge. nocc) then
            notend = .false.
c            n = nocc - ind + 1
         end if

         call rpoint3d(dcore(imem), ks_laK_J, nocc, nvirt, kssize)
         call rpoint3d(dcore(imem+nocc*nvirt*kssize), tks_baK_J,
     $         nvirt, nvirt, kssize)

         ! read n blocks of k as k^l_a^{KJ}
         call ks2read(ind,n,kssize,ks_laK_J,
     $         bsymmv,symmocc,dgroup,first,last,co,buffer,
     $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)

         ! t_b^l * k^l_a^{KJ}
         call dgemm('n', 'n', nvirt, nvirt*kssize, nocc,
     $         -1.d0, t, nvirt, ks_laK_J, nocc,
     $         0.d0, tks_baK_J, nvirt)
c     $         1.d0, z_bakj(1, 1, 1, ind), nvirt)

         ! tks_baK_J -> Z^{k,j}, Z^{j,k}
         ttind = 0
         do j = ind, ind+n-1
            ttind = ttind + 1
            call daxpy(nvirt**2*(nocc-j+1),
     $            1.d0, tks_baK_J(1, 1, ttind), 1,
     $            z_bakj(1, 1, j, j), 1)

            do k = j + 1, nocc
               ttind = ttind + 1
               call daxpy(nvirt**2, 1.d0, tks_baK_J(1, 1, ttind), 1,
     $               z_bakj(1, 1, j, k), 1)
            end do
         end do

         ind = ind + n
      end do ! while for k

c      TIMEADD(times(1,31),times)
c      if (ison.eq.'y') write(*,*) '13 e) IV'
c      call stopper(id,ison)

C 14)
c      TIME0(times)
      if(maxcor - (imem-imem1) .ge. nvirt**2*nocc + 2 * nvirt**2) then
         ittackofi = dblalloc(nvirt**2*nocc)
         call rpoint3d(dcore(ittackofi), tt_ack_ofi, nvirt, nvirt, nocc)

         itz = dblalloc(nvirt**2)
         call rpoint2d(dcore(itz), tz, nvirt, nvirt)

         itztr = dblalloc(nvirt**2)
         call rpoint2d(dcore(itztr), tztr, nvirt, nvirt)
      else
         call memerr(nvirt**2*nocc + 2*nvirt**2, maxcor - (imem-imem1),
     $         'Insufficient memory for tztr and tz!     ')
      end if

      do i = 1, nocc
         ! T^{k,i} k<=i
         call dcopy(nvirt**2*i, tt(1, 1, i*(i-1)/2+1), 1, tt_ack_ofi, 1)

         ! T^{k,i} k>i
         do k = i+1, nocc
            ttind = k * (k-1) / 2 + i
            call tr(nvirt, tt(1, 1, ttind), tt_ack_ofi(:, :, k))
         end do

         do j = 1, nocc
            ! T{ac}_^{ki} * Z_{bc}^{kj}
            call dgemm('n', 't', nvirt, nvirt, nvirt*nocc,
     $            1.d0, tt_ack_ofi, nvirt, z_bakj(1, 1, 1, j), nvirt,
     $            0.d0, tz, nvirt)

            ! -1/2 T^{ki} * Z^{kj} - (T^{ki} * Z^{kj})^T -> V^{ij}
            if(i .lt. j) then
               ! G^{ij}
               do b = 1, nvirt
                  tztr(1:nvirt,b) = -0.5d0*tz(1:nvirt,b) - tz(b,1:nvirt)
               end do

               ttind = j * (j-1) / 2 + i
               call daxpy(nvirt**2, 1.d0, tztr, 1, vv(1, 1, ttind), 1)
            else if(j .lt. i) then
               ! G^{ji}^T
               do b = 1, nvirt
                  tztr(1:nvirt,b) = -tz(1:nvirt,b) - 0.5d0*tz(b,1:nvirt)
               end do

               ttind = i * (i-1) / 2 + j
               call daxpy(nvirt**2, 1.d0, tztr, 1, vv(1, 1, ttind), 1)
            else if(i .eq. j) then
               ! G^{ij} + G^{ji}^T
               do b = 1, nvirt
                  tztr(1:nvirt, b) = -1.5d0 * tz(1:nvirt, b) -
     $                  1.5d0 * tz(b, 1:nvirt)
               end do

               ttind = i * (i+1) / 2
               call daxpy(nvirt**2, 1.d0, tztr, 1, vv(1, 1, ttind), 1)
            end if
         end do
      end do
c      TIMEADD(times(1,32),times)

      maxmemalloc = max(maxmemalloc, imem - imem1)
      call dbldealloc(izbakj)

c      if (ison.eq.'y') write(*,*) '14)'
c      call timer

c      call stopper(id,ison)
c         if(ison.eq.'y')write(6,*)'abci'
cc      TIME0(times)
C 15) a)
c calculate the transpose of Y^{kj} (see 16))
      if(maxcor - (imem+nvirt-imem1) .ge. nocc**2*nvirt**2) then
         iybakj = dblalloc(nocc**2*nvirt**2)
         call rpoint4d(dcore(iybakj), y_bakj, nvirt, nvirt, nocc, nocc)

         itmp = dblalloc(nvirt)
         call rpoint1d(dcore(itmp), tmp, nvirt)
      else
         call memerr(nocc**2*nvirt**2+nvirt, maxcor - (imem-imem1),
     $         'Insufficient memory for y_bakj!         ')
      end if

      ind=1
      notend=.true.

      n = blocknumber(nvirt**2, nvirt, nocc, 'y', ind, bsize,
     $      'Insufficient memory for one abci block! ',
     $      maxcor-(imem-imem1))

      do while(notend)
c      TIME0(times)
         if(ind+n-1.ge.nocc)then
            notend=.false.
            n=nocc-ind+1
         endif

         call rpoint4d(dcore(imem), abic_baic, nvirt, nvirt, n, nvirt)
         call rpoint4d(dcore(imem), aibc_baic, nvirt, nvirt, n, nvirt)

cc      TIME0(times(1,99))
         call abci3read(ind,n,abic_baic,
     $         bsymmv,symmocc,dgroup,co,first,last,buffer,
     $irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
cc      TIMEADD(times(1,11),times(1,99))

         ! KE
         call dgemm('n', 'n', nvirt**2*n, nocc, nvirt,
     $         1.d0, abic_baic, nvirt**2*n, t, nvirt,
     $         0.d0, y_bakj(1, 1, ind, 1), nvirt**2*nocc)
c      TIMEADD(times(1,33),times)

C 15) b)
c      TIME0(times)
         ! KE^{kk} -> X
         do k = ind, ind + n - 1
            call tradd(nvirt, -1.d0, y_bakj(:, :, k, k), x)
         enddo
c      TIMEADD(times(1,34),times)

c 4) b) I
c      TIME0(times)
         ! <ab|cj> * t_c^i from KDV
         do j = 1, nocc
            do i = ind, min(j, ind+n-1)
               ttind = j * (j-1) / 2 + i
               do a = 1, nvirt
                  vv(a, 1:nvirt, ttind) = vv(a, 1:nvirt, ttind) +
     $                  y_bakj(1:nvirt, a, i, j)
               end do
            end do
         end do
c      TIMEADD(times(1,9),times)

c 15) c) JE
c      TIME0(times)
         ! sort abci for JE
         do i = 1, n
            do b = 1, nvirt
               do c = 1, nvirt-1
                  tmp(c+1:nvirt) = abic_baic(b, c+1:nvirt, i, c)
                  aibc_baic(b, c+1:nvirt, i, c) =
     $                  abic_baic(b, c, i, c+1:nvirt)
                  aibc_baic(b, c, i, c+1:nvirt) = tmp(c+1:nvirt)
               end do
            end do
         end do

         ! JE
         call dgemm('n', 'n', nvirt**2*n, nocc, nvirt,
     $         -0.5d0, aibc_baic, nvirt**2*n, t, nvirt,
     $         1.d0, y_bakj(1, 1, ind, 1), nvirt**2*nocc)
c      TIMEADD(times(1,35),times)

         ind=ind+n
      end do ! while for abci

      maxmemalloc = max(maxmemalloc, imem - imem1)
      call dbldealloc(itmp)

c      if (ison.eq.'y') write(*,*) '15) a'
cc     TIMEADD(times(1,10),times)
c      call stopper(id,ison)


C Construct JE, Eq. 22
C 15) c)
c      TIME0(times)
      call jjtoy(y_bakj,bsymmv,symmocc,dgroup,first,last,co,
     $      buffer,irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
      call kktoy(y_bakj,bsymmv,symmocc,dgroup,first,last,co, buffer,
     $      irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg)
c      TIMEADD(times(1,35),times)


C 15) d)
c f^k * adj(t^j)
c      TIME0(times)
      do j=1,nocc
         do k=1,nocc
            call dger(nvirt, nvirt, 1.d0, t(1, j), 1, fai(1, k), 1,
     $            y_bakj(1, 1, k, j), nvirt)
         enddo
      enddo
c      TIMEADD(times(1,36),times)

c 1/2 L^{kl} (2 Cbar^{lj - Cbar^{jl}})
c      TIME0(times)
      ind=1
      notend=.true.
      if(maxcor - (imem-imem1) .ge. nvirt**2*nocc + nvirt**2) then
         iccprbclofj = dblalloc(nvirt**2*nocc)
         call rpoint3d(dcore(iccprbclofj), ccpr_bcl_ofj, nvirt, nvirt,
     $         nocc)

         iccpr = dblalloc(nvirt**2)
         call rpoint2d(dcore(iccpr), ccpr, nvirt, nvirt)
      else
         call memerr(nvirt**2*nocc + nvirt**2, maxcor-(imem-imem1),
     $         'Insufficient memory for ccpr_bcl_ofj!   ')
      endif

      n = blocknumber(nvirt**2, nocc, nocc, 'y', ind, bsize,
     $      'Insufficient memory for one block of ll! ',
     $      maxcor-(imem-imem1))

      do while(notend)

         call rpoint4d(dcore(imem), ll_ablk, nvirt, nvirt, nocc, n)

         if(ind+n-1.ge.nocc)then
            notend=.false.
            n=nocc-ind+1
         endif

         call ll2read(ind, n, ll_ablk, 'ablk',nocc,nvirt,ifltln)

         do j = 1, nocc
            do l = 1, j-1
               ttind = j * (j-1) / 2 + l
               call coijtrcalc(ccpr,nvirt,t(1,l),t(1,j),tt(1,1,ttind))

               call dcopy(nvirt**2, ccpr, 1, ccpr_bcl_ofj(1, 1, l), 1)

               call tradd(nvirt, -0.5d0, ccpr, ccpr_bcl_ofj(:, :, l))
            end do

            ttind = j * (j+1) / 2
            call coijcalc(ccpr_bcl_ofj(:, :, j), nvirt,
     $            t(1, j), t(1, j), tt(1, 1, ttind))
            call dscal(nvirt**2, 0.5d0, ccpr_bcl_ofj(1, 1, j), 1)

            do l = j+1, nocc
               ttind = l * (l-1) / 2 + j
               call coijtrcalc(ccpr,nvirt,t(1,j),t(1,l),tt(1,1,ttind))

               call dcopy(nvirt**2, ccpr, 1, ccpr_bcl_ofj(1, 1, l), 1)
               call dscal(nvirt**2, -0.5d0, ccpr_bcl_ofj(1, 1, l), 1)

               call tradd(nvirt, 1.d0, ccpr, ccpr_bcl_ofj(:, :, l))
            end do

            do k = ind, ind+n-1
               call dgemm('n', 't', nvirt, nvirt, nvirt*nocc,
     $               1.d0, ccpr_bcl_ofj, nvirt,
     $               ll_ablk(1, 1, 1, k-ind+1), nvirt,
     $               1.d0, y_bakj(1, 1, k, j), nvirt)
            end do
         end do

         ind=ind+n
      end do ! while for L
c      TIMEADD(times(1,37),times)

      maxmemalloc = max(maxmemalloc, imem - imem1)
      call dbldealloc(iccprbclofj)

c -1/2 sum_l{l^{lkj} adj(t^l)}
c      TIME0(times)
      ind=1
      notend=.true.

      n = blocknumber(nvirt**2, nocc, nocc, 'y', ind, bsize,
     $      'Insufficient memory for one block of ls!',
     $      maxcor-(imem-imem1))

      do while(notend)
         call rpoint4d(dcore(imem), ls_lakj, nocc, nvirt, nocc, n)

         if(ind+n-1.ge.nocc)then
            notend=.false.
            n=nocc-ind+1
         endif

         call ls2read(ind, n, ls_lakj, nocc, nvirt, ifltln)

         call dgemm('n', 'n', nvirt, nvirt*nocc*n, nocc,
     $         -0.5d0, t, nvirt, ls_lakj, nocc,
     $         1.d0, y_bakj(1, 1, 1, ind), nvirt)

         ind=ind+n
      end do ! while for l
c      TIMEADD(times(1,38),times)

c      if (ison.eq.'y') write(*,*) '15) b-d'

c      call stopper(id,ison)
c      if(ison.eq.'y')write(6,*)'16), 17)'
C 16)
      if(maxcor-(imem-imem1) .ge. nvirt**2*nocc)then
         ittpr = dblalloc(nvirt**2*nocc)
         call rpoint3d(dcore(ittpr), ttpr, nvirt, nvirt, nocc)
      else
         call memerr(nvirt**2*nocc, maxcor-(imem-imem1),
     $            'Insufficient memory for ttpr!           ')
      endif

      do i = 1, nocc
c      TIME0(times)
         ! 2 * T^{ik} - T^{ki}
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
C$OMP& PRIVATE(k,l,indmin,indmax,ttind)
         do k=1,nocc
            indmin = min(i,k)
            indmax = max(i,k)
            ttind = indmax * (indmax-1) / 2 + indmin
            if(i. le. k) then
               do l = 1, nvirt
                  ttpr(1:nvirt, l, k) =
     $                  2.d0 * tt(1:nvirt,l,ttind) - tt(l,1:nvirt,ttind)
               end do
            else
               do l = 1, nvirt
                  ttpr(1:nvirt, l, k) =
     $                  2.d0 * tt(l,1:nvirt,ttind) - tt(1:nvirt,l,ttind)
               end do
            end if
         enddo
C$OMP END PARALLEL DO

c G^{i,j}
         do j = i, nocc
            ttind = j * (j-1) / 2 + i
            call dgemm('n', 't', nvirt, nvirt, nvirt*nocc,
     $            1.d0, ttpr, nvirt, y_bakj(1, 1, 1, j), nvirt,
     $            1.d0, vv(1, 1, ttind), nvirt)
         end do

c G^{j,i}^T
         do j = 1, i
            ttind = i * (i-1) / 2 + j
            call dgemm('n', 't', nvirt, nvirt, nvirt*nocc,
     $            1.d0, y_bakj(1, 1, 1, j), nvirt, ttpr, nvirt,
     $            1.d0, vv(1, 1, ttind), nvirt)
         end do
c      TIMEADD(times(1,39),times)

cC 9) a)
         ! (2 * T^{ik} - T^{ki}) *r^k -> v^i
c      TIME0(times)
         call dgemv('n', nvirt, nvirt*nocc,
     $         1.d0, ttpr, nvirt, rr, 1, 1.d0, v(1, i), 1)
c      TIMEADD(times(1,23),times)
      end do

      maxmemalloc = max(maxmemalloc, imem - imem1)
      call dbldealloc(iybakj)

c 17)
c      TIME0(times)
      ! T^{ij} * X -> V^{ij}
      do j = 1, nocc
         do i = 1, j
            ttind2 = (j-1) * j / 2 + i
            call dgemm('n', 'n', nvirt, nvirt, nvirt,
     $            1.d0, tt(1, 1, ttind2), nvirt, x, nvirt,
     $            1.d0, vv(1, 1, ttind2), nvirt)
         enddo
      enddo

c adj(G^{j,i})
      ! X * T^{ji} -> V^{ij}
      call dgemm('t', 'n', nvirt, nvirt*nocc*(nocc+1)/2, nvirt,
     $      1.d0, x, nvirt, tt, nvirt,
     $      1.d0, vv, nvirt)
c      TIMEADD(times(1,40),times)

c       if (ison.eq.'y') write(*,*) '16-17)'
c     call timer


c      call stopper(id,ison)
C Construct D, Eq. 27, including E, Eq. 21
C Construct C overbar, Eq. 37
C Construct JE, Eq. 22
C Construct KE, Eq. 23
C Construct Z, Eq. 36
C Construct Y, Eq. 35
C Construct A Eq. 33
C Construct r Eq. 31
C Construct X Eq. 34
C Construct KD Eq. 20
C Construct s Eq. 32
C Construct beta Eq. 30
C Construct alpha Eq. 29
C Construct G, Eq. 28
C Construct v Eq. 24
C Construct V, Eq. 25

      ! calculate norm before updating amplitudes
      norm = calcnorm(nvirt, nocc, v, vv)
C Update cluster amplitudes
      do j = 1, nocc
         do i = 1, j
            ttind = (j-1) * j / 2 + i
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
C$OMP& PRIVATE(a,b)
            do b = 1, nvirt
               do a = 1, nvirt
                  vv(a, b, ttind) = vv(a, b, ttind) /
     $                  (fab(a, a) + fab(b, b) - fij(i, i) - fij(j, j))
                  tt(a, b, ttind) = tt(a, b, ttind) - vv(a, b, ttind)
               end do
            end do
C$OMP END PARALLEL DO
         end do
         do a = 1, nvirt
            v(a, j) = v(a, j) / (fab(a, a) - fij(j, j))
            t(a, j) = t(a, j) - v(a, j)
         end do
      end do

c       if (ison.eq.'y') write(*,*) 'update)'
c      call timer

c      call stopper(id,ison)
c     call getarg(2,dextr)
c     if(dextr.eq.'y') then
C DIIS extrapolation
      call diis(nit+nit0,nocc*nvirt+nvirt**2*nocc*(nocc+1)/2,t,v,ccmaxit
     $      ,diisfile,errfile,ifltln,bmat,invbmat)

c       if (ison.eq.'y') write(*,*) 'diis)'
c      call timer
c      call stopper(id,ison)
c     call timer
c     endif
C Construct C, Eq. 26
C Calculate CCSD Energy
c       call largest(nocc,nvirt,vv,v,tt,t,dcore(imem+nvirt**2*(nocc+1)*
c     $nocc/2),dcore(imem+nvirt**2*(nocc+1)*nocc/2),icore(iimem),iout)
       call energycalc(t,tt,fai,nit,eref,norm,
     $    scrs,localcc,ccsdalg,scrs,
     $    maxcor,imem,dcore,ifltln,iout,nvirt,nocc,idummy,ecc,scrs,
     $    dcore(iscrs2), mpi_rank, mpi_size,mpi_ccsd, times,
     $    ccsd_communicator,emp2full,lno,lnaf,emp2corr,eccs,eccss,eccos,
     $eccscs,eccoss,eccsss,.false.,master_thread,.false., vv, vv, et1)
cc       TIMEADD(times(1,6),ttmp)
c       if (ison.eq.'y') write(*,*) 'energy)'
c      call stopper(id,ison)
c      call timer

      if(ccsdrest .ne. 'off ') then
         call write_rst_ccsd(nit, ecc, eccold, norm, lene, t, tt,
     $            nocc, nvirt)
      end if

      enddo ! iteration }}}
c     TIMEADD(times(1,50),times(1, 99))

      if(localcc.ne.'off ') then
      call localenergycalc(t,tt,fai,nit,scrs,int1,esdi,ui,
     $      ccsdalg,idummy,rdummy,rdummy,nocc, nvirt, dcore, maxcor,
     $      imem, iout, ifltln,scrs,dcore(iscrs2))
      endif
C
c     write(6,*)norm,dabs(eccold-ecc),tol
      write(iout,*)
      if(norm/10.gt.tol.or.dabs(eccold-ecc).gt.tol) then
         ccsd_converged = .false.
         write(iout,
     $"(' Convergence not achieved in',i3,' iterations!')") nit
         if(localcc.ne.'off ')call mrccend(1)
      else
         ccsd_converged = .true.
         write(iout,"(' CCSD iteration has converged in',i3,' steps.')")
     $nit
      endif

 234  continue

      inquire(file = 'pt.rst', exist = rstex)
      ptrest = (ccsdrest .eq. 'ccsd') .and. (nit .eq. 0) .and. rstex

      write(iout,'(f12.2," MB memory was utilized for CCSD")')
     $      maxmemalloc*8.d0/1024**2
c analyse T1 & T2 amplitudes
      if(nocc.ne.0 .and. nvirt.ne.0)
     $      call tprt(ltpr,nc,nocc,nvirt,t,tt,tprtol,iout,scrs)
      if(localcc.ne.'off ')then
      write(iout,*)
      write(iout,'(" T1 contribution [au]:           ",f22.12)')et1
      write(iout,'(" CCSD correlation energy [au]:   ",f22.12)')esdi
         write(iout,*)
c     write(iout,'(" CCSD total energy [au]:         ",f22.12)')
c    $esdi+eref
      call prtenergc('CCSD           ',esdi,eref,locnoc)
       else
      write(iout,*)
      write(iout,
     $        '(" T1 contribution [au]:                   ",f22.12)')et1
         write(iout,
     $         '(" CCSD correlation energy [au]:           ",f22.12)')
     $         ecc-eref
         write(iout,
     $         '(" Total CCSD energy [au]:                 ",f22.12)')
     $         ecc
         if((lno .or. lnaf) .and. dfintran.ne.'ovirt   ')
     $         write(iout,
     $         '(" Total CCSD energy with correction [au]: ",f22.12)')
     $         ecc + emp2corr
      write(iout,*)
c         call prtenergc('CCSD           ',ecc,eref,locnoc)
         call prtenergc('CCSD           ',ecc+emp2corr,eref,locnoc)
       endif
      call flush(iout)
C  }}} end of (local)CCSD calculation
C
c timing data  {{{
       if (.false.) then
       write(iout,*)
       write(iout,"(1x,70('*'))")
       write(iout,'(a94)') ' Time of manipulations:' //
     $ '1 it. CPU/Wall [min], tot CPU/Wall [min], CPU/Wall ratio, tot %'
       write(iout,*)
        txt(1)= '" 1                                  "'
        txt(2)= '" 2 abc                              "'
        txt(3)= '" 2 de                               "'
        txt(4)= '" 2 f                                "'
        txt(5)= '" 2 g                                "'
        txt(6)= '" 3                                  "'
        txt(7)= '" 4 a                                "'
        txt(8)= '" 4 b II                             "'
        txt(9)= '" 4 b I                              "'
        txt(10)='" 5 a                                "'
        txt(11)='" 5 b                                "'
        txt(12)='" 5 c                                "'
        txt(13)='" 5 d                                "'
        txt(14)='" 5 e                                "'
        txt(15)='" 6 abc                              "'
        txt(16)='"                                    "'
        txt(17)='" 7 bc                               "'
        txt(18)='" 7 e                                "'
        txt(19)='" 7 d                                "'
        txt(20)='" 8 a                                "'
        txt(21)='" 8 b                                "'
        txt(22)='" 8 c                                "'
        txt(23)='" 9 a                                "'
        txt(24)='" 9 b                                "'
        txt(25)='" 10                                 "'
        txt(26)='" 11                                 "'
        txt(27)='" 12                                 "'
        txt(28)='" 13 e K                             "'
        txt(29)='" 13 d                               "'
        txt(30)='" 13 ac                              "'
        txt(31)='" 13 e k                             "'
        txt(32)='" 14                                 "'
        txt(33)='" 15 a                               "'
        txt(34)='" 15 b                               "'
        txt(35)='" 15 c                               "'
        txt(36)='" 15 d f                             "'
        txt(37)='" 15 d L                             "'
        txt(38)='" 15 d l                             "'
        txt(39)='" 16                                 "'
        txt(40)='" 17                                 "'
        txt(41)='" 5 b teljes K                       "'
        do i=1,40
           if(i.eq.16) cycle
         ttmp(1,1)=times(4,50)
         write(txt(0),*) trim(txt(i))
         write(iout,"(" // txt(0) // ",1x,7f10.3)")
     $    times(3,i)/nit/60.d0,times(4,i)/nit/60.d0,
c     $    times(5,i),times(6,i),
     $    times(3,i)/60.d0,times(4,i)/60.d0,times(3,i)/times(4,i),
     $    times(4,i)/ttmp(1,1)*100.d0
        enddo
      endif ! timing data }}}
C delete tfile,llfile,lsfile,diisfile,errfile {{{
      open(tfile,file='tfile')
      close(tfile,status="delete")
      open(703,file='lsfile')
      close(703,status="delete")
      open(704,file='llfile')
      close(704,status="delete")
      open(710,file='kkfile')
      close(710,status="delete")
      open(diisfile,file='fort.18')
      close(diisfile,status="delete")
      open(errfile,file='fort.19')
      close(errfile,status="delete")
      if(localcc.ne.'off ')then
        open(722,file='iabc')
        close(722,status="delete")
      endif
      call system('rm -f *infofile')
C }}}
      return
      end subroutine
************************************************************************
      subroutine atpakol(n,nw,ind,ind2,abci,iabc,nvirt,nocc)
************************************************************************
c rearrange: <a_all b_all | c_block i_block> to < i_block a_all | b_all c_block >
      implicit none
      integer nvirt, nocc
      integer n,nw,ind,ind2
      integer i,c
      real*8 abci(nvirt,nvirt,nvirt,n)
      real*8 iabc(nocc,nvirt,nvirt,nw)
      do i=1,n
        do c=ind2,nw+ind2-1
          call dcopy(nvirt**2,abci(1,1,c,i),1,
     $iabc(i+ind-1,1,1,c-ind2+1),nocc)
        enddo
      enddo

      return
      end subroutine
C
************************************************************************
      subroutine rpa_diis
     $(nact,nmax,pvec,evec,ndmax,bmat,bvec,invbmat,cvec,norm,
     $diisfile,errfile,tfile,tinfo,ifltln,ibufln,irecln,nvirt,nocc)
************************************************************************
* DIIS extrapolation for the dRPA iteration                             *
************************************************************************
      implicit none
      integer diisfile, errfile, tfile, tinfo
      integer ifltln, ibufln, irecln
      integer nvirt, nocc
      integer nact,iact,i,j,nmax,ndmax
      integer ipiv(ndmax)
      real*8  bmat(ndmax,ndmax),bvec(ndmax**2),invbmat(ndmax,ndmax),ddot
      real*8  cvec(ndmax),sum,sum1,evec(nmax),pvec(nmax),norm,dnrm2
C
      open(diisfile,file='diisfile',status='unknown',access='direct',
     $recl=nmax*ifltln)
      open(errfile,file='errfile',status='unknown',access='direct',
     $recl=nmax*ifltln)
C
c     rewind(tfile)
c      if(nact.eq.1) then
c       call dfillzero(evec,nmax)
c      else
c       read(tfile) evec
        call rpa_ttread(evec,'d',nvirt,nocc,ibufln,irecln,tfile,tinfo)
c      endif
C Save amplitudes and calculate new error vector
        write(diisfile,rec=nact) pvec
        call daxpy(nmax,-1.d0,evec,1,pvec,1)
        norm=dnrm2(nmax,pvec,1)
        write(errfile,rec=nact) pvec
C Update the scalar product matrix (B matrix)
       do iact=1,nact
        read(errfile,rec=iact) evec
        sum=ddot(nmax,pvec,1,evec,1)
        bmat(nact,iact)=sum
        bmat(iact,nact)=sum
       enddo
C Invert B matrix
       do j=1,nact
         do i=1,j
          bvec(i+(j-1)*j/2)=bmat(i,j)
         enddo
       enddo
      call dsptrf('U',nact,bvec,ipiv,invbmat,i)
      call dsptri('U',nact,bvec,ipiv,invbmat,i)
       do j=1,nact
         do i=1,j
          invbmat(i,j)=bvec(i+(j-1)*j/2)
          invbmat(j,i)=invbmat(i,j)
         enddo
       enddo
C Denominator
      sum=0.d0
       do i=1,nact
         do j=1,nact
          sum=sum+invbmat(i,j)
         enddo
       enddo
      sum=1.d0/sum
C Coefficients
       do i=1,nact
        sum1=0.d0
         do j=1,nact
          sum1=sum1+invbmat(i,j)
         enddo
        cvec(i)=sum*sum1
       enddo
C New amplitudes
      call dfillzero(pvec,nmax)
       do iact=1,nact
        sum=cvec(iact)
        read(diisfile,rec=iact) evec
        call daxpy(nmax,sum,evec,1,pvec,1)
       enddo
C Save new amplitudes
c     rewind(tfile)
c     write(tfile) pvec
      call rpa_twrite(pvec,tfile,tinfo,irecln,ibufln,nocc,nvirt)
C
      close(diisfile)
      close(errfile)
C
      return
      end subroutine
C

************************************************************************
      subroutine tradd(n,alpha,mat,newmat)
************************************************************************
* transpose and add constant times a square matrix to another matrix   *
************************************************************************
      integer n,i
      real*8 mat(n,n),newmat(n,n),alpha
      do i=1,n
        call daxpy(n,alpha,mat(i,1),n,newmat(1,i),1)
      enddo

      return
      end subroutine

************************************************************************
      subroutine tr(n,mat,newmat)
************************************************************************
* transpose a square matrix to a scratch matrix                        *
************************************************************************
      integer n,i
      real*8 mat(n,n),newmat(n,n)
      do i=1,n
        call dcopy(n,mat(i,1),n,newmat(1,i),1)
      enddo

      return
      end subroutine

************************************************************************
      subroutine ccijtrcalc(scrs,nvirt,ti,tj,ttij)
************************************************************************
* calculates the cc(:,:,i,j)' matrix to a scratch matrix               *
************************************************************************
      implicit none
      integer nvirt
      real*8 scrs(nvirt,nvirt)
      real*8 ttij(nvirt,nvirt)
      real*8 ti(nvirt)
      real*8 tj(nvirt)
      call tr(nvirt,ttij,scrs)
      call dger(nvirt,nvirt,1.d0,tj,1,ti,1,scrs,nvirt)
      return
      end subroutine

************************************************************************
      subroutine ccijcalc(scrs,nvirt,ti,tj,ttij)
************************************************************************
* calculates the cc(:,:,i,j) matrix to a scratch matrix                *
************************************************************************
      implicit none
      integer nvirt
      real*8 scrs(nvirt,nvirt)
      real*8 ttij(nvirt,nvirt)
      real*8 ti(nvirt)
      real*8 tj(nvirt)
      call dcopy(nvirt**2,ttij,1,scrs,1)
      call dger(nvirt,nvirt,1.d0,ti,1,tj,1,scrs,nvirt)
      return
      end subroutine

************************************************************************
      subroutine coijcalc(scrs,nvirt,ti,tj,ttij)
************************************************************************
* calculates the co(:,:,i,j) matrix to a scratch matrix                *
************************************************************************
      implicit none
      integer nvirt
      real*8 scrs(nvirt,nvirt)
      real*8 ttij(nvirt,nvirt)
      real*8 ti(nvirt)
      real*8 tj(nvirt)
      call dcopy(nvirt**2,ttij,1,scrs,1)
      call dscal(nvirt**2,0.5d0,scrs,1)
      call dger(nvirt,nvirt,1.d0,ti,1,tj,1,scrs,nvirt)
      return
      end subroutine
************************************************************************
      subroutine coijtrcalc(scrs,nvirt,ti,tj,ttij)
************************************************************************
* calculates the co(:,:,i,j) matrix to a scratch matrix                *
************************************************************************
      implicit none
      integer nvirt
      real*8 scrs(nvirt,nvirt)
      real*8 ttij(nvirt,nvirt)
      real*8 ti(nvirt)
      real*8 tj(nvirt)
      call tr(nvirt,ttij,scrs)
      call dscal(nvirt**2,0.5d0,scrs,1)
      call dger(nvirt,nvirt,1.d0,tj,1,ti,1,scrs,nvirt)
      return
      end subroutine
***********************************************************************
      subroutine xpplcalc(x,xmat,tmp,ccx,recbounds,times,nocc,nvirt,
     $      maxcor,imem,iout,ibufln,irecln)
***********************************************************************
      implicit none
      integer ibufln, irecln
      integer maxcor, imem, iout
      integer nocc, nvirt
      integer ind,n
      integer a,b,c,nn,nm
      character*1 x
      logical notend,notfull
      integer avmem,imem1,recbounds(nvirt,2)
      common/memcom/ imem1
      real*8 tmp(*),dbeta
      real*8 ccx((nvirt+1)*nvirt/2,(nocc+1)*nocc/2)
      real*8 xmat((nvirt+1)*nvirt/2,(nocc+1)*nocc/2)
      real*8 times(6,0:100)

      ind=1
      notend=.true.
c     if(nit.eq.1)then
c       write(6,'("    a    b    c   nn   nm nn-a+1 nn-b+1  n  ind")')
c     endif
      do while(notend)
         notfull=.true.
         avmem=maxcor-(imem-imem1)-(nvirt+1)*nvirt*(nocc+1)*nocc/4
c         write(6,'("avmem: ",i10)')avmem
         if(avmem.ge.(nvirt-ind+1)**2*(nvirt-ind+2)/2)then
            n=1
            a=(nvirt-ind+1)*(nvirt-ind+2)/2
            do while(notfull.and.notend)
               if(n+ind-1.eq.nvirt)then
                  notend=.false.
               else
                  b=(nvirt-(ind+n)+1)*(nvirt-(ind+n)+2)/2
                  c=a-b
                  if(a*c.le.avmem)then
                  n=n+1
c                  write(6,'(3i5)')a,c,n
                  endif
                  b=(nvirt-(ind+n)+1)*(nvirt-(ind+n)+2)/2
                  c=a-b
                  if(a*c.gt.avmem)then
                     n=n-1
                     notfull=.false.
                  endif
               endif
            enddo
         else
            write(iout,*)'Insufficient memory!'
            call mrccend(1)
         endif

         if(ind+n-1.ge.nvirt)then
            notend=.false.
            n=nvirt-ind+1
         else
            notend=.true.
         endif
c         write(6,'("in ppl: beg,end,num,tot:",9i5)')ind,ind+n-1,n,nvirt

         a=(nvirt-ind+1)*(nvirt-ind+2)/2
         b=(nvirt-(ind+n)+1)*(nvirt-(ind+n)+2)/2
         c=a-b
         nn=nvirt*(nvirt+1)/2
         nm=nocc*(nocc+1)/2
cc         TIME0(times)
         call readxlist(x,ind,n,tmp,a,c,recbounds,nocc,nvirt,
     $         ibufln,irecln)
cc         TIMEADD(times(1,4),times)


c         if(nit.eq.1)then
c            write(6,'(9i5)')a,b,c,nn,nm,nn-a+1,nn-b+1,n,ind
c         endif
cc         TIME0(times)
         if(ind.eq.1)then
            dbeta=0.d0
         else
            dbeta=1.d0
         endif

         if(ind+n-1.eq.nvirt)then
            call dsymm('l', 'l', a, nm,
     $            1.d0, tmp, a, ccx(nn-a+1,1), nn,
     $            dbeta, xmat(nn-a+1,1), nn)
         else
            call dsymm('l', 'l', a-b, nm,
     $            1.d0, tmp, a, ccx(nn-a+1,1), nn,
     $            dbeta, xmat(nn-a+1,1), nn)

            call dgemm('n', 'n', b, nm, c,
     $            1.d0, tmp(c+1), a, ccx(nn-a+1,1), nn,
     $            dbeta, xmat(nn-b+1,1),nn)

            call dgemm('t', 'n', c, nm, b,
     $            1.d0, tmp(c+1), a, ccx(nn-b+1,1), nn,
     $            1.d0, xmat(nn-a+1,1), nn)
         endif

cc         TIMEADD(times(1,5),times)
         ind=ind+n
      enddo
      return

      end subroutine

*********************************************************************
      subroutine twrite(t, tfile, irecln, tinfo, ibufln, nvirt, nocc)
*********************************************************************
      implicit none
      integer tfile, irecln, tinfo, ibufln, nvirt, nocc
      real*8 t(nvirt*nocc+nvirt**2*(nocc+1)*nocc/2),buffer(ibufln)
      integer i,lastrec,lastrecsize
       call dfillzero(buffer,ibufln)
      open(tfile,file='tfile',access='direct',recl=irecln,
     $status='unknown')
      lastrecsize=modulo(nvirt*nocc+nvirt**2*(nocc+1)*nocc/2,
     $ibufln)
      if(lastrecsize.eq.0)lastrecsize=ibufln
      lastrec=(nvirt*nocc+nvirt**2*(nocc+1)*nocc/2-lastrecsize)/ibufln+1
      do i=1,lastrec-1
        write(tfile,rec=i)t((i-1)*ibufln+1:i*ibufln)
      enddo
      call dcopy(lastrecsize,t((lastrec-1)*ibufln+1),1,buffer,1)
      write(tfile,rec=lastrec)buffer(1:ibufln)

      open(tinfo,file='tinfofile')
      rewind(tinfo)
      write(tinfo,*)lastrec,lastrecsize
c     close(vfile)
      close(tfile)
      close(tinfo)
      return
      end subroutine
*********************************************************************
      subroutine rpa_twrite(t,tfile,tinfo,irecln,ibufln,nocc,nvirt)
*********************************************************************
      implicit none
      integer tfile, tinfo, irecln, ibufln
      integer nocc, nvirt
      real*8 t(nvirt**2*(nocc+1)*nocc/2),buffer(ibufln)
      integer i,lastrec,lastrecsize
       call dfillzero(buffer,ibufln)
      open(tfile,file='tfile',access='direct',recl=irecln,
     $status='unknown')
      lastrecsize=modulo(nvirt**2*(nocc+1)*nocc/2,
     $ibufln)
      if(lastrecsize.eq.0)lastrecsize=ibufln
      lastrec=(nvirt**2*(nocc+1)*nocc/2-lastrecsize)/ibufln+1
      do i=1,lastrec-1
        write(tfile,rec=i)t((i-1)*ibufln+1:i*ibufln)
      enddo
      call dcopy(lastrecsize,t((lastrec-1)*ibufln+1),1,buffer,1)
      write(tfile,rec=lastrec)buffer(1:ibufln)

      open(tinfo,file='tinfofile')
      rewind(tinfo)
      write(tinfo,*)lastrec,lastrecsize
c     close(vfile)
      close(tfile)
      close(tinfo)
      return
      end subroutine
*********************************************************************
      subroutine rpa_ttread(t,d,nvirt,nocc,ibufln,irecln,tfile,tinfo)
*********************************************************************
      implicit none
      integer ibufln, irecln, tfile, tinfo
      integer nvirt, nocc
      real*8 t(nvirt**2*(nocc+1)*nocc/2),buffer(ibufln)
      integer i,lastrec,lastrecsize
      character*1 d
      open(tinfo,file='tinfofile')
      rewind(tinfo)
      read(tinfo,*)lastrec,lastrecsize
      open(tfile,file='tfile',access='direct',recl=irecln,
     $status='old')
      do i=1,lastrec-1
        read(tfile,rec=i)t((i-1)*ibufln+1:i*ibufln)
      enddo
      read(tfile,rec=lastrec)buffer(1:ibufln)
      call dcopy(lastrecsize,buffer,1,t((lastrec-1)*ibufln+1),1)
      if(d.eq.'d')close(tfile,status="delete")
      if(d.eq.'n')close(tfile)
      close(tinfo)
      return
      end subroutine
*********************************************************************
      subroutine ttread(t,d, ibufln, irecln, tfile, tinfo)
*********************************************************************
      implicit none
      integer ibufln, irecln, tfile, tinfo
      real*8 t(*),buffer(ibufln)
      integer i, lastrec, lastrecsize
      character*1 d

      open(tinfo,file='tinfofile')
      rewind(tinfo)
      read(tinfo,*)lastrec,lastrecsize
      open(tfile,file='tfile',access='direct',recl=irecln,
     $status='old')

      do i=1,lastrec-1
         read(tfile,rec=i)t((i-1)*ibufln+1:i*ibufln)
      enddo
      read(tfile,rec=lastrec)buffer(1:ibufln)
      call dcopy(lastrecsize,buffer,1,t((lastrec-1)*ibufln+1),1)

      if(d.eq.'d')close(tfile,status="delete")
      if(d.eq.'n')close(tfile)
      close(tinfo)
      return
      end subroutine
*********************************************************************
       subroutine readinfo(nbasis,nocc,dgroup,bsymmv,symmocc,
     $ico,co,h,NENERGY,first,last,ui,deglev,ccsdalg,nirmax)
*********************************************************************
       implicit none
       integer nirmax
       integer bsymmv(nirmax+1),symmocc(nirmax)
       integer nocc,nbasis,dgroup
       integer co(nbasis),ico(nbasis)
       real*8  h(nbasis,nbasis)
       integer first(0:5,8,8,nbasis),last(0:5,8,8,nbasis)
       integer i,j,k,ii,jj
       integer bsymm,csymm,isymm,xsymm
       real*8  NENERGY
       integer deglev,actocc
       real*8  ui(nocc)
       character*8 ccsdalg
c
       ii=1
       do isymm=1,dgroup
        do j=bsymmv(isymm),bsymmv(isymm)+symmocc(isymm)-1
         co(j)=ii
         ico(ii)=isymm
         ii=ii+1
        enddo
       enddo
       do isymm=1,dgroup
        do j=bsymmv(isymm)+symmocc(isymm),bsymmv(isymm+1)-1
         co(j)=ii
         ico(ii)=isymm
         ii=ii+1
        enddo
       enddo
c       open(555,file='fort.555')
       open(555,file='55') !RZ.
       rewind 555
       read(555,*)
       read(555,*)
       read(555,*)
       read(555,*)actocc,deglev
       read(555,*)ui(1:nocc)
C Reading the Fock-matrix!!
       do xsymm=1,dgroup
        do i=bsymmv(xsymm),bsymmv(xsymm+1)-1
         do j=bsymmv(xsymm),bsymmv(xsymm+1)-1
          read(555,*) h(i,j)
          h(j,i)=h(i,j)
c         write(6,'(2i3,f14.10)')i,j,h(i,j)
c       write(917,'(F19.12,2I5)')h(i,j),i,j
         enddo
        enddo
       enddo
       read(555,*) NENERGY
       close(555,status='keep')
c
!       open(56,file='fort.56')
       first=0
       last=0
       bsymm=1
       if(ccsdalg .ne. 'dfdirect') then
          open(56,file='56')  !R.Z
          rewind 56
          do while(bsymm.ne.0)
           read(56,*) jj,bsymm,csymm,i,j,k
c       write(916,'(6I5)')jj,bsymm,csymm,i,j,k
c jj: milyen tipusu integral
c b/csymm: 2./3. indexre a szimm
c i: utolso index erteke
c j,k: mettol meddig kell olvasni (rekordot)
c       if(bsymm.eq.0)exit
           if(bsymm.ne.0)then
             first(jj,bsymm,csymm,i)=j
             last(jj,bsymm,csymm,i)=k
            endif
          enddo
          close(56)
       end if
       return
       end subroutine
************************************************************************
       subroutine localenergycalc(t,tt,fai,nn,
     $scrs,int1,esdi,ui,ccsdalg,dfnb,jia,abj,
     $nocc, nvirt, dcore, maxcor, imem, iout,ifltln,kk_jk_ofab,t1t)
C Calculate i-th local energy part
************************************************************************
       implicit none
       integer nocc, nvirt
       integer maxcor, imem, iout, ifltln
       real*8 dcore(*)
       real*8 t(nvirt,nocc),tt(nvirt,nvirt,(nocc+1)*nocc/2)
       real*8 scrs(nvirt,nvirt),fai(nvirt,nocc)
       real*8 int1(nocc,nocc),ui(nocc)
       real*8 ddot,esdi,abj(nvirt,nvirt,nocc)
       logical notend,localcc15p,diskints4cc
       integer ind,n,j,k,l,ttind,nn
       integer imem1,pos
       character*4 localcc !NP
       character*16 naf !NP
       common/memcom/imem1

       integer dblalloc
       integer blocknumber, bsize
       real*8, pointer :: ll_kaj_b(:,:,:,:), cc_laj_b(:,:,:,:),abofj(:)
       real*8 kk_jk_ofab(nocc, nocc)
       integer illkajb, icclajb
       integer a, b
       character*8 ccsdalg,lccoporder
       integer dfnb
       real*8 jia(nocc, nvirt, dfnb)
       real*8 t1t(nocc, nvirt)

      interface
c {{{ interfaces for pointers
      subroutine rpoint1d(egydim1,egydim2,dim1)
         implicit none
         integer dim1
         real*8,target :: egydim1(dim1)
         real*8, pointer :: egydim2(:)
      end subroutine
      subroutine rpoint4d(egydim,haromdim,dim1,dim2,dim3,dim4)
         implicit none
         integer dim1,dim2,dim3,dim4
         real*8,target :: egydim(dim1,dim2,dim3,dim4)
         real*8, pointer :: haromdim(:,:,:,:)
      end subroutine
c}}}
      end interface

       call getkey('localcc',7,localcc,4)
       call getkey('naf_cor',7,naf,16)
       call getkey('lccoporder',10,lccoporder,8)
       diskints4cc=lccoporder.eq.'trffirst'.or.ccsdalg.eq.'disk    '

       call dgemm('t','n',nocc,nocc,nvirt,2.d0,
     $fai,nvirt,t,nvirt,0.d0,int1,nocc)

       if (localcc15p(localcc).and.naf.ne.'off             ') then
c contribution of the f*t1 type term to dE^CCSD_i
        call dgemv('n',nocc,nocc,1.d0,int1,nocc,ui,1,0.d0,dcore(imem),1)
        esdi=ddot(nocc,ui,1,dcore(imem),1)
       endif

       if (.not.localcc15p(localcc).or.naf.eq.'off             ') then !NP
c {{{ E^CCSD is computed with density fitted integrals and amplitudes
         if(ccsdalg .ne. 'dfdirect') then
          ind=1
          notend=.true.

          n = blocknumber(nvirt**2, nocc, nocc, 'y', ind, bsize,
     $          'Insufficient memory!                    ',
     $          maxcor-(imem-imem1))

          do while(notend)
c            if(maxcor-(imem-imem1).ge.nvirt**2*nocc)then
c              n=(maxcor-(imem-imem1)-modulo(maxcor-(imem-imem1),
c      $nvirt**2*nocc))/(nvirt**2*nocc)
c            else
c              write(iout,*)'Insufficient memory!'
c              call mrccend(1)
c            endif

            if(ind+n-1.ge.nocc)then
              notend=.false.
              n=nocc-ind+1
            endif
            call ll2read(ind,n,dcore(imem), 'abkl',nocc,nvirt,ifltln)
            do j=ind,ind+n-1
              do k=1,nocc
                do l=1,nocc
                  if(l.le.j)then
                    ttind=(j-1)*j/2+l
                    call ccijcalc(scrs,nvirt,t(1,l),t(1,j),
     $ tt(1,1,ttind))
                    int1(k,l)=int1(k,l)+ddot(nvirt**2,
     $ dcore(imem+(k-1)*nvirt**2+(j-ind)*nvirt**2*nocc),1,scrs,1)
                  endif
                  if(l.gt.j)then
                    ttind=(l-1)*l/2+j
                    call ccijtrcalc(scrs,nvirt,t(1,j),t(1,l),
     $ tt(1,1,ttind))
                    int1(k,l)=int1(k,l)+ddot(nvirt**2,
     $ dcore(imem+(k-1)*nvirt**2+(j-ind)*nvirt**2*nocc),1,scrs,1)
                endif
c                do a=1,nvirt
c                   do b=1,nvirt
c                      write(*,'(4i3,f22.16)') j,l,a,b,
c     $                 scrs(a,b)
c                   end do
c                end do
                enddo
              enddo
            enddo
            ind=ind+n
           enddo
         else ! ccsdalg .eq. 'dfdirect'
           n = blocknumber(2*nvirt*nocc, nocc, nvirt, 'y', 1, bsize,
     $          'Insufficient memory in localenergycalc! ',
     $          maxcor-(imem-imem1))

           do a = 1, nvirt
              call dcopy(nocc, t(a, 1), nvirt, t1t(1, a), 1)
           end do

           illkajb = dblalloc(nocc**2*nvirt*n)
           call rpoint4d(dcore(illkajb), ll_kaj_b, nocc, nvirt, nocc, n)
           icclajb = dblalloc(nocc**2*nvirt*n)
           call rpoint4d(dcore(icclajb), cc_laj_b, nocc, nvirt, nocc, n)

           ind = 1
           notend = .true.
           do while(notend)
              if(ind+n-1 .ge. nvirt) then
                 notend = .false.
                 n = nvirt-ind+1
              end if

              ! Labkj
              call dgemm('n', 't', nocc*nvirt, nocc*n, dfnb,
     $               1.d0, jia, nocc*nvirt, jia(1, ind, 1), nocc*nvirt,
     $               0.d0, ll_kaj_b, nocc*nvirt)

              do b = 1, n
                do a = 1, nvirt
                  do k = 1, nocc
                    call dcopy(nocc, ll_kaj_b(k, a, 1, b), nocc*nvirt,
     $                     kk_jk_ofab(1, k), 1)
                  end do

                  do k = 1, nocc
                     ll_kaj_b(k, a, 1:nocc, b) =
     $                     2.d0 * ll_kaj_b(k, a, 1:nocc, b)
     $                     - kk_jk_ofab(k, 1:nocc)
                  end do
                end do
              end do

              ! Cablj
              do j = 1, nocc
                 do b = ind, ind+n-1
                    do l = 1, j
                       ttind = j * (j-1) / 2 + l
                       cc_laj_b(l, 1:nvirt, j, b-ind+1) =
     $                        tt(1:nvirt, b, ttind)
                    end do

                    do l = j+1, nocc
                       ttind = l * (l-1) / 2 + j
                       cc_laj_b(l, 1:nvirt, j, b-ind+1) =
     $                        tt(b, 1:nvirt, ttind)
                    end do
                 end do
              end do

              call dger(nocc*nvirt, nocc*n,
     $               1.d0, t1t, 1, t1t(1, ind), 1,
     $               cc_laj_b, nocc*nvirt)
c              do j=1,nocc
c                 do l=1,nocc
c                    do a=1,nvirt
c                       do b=ind,ind+n-1
c                  write(*,'(4i3,f22.16)') j,l,a,b,cc_laj_b(l,a,j,b)
c                       end do
c                    end do
c                 end do
c              end do

              call dgemm('n', 't', nocc, nocc, nvirt*nocc*n,
     $               1.d0, ll_kaj_b, nocc, cc_laj_b, nocc,
     $               1.d0, int1, nocc)

              ind = ind + n
           end do

           call dbldealloc(illkajb)
         end if
c       esdi= !!??? Ui'*INT1*Ui
        call dgemv('n',nocc,nocc,1.d0,int1,nocc,ui,1,0.d0,scrs,1)
        esdi=ddot(nocc,ui,1,scrs,1)
c }}}
c
       elseif(localcc15p(localcc).and.naf.ne.'off             ') then
c E^CCSD is computed with amplitudes assembled in the NAF basis and integrals assembled in the total aux basis
          if(maxcor-(imem-imem1).lt.2*nvirt**2) then ! for L_ab and transformed C_ab
           write(iout,*)'Insufficient memory!'
             call mrccend(1)
          endif
          if (diskints4cc) then
            pos=imem+nvirt**2
            open(700,file='ajb',form='unformatted')
            rewind(700)
            call rpoint1d(dcore(pos), abofj, nvirt**2)
          endif
c db
c           write(*,*) 'ajb in ccsd.f', imem, pos
c db
          do j=1,nocc
           call dfillzero(dcore(imem),nvirt**2)
           if (diskints4cc) then
             read(700) abofj(1:nvirt**2)
           else
             call rpoint1d(abj(1,1,j), abofj, nvirt**2)
           endif
c db
c           write(*,*) 'abofj for j',j
c           write(*,'(1000f12.6)') abofj(1:nvirt**2)
c db
           do l=1,nocc
            if(l.le.j)then
             ttind=(j-1)*j/2+l
                  call ccijcalc(scrs,nvirt,t(1,l),t(1,j),
     $                  tt(1,1,ttind)) ! get C^ab_lj for a given lj
            elseif(l.gt.j)then
             ttind=(l-1)*l/2+j
                  call ccijtrcalc(scrs,nvirt,t(1,j),t(1,l),
     $                  tt(1,1,ttind)) ! get C^ab_lj for a given lj
            endif
            call daxpy(nvirt**2,ui(l),scrs,1,dcore(imem),1)       ! transform l canonical to I localized orb index
           enddo !l
           esdi=esdi+ddot(nvirt**2,abofj,1,dcore(imem),1)     ! sum_ab L^ab_Ij * C^ab_Ij for all j
          enddo !j
c db
c        write(*,*) 'localE, t1 +t2', esdi , nocc, nvirt
c db
          if (diskints4cc) close(700)
       endif

       if(nn.eq.0)then
         write(iout,"(
     $         ' LMP2 domain correlation energy contribution [au]:'
     $               ,f22.12)")esdi
       endif
        return

        end subroutine
************************************************************************
       subroutine energycalc(t,tt,fai,nn,er,norm,scrs,localcc,ccsdalg,
     $jia,maxcor,imem,dcore,ifltln,iout,nvirt,nocc,dfnb,ecc,kk_ij_ofab,
     $t1t,mpi_rank,mpi_size,mpi_ccsd, times, ccsd_communicator,emp2full,
     $lno,lnaf,emp2corr,eccs,eccss,eccos, eccscs, eccoss, eccsss, lpert,
     $master_thread, lf12,f12int1, f12int2, et1)
************************************************************************
*
*  nn: >0 iteration number, calc. CCSD energy
*       0 MP2 energy calculation
*      -1 CCSD energy calculation for NAF correction
*      -2 does not print energy (PPL contribution with t=0)
*      -3 - -9: CCSD energy (contributions with printing)
*
      implicit none
#if defined(MPI)
      include "mpif.h"
#endif
      integer maxcor, imem, ifltln, iout
      integer mpi_rank, mpi_size, mpi_err
      integer nvirt, nocc, dfnb, ccsd_communicator
      real*8 dcore(*)
      real*8 times(6,0:100), eccss, eccos, ecabs,emp2f12,ef12,tmp
      real*8 t(nvirt,nocc),tt(nvirt,nvirt,(nocc+1)*nocc/2)
      real*8 scrs(nvirt,nvirt)
      real*8 fai(nvirt,nocc)
      real*8 f12int1(nvirt, nocc)
      real*8 f12int2(nvirt, nvirt, nocc*(nocc+1)/2)
      real*8 ddot,norm,er
      logical notend,localcc15p,lf12
      real*8 emp2full, emp2corr
      logical lno,lnaf,lpert,master_thread
      integer ind,n,i,j,ttind,nn
      integer imem1
      character*4 localcc
      character*8 dfintran
      character*9 nonaf
      common/memcom/imem1

      real*8, pointer :: qf(:,:), qfact(:)
      real*8 eccs, tfact

      real*8 jia(nocc, nvirt, dfnb)
      integer blocknumber, bsize
      character*8 ccsdalg
      integer dblalloc
      integer a, b, find, lind, dhyb
      logical mpi_ccsd, lscale

      integer illiajb, ittiajb
      real*8, pointer :: lliaj_b(:,:,:,:)
      real*8, pointer :: ttiaj_b(:,:,:,:)
      real*8 kk_ij_ofab(nocc, nocc), t1t(nocc, nvirt)

       real*8 ecc, eccscs, eccoss, eccsss, et1
       integer iqfact, iqf

       real*8 ediag(nocc)

      interface
c {{{ interfaces for pointers
      subroutine rpoint1d(egydim,haromdim,dim1)
         implicit none
         integer dim1
         real*8,target :: egydim(dim1)
         real*8, pointer :: haromdim(:)
      end subroutine
      subroutine rpoint2d(egydim,haromdim,dim1,dim2)
         implicit none
         integer dim1,dim2
         real*8,target :: egydim(dim1,dim2)
         real*8, pointer :: haromdim(:,:)
      end subroutine
      subroutine rpoint4d(egydim,haromdim,dim1,dim2,dim3,dim4)
         implicit none
         integer dim1,dim2,dim3,dim4
         real*8,target :: egydim(dim1,dim2,dim3,dim4)
         real*8, pointer :: haromdim(:,:,:,:)
      end subroutine
c}}}
      end interface

      ecc = 0.d0
      eccss = 0.d0
      eccos = 0.d0
      eccs = 0.d0
      eccscs = 0.d0
      eccoss = 0.d0
      eccsss = 0.d0
      ediag = 0.d0

      call getvar('dhyb      ',dhyb)

C Calculate energy
      if(ccsdalg .eq. 'disk    ') then ! {{{
         ecc=er+2.d0*ddot(nvirt*nocc,fai,1,t,1)
         et1 = ecc - er
         if(lpert .and. dhyb.ge.1) ecc = er

         ind=1
         notend=.true.

         n = blocknumber(nvirt**2, nocc, nocc, 'y', ind, bsize,
     $          'Insufficient memory!                    ',
     $          maxcor-(imem-imem1))

         do while(notend)
            if(ind+n-1 .ge. nocc) then
               notend = .false.
               n = nocc-ind+1
            end if

            call ll2read(ind, n, dcore(imem), 'abkl',nocc,nvirt,ifltln)

            do j = ind, ind+n-1
               do i = 1, nocc
                  if(i .le. j) then
                     ttind = (j-1) * j / 2 + i

                     if (lpert) then ! quadratic t1 term is not needed for MP2
                       call dcopy(nvirt**2,tt(1, 1, ttind),1,scrs,1)
                     else
                       call ccijcalc(scrs, nvirt, t(1, i), t(1, j),
     $                     tt(1, 1, ttind))
                     endif

                     ecc = ecc + ddot(nvirt**2,
     $                     dcore(imem+(i-1)*nvirt**2+
     $                     (j-ind)*nvirt**2*nocc), 1, scrs, 1)
                  endif

                  if(i .gt. j) then
                     ttind = (i-1) * i / 2 + j

                     if (lpert) then ! quadratic t1 term is not needed for MP2
                       call tr(nvirt,tt(1, 1, ttind),scrs)
                     else
                       call ccijtrcalc(scrs, nvirt, t(1, j), t(1, i),
     $                     tt(1, 1, ttind))
                     endif

                     ecc = ecc + ddot(nvirt**2,
     $                     dcore(imem+(i-1)*nvirt**2+
     $                     (j-ind)*nvirt**2*nocc), 1, scrs, 1)
                  end if
               end do
            end do
            ind = ind + n
         enddo ! }}}
      else if(ccsdalg .eq. 'dfdirect') then ! {{{
         lscale=(lno.or.lnaf).and.trim(localcc).eq.'off'.and..not.lf12
         if(lscale) then
            iqfact=dblalloc(nocc)
            call rpoint1d(dcore(iqfact), qfact, nocc)
            iqf=dblalloc(nvirt*nocc)
            call rpoint2d(dcore(iqf), qf, nvirt, nocc)
            open(800,file='F12INTE',form='unformatted')
            read(800) ecabs,emp2f12
            read(800) tfact,qfact
         else
            iqfact=imem
            iqf=imem
            call rpoint2d(fai, qf, nvirt, nocc)
         endif

         TIME0(times(1,95))
         if(master_thread) then !mpi_rank .eq. 0) then
c            ecc = er + 2.d0 * ddot(nvirt*nocc, fai, 1, t, 1)
            if(lscale) then
               do i = 1, nocc
                  qf(:, i) = qfact(i) * fai(:, i)
               end do
               eccs = 2.d0 * ddot(nvirt*nocc, qf, 1, t, 1)
            end if
            ecc = er + 2.d0 * ddot(nvirt*nocc, fai, 1, t, 1)
         else
            ecc = 0
         end if
         et1 = ecc - er
         if(lpert .and. dhyb.ge.1) ecc = er
         TIMEADD(times(1, 45), times(1, 95))

         if(nocc.eq.0 .or. nvirt.eq.0) goto 123
         n = blocknumber(2*nocc**2*nvirt, 1, nvirt, 'y', 1, bsize,
     $         'Insufficient memory for one block of K! ',
     $         maxcor-(imem-imem1))

         illiajb = dblalloc(nocc**2*nvirt*n)
         call rpoint4d(dcore(illiajb), lliaj_b, nocc, nvirt, nocc, n)

         ittiajb = dblalloc(nocc**2*nvirt*n)
         call rpoint4d(dcore(ittiajb), ttiaj_b, nocc, nvirt, nocc, n)

         TIME0(times(1,95))
         do i = 1, nocc
            call dcopy(nvirt, t(1, i), 1, t1t(i, 1), nocc)
         end do
         TIMEADD(times(1, 46), times(1, 95))

         call blockindices(find, lind, nvirt, n, mpi_rank, mpi_size,
     $         notend, 1)
         ind = find

         do while(notend)
            if(ind+n-1 .ge. lind) then !nvirt) then
               n = lind - ind + 1 !nvirt - ind + 1
               notend = .false.
            end if


         TIME0(times(1,95))
            ! ttiaj_b
            do j = 1, nocc
c$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,ttind,b) SCHEDULE(DYNAMIC)
               do i = 1, j
                  ttind = j * (j-1) / 2 + i
                  do b = ind, ind+n-1
                     ttiaj_b(i, 1:nvirt, j, b-ind+1) =
     $                     tt(1:nvirt, b, ttind)
                  end do
               end do
c$OMP END PARALLEL DO

c$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,ttind,b) SCHEDULE(DYNAMIC)
               do i = j+1, nocc
                  ttind = i * (i-1) / 2 + j
                  do b = ind, ind+n-1
                     ttiaj_b(i, 1:nvirt, j, b-ind+1) =
     $                     tt(b, 1:nvirt, ttind)
                  end do
               end do
c$OMP END PARALLEL DO
            end do
         TIMEADD(times(1, 49), times(1, 95))

           if (.not.lpert) then ! quadratic t1 term is not needed for MP2
         TIME0(times(1,95))
            call dger(nocc*nvirt, nocc*n,
     $            1.d0, t1t, 1, t1t(1, ind), 1, ttiaj_b, nocc*nvirt)
         TIMEADD(times(1, 50), times(1, 95))
           endif


         TIME0(times(1,95))
            call dgemm('n', 't', nocc*nvirt, nocc*n, dfnb,
     $            1.d0, jia, nocc*nvirt, jia(1, ind, 1), nocc*nvirt,
     $            0.d0, lliaj_b, nocc*nvirt)
         TIMEADD(times(1, 47), times(1, 95))

         TIME0(times(1,95))
c            ediag = 0.d0
c            do i = 1, nocc
c               do a = 1, nvirt
c                  do b = 1, n
c                     ediag(i) = ediag(i) +
c     $                     lliaj_b(i,a,i,b) * ttiaj_b(i,a,i,b)
c                  end do
c               end do
c            end do
c            write(*,*) 'CCSD oppsite spin diagonal', sum(ediag)
c            write(*,'(f20.12)') ediag
            eccos = ddot(nvirt*nocc**2*n, lliaj_b, 1, ttiaj_b, 1)
            eccss = -eccos
            ecc = ecc + eccos
         TIMEADD(times(1, 51), times(1, 95))

         TIME0(times(1,95))


            do b = ind, ind+n-1
               do a = 1, nvirt
                  do j = 1, nocc
                     kk_ij_ofab(1:nocc, j) =
     $                     lliaj_b(1:nocc, a, j, b-ind+1)
                  end do

                  do j = 1, nocc
                     lliaj_b(1:nocc, a, j, b-ind+1) =
c     $                     lliaj_b(1:nocc, a, j, b-ind+1) -
     $                     2.d0 * lliaj_b(1:nocc, a, j, b-ind+1) -
     $                     kk_ij_ofab(j, 1:nocc)
                  end do
               end do
            end do
         TIMEADD(times(1, 48), times(1, 95))

         TIME0(times(1,95))
            eccss = eccss +
     $            ddot(nvirt*nocc**2*n, lliaj_b, 1, ttiaj_b, 1)
            ecc = ecc + eccss

               do i = 1, nocc
                  do j = 1, nocc
                     do a = 1, nvirt
                        do b = 1, n
c$OMP ATOMIC
                           ediag(i) = ediag(i) +
     $                           lliaj_b(i,a,j,b) * ttiaj_b(i,a,j,b)
                        end do
                     end do
                  end do
               end do
         TIMEADD(times(1, 51), times(1, 95))

            if(lscale) then
c q-scaled total energy
               do b = 1, n
                  do j = 1, nocc
                     do a = 1, nvirt
                        do i = 1, nocc
                           lliaj_b(i, a, j, b) =
     $                        (qfact(i) + qfact(j)) / 2 *
     $                        lliaj_b(i, a, j, b)
                        end do
                     end do
                  end do
               end do
               eccs = eccs +
     $               ddot(nvirt*nocc**2*n, lliaj_b, 1, ttiaj_b, 1)

! q-scs innen {{{
            read(800) qfact
         TIME0(times(1,95))
            call dgemm('n', 't', nocc*nvirt, nocc*n, dfnb,
     $            1.d0, jia, nocc*nvirt, jia(1, ind, 1), nocc*nvirt,
     $            0.d0, lliaj_b, nocc*nvirt)
         TIMEADD(times(1, 47), times(1, 95))
               do b = 1, n
                  do j = 1, nocc
                     do a = 1, nvirt
                        do i = 1, nocc
                           lliaj_b(i, a, j, b) =
     $                        (qfact(i) + qfact(j)) / 2 *
     $                        lliaj_b(i, a, j, b)
                        end do
                     end do
                  end do
               end do

         TIME0(times(1,95))
            eccoss = ddot(nvirt*nocc**2*n, lliaj_b, 1, ttiaj_b, 1)
c            eccsss = -eccoss
            eccscs = eccscs + eccoss
         TIMEADD(times(1, 51), times(1, 95))

         TIME0(times(1,95))
            read(800) qfact
            call dgemm('n', 't', nocc*nvirt, nocc*n, dfnb,
     $            1.d0, jia, nocc*nvirt, jia(1, ind, 1), nocc*nvirt,
     $            0.d0, lliaj_b, nocc*nvirt)
            do b = ind, ind+n-1
               do a = 1, nvirt
                  do j = 1, nocc
                     kk_ij_ofab(1:nocc, j) =
     $                     lliaj_b(1:nocc, a, j, b-ind+1)
                  end do

                  do j = 1, nocc
                     lliaj_b(1:nocc, a, j, b-ind+1) =
c     $                     lliaj_b(1:nocc, a, j, b-ind+1) -
     $                     lliaj_b(1:nocc, a, j, b-ind+1) -
     $                     kk_ij_ofab(j, 1:nocc)
                  end do
               end do
            end do
               do b = 1, n
                  do j = 1, nocc
                     do a = 1, nvirt
                        do i = 1, nocc
                           lliaj_b(i, a, j, b) =
     $                        (qfact(i) + qfact(j)) / 2 *
     $                        lliaj_b(i, a, j, b)
                        end do
                     end do
                  end do
               end do
         TIMEADD(times(1, 48), times(1, 95))

         TIME0(times(1,95))
            eccsss = eccsss +
     $            ddot(nvirt*nocc**2*n, lliaj_b, 1, ttiaj_b, 1)
            eccscs = eccscs + eccsss
         TIMEADD(times(1, 51), times(1, 95))

         rewind(800)
! q-scs eddig }}}

         rewind(800)
            end if

            ind = ind + n
         end do

c            if(nn.ne.-2) then
c               open(799, file='ccsd_ei')
c               write(799,'(f20.14)') ediag
cc               write(*,*) 'ccsd energy ',sum(ediag)
c               close(799)
c            else
c               open(799, file='ppl_ei')
c               write(799,'(f20.14)') ediag
cc               write(*,*) 'ccsd energy ',sum(ediag)
c               close(799)
c            end if
c
         call dbldealloc(illiajb)

C F12 energy contribution
         if(lf12 .and. (nn.gt.0 .or. nn.eq.-1) .and. mpi_rank.eq.0) then
            ef12 = 0.d0
            rewind(800)
            read(800) ecabs, emp2f12
            read(800)
            read(800) f12int2
            ef12 = ef12 + ddot(nvirt**2*(nocc+1)*nocc/2,f12int2,1,tt,1)
            read(800) f12int2
            tmp = 0.d0
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
C$OMP& PRIVATE(a,b,i,j,ttind)
C$OMP& REDUCTION(+:tmp)
            do j = 1, nocc
               do i = 1, j
                  ttind = (j-1) * j / 2 + i
                  do b = 1, nvirt
                     do a = 1, nvirt
                        tmp = tmp + f12int2(a, b, ttind)*t(a,i)*t(b,j)
                     end do
                  end do
               end do
            end do
C$OMP END PARALLEL DO
            ef12 = ef12 + tmp
            read(800) f12int1
            ef12 = ef12 + ddot(nvirt*nocc, f12int1, 1, t, 1)
            ecc = ecc + emp2f12 + 2.d0 * ef12
         endif
C F12 end

 123     continue

         TIME0(times(1,95))
#if defined(MPI)
         if(mpi_ccsd .and. mpi_size .ne. 1) then
               call MPI_Allreduce(MPI_IN_PLACE, eccss, 1,
     $         MPI_DOUBLE_PRECISION, MPI_SUM, ccsd_communicator,
     $         mpi_err)
               call MPI_Allreduce(MPI_IN_PLACE, eccos, 1,
     $         MPI_DOUBLE_PRECISION, MPI_SUM, ccsd_communicator,
     $         mpi_err)
               call MPI_Allreduce(MPI_IN_PLACE, ecc, 1,
     $         MPI_DOUBLE_PRECISION, MPI_SUM, ccsd_communicator,
     $         mpi_err)
         end if
#endif
         TIMEADD(times(1, 52), times(1, 95))
      end if ! }}}

         if(nn.eq.0)then
            if (localcc15p(localcc)) then
               write(iout,*)
               write(iout,"(' MP2 correlation energy [au]: ',f21.12)")
     $               ecc
            else
               write(iout,"(' T1 contribution [au]:         ',f21.12)")
     $               et1
              if(ccsdalg .eq. 'dfdirect') then
               write(iout,"(' Same spin MP2 energy [au]:    ',f21.12)")
     $               eccss
               write(iout,"(' Opposite spin MP2 energy [au]:',f21.12)")
     $               eccos
              end if
               write(iout,"(' MP2 correlation energy [au]:  ',f21.12)")
     $               ecc-er
               write(iout,"(' Total MP2 energy [au]:        ',f21.12)")
     $               ecc
            call getkey('dfintran', 8, dfintran, 8)
            if(dfintran.ne.'ovirt   ') then
               if(lno .or. lnaf) then
                  emp2corr = emp2full - ecc
                  write(iout,
     $                 "(' MP2 correction [au]:   ',f28.12)")
     $                  emp2corr
               end if

c               if(lno .and. lnaf) then
c                  write(iout,
c     $                 "(' Correction for NOs&NAFs [au]: ',f22.12)")
c     $                  emp2corr
c               else if(lno) then
c                  write(iout,
c     $                 "(' Correction for NOs [au]:      ',f22.12)")
c     $                  emp2corr
c               else if(lnaf) then
c                  write(iout,
c     $                 "(' Correction for NAFs [au]:     ',f22.12)")
c     $                  emp2corr
c               end if
            end if
         endif
         write(iout,*)
      else if(nn .eq. -1) then
c         write(iout,
c     $         "(/' CCSD energy with MO integrals [au]: ',
c     $         f22.12)")ecc
      else if(nn .gt. 0) then
         write(iout,"(' Iteration',i3,2f22.12)")nn,norm,ecc
         call flush(iout)
      else if(nn .eq. -3) then
         write(iout,"(' rest_eg  ',f22.12)") ecc
      else if(nn .eq. -4) then
         write(iout,"(' rest_s   ',f22.12)") ecc
      else if(nn .eq. -5) then
         write(iout,"(' rest_mp3 ',f22.12)") ecc
      else if(nn .eq. -6) then
         write(iout,"(' rest_nl6 ',f22.12)") ecc
      else if(nn .eq. -7) then
         write(iout,"(' rest_v   ',f22.12)") ecc
      else if(nn .eq. -8) then
         write(iout,"(' rest_a2  ',f22.12)") ecc
      else if(nn .eq. -9) then
         write(iout,"(' rest_egnl',f22.12)") ecc
      endif
      return

      end subroutine

************************************************************************
      function calcnorm(nvirt, nocc, v, vv)
************************************************************************
      implicit none
      real*8 calcnorm
      integer nvirt, nocc
      real*8 v(nvirt,nocc),vv(nvirt,nvirt,(nocc+1)*nocc/2)
      real*8 dnrm2

      calcnorm = dnrm2(nvirt**2*nocc*(nocc+1)/2,vv,1) +
     $      dnrm2(nvirt*nocc,v,1)

      end function

*********************************************************************
      subroutine crccx(x,t,tt,ccx,nvirt,nocc)
*********************************************************************
      implicit none
      integer nvirt,nocc
      integer i,j,c,d
      integer ijind,cdind
      character*1 x
      real*8 tt(nvirt,nvirt,(nocc+1)*nocc/2)
      real*8 t(nvirt,nocc)
      real*8 ccx((nvirt+1)*nvirt/2,(nocc+1)*nocc/2)

      integer dmax
      real*8 fac

      if(x .eq. 's') then
         dmax = nvirt
         fac = 1.d0
      else ! if(x .eq. 'a')
         dmax = nvirt - 1
         fac = -1.d0
      end if

      do j = 1, nocc
         do i = 1, j
            ijind = j * (j-1) / 2 + i
C$OMP PARALLEL DO Schedule(Dynamic) Default(Shared)
C$OMP& PRIVATE(c,d,cdind)
            do d = 1, dmax

               do c = d+1, nvirt
                  cdind = (d-1) * (2*nvirt-d+2) / 2 + c-d+1
                  ccx(cdind, ijind) =
     $                  0.5d0 * (tt(c, d, ijind) + t(c, i) * t(d, j) +
     $                  fac * (tt(d, c, ijind) + t(d, i) * t(c, j)))
               end do
            end do
C$OMP END PARALLEL DO
         end do
      end do

      ! diagonal elements
      if(x .eq. 's')then
         do j = 1, nocc
            do i = 1, j
               ijind = j * (j-1) / 2 + i
               do d = 1, nvirt
                  cdind = (d-1) * (2*nvirt-d+2) / 2 + 1
                  ccx(cdind, ijind) =
     $                  0.5d0 * (tt(d, d, ijind) + t(d, i) * t(d, j))
               end do
            end do
         end do

      else ! if(x .eq. 'a')
         do j = 1, nocc
            do i = 1, j
               ijind = j * (j-1) / 2 + i
               do d = 1, nvirt
                  cdind = (d-1) * (2*nvirt-d+2) / 2 + 1
                  ccx(cdind, ijind) = 0.d0
               end do
            end do
         end do
      end if

      return
      end subroutine
*********************************************************************
       subroutine readxlist(x,ind,n,tmp,na,nc,recbounds,nocc,nvirt,
     $      ibufln,irecln)
*********************************************************************
       implicit none
       integer nocc, nvirt
       integer ibufln, irecln
       integer c,d,ii,jj,jj0,ll
       integer na,nc,nd,nn,nm
       integer abind,cdind

       integer ind,n
       real*8  tmp(na,nc),buffer(ibufln)
       character*1 x
       character*6 fname
       integer recbounds(nvirt,2)

       if(x.eq.'s')ll=0
       if(x.eq.'a')ll=1
       if(x.eq.'s')fname='abcd.s'
       if(x.eq.'a')fname='abcd.a'

       abind=1
       cdind=1
       if(ind.eq.1)then
         jj0=1
       else
         jj0=recbounds(ind-1,1)+1
       endif
         jj=jj0

       OPEN(599+ll,file=fname,access='direct',recl=irecln,STATUS='OLD')
c      if(nit.eq.1)then
c        if(x.eq.'s')fname='abcd.s.info'
c        if(x.eq.'a')fname='abcd.a.info'
c        open(410+ll,file=fname)
c        rewind 410+ll
c      endif

       nn=nvirt*(nvirt+1)/2
       nd=nn-na
       nm=nocc*(nocc+1)/2
       ii=0
       cdind=nn-na
c      if(nit.eq.1)write(6,'(6i5)')nn,na,nb,nc,ind,n
       do d=ind,ind+n-1
c        do jj=jj0,recbounds(d,1)
c          read(599+ll,rec=jj)buffer(1:ibufln)
c        enddo
c        ii=0
c        wind=1
c        do c=d,nvirt
c          cdind=cdind+1
c          ii=ii+1
c          call dcopy(nn-cdind+1,w(wind),1,
c    $tmp(cdind-(nn-na),cdind-(nn-na)),1)
c          wind=wind+nn-cdind+1
c        enddo

         do c=d,nvirt
           cdind=cdind+1
           do abind=cdind,nn
             ii=ii+1
             if(ii.eq.1)then
               read(599+ll,rec=jj)buffer(1:ibufln)
             endif
             tmp(abind-(nn-na),cdind-(nn-na))=buffer(ii)
c     if(nit.eq.1.and.abs(buffer(ii)).gt.10.d0)
c    $write(410+ll,'(f20.3,3i6)')buffer(ii),abind,c,d
             if(ii.eq.ibufln.or.(jj.eq.recbounds(d,1).and.ii.eq.
     $recbounds(d,2)))then
               jj=jj+1
               ii=0
             endif
           enddo
         enddo
       enddo
c if ind+n-1.eq.nvirt the lower triangle is not needed!

       CLOSE(599+ll)
c      CLOSE(410+ll)
       return
       end subroutine
*********************************************************************
       subroutine xmatextr(x,xmat,kdv,nvirt,nocc)
*********************************************************************
       implicit none
       integer nvirt,nocc
       integer a,b
       integer ijind,abind
       real*8 xmat((nvirt+1)*nvirt/2,(nocc+1)*nocc/2)
       real*8 kdv(nvirt,nvirt,(nocc+1)*nocc/2)
       character*1 x
       if(x.eq.'s')then
       do ijind=1,(nocc+1)*nocc/2
         kdv(1,1,ijind)=kdv(1,1,ijind)+xmat(1,ijind)
         abind=1
         do b=1,nvirt-1
           abind=abind+1
           do a=b+1,nvirt
             kdv(a,b,ijind)=kdv(a,b,ijind)+xmat(abind,ijind)
             kdv(b,a,ijind)=kdv(b,a,ijind)+xmat(abind,ijind)
             abind=abind+1
           enddo
           kdv(b+1,b+1,ijind)=kdv(b+1,b+1,ijind)+xmat(abind,ijind)
         enddo
       enddo
       endif

       if(x.eq.'a')then
       do ijind=1,(nocc+1)*nocc/2
         abind=1
         do b=1,nvirt-1
           abind=abind+1
           do a=b+1,nvirt
             kdv(a,b,ijind)=kdv(a,b,ijind)+xmat(abind,ijind)
             kdv(b,a,ijind)=kdv(b,a,ijind)-xmat(abind,ijind)
             abind=abind+1
           enddo
         enddo
       enddo
       endif
       return

       end subroutine
*********************************************************************
* timer call                                                        *
*********************************************************************
       subroutine stopper(id,ison)
*********************************************************************
       implicit none
       real*8 tim, timold, wct, wctold
       integer id
       character*1 ison
       save timold, wctold
       if(ison.eq.'y')then
       timold=tim
       wctold=wct
       tim=1.d-15
       wct=1.d-15
       call mtime(tim,wct)
       write(*,"(' CPU:',f9.5,
     $' Wall:',f14.5,' xCPU:',f9.5,' xWall:',2f9.5)")
     $tim/60.d0,wct/60.d0,tim-timold,wct-wctold,
     $ (tim-timold)/(wct-wctold)
       id=id+1
       endif

       return
       end subroutine
*********************************************************************
       subroutine rbwrt(recbounds,nvirt,ibufln)
*********************************************************************
       implicit none
       integer nvirt, ibufln
       integer recbounds(nvirt,2)
       integer d,n
       n=nvirt
c      rewind 906
       do d=1,nvirt
         recbounds(d,2)=modulo(((n-d+1)**2+1)*(n-d+1)/2,ibufln)
         if(recbounds(d,2).eq.0)recbounds(d,2)=ibufln
       enddo
       recbounds(1,1)=((n**2+1)*n/2-recbounds(1,2))/ibufln+1
c      write(906,*)recbounds(1,1:2)
       do d=2,nvirt
         recbounds(d,1)=recbounds(d-1,1)+(((n-d+1)**2+1)*(n-d+1)/2-
     $recbounds(d,2))/ibufln+1
c        write(906,*)recbounds(d,1:2)
       enddo
       return
       end subroutine
************************************************************************
      subroutine rpacorecc(nvirt,nocc,ttt,tp1,vvv,fab,fij,eref,tol,maxit
     $,vscr,bmat,bvec,invbmat,cvec,iout,erpa,scrs,localcc,int1,ui,esdi,
     $isrpa,bsymmv,symmocc,dgroup,first,last,co,w,wsize,dcore,imem,
     $diisfile,errfile,tfile,tinfo,ifltln,ibufln,irecln,nbasis,nirmax,
     $multpg,locnoc)
************************************************************************
      implicit none
      integer diisfile,errfile,tfile,tinfo,ifltln,ibufln,irecln
      integer nvirt,nocc,a,b,i,j,k,l,lk,kj,il,ij,lj
      integer nit,ndim,maxit,nn,iout, nbasis,nirmax
      integer multpg(nirmax,nirmax),locnoc
      real*8 ttt(nvirt,nvirt,nocc*(nocc+1)/2),fij(nocc,nocc),erpa,eref
      real*8 tp1(nvirt,nvirt,nocc*(nocc+1)/2),scrs(nvirt,nvirt),alpha
      real*8 vvv(nvirt,nvirt,nocc*(nocc+1)/2),fab(nvirt,nvirt),erpaold
      real*8 bmat(maxit,maxit),invbmat(maxit,maxit),ddot,tol
      real*8 cvec(maxit),bvec(maxit**2),vscr(*),esdi,norm
      real*8 int1(nocc,nocc),ui(nocc),coeff
      character*4 localcc
      integer isrpa,wsize
       integer first(0:5,8,8,nvirt+nocc),last(0:5,8,8,nvirt+nocc)
       integer co(nvirt+nocc)
       integer bsymmv(nocc+1),symmocc(nocc)
       integer dgroup,imem
       real*8 dcore(1)
       real*8 w(wsize)

      ndim=nocc*nvirt
      nn=nvirt**2*nocc*(nocc+1)/2

      IF(isrpa.eq.1)THEN
        write(iout,'(" RPA calculation")')
        write(iout,*)
C Construct B (=L with i.le.j)
        call llread(dcore(imem), nocc, nvirt, ifltln)
        call bbextract(dcore(imem),dcore(imem+nvirt**2*nocc**2)
     $   ,nocc,nvirt,ibufln,irecln)
C Construct A
       call aaextract(dcore(imem),dcore(imem+nvirt**2*nocc**2),
     $      bsymmv,symmocc,dgroup,first,last,co,w,
     $      irecln,ibufln,nbasis,nocc,nvirt,nirmax,multpg,ifltln)
      ELSE
        write(iout,'(" dRPA calculation")')
        write(iout,*)
      ENDIF
C Starting iteration
      erpa=0.d0
      erpaold=1.d0
      nit=0
      norm=tol+1.d0
       write(iout,'(21x,"Error vector norm",6x,"Energy")')
       do while((dabs(erpa-erpaold).gt.tol.or.
     $norm/10.gt.tol).and.nit.lt.maxit)
        erpaold=erpa
        nit=nit+1
C Calculate 1+T
        call dcopy(nn,ttt,1,tp1,1)
        IF(isrpa.eq.1)THEN
        ELSE
         do i=1,nocc
           ij=i*(i+1)/2
           do a=1,nvirt
            tp1(a,a,ij)=1.d0+tp1(a,a,ij)
           enddo
         enddo
        ENDIF
C Update T
      IF(isrpa.eq.1)THEN
C Read B (L)
        call bbread(vvv,nocc,nvirt,ibufln,irecln)
C Copy B to Tnew
        call dcopy(nn,vvv,1,ttt,1)
        coeff=1.d0
      ELSE
        call dfillzero(ttt,nn)
        coeff=2.d0
      ENDIF
       do j=1,nocc
         do l=1,nocc
C (1)
           call dfillzero(scrs,nvirt**2)
           do k=1,nocc
             if(k.ge.l.and.k.le.j) then
               kj=(j-1)*j/2+k
               lk=(k-1)*k/2+l
               call dgemm('n','n',nvirt,nvirt,nvirt,coeff,
     $vvv(1,1,lk),nvirt,tp1(1,1,kj),nvirt,1.d0,scrs,nvirt)
             endif
             if(k.ge.l.and.k.gt.j) then
               kj=(k-1)*k/2+j
               lk=(k-1)*k/2+l
               call dgemm('n','t',nvirt,nvirt,nvirt,coeff,
     $vvv(1,1,lk),nvirt,tp1(1,1,kj),nvirt,1.d0,scrs,nvirt)
             endif
             if(k.lt.l.and.k.le.j) then
               kj=(j-1)*j/2+k
               lk=(l-1)*l/2+k
               call dgemm('t','n',nvirt,nvirt,nvirt,coeff,
     $vvv(1,1,lk),nvirt,tp1(1,1,kj),nvirt,1.d0,scrs,nvirt)
             endif
             if(k.lt.l.and.k.gt.j) then
               kj=(k-1)*k/2+j
               lk=(l-1)*l/2+k
               call dgemm('t','t',nvirt,nvirt,nvirt,coeff,
     $vvv(1,1,lk),nvirt,tp1(1,1,kj),nvirt,1.d0,scrs,nvirt)
             endif
           enddo
C (2)
           do i=1,j
             if(i.le.l) then
               il=(l-1)*l/2+i
               ij=(j-1)*j/2+i
               call dgemm('n','n',nvirt,nvirt,nvirt,1.d0,
     $tp1(1,1,il),nvirt,scrs,nvirt,1.d0,ttt(1,1,ij),nvirt)
             endif
             if(i.gt.l) then
               il=(i-1)*i/2+l
               ij=(j-1)*j/2+i
               call dgemm('t','n',nvirt,nvirt,nvirt,1.d0,
     $tp1(1,1,il),nvirt,scrs,nvirt,1.d0,ttt(1,1,ij),nvirt)
             endif
           enddo
         enddo
       enddo
c      write(6,*)'B+TBT'
c      do j=1,nocc
c        do i=1,j
c          ij=(j-1)*j/2+i
c          do b=1,nvirt
c            do a=1,nvirt
c              write(6,'(4i3,e16.8)')a,b,i,j,ttt(a,b,ij)
c            enddo
c          enddo
c        enddo
c      enddo
      IF(isrpa.eq.1)THEN
C Read A
      call  aaread(vvv,nocc,nvirt,ibufln,irecln)
c      write(6,*)'A'
c      do j=1,nocc
c        do i=1,j
c          ij=(j-1)*j/2+i
c          do b=1,nvirt
c            do a=1,nvirt
c              write(6,'(4i3,e16.8)')a,b,i,j,vvv(a,b,ij)
c            enddo
c          enddo
c        enddo
c      enddo
C Calculate AT+TA
       do j=1,nocc
         do i=1,j
           ij=(j-1)*j/2+i
           do l=1,nocc
             if(j.le.l)then
               lj=(l-1)*l/2+j
             if(i.le.l) then
               il=(l-1)*l/2+i
               call dgemm('n','t',nvirt,nvirt,nvirt,1.d0,
     $vvv(1,1,il),nvirt,tp1(1,1,lj),nvirt,1.d0,ttt(1,1,ij),nvirt)
               call dgemm('n','t',nvirt,nvirt,nvirt,1.d0,
     $tp1(1,1,il),nvirt,vvv(1,1,lj),nvirt,1.d0,ttt(1,1,ij),nvirt)
             endif
             if(i.gt.l) then
               il=(i-1)*i/2+l
               call dgemm('t','t',nvirt,nvirt,nvirt,1.d0,
     $vvv(1,1,il),nvirt,tp1(1,1,lj),nvirt,1.d0,ttt(1,1,ij),nvirt)
               call dgemm('t','t',nvirt,nvirt,nvirt,1.d0,
     $tp1(1,1,il),nvirt,vvv(1,1,lj),nvirt,1.d0,ttt(1,1,ij),nvirt)
             endif
             endif
             if(j.gt.l)then
               lj=(j-1)*j/2+l
             if(i.le.l) then
               il=(l-1)*l/2+i
               call dgemm('n','n',nvirt,nvirt,nvirt,1.d0,
     $vvv(1,1,il),nvirt,tp1(1,1,lj),nvirt,1.d0,ttt(1,1,ij),nvirt)
               call dgemm('n','n',nvirt,nvirt,nvirt,1.d0,
     $tp1(1,1,il),nvirt,vvv(1,1,lj),nvirt,1.d0,ttt(1,1,ij),nvirt)
             endif
             if(i.gt.l) then
               il=(i-1)*i/2+l
               call dgemm('t','n',nvirt,nvirt,nvirt,1.d0,
     $vvv(1,1,il),nvirt,tp1(1,1,lj),nvirt,1.d0,ttt(1,1,ij),nvirt)
               call dgemm('t','n',nvirt,nvirt,nvirt,1.d0,
     $tp1(1,1,il),nvirt,vvv(1,1,lj),nvirt,1.d0,ttt(1,1,ij),nvirt)
             endif
             endif
           enddo
         enddo
       enddo
C ITT
c      write(6,*)'+AT+TA'
c      do j=1,nocc
c        do i=1,j
c          ij=(j-1)*j/2+i
c          do b=1,nvirt
c            do a=1,nvirt
c              write(6,'(4i3,e16.8)')a,b,i,j,ttt(a,b,ij)
c            enddo
c          enddo
c        enddo
c      enddo
      ENDIF
       do j=1,nocc
         do i=1,j
           ij=(j-1)*j/2+i
           do b=1,nvirt
             do a=1,nvirt
       ttt(a,b,ij)=ttt(a,b,ij)/(fij(i,i)+fij(j,j)-fab(a,a)-fab(b,b))
             enddo
           enddo
         enddo
       enddo
      IF(isrpa.eq.1)THEN
c       call dscal(nn,0.5d0,ttt,1)
      ENDIF
c      write(6,*)'T new'
c      do j=1,nocc
c        do i=1,j
c          ij=(j-1)*j/2+i
c          do b=1,nvirt
c            do a=1,nvirt
c              write(6,'(4i3,e16.8)')a,b,i,j,ttt(a,b,ij)
c            enddo
c          enddo
c        enddo
c      enddo

C DIIS extrapolation
        call rpa_diis(nit,nn,ttt,vscr,maxit,bmat,bvec,invbmat,cvec,
     $        norm,diisfile,errfile,tfile,tinfo,ifltln,ibufln,irecln,
     $        nvirt,nocc)
C Read K
      call vvinit(vvv,bsymmv,symmocc,dgroup,first,last,co,w,
     $irecln,ibufln,nocc,nvirt,nbasis,nirmax,multpg)
C Calculate energy
       erpa=0.d0
       do j=1,nocc
         do i=1,j
           ij=(j-1)*j/2+i
c          IF(isrpa.eq.1)THEN
c          if(i.eq.j)then
c            alpha=0.5d0
c          else
c            alpha=1.d0
c          endif
c          ELSE
           if(i.eq.j)then
             alpha=1.d0
           else
             alpha=2.d0
           endif
c          ENDIF
           erpa=erpa+alpha*ddot(nvirt**2,ttt(1,1,ij),1,vvv(1,1,ij),1)
         enddo
       enddo
c       erpa=ddot(nn,vvv,1,ttt,1)
c       write(6,*)
c       write(6,"(1x,70('='))")
c       write(6,"(' Iteration',i3,'   Energy: ',2f20.12)")
        write(iout,"(' Iteration',i3,2f22.12)") nit,norm,erpa+eref
c       write(6,*)
       enddo
C Calculate local energy contribution
       if(localcc.eq.'drpa'.or.localcc.eq.'rpa ')then
         call dfillzero(int1,nocc**2)
         do j=1,nocc
           do k=1,nocc
             do l=1,nocc
               if(l.le.j.and.k.le.j)then
                 lj=(j-1)*j/2+l
                 kj=(j-1)*j/2+k
                 int1(l,k)=int1(l,k)+ddot(nvirt**2,
     $ttt(1,1,lj),1,vvv(1,1,kj),1)
               endif
               if(l.gt.j.and.k.gt.j)then
                 lj=(l-1)*l/2+j
                 kj=(k-1)*k/2+j
                 int1(l,k)=int1(l,k)+ddot(nvirt**2,
     $ttt(1,1,lj),1,vvv(1,1,kj),1)
               endif
               if(l.gt.j.and.k.le.j)then
                 lj=(l-1)*l/2+j
                 kj=(j-1)*j/2+k
                 call tr(nvirt,ttt(1,1,lj),scrs)
                 int1(l,k)=int1(l,k)+ddot(nvirt**2,
     $scrs,1,vvv(1,1,kj),1)
               endif
               if(l.le.j.and.k.gt.j)then
                 lj=(j-1)*j/2+l
                 kj=(k-1)*k/2+j
                 call tr(nvirt,vvv(1,1,kj),scrs)
                 int1(l,k)=int1(l,k)+ddot(nvirt**2,
     $scrs,1,ttt(1,1,lj),1)
               endif
             enddo
           enddo
         enddo
c       esdi= !!??? Ui'*INT1*Ui
        call dgemv('n',nocc,nocc,1.d0,int1,nocc,ui,1,0.d0,scrs,1)
        esdi=ddot(nocc,ui,1,scrs,1)

       endif

       write(iout,*)
       if(dabs(erpa-erpaold).gt.tol.or.norm/10.gt.tol) then
         write(iout,
     $"(' Convergence not achieved in',i3,' iterations!')") nit
         call mrccend(0)
       else
         IF(isrpa.eq.1)THEN
         write(iout,"(' RPA iteration has converged in',i3,' steps.')")
     $nit
         ELSE
         write(iout,"(' dRPA iteration has converged in',i3,' steps.')")
     $nit
         ENDIF
         write(iout,*)
         if(localcc.ne.'off ')then
           IF(isrpa.eq.1)THEN
           write(iout,'(" RPA correlation energy [au]:    ",f22.12)')
     $esdi
           write(iout,*)
            if(localcc.eq.'rpa ')
     $           call prtenergc('RPA            ',esdi,eref,locnoc)
           else
           write(iout,'(" dRPA correlation energy [au]:   ",f22.12)')
     $esdi
           write(iout,*)
            if(localcc.eq.'drpa')
     $           call prtenergc('dRPA           ',esdi,eref,locnoc)
           endif
         else
         IF(isrpa.eq.1)THEN
           write(iout,'(" RPA correlation energy [au]:    ",f22.12)')
     $erpa
           write(iout,'(" Total RPA energy [au]:          ",f22.12)')
     $erpa+eref
         ELSE
           write(iout,'(" dRPA correlation energy [au]:   ",f22.12)')
     $erpa
           write(iout,'(" Total dRPA energy [au]:         ",f22.12)')
     $erpa+eref
         ENDIF
           write(iout,*)
            if(localcc.eq.'drpa')
     $           call prtenergc('dRPA           ',erpa,eref,locnoc)
         endif
         write(iout,*)
       endif
C
       IF(isrpa.eq.1)THEN
         open(714,file='bbfile')
         close(714,status='delete')
         open(715,file='aafile')
         close(715,status='delete')
       ENDIF
      return
      end subroutine
C {{{ readdfints
************************************************************************
      subroutine readdfints(jij,jai,jab,cmo,nocc,nvirt,dfnb,sjij,sjab,
     $jai2,localcc,ccsdalg,lccoporder,move)
************************************************************************
c read and expand 3 center DF integrals in the can MO basis
************************************************************************
      implicit none
      integer i,j,a,b,nocc,nvirt,ij,ab,dfnb
      real*8  jij(dfnb,nocc,nocc),sjij(dfnb,nocc*(nocc+1)/2)
      real*8  jai(nocc,nvirt,dfnb),cmo(nocc,nocc),jai2(nvirt,nocc,dfnb)
      real*8  jab(dfnb,nvirt,nvirt),sjab(dfnb,nvirt*(nvirt+1)/2)
      real*8  move((dfnb+nvirt**2)/2)
      character*4 localcc
      character*8 ccsdalg,lccoporder

      if(ccsdalg .eq. 'disk    ') then
c
c read J_ij from DFINT_IJ in (P,ij) order, upper triangle format for ij indices
         open(111,file='DFINT_IJ',form='unformatted')
         read(111)  sjij
         close(111)
         ij=0
         do j=1,nocc
          do i=1,j
           ij=ij+1
           jij(1:dfnb,i,j)=sjij(1:dfnb,ij)
           jij(1:dfnb,j,i)=sjij(1:dfnb,ij)
          enddo
         enddo
c read J_ab from DFINT_AB in (P,ab) order, upper triangle format for ab indices
         open(111,file='DFINT_AB',form='unformatted')
         read(111)  sjab
         close(111)
         ab=0
         do b=1,nvirt
            do a=1,b
               ab=ab+1
               jab(1:dfnb,a,b)=sjab(1:dfnb,ab)
               jab(1:dfnb,b,a)=sjab(1:dfnb,ab)
            enddo
         enddo
         if (localcc.ne.'off ') then
c read J_ai from DFINT_AI in J(i,a,P) order
          open(111,file='DFINT_AI',form='unformatted')
          read(111) jai
          close(111)
         else
c read J_ai from DFINT_AI in J(a,i,P) order and transpose
          open(111,file='DFINT_AI',form='unformatted')
          read(111) jai2
          close(111)
          do i=1,nocc
           do a=1,nvirt
           call dcopy(dfnb,jai2(a,i,1:dfnb),1,jai(i,a,1:dfnb),1)
           enddo
          enddo
         endif
      else if(ccsdalg .eq. 'dfdirect') then
         ! reorder indices of DF integrals
         call reorderdfints(jij, jab, nocc, nvirt, dfnb, move)

      end if

      if (localcc.ne.'off ') then
c read occ MO basis of Laplace (T)
       if (lccoporder.eq.'trffirst'.or.ccsdalg .eq. 'disk    ') then
         open(111,file='laplbas',form='unformatted')
         read(111) cmo
         close(111)
c else : already in memory @ igsl position
       endif
      else
       cmo=0.d0
       do i=1,nocc
        cmo(i,i)=1.d0
       enddo
      end if
c
      return
      end subroutine readdfints
c }}}
c
C {{{ tprt
************************************************************************
      subroutine tprt(ltpr,ncore,nocc,nvirt,t,tt,tprtol,iout,t1t1)
************************************************************************
c print CCSD amplitude diagnostics
************************************************************************
      implicit none
      integer ncore,nocc,nvirt,i,j,a,b,iout,ij,iisyev
      real*8 tprtol,t(nvirt,nocc),ddot,t1max,t2max,t1t1(nocc,nocc)
      real*8 tt(nvirt,nvirt,(nocc+1)*nocc/2),norm,mrtol
      logical ltpr
      character*40 str40
      character*6 str6
      integer*4 isyev
      equivalence(isyev,iisyev) !For Intel
c in stack
      real*8 eigval(nocc),scr(20*nocc)
      mrtol=0.15d0
c
      t1max=0.d0
      t2max=0.d0
      do i=1,nocc
       do a=1,nvirt
        if (dabs(t(a,i)).gt.dabs(t1max)) t1max=t(a,i)
        do j=1,i
         ij=j*(j-1)/2+i
         do b=1,nvirt
          if (dabs(tt(b,a,ij)).gt.dabs(t2max)) t2max=tt(b,a,ij)
         enddo
        enddo
       enddo
      enddo
c T1 diagnostic
      norm=ddot(nocc*nvirt,t,1,t,1)
c D1 diagnostic
      call dsyrk('u','t',nocc,nvirt,1.d0,t,nvirt,0.d0,t1t1,nocc)
      call dsyev('N','U',nocc,t1t1,nocc,eigval,scr,20*nocc,isyev)
      if(isyev.ne.0) then
       write(iout,*) 'Fatal error at the D1 diagnostic evaluation'
       eigval(nocc)=0.d0
c       call mrccend(1)
      endif
      write(iout,*)
      write(iout,"(' T1 diagnostic: ',es14.3,12x,
     $'D1 diagnostic: ',es14.3)")
     $dsqrt(norm/(nocc*2.d0)),dsqrt(eigval(nocc))
c
      if (ltpr) then
       write(iout,*) 'Dominant cluster amplitudes'
       write(iout,"(' Printing threshold: ',1pe9.2)") tprtol
       write(iout,*)
       do i=1,nocc
        do a=1,nvirt
         if (dabs(t(a,i)).gt.tprtol) then
          write(str6,'(i6)') i+ncore
          str40= '      ' // trim(adjustl(str6)) // ' -> '
          write(str6,'(i6)') a+nocc+ncore
          str40=trim(str40) // ' ' // trim(adjustl(str6))
          write(iout,"(1es10.3,a40)") t(a,i), str40
         endif
        enddo
       enddo
       do i=1,nocc
        do j=1,i
         ij=(i-1)*i/2+j
         do a=1,nvirt
          do b=1,nvirt
           if (dabs(tt(b,a,ij)).gt.tprtol) then
            write(str6,'(i6)') j+ncore
            str40= '    ' // trim(adjustl(str6))
            write(str6,'(i6)') i+ncore
            str40=trim(str40) // ' ' // trim(adjustl(str6)) // ' -> '
            write(str6,'(i6)') b+nocc+ncore
            str40=trim(str40) // ' ' // trim(adjustl(str6))
            write(str6,'(i6)') a+nocc+ncore
            str40=trim(str40) // ' ' // trim(adjustl(str6))
            write(iout,"(1es10.3,a40)") tt(b,a,ij), str40
           endif
          enddo
         enddo
        enddo
       enddo
      endif
c
       if (dabs(t1max).gt.mrtol.or.dabs(t2max).gt.mrtol.or.ltpr) then
        write(iout,*)
        if (.not.ltpr)write(iout,*)'Warning: large CCSD amplitude found'
        write(iout,"(' Largest T1 amplitude: ',1es9.2)") t1max
        write(iout,"(' Largest T2 amplitude: ',1es9.2)") t2max
       endif
c
      return
      end subroutine tprt
c }}}
c
C {{{ largest
************************************************************************
      subroutine largest(nocc,nvirt,vv,v,tt,t,scr,scr2,map,iout)
************************************************************************
c print largest components of the input arrays
************************************************************************
      implicit none
      integer nocc,nvirt,i,j,a,b,map(nvirt**2*(nocc+1)*nocc/2),d,ij,k
      integer iout
      real*8 vv(nvirt,nvirt,(nocc+1)*nocc/2),v(nvirt,nocc)
      real*8 tt(nvirt,nvirt,(nocc+1)*nocc/2),t(nvirt,nocc)
      real*8 scr(nvirt,nvirt,(nocc+1)*nocc/2),scr2(nvirt,nocc)
c
      write(iout,*)
      write(iout,*) 'Largest residuals'
      d=nvirt**2*(nocc+1)*nocc/2
      scr(1:nvirt,1:nvirt,1:(nocc+1)*nocc/2)=
     $ dabs(vv(1:nvirt,1:nvirt,1:(nocc+1)*nocc/2))
      call qsortd(map,d,scr)
      do k=1,min(10,d)
       a=mod(map(d-k+1),nvirt)
       if (a.eq.0) a=nvirt
       b=mod(map(d-k+1),nvirt**2)
       if (b.eq.0) then
        if (a.ne.nvirt) then
           write(iout,*) 'sg wrong'
           call mrccend(1)
        end if
        if (a.eq.nvirt) b=nvirt
       else
        b=(b-a)/nvirt+1
       endif
       ij=(map(d-k+1)-a-(b-1)*nvirt)/nvirt**2+1
       do i=1,nocc
        if (i*(i-1)/2.ge.ij) exit
       enddo
       i=i-1
       j=ij-i*(i-1)/2
       write(iout,'(i10,5i6,2es16.8)')
     $ map(d-k+1),a,b,i,j,ij,vv(a,b,ij),tt(a,b,ij)
      enddo
c singles
      d=nvirt*nocc
      scr2(1:nvirt,1:nocc)=dabs(v(1:nvirt,1:nocc))
      call qsortd(map,d,scr2)
      do k=1,min(10,d)
       a=mod(map(d-k+1),nvirt)
       if (a.eq.0) a=nvirt
       i=(map(d-k+1)-a)/nvirt+1
       write(iout,'(3i9,2es16.8)') map(d-k+1),a,i,v(a,i),t(a,i)
      enddo

      write(iout,*)
      write(iout,*) 'Largest amplitudes'
      d=nvirt**2*(nocc+1)*nocc/2
      scr(1:nvirt,1:nvirt,1:(nocc+1)*nocc/2)=
     $ dabs(tt(1:nvirt,1:nvirt,1:(nocc+1)*nocc/2))
      call qsortd(map,d,scr)
      do k=1,min(10,d)
       a=mod(map(d-k+1),nvirt)
       if (a.eq.0) a=nvirt
       b=mod(map(d-k+1),nvirt**2)
       if (b.eq.0) then
        if (a.ne.nvirt) then
           write(iout,*) 'sg wrong'
           call mrccend(1)
        end if
        if (a.eq.nvirt) b=nvirt
       else
        b=(b-a)/nvirt+1
       endif
       ij=(map(d-k+1)-a-(b-1)*nvirt)/nvirt**2+1
       do i=1,nocc
        if (i*(i-1)/2.ge.ij) exit
       enddo
       i=i-1
       j=ij-i*(i-1)/2
       write(iout,'(i10,5i6,2es16.8)')
     $map(d-k+1),a,b,i,j,ij,vv(a,b,ij),tt(a,b,ij)
      enddo
c singles
      d=nvirt*nocc
      scr2(1:nvirt,1:nocc)=dabs(t(1:nvirt,1:nocc))
      call qsortd(map,d,scr2)
      do k=1,min(10,d)
       a=mod(map(d-k+1),nvirt)
       if (a.eq.0) a=nvirt
       i=(map(d-k+1)-a)/nvirt+1
       write(iout,'(3i9,2es16.8)') map(d-k+1),a,i,v(a,i),t(a,i)
      enddo
      write(iout,*)
c
      return
      end subroutine
c }}}
c
************************************************************************
      subroutine abcixcalc(x, abcix, abci_list, nvirt, n)
************************************************************************
c Generates <ab||ci> = <ab|ci> - <ba|ci> or <ab|||ci> = <ab|ci> + <ba|ci>
      implicit none
      character x
      real*8 abcix(nvirt*(nvirt+1)/2, nvirt, n)
      real*8 abci_list(nvirt, nvirt, nvirt, n)
      integer nvirt, n

      real*8 fac
      integer ind, a, b, c, i

      if(x .eq. 's') then
         fac = 1.d0
      else if(x .eq. 'a') then
         fac = -1.d0
      end if

      do i = 1, n
         do c = 1, nvirt
            ind = 0
            do b = 1, nvirt
               do a = b, nvirt
                  ind = ind + 1
                  abcix(ind, c, i) = abci_list(a, b, c, i) +
     $                  fac * abci_list(b, a, c, i)
               end do
            end do
         end do
      end do

      end subroutine

************************************************************************
      subroutine abckccalc(x, nocc, nvirt, n, ind, abcj_abcj,
     $      ccx, abcix, kdo_akij, nmmat)
************************************************************************
      implicit none
      character x
      integer n, nvirt, nocc, ind
      real*8 abcj_abcj(nvirt, nvirt, nvirt, n)
      real*8 ccx(nvirt*(nvirt+1)/2, nocc*(nocc+1)/2)
      real*8 nmmat(nocc*(nocc+1)/2, nvirt, nocc)
      real*8 abcix(nvirt*(nvirt+1)/2, nvirt, n)
      real*8 kdo_akij(nvirt, nocc, nocc, nocc)

      integer ttind, i, j, k
      real*8 fac

c      call crccx(x, t, tt, ccx, nvirt, nocc)

      ! generate <ab||ci> or <ab|||ci> in abcix
      call abcixcalc(x, abcix, abcj_abcj, nvirt, n)

      ! abcix * C(+/-) -> M/N
      call dgemm('t', 'n', nocc*(nocc+1)/2, nvirt*n, nvirt*(nvirt+1)/2,
     $      1.d0, ccx, nvirt*(nvirt+1)/2, abcix, nvirt*(nvirt+1)/2,
     $      0.d0, nmmat(1, 1, ind), nocc*(nocc+1)/2)

      ! sort N/M into kdo
      if(x .eq. 's') then
         fac = 1.d0
      else if(x .eq. 'a') then
         fac = -1.d0
      end if

      do j = 1, nocc
         do i = 1, j
            ! diagonal elements of N are zero
            if(x.eq.'a'  .and. i.eq.j) cycle

            ttind = j * (j-1) / 2 + i
            do k = ind, ind+n-1
               if(x .eq. 'a') then
                  ! first call
                  kdo_akij(1:nvirt,k,i,j) = nmmat(ttind, 1:nvirt, k)
               else
                  ! N already in kdo
                  kdo_akij(1:nvirt,k,i,j) = kdo_akij(1:nvirt,k,i,j) +
     $                  nmmat(ttind, 1:nvirt, k)
               end if

               if(i .ne. j) kdo_akij(1:nvirt, k, j, i) =
     $               kdo_akij(1:nvirt, k, j, i) +
     $               fac * nmmat(ttind, 1:nvirt, k)
            end do
         end do
      end do

      end subroutine

************************************************************************
      subroutine calcccfori(t, tt, nocc, nvirt, cci, i, tr)
************************************************************************
      implicit none
      real*8 t(nvirt, nocc), tt(nvirt, nvirt, nocc*(nocc+1)/2)
      integer nocc, nvirt, i
      real*8 cci(nvirt, nvirt, nocc)
      character tr

      integer j, ttind

      do j = 1, nocc
         if(tr .eq. 'n') then
            if(j .le. i) then
               ttind = i * (i-1) / 2 + j
               call ccijcalc(cci(1, 1, j), nvirt, t(1, j), t(1, i),
     $               tt(1, 1, ttind))
            else
               ttind = j * (j-1) / 2 + i
               call ccijtrcalc(cci(1, 1, j), nvirt, t(1, i), t(1, j),
     $               tt(1, 1, ttind))
            end if
         else if(tr .eq. 't') then
            if(i .le. j) then
               ttind = j * (j-1) / 2 + i
               call ccijcalc(cci(1, 1, j), nvirt, t(1, i), t(1, j),
     $               tt(1, 1, ttind))
            else
               ttind = i * (i-1) / 2 + j
               call ccijtrcalc(cci(1, 1, j), nvirt, t(1, j), t(1, i),
     $               tt(1, 1, ttind))
            end if
         end if
      end do

      end subroutine

************************************************************************
      function blocknumber(firstdim, seconddim, lastdim,
     $      constant, ind, bsize, message, avmem)
************************************************************************
c firstdim: size of the first two dimensions (first two indices of the integrals)
c seconddim: second dimension, may vary with the value of the last index
c lastdim: size of the last (fourth) dimension
c constant: whether the second dimension is constant
c ind: first block to read
      implicit none
      integer blocknumber, bsize, avmem
      integer firstdim, seconddim, lastdim, ind
      character constant
      character*40 message

      integer n
      integer blocksize
      logical notfull

      bsize = avmem

      if(constant .eq. 'y') then
c block size is constant
         blocksize = firstdim * seconddim

         if(blocksize .ne. 0) then
            n = (avmem - mod(avmem, blocksize)) / blocksize
         else
            n = 0
         end if

         bsize = n * blocksize

      else
c block size not constant
         notfull = .true.
         n = 0

         do while(notfull .and. (ind + n .le. lastdim))
            blocksize = firstdim * (seconddim - ind - n + 1)

            if(avmem .ge. blocksize) then
               avmem = avmem - blocksize
               n = n + 1
            else
               notfull = .false.
            endif
         enddo

         bsize = bsize - avmem
      end if

      ! insufficient memory for one block
      if(n.le.0 .and. lastdim.ne.0)
     $      call memerr(blocksize, avmem, message)

      ! all remaining blocks fit into the memory
      if(ind + n .gt. lastdim) n = lastdim - ind + 1

      blocknumber = n

      end function

#if defined(MPI)
************************************************************************
      subroutine sum_v(v2, v1, v_len, v_type) ! tt1 = tt1 + tt2 {{{
************************************************************************
c      implicit none
c      integer v_len, v_type
c      type(v_type) :: v1(*), v2(*)
c
c      integer i
c
c      do i = 1, v_len
c         v1(i) = v1(i) + v2(i)
c      end do

      use, intrinsic :: iso_c_binding, only : c_ptr, c_f_pointer
c      use mpi_f08
c#include <mpif.h>
#if defined(MPI)
      include "mpif.h"
#endif

      type(c_ptr), value :: v1, v2
      integer v_len
c      type(MPI_Datatype) :: v_type
      integer v_type

      real*8, pointer :: v1_r(:), v2_r(:)
      integer(kind = MPI_ADDRESS_KIND) lb, sizeof_double, sizeof_vtype
      integer mpi_err

c      integer num
c      data num/0/
c      save num
c      num=num+1

      call MPI_Type_get_extent(MPI_DOUBLE_PRECISION, lb,
     $      sizeof_double, mpi_err)
      call MPI_Type_get_extent(v_type, lb,
     $      sizeof_vtype, mpi_err)

c      write(*,*) "sizes",num,v_len,sizeof_vtype / sizeof_double,
c     $ v_len * sizeof_vtype / sizeof_double

c sum real*8 arrays
      call c_f_pointer(v1, v1_r,
     $      (/ v_len * sizeof_vtype / sizeof_double /) )
      call c_f_pointer(v2, v2_r,
     $      (/ v_len * sizeof_vtype / sizeof_double /) )
      v1_r = v1_r + v2_r

      end subroutine ! }}}

      subroutine prk_communicate(mpi_rank,mpi_size,nummes,comm_scr,
     $prk,prk_sent,mpi_ccsd, ccsd_communicator) ! {{{
************************************************************************
      implicit none
      include "mpif.h"
      integer, intent(in) :: mpi_rank, mpi_size, ccsd_communicator
      integer             :: request, mpi_err, comm_scr,nummes
      integer             :: stat(MPI_STATUS_SIZE), prk, prk_sent
      logical             :: flag, mpi_ccsd

      if(mpi_ccsd) then
          if(mpi_rank .ne. mpi_size-1) then  ! TODO: use master_rank or master_thread
             flag = .true.
             if(nummes .ne. 0) then
                  call MPI_Test(request, flag, stat, mpi_err)
             end if

             if(.not.flag) then
                call MPI_Cancel(request, mpi_err)
             else
                comm_scr = 0
             end if

             comm_scr = comm_scr + prk - prk_sent
             prk_sent = prk

            call MPI_Isend(comm_scr, 1, MPI_INTEGER_MRCC, mpi_size-1,
     $            0, ccsd_communicator, request, mpi_err)
             nummes=nummes+1
          else if(mpi_ccsd) then ! last rank receives if ccsd is running MPI parallel
            flag = .true.
            do while(flag)
               call MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG,
     $               ccsd_communicator, flag, stat, mpi_err)

               if(flag) then
                  call MPI_Recv(comm_scr, 1, MPI_INTEGER_MRCC,
     $                  stat(MPI_SOURCE), stat(MPI_TAG),
     $                  ccsd_communicator, stat, mpi_err)
                  prk = prk + comm_scr
               end if
            end do
          end if
      endif ! mpi_ccsd
      return
      end subroutine ! }}}

************************************************************************
      subroutine prk_communicate_fin(mpi_rank,mpi_size,
     $prk,prold,dkji,iout,et,mpi_ccsd, ccsd_communicator) ! {{{
************************************************************************
      implicit none
      include "mpif.h"
      integer, intent(in) :: mpi_rank, mpi_size, ccsd_communicator
      integer             :: request, mpi_err, comm_scr, iout
      integer             :: stat(MPI_STATUS_SIZE), prk,prold,nint
      logical             :: flag, mpi_ccsd, ptdone
      real*8 :: dkji,et,pr

      if(mpi_ccsd) then
        call MPI_Ibarrier(ccsd_communicator, request, mpi_err)
        if(mpi_rank .eq. mpi_size-1) then  ! TODO: use master_rank or master_thread
           ptdone = .false.
           do while(.not.ptdone)
              flag = .true.
              do while(flag)
                 call MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG,
     $                 ccsd_communicator, flag, stat, mpi_err)
                 if(flag) then
                    call MPI_Recv(comm_scr, 1, MPI_INTEGER_MRCC,
     $                    stat(MPI_SOURCE), stat(MPI_TAG),
     $                    ccsd_communicator, stat, mpi_err)
                    prk = prk + comm_scr
                 end if
              end do

              pr=dble(prk)/dkji !progress counter
              if(pr.ge.prold+0.1d0.and.pr.ne.1.d0) then
                 prold=pr
                 write(iout,'(i4,"% done.")')nint(pr*100)
              endif

              call MPI_Test(request, ptdone, MPI_STATUS_IGNORE,
     $              mpi_err)
           end do
        end if
cc         call MPI_Barrier(ccsd_communicator, mpi_err)
cc         write(*,*) "rank:",mpi_rank,et
cc         call MPI_Barrier(ccsd_communicator, mpi_err)
c         if(mpi_rank .eq. 0) then
c            call MPI_Reduce(MPI_IN_PLACE, et, 1, MPI_DOUBLE_PRECISION,
c     $            MPI_SUM, 0, ccsd_communicator, request, mpi_err)
c         else
c            call MPI_Reduce(et, et, 1, MPI_DOUBLE_PRECISION,
c     $            MPI_SUM, 0, ccsd_communicator, request, mpi_err)
c         end if
         call MPI_Allreduce(MPI_IN_PLACE, et, 1, MPI_DOUBLE_PRECISION,
     $         MPI_SUM, ccsd_communicator, mpi_err)
      endif ! mpi_ccsd
      return
      end subroutine ! }}}

#endif

c restart {{{
************************************************************************
      subroutine write_rst_ccsd(nit, ecc, eccold, norm, lene, t, tt,
     $      nocc, nvirt)
************************************************************************
      implicit none
      integer :: nit, nocc, nvirt
      real*8  :: ecc, eccold, norm
      real*8  :: t(nvirt, nocc), tt(nvirt, nvirt, nocc*(nocc+1)/2)
      logical :: lene

      integer*8, parameter :: version = 1
      integer*8 :: noccw, nvirtw

      ! convert from (possibly) 32-bit to 64-bit
      noccw = nocc
      nvirtw = nvirt

      open(unit = 725, file = 'ccsd.rst', access = 'stream')

      write(725) version
      write(725) noccw, nvirtw
      write(725) nit
      write(725) ecc, eccold, norm, lene
      write(725) t, tt

      close(725)

c      write(*,*) ecc, eccold, norm, lene

      end subroutine

************************************************************************
      subroutine read_rst_ccsd(nit, ecc, eccold, norm, lene, t, tt,
     $      nocc, nvirt)
************************************************************************
      implicit none
      integer :: nit, nocc, nvirt
      real*8  :: ecc, eccold, norm
      real*8  :: t(nvirt, nocc), tt(nvirt, nvirt, nocc*(nocc+1)/2)
      logical :: lene

      integer*8 :: version
      integer*8 :: noccr, nvirtr
      logical rstex

      inquire(file = 'ccsd.rst', exist = rstex)
      if(.not. rstex) return

      open(unit = 725, file = 'ccsd.rst', access = 'stream')

      read(725) version
      if(version .ne. 1) then
         write(*, *) 'ERROR: Unknown version of restart file.'
         call mrccend(1)
      end if

      read(725) noccr, nvirtr
      if(noccr .ne. nocc .or. nvirtr .ne. nvirt) then
         write(*, *) 'ERROR: Wrong restart file.'
         call mrccend(1)
      end if

      read(725) nit
      read(725) ecc, eccold, norm, lene
      read(725) t, tt

      close(725)

      end subroutine

************************************************************************
      subroutine zero_rst_pt(nocc)
************************************************************************
      implicit none
      integer :: nocc

      integer :: k, j, i, kji
      logical*1 :: done

      open(unit = 725, file = 'pt.rst', access  = 'stream')

      kji = 0
      done = .false.
      do k = 1, nocc
         do j = k, nocc
            do i = j, nocc
               if(k .eq. j .and. j .eq. i) cycle
               write(725) done, 0.d0
               kji = kji + 1
            end do
         end do
      end do
      close(725)

      end subroutine

************************************************************************
      subroutine read_rst_pt(k, j, i, nocc, done, et)
************************************************************************
      implicit none
      integer :: nocc
      integer :: k, j, i
      logical*1 :: done
      real*8 :: et

      integer :: kji
      integer :: ijk_index

      kji = ijk_index(k, j, i, nocc)

c$OMP CRITICAL
      read(725, pos = 9 * (kji-1) + 1) done
      if(done) then
         read(725, pos = 9 * (kji-1) + 2) et
      else
         et = 0.d0
      end if
c$OMP END CRITICAL

      end subroutine

************************************************************************
      subroutine write_rst_pt(kji, et)
************************************************************************
      implicit none
      integer :: kji
      real*8 :: et

      logical*1 :: done

      done = .true.
c      write(*,*) kji, 9 * (kji-1) + 1
c$OMP CRITICAL
      write(725, pos = 9 * (kji-1) + 1) done, et
c$OMP END CRITICAL

      end subroutine

************************************************************************
      function ijk_index(k, j, i, nocc)
************************************************************************
      implicit none
      integer :: nocc
      integer :: k, j, i
      integer :: ijk_index

      integer :: kji, k2, j2, i2

      kji = 0
      do k2 = 1, k-1
         do j2 = k2, nocc
            do i2 = j2, nocc
               if(k2 .eq. j2 .and. j2 .eq. i2) cycle
               kji = kji + 1
            end do
         end do
      end do
      do j2 = k, j-1
         do i2 = j2, nocc
            if(k .eq. j2 .and. j2 .eq. i2) cycle
            kji = kji + 1
         end do
      end do
      do i2 = j, i
         if(k .eq. j .and. j .eq. i2) cycle
         kji = kji + 1
      end do

c      write(*,*) k, j, i, kji
      ijk_index = kji

      end function


************************************************************************
      subroutine saveToRst(nReqs, requests, ijk_indices, energies,
     $      toSend, en, k, j, i, nocc, world_small, nReqsAll,
     $      ccsd_communicator, master_rank) !, mpi_type)
************************************************************************
      implicit none
      integer nReqs, k, j, i, nocc, world_small, nReqsAll !, mpi_type
      real*8 en
      integer ijk_index, reqptr, ii, ijk, ccsd_communicator
      integer mpi_rank, mpi_err, omp_get_thread_num, master_rank
      logical flag
      integer tag
      save tag
#ifdef MPI
#include "mpif.h"
      integer requests(nReqsAll),ijk_indices(nReqsAll),toSend(nReqsAll)
      real*8 energies(nReqsAll)
#else
      integer requests,ijk_indices,toSend
      real*8 energies
#endif

#ifdef MPI
      call MPI_Comm_rank(ccsd_communicator, mpi_rank, mpi_err)
#else
      mpi_rank = 0
#endif

c      if(mpi_rank .eq. 0) then
      if(mpi_rank .eq. master_rank) then
         ijk = ijk_index(k, j, i, nocc)
c         write(*,*) 'writing ', ijk, en
         call write_rst_pt(ijk, en)

#ifdef MPI
      else
#ifdef OMP
         if(omp_get_thread_num() .eq. 0) then
#endif
            ! send everything
            do ii = 1, nReqsAll
               if(toSend(ii) .eq. 1) then
                  call MPI_Isend(ijk_indices(ii), 1, MPI_INTEGER_MRCC,
     $                  master_rank, 1, world_small,
     $                  requests(ii), mpi_err)
                  call MPI_Request_free(requests(ii), mpi_err)
                  call MPI_Isend(energies(ii), 1, MPI_DOUBLE_PRECISION,
     $                  master_rank, 1, world_small,
     $                  requests(ii), mpi_err)
                  toSend(ii) = 0
               end if
            end do

            ! check all pending communication
            do ii = 1, nReqsAll
                  call MPI_Test(requests(ii), flag,
     $                  MPI_STATUS_IGNORE, mpi_err)
            end do

            ! find a free slot
            reqptr = ijk_index(k, j, i, nocc)

            ! send energy
            energies(reqptr) = en
            ijk_indices(reqptr) = ijk_index(k, j, i, nocc)
            call MPI_Isend(ijk_indices(reqptr), 1, MPI_INTEGER_MRCC,
     $            master_rank,
     $            1, world_small, requests(reqptr),
     $            mpi_err)
            call MPI_Request_free(requests(reqptr), mpi_err)
            call MPI_Isend(energies(reqptr), 1, MPI_DOUBLE_PRECISION,
     $            master_rank,
     $            1, world_small, requests(reqptr),
     $            mpi_err)
#ifdef OMP
         else
c$OMP CRITICAL
            reqptr = ijk_index(k, j, i, nocc)

            toSend(reqptr) = 2
            energies(reqptr) = en
            ijk_indices(reqptr) = ijk_index(k, j, i, nocc)
            toSend(reqptr) = 1
c$OMP END CRITICAL
         end if
#endif
#endif
      end if
      end subroutine

c }}}

#include "ccsd_io.f"
#include "ccsd_dfdirect.f"
#include "ccsd_pt.f"