From 910c4b4dcd740f231915739f1b685164eeb6f378 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Krup=C4=8D=C3=ADk?=
Date: Thu, 15 Oct 2020 11:36:18 +0200
Subject: [PATCH] modified: g/GROMACS/GROMACS-2016.5-intel-2017c-hybrid-single-PLUMED-2.3.8.eb
 modified: g/GROMACS/GROMACS-2020.2-intel-2020a-PLUMED-2.6.1.eb
 deleted: g/GROMACS/GROMACS-2016.4-intel-2017c-hybrid-single-PLUMED-2.4.1.eb
 deleted: g/GROMACS/GROMACS-2016.4-intel-2017c-hybrid-single-PLUMED.eb
 deleted: g/GROMACS/GROMACS-2016.5-intel-2017b-serial.eb
 deleted: g/GROMACS/GROMACS-2016.5-intel-2017c-hybrid-single-PLUMED.eb
 deleted: g/GROMACS/GROMACS-2018-intel-2017b-serial.eb
 deleted: g/GROMACS/GROMACS-2018.1-intel-2017c-hybrid-single-PLUMED.eb
 deleted: g/GROMACS/GROMACS-2018.3-intel-2017c-hybrid-single-PLUMED.eb
 deleted: g/GROMACS/GROMACS-4.6.7-CrayGNU-2015.06-mpi.eb
 deleted: g/GROMACS/GROMACS-4.6.7-CrayGNU-2015.11-mpi.eb
 deleted: g/GROMACS/GROMACS-4.6.7-CrayIntel-2015.11-mpi.eb
 deleted: g/GROMACS/GROMACS-5.0.4-gompi-2015e-hybrid-single-PLUMED.eb
 deleted: g/GROMACS/GROMACS-5.0.4-ictce-7.3.5-hybrid-single-PLUMED.eb
 deleted: g/GROMACS/GROMACS-5.0.4-ictce-7.3.5-hybrid-single.eb
 deleted: g/GROMACS/GROMACS-5.1.2-goolf-1.7.20-mt.eb
 deleted: g/GROMACS/GROMACS-5.1.4-foss-2017a-hybrid-single-PLUMED.eb
 deleted: g/GROMACS/gromacs-4.6.7-plumed-2.1.3-mpi.patch
 deleted: g/GROMACS/gromacs-5.0.4-plumed-2.1.3-mpi.patch
 deleted: g/GROMACS/gromacs-5.0.4-plumed-2.1.3.patch
 deleted: g/GROMACS/gromacs-5.1.4-plumed-2.3.0-mpi.patch
---
 ...-intel-2017c-hybrid-single-PLUMED-2.4.1.eb | 36 -
 ...2016.4-intel-2017c-hybrid-single-PLUMED.eb | 36 -
 .../GROMACS-2016.5-intel-2017b-serial.eb | 33 -
 ...-intel-2017c-hybrid-single-PLUMED-2.3.8.eb | 5 -
 ...2016.5-intel-2017c-hybrid-single-PLUMED.eb | 36 -
 g/GROMACS/GROMACS-2018-intel-2017b-serial.eb | 32 -
 ...2018.1-intel-2017c-hybrid-single-PLUMED.eb | 36 -
 ...2018.3-intel-2017c-hybrid-single-PLUMED.eb | 36 -
 ...GROMACS-2020.2-intel-2020a-PLUMED-2.6.1.eb | 4 -
 .../GROMACS-4.6.7-CrayGNU-2015.06-mpi.eb | 45 -
 .../GROMACS-4.6.7-CrayGNU-2015.11-mpi.eb | 45 -
 .../GROMACS-4.6.7-CrayIntel-2015.11-mpi.eb | 45 -
 ...-5.0.4-gompi-2015e-hybrid-single-PLUMED.eb | 26 -
 ...-5.0.4-ictce-7.3.5-hybrid-single-PLUMED.eb | 26 -
 ...GROMACS-5.0.4-ictce-7.3.5-hybrid-single.eb | 24 -
 g/GROMACS/GROMACS-5.1.2-goolf-1.7.20-mt.eb | 42 -
 ...S-5.1.4-foss-2017a-hybrid-single-PLUMED.eb | 38 -
 .../gromacs-4.6.7-plumed-2.1.3-mpi.patch | 9676 -----------------
 .../gromacs-5.0.4-plumed-2.1.3-mpi.patch | 9575 ----------------
 g/GROMACS/gromacs-5.0.4-plumed-2.1.3.patch | 9575 ----------------
 .../gromacs-5.1.4-plumed-2.3.0-mpi.patch | 9575 ----------------
 21 files changed, 38946 deletions(-)
 delete mode 100644 g/GROMACS/GROMACS-2016.4-intel-2017c-hybrid-single-PLUMED-2.4.1.eb
 delete mode 100644 g/GROMACS/GROMACS-2016.4-intel-2017c-hybrid-single-PLUMED.eb
 delete mode 100644 g/GROMACS/GROMACS-2016.5-intel-2017b-serial.eb
 delete mode 100644 g/GROMACS/GROMACS-2016.5-intel-2017c-hybrid-single-PLUMED.eb
 delete mode 100644 g/GROMACS/GROMACS-2018-intel-2017b-serial.eb
 delete mode 100644 g/GROMACS/GROMACS-2018.1-intel-2017c-hybrid-single-PLUMED.eb
 delete mode 100644 g/GROMACS/GROMACS-2018.3-intel-2017c-hybrid-single-PLUMED.eb
 delete mode 100644 g/GROMACS/GROMACS-4.6.7-CrayGNU-2015.06-mpi.eb
 delete mode 100644 g/GROMACS/GROMACS-4.6.7-CrayGNU-2015.11-mpi.eb
 delete mode 100644 g/GROMACS/GROMACS-4.6.7-CrayIntel-2015.11-mpi.eb
 delete mode 100644 g/GROMACS/GROMACS-5.0.4-gompi-2015e-hybrid-single-PLUMED.eb
 delete mode 100644
g/GROMACS/GROMACS-5.0.4-ictce-7.3.5-hybrid-single-PLUMED.eb delete mode 100644 g/GROMACS/GROMACS-5.0.4-ictce-7.3.5-hybrid-single.eb delete mode 100644 g/GROMACS/GROMACS-5.1.2-goolf-1.7.20-mt.eb delete mode 100644 g/GROMACS/GROMACS-5.1.4-foss-2017a-hybrid-single-PLUMED.eb delete mode 100644 g/GROMACS/gromacs-4.6.7-plumed-2.1.3-mpi.patch delete mode 100644 g/GROMACS/gromacs-5.0.4-plumed-2.1.3-mpi.patch delete mode 100644 g/GROMACS/gromacs-5.0.4-plumed-2.1.3.patch delete mode 100644 g/GROMACS/gromacs-5.1.4-plumed-2.3.0-mpi.patch diff --git a/g/GROMACS/GROMACS-2016.4-intel-2017c-hybrid-single-PLUMED-2.4.1.eb b/g/GROMACS/GROMACS-2016.4-intel-2017c-hybrid-single-PLUMED-2.4.1.eb deleted file mode 100644 index 52cb2eef..00000000 --- a/g/GROMACS/GROMACS-2016.4-intel-2017c-hybrid-single-PLUMED-2.4.1.eb +++ /dev/null @@ -1,36 +0,0 @@ -# IT4Innovations 2018 - -name = 'GROMACS' -version = '2016.4' -versionsuffix = '-hybrid-single-PLUMED-2.4.1' - -homepage = 'http://www.gromacs.org' -description = """GROMACS is a versatile package to perform molecular dynamics, - i.e. simulate the Newtonian equations of motion for systems with hundreds to millions of particles.""" - -toolchain = {'name': 'intel', 'version': '2017c'} -toolchainopts = {'openmp': True, 'usempi': True} - -source_urls = ['ftp://ftp.gromacs.org/pub/gromacs/'] -sources = [SOURCELOWER_TAR_GZ] - -#preconfigopts = 'plumed patch -p -e gromacs-2016.5 &&' -#preconfigopts = 'plumed patch -p --runtime &&' - -configopts = ' -DGMX_GPU=OFF -DBUILD_SHARED_LIBS=OFF -DGMX_PREFER_STATIC_LIBS=ON -DGMX_DOUBLE=OFF -DGMX_SIMD=AVX2_256 -DGMX_BUILD_OWN_FFTW=ON -DGMX_MPI=ON' - -builddependencies = [ - ('CMake', '3.5.2', '', True), -] - -dependencies = [ - ('Boost', '1.67.0', '-serial'), - ('PLUMED', '2.4.1') -] - -sanity_check_paths = { - 'files': ['bin/gmx_mpi'], - 'dirs': [''], -} - -moduleclass = 'bio' diff --git a/g/GROMACS/GROMACS-2016.4-intel-2017c-hybrid-single-PLUMED.eb b/g/GROMACS/GROMACS-2016.4-intel-2017c-hybrid-single-PLUMED.eb deleted file mode 100644 index c9497bdf..00000000 --- a/g/GROMACS/GROMACS-2016.4-intel-2017c-hybrid-single-PLUMED.eb +++ /dev/null @@ -1,36 +0,0 @@ -# IT4Innovations 2018 - -name = 'GROMACS' -version = '2016.4' -versionsuffix = '-hybrid-single-PLUMED' - -homepage = 'http://www.gromacs.org' -description = """GROMACS is a versatile package to perform molecular dynamics, - i.e. 
simulate the Newtonian equations of motion for systems with hundreds to millions of particles.""" - -toolchain = {'name': 'intel', 'version': '2017c'} -toolchainopts = {'openmp': True, 'usempi': True} - -source_urls = ['http://ftp.gromacs.org/pub/gromacs/'] -sources = [SOURCELOWER_TAR_GZ] - -#preconfigopts = 'plumed patch -p -e gromacs-2016.5 &&' -#preconfigopts = 'plumed patch -p --runtime &&' - -configopts = ' -DGMX_GPU=OFF -DBUILD_SHARED_LIBS=OFF -DGMX_PREFER_STATIC_LIBS=ON -DGMX_DOUBLE=OFF -DGMX_SIMD=AVX2_256 -DGMX_BUILD_OWN_FFTW=ON -DGMX_MPI=ON' - -builddependencies = [ - ('CMake', '3.5.2', '', True), -] - -dependencies = [ - ('Boost', '1.67.0', '-serial'), - ('PLUMED', '2.3.5') -] - -sanity_check_paths = { - 'files': ['bin/gmx_mpi'], - 'dirs': [''], -} - -moduleclass = 'bio' diff --git a/g/GROMACS/GROMACS-2016.5-intel-2017b-serial.eb b/g/GROMACS/GROMACS-2016.5-intel-2017b-serial.eb deleted file mode 100644 index 5b7e47d2..00000000 --- a/g/GROMACS/GROMACS-2016.5-intel-2017b-serial.eb +++ /dev/null @@ -1,33 +0,0 @@ -name = 'GROMACS' -version = '2016.5' - -homepage = 'http://www.gromacs.org' -description = """GROMACS is a versatile package to perform molecular dynamics, - i.e. simulate the Newtonian equations of motion for systems with hundreds to millions of particles.""" - -toolchain = {'name': 'intel', 'version': '2017b'} -toolchainopts = {'openmp': True, 'usempi': True} - -source_urls = ['http://ftp.gromacs.org/pub/gromacs/'] -sources = [SOURCELOWER_TAR_GZ] - -checksums = ['f41807e5b2911ccb547a3fd11f105d47'] - -#configopts = ' -DGMX_GPU=OFF -DBUILD_SHARED_LIBS=OFF -DGMX_PREFER_STATIC_LIBS=ON -DGMX_DOUBLE=OFF -DGMX_SIMD=AVX2_256 -DGMX_BUILD_OWN_FFTW=ON -DGMX_MPI=ON' -# anselm -configopts = ' -DGMX_GPU=OFF -DBUILD_SHARED_LIBS=OFF -DGMX_PREFER_STATIC_LIBS=ON -DGMX_DOUBLE=OFF -DGMX_SIMD=SSE4.1 -DGMX_BUILD_OWN_FFTW=ON -DGMX_MPI=ON' - -builddependencies = [ - ('CMake', '3.9.1', '', True), -] - -dependencies = [ - ('Boost', '1.66.0', '-serial'), -] - -sanity_check_paths = { - 'files': ['bin/gmx_mpi'], - 'dirs': [''], -} - -moduleclass = 'bio' diff --git a/g/GROMACS/GROMACS-2016.5-intel-2017c-hybrid-single-PLUMED-2.3.8.eb b/g/GROMACS/GROMACS-2016.5-intel-2017c-hybrid-single-PLUMED-2.3.8.eb index da56a60b..f32b1be9 100644 --- a/g/GROMACS/GROMACS-2016.5-intel-2017c-hybrid-single-PLUMED-2.3.8.eb +++ b/g/GROMACS/GROMACS-2016.5-intel-2017c-hybrid-single-PLUMED-2.3.8.eb @@ -15,11 +15,6 @@ toolchainopts = {'openmp': True, 'usempi': True} source_urls = ['http://ftp.gromacs.org/pub/gromacs/'] sources = [SOURCELOWER_TAR_GZ] -#preconfigopts = 'plumed patch -p -e gromacs-2016.5 &&' -#preconfigopts = 'plumed patch -p --runtime &&' - -#configopts = ' -DGMX_GPU=OFF -DBUILD_SHARED_LIBS=OFF -DGMX_PREFER_STATIC_LIBS=ON -DGMX_DOUBLE=OFF -DGMX_SIMD=AVX2_256 -DGMX_BUILD_OWN_FFTW=ON -DGMX_MPI=ON' - builddependencies = [ ('CMake', '3.5.2', '', True), ] diff --git a/g/GROMACS/GROMACS-2016.5-intel-2017c-hybrid-single-PLUMED.eb b/g/GROMACS/GROMACS-2016.5-intel-2017c-hybrid-single-PLUMED.eb deleted file mode 100644 index bff8ea2f..00000000 --- a/g/GROMACS/GROMACS-2016.5-intel-2017c-hybrid-single-PLUMED.eb +++ /dev/null @@ -1,36 +0,0 @@ -# IT4Innovations 2018 - -name = 'GROMACS' -version = '2016.5' -versionsuffix = '-hybrid-single-PLUMED' - -homepage = 'http://www.gromacs.org' -description = """GROMACS is a versatile package to perform molecular dynamics, - i.e. 
simulate the Newtonian equations of motion for systems with hundreds to millions of particles.""" - -toolchain = {'name': 'intel', 'version': '2017c'} -toolchainopts = {'openmp': True, 'usempi': True} - -source_urls = ['ftp://ftp.gromacs.org/pub/gromacs/'] -sources = [SOURCELOWER_TAR_GZ] - -#preconfigopts = 'plumed patch -p -e gromacs-2016.5 &&' -#preconfigopts = 'plumed patch -p --runtime &&' - -configopts = ' -DGMX_GPU=OFF -DBUILD_SHARED_LIBS=OFF -DGMX_PREFER_STATIC_LIBS=ON -DGMX_DOUBLE=OFF -DGMX_SIMD=AVX2_256 -DGMX_BUILD_OWN_FFTW=ON -DGMX_MPI=ON' - -builddependencies = [ - ('CMake', '3.5.2', '', True), -] - -dependencies = [ - ('Boost', '1.67.0', '-serial'), - ('PLUMED', '2.3.5') -] - -sanity_check_paths = { - 'files': ['bin/gmx_mpi'], - 'dirs': [''], -} - -moduleclass = 'bio' diff --git a/g/GROMACS/GROMACS-2018-intel-2017b-serial.eb b/g/GROMACS/GROMACS-2018-intel-2017b-serial.eb deleted file mode 100644 index 4ef68757..00000000 --- a/g/GROMACS/GROMACS-2018-intel-2017b-serial.eb +++ /dev/null @@ -1,32 +0,0 @@ -name = 'GROMACS' -version = '2018' - -homepage = 'http://www.gromacs.org' -description = """GROMACS is a versatile package to perform molecular dynamics, - i.e. simulate the Newtonian equations of motion for systems with hundreds to millions of particles.""" - -toolchain = {'name': 'intel', 'version': '2017b'} -toolchainopts = {'openmp': True, 'usempi': True} - -source_urls = ['http://ftp.gromacs.org/pub/gromacs/'] -sources = [SOURCELOWER_TAR_GZ] - -checksums = ['6467ffb1575b8271548a13abfba6374c'] - -#configopts = ' -DGMX_GPU=OFF -DBUILD_SHARED_LIBS=OFF -DGMX_PREFER_STATIC_LIBS=ON -DGMX_DOUBLE=OFF -DGMX_SIMD=AVX2_256 -DGMX_BUILD_OWN_FFTW=ON -DGMX_MPI=ON' -configopts = ' -DGMX_GPU=OFF -DBUILD_SHARED_LIBS=OFF -DGMX_PREFER_STATIC_LIBS=ON -DGMX_DOUBLE=OFF -DGMX_SIMD=AUTO -DGMX_BUILD_OWN_FFTW=ON -DGMX_MPI=ON' - -builddependencies = [ - ('CMake', '3.9.1', '', True), -] - -dependencies = [ - ('Boost', '1.66.0', '-serial'), -] - -sanity_check_paths = { - 'files': ['bin/gmx_mpi'], - 'dirs': [''], -} - -moduleclass = 'bio' diff --git a/g/GROMACS/GROMACS-2018.1-intel-2017c-hybrid-single-PLUMED.eb b/g/GROMACS/GROMACS-2018.1-intel-2017c-hybrid-single-PLUMED.eb deleted file mode 100644 index 0ca827a4..00000000 --- a/g/GROMACS/GROMACS-2018.1-intel-2017c-hybrid-single-PLUMED.eb +++ /dev/null @@ -1,36 +0,0 @@ -# IT4Innovations 2018 - -name = 'GROMACS' -version = '2018.1' -versionsuffix = '-hybrid-single-PLUMED' - -homepage = 'http://www.gromacs.org' -description = """GROMACS is a versatile package to perform molecular dynamics, - i.e. 
simulate the Newtonian equations of motion for systems with hundreds to millions of particles.""" - -toolchain = {'name': 'intel', 'version': '2017c'} -toolchainopts = {'openmp': True, 'usempi': True} - -source_urls = ['http://ftp.gromacs.org/pub/gromacs/'] -sources = [SOURCELOWER_TAR_GZ] - -#preconfigopts = 'plumed patch -p -e gromacs-2016.5 &&' -#preconfigopts = 'plumed patch -p --runtime &&' - -configopts = ' -DGMX_GPU=OFF -DBUILD_SHARED_LIBS=OFF -DGMX_PREFER_STATIC_LIBS=ON -DGMX_DOUBLE=OFF -DGMX_SIMD=AVX2_256 -DGMX_BUILD_OWN_FFTW=ON -DGMX_MPI=ON' - -builddependencies = [ - ('CMake', '3.13.1', '', True), -] - -dependencies = [ - ('Boost', '1.68.0', '-serial'), - ('PLUMED', '2.4.2') -] - -sanity_check_paths = { - 'files': ['bin/gmx_mpi'], - 'dirs': [''], -} - -moduleclass = 'bio' diff --git a/g/GROMACS/GROMACS-2018.3-intel-2017c-hybrid-single-PLUMED.eb b/g/GROMACS/GROMACS-2018.3-intel-2017c-hybrid-single-PLUMED.eb deleted file mode 100644 index 26af7ea9..00000000 --- a/g/GROMACS/GROMACS-2018.3-intel-2017c-hybrid-single-PLUMED.eb +++ /dev/null @@ -1,36 +0,0 @@ -# IT4Innovations 2018 - -name = 'GROMACS' -version = '2018.3' -versionsuffix = '-hybrid-single-PLUMED' - -homepage = 'http://www.gromacs.org' -description = """GROMACS is a versatile package to perform molecular dynamics, - i.e. simulate the Newtonian equations of motion for systems with hundreds to millions of particles.""" - -toolchain = {'name': 'intel', 'version': '2017c'} -toolchainopts = {'openmp': True, 'usempi': True} - -source_urls = ['http://ftp.gromacs.org/pub/gromacs/'] -sources = [SOURCELOWER_TAR_GZ] - -#preconfigopts = 'plumed patch -p -e gromacs-2016.5 &&' -#preconfigopts = 'plumed patch -p --runtime &&' - -configopts = ' -DGMX_GPU=OFF -DBUILD_SHARED_LIBS=OFF -DGMX_PREFER_STATIC_LIBS=ON -DGMX_DOUBLE=OFF -DGMX_SIMD=AVX2_256 -DGMX_BUILD_OWN_FFTW=ON -DGMX_MPI=ON' - -builddependencies = [ - ('CMake', '3.9.1', '', True), -] - -dependencies = [ - ('Boost', '1.68.0', '-serial'), - ('PLUMED', '2.4.2') -] - -sanity_check_paths = { - 'files': ['bin/gmx_mpi'], - 'dirs': [''], -} - -moduleclass = 'bio' diff --git a/g/GROMACS/GROMACS-2020.2-intel-2020a-PLUMED-2.6.1.eb b/g/GROMACS/GROMACS-2020.2-intel-2020a-PLUMED-2.6.1.eb index 97f9cc0d..f856912b 100644 --- a/g/GROMACS/GROMACS-2020.2-intel-2020a-PLUMED-2.6.1.eb +++ b/g/GROMACS/GROMACS-2020.2-intel-2020a-PLUMED-2.6.1.eb @@ -22,10 +22,6 @@ source_urls = [ 'ftp://ftp.gromacs.org/pub/gromacs/', ] sources = [SOURCELOWER_TAR_GZ] -#atches = [ -# 'GROMACS-2018_fix_search_for_nvml_include.patch', -# 'GROMACS-2018_amend_search_for_nvml_lib.patch', -# builddependencies = [ ('CMake', '3.16.4'), diff --git a/g/GROMACS/GROMACS-4.6.7-CrayGNU-2015.06-mpi.eb b/g/GROMACS/GROMACS-4.6.7-CrayGNU-2015.06-mpi.eb deleted file mode 100644 index b69a2609..00000000 --- a/g/GROMACS/GROMACS-4.6.7-CrayGNU-2015.06-mpi.eb +++ /dev/null @@ -1,45 +0,0 @@ -## -# This file is an EasyBuild reciPY as per https://github.com/hpcugent/easybuild -# -# Copyright:: Copyright 2012-2013 University of Luxembourg / LCSB, Cyprus Institute / CaSToRC, Ghent University -# Authors:: Wiktor Jurkowski , Fotis Georgatos , \ -# George Tsouloupas , Kenneth Hoste -# License:: MIT/GPL -# $Id$ -# -# This work implements a part of the HPCBIOS project and is a local_component of the policy: -# http://hpcbios.readthedocs.org/en/latest/HPCBIOS_2012-93.html -## -name = 'GROMACS' -version = '4.6.7' -versionsuffix = '-mpi' - -homepage = 'http://www.gromacs.org' -description = """GROMACS is a versatile package to perform molecular dynamics, - i.e. 
simulate the Newtonian equations of motion for systems with hundreds to millions of particles.""" - -toolchain = {'name': 'CrayGNU', 'version': '2015.06'} -toolchainopts = {'usempi': True} - -# eg. ftp://ftp.gromacs.org/pub/gromacs/gromacs-4.6.tar.gz -source_urls = [ - 'ftp://ftp.gromacs.org/pub/gromacs/', # GROMACS sources - 'http://gerrit.gromacs.org/download/', # regression tests sources -] -sources = [ - SOURCELOWER_TAR_GZ, - 'regressiontests-%(version)s.tar.gz', -] - -preconfigopts = "export CMAKE_LIBRARY_PATH=$CMAKE_LIBRARY_PATH:${EBROOTFFTW}/lib && " -preconfigopts += "export CMAKE_INCLUDE_PATH=$CMAKE_INCLUDE_PATH:${EBROOTFFTW}/include && " - -dependencies = [ - ('fftw/3.3.4.3', EXTERNAL_MODULE), -] - -builddependencies = [('CMake', '3.2.2')] - -runtest = False - -moduleclass = 'bio' diff --git a/g/GROMACS/GROMACS-4.6.7-CrayGNU-2015.11-mpi.eb b/g/GROMACS/GROMACS-4.6.7-CrayGNU-2015.11-mpi.eb deleted file mode 100644 index 3ef3a4e4..00000000 --- a/g/GROMACS/GROMACS-4.6.7-CrayGNU-2015.11-mpi.eb +++ /dev/null @@ -1,45 +0,0 @@ -## -# This file is an EasyBuild reciPY as per https://github.com/hpcugent/easybuild -# -# Copyright:: Copyright 2012-2013 University of Luxembourg / LCSB, Cyprus Institute / CaSToRC, Ghent University -# Authors:: Wiktor Jurkowski , Fotis Georgatos , \ -# George Tsouloupas , Kenneth Hoste -# License:: MIT/GPL -# $Id$ -# -# This work implements a part of the HPCBIOS project and is a local_component of the policy: -# http://hpcbios.readthedocs.org/en/latest/HPCBIOS_2012-93.html -## -name = 'GROMACS' -version = '4.6.7' -versionsuffix = '-mpi' - -homepage = 'http://www.gromacs.org' -description = """GROMACS is a versatile package to perform molecular dynamics, - i.e. simulate the Newtonian equations of motion for systems with hundreds to millions of particles.""" - -toolchain = {'name': 'CrayGNU', 'version': '2015.11'} -toolchainopts = {'usempi': True} - -# eg. ftp://ftp.gromacs.org/pub/gromacs/gromacs-4.6.tar.gz -source_urls = [ - 'ftp://ftp.gromacs.org/pub/gromacs/', # GROMACS sources - 'http://gerrit.gromacs.org/download/', # regression tests sources -] -sources = [ - SOURCELOWER_TAR_GZ, - 'regressiontests-%(version)s.tar.gz', -] - -preconfigopts = "export CMAKE_LIBRARY_PATH=$CMAKE_LIBRARY_PATH:$EBROOTFFTW/lib && " -preconfigopts += "export CMAKE_INCLUDE_PATH=$CMAKE_INCLUDE_PATH:$EBROOTFFTW/include && " - -dependencies = [ - ('fftw/3.3.4.5', EXTERNAL_MODULE), -] - -builddependencies = [('CMake', '3.2.2')] - -runtest = False - -moduleclass = 'bio' diff --git a/g/GROMACS/GROMACS-4.6.7-CrayIntel-2015.11-mpi.eb b/g/GROMACS/GROMACS-4.6.7-CrayIntel-2015.11-mpi.eb deleted file mode 100644 index a03812a7..00000000 --- a/g/GROMACS/GROMACS-4.6.7-CrayIntel-2015.11-mpi.eb +++ /dev/null @@ -1,45 +0,0 @@ -## -# This file is an EasyBuild reciPY as per https://github.com/hpcugent/easybuild -# -# Copyright:: Copyright 2012-2013 University of Luxembourg / LCSB, Cyprus Institute / CaSToRC, Ghent University -# Authors:: Wiktor Jurkowski , Fotis Georgatos , \ -# George Tsouloupas , Kenneth Hoste -# License:: MIT/GPL -# $Id$ -# -# This work implements a part of the HPCBIOS project and is a local_component of the policy: -# http://hpcbios.readthedocs.org/en/latest/HPCBIOS_2012-93.html -## -name = 'GROMACS' -version = '4.6.7' -versionsuffix = '-mpi' - -homepage = 'http://www.gromacs.org' -description = """GROMACS is a versatile package to perform molecular dynamics, - i.e. 
simulate the Newtonian equations of motion for systems with hundreds to millions of particles.""" - -toolchain = {'name': 'CrayIntel', 'version': '2015.11'} -toolchainopts = {'usempi': True} - -# eg. ftp://ftp.gromacs.org/pub/gromacs/gromacs-4.6.tar.gz -source_urls = [ - 'ftp://ftp.gromacs.org/pub/gromacs/', # GROMACS sources - 'http://gerrit.gromacs.org/download/', # regression tests sources -] -sources = [ - SOURCELOWER_TAR_GZ, - 'regressiontests-%(version)s.tar.gz', -] - -preconfigopts = "export CMAKE_LIBRARY_PATH=$CMAKE_LIBRARY_PATH:$EBROOTFFTW/lib && " -preconfigopts += "export CMAKE_INCLUDE_PATH=$CMAKE_INCLUDE_PATH:$EBROOTFFTW/include && " - -dependencies = [ - ('fftw/3.3.4.5', EXTERNAL_MODULE), -] - -builddependencies = [('CMake', '3.2.2')] - -runtest = False - -moduleclass = 'bio' diff --git a/g/GROMACS/GROMACS-5.0.4-gompi-2015e-hybrid-single-PLUMED.eb b/g/GROMACS/GROMACS-5.0.4-gompi-2015e-hybrid-single-PLUMED.eb deleted file mode 100644 index 663369fe..00000000 --- a/g/GROMACS/GROMACS-5.0.4-gompi-2015e-hybrid-single-PLUMED.eb +++ /dev/null @@ -1,26 +0,0 @@ -name = 'GROMACS' -version = '5.0.4' -versionsuffix = '-hybrid-single-PLUMED' - -homepage = 'http://www.gromacs.org' -description = """GROMACS is a versatile package to perform molecular dynamics, - i.e. simulate the Newtonian equations of motion for systems with hundreds to millions of particles.""" - -toolchain = {'name': 'gompi', 'version': '2015e'} -toolchainopts = {'openmp': True, 'usempi': True} - -patches = ['gromacs-5.0.4-plumed-2.1.3.patch'] - -source_urls = ['ftp://ftp.gromacs.org/pub/gromacs/'] -sources = [SOURCELOWER_TAR_GZ] - -builddependencies = [ - # ('CMake', '3.0.0'), # We are using system CMake - ('libxml2', '2.9.2') -] - -configopts = ' -DGMX_GPU=OFF -DGMX_DOUBLE=OFF -DGMX_SIMD=AVX2' - -dependencies = [('Boost', '1.58.0', '-Python-2.7.9')] - -moduleclass = 'bio' diff --git a/g/GROMACS/GROMACS-5.0.4-ictce-7.3.5-hybrid-single-PLUMED.eb b/g/GROMACS/GROMACS-5.0.4-ictce-7.3.5-hybrid-single-PLUMED.eb deleted file mode 100644 index a455e6fe..00000000 --- a/g/GROMACS/GROMACS-5.0.4-ictce-7.3.5-hybrid-single-PLUMED.eb +++ /dev/null @@ -1,26 +0,0 @@ -name = 'GROMACS' -version = '5.0.4' -versionsuffix = '-hybrid-single-PLUMED' - -homepage = 'http://www.gromacs.org' -description = """GROMACS is a versatile package to perform molecular dynamics, - i.e. simulate the Newtonian equations of motion for systems with hundreds to millions of particles.""" - -toolchain = {'name': 'ictce', 'version': '7.3.5'} -toolchainopts = {'openmp': True, 'usempi': True} - -patches = ['gromacs-5.0.4-plumed-2.1.3.patch'] - -source_urls = ['ftp://ftp.gromacs.org/pub/gromacs/'] -sources = [SOURCELOWER_TAR_GZ] - -builddependencies = [ - ('CMake', '3.0.0'), - ('libxml2', '2.9.2') -] - -configopts = ' -DGMX_GPU=OFF -DGMX_DOUBLE=OFF' - -dependencies = [('Boost', '1.58.0', '-Python-2.7.9')] - -moduleclass = 'bio' diff --git a/g/GROMACS/GROMACS-5.0.4-ictce-7.3.5-hybrid-single.eb b/g/GROMACS/GROMACS-5.0.4-ictce-7.3.5-hybrid-single.eb deleted file mode 100644 index fcce8863..00000000 --- a/g/GROMACS/GROMACS-5.0.4-ictce-7.3.5-hybrid-single.eb +++ /dev/null @@ -1,24 +0,0 @@ -name = 'GROMACS' -version = '5.0.4' -versionsuffix = '-hybrid-single' - -homepage = 'http://www.gromacs.org' -description = """GROMACS is a versatile package to perform molecular dynamics, - i.e. 
simulate the Newtonian equations of motion for systems with hundreds to millions of particles.""" - -toolchain = {'name': 'ictce', 'version': '7.3.5'} -toolchainopts = {'openmp': True, 'usempi': True} - -source_urls = ['ftp://ftp.gromacs.org/pub/gromacs/'] -sources = [SOURCELOWER_TAR_GZ] - -builddependencies = [ - ('CMake', '3.0.0'), - ('libxml2', '2.9.2') -] - -configopts = ' -DGMX_GPU=OFF -DGMX_DOUBLE=OFF' - -dependencies = [('Boost', '1.58.0', '-Python-2.7.9')] - -moduleclass = 'bio' diff --git a/g/GROMACS/GROMACS-5.1.2-goolf-1.7.20-mt.eb b/g/GROMACS/GROMACS-5.1.2-goolf-1.7.20-mt.eb deleted file mode 100644 index 7ce66b9c..00000000 --- a/g/GROMACS/GROMACS-5.1.2-goolf-1.7.20-mt.eb +++ /dev/null @@ -1,42 +0,0 @@ -## -# This file is an EasyBuild reciPY as per https://github.com/hpcugent/easybuild -# -# Copyright:: Copyright 2012-2013 University of Luxembourg / LCSB, Cyprus Institute / CaSToRC, Ghent University -# Authors:: Wiktor Jurkowski , Fotis Georgatos , \ -# George Tsouloupas , Kenneth Hoste -# License:: MIT/GPL -# $Id$ -# -# This work implements a part of the HPCBIOS project and is a local_component of the policy: -# http://hpcbios.readthedocs.org/en/latest/HPCBIOS_2012-93.html -## -name = 'GROMACS' -version = '5.1.2' -versionsuffix = '-mt' - -homepage = 'http://www.gromacs.org' -description = """GROMACS is a versatile package to perform molecular dynamics, - i.e. simulate the Newtonian equations of motion for systems with hundreds to millions of particles.""" - -toolchain = {'name': 'goolf', 'version': '1.7.20'} -toolchainopts = {'openmp': True, 'usempi': False} - -source_urls = [ - 'ftp://ftp.gromacs.org/pub/gromacs/', - 'http://gerrit.gromacs.org/download/', -] - -sources = [ - SOURCELOWER_TAR_GZ, - # seems to have disappeared? - # 'regressiontests-5.0.2.tar.gz', -] - -builddependencies = [ - ('CMake', '2.8.12'), - ('libxml2', '2.9.3') -] - -dependencies = [('Boost', '1.53.0')] - -moduleclass = 'bio' diff --git a/g/GROMACS/GROMACS-5.1.4-foss-2017a-hybrid-single-PLUMED.eb b/g/GROMACS/GROMACS-5.1.4-foss-2017a-hybrid-single-PLUMED.eb deleted file mode 100644 index 61f4a62c..00000000 --- a/g/GROMACS/GROMACS-5.1.4-foss-2017a-hybrid-single-PLUMED.eb +++ /dev/null @@ -1,38 +0,0 @@ -name = 'GROMACS' -version = '5.1.4' -versionsuffix = '-hybrid-single-PLUMED' - -homepage = 'http://www.gromacs.org' -description = """GROMACS is a versatile package to perform molecular dynamics, - i.e. 
simulate the Newtonian equations of motion for systems with hundreds to millions of particles.""" - -toolchain = {'name': 'foss', 'version': '2017a'} -toolchainopts = {'openmp': True, 'usempi': True} - -source_urls = ['ftp://ftp.gromacs.org/pub/gromacs/'] -sources = [SOURCELOWER_TAR_GZ] - -#patches = ['gromacs-%s-plumed-2.3.0-mpi.patch' % version] - -builddependencies = [ - ('CMake', '3.7.2', '', True), # We are using system CMake - ('libxml2', '2.9.2', '', True), -] - -preconfigopts = 'plumed patch -p -e gromacs-5.1.4 --shared &&' - -configopts = ' -DGMX_GPU=OFF -DGMX_DOUBLE=OFF -DGMX_SIMD=AVX2_256' - -dependencies = [ - ('Boost', '1.61.0', '-serial'), - ('almost', '2.1.0', '', ('foss', '2016a')), - ('libmatheval', '1.1.11'), - ('PLUMED', '2.3.0') -] - -sanity_check_paths = { - 'files': ['bin/gmx_mpi'], - 'dirs': [''], -} - -moduleclass = 'bio' diff --git a/g/GROMACS/gromacs-4.6.7-plumed-2.1.3-mpi.patch b/g/GROMACS/gromacs-4.6.7-plumed-2.1.3-mpi.patch deleted file mode 100644 index 6583388a..00000000 --- a/g/GROMACS/gromacs-4.6.7-plumed-2.1.3-mpi.patch +++ /dev/null @@ -1,9676 +0,0 @@ -diff --git a/Plumed.cmake b/Plumed.cmake -new file mode 100644 -index 0000000..01472f0 ---- /dev/null -+++ b/Plumed.cmake -@@ -0,0 +1,3 @@ -+# PLUMED: shared installation -+set(PLUMED_LOAD /apps/all/PLUMED/2.1.3-foss-2015g/lib/plumed///src/lib/libplumed.so -ldl ) -+set(PLUMED_DEPENDENCIES /apps/all/PLUMED/2.1.3-foss-2015g/lib/plumed///src/lib/libplumed.so) -diff --git a/Plumed.h b/Plumed.h -new file mode 100644 -index 0000000..16da74a ---- /dev/null -+++ b/Plumed.h -@@ -0,0 +1,494 @@ -+/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -+ Copyright (c) 2011-2014 The plumed team -+ (see the PEOPLE file at the root of the distribution for a list of names) -+ -+ See http://www.plumed-code.org for more information. -+ -+ This file is part of plumed, version 2. -+ -+ plumed is free software: you can redistribute it and/or modify -+ it under the terms of the GNU Lesser General Public License as published by -+ the Free Software Foundation, either version 3 of the License, or -+ (at your option) any later version. -+ -+ plumed is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public License -+ along with plumed. If not, see . -++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ -+#ifndef __PLUMED_wrapper_Plumed_h -+#define __PLUMED_wrapper_Plumed_h -+ -+/** -+\page ReferencePlumedH Reference for interfacing MD codes with PLUMED -+ -+ Plumed.h and Plumed.c contain the external plumed interface, which is used to -+ integrate it with MD engines. This interface is very general, and is expected -+ not to change across plumed versions. Plumed.c also implements a dummy version -+ of the interface, so as to allow a code to be fully linked even if the plumed -+ library is not available yet. These files could be directly included in the official -+ host MD distribution. In this manner, it will be sufficient to link the plumed -+ library at link time (on all systems) or directly at runtime (on system where -+ dynamic loading is enabled) to include plumed features. -+ -+ Why is Plumed.c written in C and not C++? 
The reason is that the resulting Plumed.o -+ needs to be linked with the host MD code immediately (whereas the rest of plumed -+ could be linked a posteriori). Imagine the MD code is written in FORTRAN: when we -+ link the Plumed.o file we would like not to need any C++ library linked. In this -+ manner, we do not need to know which C++ compiler will be used to compile plumed. -+ The C++ library is only linked to the "rest" of plumed, which actually use it. -+ Anyway, Plumed.c is written in such a manner to allow its compilation also in C++ -+ (C++ is a bit stricter than C; compatibility is checked when PlumedStatic.cpp, -+ which basically includes Plumed.c, is compiled with the C++ compiler). This will -+ allow e.g. MD codes written in C++ to just incorporate Plumed.c (maybe renamed into -+ Plumed.cpp), without the need of configuring a plain C compiler. -+ -+ Plumed interface can be used from C, C++ and FORTRAN. Everything concerning plumed -+ is hidden inside a single object type, which is described in C by a structure -+ (struct \ref plumed), in C++ by a class (PLMD::Plumed) and in FORTRAN by a -+ fixed-length string (CHARACTER(LEN=32)). Obviously C++ can use both struct -+ and class interfaces, but the first should be preferred. The reference interface -+ is the C one, whereas FORTRAN and C++ interfaces are implemented as wrappers -+ around it. -+ -+ In the C++ interface, all the routines are implemented as methods of PLMD::Plumed. -+ In the C and FORTRAN interfaces, all the routines are named plumed_*, to -+ avoid potential name clashes. Notice that the entire plumed library -+ is implemented in C++, and it is hidden inside the PLMD namespace. -+ -+ Handlers to the plumed object can be converted among different representations, -+ to allow inter-operability among languages. In C, there are tools to convert -+ to/from FORTRAN, whereas in C++ there are tools to convert to/from FORTRAN and C. -+ -+ These handlers only contain a pointer to the real structure, so that -+ when a plumed object is brought from one language to another, -+ it brings a reference to the same environment. -+ -+ Moreover, to simplify life in all cases where a single Plumed object is -+ required for the entire simulation (which covers most of the practical -+ applications with conventional MD codes) it is possible to take advantage -+ of a global interface, which is implicitly referring to a unique global instance. -+ The global object should still be initialized and finalized properly. 
-+ -+ The basic method to send a message to plumed is -+\verbatim -+ (C) plumed_cmd -+ (C++) PLMD::Plumed::cmd -+ (FORTRAN) PLUMED_F_CMD -+\endverbatim -+ -+ To initialize a plumed object, use: -+\verbatim -+ (C) plumed_create -+ (C++) (constructor of PLMD::Plumed) -+ (FORTRAN) PLUMED_F_CREATE -+\endverbatim -+ -+ To finalize it, use -+\verbatim -+ (C) plumed_finalize -+ (C++) (destructor of PLMD::Plumed) -+ (FORTRAN) PLUMED_F_FINALIZE -+\endverbatim -+ -+ To access to the global-object, use -+\verbatim -+ (C) plumed_gcreate, plumed_gfinalize, plumed_gcmd -+ (C++) PLMD::Plumed::gcreate, PLMD::Plumed::gfinalize, PLMD::Plumed::gcmd -+ (FORTRAN) PLUMED_F_GCREATE, PLUMED_F_GFINALIZE, PLUMED_F_GCMD -+\endverbatim -+ -+ To check if the global object has been initialized, use -+\verbatim -+ (C) plumed_ginitialized -+ (C++) PLMD::Plumed::ginitialized -+ (FORTRAN) PLUMED_F_GINITIALIZED -+\endverbatim -+ -+ To check if plumed library is available (this is useful for runtime linking), use -+\verbatim -+ (C) plumed_installed -+ (C++) PLMD::Plumed::installed -+ (FORTRAN) PLUMED_F_INSTALLED -+\endverbatim -+ -+ To convert handlers use -+\verbatim -+ (C) plumed_c2f (C to FORTRAN) -+ (C) plumed_f2c (FORTRAN to C) -+ (C++) Plumed(plumed) constructor (C to C++) -+ (C++) operator plumed() cast (C++ to C) -+ (C++) Plumed(char*) constructor (FORTRAN to C++) -+ (C++) toFortran(char*) (C++ to FORTRAN) -+\endverbatim -+ -+\verbatim -+ FORTRAN interface -+ SUBROUTINE PLUMED_F_INSTALLED(i) -+ INTEGER, INTENT(OUT) :: i -+ SUBROUTINE PLUMED_F_GINITIALIZED(i) -+ INTEGER, INTENT(OUT) :: i -+ SUBROUTINE PLUMED_F_GCREATE() -+ SUBROUTINE PLUMED_F_GCMD(key,val) -+ CHARACTER(LEN=*), INTENT(IN) :: key -+ UNSPECIFIED_TYPE, INTENT(INOUT) :: val(*) -+ SUBROUTINE PLUMED_F_GFINALIZE() -+ SUBROUTINE PLUMED_F_GLOBAL(p) -+ CHARACTER(LEN=32), INTENT(OUT) :: p -+ SUBROUTINE PLUMED_F_CREATE(p) -+ CHARACTER(LEN=32), INTENT(OUT) :: p -+ SUBROUTINE PLUMED_F_CMD(p,key,val) -+ CHARACTER(LEN=32), INTENT(IN) :: p -+ CHARACTER(LEN=*), INTENT(IN) :: key -+ UNSPECIFIED_TYPE, INTENT(INOUT) :: val(*) -+ SUBROUTINE PLUMED_F_FINALIZE(p) -+ CHARACTER(LEN=32), INTENT(IN) :: p -+\endverbatim -+ -+ The main routine is "cmd", which accepts two arguments: -+ key is a string containing the name of the command -+ val is the argument. it is declared const so as to use allow passing const objects, but in practice plumed -+ is going to modify val in several cases (using a const_cast). -+ In some cases val can be omitted: just pass a NULL pointer (in C++, val is optional and can be omitted). -+ The set of possible keys is the real API of the plumed library, and will be expanded with time. -+ New commands will be added, but backward compatibility will be retained as long as possible. -+ -+ To pass plumed a callback function use the following syntax (not available in FORTRAN yet) -+\verbatim -+ plumed_function_holder ff; -+ ff.p=your_function; -+ plumed_cmd(plumed,"xxxx",&ff); -+\endverbatim -+ (this is passing the your_function() function to the "xxxx" command) -+*/ -+ -+#ifdef __cplusplus -+ extern "C" { -+#endif -+ -+/* Generic function pointer */ -+typedef void (*plumed_function_pointer)(void); -+ -+/** -+ \brief Holder for function pointer. 
-+ -+ To pass plumed a callback function use the following syntax: -+\verbatim -+ plumed_function_holder ff; -+ ff.p=your_function; -+ plumed_cmd(plumed,"xxxx",&ff); -+\endverbatim -+ (this is going to pass the your_function() function to the "xxxx" command) -+*/ -+ -+typedef struct { -+ plumed_function_pointer p; -+} plumed_function_holder; -+ -+/** -+ \brief Main plumed object -+ -+ This is an object containing a Plumed instance, which should be used in -+ the MD engine. It should first be initialized with plumed_create(), -+ then it communicates with the MD engine using plumed_cmd(). Finally, -+ before the termination, it should be deallocated with plumed_finalize(). -+ Its interface is very simple and general, and is expected -+ not to change across plumed versions. See \ref ReferencePlumedH. -+*/ -+typedef struct { -+/** -+ \private -+ \brief Void pointer holding the real PlumedMain structure -+*/ -+ void*p; -+} plumed; -+ -+/** \relates plumed -+ \brief Constructor -+ -+ \return The constructed plumed object -+*/ -+plumed plumed_create(void); -+ -+/** \relates plumed -+ \brief Tells p to execute a command -+ -+ \param p The plumed object on which command is acting -+ \param key The name of the command to be executed -+ \param val The argument. It is declared as const to allow calls like plumed_cmd(p,"A","B"), -+ but for some choice of key it can change the content -+*/ -+void plumed_cmd(plumed p,const char*key,const void*val); -+ -+/** \relates plumed -+ \brief Destructor -+ -+ \param p The plumed object to be deallocated -+*/ -+void plumed_finalize(plumed p); -+ -+/** \relates plumed -+ \brief Check if plumed is installed (for runtime binding) -+ -+ \return 1 if plumed is installed, to 0 otherwise -+*/ -+int plumed_installed(void); -+ -+/** \relates plumed -+ \brief Retrieves an handler to the global structure. -+*/ -+plumed plumed_global(void); -+ -+/** \relates plumed -+ \brief Check if the global interface has been initialized -+ -+ \return 1 if plumed has been initialized, 0 otherwise -+*/ -+int plumed_ginitialized(void); -+ -+/* global C interface, working on a global object */ -+ -+/** \relates plumed -+ \brief Constructor for the global interface. -+ -+ \note Equivalent to plumed_create(), but initialize a static global plumed object -+*/ -+void plumed_gcreate(void); -+ -+/** \relates plumed -+ \brief Tells to the global interface to execute a command. -+ -+ \param key The name of the command to be executed -+ \param val The argument. It is declared as const to allow calls like plumed_gcmd("A","B"), -+ but for some choice of key it can change the content -+ -+ \note Equivalent to plumed_cmd(), but skipping the plumed argument -+*/ -+void plumed_gcmd(const char* key,const void* val); -+ -+/** \relates plumed -+ \brief Destructor for the global interface. 
-+ -+ \note Equivalent to plumed_finalize(), but skipping the plumed argument -+*/ -+void plumed_gfinalize(void); -+ -+/* routines to convert char handler from/to plumed objects */ -+ -+/** \related plumed -+ \brief Converts a C handler to a FORTRAN handler -+ -+ \param p The C handler -+ \param c The FORTRAN handler (a char[32]) -+*/ -+void plumed_c2f(plumed p,char* c); -+ -+/** \related plumed -+ \brief Converts a FORTRAN handler to a C handler -+ \param c The FORTRAN handler (a char[32]) -+ \return The C handler -+*/ -+plumed plumed_f2c(const char* c); -+ -+#ifdef __cplusplus -+ } -+#endif -+ -+#ifdef __cplusplus -+ -+/* this is to include the NULL pointer */ -+#include -+ -+/* C++ interface is hidden in PLMD namespace (same as plumed library) */ -+namespace PLMD { -+ -+/** -+ C++ wrapper for \ref plumed. -+ -+ This class provides a C++ interface to PLUMED. -+*/ -+ -+class Plumed{ -+ plumed main; -+/** -+ keeps track if the object was created from scratch using -+ the defaults destructor (cloned=false) or if it was imported -+ from C or FORTRAN (cloned-true). In the latter case, the -+ plumed_finalize() method is not called when destructing the object, -+ since it is expected to be finalized in the C/FORTRAN code -+*/ -+ bool cloned; -+public: -+/** -+ Check if plumed is installed (for runtime binding) -+ \return true if plumed is installed, false otherwise -+*/ -+ static bool installed(); -+/** -+ Check if global-plumed has been initialized -+ \return true if global plumed object (see global()) is initialized (i.e. if gcreate() has been -+ called), false otherwise. -+*/ -+ static bool ginitialized(); -+/** -+ Initialize global-plumed -+*/ -+ static void gcreate(); -+/** -+ Send a command to global-plumed -+ \param key The name of the command to be executed -+ \param val The argument. It is declared as const to allow calls like gcmd("A","B"), -+ but for some choice of key it can change the content -+*/ -+ static void gcmd(const char* key,const void* val); -+/** -+ Finalize global-plumed -+*/ -+ static void gfinalize(); -+/** -+ Returns the Plumed global object -+ \return The Plumed global object -+*/ -+ static Plumed global(); -+/** -+ Constructor -+*/ -+ Plumed(); -+/** -+ Clone a Plumed object from a FORTRAN char* handler -+ \param c The FORTRAN handler (a char[32]). -+ -+ \attention The Plumed object created in this manner -+ will not finalize the corresponding plumed structure. -+ It is expected that the FORTRAN code calls plumed_c_finalize for it -+*/ -+ Plumed(const char*c); -+/** -+ Clone a Plumed object from a C plumed structure -+ \param p The C plumed structure. -+ -+ \attention The Plumed object created in this manner -+ will not finalize the corresponding plumed structure. -+ It is expected that the C code calls plumed_finalize for it -+*/ -+ Plumed(plumed p); -+private: -+/** Copy constructor is disabled (private and unimplemented) -+ The problem here is that after copying it will not be clear who is -+ going to finalize the corresponding plumed structure. -+*/ -+ Plumed(const Plumed&); -+/** Assignment operator is disabled (private and unimplemented) -+ The problem here is that after copying it will not be clear who is -+ going to finalize the corresponding plumed structure. -+*/ -+ Plumed&operator=(const Plumed&); -+public: -+/** -+ Retrieve the C plumed structure for this object -+*/ -+ operator plumed()const; -+/** -+ Retrieve a FORTRAN handler for this object -+ \param c The FORTRAN handler (a char[32]). 
-+*/ -+ void toFortran(char*c)const; -+/** -+ Send a command to this plumed object -+ \param key The name of the command to be executed -+ \param val The argument. It is declared as const to allow calls like p.cmd("A","B"), -+ but for some choice of key it can change the content -+*/ -+ void cmd(const char*key,const void*val=NULL); -+/** -+ Destructor -+ -+ Destructor is virtual so as to allow correct inheritance from Plumed object. -+ To avoid linking problems with g++, I specify "inline" also here (in principle -+ it should be enough to specify it down in the definition of the function, but -+ for some reason that I do not understand g++ does not inline it properly in that -+ case and complains when Plumed.h is included but Plumed.o is not linked. Anyway, the -+ way it is done here seems to work properly). -+*/ -+ inline virtual ~Plumed(); -+}; -+ -+/* All methods are inlined so as to avoid the compilation of an extra c++ file */ -+ -+inline -+bool Plumed::installed(){ -+ return plumed_installed(); -+} -+ -+inline -+Plumed::Plumed(): -+ main(plumed_create()), -+ cloned(false) -+{} -+ -+inline -+Plumed::Plumed(const char*c): -+ main(plumed_f2c(c)), -+ cloned(true) -+{} -+ -+inline -+Plumed::Plumed(plumed p): -+ main(p), -+ cloned(true) -+{} -+ -+inline -+Plumed::operator plumed()const{ -+ return main; -+} -+ -+inline -+void Plumed::toFortran(char*c)const{ -+ plumed_c2f(main,c); -+} -+ -+inline -+void Plumed::cmd(const char*key,const void*val){ -+ plumed_cmd(main,key,val); -+} -+ -+inline -+Plumed::~Plumed(){ -+ if(!cloned)plumed_finalize(main); -+} -+ -+inline -+bool Plumed::ginitialized(){ -+ return plumed_ginitialized(); -+} -+ -+inline -+void Plumed::gcreate(){ -+ plumed_gcreate(); -+} -+ -+inline -+void Plumed::gcmd(const char* key,const void* val){ -+ plumed_gcmd(key,val); -+} -+ -+inline -+void Plumed::gfinalize(){ -+ plumed_gfinalize(); -+} -+ -+inline -+Plumed Plumed::global(){ -+ return plumed_global(); -+} -+ -+} -+ -+#endif -+ -+ -+#endif -diff --git a/Plumed.inc b/Plumed.inc -new file mode 100644 -index 0000000..e1e29a7 ---- /dev/null -+++ b/Plumed.inc -@@ -0,0 +1,3 @@ -+# PLUMED: shared installation -+PLUMED_LOAD= /apps/all/PLUMED/2.1.3-foss-2015g/lib/plumed///src/lib/libplumed.so -ldl -+PLUMED_DEPENDENCIES= /apps/all/PLUMED/2.1.3-foss-2015g/lib/plumed///src/lib/libplumed.so -diff --git a/src/kernel/CMakeLists.txt b/src/kernel/CMakeLists.txt -index fea8282..8e108b3 100644 ---- a/src/kernel/CMakeLists.txt -+++ b/src/kernel/CMakeLists.txt -@@ -33,6 +33,8 @@ - # the research papers on the package. Check out http://www.gromacs.org. - # - -+include(${CMAKE_SOURCE_DIR}/Plumed.cmake) -+ - set(GMXPREPROCESS_SOURCES - add_par.c - calc_verletbuf.c -@@ -123,7 +125,7 @@ endforeach() - - add_executable(mdrun ${MDRUN_SOURCES} main.c) - gmx_add_man_page(mdrun) --target_link_libraries(mdrun gmxpreprocess md gmx ${OpenMP_LINKER_FLAGS}) -+target_link_libraries(mdrun gmxpreprocess md gmx ${OpenMP_LINKER_FLAGS} ${PLUMED_LOAD}) - set_target_properties(mdrun PROPERTIES OUTPUT_NAME "mdrun${GMX_BINARY_SUFFIX}" COMPILE_FLAGS "${OpenMP_C_FLAGS}") - - if(GMX_OPENMM) -diff --git a/src/kernel/CMakeLists.txt.preplumed b/src/kernel/CMakeLists.txt.preplumed -new file mode 100644 -index 0000000..fea8282 ---- /dev/null -+++ b/src/kernel/CMakeLists.txt.preplumed -@@ -0,0 +1,195 @@ -+# -+# This file is part of the GROMACS molecular simulation package. 
-+# -+# Copyright (c) 2012,2013, by the GROMACS development team, led by -+# David van der Spoel, Berk Hess, Erik Lindahl, and including many -+# others, as listed in the AUTHORS file in the top-level source -+# directory and at http://www.gromacs.org. -+# -+# GROMACS is free software; you can redistribute it and/or -+# modify it under the terms of the GNU Lesser General Public License -+# as published by the Free Software Foundation; either version 2.1 -+# of the License, or (at your option) any later version. -+# -+# GROMACS is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# Lesser General Public License for more details. -+# -+# You should have received a copy of the GNU Lesser General Public -+# License along with GROMACS; if not, see -+# http://www.gnu.org/licenses, or write to the Free Software Foundation, -+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+# -+# If you want to redistribute modifications to GROMACS, please -+# consider that scientific software is very special. Version -+# control is crucial - bugs must be traceable. We will be happy to -+# consider code for inclusion in the official distribution, but -+# derived work must not be called official GROMACS. Details are found -+# in the README & COPYING files - if they are missing, get the -+# official version at http://www.gromacs.org. -+# -+# To help us fund GROMACS development, we humbly ask that you cite -+# the research papers on the package. Check out http://www.gromacs.org. -+# -+ -+set(GMXPREPROCESS_SOURCES -+ add_par.c -+ calc_verletbuf.c -+ compute_io.c -+ convparm.c -+ gen_ad.c -+ gen_vsite.c -+ genhydro.c -+ gpp_atomtype.c -+ gpp_bond_atomtype.c -+ h_db.c -+ hackblock.c -+ hizzie.c -+ nm2type.c -+ pdb2top.c -+ pgutil.c -+ readir.c -+ readpull.c -+ readadress.c -+ readrot.c -+ resall.c -+ sorting.c -+ specbond.c -+ ter_db.c -+ tomorse.c -+ topdirs.c -+ topexcl.c -+ topio.c -+ toppush.c -+ topshake.c -+ toputil.c -+ tpbcmp.c -+ vsite_parm.c -+ fflibutil.c -+ xlate.c) -+ -+set(MDRUN_SOURCES -+ gctio.c ionize.c runner.c -+ do_gct.c repl_ex.c xutils.c pme_loadbal.c -+ md.c mdrun.c genalg.c membed.c) -+ -+add_library(gmxpreprocess ${GMXPREPROCESS_SOURCES}) -+target_link_libraries(gmxpreprocess md) -+set_target_properties(gmxpreprocess PROPERTIES OUTPUT_NAME "gmxpreprocess${GMX_LIBS_SUFFIX}" SOVERSION ${SOVERSION} -+ COMPILE_FLAGS "${OpenMP_C_FLAGS}") -+ -+ -+if(GMX_GPU) -+ include_directories(${CMAKE_SOURCE_DIR}/src/gmxlib/gpu_utils) -+endif() -+ -+if(GMX_OPENMM) -+ # Even though the OpenMM build has "moved to contrib", many things -+ # have be be done from within the scope of the CMakeLists.txt that -+ # builds its mdrun, and that is here -+ list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/src/contrib) -+ find_package(OpenMM) -+ include_directories(${CMAKE_CURRENT_SOURCE_DIR}) -+ include(${CMAKE_SOURCE_DIR}/src/contrib/BuildMdrunOpenMM.cmake) -+endif(GMX_OPENMM) -+ -+if(GMX_GPU OR GMX_FORCE_CXX) -+ set_source_files_properties(main.c PROPERTIES LANGUAGE CXX) -+ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") -+ set_source_files_properties(main.c PROPERTIES COMPILE_FLAGS "-x c++") -+ endif() -+endif() -+ -+if(GMX_FAHCORE) -+ add_library(fahcore ${MDRUN_SOURCES}) -+else(GMX_FAHCORE) -+ -+set(GMX_KERNEL_PROGRAMS -+ grompp tpbconv pdb2gmx g_protonate gmxdump g_x2top gmxcheck) -+if (NOT GMX_NO_QUOTES) -+ set(GMX_KERNEL_PROGRAMS ${GMX_KERNEL_PROGRAMS} g_luck) 
-+endif (NOT GMX_NO_QUOTES) -+ -+ -+foreach(PROGRAM ${GMX_KERNEL_PROGRAMS}) -+ add_executable(${PROGRAM} ${PROGRAM}.c main.c) -+ if (NOT ${PROGRAM} STREQUAL "g_luck") -+ gmx_add_man_page(${PROGRAM}) -+ endif() -+ target_link_libraries(${PROGRAM} gmxpreprocess md gmx ${OpenMP_LINKER_FLAGS}) -+ set_target_properties(${PROGRAM} PROPERTIES OUTPUT_NAME "${PROGRAM}${GMX_BINARY_SUFFIX}") -+endforeach() -+ -+add_executable(mdrun ${MDRUN_SOURCES} main.c) -+gmx_add_man_page(mdrun) -+target_link_libraries(mdrun gmxpreprocess md gmx ${OpenMP_LINKER_FLAGS}) -+set_target_properties(mdrun PROPERTIES OUTPUT_NAME "mdrun${GMX_BINARY_SUFFIX}" COMPILE_FLAGS "${OpenMP_C_FLAGS}") -+ -+if(GMX_OPENMM) -+ target_link_libraries(mdrun openmm_api_wrapper) -+endif() -+ -+# Construct component groups for installation; note that a component may -+# belong to only one group -+foreach(PROGRAM ${GMX_KERNEL_PROGRAMS}) -+ set(CPACK_COMPONENT_${PROGRAM}_GROUP tools) -+endforeach() -+set(CPACK_COMPONENT_MDRUN_GROUP mdrun) -+ -+foreach(PROGRAM ${GMX_KERNEL_PROGRAMS} mdrun) -+ # Manage CPack component dependencies -+ set(CPACK_COMPONENT_${PROGRAM}_DEPENDS libraries libraries-gmxpreprocess) -+ -+ # Create custom install-xxxx target -+ if (BUILD_SHARED_LIBS) -+ # If shared libraries are used, we need to install the libraries in -+ # addition to the mdrun binary. -+ add_custom_target(install-${PROGRAM} -+ COMMAND ${CMAKE_COMMAND} -DCOMPONENT=libraries -+ -P ${CMAKE_BINARY_DIR}/cmake_install.cmake -+ COMMAND ${CMAKE_COMMAND} -DCOMPONENT=libraries-gmxpreprocess -+ -P ${CMAKE_BINARY_DIR}/cmake_install.cmake -+ COMMAND ${CMAKE_COMMAND} -DCOMPONENT=${PROGRAM} -+ -P ${CMAKE_BINARY_DIR}/cmake_install.cmake -+ COMMENT "Installing ${PROGRAM}") -+ else() -+ add_custom_target(install-${PROGRAM} -+ COMMAND ${CMAKE_COMMAND} -DCOMPONENT=${PROGRAM} -+ -P ${CMAKE_BINARY_DIR}/cmake_install.cmake -+ COMMENT "Installing ${PROGRAM}") -+ endif() -+ add_dependencies(install-${PROGRAM} ${PROGRAM}) -+ -+ # Finally, trigger installation -+ install( -+ TARGETS ${PROGRAM} -+ COMPONENT ${PROGRAM} -+ DESTINATION ${BIN_INSTALL_DIR} -+ ) -+endforeach() -+ -+install(TARGETS gmxpreprocess DESTINATION ${LIB_INSTALL_DIR} COMPONENT libraries-gmxpreprocess) -+ -+if (INSTALL_CUDART_LIB) #can be set manual by user -+ if (GMX_GPU) -+ foreach(CUDA_LIB ${CUDA_LIBRARIES}) -+ string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB}) -+ if(IS_CUDART) #libcuda should not be installed -+ #install also name-links (linker uses those) -+ file(GLOB CUDA_LIBS ${CUDA_LIB}*) -+ install(FILES ${CUDA_LIBS} DESTINATION -+ ${LIB_INSTALL_DIR} COMPONENT libraries) -+ endif() -+ endforeach() -+ else() -+ message(WARNING "INSTALL_CUDART_LIB only makes sense with GMX_GPU") -+ endif() -+endif () -+endif(GMX_FAHCORE) -+ -+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libgmxpreprocess.pc.cmakein ${CMAKE_CURRENT_BINARY_DIR}/libgmxpreprocess.pc @ONLY) -+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libgmxpreprocess.pc -+ DESTINATION ${LIB_INSTALL_DIR}/pkgconfig -+ RENAME "libgmxpreprocess${GMX_LIBS_SUFFIX}.pc" -+ COMPONENT development) -diff --git a/src/kernel/md.c b/src/kernel/md.c -index 4c4a88c..b4b8c51 100644 ---- a/src/kernel/md.c -+++ b/src/kernel/md.c -@@ -93,6 +93,12 @@ - #include "types/iteratedconstraints.h" - #include "nbnxn_cuda_data_mgmt.h" - -+/* PLUMED */ -+#include "../../Plumed.h" -+extern int plumedswitch; -+extern plumed plumedmain; -+/* END PLUMED */ -+ - #ifdef GMX_LIB_MPI - #include - #endif -@@ -236,6 +242,12 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], - 
double cycles_pmes; - gmx_bool bPMETuneTry = FALSE, bPMETuneRunning = FALSE; - -+/* PLUMED */ -+ int plumedNeedsEnergy=0; -+ int plumedWantsToStop=0; -+ matrix plumed_vir; -+/* END PLUMED */ -+ - #ifdef GMX_FAHCORE - /* Temporary addition for FAHCORE checkpointing */ - int chkpt_ret; -@@ -732,6 +744,53 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], - fprintf(fplog, "\n"); - } - -+ /* PLUMED */ -+ if(plumedswitch){ -+ /* detect plumed API version */ -+ int pversion=0; -+ plumed_cmd(plumedmain,"getApiVersion",&pversion); -+ /* setting kbT is only implemented with api>1) */ -+ real kbT=ir->opts.ref_t[0]*BOLTZ; -+ if(pversion>1) plumed_cmd(plumedmain,"setKbT",&kbT); -+ -+ if(cr->ms && cr->ms->nsim>1) { -+ if(MASTER(cr)) plumed_cmd(plumedmain,"GREX setMPIIntercomm",&cr->ms->mpi_comm_masters); -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all); -+ }else{ -+ plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim); -+ } -+ } -+ plumed_cmd(plumedmain,"GREX init",NULL); -+ } -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ plumed_cmd(plumedmain,"setMPIComm",&cr->dd->mpi_comm_all); -+ }else{ -+ plumed_cmd(plumedmain,"setMPIComm",&cr->mpi_comm_mysim); -+ } -+ } -+ plumed_cmd(plumedmain,"setNatoms",&top_global->natoms); -+ plumed_cmd(plumedmain,"setMDEngine","gromacs"); -+ plumed_cmd(plumedmain,"setLog",fplog); -+ real real_delta_t; -+ real_delta_t=ir->delta_t; -+ plumed_cmd(plumedmain,"setTimestep",&real_delta_t); -+ plumed_cmd(plumedmain,"init",NULL); -+ -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ plumed_cmd(plumedmain,"setAtomsNlocal",&cr->dd->nat_home); -+ plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->gatindex); -+ }else{ -+ plumed_cmd(plumedmain,"setAtomsNlocal",&mdatoms->homenr); -+ plumed_cmd(plumedmain,"setAtomsContiguous",&mdatoms->start); -+ } -+ } -+ } -+ /* END PLUMED */ -+ - print_start(fplog, cr, runtime, "mdrun"); - runtime_start(runtime); - wallcycle_start(wcycle, ewcRUN); -@@ -1044,6 +1103,13 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], - do_verbose && !bPMETuneRunning); - wallcycle_stop(wcycle, ewcDOMDEC); - /* If using an iterative integrator, reallocate space to match the decomposition */ -+ -+ /* PLUMED */ -+ if(plumedswitch){ -+ plumed_cmd(plumedmain,"setAtomsNlocal",&cr->dd->nat_home); -+ plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->gatindex); -+ } -+ /* END PLUMED */ - } - } - -@@ -1189,12 +1255,45 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], - * This is parallellized as well, and does communication too. 
- * Check comments in sim_util.c - */ -+ -+ /* PLUMED */ -+ plumedNeedsEnergy=0; -+ if(plumedswitch){ -+ long int lstep=step; plumed_cmd(plumedmain,"setStepLong",&lstep); -+ plumed_cmd(plumedmain,"setPositions",&state->x[mdatoms->start][0]); -+ plumed_cmd(plumedmain,"setMasses",&mdatoms->massT[mdatoms->start]); -+ plumed_cmd(plumedmain,"setCharges",&mdatoms->chargeA[mdatoms->start]); -+ plumed_cmd(plumedmain,"setBox",&state->box[0][0]); -+ plumed_cmd(plumedmain,"prepareCalc",NULL); -+ plumed_cmd(plumedmain,"setStopFlag",&plumedWantsToStop); -+ plumed_cmd(plumedmain,"setForces",&f[mdatoms->start][0]); -+ plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); -+ clear_mat(plumed_vir); -+ plumed_cmd(plumedmain,"setVirial",&plumed_vir[0][0]); -+ } -+ /* END PLUMED */ - do_force(fplog, cr, ir, step, nrnb, wcycle, top, top_global, groups, - state->box, state->x, &state->hist, - f, force_vir, mdatoms, enerd, fcd, - state->lambda, graph, - fr, vsite, mu_tot, t, outf->fp_field, ed, bBornRadii, -- (bNS ? GMX_FORCE_NS : 0) | force_flags); -+ (plumedNeedsEnergy? GMX_FORCE_ENERGY : 0) |(bNS ? GMX_FORCE_NS : 0) | force_flags); -+ /* PLUMED */ -+ if(plumedswitch){ -+ if(plumedNeedsEnergy){ -+ msmul(force_vir,2.0,plumed_vir); -+ plumed_cmd(plumedmain,"setEnergy",&enerd->term[F_EPOT]); -+ plumed_cmd(plumedmain,"performCalc",NULL); -+ msmul(plumed_vir,0.5,force_vir); -+ } else { -+ msmul(plumed_vir,0.5,plumed_vir); -+ m_add(force_vir,plumed_vir,force_vir); -+ } -+ if ((repl_ex_nst > 0) && (step > 0) && !bLastStep && -+ do_per_step(step,repl_ex_nst)) plumed_cmd(plumedmain,"GREX savePositions",NULL); -+ if(plumedWantsToStop) ir->nsteps=step_rel+1; -+ } -+ /* END PLUMED */ - } - - GMX_BARRIER(cr->mpi_comm_mygroup); -diff --git a/src/kernel/md.c.preplumed b/src/kernel/md.c.preplumed -new file mode 100644 -index 0000000..4c4a88c ---- /dev/null -+++ b/src/kernel/md.c.preplumed -@@ -0,0 +1,2283 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team, -+ * check out http://www.gromacs.org for more information. -+ * Copyright (c) 2012,2013, by the GROMACS development team, led by -+ * David van der Spoel, Berk Hess, Erik Lindahl, and including many -+ * others, as listed in the AUTHORS file in the top-level source -+ * directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. -+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. 
We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. -+ */ -+#ifdef HAVE_CONFIG_H -+#include -+#endif -+ -+#include "typedefs.h" -+#include "smalloc.h" -+#include "sysstuff.h" -+#include "vec.h" -+#include "statutil.h" -+#include "vcm.h" -+#include "mdebin.h" -+#include "nrnb.h" -+#include "calcmu.h" -+#include "index.h" -+#include "vsite.h" -+#include "update.h" -+#include "ns.h" -+#include "trnio.h" -+#include "xtcio.h" -+#include "mdrun.h" -+#include "md_support.h" -+#include "md_logging.h" -+#include "confio.h" -+#include "network.h" -+#include "pull.h" -+#include "xvgr.h" -+#include "physics.h" -+#include "names.h" -+#include "xmdrun.h" -+#include "ionize.h" -+#include "disre.h" -+#include "orires.h" -+#include "pme.h" -+#include "mdatoms.h" -+#include "repl_ex.h" -+#include "qmmm.h" -+#include "mpelogging.h" -+#include "domdec.h" -+#include "domdec_network.h" -+#include "partdec.h" -+#include "topsort.h" -+#include "coulomb.h" -+#include "constr.h" -+#include "shellfc.h" -+#include "compute_io.h" -+#include "mvdata.h" -+#include "checkpoint.h" -+#include "mtop_util.h" -+#include "sighandler.h" -+#include "txtdump.h" -+#include "string2.h" -+#include "pme_loadbal.h" -+#include "bondf.h" -+#include "membed.h" -+#include "types/nlistheuristics.h" -+#include "types/iteratedconstraints.h" -+#include "nbnxn_cuda_data_mgmt.h" -+ -+#ifdef GMX_LIB_MPI -+#include -+#endif -+#ifdef GMX_THREAD_MPI -+#include "tmpi.h" -+#endif -+ -+#ifdef GMX_FAHCORE -+#include "corewrap.h" -+#endif -+ -+static void reset_all_counters(FILE *fplog, t_commrec *cr, -+ gmx_large_int_t step, -+ gmx_large_int_t *step_rel, t_inputrec *ir, -+ gmx_wallcycle_t wcycle, t_nrnb *nrnb, -+ gmx_runtime_t *runtime, -+ nbnxn_cuda_ptr_t cu_nbv) -+{ -+ char sbuf[STEPSTRSIZE]; -+ -+ /* Reset all the counters related to performance over the run */ -+ md_print_warn(cr, fplog, "step %s: resetting all time and cycle counters\n", -+ gmx_step_str(step, sbuf)); -+ -+ if (cu_nbv) -+ { -+ nbnxn_cuda_reset_timings(cu_nbv); -+ } -+ -+ wallcycle_stop(wcycle, ewcRUN); -+ wallcycle_reset_all(wcycle); -+ if (DOMAINDECOMP(cr)) -+ { -+ reset_dd_statistics_counters(cr->dd); -+ } -+ init_nrnb(nrnb); -+ ir->init_step += *step_rel; -+ ir->nsteps -= *step_rel; -+ *step_rel = 0; -+ wallcycle_start(wcycle, ewcRUN); -+ runtime_start(runtime); -+ print_date_and_time(fplog, cr->nodeid, "Restarted time", runtime); -+} -+ -+double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], -+ const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact, -+ int nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int stepout, t_inputrec *ir, -+ gmx_mtop_t *top_global, -+ t_fcdata *fcd, -+ t_state *state_global, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t ed, t_forcerec *fr, -+ int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, gmx_membed_t membed, -+ real cpt_period, real max_hours, -+ const char *deviceOptions, -+ unsigned long Flags, -+ gmx_runtime_t *runtime) -+{ -+ gmx_mdoutf_t *outf; -+ gmx_large_int_t step, step_rel; -+ double run_time; -+ double t, t0, lam0[efptNR]; -+ gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEner; -+ gmx_bool bNS, 
bNStList, bSimAnn, bStopCM, bRerunMD, bNotLastFrame = FALSE, -+ bFirstStep, bStateFromCP, bStateFromTPX, bInitStep, bLastStep, -+ bBornRadii, bStartingFromCpt; -+ gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE; -+ gmx_bool do_ene, do_log, do_verbose, bRerunWarnNoV = TRUE, -+ bForceUpdate = FALSE, bCPT; -+ int mdof_flags; -+ gmx_bool bMasterState; -+ int force_flags, cglo_flags; -+ tensor force_vir, shake_vir, total_vir, tmp_vir, pres; -+ int i, m; -+ t_trxstatus *status; -+ rvec mu_tot; -+ t_vcm *vcm; -+ t_state *bufstate = NULL; -+ matrix *scale_tot, pcoupl_mu, M, ebox; -+ gmx_nlheur_t nlh; -+ t_trxframe rerun_fr; -+ gmx_repl_ex_t repl_ex = NULL; -+ int nchkpt = 1; -+ gmx_localtop_t *top; -+ t_mdebin *mdebin = NULL; -+ t_state *state = NULL; -+ rvec *f_global = NULL; -+ int n_xtc = -1; -+ rvec *x_xtc = NULL; -+ gmx_enerdata_t *enerd; -+ rvec *f = NULL; -+ gmx_global_stat_t gstat; -+ gmx_update_t upd = NULL; -+ t_graph *graph = NULL; -+ globsig_t gs; -+ gmx_rng_t mcrng = NULL; -+ gmx_bool bFFscan; -+ gmx_groups_t *groups; -+ gmx_ekindata_t *ekind, *ekind_save; -+ gmx_shellfc_t shellfc; -+ int count, nconverged = 0; -+ real timestep = 0; -+ double tcount = 0; -+ gmx_bool bIonize = FALSE; -+ gmx_bool bTCR = FALSE, bConverged = TRUE, bOK, bSumEkinhOld, bExchanged; -+ gmx_bool bAppend; -+ gmx_bool bResetCountersHalfMaxH = FALSE; -+ gmx_bool bVV, bIterativeCase, bFirstIterate, bTemp, bPres, bTrotter; -+ gmx_bool bUpdateDoLR; -+ real mu_aver = 0, dvdl_constr; -+ int a0, a1, gnx = 0, ii; -+ atom_id *grpindex = NULL; -+ char *grpname; -+ t_coupl_rec *tcr = NULL; -+ rvec *xcopy = NULL, *vcopy = NULL, *cbuf = NULL; -+ matrix boxcopy = {{0}}, lastbox; -+ tensor tmpvir; -+ real fom, oldfom, veta_save, pcurr, scalevir, tracevir; -+ real vetanew = 0; -+ int lamnew = 0; -+ /* for FEP */ -+ int nstfep; -+ real rate; -+ double cycles; -+ real saved_conserved_quantity = 0; -+ real last_ekin = 0; -+ int iter_i; -+ t_extmass MassQ; -+ int **trotter_seq; -+ char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE]; -+ int handled_stop_condition = gmx_stop_cond_none; /* compare to get_stop_condition*/ -+ gmx_iterate_t iterate; -+ gmx_large_int_t multisim_nsteps = -1; /* number of steps to do before first multisim -+ simulation stops. If equal to zero, don't -+ communicate any more between multisims.*/ -+ /* PME load balancing data for GPU kernels */ -+ pme_load_balancing_t pme_loadbal = NULL; -+ double cycles_pmes; -+ gmx_bool bPMETuneTry = FALSE, bPMETuneRunning = FALSE; -+ -+#ifdef GMX_FAHCORE -+ /* Temporary addition for FAHCORE checkpointing */ -+ int chkpt_ret; -+#endif -+ -+ /* Check for special mdrun options */ -+ bRerunMD = (Flags & MD_RERUN); -+ bIonize = (Flags & MD_IONIZE); -+ bFFscan = (Flags & MD_FFSCAN); -+ bAppend = (Flags & MD_APPENDFILES); -+ if (Flags & MD_RESETCOUNTERSHALFWAY) -+ { -+ if (ir->nsteps > 0) -+ { -+ /* Signal to reset the counters half the simulation steps. */ -+ wcycle_set_reset_counters(wcycle, ir->nsteps/2); -+ } -+ /* Signal to reset the counters halfway the simulation time. 
*/ -+ bResetCountersHalfMaxH = (max_hours > 0); -+ } -+ -+ /* md-vv uses averaged full step velocities for T-control -+ md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control) -+ md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */ -+ bVV = EI_VV(ir->eI); -+ if (bVV) /* to store the initial velocities while computing virial */ -+ { -+ snew(cbuf, top_global->natoms); -+ } -+ /* all the iteratative cases - only if there are constraints */ -+ bIterativeCase = ((IR_NPH_TROTTER(ir) || IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD)); -+ gmx_iterate_init(&iterate, FALSE); /* The default value of iterate->bIterationActive is set to -+ false in this step. The correct value, true or false, -+ is set at each step, as it depends on the frequency of temperature -+ and pressure control.*/ -+ bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir))); -+ -+ if (bRerunMD) -+ { -+ /* Since we don't know if the frames read are related in any way, -+ * rebuild the neighborlist at every step. -+ */ -+ ir->nstlist = 1; -+ ir->nstcalcenergy = 1; -+ nstglobalcomm = 1; -+ } -+ -+ check_ir_old_tpx_versions(cr, fplog, ir, top_global); -+ -+ nstglobalcomm = check_nstglobalcomm(fplog, cr, nstglobalcomm, ir); -+ bGStatEveryStep = (nstglobalcomm == 1); -+ -+ if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL) -+ { -+ fprintf(fplog, -+ "To reduce the energy communication with nstlist = -1\n" -+ "the neighbor list validity should not be checked at every step,\n" -+ "this means that exact integration is not guaranteed.\n" -+ "The neighbor list validity is checked after:\n" -+ " - 2*std.dev.(n.list life time) steps.\n" -+ "In most cases this will result in exact integration.\n" -+ "This reduces the energy communication by a factor of 2 to 3.\n" -+ "If you want less energy communication, set nstlist > 3.\n\n"); -+ } -+ -+ if (bRerunMD || bFFscan) -+ { -+ ir->nstxtcout = 0; -+ } -+ groups = &top_global->groups; -+ -+ /* Initial values */ -+ init_md(fplog, cr, ir, oenv, &t, &t0, state_global->lambda, -+ &(state_global->fep_state), lam0, -+ nrnb, top_global, &upd, -+ nfile, fnm, &outf, &mdebin, -+ force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, state_global, Flags); -+ -+ clear_mat(total_vir); -+ clear_mat(pres); -+ /* Energy terms and groups */ -+ snew(enerd, 1); -+ init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, -+ enerd); -+ if (DOMAINDECOMP(cr)) -+ { -+ f = NULL; -+ } -+ else -+ { -+ snew(f, top_global->natoms); -+ } -+ -+ /* Kinetic energy data */ -+ snew(ekind, 1); -+ init_ekindata(fplog, top_global, &(ir->opts), ekind); -+ /* needed for iteration of constraints */ -+ snew(ekind_save, 1); -+ init_ekindata(fplog, top_global, &(ir->opts), ekind_save); -+ /* Copy the cos acceleration to the groups struct */ -+ ekind->cosacc.cos_accel = ir->cos_accel; -+ -+ gstat = global_stat_init(ir); -+ debug_gmx(); -+ -+ /* Check for polarizable models and flexible constraints */ -+ shellfc = init_shell_flexcon(fplog, -+ top_global, n_flexible_constraints(constr), -+ (ir->bContinuation || -+ (DOMAINDECOMP(cr) && !MASTER(cr))) ? 
-+ NULL : state_global->x); -+ if (shellfc && ir->nstcalcenergy != 1) -+ { -+ gmx_fatal(FARGS, "You have nstcalcenergy set to a value (%d) that is different from 1.\nThis is not supported in combinations with shell particles.\nPlease make a new tpr file.", ir->nstcalcenergy); -+ } -+ if (shellfc && DOMAINDECOMP(cr)) -+ { -+ gmx_fatal(FARGS, "In order to run parallel simulations with shells you need to use the -pd flag to mdrun."); -+ } -+ if (shellfc && ir->eI == eiNM) -+ { -+ /* Currently shells don't work with Normal Modes */ -+ gmx_fatal(FARGS, "Normal Mode analysis is not supported with shells.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n"); -+ } -+ -+ if (vsite && ir->eI == eiNM) -+ { -+ /* Currently virtual sites don't work with Normal Modes */ -+ gmx_fatal(FARGS, "Normal Mode analysis is not supported with virtual sites.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n"); -+ } -+ -+ if (DEFORM(*ir)) -+ { -+#ifdef GMX_THREAD_MPI -+ tMPI_Thread_mutex_lock(&deform_init_box_mutex); -+#endif -+ set_deform_reference_box(upd, -+ deform_init_init_step_tpx, -+ deform_init_box_tpx); -+#ifdef GMX_THREAD_MPI -+ tMPI_Thread_mutex_unlock(&deform_init_box_mutex); -+#endif -+ } -+ -+ { -+ double io = compute_io(ir, top_global->natoms, groups, mdebin->ebin->nener, 1); -+ if ((io > 2000) && MASTER(cr)) -+ { -+ fprintf(stderr, -+ "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", -+ io); -+ } -+ } -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ top = dd_init_local_top(top_global); -+ -+ snew(state, 1); -+ dd_init_local_state(cr->dd, state_global, state); -+ -+ if (DDMASTER(cr->dd) && ir->nstfout) -+ { -+ snew(f_global, state_global->natoms); -+ } -+ } -+ else -+ { -+ if (PAR(cr)) -+ { -+ /* Initialize the particle decomposition and split the topology */ -+ top = split_system(fplog, top_global, ir, cr); -+ -+ pd_cg_range(cr, &fr->cg0, &fr->hcg); -+ pd_at_range(cr, &a0, &a1); -+ } -+ else -+ { -+ top = gmx_mtop_generate_local_top(top_global, ir); -+ -+ a0 = 0; -+ a1 = top_global->natoms; -+ } -+ -+ forcerec_set_excl_load(fr, top, cr); -+ -+ state = partdec_init_local_state(cr, state_global); -+ f_global = f; -+ -+ atoms2md(top_global, ir, 0, NULL, a0, a1-a0, mdatoms); -+ -+ if (vsite) -+ { -+ set_vsite_top(vsite, top, mdatoms, cr); -+ } -+ -+ if (ir->ePBC != epbcNONE && !fr->bMolPBC) -+ { -+ graph = mk_graph(fplog, &(top->idef), 0, top_global->natoms, FALSE, FALSE); -+ } -+ -+ if (shellfc) -+ { -+ make_local_shells(cr, mdatoms, shellfc); -+ } -+ -+ setup_bonded_threading(fr, &top->idef); -+ -+ if (ir->pull && PAR(cr)) -+ { -+ dd_make_local_pull_groups(NULL, ir->pull, mdatoms); -+ } -+ } -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ /* Distribute the charge groups over the nodes from the master node */ -+ dd_partition_system(fplog, ir->init_step, cr, TRUE, 1, -+ state_global, top_global, ir, -+ state, &f, mdatoms, top, fr, -+ vsite, shellfc, constr, -+ nrnb, wcycle, FALSE); -+ -+ } -+ -+ update_mdatoms(mdatoms, state->lambda[efptMASS]); -+ -+ if (opt2bSet("-cpi", nfile, fnm)) -+ { -+ bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr); -+ } -+ else -+ { -+ bStateFromCP = FALSE; -+ } -+ -+ if (ir->bExpanded) -+ { -+ init_expanded_ensemble(bStateFromCP,ir,&mcrng,&state->dfhist); -+ } -+ -+ if (MASTER(cr)) -+ { -+ if (bStateFromCP) -+ { -+ /* Update mdebin with energy history if appending to output files */ -+ if (Flags & MD_APPENDFILES) -+ { -+ 
restore_energyhistory_from_state(mdebin, &state_global->enerhist); -+ } -+ else -+ { -+ /* We might have read an energy history from checkpoint, -+ * free the allocated memory and reset the counts. -+ */ -+ done_energyhistory(&state_global->enerhist); -+ init_energyhistory(&state_global->enerhist); -+ } -+ } -+ /* Set the initial energy history in state by updating once */ -+ update_energyhistory(&state_global->enerhist, mdebin); -+ } -+ -+ if ((state->flags & (1<flags & (1<mols.nr; -+ snew(grpindex, gnx); -+ for (i = 0; (i < gnx); i++) -+ { -+ grpindex[i] = i; -+ } -+ } -+ -+ if (repl_ex_nst > 0) -+ { -+ /* We need to be sure replica exchange can only occur -+ * when the energies are current */ -+ check_nst_param(fplog, cr, "nstcalcenergy", ir->nstcalcenergy, -+ "repl_ex_nst", &repl_ex_nst); -+ /* This check needs to happen before inter-simulation -+ * signals are initialized, too */ -+ } -+ if (repl_ex_nst > 0 && MASTER(cr)) -+ { -+ repl_ex = init_replica_exchange(fplog, cr->ms, state_global, ir, -+ repl_ex_nst, repl_ex_nex, repl_ex_seed); -+ } -+ -+ /* PME tuning is only supported with GPUs or PME nodes and not with rerun. -+ * With perturbed charges with soft-core we should not change the cut-off. -+ */ -+ if ((Flags & MD_TUNEPME) && -+ EEL_PME(fr->eeltype) && -+ ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) && -+ !(ir->efep != efepNO && mdatoms->nChargePerturbed > 0 && ir->fepvals->bScCoul) && -+ !bRerunMD) -+ { -+ pme_loadbal_init(&pme_loadbal, ir, state->box, fr->ic, fr->pmedata); -+ cycles_pmes = 0; -+ if (cr->duty & DUTY_PME) -+ { -+ /* Start tuning right away, as we can't measure the load */ -+ bPMETuneRunning = TRUE; -+ } -+ else -+ { -+ /* Separate PME nodes, we can measure the PP/PME load balance */ -+ bPMETuneTry = TRUE; -+ } -+ } -+ -+ if (!ir->bContinuation && !bRerunMD) -+ { -+ if (mdatoms->cFREEZE && (state->flags & (1<start; i < mdatoms->start+mdatoms->homenr; i++) -+ { -+ for (m = 0; m < DIM; m++) -+ { -+ if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m]) -+ { -+ state->v[i][m] = 0; -+ } -+ } -+ } -+ } -+ -+ if (constr) -+ { -+ /* Constrain the initial coordinates and velocities */ -+ do_constrain_first(fplog, constr, ir, mdatoms, state, f, -+ graph, cr, nrnb, fr, top, shake_vir); -+ } -+ if (vsite) -+ { -+ /* Construct the virtual sites for the initial configuration */ -+ construct_vsites(fplog, vsite, state->x, nrnb, ir->delta_t, NULL, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, graph, cr, state->box); -+ } -+ } -+ -+ debug_gmx(); -+ -+ /* set free energy calculation frequency as the minimum -+ greatest common denominator of nstdhdl, nstexpanded, and repl_ex_nst*/ -+ nstfep = ir->fepvals->nstdhdl; -+ if (ir->bExpanded) -+ { -+ nstfep = gmx_greatest_common_divisor(ir->fepvals->nstdhdl,nstfep); -+ } -+ if (repl_ex_nst > 0) -+ { -+ nstfep = gmx_greatest_common_divisor(repl_ex_nst,nstfep); -+ } -+ -+ /* I'm assuming we need global communication the first time! MRS */ -+ cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT -+ | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM : 0) -+ | (bVV ? CGLO_PRESSURE : 0) -+ | (bVV ? CGLO_CONSTRAINT : 0) -+ | (bRerunMD ? CGLO_RERUNMD : 0) -+ | ((Flags & MD_READ_EKIN) ? 
CGLO_READEKIN : 0)); -+ -+ bSumEkinhOld = FALSE; -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &pcurr, top_global->natoms, &bSumEkinhOld, cglo_flags); -+ if (ir->eI == eiVVAK) -+ { -+ /* a second call to get the half step temperature initialized as well */ -+ /* we do the same call as above, but turn the pressure off -- internally to -+ compute_globals, this is recognized as a velocity verlet half-step -+ kinetic energy calculation. This minimized excess variables, but -+ perhaps loses some logic?*/ -+ -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &pcurr, top_global->natoms, &bSumEkinhOld, -+ cglo_flags &~(CGLO_STOPCM | CGLO_PRESSURE)); -+ } -+ -+ /* Calculate the initial half step temperature, and save the ekinh_old */ -+ if (!(Flags & MD_STARTFROMCPT)) -+ { -+ for (i = 0; (i < ir->opts.ngtc); i++) -+ { -+ copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old); -+ } -+ } -+ if (ir->eI != eiVV) -+ { -+ enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step, -+ and there is no previous step */ -+ } -+ -+ /* if using an iterative algorithm, we need to create a working directory for the state. */ -+ if (bIterativeCase) -+ { -+ bufstate = init_bufstate(state); -+ } -+ if (bFFscan) -+ { -+ snew(xcopy, state->natoms); -+ snew(vcopy, state->natoms); -+ copy_rvecn(state->x, xcopy, 0, state->natoms); -+ copy_rvecn(state->v, vcopy, 0, state->natoms); -+ copy_mat(state->box, boxcopy); -+ } -+ -+ /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter -+ temperature control */ -+ trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter); -+ -+ if (MASTER(cr)) -+ { -+ if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS) -+ { -+ fprintf(fplog, -+ "RMS relative constraint deviation after constraining: %.2e\n", -+ constr_rmsd(constr, FALSE)); -+ } -+ if (EI_STATE_VELOCITY(ir->eI)) -+ { -+ fprintf(fplog, "Initial temperature: %g K\n", enerd->term[F_TEMP]); -+ } -+ if (bRerunMD) -+ { -+ fprintf(stderr, "starting md rerun '%s', reading coordinates from" -+ " input trajectory '%s'\n\n", -+ *(top_global->name), opt2fn("-rerun", nfile, fnm)); -+ if (bVerbose) -+ { -+ fprintf(stderr, "Calculated time to finish depends on nsteps from " -+ "run input file,\nwhich may not correspond to the time " -+ "needed to process input trajectory.\n\n"); -+ } -+ } -+ else -+ { -+ char tbuf[20]; -+ fprintf(stderr, "starting mdrun '%s'\n", -+ *(top_global->name)); -+ if (ir->nsteps >= 0) -+ { -+ sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t); -+ } -+ else -+ { -+ sprintf(tbuf, "%s", "infinite"); -+ } -+ if (ir->init_step > 0) -+ { -+ fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n", -+ gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf, -+ gmx_step_str(ir->init_step, sbuf2), -+ ir->init_step*ir->delta_t); -+ } -+ else -+ { -+ fprintf(stderr, "%s steps, %s ps.\n", -+ gmx_step_str(ir->nsteps, sbuf), tbuf); -+ } -+ } -+ fprintf(fplog, "\n"); -+ } -+ -+ print_start(fplog, cr, runtime, "mdrun"); -+ runtime_start(runtime); -+ wallcycle_start(wcycle, ewcRUN); -+ -+ /* safest point to do file checkpointing is here. 
More general point would be immediately before integrator call */ -+#ifdef GMX_FAHCORE -+ chkpt_ret = fcCheckPointParallel( cr->nodeid, -+ NULL, 0); -+ if (chkpt_ret == 0) -+ { -+ gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 ); -+ } -+#endif -+ -+ debug_gmx(); -+ /*********************************************************** -+ * -+ * Loop over MD steps -+ * -+ ************************************************************/ -+ -+ /* if rerunMD then read coordinates and velocities from input trajectory */ -+ if (bRerunMD) -+ { -+ if (getenv("GMX_FORCE_UPDATE")) -+ { -+ bForceUpdate = TRUE; -+ } -+ -+ rerun_fr.natoms = 0; -+ if (MASTER(cr)) -+ { -+ bNotLastFrame = read_first_frame(oenv, &status, -+ opt2fn("-rerun", nfile, fnm), -+ &rerun_fr, TRX_NEED_X | TRX_READ_V); -+ if (rerun_fr.natoms != top_global->natoms) -+ { -+ gmx_fatal(FARGS, -+ "Number of atoms in trajectory (%d) does not match the " -+ "run input file (%d)\n", -+ rerun_fr.natoms, top_global->natoms); -+ } -+ if (ir->ePBC != epbcNONE) -+ { -+ if (!rerun_fr.bBox) -+ { -+ gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f does not contain a box, while pbc is used", rerun_fr.step, rerun_fr.time); -+ } -+ if (max_cutoff2(ir->ePBC, rerun_fr.box) < sqr(fr->rlistlong)) -+ { -+ gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f has too small box dimensions", rerun_fr.step, rerun_fr.time); -+ } -+ } -+ } -+ -+ if (PAR(cr)) -+ { -+ rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame); -+ } -+ -+ if (ir->ePBC != epbcNONE) -+ { -+ /* Set the shift vectors. -+ * Necessary here when have a static box different from the tpr box. -+ */ -+ calc_shifts(rerun_fr.box, fr->shift_vec); -+ } -+ } -+ -+ /* loop over MD steps or if rerunMD to end of input trajectory */ -+ bFirstStep = TRUE; -+ /* Skip the first Nose-Hoover integration when we get the state from tpx */ -+ bStateFromTPX = !bStateFromCP; -+ bInitStep = bFirstStep && (bStateFromTPX || bVV); -+ bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep; -+ bLastStep = FALSE; -+ bSumEkinhOld = FALSE; -+ bExchanged = FALSE; -+ -+ init_global_signals(&gs, cr, ir, repl_ex_nst); -+ -+ step = ir->init_step; -+ step_rel = 0; -+ -+ if (ir->nstlist == -1) -+ { -+ init_nlistheuristics(&nlh, bGStatEveryStep, step); -+ } -+ -+ if (MULTISIM(cr) && (repl_ex_nst <= 0 )) -+ { -+ /* check how many steps are left in other sims */ -+ multisim_nsteps = get_multisim_nsteps(cr, ir->nsteps); -+ } -+ -+ -+ /* and stop now if we should */ -+ bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) || -+ ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps ))); -+ while (!bLastStep || (bRerunMD && bNotLastFrame)) -+ { -+ -+ wallcycle_start(wcycle, ewcSTEP); -+ -+ GMX_MPE_LOG(ev_timestep1); -+ -+ if (bRerunMD) -+ { -+ if (rerun_fr.bStep) -+ { -+ step = rerun_fr.step; -+ step_rel = step - ir->init_step; -+ } -+ if (rerun_fr.bTime) -+ { -+ t = rerun_fr.time; -+ } -+ else -+ { -+ t = step; -+ } -+ } -+ else -+ { -+ bLastStep = (step_rel == ir->nsteps); -+ t = t0 + step*ir->delta_t; -+ } -+ -+ if (ir->efep != efepNO || ir->bSimTemp) -+ { -+ /* find and set the current lambdas. If rerunning, we either read in a state, or a lambda value, -+ requiring different logic. 
*/ -+ -+ set_current_lambdas(step, ir->fepvals, bRerunMD, &rerun_fr, state_global, state, lam0); -+ bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl); -+ bDoFEP = (do_per_step(step, nstfep) && (ir->efep != efepNO)); -+ bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) -+ && (ir->bExpanded) && (step > 0) && (!bStartingFromCpt)); -+ } -+ -+ if (bSimAnn) -+ { -+ update_annealing_target_temp(&(ir->opts), t); -+ } -+ -+ if (bRerunMD) -+ { -+ if (!(DOMAINDECOMP(cr) && !MASTER(cr))) -+ { -+ for (i = 0; i < state_global->natoms; i++) -+ { -+ copy_rvec(rerun_fr.x[i], state_global->x[i]); -+ } -+ if (rerun_fr.bV) -+ { -+ for (i = 0; i < state_global->natoms; i++) -+ { -+ copy_rvec(rerun_fr.v[i], state_global->v[i]); -+ } -+ } -+ else -+ { -+ for (i = 0; i < state_global->natoms; i++) -+ { -+ clear_rvec(state_global->v[i]); -+ } -+ if (bRerunWarnNoV) -+ { -+ fprintf(stderr, "\nWARNING: Some frames do not contain velocities.\n" -+ " Ekin, temperature and pressure are incorrect,\n" -+ " the virial will be incorrect when constraints are present.\n" -+ "\n"); -+ bRerunWarnNoV = FALSE; -+ } -+ } -+ } -+ copy_mat(rerun_fr.box, state_global->box); -+ copy_mat(state_global->box, state->box); -+ -+ if (vsite && (Flags & MD_RERUN_VSITE)) -+ { -+ if (DOMAINDECOMP(cr)) -+ { -+ gmx_fatal(FARGS, "Vsite recalculation with -rerun is not implemented for domain decomposition, use particle decomposition"); -+ } -+ if (graph) -+ { -+ /* Following is necessary because the graph may get out of sync -+ * with the coordinates if we only have every N'th coordinate set -+ */ -+ mk_mshift(fplog, graph, fr->ePBC, state->box, state->x); -+ shift_self(graph, state->box, state->x); -+ } -+ construct_vsites(fplog, vsite, state->x, nrnb, ir->delta_t, state->v, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, graph, cr, state->box); -+ if (graph) -+ { -+ unshift_self(graph, state->box, state->x); -+ } -+ } -+ } -+ -+ /* Stop Center of Mass motion */ -+ bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm)); -+ -+ /* Copy back starting coordinates in case we're doing a forcefield scan */ -+ if (bFFscan) -+ { -+ for (ii = 0; (ii < state->natoms); ii++) -+ { -+ copy_rvec(xcopy[ii], state->x[ii]); -+ copy_rvec(vcopy[ii], state->v[ii]); -+ } -+ copy_mat(boxcopy, state->box); -+ } -+ -+ if (bRerunMD) -+ { -+ /* for rerun MD always do Neighbour Searching */ -+ bNS = (bFirstStep || ir->nstlist != 0); -+ bNStList = bNS; -+ } -+ else -+ { -+ /* Determine whether or not to do Neighbour Searching and LR */ -+ bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0); -+ -+ bNS = (bFirstStep || bExchanged || bNStList || bDoFEP || -+ (ir->nstlist == -1 && nlh.nabnsb > 0)); -+ -+ if (bNS && ir->nstlist == -1) -+ { -+ set_nlistheuristics(&nlh, bFirstStep || bExchanged || bDoFEP, step); -+ } -+ } -+ -+ /* check whether we should stop because another simulation has -+ stopped. 
*/ -+ if (MULTISIM(cr)) -+ { -+ if ( (multisim_nsteps >= 0) && (step_rel >= multisim_nsteps) && -+ (multisim_nsteps != ir->nsteps) ) -+ { -+ if (bNS) -+ { -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, -+ "Stopping simulation %d because another one has finished\n", -+ cr->ms->sim); -+ } -+ bLastStep = TRUE; -+ gs.sig[eglsCHKPT] = 1; -+ } -+ } -+ } -+ -+ /* < 0 means stop at next step, > 0 means stop at next NS step */ -+ if ( (gs.set[eglsSTOPCOND] < 0) || -+ ( (gs.set[eglsSTOPCOND] > 0) && (bNStList || ir->nstlist == 0) ) ) -+ { -+ bLastStep = TRUE; -+ } -+ -+ /* Determine whether or not to update the Born radii if doing GB */ -+ bBornRadii = bFirstStep; -+ if (ir->implicit_solvent && (step % ir->nstgbradii == 0)) -+ { -+ bBornRadii = TRUE; -+ } -+ -+ do_log = do_per_step(step, ir->nstlog) || bFirstStep || bLastStep; -+ do_verbose = bVerbose && -+ (step % stepout == 0 || bFirstStep || bLastStep); -+ -+ if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD)) -+ { -+ if (bRerunMD) -+ { -+ bMasterState = TRUE; -+ } -+ else -+ { -+ bMasterState = FALSE; -+ /* Correct the new box if it is too skewed */ -+ if (DYNAMIC_BOX(*ir)) -+ { -+ if (correct_box(fplog, step, state->box, graph)) -+ { -+ bMasterState = TRUE; -+ } -+ } -+ if (DOMAINDECOMP(cr) && bMasterState) -+ { -+ dd_collect_state(cr->dd, state, state_global); -+ } -+ } -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ /* Repartition the domain decomposition */ -+ wallcycle_start(wcycle, ewcDOMDEC); -+ dd_partition_system(fplog, step, cr, -+ bMasterState, nstglobalcomm, -+ state_global, top_global, ir, -+ state, &f, mdatoms, top, fr, -+ vsite, shellfc, constr, -+ nrnb, wcycle, -+ do_verbose && !bPMETuneRunning); -+ wallcycle_stop(wcycle, ewcDOMDEC); -+ /* If using an iterative integrator, reallocate space to match the decomposition */ -+ } -+ } -+ -+ if (MASTER(cr) && do_log && !bFFscan) -+ { -+ print_ebin_header(fplog, step, t, state->lambda[efptFEP]); /* can we improve the information printed here? */ -+ } -+ -+ if (ir->efep != efepNO) -+ { -+ update_mdatoms(mdatoms, state->lambda[efptMASS]); -+ } -+ -+ if ((bRerunMD && rerun_fr.bV) || bExchanged) -+ { -+ -+ /* We need the kinetic energy at minus the half step for determining -+ * the full step kinetic energy and possibly for T-coupling.*/ -+ /* This may not be quite working correctly yet . . . . */ -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &pcurr, top_global->natoms, &bSumEkinhOld, -+ CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE); -+ } -+ clear_mat(force_vir); -+ -+ /* Ionize the atoms if necessary */ -+ if (bIonize) -+ { -+ ionize(fplog, oenv, mdatoms, top_global, t, ir, state->x, state->v, -+ mdatoms->start, mdatoms->start+mdatoms->homenr, state->box, cr); -+ } -+ -+ /* Update force field in ffscan program */ -+ if (bFFscan) -+ { -+ if (update_forcefield(fplog, -+ nfile, fnm, fr, -+ mdatoms->nr, state->x, state->box)) -+ { -+ gmx_finalize_par(); -+ -+ exit(0); -+ } -+ } -+ -+ GMX_MPE_LOG(ev_timestep2); -+ -+ /* We write a checkpoint at this MD step when: -+ * either at an NS step when we signalled through gs, -+ * or at the last step (but not when we do not want confout), -+ * but never at the first step or with rerun. 
-+ */ -+ bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) || -+ (bLastStep && (Flags & MD_CONFOUT))) && -+ step > ir->init_step && !bRerunMD); -+ if (bCPT) -+ { -+ gs.set[eglsCHKPT] = 0; -+ } -+ -+ /* Determine the energy and pressure: -+ * at nstcalcenergy steps and at energy output steps (set below). -+ */ -+ if (EI_VV(ir->eI) && (!bInitStep)) -+ { -+ /* for vv, the first half of the integration actually corresponds -+ to the previous step. bCalcEner is only required to be evaluated on the 'next' step, -+ but the virial needs to be calculated on both the current step and the 'next' step. Future -+ reorganization may be able to get rid of one of the bCalcVir=TRUE steps. */ -+ -+ bCalcEner = do_per_step(step-1, ir->nstcalcenergy); -+ bCalcVir = bCalcEner || -+ (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple))); -+ } -+ else -+ { -+ bCalcEner = do_per_step(step, ir->nstcalcenergy); -+ bCalcVir = bCalcEner || -+ (ir->epc != epcNO && do_per_step(step, ir->nstpcouple)); -+ } -+ -+ /* Do we need global communication ? */ -+ bGStat = (bCalcVir || bCalcEner || bStopCM || -+ do_per_step(step, nstglobalcomm) || (bVV && IR_NVT_TROTTER(ir) && do_per_step(step-1, nstglobalcomm)) || -+ (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck)); -+ -+ do_ene = (do_per_step(step, ir->nstenergy) || bLastStep); -+ -+ if (do_ene || do_log) -+ { -+ bCalcVir = TRUE; -+ bCalcEner = TRUE; -+ bGStat = TRUE; -+ } -+ -+ /* these CGLO_ options remain the same throughout the iteration */ -+ cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) | -+ (bGStat ? CGLO_GSTAT : 0) -+ ); -+ -+ force_flags = (GMX_FORCE_STATECHANGED | -+ ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) | -+ GMX_FORCE_ALLFORCES | -+ GMX_FORCE_SEPLRF | -+ (bCalcVir ? GMX_FORCE_VIRIAL : 0) | -+ (bCalcEner ? GMX_FORCE_ENERGY : 0) | -+ (bDoFEP ? GMX_FORCE_DHDL : 0) -+ ); -+ -+ if (fr->bTwinRange) -+ { -+ if (do_per_step(step, ir->nstcalclr)) -+ { -+ force_flags |= GMX_FORCE_DO_LR; -+ } -+ } -+ -+ if (shellfc) -+ { -+ /* Now is the time to relax the shells */ -+ count = relax_shell_flexcon(fplog, cr, bVerbose, bFFscan ? step+1 : step, -+ ir, bNS, force_flags, -+ bStopCM, top, top_global, -+ constr, enerd, fcd, -+ state, f, force_vir, mdatoms, -+ nrnb, wcycle, graph, groups, -+ shellfc, fr, bBornRadii, t, mu_tot, -+ state->natoms, &bConverged, vsite, -+ outf->fp_field); -+ tcount += count; -+ -+ if (bConverged) -+ { -+ nconverged++; -+ } -+ } -+ else -+ { -+ /* The coordinates (x) are shifted (to get whole molecules) -+ * in do_force. -+ * This is parallellized as well, and does communication too. -+ * Check comments in sim_util.c -+ */ -+ do_force(fplog, cr, ir, step, nrnb, wcycle, top, top_global, groups, -+ state->box, state->x, &state->hist, -+ f, force_vir, mdatoms, enerd, fcd, -+ state->lambda, graph, -+ fr, vsite, mu_tot, t, outf->fp_field, ed, bBornRadii, -+ (bNS ? 
GMX_FORCE_NS : 0) | force_flags); -+ } -+ -+ GMX_BARRIER(cr->mpi_comm_mygroup); -+ -+ if (bTCR) -+ { -+ mu_aver = calc_mu_aver(cr, state->x, mdatoms->chargeA, -+ mu_tot, &top_global->mols, mdatoms, gnx, grpindex); -+ } -+ -+ if (bTCR && bFirstStep) -+ { -+ tcr = init_coupling(fplog, nfile, fnm, cr, fr, mdatoms, &(top->idef)); -+ fprintf(fplog, "Done init_coupling\n"); -+ fflush(fplog); -+ } -+ -+ if (bVV && !bStartingFromCpt && !bRerunMD) -+ /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */ -+ { -+ if (ir->eI == eiVV && bInitStep) -+ { -+ /* if using velocity verlet with full time step Ekin, -+ * take the first half step only to compute the -+ * virial for the first step. From there, -+ * revert back to the initial coordinates -+ * so that the input is actually the initial step. -+ */ -+ copy_rvecn(state->v, cbuf, 0, state->natoms); /* should make this better for parallelizing? */ -+ } -+ else -+ { -+ /* this is for NHC in the Ekin(t+dt/2) version of vv */ -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1); -+ } -+ -+ /* If we are using twin-range interactions where the long-range component -+ * is only evaluated every nstcalclr>1 steps, we should do a special update -+ * step to combine the long-range forces on these steps. -+ * For nstcalclr=1 this is not done, since the forces would have been added -+ * directly to the short-range forces already. -+ * -+ * TODO Remove various aspects of VV+twin-range in master -+ * branch, because VV integrators did not ever support -+ * twin-range multiple time stepping with constraints. -+ */ -+ bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); -+ -+ update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, -+ f, bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd, -+ ekind, M, wcycle, upd, bInitStep, etrtVELOCITY1, -+ cr, nrnb, constr, &top->idef); -+ -+ if (bIterativeCase && do_per_step(step-1, ir->nstpcouple) && !bInitStep) -+ { -+ gmx_iterate_init(&iterate, TRUE); -+ } -+ /* for iterations, we save these vectors, as we will be self-consistently iterating -+ the calculations */ -+ -+ /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */ -+ -+ /* save the state */ -+ if (iterate.bIterationActive) -+ { -+ copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts)); -+ } -+ -+ bFirstIterate = TRUE; -+ while (bFirstIterate || iterate.bIterationActive) -+ { -+ if (iterate.bIterationActive) -+ { -+ copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts)); -+ if (bFirstIterate && bTrotter) -+ { -+ /* The first time through, we need a decent first estimate -+ of veta(t+dt) to compute the constraints. Do -+ this by computing the box volume part of the -+ trotter integration at this time. Nothing else -+ should be changed by this routine here. If -+ !(first time), we start with the previous value -+ of veta. */ -+ -+ veta_save = state->veta; -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ0); -+ vetanew = state->veta; -+ state->veta = veta_save; -+ } -+ } -+ -+ bOK = TRUE; -+ if (!bRerunMD || rerun_fr.bV || bForceUpdate) /* Why is rerun_fr.bV here? Unclear. 
*/ -+ { -+ update_constraints(fplog, step, NULL, ir, ekind, mdatoms, -+ state, fr->bMolPBC, graph, f, -+ &top->idef, shake_vir, NULL, -+ cr, nrnb, wcycle, upd, constr, -+ bInitStep, TRUE, bCalcVir, vetanew); -+ -+ if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1) -+ { -+ /* Correct the virial for multiple time stepping */ -+ m_sub(shake_vir, fr->vir_twin_constr, shake_vir); -+ } -+ -+ if (!bOK && !bFFscan) -+ { -+ gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains"); -+ } -+ -+ } -+ else if (graph) -+ { -+ /* Need to unshift here if a do_force has been -+ called in the previous step */ -+ unshift_self(graph, state->box, state->x); -+ } -+ -+ /* if VV, compute the pressure and constraints */ -+ /* For VV2, we strictly only need this if using pressure -+ * control, but we really would like to have accurate pressures -+ * printed out. -+ * Think about ways around this in the future? -+ * For now, keep this choice in comments. -+ */ -+ /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */ -+ /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/ -+ bPres = TRUE; -+ bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK)); -+ if (bCalcEner && ir->eI == eiVVAK) /*MRS: 7/9/2010 -- this still doesn't fix it?*/ -+ { -+ bSumEkinhOld = TRUE; -+ } -+ /* for vv, the first half of the integration actually corresponds to the previous step. -+ So we need information from the last step in the first half of the integration */ -+ if (bGStat || do_per_step(step-1, nstglobalcomm)) -+ { -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &pcurr, top_global->natoms, &bSumEkinhOld, -+ cglo_flags -+ | CGLO_ENERGY -+ | (bTemp ? CGLO_TEMPERATURE : 0) -+ | (bPres ? CGLO_PRESSURE : 0) -+ | (bPres ? CGLO_CONSTRAINT : 0) -+ | ((iterate.bIterationActive) ? CGLO_ITERATE : 0) -+ | (bFirstIterate ? CGLO_FIRSTITERATE : 0) -+ | CGLO_SCALEEKIN -+ ); -+ /* explanation of above: -+ a) We compute Ekin at the full time step -+ if 1) we are using the AveVel Ekin, and it's not the -+ initial step, or 2) if we are using AveEkin, but need the full -+ time step kinetic energy for the pressure (always true now, since we want accurate statistics). -+ b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in -+ EkinAveVel because it's needed for the pressure */ -+ } -+ /* temperature scaling and pressure scaling to produce the extended variables at t+dt */ -+ if (!bInitStep) -+ { -+ if (bTrotter) -+ { -+ m_add(force_vir, shake_vir, total_vir); /* we need the un-dispersion corrected total vir here */ -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2); -+ } -+ else -+ { -+ if (bExchanged) -+ { -+ -+ /* We need the kinetic energy at minus the half step for determining -+ * the full step kinetic energy and possibly for T-coupling.*/ -+ /* This may not be quite working correctly yet . . . . 
*/ -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &pcurr, top_global->natoms, &bSumEkinhOld, -+ CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE); -+ } -+ } -+ } -+ -+ if (iterate.bIterationActive && -+ done_iterating(cr, fplog, step, &iterate, bFirstIterate, -+ state->veta, &vetanew)) -+ { -+ break; -+ } -+ bFirstIterate = FALSE; -+ } -+ -+ if (bTrotter && !bInitStep) -+ { -+ copy_mat(shake_vir, state->svir_prev); -+ copy_mat(force_vir, state->fvir_prev); -+ if (IR_NVT_TROTTER(ir) && ir->eI == eiVV) -+ { -+ /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */ -+ enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, NULL, (ir->eI == eiVV), FALSE, FALSE); -+ enerd->term[F_EKIN] = trace(ekind->ekin); -+ } -+ } -+ /* if it's the initial step, we performed this first step just to get the constraint virial */ -+ if (bInitStep && ir->eI == eiVV) -+ { -+ copy_rvecn(cbuf, state->v, 0, state->natoms); -+ } -+ -+ GMX_MPE_LOG(ev_timestep1); -+ } -+ -+ /* MRS -- now done iterating -- compute the conserved quantity */ -+ if (bVV) -+ { -+ saved_conserved_quantity = compute_conserved_from_auxiliary(ir, state, &MassQ); -+ if (ir->eI == eiVV) -+ { -+ last_ekin = enerd->term[F_EKIN]; -+ } -+ if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) -+ { -+ saved_conserved_quantity -= enerd->term[F_DISPCORR]; -+ } -+ /* sum up the foreign energy and dhdl terms for vv. currently done every step so that dhdl is correct in the .edr */ -+ if (!bRerunMD) -+ { -+ sum_dhdl(enerd, state->lambda, ir->fepvals); -+ } -+ } -+ -+ /* ######## END FIRST UPDATE STEP ############## */ -+ /* ######## If doing VV, we now have v(dt) ###### */ -+ if (bDoExpanded) -+ { -+ /* perform extended ensemble sampling in lambda - we don't -+ actually move to the new state before outputting -+ statistics, but if performing simulated tempering, we -+ do update the velocities and the tau_t. */ -+ -+ lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, &state->dfhist, step, mcrng, state->v, mdatoms); -+ /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */ -+ copy_df_history(&state_global->dfhist,&state->dfhist); -+ } -+ /* ################## START TRAJECTORY OUTPUT ################# */ -+ -+ /* Now we have the energies and forces corresponding to the -+ * coordinates at time t. We must output all of this before -+ * the update. 
-+ * for RerunMD t is read from input trajectory -+ */ -+ GMX_MPE_LOG(ev_output_start); -+ -+ mdof_flags = 0; -+ if (do_per_step(step, ir->nstxout)) -+ { -+ mdof_flags |= MDOF_X; -+ } -+ if (do_per_step(step, ir->nstvout)) -+ { -+ mdof_flags |= MDOF_V; -+ } -+ if (do_per_step(step, ir->nstfout)) -+ { -+ mdof_flags |= MDOF_F; -+ } -+ if (do_per_step(step, ir->nstxtcout)) -+ { -+ mdof_flags |= MDOF_XTC; -+ } -+ if (bCPT) -+ { -+ mdof_flags |= MDOF_CPT; -+ } -+ ; -+ -+#if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP) -+ if (bLastStep) -+ { -+ /* Enforce writing positions and velocities at end of run */ -+ mdof_flags |= (MDOF_X | MDOF_V); -+ } -+#endif -+#ifdef GMX_FAHCORE -+ if (MASTER(cr)) -+ { -+ fcReportProgress( ir->nsteps, step ); -+ } -+ -+#if defined(__native_client__) -+ fcCheckin(MASTER(cr)); -+#endif -+ -+ /* sync bCPT and fc record-keeping */ -+ if (bCPT && MASTER(cr)) -+ { -+ fcRequestCheckPoint(); -+ } -+#endif -+ -+ if (mdof_flags != 0) -+ { -+ wallcycle_start(wcycle, ewcTRAJ); -+ if (bCPT) -+ { -+ if (state->flags & (1<flags & (1<ekinstate.bUpToDate = FALSE; -+ } -+ else -+ { -+ update_ekinstate(&state_global->ekinstate, ekind); -+ state_global->ekinstate.bUpToDate = TRUE; -+ } -+ update_energyhistory(&state_global->enerhist, mdebin); -+ } -+ } -+ write_traj(fplog, cr, outf, mdof_flags, top_global, -+ step, t, state, state_global, f, f_global, &n_xtc, &x_xtc); -+ if (bCPT) -+ { -+ nchkpt++; -+ bCPT = FALSE; -+ } -+ debug_gmx(); -+ if (bLastStep && step_rel == ir->nsteps && -+ (Flags & MD_CONFOUT) && MASTER(cr) && -+ !bRerunMD && !bFFscan) -+ { -+ /* x and v have been collected in write_traj, -+ * because a checkpoint file will always be written -+ * at the last step. -+ */ -+ fprintf(stderr, "\nWriting final coordinates.\n"); -+ if (fr->bMolPBC) -+ { -+ /* Make molecules whole only for confout writing */ -+ do_pbc_mtop(fplog, ir->ePBC, state->box, top_global, state_global->x); -+ } -+ write_sto_conf_mtop(ftp2fn(efSTO, nfile, fnm), -+ *top_global->name, top_global, -+ state_global->x, state_global->v, -+ ir->ePBC, state->box); -+ debug_gmx(); -+ } -+ wallcycle_stop(wcycle, ewcTRAJ); -+ } -+ GMX_MPE_LOG(ev_output_finish); -+ -+ /* kludge -- virial is lost with restart for NPT control. Must restart */ -+ if (bStartingFromCpt && bVV) -+ { -+ copy_mat(state->svir_prev, shake_vir); -+ copy_mat(state->fvir_prev, force_vir); -+ } -+ /* ################## END TRAJECTORY OUTPUT ################ */ -+ -+ /* Determine the wallclock run time up till now */ -+ run_time = gmx_gettime() - (double)runtime->real; -+ -+ /* Check whether everything is still allright */ -+ if (((int)gmx_get_stop_condition() > handled_stop_condition) -+#ifdef GMX_THREAD_MPI -+ && MASTER(cr) -+#endif -+ ) -+ { -+ /* this is just make gs.sig compatible with the hack -+ of sending signals around by MPI_Reduce with together with -+ other floats */ -+ if (gmx_get_stop_condition() == gmx_stop_cond_next_ns) -+ { -+ gs.sig[eglsSTOPCOND] = 1; -+ } -+ if (gmx_get_stop_condition() == gmx_stop_cond_next) -+ { -+ gs.sig[eglsSTOPCOND] = -1; -+ } -+ /* < 0 means stop at next step, > 0 means stop at next NS step */ -+ if (fplog) -+ { -+ fprintf(fplog, -+ "\n\nReceived the %s signal, stopping at the next %sstep\n\n", -+ gmx_get_signal_name(), -+ gs.sig[eglsSTOPCOND] == 1 ? "NS " : ""); -+ fflush(fplog); -+ } -+ fprintf(stderr, -+ "\n\nReceived the %s signal, stopping at the next %sstep\n\n", -+ gmx_get_signal_name(), -+ gs.sig[eglsSTOPCOND] == 1 ? 
"NS " : ""); -+ fflush(stderr); -+ handled_stop_condition = (int)gmx_get_stop_condition(); -+ } -+ else if (MASTER(cr) && (bNS || ir->nstlist <= 0) && -+ (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) && -+ gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0) -+ { -+ /* Signal to terminate the run */ -+ gs.sig[eglsSTOPCOND] = 1; -+ if (fplog) -+ { -+ fprintf(fplog, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99); -+ } -+ fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99); -+ } -+ -+ if (bResetCountersHalfMaxH && MASTER(cr) && -+ run_time > max_hours*60.0*60.0*0.495) -+ { -+ gs.sig[eglsRESETCOUNTERS] = 1; -+ } -+ -+ if (ir->nstlist == -1 && !bRerunMD) -+ { -+ /* When bGStatEveryStep=FALSE, global_stat is only called -+ * when we check the atom displacements, not at NS steps. -+ * This means that also the bonded interaction count check is not -+ * performed immediately after NS. Therefore a few MD steps could -+ * be performed with missing interactions. -+ * But wrong energies are never written to file, -+ * since energies are only written after global_stat -+ * has been called. -+ */ -+ if (step >= nlh.step_nscheck) -+ { -+ nlh.nabnsb = natoms_beyond_ns_buffer(ir, fr, &top->cgs, -+ nlh.scale_tot, state->x); -+ } -+ else -+ { -+ /* This is not necessarily true, -+ * but step_nscheck is determined quite conservatively. -+ */ -+ nlh.nabnsb = 0; -+ } -+ } -+ -+ /* In parallel we only have to check for checkpointing in steps -+ * where we do global communication, -+ * otherwise the other nodes don't know. -+ */ -+ if (MASTER(cr) && ((bGStat || !PAR(cr)) && -+ cpt_period >= 0 && -+ (cpt_period == 0 || -+ run_time >= nchkpt*cpt_period*60.0)) && -+ gs.set[eglsCHKPT] == 0) -+ { -+ gs.sig[eglsCHKPT] = 1; -+ } -+ -+ /* at the start of step, randomize or scale the velocities (trotter done elsewhere) */ -+ if (EI_VV(ir->eI)) -+ { -+ if (!bInitStep) -+ { -+ update_tcouple(fplog, step, ir, state, ekind, wcycle, upd, &MassQ, mdatoms); -+ } -+ if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */ -+ { -+ gmx_bool bIfRandomize; -+ bIfRandomize = update_randomize_velocities(ir, step, mdatoms, state, upd, &top->idef, constr, DOMAINDECOMP(cr)); -+ /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */ -+ if (constr && bIfRandomize) -+ { -+ update_constraints(fplog, step, NULL, ir, ekind, mdatoms, -+ state, fr->bMolPBC, graph, f, -+ &top->idef, tmp_vir, NULL, -+ cr, nrnb, wcycle, upd, constr, -+ bInitStep, TRUE, bCalcVir, vetanew); -+ } -+ } -+ } -+ -+ if (bIterativeCase && do_per_step(step, ir->nstpcouple)) -+ { -+ gmx_iterate_init(&iterate, TRUE); -+ /* for iterations, we save these vectors, as we will be redoing the calculations */ -+ copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts)); -+ } -+ -+ bFirstIterate = TRUE; -+ while (bFirstIterate || iterate.bIterationActive) -+ { -+ /* We now restore these vectors to redo the calculation with improved extended variables */ -+ if (iterate.bIterationActive) -+ { -+ copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts)); -+ } -+ -+ /* We make the decision to break or not -after- the calculation of Ekin and Pressure, -+ so scroll down for that logic */ -+ -+ /* ######### START SECOND UPDATE STEP ################# */ -+ GMX_MPE_LOG(ev_update_start); -+ /* Box is changed in update() when we do 
pressure coupling, -+ * but we should still use the old box for energy corrections and when -+ * writing it to the energy file, so it matches the trajectory files for -+ * the same timestep above. Make a copy in a separate array. -+ */ -+ copy_mat(state->box, lastbox); -+ -+ bOK = TRUE; -+ dvdl_constr = 0; -+ -+ if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate)) -+ { -+ wallcycle_start(wcycle, ewcUPDATE); -+ /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */ -+ if (bTrotter) -+ { -+ if (iterate.bIterationActive) -+ { -+ if (bFirstIterate) -+ { -+ scalevir = 1; -+ } -+ else -+ { -+ /* we use a new value of scalevir to converge the iterations faster */ -+ scalevir = tracevir/trace(shake_vir); -+ } -+ msmul(shake_vir, scalevir, shake_vir); -+ m_add(force_vir, shake_vir, total_vir); -+ clear_mat(shake_vir); -+ } -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3); -+ /* We can only do Berendsen coupling after we have summed -+ * the kinetic energy or virial. Since the happens -+ * in global_state after update, we should only do it at -+ * step % nstlist = 1 with bGStatEveryStep=FALSE. -+ */ -+ } -+ else -+ { -+ update_tcouple(fplog, step, ir, state, ekind, wcycle, upd, &MassQ, mdatoms); -+ update_pcouple(fplog, step, ir, state, pcoupl_mu, M, wcycle, -+ upd, bInitStep); -+ } -+ -+ if (bVV) -+ { -+ bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); -+ -+ /* velocity half-step update */ -+ update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, -+ bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd, -+ ekind, M, wcycle, upd, FALSE, etrtVELOCITY2, -+ cr, nrnb, constr, &top->idef); -+ } -+ -+ /* Above, initialize just copies ekinh into ekin, -+ * it doesn't copy position (for VV), -+ * and entire integrator for MD. -+ */ -+ -+ if (ir->eI == eiVVAK) -+ { -+ copy_rvecn(state->x, cbuf, 0, state->natoms); -+ } -+ bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); -+ -+ update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, -+ bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd, -+ ekind, M, wcycle, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef); -+ wallcycle_stop(wcycle, ewcUPDATE); -+ -+ update_constraints(fplog, step, &dvdl_constr, ir, ekind, mdatoms, state, -+ fr->bMolPBC, graph, f, -+ &top->idef, shake_vir, force_vir, -+ cr, nrnb, wcycle, upd, constr, -+ bInitStep, FALSE, bCalcVir, state->veta); -+ -+ if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1) -+ { -+ /* Correct the virial for multiple time stepping */ -+ m_sub(shake_vir, fr->vir_twin_constr, shake_vir); -+ } -+ -+ if (ir->eI == eiVVAK) -+ { -+ /* erase F_EKIN and F_TEMP here? 
*/ -+ /* just compute the kinetic energy at the half step to perform a trotter step */ -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, NULL, FALSE, lastbox, -+ top_global, &pcurr, top_global->natoms, &bSumEkinhOld, -+ cglo_flags | CGLO_TEMPERATURE -+ ); -+ wallcycle_start(wcycle, ewcUPDATE); -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4); -+ /* now we know the scaling, we can compute the positions again again */ -+ copy_rvecn(cbuf, state->x, 0, state->natoms); -+ -+ bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); -+ -+ update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, -+ bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd, -+ ekind, M, wcycle, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef); -+ wallcycle_stop(wcycle, ewcUPDATE); -+ -+ /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */ -+ /* are the small terms in the shake_vir here due -+ * to numerical errors, or are they important -+ * physically? I'm thinking they are just errors, but not completely sure. -+ * For now, will call without actually constraining, constr=NULL*/ -+ update_constraints(fplog, step, NULL, ir, ekind, mdatoms, -+ state, fr->bMolPBC, graph, f, -+ &top->idef, tmp_vir, force_vir, -+ cr, nrnb, wcycle, upd, NULL, -+ bInitStep, FALSE, bCalcVir, -+ state->veta); -+ } -+ if (!bOK && !bFFscan) -+ { -+ gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains"); -+ } -+ -+ if (fr->bSepDVDL && fplog && do_log) -+ { -+ fprintf(fplog, sepdvdlformat, "Constraint dV/dl", 0.0, dvdl_constr); -+ } -+ if (bVV) -+ { -+ /* this factor or 2 correction is necessary -+ because half of the constraint force is removed -+ in the vv step, so we have to double it. See -+ the Redmine issue #1255. It is not yet clear -+ if the factor of 2 is exact, or just a very -+ good approximation, and this will be -+ investigated. The next step is to see if this -+ can be done adding a dhdl contribution from the -+ rattle step, but this is somewhat more -+ complicated with the current code. Will be -+ investigated, hopefully for 4.6.3. However, -+ this current solution is much better than -+ having it completely wrong. -+ */ -+ enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr; -+ } -+ else -+ { -+ enerd->term[F_DVDL_CONSTR] += dvdl_constr; -+ } -+ } -+ else if (graph) -+ { -+ /* Need to unshift here */ -+ unshift_self(graph, state->box, state->x); -+ } -+ -+ GMX_BARRIER(cr->mpi_comm_mygroup); -+ GMX_MPE_LOG(ev_update_finish); -+ -+ if (vsite != NULL) -+ { -+ wallcycle_start(wcycle, ewcVSITECONSTR); -+ if (graph != NULL) -+ { -+ shift_self(graph, state->box, state->x); -+ } -+ construct_vsites(fplog, vsite, state->x, nrnb, ir->delta_t, state->v, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, graph, cr, state->box); -+ -+ if (graph != NULL) -+ { -+ unshift_self(graph, state->box, state->x); -+ } -+ wallcycle_stop(wcycle, ewcVSITECONSTR); -+ } -+ -+ /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints ############ */ -+ /* With Leap-Frog we can skip compute_globals at -+ * non-communication steps, but we need to calculate -+ * the kinetic energy one step before communication. 
-+ */ -+ if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm))) -+ { -+ if (ir->nstlist == -1 && bFirstIterate) -+ { -+ gs.sig[eglsNABNSB] = nlh.nabnsb; -+ } -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, -+ bFirstIterate ? &gs : NULL, -+ (step_rel % gs.nstms == 0) && -+ (multisim_nsteps < 0 || (step_rel < multisim_nsteps)), -+ lastbox, -+ top_global, &pcurr, top_global->natoms, &bSumEkinhOld, -+ cglo_flags -+ | (!EI_VV(ir->eI) || bRerunMD ? CGLO_ENERGY : 0) -+ | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0) -+ | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) -+ | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0) -+ | (iterate.bIterationActive ? CGLO_ITERATE : 0) -+ | (bFirstIterate ? CGLO_FIRSTITERATE : 0) -+ | CGLO_CONSTRAINT -+ ); -+ if (ir->nstlist == -1 && bFirstIterate) -+ { -+ nlh.nabnsb = gs.set[eglsNABNSB]; -+ gs.set[eglsNABNSB] = 0; -+ } -+ } -+ /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */ -+ /* ############# END CALC EKIN AND PRESSURE ################# */ -+ -+ /* Note: this is OK, but there are some numerical precision issues with using the convergence of -+ the virial that should probably be addressed eventually. state->veta has better properies, -+ but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could -+ generate the new shake_vir, but test the veta value for convergence. This will take some thought. */ -+ -+ if (iterate.bIterationActive && -+ done_iterating(cr, fplog, step, &iterate, bFirstIterate, -+ trace(shake_vir), &tracevir)) -+ { -+ break; -+ } -+ bFirstIterate = FALSE; -+ } -+ -+ if (!bVV || bRerunMD) -+ { -+ /* sum up the foreign energy and dhdl terms for md and sd. currently done every step so that dhdl is correct in the .edr */ -+ sum_dhdl(enerd, state->lambda, ir->fepvals); -+ } -+ update_box(fplog, step, ir, mdatoms, state, graph, f, -+ ir->nstlist == -1 ? &nlh.scale_tot : NULL, pcoupl_mu, nrnb, wcycle, upd, bInitStep, FALSE); -+ -+ /* ################# END UPDATE STEP 2 ################# */ -+ /* #### We now have r(t+dt) and v(t+dt/2) ############# */ -+ -+ /* The coordinates (x) were unshifted in update */ -+ if (bFFscan && (shellfc == NULL || bConverged)) -+ { -+ if (print_forcefield(fplog, enerd->term, mdatoms->homenr, -+ f, NULL, xcopy, -+ &(top_global->mols), mdatoms->massT, pres)) -+ { -+ gmx_finalize_par(); -+ -+ fprintf(stderr, "\n"); -+ exit(0); -+ } -+ } -+ if (!bGStat) -+ { -+ /* We will not sum ekinh_old, -+ * so signal that we still have to do it. -+ */ -+ bSumEkinhOld = TRUE; -+ } -+ -+ if (bTCR) -+ { -+ /* Only do GCT when the relaxation of shells (minimization) has converged, -+ * otherwise we might be coupling to bogus energies. -+ * In parallel we must always do this, because the other sims might -+ * update the FF. -+ */ -+ -+ /* Since this is called with the new coordinates state->x, I assume -+ * we want the new box state->box too. 
/ EL 20040121 -+ */ -+ do_coupling(fplog, oenv, nfile, fnm, tcr, t, step, enerd->term, fr, -+ ir, MASTER(cr), -+ mdatoms, &(top->idef), mu_aver, -+ top_global->mols.nr, cr, -+ state->box, total_vir, pres, -+ mu_tot, state->x, f, bConverged); -+ debug_gmx(); -+ } -+ -+ /* ######### BEGIN PREPARING EDR OUTPUT ########### */ -+ -+ /* use the directly determined last velocity, not actually the averaged half steps */ -+ if (bTrotter && ir->eI == eiVV) -+ { -+ enerd->term[F_EKIN] = last_ekin; -+ } -+ enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN]; -+ -+ if (bVV) -+ { -+ enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity; -+ } -+ else -+ { -+ enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir, state, &MassQ); -+ } -+ /* Check for excessively large energies */ -+ if (bIonize) -+ { -+#ifdef GMX_DOUBLE -+ real etot_max = 1e200; -+#else -+ real etot_max = 1e30; -+#endif -+ if (fabs(enerd->term[F_ETOT]) > etot_max) -+ { -+ fprintf(stderr, "Energy too large (%g), giving up\n", -+ enerd->term[F_ETOT]); -+ } -+ } -+ /* ######### END PREPARING EDR OUTPUT ########### */ -+ -+ /* Time for performance */ -+ if (((step % stepout) == 0) || bLastStep) -+ { -+ runtime_upd_proc(runtime); -+ } -+ -+ /* Output stuff */ -+ if (MASTER(cr)) -+ { -+ gmx_bool do_dr, do_or; -+ -+ if (fplog && do_log && bDoExpanded) -+ { -+ /* only needed if doing expanded ensemble */ -+ PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : NULL, -+ &state_global->dfhist, state->fep_state, ir->nstlog, step); -+ } -+ if (!(bStartingFromCpt && (EI_VV(ir->eI)))) -+ { -+ if (bCalcEner) -+ { -+ upd_mdebin(mdebin, bDoDHDL, TRUE, -+ t, mdatoms->tmass, enerd, state, -+ ir->fepvals, ir->expandedvals, lastbox, -+ shake_vir, force_vir, total_vir, pres, -+ ekind, mu_tot, constr); -+ } -+ else -+ { -+ upd_mdebin_step(mdebin); -+ } -+ -+ do_dr = do_per_step(step, ir->nstdisreout); -+ do_or = do_per_step(step, ir->nstorireout); -+ -+ print_ebin(outf->fp_ene, do_ene, do_dr, do_or, do_log ? 
fplog : NULL, -+ step, t, -+ eprNORMAL, bCompact, mdebin, fcd, groups, &(ir->opts)); -+ } -+ if (ir->ePull != epullNO) -+ { -+ pull_print_output(ir->pull, step, t); -+ } -+ -+ if (do_per_step(step, ir->nstlog)) -+ { -+ if (fflush(fplog) != 0) -+ { -+ gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?"); -+ } -+ } -+ } -+ if (bDoExpanded) -+ { -+ /* Have to do this part _after_ outputting the logfile and the edr file */ -+ /* Gets written into the state at the beginning of next loop*/ -+ state->fep_state = lamnew; -+ } -+ -+ /* Remaining runtime */ -+ if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal()) && !bPMETuneRunning) -+ { -+ if (shellfc) -+ { -+ fprintf(stderr, "\n"); -+ } -+ print_time(stderr, runtime, step, ir, cr); -+ } -+ -+ /* Replica exchange */ -+ bExchanged = FALSE; -+ if ((repl_ex_nst > 0) && (step > 0) && !bLastStep && -+ do_per_step(step, repl_ex_nst)) -+ { -+ bExchanged = replica_exchange(fplog, cr, repl_ex, -+ state_global, enerd, -+ state, step, t); -+ -+ if (bExchanged && DOMAINDECOMP(cr)) -+ { -+ dd_partition_system(fplog, step, cr, TRUE, 1, -+ state_global, top_global, ir, -+ state, &f, mdatoms, top, fr, -+ vsite, shellfc, constr, -+ nrnb, wcycle, FALSE); -+ } -+ } -+ -+ bFirstStep = FALSE; -+ bInitStep = FALSE; -+ bStartingFromCpt = FALSE; -+ -+ /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */ -+ /* With all integrators, except VV, we need to retain the pressure -+ * at the current step for coupling at the next step. -+ */ -+ if ((state->flags & (1<nstpcouple > 0 && step % ir->nstpcouple == 0))) -+ { -+ /* Store the pressure in t_state for pressure coupling -+ * at the next MD step. -+ */ -+ copy_mat(pres, state->pres_prev); -+ } -+ -+ /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */ -+ -+ if ( (membed != NULL) && (!bLastStep) ) -+ { -+ rescale_membed(step_rel, membed, state_global->x); -+ } -+ -+ if (bRerunMD) -+ { -+ if (MASTER(cr)) -+ { -+ /* read next frame from input trajectory */ -+ bNotLastFrame = read_next_frame(oenv, status, &rerun_fr); -+ } -+ -+ if (PAR(cr)) -+ { -+ rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame); -+ } -+ } -+ -+ if (!bRerunMD || !rerun_fr.bStep) -+ { -+ /* increase the MD step number */ -+ step++; -+ step_rel++; -+ } -+ -+ cycles = wallcycle_stop(wcycle, ewcSTEP); -+ if (DOMAINDECOMP(cr) && wcycle) -+ { -+ dd_cycles_add(cr->dd, cycles, ddCyclStep); -+ } -+ -+ if (bPMETuneRunning || bPMETuneTry) -+ { -+ /* PME grid + cut-off optimization with GPUs or PME nodes */ -+ -+ /* Count the total cycles over the last steps */ -+ cycles_pmes += cycles; -+ -+ /* We can only switch cut-off at NS steps */ -+ if (step % ir->nstlist == 0) -+ { -+ /* PME grid + cut-off optimization with GPUs or PME nodes */ -+ if (bPMETuneTry) -+ { -+ if (DDMASTER(cr->dd)) -+ { -+ /* PME node load is too high, start tuning */ -+ bPMETuneRunning = (dd_pme_f_ratio(cr->dd) >= 1.05); -+ } -+ dd_bcast(cr->dd, sizeof(gmx_bool), &bPMETuneRunning); -+ -+ if (bPMETuneRunning || step_rel > ir->nstlist*50) -+ { -+ bPMETuneTry = FALSE; -+ } -+ } -+ if (bPMETuneRunning) -+ { -+ /* init_step might not be a multiple of nstlist, -+ * but the first cycle is always skipped anyhow. -+ */ -+ bPMETuneRunning = -+ pme_load_balance(pme_loadbal, cr, -+ (bVerbose && MASTER(cr)) ? 
stderr : NULL, -+ fplog, -+ ir, state, cycles_pmes, -+ fr->ic, fr->nbv, &fr->pmedata, -+ step); -+ -+ /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */ -+ fr->ewaldcoeff = fr->ic->ewaldcoeff; -+ fr->rlist = fr->ic->rlist; -+ fr->rlistlong = fr->ic->rlistlong; -+ fr->rcoulomb = fr->ic->rcoulomb; -+ fr->rvdw = fr->ic->rvdw; -+ } -+ cycles_pmes = 0; -+ } -+ } -+ -+ if (step_rel == wcycle_get_reset_counters(wcycle) || -+ gs.set[eglsRESETCOUNTERS] != 0) -+ { -+ /* Reset all the counters related to performance over the run */ -+ reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, runtime, -+ fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL); -+ wcycle_set_reset_counters(wcycle, -1); -+ if (!(cr->duty & DUTY_PME)) -+ { -+ /* Tell our PME node to reset its counters */ -+ gmx_pme_send_resetcounters(cr, step); -+ } -+ /* Correct max_hours for the elapsed time */ -+ max_hours -= run_time/(60.0*60.0); -+ bResetCountersHalfMaxH = FALSE; -+ gs.set[eglsRESETCOUNTERS] = 0; -+ } -+ -+ } -+ /* End of main MD loop */ -+ debug_gmx(); -+ -+ /* Stop the time */ -+ runtime_end(runtime); -+ -+ if (bRerunMD && MASTER(cr)) -+ { -+ close_trj(status); -+ } -+ -+ if (!(cr->duty & DUTY_PME)) -+ { -+ /* Tell the PME only node to finish */ -+ gmx_pme_send_finish(cr); -+ } -+ -+ if (MASTER(cr)) -+ { -+ if (ir->nstcalcenergy > 0 && !bRerunMD) -+ { -+ print_ebin(outf->fp_ene, FALSE, FALSE, FALSE, fplog, step, t, -+ eprAVER, FALSE, mdebin, fcd, groups, &(ir->opts)); -+ } -+ } -+ -+ done_mdoutf(outf); -+ -+ debug_gmx(); -+ -+ if (ir->nstlist == -1 && nlh.nns > 0 && fplog) -+ { -+ fprintf(fplog, "Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n", nlh.s1/nlh.nns, sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns))); -+ fprintf(fplog, "Average number of atoms that crossed the half buffer length: %.1f\n\n", nlh.ab/nlh.nns); -+ } -+ -+ if (pme_loadbal != NULL) -+ { -+ pme_loadbal_done(pme_loadbal, cr, fplog, -+ fr->nbv != NULL && fr->nbv->bUseGPU); -+ } -+ -+ if (shellfc && fplog) -+ { -+ fprintf(fplog, "Fraction of iterations that converged: %.2f %%\n", -+ (nconverged*100.0)/step_rel); -+ fprintf(fplog, "Average number of force evaluations per MD step: %.2f\n\n", -+ tcount/step_rel); -+ } -+ -+ if (repl_ex_nst > 0 && MASTER(cr)) -+ { -+ print_replica_exchange_statistics(fplog, repl_ex); -+ } -+ -+ runtime->nsteps_done = step_rel; -+ -+ return 0; -+} -diff --git a/src/kernel/mdrun.c b/src/kernel/mdrun.c -index eb30fc9..ca3b657 100644 ---- a/src/kernel/mdrun.c -+++ b/src/kernel/mdrun.c -@@ -58,6 +58,12 @@ - /* afm stuf */ - #include "pull.h" - -+/* PLUMED */ -+#include "../../Plumed.h" -+extern int plumedswitch; -+extern plumed plumedmain; extern void(*plumedcmd)(plumed,const char*,const void*); -+/* END PLUMED */ -+ - int cmain(int argc, char *argv[]) - { - const char *desc[] = { -@@ -415,6 +421,7 @@ int cmain(int argc, char *argv[]) - { efMTX, "-mtx", "nm", ffOPTWR }, - { efNDX, "-dn", "dipole", ffOPTWR }, - { efRND, "-multidir", NULL, ffOPTRDMULT}, -+ { efDAT, "-plumed", "plumed", ffOPTRD }, /* PLUMED */ - { efDAT, "-membed", "membed", ffOPTRD }, - { efTOP, "-mp", "membed", ffOPTRD }, - { efNDX, "-mn", "membed", ffOPTRD } -@@ -752,6 +759,31 @@ int cmain(int argc, char *argv[]) - ddxyz[XX] = (int)(realddxyz[XX] + 0.5); - ddxyz[YY] = (int)(realddxyz[YY] + 0.5); - ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5); -+ /* PLUMED */ -+ plumedswitch=0; -+ if (opt2bSet("-plumed",NFILE,fnm)) plumedswitch=1; -+ if(plumedswitch){ plumedcmd=plumed_cmd; -+ int plumed_is_there=0; -+ int 
real_precision=sizeof(real); -+ real energyUnits=1.0; -+ real lengthUnits=1.0; -+ real timeUnits=1.0; -+ -+ -+ if(!plumed_installed()){ -+ gmx_fatal(FARGS,"Plumed is not available. Check your PLUMED_KERNEL variable."); -+ } -+ plumedmain=plumed_create(); -+ plumed_cmd(plumedmain,"setRealPrecision",&real_precision); -+ // this is not necessary for gromacs units: -+ plumed_cmd(plumedmain,"setMDEnergyUnits",&energyUnits); -+ plumed_cmd(plumedmain,"setMDLengthUnits",&lengthUnits); -+ plumed_cmd(plumedmain,"setMDTimeUnits",&timeUnits); -+ // -+ plumed_cmd(plumedmain,"setPlumedDat",ftp2fn(efDAT,NFILE,fnm)); -+ plumedswitch=1; -+ } -+ /* END PLUMED */ - - rc = mdrunner(&hw_opt, fplog, cr, NFILE, fnm, oenv, bVerbose, bCompact, - nstglobalcomm, ddxyz, dd_node_order, rdd, rconstr, -@@ -761,6 +793,12 @@ int cmain(int argc, char *argv[]) - nmultisim, repl_ex_nst, repl_ex_nex, repl_ex_seed, - pforce, cpt_period, max_hours, deviceOptions, Flags); - -+ /* PLUMED */ -+ if(plumedswitch){ -+ plumed_finalize(plumedmain); -+ } -+ /* END PLUMED */ -+ - gmx_finalize_par(); - - if (MULTIMASTER(cr)) -diff --git a/src/kernel/mdrun.c.preplumed b/src/kernel/mdrun.c.preplumed -new file mode 100644 -index 0000000..eb30fc9 ---- /dev/null -+++ b/src/kernel/mdrun.c.preplumed -@@ -0,0 +1,779 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team, -+ * check out http://www.gromacs.org for more information. -+ * Copyright (c) 2012,2013, by the GROMACS development team, led by -+ * David van der Spoel, Berk Hess, Erik Lindahl, and including many -+ * others, as listed in the AUTHORS file in the top-level source -+ * directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. -+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. 
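For reference, the PLUMED handshake that the patched cmain() performs above reduces to the following standalone sketch. It is illustrative only: the bare main(), the sizeof(double) precision and the "plumed.dat" name are placeholders, while plumed_installed(), plumed_create(), plumed_cmd() and plumed_finalize() are the same Plumed.h entry points the patch calls.

    #include <stdio.h>
    #include "Plumed.h"   /* PLUMED C interface, as included by the patch */

    int main(void)
    {
        plumed p;
        int    real_precision = (int) sizeof(double);  /* the patch passes sizeof(real) */
        double energyUnits = 1.0, lengthUnits = 1.0, timeUnits = 1.0;

        if (!plumed_installed())   /* requires a reachable PLUMED_KERNEL */
        {
            fprintf(stderr, "PLUMED is not available; check PLUMED_KERNEL.\n");
            return 1;
        }
        p = plumed_create();
        plumed_cmd(p, "setRealPrecision", &real_precision);
        /* unit factors of 1.0 are no-ops for GROMACS units, as the patch notes */
        plumed_cmd(p, "setMDEnergyUnits", &energyUnits);
        plumed_cmd(p, "setMDLengthUnits", &lengthUnits);
        plumed_cmd(p, "setMDTimeUnits",   &timeUnits);
        plumed_cmd(p, "setPlumedDat",     "plumed.dat"); /* stands in for ftp2fn(efDAT,...) */
        /* a real engine would continue with setNatoms/setTimestep, "init"
         * and per-step "calc" calls before tearing the object down */
        plumed_finalize(p);   /* mirrors the call added after mdrunner() */
        return 0;
    }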
-+ */ -+#ifdef HAVE_CONFIG_H -+#include -+#endif -+ -+#include "typedefs.h" -+#include "macros.h" -+#include "copyrite.h" -+#include "main.h" -+#include "statutil.h" -+#include "smalloc.h" -+#include "futil.h" -+#include "smalloc.h" -+#include "edsam.h" -+#include "mdrun.h" -+#include "xmdrun.h" -+#include "checkpoint.h" -+#ifdef GMX_THREAD_MPI -+#include "thread_mpi.h" -+#endif -+ -+/* afm stuf */ -+#include "pull.h" -+ -+int cmain(int argc, char *argv[]) -+{ -+ const char *desc[] = { -+ "The [TT]mdrun[tt] program is the main computational chemistry engine", -+ "within GROMACS. Obviously, it performs Molecular Dynamics simulations,", -+ "but it can also perform Stochastic Dynamics, Energy Minimization,", -+ "test particle insertion or (re)calculation of energies.", -+ "Normal mode analysis is another option. In this case [TT]mdrun[tt]", -+ "builds a Hessian matrix from single conformation.", -+ "For usual Normal Modes-like calculations, make sure that", -+ "the structure provided is properly energy-minimized.", -+ "The generated matrix can be diagonalized by [TT]g_nmeig[tt].[PAR]", -+ "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])", -+ "and distributes the topology over nodes if needed.", -+ "[TT]mdrun[tt] produces at least four output files.", -+ "A single log file ([TT]-g[tt]) is written, unless the option", -+ "[TT]-seppot[tt] is used, in which case each node writes a log file.", -+ "The trajectory file ([TT]-o[tt]), contains coordinates, velocities and", -+ "optionally forces.", -+ "The structure file ([TT]-c[tt]) contains the coordinates and", -+ "velocities of the last step.", -+ "The energy file ([TT]-e[tt]) contains energies, the temperature,", -+ "pressure, etc, a lot of these things are also printed in the log file.", -+ "Optionally coordinates can be written to a compressed trajectory file", -+ "([TT]-x[tt]).[PAR]", -+ "The option [TT]-dhdl[tt] is only used when free energy calculation is", -+ "turned on.[PAR]", -+ "A simulation can be run in parallel using two different parallelization", -+ "schemes: MPI parallelization and/or OpenMP thread parallelization.", -+ "The MPI parallelization uses multiple processes when [TT]mdrun[tt] is", -+ "compiled with a normal MPI library or threads when [TT]mdrun[tt] is", -+ "compiled with the GROMACS built-in thread-MPI library. OpenMP threads", -+ "are supported when mdrun is compiled with OpenMP. Full OpenMP support", -+ "is only available with the Verlet cut-off scheme, with the (older)", -+ "group scheme only PME-only processes can use OpenMP parallelization.", -+ "In all cases [TT]mdrun[tt] will by default try to use all the available", -+ "hardware resources. With a normal MPI library only the options", -+ "[TT]-ntomp[tt] (with the Verlet cut-off scheme) and [TT]-ntomp_pme[tt],", -+ "for PME-only processes, can be used to control the number of threads.", -+ "With thread-MPI there are additional options [TT]-nt[tt], which sets", -+ "the total number of threads, and [TT]-ntmpi[tt], which sets the number", -+ "of thread-MPI threads.", -+ "The number of OpenMP threads used by [TT]mdrun[tt] can also be set with", -+ "the standard environment variable, [TT]OMP_NUM_THREADS[tt].", -+ "The [TT]GMX_PME_NUM_THREADS[tt] environment variable can be used to specify", -+ "the number of threads used by the PME-only processes.[PAR]", -+ "Note that combined MPI+OpenMP parallelization is in many cases", -+ "slower than either on its own. 
However, at high parallelization, using the", -+ "combination is often beneficial as it reduces the number of domains and/or", -+ "the number of MPI ranks. (Less and larger domains can improve scaling,", -+ "with separate PME processes fewer MPI ranks reduces communication cost.)", -+ "OpenMP-only parallelization is typically faster than MPI-only parallelization", -+ "on a single CPU(-die). Since we currently don't have proper hardware", -+ "topology detection, [TT]mdrun[tt] compiled with thread-MPI will only", -+ "automatically use OpenMP-only parallelization when you use up to 4", -+ "threads, up to 12 threads with Intel Nehalem/Westmere, or up to 16", -+ "threads with Intel Sandy Bridge or newer CPUs. Otherwise MPI-only", -+ "parallelization is used (except with GPUs, see below).", -+ "[PAR]", -+ "To quickly test the performance of the new Verlet cut-off scheme", -+ "with old [TT].tpr[tt] files, either on CPUs or CPUs+GPUs, you can use", -+ "the [TT]-testverlet[tt] option. This should not be used for production,", -+ "since it can slightly modify potentials and it will remove charge groups", -+ "making analysis difficult, as the [TT].tpr[tt] file will still contain", -+ "charge groups. For production simulations it is highly recommended", -+ "to specify [TT]cutoff-scheme = Verlet[tt] in the [TT].mdp[tt] file.", -+ "[PAR]", -+ "With GPUs (only supported with the Verlet cut-off scheme), the number", -+ "of GPUs should match the number of MPI processes or MPI threads,", -+ "excluding PME-only processes/threads. With thread-MPI, unless set on the command line, the number", -+ "of MPI threads will automatically be set to the number of GPUs detected.", -+ "To use a subset of the available GPUs, or to manually provide a mapping of", -+ "GPUs to PP ranks, you can use the [TT]-gpu_id[tt] option. The argument of [TT]-gpu_id[tt] is", -+ "a string of digits (without delimiter) representing device id-s of the GPUs to be used.", -+ "For example, \"[TT]02[tt]\" specifies using GPUs 0 and 2 in the first and second PP ranks per compute node", -+ "respectively. To select different sets of GPU-s", -+ "on different nodes of a compute cluster, use the [TT]GMX_GPU_ID[tt] environment", -+ "variable instead. The format for [TT]GMX_GPU_ID[tt] is identical to ", -+ "[TT]-gpu_id[tt], with the difference that an environment variable can have", -+ "different values on different compute nodes. Multiple MPI ranks on each node", -+ "can share GPUs. This is accomplished by specifying the id(s) of the GPU(s)", -+ "multiple times, e.g. \"[TT]0011[tt]\" for four ranks sharing two GPUs in this node.", -+ "This works within a single simulation, or a multi-simulation, with any form of MPI.", -+ "[PAR]", -+ "When using PME with separate PME nodes or with a GPU, the two major", -+ "compute tasks, the non-bonded force calculation and the PME calculation", -+ "run on different compute resources. If this load is not balanced,", -+ "some of the resources will be idle part of time. With the Verlet", -+ "cut-off scheme this load is automatically balanced when the PME load", -+ "is too high (but not when it is too low). This is done by scaling", -+ "the Coulomb cut-off and PME grid spacing by the same amount. In the first", -+ "few hundred steps different settings are tried and the fastest is chosen", -+ "for the rest of the simulation. This does not affect the accuracy of", -+ "the results, but it does affect the decomposition of the Coulomb energy", -+ "into particle and mesh contributions. 
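The -gpu_id mapping described above is a plain one-digit-per-PP-rank string; the short sketch below (not GROMACS code, just the stated rule, with a made-up "0011" value) shows how such a string translates into a per-rank device assignment.

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        const char *gpu_id = "0011";               /* hypothetical -gpu_id value */
        int         nranks = (int) strlen(gpu_id); /* one digit per PP rank on the node */

        for (int rank = 0; rank < nranks; rank++)
        {
            printf("PP rank %d -> GPU %c\n", rank, gpu_id[rank]);
        }
        return 0;   /* "0011": four ranks on the node share GPUs 0 and 1 */
    }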
The auto-tuning can be turned off", -+ "with the option [TT]-notunepme[tt].", -+ "[PAR]", -+ "[TT]mdrun[tt] pins (sets affinity of) threads to specific cores,", -+ "when all (logical) cores on a compute node are used by [TT]mdrun[tt],", -+ "even when no multi-threading is used,", -+ "as this usually results in significantly better performance.", -+ "If the queuing systems or the OpenMP library pinned threads, we honor", -+ "this and don't pin again, even though the layout may be sub-optimal.", -+ "If you want to have [TT]mdrun[tt] override an already set thread affinity", -+ "or pin threads when using less cores, use [TT]-pin on[tt].", -+ "With SMT (simultaneous multithreading), e.g. Intel Hyper-Threading,", -+ "there are multiple logical cores per physical core.", -+ "The option [TT]-pinstride[tt] sets the stride in logical cores for", -+ "pinning consecutive threads. Without SMT, 1 is usually the best choice.", -+ "With Intel Hyper-Threading 2 is best when using half or less of the", -+ "logical cores, 1 otherwise. The default value of 0 do exactly that:", -+ "it minimizes the threads per logical core, to optimize performance.", -+ "If you want to run multiple mdrun jobs on the same physical node," -+ "you should set [TT]-pinstride[tt] to 1 when using all logical cores.", -+ "When running multiple mdrun (or other) simulations on the same physical", -+ "node, some simulations need to start pinning from a non-zero core", -+ "to avoid overloading cores; with [TT]-pinoffset[tt] you can specify", -+ "the offset in logical cores for pinning.", -+ "[PAR]", -+ "When [TT]mdrun[tt] is started using MPI with more than 1 process", -+ "or with thread-MPI with more than 1 thread, MPI parallelization is used.", -+ "By default domain decomposition is used, unless the [TT]-pd[tt]", -+ "option is set, which selects particle decomposition.", -+ "[PAR]", -+ "With domain decomposition, the spatial decomposition can be set", -+ "with option [TT]-dd[tt]. By default [TT]mdrun[tt] selects a good decomposition.", -+ "The user only needs to change this when the system is very inhomogeneous.", -+ "Dynamic load balancing is set with the option [TT]-dlb[tt],", -+ "which can give a significant performance improvement,", -+ "especially for inhomogeneous systems. The only disadvantage of", -+ "dynamic load balancing is that runs are no longer binary reproducible,", -+ "but in most cases this is not important.", -+ "By default the dynamic load balancing is automatically turned on", -+ "when the measured performance loss due to load imbalance is 5% or more.", -+ "At low parallelization these are the only important options", -+ "for domain decomposition.", -+ "At high parallelization the options in the next two sections", -+ "could be important for increasing the performace.", -+ "[PAR]", -+ "When PME is used with domain decomposition, separate nodes can", -+ "be assigned to do only the PME mesh calculation;", -+ "this is computationally more efficient starting at about 12 nodes", -+ "or even fewer when OpenMP parallelization is used.", -+ "The number of PME nodes is set with option [TT]-npme[tt],", -+ "this can not be more than half of the nodes.", -+ "By default [TT]mdrun[tt] makes a guess for the number of PME", -+ "nodes when the number of nodes is larger than 16. 
With GPUs,", -+ "PME nodes are not selected automatically, since the optimal setup", -+ "depends very much on the details of the hardware.", -+ "In all cases you might gain performance by optimizing [TT]-npme[tt].", -+ "Performance statistics on this issue", -+ "are written at the end of the log file.", -+ "For good load balancing at high parallelization, the PME grid x and y", -+ "dimensions should be divisible by the number of PME nodes", -+ "(the simulation will run correctly also when this is not the case).", -+ "[PAR]", -+ "This section lists all options that affect the domain decomposition.", -+ "[PAR]", -+ "Option [TT]-rdd[tt] can be used to set the required maximum distance", -+ "for inter charge-group bonded interactions.", -+ "Communication for two-body bonded interactions below the non-bonded", -+ "cut-off distance always comes for free with the non-bonded communication.", -+ "Atoms beyond the non-bonded cut-off are only communicated when they have", -+ "missing bonded interactions; this means that the extra cost is minor", -+ "and nearly indepedent of the value of [TT]-rdd[tt].", -+ "With dynamic load balancing option [TT]-rdd[tt] also sets", -+ "the lower limit for the domain decomposition cell sizes.", -+ "By default [TT]-rdd[tt] is determined by [TT]mdrun[tt] based on", -+ "the initial coordinates. The chosen value will be a balance", -+ "between interaction range and communication cost.", -+ "[PAR]", -+ "When inter charge-group bonded interactions are beyond", -+ "the bonded cut-off distance, [TT]mdrun[tt] terminates with an error message.", -+ "For pair interactions and tabulated bonds", -+ "that do not generate exclusions, this check can be turned off", -+ "with the option [TT]-noddcheck[tt].", -+ "[PAR]", -+ "When constraints are present, option [TT]-rcon[tt] influences", -+ "the cell size limit as well.", -+ "Atoms connected by NC constraints, where NC is the LINCS order plus 1,", -+ "should not be beyond the smallest cell size. A error message is", -+ "generated when this happens and the user should change the decomposition", -+ "or decrease the LINCS order and increase the number of LINCS iterations.", -+ "By default [TT]mdrun[tt] estimates the minimum cell size required for P-LINCS", -+ "in a conservative fashion. For high parallelization it can be useful", -+ "to set the distance required for P-LINCS with the option [TT]-rcon[tt].", -+ "[PAR]", -+ "The [TT]-dds[tt] option sets the minimum allowed x, y and/or z scaling", -+ "of the cells with dynamic load balancing. [TT]mdrun[tt] will ensure that", -+ "the cells can scale down by at least this factor. This option is used", -+ "for the automated spatial decomposition (when not using [TT]-dd[tt])", -+ "as well as for determining the number of grid pulses, which in turn", -+ "sets the minimum allowed cell size. 
Under certain circumstances", -+ "the value of [TT]-dds[tt] might need to be adjusted to account for", -+ "high or low spatial inhomogeneity of the system.", -+ "[PAR]", -+ "The option [TT]-gcom[tt] can be used to only do global communication", -+ "every n steps.", -+ "This can improve performance for highly parallel simulations", -+ "where this global communication step becomes the bottleneck.", -+ "For a global thermostat and/or barostat the temperature", -+ "and/or pressure will also only be updated every [TT]-gcom[tt] steps.", -+ "By default it is set to the minimum of nstcalcenergy and nstlist.[PAR]", -+ "With [TT]-rerun[tt] an input trajectory can be given for which ", -+ "forces and energies will be (re)calculated. Neighbor searching will be", -+ "performed for every frame, unless [TT]nstlist[tt] is zero", -+ "(see the [TT].mdp[tt] file).[PAR]", -+ "ED (essential dynamics) sampling and/or additional flooding potentials", -+ "are switched on by using the [TT]-ei[tt] flag followed by an [TT].edi[tt]", -+ "file. The [TT].edi[tt] file can be produced with the [TT]make_edi[tt] tool", -+ "or by using options in the essdyn menu of the WHAT IF program.", -+ "[TT]mdrun[tt] produces a [TT].xvg[tt] output file that", -+ "contains projections of positions, velocities and forces onto selected", -+ "eigenvectors.[PAR]", -+ "When user-defined potential functions have been selected in the", -+ "[TT].mdp[tt] file the [TT]-table[tt] option is used to pass [TT]mdrun[tt]", -+ "a formatted table with potential functions. The file is read from", -+ "either the current directory or from the [TT]GMXLIB[tt] directory.", -+ "A number of pre-formatted tables are presented in the [TT]GMXLIB[tt] dir,", -+ "for 6-8, 6-9, 6-10, 6-11, 6-12 Lennard-Jones potentials with", -+ "normal Coulomb.", -+ "When pair interactions are present, a separate table for pair interaction", -+ "functions is read using the [TT]-tablep[tt] option.[PAR]", -+ "When tabulated bonded functions are present in the topology,", -+ "interaction functions are read using the [TT]-tableb[tt] option.", -+ "For each different tabulated interaction type the table file name is", -+ "modified in a different way: before the file extension an underscore is", -+ "appended, then a 'b' for bonds, an 'a' for angles or a 'd' for dihedrals", -+ "and finally the table number of the interaction type.[PAR]", -+ "The options [TT]-px[tt] and [TT]-pf[tt] are used for writing pull COM", -+ "coordinates and forces when pulling is selected", -+ "in the [TT].mdp[tt] file.[PAR]", -+ "With [TT]-multi[tt] or [TT]-multidir[tt], multiple systems can be ", -+ "simulated in parallel.", -+ "As many input files/directories are required as the number of systems. ", -+ "The [TT]-multidir[tt] option takes a list of directories (one for each ", -+ "system) and runs in each of them, using the input/output file names, ", -+ "such as specified by e.g. the [TT]-s[tt] option, relative to these ", -+ "directories.", -+ "With [TT]-multi[tt], the system number is appended to the run input ", -+ "and each output filename, for instance [TT]topol.tpr[tt] becomes", -+ "[TT]topol0.tpr[tt], [TT]topol1.tpr[tt] etc.", -+ "The number of nodes per system is the total number of nodes", -+ "divided by the number of systems.", -+ "One use of this option is for NMR refinement: when distance", -+ "or orientation restraints are present these can be ensemble averaged", -+ "over all the systems.[PAR]", -+ "With [TT]-replex[tt] replica exchange is attempted every given number", -+ "of steps. 
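The table-file naming rule for -tableb quoted above (an underscore, then 'b'/'a'/'d', then the table number, inserted before the extension) can be illustrated in a few lines of C; the base name and table number below are arbitrary examples, not values used by mdrun.

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        const char *base = "table.xvg";            /* example base name */
        const char *dot  = strrchr(base, '.');     /* position of the extension */
        char        name[256];

        /* dihedral ('d') table number 3 -> table_d3.xvg */
        snprintf(name, sizeof(name), "%.*s_%c%d%s",
                 (int) (dot - base), base, 'd', 3, dot);
        printf("%s\n", name);
        return 0;
    }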
The number of replicas is set with the [TT]-multi[tt] or ", -+ "[TT]-multidir[tt] option, described above.", -+ "All run input files should use a different coupling temperature,", -+ "the order of the files is not important. The random seed is set with", -+ "[TT]-reseed[tt]. The velocities are scaled and neighbor searching", -+ "is performed after every exchange.[PAR]", -+ "Finally some experimental algorithms can be tested when the", -+ "appropriate options have been given. Currently under", -+ "investigation are: polarizability and X-ray bombardments.", -+ "[PAR]", -+ "The option [TT]-membed[tt] does what used to be g_membed, i.e. embed", -+ "a protein into a membrane. The data file should contain the options", -+ "that where passed to g_membed before. The [TT]-mn[tt] and [TT]-mp[tt]", -+ "both apply to this as well.", -+ "[PAR]", -+ "The option [TT]-pforce[tt] is useful when you suspect a simulation", -+ "crashes due to too large forces. With this option coordinates and", -+ "forces of atoms with a force larger than a certain value will", -+ "be printed to stderr.", -+ "[PAR]", -+ "Checkpoints containing the complete state of the system are written", -+ "at regular intervals (option [TT]-cpt[tt]) to the file [TT]-cpo[tt],", -+ "unless option [TT]-cpt[tt] is set to -1.", -+ "The previous checkpoint is backed up to [TT]state_prev.cpt[tt] to", -+ "make sure that a recent state of the system is always available,", -+ "even when the simulation is terminated while writing a checkpoint.", -+ "With [TT]-cpnum[tt] all checkpoint files are kept and appended", -+ "with the step number.", -+ "A simulation can be continued by reading the full state from file", -+ "with option [TT]-cpi[tt]. This option is intelligent in the way that", -+ "if no checkpoint file is found, Gromacs just assumes a normal run and", -+ "starts from the first step of the [TT].tpr[tt] file. By default the output", -+ "will be appending to the existing output files. The checkpoint file", -+ "contains checksums of all output files, such that you will never", -+ "loose data when some output files are modified, corrupt or removed.", -+ "There are three scenarios with [TT]-cpi[tt]:[PAR]", -+ "[TT]*[tt] no files with matching names are present: new output files are written[PAR]", -+ "[TT]*[tt] all files are present with names and checksums matching those stored", -+ "in the checkpoint file: files are appended[PAR]", -+ "[TT]*[tt] otherwise no files are modified and a fatal error is generated[PAR]", -+ "With [TT]-noappend[tt] new output files are opened and the simulation", -+ "part number is added to all output file names.", -+ "Note that in all cases the checkpoint file itself is not renamed", -+ "and will be overwritten, unless its name does not match", -+ "the [TT]-cpo[tt] option.", -+ "[PAR]", -+ "With checkpointing the output is appended to previously written", -+ "output files, unless [TT]-noappend[tt] is used or none of the previous", -+ "output files are present (except for the checkpoint file).", -+ "The integrity of the files to be appended is verified using checksums", -+ "which are stored in the checkpoint file. This ensures that output can", -+ "not be mixed up or corrupted due to file appending. 
When only some", -+ "of the previous output files are present, a fatal error is generated", -+ "and no old output files are modified and no new output files are opened.", -+ "The result with appending will be the same as from a single run.", -+ "The contents will be binary identical, unless you use a different number", -+ "of nodes or dynamic load balancing or the FFT library uses optimizations", -+ "through timing.", -+ "[PAR]", -+ "With option [TT]-maxh[tt] a simulation is terminated and a checkpoint", -+ "file is written at the first neighbor search step where the run time", -+ "exceeds [TT]-maxh[tt]*0.99 hours.", -+ "[PAR]", -+ "When [TT]mdrun[tt] receives a TERM signal, it will set nsteps to the current", -+ "step plus one. When [TT]mdrun[tt] receives an INT signal (e.g. when ctrl+C is", -+ "pressed), it will stop after the next neighbor search step ", -+ "(with nstlist=0 at the next step).", -+ "In both cases all the usual output will be written to file.", -+ "When running with MPI, a signal to one of the [TT]mdrun[tt] processes", -+ "is sufficient, this signal should not be sent to mpirun or", -+ "the [TT]mdrun[tt] process that is the parent of the others.", -+ "[PAR]", -+ "When [TT]mdrun[tt] is started with MPI, it does not run niced by default." -+ }; -+ t_commrec *cr; -+ t_filenm fnm[] = { -+ { efTPX, NULL, NULL, ffREAD }, -+ { efTRN, "-o", NULL, ffWRITE }, -+ { efXTC, "-x", NULL, ffOPTWR }, -+ { efCPT, "-cpi", NULL, ffOPTRD }, -+ { efCPT, "-cpo", NULL, ffOPTWR }, -+ { efSTO, "-c", "confout", ffWRITE }, -+ { efEDR, "-e", "ener", ffWRITE }, -+ { efLOG, "-g", "md", ffWRITE }, -+ { efXVG, "-dhdl", "dhdl", ffOPTWR }, -+ { efXVG, "-field", "field", ffOPTWR }, -+ { efXVG, "-table", "table", ffOPTRD }, -+ { efXVG, "-tabletf", "tabletf", ffOPTRD }, -+ { efXVG, "-tablep", "tablep", ffOPTRD }, -+ { efXVG, "-tableb", "table", ffOPTRD }, -+ { efTRX, "-rerun", "rerun", ffOPTRD }, -+ { efXVG, "-tpi", "tpi", ffOPTWR }, -+ { efXVG, "-tpid", "tpidist", ffOPTWR }, -+ { efEDI, "-ei", "sam", ffOPTRD }, -+ { efXVG, "-eo", "edsam", ffOPTWR }, -+ { efGCT, "-j", "wham", ffOPTRD }, -+ { efGCT, "-jo", "bam", ffOPTWR }, -+ { efXVG, "-ffout", "gct", ffOPTWR }, -+ { efXVG, "-devout", "deviatie", ffOPTWR }, -+ { efXVG, "-runav", "runaver", ffOPTWR }, -+ { efXVG, "-px", "pullx", ffOPTWR }, -+ { efXVG, "-pf", "pullf", ffOPTWR }, -+ { efXVG, "-ro", "rotation", ffOPTWR }, -+ { efLOG, "-ra", "rotangles", ffOPTWR }, -+ { efLOG, "-rs", "rotslabs", ffOPTWR }, -+ { efLOG, "-rt", "rottorque", ffOPTWR }, -+ { efMTX, "-mtx", "nm", ffOPTWR }, -+ { efNDX, "-dn", "dipole", ffOPTWR }, -+ { efRND, "-multidir", NULL, ffOPTRDMULT}, -+ { efDAT, "-membed", "membed", ffOPTRD }, -+ { efTOP, "-mp", "membed", ffOPTRD }, -+ { efNDX, "-mn", "membed", ffOPTRD } -+ }; -+#define NFILE asize(fnm) -+ -+ /* Command line options ! 
*/ -+ gmx_bool bCart = FALSE; -+ gmx_bool bPPPME = FALSE; -+ gmx_bool bPartDec = FALSE; -+ gmx_bool bDDBondCheck = TRUE; -+ gmx_bool bDDBondComm = TRUE; -+ gmx_bool bTunePME = TRUE; -+ gmx_bool bTestVerlet = FALSE; -+ gmx_bool bVerbose = FALSE; -+ gmx_bool bCompact = TRUE; -+ gmx_bool bSepPot = FALSE; -+ gmx_bool bRerunVSite = FALSE; -+ gmx_bool bIonize = FALSE; -+ gmx_bool bConfout = TRUE; -+ gmx_bool bReproducible = FALSE; -+ -+ int npme = -1; -+ int nmultisim = 0; -+ int nstglobalcomm = -1; -+ int repl_ex_nst = 0; -+ int repl_ex_seed = -1; -+ int repl_ex_nex = 0; -+ int nstepout = 100; -+ int resetstep = -1; -+ gmx_large_int_t nsteps = -2; /* the value -2 means that the mdp option will be used */ -+ -+ rvec realddxyz = {0, 0, 0}; -+ const char *ddno_opt[ddnoNR+1] = -+ { NULL, "interleave", "pp_pme", "cartesian", NULL }; -+ const char *dddlb_opt[] = -+ { NULL, "auto", "no", "yes", NULL }; -+ const char *thread_aff_opt[threadaffNR+1] = -+ { NULL, "auto", "on", "off", NULL }; -+ const char *nbpu_opt[] = -+ { NULL, "auto", "cpu", "gpu", "gpu_cpu", NULL }; -+ real rdd = 0.0, rconstr = 0.0, dlb_scale = 0.8, pforce = -1; -+ char *ddcsx = NULL, *ddcsy = NULL, *ddcsz = NULL; -+ real cpt_period = 15.0, max_hours = -1; -+ gmx_bool bAppendFiles = TRUE; -+ gmx_bool bKeepAndNumCPT = FALSE; -+ gmx_bool bResetCountersHalfWay = FALSE; -+ output_env_t oenv = NULL; -+ const char *deviceOptions = ""; -+ -+ /* Non transparent initialization of a complex gmx_hw_opt_t struct. -+ * But unfortunately we are not allowed to call a function here, -+ * since declarations follow below. -+ */ -+ gmx_hw_opt_t hw_opt = { 0, 0, 0, 0, threadaffSEL, 0, 0, -+ { NULL, FALSE, 0, NULL } }; -+ -+ t_pargs pa[] = { -+ -+ { "-pd", FALSE, etBOOL, {&bPartDec}, -+ "Use particle decompostion" }, -+ { "-dd", FALSE, etRVEC, {&realddxyz}, -+ "Domain decomposition grid, 0 is optimize" }, -+ { "-ddorder", FALSE, etENUM, {ddno_opt}, -+ "DD node order" }, -+ { "-npme", FALSE, etINT, {&npme}, -+ "Number of separate nodes to be used for PME, -1 is guess" }, -+ { "-nt", FALSE, etINT, {&hw_opt.nthreads_tot}, -+ "Total number of threads to start (0 is guess)" }, -+ { "-ntmpi", FALSE, etINT, {&hw_opt.nthreads_tmpi}, -+ "Number of thread-MPI threads to start (0 is guess)" }, -+ { "-ntomp", FALSE, etINT, {&hw_opt.nthreads_omp}, -+ "Number of OpenMP threads per MPI process/thread to start (0 is guess)" }, -+ { "-ntomp_pme", FALSE, etINT, {&hw_opt.nthreads_omp_pme}, -+ "Number of OpenMP threads per MPI process/thread to start (0 is -ntomp)" }, -+ { "-pin", FALSE, etENUM, {thread_aff_opt}, -+ "Fix threads (or processes) to specific cores" }, -+ { "-pinoffset", FALSE, etINT, {&hw_opt.core_pinning_offset}, -+ "The starting logical core number for pinning to cores; used to avoid pinning threads from different mdrun instances to the same core" }, -+ { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride}, -+ "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" }, -+ { "-gpu_id", FALSE, etSTR, {&hw_opt.gpu_opt.gpu_id}, -+ "List of GPU device id-s to use, specifies the per-node PP rank to GPU mapping" }, -+ { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck}, -+ "Check for all bonded interactions with DD" }, -+ { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm}, -+ "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" }, -+ { "-rdd", FALSE, etREAL, {&rdd}, -+ "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" }, -+ { "-rcon", FALSE, etREAL, 
{&rconstr}, -+ "Maximum distance for P-LINCS (nm), 0 is estimate" }, -+ { "-dlb", FALSE, etENUM, {dddlb_opt}, -+ "Dynamic load balancing (with DD)" }, -+ { "-dds", FALSE, etREAL, {&dlb_scale}, -+ "Minimum allowed dlb scaling of the DD cell size" }, -+ { "-ddcsx", FALSE, etSTR, {&ddcsx}, -+ "HIDDENThe DD cell sizes in x" }, -+ { "-ddcsy", FALSE, etSTR, {&ddcsy}, -+ "HIDDENThe DD cell sizes in y" }, -+ { "-ddcsz", FALSE, etSTR, {&ddcsz}, -+ "HIDDENThe DD cell sizes in z" }, -+ { "-gcom", FALSE, etINT, {&nstglobalcomm}, -+ "Global communication frequency" }, -+ { "-nb", FALSE, etENUM, {&nbpu_opt}, -+ "Calculate non-bonded interactions on" }, -+ { "-tunepme", FALSE, etBOOL, {&bTunePME}, -+ "Optimize PME load between PP/PME nodes or GPU/CPU" }, -+ { "-testverlet", FALSE, etBOOL, {&bTestVerlet}, -+ "Test the Verlet non-bonded scheme" }, -+ { "-v", FALSE, etBOOL, {&bVerbose}, -+ "Be loud and noisy" }, -+ { "-compact", FALSE, etBOOL, {&bCompact}, -+ "Write a compact log file" }, -+ { "-seppot", FALSE, etBOOL, {&bSepPot}, -+ "Write separate V and dVdl terms for each interaction type and node to the log file(s)" }, -+ { "-pforce", FALSE, etREAL, {&pforce}, -+ "Print all forces larger than this (kJ/mol nm)" }, -+ { "-reprod", FALSE, etBOOL, {&bReproducible}, -+ "Try to avoid optimizations that affect binary reproducibility" }, -+ { "-cpt", FALSE, etREAL, {&cpt_period}, -+ "Checkpoint interval (minutes)" }, -+ { "-cpnum", FALSE, etBOOL, {&bKeepAndNumCPT}, -+ "Keep and number checkpoint files" }, -+ { "-append", FALSE, etBOOL, {&bAppendFiles}, -+ "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" }, -+ { "-nsteps", FALSE, etGMX_LARGE_INT, {&nsteps}, -+ "Run this number of steps, overrides .mdp file option" }, -+ { "-maxh", FALSE, etREAL, {&max_hours}, -+ "Terminate after 0.99 times this time (hours)" }, -+ { "-multi", FALSE, etINT, {&nmultisim}, -+ "Do multiple simulations in parallel" }, -+ { "-replex", FALSE, etINT, {&repl_ex_nst}, -+ "Attempt replica exchange periodically with this period (steps)" }, -+ { "-nex", FALSE, etINT, {&repl_ex_nex}, -+ "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion). -nex zero or not specified gives neighbor replica exchange." 
}, -+ { "-reseed", FALSE, etINT, {&repl_ex_seed}, -+ "Seed for replica exchange, -1 is generate a seed" }, -+ { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite}, -+ "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" }, -+ { "-ionize", FALSE, etBOOL, {&bIonize}, -+ "Do a simulation including the effect of an X-Ray bombardment on your system" }, -+ { "-confout", FALSE, etBOOL, {&bConfout}, -+ "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" }, -+ { "-stepout", FALSE, etINT, {&nstepout}, -+ "HIDDENFrequency of writing the remaining runtime" }, -+ { "-resetstep", FALSE, etINT, {&resetstep}, -+ "HIDDENReset cycle counters after these many time steps" }, -+ { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay}, -+ "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" } -+ }; -+ gmx_edsam_t ed; -+ unsigned long Flags, PCA_Flags; -+ ivec ddxyz; -+ int dd_node_order; -+ gmx_bool bAddPart; -+ FILE *fplog, *fpmulti; -+ int sim_part, sim_part_fn; -+ const char *part_suffix = ".part"; -+ char suffix[STRLEN]; -+ int rc; -+ char **multidir = NULL; -+ -+ -+ cr = init_par(&argc, &argv); -+ -+ if (MASTER(cr)) -+ { -+ CopyRight(stderr, argv[0]); -+ } -+ -+ PCA_Flags = (PCA_CAN_SET_DEFFNM | (MASTER(cr) ? 0 : PCA_QUIET)); -+ -+ /* Comment this in to do fexist calls only on master -+ * works not with rerun or tables at the moment -+ * also comment out the version of init_forcerec in md.c -+ * with NULL instead of opt2fn -+ */ -+ /* -+ if (!MASTER(cr)) -+ { -+ PCA_Flags |= PCA_NOT_READ_NODE; -+ } -+ */ -+ -+ parse_common_args(&argc, argv, PCA_Flags, NFILE, fnm, asize(pa), pa, -+ asize(desc), desc, 0, NULL, &oenv); -+ -+ -+ /* we set these early because they might be used in init_multisystem() -+ Note that there is the potential for npme>nnodes until the number of -+ threads is set later on, if there's thread parallelization. That shouldn't -+ lead to problems. 
*/ -+ dd_node_order = nenum(ddno_opt); -+ cr->npmenodes = npme; -+ -+ hw_opt.thread_affinity = nenum(thread_aff_opt); -+ -+ /* now check the -multi and -multidir option */ -+ if (opt2bSet("-multidir", NFILE, fnm)) -+ { -+ int i; -+ if (nmultisim > 0) -+ { -+ gmx_fatal(FARGS, "mdrun -multi and -multidir options are mutually exclusive."); -+ } -+ nmultisim = opt2fns(&multidir, "-multidir", NFILE, fnm); -+ } -+ -+ -+ if (repl_ex_nst != 0 && nmultisim < 2) -+ { -+ gmx_fatal(FARGS, "Need at least two replicas for replica exchange (option -multi)"); -+ } -+ -+ if (repl_ex_nex < 0) -+ { -+ gmx_fatal(FARGS, "Replica exchange number of exchanges needs to be positive"); -+ } -+ -+ if (nmultisim > 1) -+ { -+#ifndef GMX_THREAD_MPI -+ gmx_bool bParFn = (multidir == NULL); -+ init_multisystem(cr, nmultisim, multidir, NFILE, fnm, bParFn); -+#else -+ gmx_fatal(FARGS, "mdrun -multi is not supported with the thread library.Please compile GROMACS with MPI support"); -+#endif -+ } -+ -+ bAddPart = !bAppendFiles; -+ -+ /* Check if there is ANY checkpoint file available */ -+ sim_part = 1; -+ sim_part_fn = sim_part; -+ if (opt2bSet("-cpi", NFILE, fnm)) -+ { -+ if (bSepPot && bAppendFiles) -+ { -+ gmx_fatal(FARGS, "Output file appending is not supported with -seppot"); -+ } -+ -+ bAppendFiles = -+ read_checkpoint_simulation_part(opt2fn_master("-cpi", NFILE, -+ fnm, cr), -+ &sim_part_fn, NULL, cr, -+ bAppendFiles, NFILE, fnm, -+ part_suffix, &bAddPart); -+ if (sim_part_fn == 0 && MULTIMASTER(cr)) -+ { -+ fprintf(stdout, "No previous checkpoint file present, assuming this is a new run.\n"); -+ } -+ else -+ { -+ sim_part = sim_part_fn + 1; -+ } -+ -+ if (MULTISIM(cr) && MASTER(cr)) -+ { -+ if (MULTIMASTER(cr)) -+ { -+ /* Log file is not yet available, so if there's a -+ * problem we can only write to stderr. */ -+ fpmulti = stderr; -+ } -+ else -+ { -+ fpmulti = NULL; -+ } -+ check_multi_int(fpmulti, cr->ms, sim_part, "simulation part", TRUE); -+ } -+ } -+ else -+ { -+ bAppendFiles = FALSE; -+ } -+ -+ if (!bAppendFiles) -+ { -+ sim_part_fn = sim_part; -+ } -+ -+ if (bAddPart) -+ { -+ /* Rename all output files (except checkpoint files) */ -+ /* create new part name first (zero-filled) */ -+ sprintf(suffix, "%s%04d", part_suffix, sim_part_fn); -+ -+ add_suffix_to_output_names(fnm, NFILE, suffix); -+ if (MULTIMASTER(cr)) -+ { -+ fprintf(stdout, "Checkpoint file is from part %d, new output files will be suffixed '%s'.\n", sim_part-1, suffix); -+ } -+ } -+ -+ Flags = opt2bSet("-rerun", NFILE, fnm) ? MD_RERUN : 0; -+ Flags = Flags | (bSepPot ? MD_SEPPOT : 0); -+ Flags = Flags | (bIonize ? MD_IONIZE : 0); -+ Flags = Flags | (bPartDec ? MD_PARTDEC : 0); -+ Flags = Flags | (bDDBondCheck ? MD_DDBONDCHECK : 0); -+ Flags = Flags | (bDDBondComm ? MD_DDBONDCOMM : 0); -+ Flags = Flags | (bTunePME ? MD_TUNEPME : 0); -+ Flags = Flags | (bTestVerlet ? MD_TESTVERLET : 0); -+ Flags = Flags | (bConfout ? MD_CONFOUT : 0); -+ Flags = Flags | (bRerunVSite ? MD_RERUN_VSITE : 0); -+ Flags = Flags | (bReproducible ? MD_REPRODUCIBLE : 0); -+ Flags = Flags | (bAppendFiles ? MD_APPENDFILES : 0); -+ Flags = Flags | (opt2parg_bSet("-append", asize(pa), pa) ? MD_APPENDFILESSET : 0); -+ Flags = Flags | (bKeepAndNumCPT ? MD_KEEPANDNUMCPT : 0); -+ Flags = Flags | (sim_part > 1 ? MD_STARTFROMCPT : 0); -+ Flags = Flags | (bResetCountersHalfWay ? MD_RESETCOUNTERSHALFWAY : 0); -+ -+ -+ /* We postpone opening the log file if we are appending, so we can -+ first truncate the old log file and append to the correct position -+ there instead. 
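The run options collected above are folded into a single bitmask before being handed to mdrunner(); the pattern is the usual flag-OR-and-test idiom, sketched below with placeholder bit values (the real MD_* constants are defined in the GROMACS headers, not here).

    #include <stdio.h>

    #define EX_APPENDFILES  (1u << 0)   /* placeholder values, not the real MD_* bits */
    #define EX_STARTFROMCPT (1u << 1)

    int main(void)
    {
        unsigned long Flags = 0;
        int bAppendFiles = 1, sim_part = 2;

        Flags |= (bAppendFiles ? EX_APPENDFILES : 0);
        Flags |= (sim_part > 1 ? EX_STARTFROMCPT : 0);

        if (Flags & EX_STARTFROMCPT)
        {
            printf("continuing from checkpoint, appending=%d\n",
                   (Flags & EX_APPENDFILES) != 0);
        }
        return 0;
    }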
*/ -+ if ((MASTER(cr) || bSepPot) && !bAppendFiles) -+ { -+ gmx_log_open(ftp2fn(efLOG, NFILE, fnm), cr, -+ !bSepPot, Flags & MD_APPENDFILES, &fplog); -+ CopyRight(fplog, argv[0]); -+ please_cite(fplog, "Hess2008b"); -+ please_cite(fplog, "Spoel2005a"); -+ please_cite(fplog, "Lindahl2001a"); -+ please_cite(fplog, "Berendsen95a"); -+ } -+ else if (!MASTER(cr) && bSepPot) -+ { -+ gmx_log_open(ftp2fn(efLOG, NFILE, fnm), cr, !bSepPot, Flags, &fplog); -+ } -+ else -+ { -+ fplog = NULL; -+ } -+ -+ ddxyz[XX] = (int)(realddxyz[XX] + 0.5); -+ ddxyz[YY] = (int)(realddxyz[YY] + 0.5); -+ ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5); -+ -+ rc = mdrunner(&hw_opt, fplog, cr, NFILE, fnm, oenv, bVerbose, bCompact, -+ nstglobalcomm, ddxyz, dd_node_order, rdd, rconstr, -+ dddlb_opt[0], dlb_scale, ddcsx, ddcsy, ddcsz, -+ nbpu_opt[0], -+ nsteps, nstepout, resetstep, -+ nmultisim, repl_ex_nst, repl_ex_nex, repl_ex_seed, -+ pforce, cpt_period, max_hours, deviceOptions, Flags); -+ -+ gmx_finalize_par(); -+ -+ if (MULTIMASTER(cr)) -+ { -+ thanx(stderr); -+ } -+ -+ /* Log file has to be closed in mdrunner if we are appending to it -+ (fplog not set here) */ -+ if (MASTER(cr) && !bAppendFiles) -+ { -+ gmx_log_close(fplog); -+ } -+ -+ return rc; -+} -diff --git a/src/kernel/repl_ex.c b/src/kernel/repl_ex.c -index 0f094d4..7f27136 100644 ---- a/src/kernel/repl_ex.c -+++ b/src/kernel/repl_ex.c -@@ -53,6 +53,12 @@ - #include "domdec.h" - #include "partdec.h" - -+/* PLUMED */ -+#include "../../Plumed.h" -+extern int plumedswitch; -+extern plumed plumedmain; -+/* END PLUMED */ -+ - #define PROBABILITYCUTOFF 100 - /* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */ - -@@ -113,14 +119,16 @@ static gmx_bool repl_quantity(FILE *fplog, const gmx_multisim_t *ms, - qall[re->repl] = q; - gmx_sum_sim(ms->nsim, qall, ms); - -- bDiff = FALSE; -- for (s = 1; s < ms->nsim; s++) -- { -- if (qall[s] != qall[0]) -- { -+ /* PLUMED */ -+ //bDiff = FALSE; -+ //for (s = 1; s < ms->nsim; s++) -+ //{ -+ // if (qall[s] != qall[0]) -+ // { - bDiff = TRUE; -- } -- } -+ // } -+ //} -+ /* END PLUMED */ - - if (bDiff) - { -@@ -257,6 +265,10 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog, - re->ind[i] = i; - } - -+ /* PLUMED */ -+ // plumed2: check if we want alternative patterns (i.e. for bias-exchange metaD) -+ // in those cases replicas can share the same temperature. -+ /* - if (re->type < ereENDSINGLE) - { - -@@ -266,10 +278,6 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog, - { - if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]]) - { -- /* Unordered replicas are supposed to work, but there -- * is still an issues somewhere. -- * Note that at this point still re->ind[i]=i. -- */ - gmx_fatal(FARGS, "Replicas with indices %d < %d have %ss %g > %g, please order your replicas on increasing %s", - i, j, - erename[re->type], -@@ -287,6 +295,8 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog, - } - } - } -+ */ -+ /* END PLUMED */ - - /* keep track of all the swaps, starting with the initial placement. 
*/ - snew(re->allswaps, re->nrepl); -@@ -988,6 +998,10 @@ test_for_replica_exchange(FILE *fplog, - pind[i] = re->ind[i]; - } - -+ /* PLUMED */ -+ int plumed_test_exchange_pattern=0; -+ /* END PLUMED */ -+ - if (bMultiEx) - { - /* multiple random switch exchange */ -@@ -1057,6 +1071,31 @@ test_for_replica_exchange(FILE *fplog, - { - /* standard nearest neighbor replica exchange */ - m = (step / re->nst) % 2; -+ /* PLUMED */ -+ if(plumedswitch){ -+ int partner=re->repl; -+ plumed_cmd(plumedmain,"getExchangesFlag",&plumed_test_exchange_pattern); -+ if(plumed_test_exchange_pattern>0){ -+ int *list; -+ snew(list,re->nrepl); -+ plumed_cmd(plumedmain,"setNumberOfReplicas",&(re->nrepl)); -+ plumed_cmd(plumedmain,"getExchangesList",list); -+ for(i=0; inrepl; i++) re->ind[i]=list[i]; -+ sfree(list); -+ } -+ -+ for(i=1; inrepl; i++) { -+ if (i % 2 != m) continue; -+ a = re->ind[i-1]; -+ b = re->ind[i]; -+ if(re->repl==a) partner=b; -+ if(re->repl==b) partner=a; -+ } -+ plumed_cmd(plumedmain,"GREX setPartner",&partner); -+ plumed_cmd(plumedmain,"GREX calculate",NULL); -+ plumed_cmd(plumedmain,"GREX shareAllDeltaBias",NULL); -+ } -+ /* END PLUMED */ - for (i = 1; i < re->nrepl; i++) - { - a = re->ind[i-1]; -@@ -1066,6 +1105,18 @@ test_for_replica_exchange(FILE *fplog, - if (i % 2 == m) - { - delta = calc_delta(fplog, bPrint, re, a, b, a, b); -+ /* PLUMED */ -+ if(plumedswitch){ -+ real adb,bdb,dplumed; -+ char buf[300]; -+ sprintf(buf,"GREX getDeltaBias %d",a); plumed_cmd(plumedmain,buf,&adb); -+ sprintf(buf,"GREX getDeltaBias %d",b); plumed_cmd(plumedmain,buf,&bdb); -+ dplumed=adb*re->beta[a]+bdb*re->beta[b]; -+ delta+=dplumed; -+ if (bPrint) -+ fprintf(fplog,"dplumed = %10.3e dE_Term = %10.3e (kT)\n",dplumed,delta); -+ } -+ /* END PLUMED */ - if (delta <= 0) - { - /* accepted */ -@@ -1089,11 +1140,22 @@ test_for_replica_exchange(FILE *fplog, - - if (bEx[i]) - { -+ /* PLUMED */ -+ if(!plumed_test_exchange_pattern) { -+ /* standard neighbour swapping */ - /* swap these two */ - tmp = pind[i-1]; - pind[i-1] = pind[i]; - pind[i] = tmp; - re->nexchange[i]++; /* statistics for back compatibility */ -+ } else { -+ /* alternative swapping patterns */ -+ tmp = pind[a]; -+ pind[a] = pind[b]; -+ pind[b] = tmp; -+ re->nexchange[i]++; /* statistics for back compatibility */ -+ } -+ /* END PLUMED */ - } - } - else -@@ -1109,6 +1171,15 @@ test_for_replica_exchange(FILE *fplog, - re->nattempt[m]++; - } - -+ /* PLUMED */ -+ if(plumed_test_exchange_pattern>0) { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->ind[i] = i; -+ } -+ } -+ /* END PLUMED */ -+ - /* record which moves were made and accepted */ - for (i = 0; i < re->nrepl; i++) - { -@@ -1314,6 +1385,10 @@ gmx_bool replica_exchange(FILE *fplog, const t_commrec *cr, struct gmx_repl_ex * - /* The order in which multiple exchanges will occur. */ - gmx_bool bThisReplicaExchanged = FALSE; - -+ /* PLUMED */ -+ if(plumedswitch)plumed_cmd(plumedmain,"GREX prepare",NULL); -+ /* END PLUMED */ -+ - if (MASTER(cr)) - { - replica_id = re->repl; -diff --git a/src/kernel/repl_ex.c.preplumed b/src/kernel/repl_ex.c.preplumed -new file mode 100644 -index 0000000..0f094d4 ---- /dev/null -+++ b/src/kernel/repl_ex.c.preplumed -@@ -0,0 +1,1450 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team, -+ * check out http://www.gromacs.org for more information. 
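The PLUMED hunks above fold the bias difference dplumed = adb*beta[a] + bdb*beta[b] into the exchange criterion before the usual Metropolis test. The isolated sketch below shows just that acceptance step; the plain rand() generator and the numeric inputs are made-up stand-ins for GROMACS's calc_delta() result and RNG.

    #include <math.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        double delta_md = 0.8;                            /* example calc_delta() result, in kT */
        double beta_a = 1.0 / 2.49, beta_b = 1.0 / 2.58;  /* example 1/kT of replicas a and b */
        double adb = -0.5, bdb = 0.3;                     /* example PLUMED bias differences */

        double dplumed = adb * beta_a + bdb * beta_b;     /* same combination as the patch */
        double delta   = delta_md + dplumed;

        /* accept if delta <= 0, otherwise with probability exp(-delta) */
        int accept = (delta <= 0.0) ||
                     ((double) rand() / RAND_MAX < exp(-delta));
        printf("dplumed = %10.3e  delta = %10.3e  accepted = %d\n",
               dplumed, delta, accept);
        return 0;
    }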
-+ * Copyright (c) 2012,2013, by the GROMACS development team, led by -+ * David van der Spoel, Berk Hess, Erik Lindahl, and including many -+ * others, as listed in the AUTHORS file in the top-level source -+ * directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. -+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. -+ */ -+#ifdef HAVE_CONFIG_H -+#include -+#endif -+ -+#include -+#include "repl_ex.h" -+#include "network.h" -+#include "random.h" -+#include "smalloc.h" -+#include "physics.h" -+#include "copyrite.h" -+#include "macros.h" -+#include "vec.h" -+#include "names.h" -+#include "mvdata.h" -+#include "domdec.h" -+#include "partdec.h" -+ -+#define PROBABILITYCUTOFF 100 -+/* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */ -+ -+enum { -+ ereTEMP, ereLAMBDA, ereENDSINGLE, ereTL, ereNR -+}; -+const char *erename[ereNR] = { "temperature", "lambda", "end_single_marker", "temperature and lambda"}; -+/* end_single_marker merely notes the end of single variable replica exchange. All types higher than -+ it are multiple replica exchange methods */ -+/* Eventually, should add 'pressure', 'temperature and pressure', 'lambda_and_pressure', 'temperature_lambda_pressure'?; -+ Let's wait until we feel better about the pressure control methods giving exact ensembles. 
Right now, we assume constant pressure */ -+ -+typedef struct gmx_repl_ex -+{ -+ int repl; -+ int nrepl; -+ real temp; -+ int type; -+ real **q; -+ gmx_bool bNPT; -+ real *pres; -+ int *ind; -+ int *allswaps; -+ int nst; -+ int nex; -+ int seed; -+ int nattempt[2]; -+ real *prob_sum; -+ int **nmoves; -+ int *nexchange; -+ -+ /* these are helper arrays for replica exchange; allocated here so they -+ don't have to be allocated each time */ -+ int *destinations; -+ int **cyclic; -+ int **order; -+ int *tmpswap; -+ gmx_bool *incycle; -+ gmx_bool *bEx; -+ -+ /* helper arrays to hold the quantities that are exchanged */ -+ real *prob; -+ real *Epot; -+ real *beta; -+ real *Vol; -+ real **de; -+ -+} t_gmx_repl_ex; -+ -+static gmx_bool repl_quantity(FILE *fplog, const gmx_multisim_t *ms, -+ struct gmx_repl_ex *re, int ere, real q) -+{ -+ real *qall; -+ gmx_bool bDiff; -+ int i, s; -+ -+ snew(qall, ms->nsim); -+ qall[re->repl] = q; -+ gmx_sum_sim(ms->nsim, qall, ms); -+ -+ bDiff = FALSE; -+ for (s = 1; s < ms->nsim; s++) -+ { -+ if (qall[s] != qall[0]) -+ { -+ bDiff = TRUE; -+ } -+ } -+ -+ if (bDiff) -+ { -+ /* Set the replica exchange type and quantities */ -+ re->type = ere; -+ -+ snew(re->q[ere], re->nrepl); -+ for (s = 0; s < ms->nsim; s++) -+ { -+ re->q[ere][s] = qall[s]; -+ } -+ } -+ sfree(qall); -+ return bDiff; -+} -+ -+gmx_repl_ex_t init_replica_exchange(FILE *fplog, -+ const gmx_multisim_t *ms, -+ const t_state *state, -+ const t_inputrec *ir, -+ int nst, int nex, int init_seed) -+{ -+ real temp, pres; -+ int i, j, k; -+ struct gmx_repl_ex *re; -+ gmx_bool bTemp; -+ gmx_bool bLambda = FALSE; -+ -+ fprintf(fplog, "\nInitializing Replica Exchange\n"); -+ -+ if (ms == NULL || ms->nsim == 1) -+ { -+ gmx_fatal(FARGS, "Nothing to exchange with only one replica, maybe you forgot to set the -multi option of mdrun?"); -+ } -+ -+ snew(re, 1); -+ -+ re->repl = ms->sim; -+ re->nrepl = ms->nsim; -+ snew(re->q, ereENDSINGLE); -+ -+ fprintf(fplog, "Repl There are %d replicas:\n", re->nrepl); -+ -+ check_multi_int(fplog, ms, state->natoms, "the number of atoms", FALSE); -+ check_multi_int(fplog, ms, ir->eI, "the integrator", FALSE); -+ check_multi_large_int(fplog, ms, ir->init_step+ir->nsteps, "init_step+nsteps", FALSE); -+ check_multi_large_int(fplog, ms, (ir->init_step+nst-1)/nst, -+ "first exchange step: init_step/-replex", FALSE); -+ check_multi_int(fplog, ms, ir->etc, "the temperature coupling", FALSE); -+ check_multi_int(fplog, ms, ir->opts.ngtc, -+ "the number of temperature coupling groups", FALSE); -+ check_multi_int(fplog, ms, ir->epc, "the pressure coupling", FALSE); -+ check_multi_int(fplog, ms, ir->efep, "free energy", FALSE); -+ check_multi_int(fplog, ms, ir->fepvals->n_lambda, "number of lambda states", FALSE); -+ -+ re->temp = ir->opts.ref_t[0]; -+ for (i = 1; (i < ir->opts.ngtc); i++) -+ { -+ if (ir->opts.ref_t[i] != re->temp) -+ { -+ fprintf(fplog, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); -+ fprintf(stderr, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); -+ } -+ } -+ -+ re->type = -1; -+ bTemp = repl_quantity(fplog, ms, re, ereTEMP, re->temp); -+ if (ir->efep != efepNO) -+ { -+ bLambda = repl_quantity(fplog, ms, re, ereLAMBDA, (real)ir->fepvals->init_fep_state); -+ } -+ if (re->type == -1) /* nothing was assigned */ -+ { -+ gmx_fatal(FARGS, "The properties of the %d systems are all the same, there is nothing to exchange", re->nrepl); -+ } -+ if (bLambda && bTemp) -+ { -+ re->type 
= ereTL; -+ } -+ -+ if (bTemp) -+ { -+ please_cite(fplog, "Sugita1999a"); -+ if (ir->epc != epcNO) -+ { -+ re->bNPT = TRUE; -+ fprintf(fplog, "Repl Using Constant Pressure REMD.\n"); -+ please_cite(fplog, "Okabe2001a"); -+ } -+ if (ir->etc == etcBERENDSEN) -+ { -+ gmx_fatal(FARGS, "REMD with the %s thermostat does not produce correct potential energy distributions, consider using the %s thermostat instead", -+ ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE)); -+ } -+ } -+ if (bLambda) -+ { -+ if (ir->fepvals->delta_lambda != 0) /* check this? */ -+ { -+ gmx_fatal(FARGS, "delta_lambda is not zero"); -+ } -+ } -+ if (re->bNPT) -+ { -+ snew(re->pres, re->nrepl); -+ if (ir->epct == epctSURFACETENSION) -+ { -+ pres = ir->ref_p[ZZ][ZZ]; -+ } -+ else -+ { -+ pres = 0; -+ j = 0; -+ for (i = 0; i < DIM; i++) -+ { -+ if (ir->compress[i][i] != 0) -+ { -+ pres += ir->ref_p[i][i]; -+ j++; -+ } -+ } -+ pres /= j; -+ } -+ re->pres[re->repl] = pres; -+ gmx_sum_sim(re->nrepl, re->pres, ms); -+ } -+ -+ /* Make an index for increasing replica order */ -+ /* only makes sense if one or the other is varying, not both! -+ if both are varying, we trust the order the person gave. */ -+ snew(re->ind, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->ind[i] = i; -+ } -+ -+ if (re->type < ereENDSINGLE) -+ { -+ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ for (j = i+1; j < re->nrepl; j++) -+ { -+ if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]]) -+ { -+ /* Unordered replicas are supposed to work, but there -+ * is still an issues somewhere. -+ * Note that at this point still re->ind[i]=i. -+ */ -+ gmx_fatal(FARGS, "Replicas with indices %d < %d have %ss %g > %g, please order your replicas on increasing %s", -+ i, j, -+ erename[re->type], -+ re->q[re->type][i], re->q[re->type][j], -+ erename[re->type]); -+ -+ k = re->ind[i]; -+ re->ind[i] = re->ind[j]; -+ re->ind[j] = k; -+ } -+ else if (re->q[re->type][re->ind[j]] == re->q[re->type][re->ind[i]]) -+ { -+ gmx_fatal(FARGS, "Two replicas have identical %ss", erename[re->type]); -+ } -+ } -+ } -+ } -+ -+ /* keep track of all the swaps, starting with the initial placement. 
*/ -+ snew(re->allswaps, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->allswaps[i] = re->ind[i]; -+ } -+ -+ switch (re->type) -+ { -+ case ereTEMP: -+ fprintf(fplog, "\nReplica exchange in temperature\n"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %5.1f", re->q[re->type][re->ind[i]]); -+ } -+ fprintf(fplog, "\n"); -+ break; -+ case ereLAMBDA: -+ fprintf(fplog, "\nReplica exchange in lambda\n"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %3d", (int)re->q[re->type][re->ind[i]]); -+ } -+ fprintf(fplog, "\n"); -+ break; -+ case ereTL: -+ fprintf(fplog, "\nReplica exchange in temperature and lambda state\n"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %5.1f", re->q[ereTEMP][re->ind[i]]); -+ } -+ fprintf(fplog, "\n"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %5d", (int)re->q[ereLAMBDA][re->ind[i]]); -+ } -+ fprintf(fplog, "\n"); -+ break; -+ default: -+ gmx_incons("Unknown replica exchange quantity"); -+ } -+ if (re->bNPT) -+ { -+ fprintf(fplog, "\nRepl p"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %5.2f", re->pres[re->ind[i]]); -+ } -+ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ if ((i > 0) && (re->pres[re->ind[i]] < re->pres[re->ind[i-1]])) -+ { -+ fprintf(fplog, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n"); -+ fprintf(stderr, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n"); -+ } -+ } -+ } -+ re->nst = nst; -+ if (init_seed == -1) -+ { -+ if (MASTERSIM(ms)) -+ { -+ re->seed = make_seed(); -+ } -+ else -+ { -+ re->seed = 0; -+ } -+ gmx_sumi_sim(1, &(re->seed), ms); -+ } -+ else -+ { -+ re->seed = init_seed; -+ } -+ fprintf(fplog, "\nReplica exchange interval: %d\n", re->nst); -+ fprintf(fplog, "\nReplica random seed: %d\n", re->seed); -+ -+ re->nattempt[0] = 0; -+ re->nattempt[1] = 0; -+ -+ snew(re->prob_sum, re->nrepl); -+ snew(re->nexchange, re->nrepl); -+ snew(re->nmoves, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ snew(re->nmoves[i], re->nrepl); -+ } -+ fprintf(fplog, "Replica exchange information below: x=exchange, pr=probability\n"); -+ -+ /* generate space for the helper functions so we don't have to snew each time */ -+ -+ snew(re->destinations, re->nrepl); -+ snew(re->incycle, re->nrepl); -+ snew(re->tmpswap, re->nrepl); -+ snew(re->cyclic, re->nrepl); -+ snew(re->order, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ snew(re->cyclic[i], re->nrepl); -+ snew(re->order[i], re->nrepl); -+ } -+ /* allocate space for the functions storing the data for the replicas */ -+ /* not all of these arrays needed in all cases, but they don't take -+ up much space, since the max size is nrepl**2 */ -+ snew(re->prob, re->nrepl); -+ snew(re->bEx, re->nrepl); -+ snew(re->beta, re->nrepl); -+ snew(re->Vol, re->nrepl); -+ snew(re->Epot, re->nrepl); -+ snew(re->de, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ snew(re->de[i], re->nrepl); -+ } -+ re->nex = nex; -+ return re; -+} -+ -+static void exchange_reals(const gmx_multisim_t *ms, int b, real *v, int n) -+{ -+ real *buf; -+ int i; -+ -+ if (v) -+ { -+ snew(buf, n); -+#ifdef GMX_MPI -+ /* -+ MPI_Sendrecv(v, n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, -+ buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, -+ ms->mpi_comm_masters,MPI_STATUS_IGNORE); -+ */ -+ { -+ MPI_Request mpi_req; -+ -+ MPI_Isend(v, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, &mpi_req); -+ MPI_Recv(buf, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, 
MPI_STATUS_IGNORE); -+ MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); -+ } -+#endif -+ for (i = 0; i < n; i++) -+ { -+ v[i] = buf[i]; -+ } -+ sfree(buf); -+ } -+} -+ -+ -+static void exchange_ints(const gmx_multisim_t *ms, int b, int *v, int n) -+{ -+ int *buf; -+ int i; -+ -+ if (v) -+ { -+ snew(buf, n); -+#ifdef GMX_MPI -+ /* -+ MPI_Sendrecv(v, n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0, -+ buf,n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0, -+ ms->mpi_comm_masters,MPI_STATUS_IGNORE); -+ */ -+ { -+ MPI_Request mpi_req; -+ -+ MPI_Isend(v, n*sizeof(int), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, &mpi_req); -+ MPI_Recv(buf, n*sizeof(int), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, MPI_STATUS_IGNORE); -+ MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); -+ } -+#endif -+ for (i = 0; i < n; i++) -+ { -+ v[i] = buf[i]; -+ } -+ sfree(buf); -+ } -+} -+ -+static void exchange_doubles(const gmx_multisim_t *ms, int b, double *v, int n) -+{ -+ double *buf; -+ int i; -+ -+ if (v) -+ { -+ snew(buf, n); -+#ifdef GMX_MPI -+ /* -+ MPI_Sendrecv(v, n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, -+ buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, -+ ms->mpi_comm_masters,MPI_STATUS_IGNORE); -+ */ -+ { -+ MPI_Request mpi_req; -+ -+ MPI_Isend(v, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, &mpi_req); -+ MPI_Recv(buf, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, MPI_STATUS_IGNORE); -+ MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); -+ } -+#endif -+ for (i = 0; i < n; i++) -+ { -+ v[i] = buf[i]; -+ } -+ sfree(buf); -+ } -+} -+ -+static void exchange_rvecs(const gmx_multisim_t *ms, int b, rvec *v, int n) -+{ -+ rvec *buf; -+ int i; -+ -+ if (v) -+ { -+ snew(buf, n); -+#ifdef GMX_MPI -+ /* -+ MPI_Sendrecv(v[0], n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, -+ buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, -+ ms->mpi_comm_masters,MPI_STATUS_IGNORE); -+ */ -+ { -+ MPI_Request mpi_req; -+ -+ MPI_Isend(v[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, &mpi_req); -+ MPI_Recv(buf[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, MPI_STATUS_IGNORE); -+ MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); -+ } -+#endif -+ for (i = 0; i < n; i++) -+ { -+ copy_rvec(buf[i], v[i]); -+ } -+ sfree(buf); -+ } -+} -+ -+static void exchange_state(const gmx_multisim_t *ms, int b, t_state *state) -+{ -+ /* When t_state changes, this code should be updated. 
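The four exchange_* helpers above all use the same pairwise swap pattern: post a non-blocking send of the local buffer, do a blocking receive from the partner master rank, wait for the send to complete, then copy the received data back over the local array (the symmetric MPI_Sendrecv variant is left commented out). A minimal standalone sketch of that idea, using a hypothetical swap_doubles() helper and MPI_COMM_WORLD instead of GROMACS' ms->mpi_comm_masters:

    #include <mpi.h>
    #include <string.h>

    /* Hypothetical illustration only: swap an array of n doubles (n <= 256 here)
     * with the rank given by `partner`.  Isend + Recv + Wait cannot deadlock the
     * way a pair of plain blocking sends could. */
    static void swap_doubles(double *v, int n, int partner)
    {
        double      buf[256];
        MPI_Request req;

        MPI_Isend(v, n, MPI_DOUBLE, partner, 0, MPI_COMM_WORLD, &req);
        MPI_Recv(buf, n, MPI_DOUBLE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Wait(&req, MPI_STATUS_IGNORE);
        memcpy(v, buf, (size_t)n*sizeof(double));
    }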
*/ -+ int ngtc, nnhpres; -+ ngtc = state->ngtc * state->nhchainlength; -+ nnhpres = state->nnhpres* state->nhchainlength; -+ exchange_rvecs(ms, b, state->box, DIM); -+ exchange_rvecs(ms, b, state->box_rel, DIM); -+ exchange_rvecs(ms, b, state->boxv, DIM); -+ exchange_reals(ms, b, &(state->veta), 1); -+ exchange_reals(ms, b, &(state->vol0), 1); -+ exchange_rvecs(ms, b, state->svir_prev, DIM); -+ exchange_rvecs(ms, b, state->fvir_prev, DIM); -+ exchange_rvecs(ms, b, state->pres_prev, DIM); -+ exchange_doubles(ms, b, state->nosehoover_xi, ngtc); -+ exchange_doubles(ms, b, state->nosehoover_vxi, ngtc); -+ exchange_doubles(ms, b, state->nhpres_xi, nnhpres); -+ exchange_doubles(ms, b, state->nhpres_vxi, nnhpres); -+ exchange_doubles(ms, b, state->therm_integral, state->ngtc); -+ exchange_rvecs(ms, b, state->x, state->natoms); -+ exchange_rvecs(ms, b, state->v, state->natoms); -+ exchange_rvecs(ms, b, state->sd_X, state->natoms); -+} -+ -+static void copy_rvecs(rvec *s, rvec *d, int n) -+{ -+ int i; -+ -+ if (d != NULL) -+ { -+ for (i = 0; i < n; i++) -+ { -+ copy_rvec(s[i], d[i]); -+ } -+ } -+} -+ -+static void copy_doubles(const double *s, double *d, int n) -+{ -+ int i; -+ -+ if (d != NULL) -+ { -+ for (i = 0; i < n; i++) -+ { -+ d[i] = s[i]; -+ } -+ } -+} -+ -+static void copy_reals(const real *s, real *d, int n) -+{ -+ int i; -+ -+ if (d != NULL) -+ { -+ for (i = 0; i < n; i++) -+ { -+ d[i] = s[i]; -+ } -+ } -+} -+ -+static void copy_ints(const int *s, int *d, int n) -+{ -+ int i; -+ -+ if (d != NULL) -+ { -+ for (i = 0; i < n; i++) -+ { -+ d[i] = s[i]; -+ } -+ } -+} -+ -+#define scopy_rvecs(v, n) copy_rvecs(state->v, state_local->v, n); -+#define scopy_doubles(v, n) copy_doubles(state->v, state_local->v, n); -+#define scopy_reals(v, n) copy_reals(state->v, state_local->v, n); -+#define scopy_ints(v, n) copy_ints(state->v, state_local->v, n); -+ -+static void copy_state_nonatomdata(t_state *state, t_state *state_local) -+{ -+ /* When t_state changes, this code should be updated. 
*/ -+ int ngtc, nnhpres; -+ ngtc = state->ngtc * state->nhchainlength; -+ nnhpres = state->nnhpres* state->nhchainlength; -+ scopy_rvecs(box, DIM); -+ scopy_rvecs(box_rel, DIM); -+ scopy_rvecs(boxv, DIM); -+ state_local->veta = state->veta; -+ state_local->vol0 = state->vol0; -+ scopy_rvecs(svir_prev, DIM); -+ scopy_rvecs(fvir_prev, DIM); -+ scopy_rvecs(pres_prev, DIM); -+ scopy_doubles(nosehoover_xi, ngtc); -+ scopy_doubles(nosehoover_vxi, ngtc); -+ scopy_doubles(nhpres_xi, nnhpres); -+ scopy_doubles(nhpres_vxi, nnhpres); -+ scopy_doubles(therm_integral, state->ngtc); -+ scopy_rvecs(x, state->natoms); -+ scopy_rvecs(v, state->natoms); -+ scopy_rvecs(sd_X, state->natoms); -+ copy_ints(&(state->fep_state), &(state_local->fep_state), 1); -+ scopy_reals(lambda, efptNR); -+} -+ -+static void scale_velocities(t_state *state, real fac) -+{ -+ int i; -+ -+ if (state->v) -+ { -+ for (i = 0; i < state->natoms; i++) -+ { -+ svmul(fac, state->v[i], state->v[i]); -+ } -+ } -+} -+ -+static void pd_collect_state(const t_commrec *cr, t_state *state) -+{ -+ int shift; -+ -+ if (debug) -+ { -+ fprintf(debug, "Collecting state before exchange\n"); -+ } -+ shift = cr->nnodes - cr->npmenodes - 1; -+ move_rvecs(cr, FALSE, FALSE, GMX_LEFT, GMX_RIGHT, state->x, NULL, shift, NULL); -+ if (state->v) -+ { -+ move_rvecs(cr, FALSE, FALSE, GMX_LEFT, GMX_RIGHT, state->v, NULL, shift, NULL); -+ } -+ if (state->sd_X) -+ { -+ move_rvecs(cr, FALSE, FALSE, GMX_LEFT, GMX_RIGHT, state->sd_X, NULL, shift, NULL); -+ } -+} -+ -+static void print_transition_matrix(FILE *fplog, const char *leg, int n, int **nmoves, int *nattempt) -+{ -+ int i, j, ntot; -+ float Tprint; -+ -+ ntot = nattempt[0] + nattempt[1]; -+ fprintf(fplog, "\n"); -+ fprintf(fplog, "Repl"); -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, " "); /* put the title closer to the center */ -+ } -+ fprintf(fplog, "Empirical Transition Matrix\n"); -+ -+ fprintf(fplog, "Repl"); -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, "%8d", (i+1)); -+ } -+ fprintf(fplog, "\n"); -+ -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, "Repl"); -+ for (j = 0; j < n; j++) -+ { -+ Tprint = 0.0; -+ if (nmoves[i][j] > 0) -+ { -+ Tprint = nmoves[i][j]/(2.0*ntot); -+ } -+ fprintf(fplog, "%8.4f", Tprint); -+ } -+ fprintf(fplog, "%3d\n", i); -+ } -+} -+ -+static void print_ind(FILE *fplog, const char *leg, int n, int *ind, gmx_bool *bEx) -+{ -+ int i; -+ -+ fprintf(fplog, "Repl %2s %2d", leg, ind[0]); -+ for (i = 1; i < n; i++) -+ { -+ fprintf(fplog, " %c %2d", (bEx != 0 && bEx[i]) ? 'x' : ' ', ind[i]); -+ } -+ fprintf(fplog, "\n"); -+} -+ -+static void print_allswitchind(FILE *fplog, int n, int *ind, int *pind, int *allswaps, int *tmpswap) -+{ -+ int i; -+ -+ for (i = 0; i < n; i++) -+ { -+ tmpswap[i] = allswaps[i]; -+ } -+ for (i = 0; i < n; i++) -+ { -+ allswaps[i] = tmpswap[pind[i]]; -+ } -+ -+ fprintf(fplog, "\nAccepted Exchanges: "); -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, "%d ", pind[i]); -+ } -+ fprintf(fplog, "\n"); -+ -+ /* the "Order After Exchange" is the state label corresponding to the configuration that -+ started in state listed in order, i.e. 
-+ -+ 3 0 1 2 -+ -+ means that the: -+ configuration starting in simulation 3 is now in simulation 0, -+ configuration starting in simulation 0 is now in simulation 1, -+ configuration starting in simulation 1 is now in simulation 2, -+ configuration starting in simulation 2 is now in simulation 3 -+ */ -+ fprintf(fplog, "Order After Exchange: "); -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, "%d ", allswaps[i]); -+ } -+ fprintf(fplog, "\n\n"); -+} -+ -+static void print_prob(FILE *fplog, const char *leg, int n, real *prob) -+{ -+ int i; -+ char buf[8]; -+ -+ fprintf(fplog, "Repl %2s ", leg); -+ for (i = 1; i < n; i++) -+ { -+ if (prob[i] >= 0) -+ { -+ sprintf(buf, "%4.2f", prob[i]); -+ fprintf(fplog, " %3s", buf[0] == '1' ? "1.0" : buf+1); -+ } -+ else -+ { -+ fprintf(fplog, " "); -+ } -+ } -+ fprintf(fplog, "\n"); -+} -+ -+static void print_count(FILE *fplog, const char *leg, int n, int *count) -+{ -+ int i; -+ -+ fprintf(fplog, "Repl %2s ", leg); -+ for (i = 1; i < n; i++) -+ { -+ fprintf(fplog, " %4d", count[i]); -+ } -+ fprintf(fplog, "\n"); -+} -+ -+static real calc_delta(FILE *fplog, gmx_bool bPrint, struct gmx_repl_ex *re, int a, int b, int ap, int bp) -+{ -+ -+ real ediff, dpV, delta = 0; -+ real *Epot = re->Epot; -+ real *Vol = re->Vol; -+ real **de = re->de; -+ real *beta = re->beta; -+ -+ /* Two cases; we are permuted and not. In all cases, setting ap = a and bp = b will reduce -+ to the non permuted case */ -+ -+ switch (re->type) -+ { -+ case ereTEMP: -+ /* -+ * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439 -+ */ -+ ediff = Epot[b] - Epot[a]; -+ delta = -(beta[bp] - beta[ap])*ediff; -+ break; -+ case ereLAMBDA: -+ /* two cases: when we are permuted, and not. */ -+ /* non-permuted: -+ ediff = E_new - E_old -+ = [H_b(x_a) + H_a(x_b)] - [H_b(x_b) + H_a(x_a)] -+ = [H_b(x_a) - H_a(x_a)] + [H_a(x_b) - H_b(x_b)] -+ = de[b][a] + de[a][b] */ -+ -+ /* permuted: -+ ediff = E_new - E_old -+ = [H_bp(x_a) + H_ap(x_b)] - [H_bp(x_b) + H_ap(x_a)] -+ = [H_bp(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_bp(x_b)] -+ = [H_bp(x_a) - H_a(x_a) + H_a(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_b(x_b) + H_b(x_b) - H_bp(x_b)] -+ = [H_bp(x_a) - H_a(x_a)] - [H_ap(x_a) - H_a(x_a)] + [H_ap(x_b) - H_b(x_b)] - H_bp(x_b) - H_b(x_b)] -+ = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]) */ -+ /* but, in the current code implementation, we flip configurations, not indices . . . -+ So let's examine that. -+ = [H_b(x_ap) - H_a(x_a)] - [H_a(x_ap) - H_a(x_a)] + [H_a(x_bp) - H_b(x_b)] - H_b(x_bp) - H_b(x_b)] -+ = [H_b(x_ap) - H_a(x_ap)] + [H_a(x_bp) - H_b(x_pb)] -+ = (de[b][ap] - de[a][ap]) + (de[a][bp] - de[b][bp] -+ So, if we exchange b<=> bp and a<=> ap, we return to the same result. -+ So the simple solution is to flip the -+ position of perturbed and original indices in the tests. 
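Restating the reasoning in the comment above compactly (LaTeX, not part of the patch), with $U_i$ the potential energy of replica $i$, $\beta_i = 1/(k_B T_i)$, and $\mathrm{de}[i][j] = H_i(x_j) - H_j(x_j)$ as filled in later by test_for_replica_exchange: for temperature exchange, calc_delta forms

$$\Delta_{ab} = -(\beta_{b'} - \beta_{a'})\,(U_b - U_a), \qquad p_{\mathrm{acc}} = \min\bigl(1, e^{-\Delta_{ab}}\bigr),$$

and for Hamiltonian (lambda) exchange at a common inverse temperature $\beta$,

$$\Delta_{ab} = \beta\,\bigl[(\mathrm{de}[b'][a] - \mathrm{de}[a'][a]) + (\mathrm{de}[a'][b] - \mathrm{de}[b'][b])\bigr],$$

where the primed indices are the possibly permuted ap and bp, exactly the expression computed just below.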
-+ */ -+ -+ ediff = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]); -+ delta = ediff*beta[a]; /* assume all same temperature in this case */ -+ break; -+ case ereTL: -+ /* not permuted: */ -+ /* delta = reduced E_new - reduced E_old -+ = [beta_b H_b(x_a) + beta_a H_a(x_b)] - [beta_b H_b(x_b) + beta_a H_a(x_a)] -+ = [beta_b H_b(x_a) - beta_a H_a(x_a)] + [beta_a H_a(x_b) - beta_b H_b(x_b)] -+ = [beta_b dH_b(x_a) + beta_b H_a(x_a) - beta_a H_a(x_a)] + -+ [beta_a dH_a(x_b) + beta_a H_b(x_b) - beta_b H_b(x_b)] -+ = [beta_b dH_b(x_a) + [beta_a dH_a(x_b) + -+ beta_b (H_a(x_a) - H_b(x_b)]) - beta_a (H_a(x_a) - H_b(x_b)) -+ = beta_b dH_b(x_a) + beta_a dH_a(x_b) - (beta_b - beta_a)(H_b(x_b) - H_a(x_a) */ -+ /* delta = beta[b]*de[b][a] + beta[a]*de[a][b] - (beta[b] - beta[a])*(Epot[b] - Epot[a]; */ -+ /* permuted (big breath!) */ -+ /* delta = reduced E_new - reduced E_old -+ = [beta_bp H_bp(x_a) + beta_ap H_ap(x_b)] - [beta_bp H_bp(x_b) + beta_ap H_ap(x_a)] -+ = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] -+ = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] -+ - beta_pb H_a(x_a) + beta_ap H_a(x_a) + beta_pb H_a(x_a) - beta_ap H_a(x_a) -+ - beta_ap H_b(x_b) + beta_bp H_b(x_b) + beta_ap H_b(x_b) - beta_bp H_b(x_b) -+ = [(beta_bp H_bp(x_a) - beta_bp H_a(x_a)) - (beta_ap H_ap(x_a) - beta_ap H_a(x_a))] + -+ [(beta_ap H_ap(x_b) - beta_ap H_b(x_b)) - (beta_bp H_bp(x_b) - beta_bp H_b(x_b))] -+ + beta_pb H_a(x_a) - beta_ap H_a(x_a) + beta_ap H_b(x_b) - beta_bp H_b(x_b) -+ = [beta_bp (H_bp(x_a) - H_a(x_a)) - beta_ap (H_ap(x_a) - H_a(x_a))] + -+ [beta_ap (H_ap(x_b) - H_b(x_b)) - beta_bp (H_bp(x_b) - H_b(x_b))] -+ + beta_pb (H_a(x_a) - H_b(x_b)) - beta_ap (H_a(x_a) - H_b(x_b)) -+ = ([beta_bp de[bp][a] - beta_ap de[ap][a]) + beta_ap de[ap][b] - beta_bp de[bp][b]) -+ + (beta_pb-beta_ap)(H_a(x_a) - H_b(x_b)) */ -+ delta = beta[bp]*(de[bp][a] - de[bp][b]) + beta[ap]*(de[ap][b] - de[ap][a]) - (beta[bp]-beta[ap])*(Epot[b]-Epot[a]); -+ break; -+ default: -+ gmx_incons("Unknown replica exchange quantity"); -+ } -+ if (bPrint) -+ { -+ fprintf(fplog, "Repl %d <-> %d dE_term = %10.3e (kT)\n", a, b, delta); -+ } -+ if (re->bNPT) -+ { -+ /* revist the calculation for 5.0. Might be some improvements. 
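For constant-pressure REMD the block just below adds a pressure-volume term to the same acceptance exponent; written out (a restatement of the code, with PRESFAC being the pressure unit-conversion constant used by GROMACS):

$$\Delta \;\leftarrow\; \Delta + \bigl(\beta_{a'}P_{a'} - \beta_{b'}P_{b'}\bigr)\,\frac{V_b - V_a}{\mathrm{PRESFAC}},$$

so the instantaneous volumes of the two replicas enter the Metropolis test together with their reference pressures.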
*/ -+ dpV = (beta[ap]*re->pres[ap]-beta[bp]*re->pres[bp])*(Vol[b]-Vol[a])/PRESFAC; -+ if (bPrint) -+ { -+ fprintf(fplog, " dpV = %10.3e d = %10.3e\n", dpV, delta + dpV); -+ } -+ delta += dpV; -+ } -+ return delta; -+} -+ -+static void -+test_for_replica_exchange(FILE *fplog, -+ const gmx_multisim_t *ms, -+ struct gmx_repl_ex *re, -+ gmx_enerdata_t *enerd, -+ real vol, -+ gmx_large_int_t step, -+ real time) -+{ -+ int m, i, j, a, b, ap, bp, i0, i1, tmp; -+ real ediff = 0, delta = 0, dpV = 0; -+ gmx_bool bPrint, bMultiEx; -+ gmx_bool *bEx = re->bEx; -+ real *prob = re->prob; -+ int *pind = re->destinations; /* permuted index */ -+ gmx_bool bEpot = FALSE; -+ gmx_bool bDLambda = FALSE; -+ gmx_bool bVol = FALSE; -+ -+ bMultiEx = (re->nex > 1); /* multiple exchanges at each state */ -+ fprintf(fplog, "Replica exchange at step " gmx_large_int_pfmt " time %.5f\n", step, time); -+ -+ if (re->bNPT) -+ { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->Vol[i] = 0; -+ } -+ bVol = TRUE; -+ re->Vol[re->repl] = vol; -+ } -+ if ((re->type == ereTEMP || re->type == ereTL)) -+ { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->Epot[i] = 0; -+ } -+ bEpot = TRUE; -+ re->Epot[re->repl] = enerd->term[F_EPOT]; -+ /* temperatures of different states*/ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->beta[i] = 1.0/(re->q[ereTEMP][i]*BOLTZ); -+ } -+ } -+ else -+ { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->beta[i] = 1.0/(re->temp*BOLTZ); /* we have a single temperature */ -+ } -+ } -+ if (re->type == ereLAMBDA || re->type == ereTL) -+ { -+ bDLambda = TRUE; -+ /* lambda differences. */ -+ /* de[i][j] is the energy of the jth simulation in the ith Hamiltonian -+ minus the energy of the jth simulation in the jth Hamiltonian */ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ for (j = 0; j < re->nrepl; j++) -+ { -+ re->de[i][j] = 0; -+ } -+ } -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->de[i][re->repl] = (enerd->enerpart_lambda[(int)re->q[ereLAMBDA][i]+1]-enerd->enerpart_lambda[0]); -+ } -+ } -+ -+ /* now actually do the communication */ -+ if (bVol) -+ { -+ gmx_sum_sim(re->nrepl, re->Vol, ms); -+ } -+ if (bEpot) -+ { -+ gmx_sum_sim(re->nrepl, re->Epot, ms); -+ } -+ if (bDLambda) -+ { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ gmx_sum_sim(re->nrepl, re->de[i], ms); -+ } -+ } -+ -+ /* make a duplicate set of indices for shuffling */ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ pind[i] = re->ind[i]; -+ } -+ -+ if (bMultiEx) -+ { -+ /* multiple random switch exchange */ -+ for (i = 0; i < re->nex; i++) -+ { -+ /* randomly select a pair */ -+ /* in theory, could reduce this by identifying only which switches had a nonneglibible -+ probability of occurring (log p > -100) and only operate on those switches */ -+ /* find out which state it is from, and what label that state currently has. Likely -+ more work that useful. */ -+ i0 = (int)(re->nrepl*rando(&(re->seed))); -+ i1 = (int)(re->nrepl*rando(&(re->seed))); -+ if (i0 == i1) -+ { -+ i--; -+ continue; /* self-exchange, back up and do it again */ -+ } -+ -+ a = re->ind[i0]; /* what are the indices of these states? 
*/ -+ b = re->ind[i1]; -+ ap = pind[i0]; -+ bp = pind[i1]; -+ -+ bPrint = FALSE; /* too noisy */ -+ /* calculate the energy difference */ -+ /* if the code changes to flip the STATES, rather than the configurations, -+ use the commented version of the code */ -+ /* delta = calc_delta(fplog,bPrint,re,a,b,ap,bp); */ -+ delta = calc_delta(fplog, bPrint, re, ap, bp, a, b); -+ -+ /* we actually only use the first space in the prob and bEx array, -+ since there are actually many switches between pairs. */ -+ -+ if (delta <= 0) -+ { -+ /* accepted */ -+ prob[0] = 1; -+ bEx[0] = TRUE; -+ } -+ else -+ { -+ if (delta > PROBABILITYCUTOFF) -+ { -+ prob[0] = 0; -+ } -+ else -+ { -+ prob[0] = exp(-delta); -+ } -+ /* roll a number to determine if accepted */ -+ bEx[0] = (rando(&(re->seed)) < prob[0]); -+ } -+ re->prob_sum[0] += prob[0]; -+ -+ if (bEx[0]) -+ { -+ /* swap the states */ -+ tmp = pind[i0]; -+ pind[i0] = pind[i1]; -+ pind[i1] = tmp; -+ } -+ } -+ re->nattempt[0]++; /* keep track of total permutation trials here */ -+ print_allswitchind(fplog, re->nrepl, re->ind, pind, re->allswaps, re->tmpswap); -+ } -+ else -+ { -+ /* standard nearest neighbor replica exchange */ -+ m = (step / re->nst) % 2; -+ for (i = 1; i < re->nrepl; i++) -+ { -+ a = re->ind[i-1]; -+ b = re->ind[i]; -+ -+ bPrint = (re->repl == a || re->repl == b); -+ if (i % 2 == m) -+ { -+ delta = calc_delta(fplog, bPrint, re, a, b, a, b); -+ if (delta <= 0) -+ { -+ /* accepted */ -+ prob[i] = 1; -+ bEx[i] = TRUE; -+ } -+ else -+ { -+ if (delta > PROBABILITYCUTOFF) -+ { -+ prob[i] = 0; -+ } -+ else -+ { -+ prob[i] = exp(-delta); -+ } -+ /* roll a number to determine if accepted */ -+ bEx[i] = (rando(&(re->seed)) < prob[i]); -+ } -+ re->prob_sum[i] += prob[i]; -+ -+ if (bEx[i]) -+ { -+ /* swap these two */ -+ tmp = pind[i-1]; -+ pind[i-1] = pind[i]; -+ pind[i] = tmp; -+ re->nexchange[i]++; /* statistics for back compatibility */ -+ } -+ } -+ else -+ { -+ prob[i] = -1; -+ bEx[i] = FALSE; -+ } -+ } -+ /* print some statistics */ -+ print_ind(fplog, "ex", re->nrepl, re->ind, bEx); -+ print_prob(fplog, "pr", re->nrepl, prob); -+ fprintf(fplog, "\n"); -+ re->nattempt[m]++; -+ } -+ -+ /* record which moves were made and accepted */ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->nmoves[re->ind[i]][pind[i]] += 1; -+ re->nmoves[pind[i]][re->ind[i]] += 1; -+ } -+ fflush(fplog); /* make sure we can see what the last exchange was */ -+} -+ -+static void write_debug_x(t_state *state) -+{ -+ int i; -+ -+ if (debug) -+ { -+ for (i = 0; i < state->natoms; i += 10) -+ { -+ fprintf(debug, "dx %5d %10.5f %10.5f %10.5f\n", i, state->x[i][XX], state->x[i][YY], state->x[i][ZZ]); -+ } -+ } -+} -+ -+static void -+cyclic_decomposition(FILE *fplog, -+ const int *destinations, -+ int **cyclic, -+ gmx_bool *incycle, -+ const int nrepl, -+ int *nswap) -+{ -+ -+ int i, j, c, p; -+ int maxlen = 1; -+ for (i = 0; i < nrepl; i++) -+ { -+ incycle[i] = FALSE; -+ } -+ for (i = 0; i < nrepl; i++) /* one cycle for each replica */ -+ { -+ if (incycle[i]) -+ { -+ cyclic[i][0] = -1; -+ continue; -+ } -+ cyclic[i][0] = i; -+ incycle[i] = TRUE; -+ c = 1; -+ p = i; -+ for (j = 0; j < nrepl; j++) /* potentially all cycles are part, but we will break first */ -+ { -+ p = destinations[p]; /* start permuting */ -+ if (p == i) -+ { -+ cyclic[i][c] = -1; -+ if (c > maxlen) -+ { -+ maxlen = c; -+ } -+ break; /* we've reached the original element, the cycle is complete, and we marked the end. 
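cyclic_decomposition above splits the destination permutation into disjoint cycles so that a multi-pair exchange can later be carried out as rounds of simple pairwise state swaps. A small self-contained sketch of the same idea (plain C, not the patch's function, with a fixed example permutation):

    #include <stdio.h>

    /* Decompose a destination permutation into cycles and print them.
     * dest = {1, 0, 3, 2} yields the cycles (0 1) and (2 3), so a single
     * round of pairwise swaps realises the whole exchange. */
    int main(void)
    {
        int dest[]  = {1, 0, 3, 2};
        int n       = 4;
        int seen[4] = {0};

        for (int i = 0; i < n; i++)
        {
            if (seen[i])
            {
                continue;
            }
            printf("(");
            int p = i;
            do
            {
                printf(" %d", p);
                seen[p] = 1;
                p       = dest[p];
            }
            while (p != i);
            printf(" )\n");
        }
        return 0;
    }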
*/ -+ } -+ else -+ { -+ cyclic[i][c] = p; /* each permutation gives a new member of the cycle */ -+ incycle[p] = TRUE; -+ c++; -+ } -+ } -+ } -+ *nswap = maxlen - 1; -+ -+ if (debug) -+ { -+ for (i = 0; i < nrepl; i++) -+ { -+ fprintf(debug, "Cycle %d:", i); -+ for (j = 0; j < nrepl; j++) -+ { -+ if (cyclic[i][j] < 0) -+ { -+ break; -+ } -+ fprintf(debug, "%2d", cyclic[i][j]); -+ } -+ fprintf(debug, "\n"); -+ } -+ fflush(debug); -+ } -+} -+ -+static void -+compute_exchange_order(FILE *fplog, -+ int **cyclic, -+ int **order, -+ const int nrepl, -+ const int maxswap) -+{ -+ int i, j; -+ -+ for (j = 0; j < maxswap; j++) -+ { -+ for (i = 0; i < nrepl; i++) -+ { -+ if (cyclic[i][j+1] >= 0) -+ { -+ order[cyclic[i][j+1]][j] = cyclic[i][j]; -+ order[cyclic[i][j]][j] = cyclic[i][j+1]; -+ } -+ } -+ for (i = 0; i < nrepl; i++) -+ { -+ if (order[i][j] < 0) -+ { -+ order[i][j] = i; /* if it's not exchanging, it should stay this round*/ -+ } -+ } -+ } -+ -+ if (debug) -+ { -+ fprintf(fplog, "Replica Exchange Order\n"); -+ for (i = 0; i < nrepl; i++) -+ { -+ fprintf(fplog, "Replica %d:", i); -+ for (j = 0; j < maxswap; j++) -+ { -+ if (order[i][j] < 0) -+ { -+ break; -+ } -+ fprintf(debug, "%2d", order[i][j]); -+ } -+ fprintf(fplog, "\n"); -+ } -+ fflush(fplog); -+ } -+} -+ -+static void -+prepare_to_do_exchange(FILE *fplog, -+ struct gmx_repl_ex *re, -+ const int replica_id, -+ int *maxswap, -+ gmx_bool *bThisReplicaExchanged) -+{ -+ int i, j; -+ /* Hold the cyclic decomposition of the (multiple) replica -+ * exchange. */ -+ gmx_bool bAnyReplicaExchanged = FALSE; -+ *bThisReplicaExchanged = FALSE; -+ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ if (re->destinations[i] != re->ind[i]) -+ { -+ /* only mark as exchanged if the index has been shuffled */ -+ bAnyReplicaExchanged = TRUE; -+ break; -+ } -+ } -+ if (bAnyReplicaExchanged) -+ { -+ /* reinitialize the placeholder arrays */ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ for (j = 0; j < re->nrepl; j++) -+ { -+ re->cyclic[i][j] = -1; -+ re->order[i][j] = -1; -+ } -+ } -+ -+ /* Identify the cyclic decomposition of the permutation (very -+ * fast if neighbor replica exchange). */ -+ cyclic_decomposition(fplog, re->destinations, re->cyclic, re->incycle, re->nrepl, maxswap); -+ -+ /* Now translate the decomposition into a replica exchange -+ * order at each step. */ -+ compute_exchange_order(fplog, re->cyclic, re->order, re->nrepl, *maxswap); -+ -+ /* Did this replica do any exchange at any point? */ -+ for (j = 0; j < *maxswap; j++) -+ { -+ if (replica_id != re->order[replica_id][j]) -+ { -+ *bThisReplicaExchanged = TRUE; -+ break; -+ } -+ } -+ } -+} -+ -+gmx_bool replica_exchange(FILE *fplog, const t_commrec *cr, struct gmx_repl_ex *re, -+ t_state *state, gmx_enerdata_t *enerd, -+ t_state *state_local, gmx_large_int_t step, real time) -+{ -+ int i, j; -+ int replica_id = 0; -+ int exchange_partner; -+ int maxswap = 0; -+ /* Number of rounds of exchanges needed to deal with any multiple -+ * exchanges. */ -+ /* Where each replica ends up after the exchange attempt(s). */ -+ /* The order in which multiple exchanges will occur. */ -+ gmx_bool bThisReplicaExchanged = FALSE; -+ -+ if (MASTER(cr)) -+ { -+ replica_id = re->repl; -+ test_for_replica_exchange(fplog, cr->ms, re, enerd, det(state_local->box), step, time); -+ prepare_to_do_exchange(fplog, re, replica_id, &maxswap, &bThisReplicaExchanged); -+ } -+ /* Do intra-simulation broadcast so all processors belonging to -+ * each simulation know whether they need to participate in -+ * collecting the state. 
Otherwise, they might as well get on with -+ * the next thing to do. */ -+ if (PAR(cr)) -+ { -+#ifdef GMX_MPI -+ MPI_Bcast(&bThisReplicaExchanged, sizeof(gmx_bool), MPI_BYTE, MASTERRANK(cr), -+ cr->mpi_comm_mygroup); -+#endif -+ } -+ -+ if (bThisReplicaExchanged) -+ { -+ /* Exchange the states */ -+ -+ if (PAR(cr)) -+ { -+ /* Collect the global state on the master node */ -+ if (DOMAINDECOMP(cr)) -+ { -+ dd_collect_state(cr->dd, state_local, state); -+ } -+ else -+ { -+ pd_collect_state(cr, state); -+ } -+ } -+ else -+ { -+ copy_state_nonatomdata(state_local, state); -+ } -+ -+ if (MASTER(cr)) -+ { -+ /* There will be only one swap cycle with standard replica -+ * exchange, but there may be multiple swap cycles if we -+ * allow multiple swaps. */ -+ -+ for (j = 0; j < maxswap; j++) -+ { -+ exchange_partner = re->order[replica_id][j]; -+ -+ if (exchange_partner != replica_id) -+ { -+ /* Exchange the global states between the master nodes */ -+ if (debug) -+ { -+ fprintf(debug, "Exchanging %d with %d\n", replica_id, exchange_partner); -+ } -+ exchange_state(cr->ms, exchange_partner, state); -+ } -+ } -+ /* For temperature-type replica exchange, we need to scale -+ * the velocities. */ -+ if (re->type == ereTEMP || re->type == ereTL) -+ { -+ scale_velocities(state, sqrt(re->q[ereTEMP][replica_id]/re->q[ereTEMP][re->destinations[replica_id]])); -+ } -+ -+ } -+ -+ /* With domain decomposition the global state is distributed later */ -+ if (!DOMAINDECOMP(cr)) -+ { -+ /* Copy the global state to the local state data structure */ -+ copy_state_nonatomdata(state, state_local); -+ -+ if (PAR(cr)) -+ { -+ bcast_state(cr, state, FALSE); -+ } -+ } -+ } -+ -+ return bThisReplicaExchanged; -+} -+ -+void print_replica_exchange_statistics(FILE *fplog, struct gmx_repl_ex *re) -+{ -+ int i; -+ -+ fprintf(fplog, "\nReplica exchange statistics\n"); -+ -+ if (re->nex == 0) -+ { -+ fprintf(fplog, "Repl %d attempts, %d odd, %d even\n", -+ re->nattempt[0]+re->nattempt[1], re->nattempt[1], re->nattempt[0]); -+ -+ fprintf(fplog, "Repl average probabilities:\n"); -+ for (i = 1; i < re->nrepl; i++) -+ { -+ if (re->nattempt[i%2] == 0) -+ { -+ re->prob[i] = 0; -+ } -+ else -+ { -+ re->prob[i] = re->prob_sum[i]/re->nattempt[i%2]; -+ } -+ } -+ print_ind(fplog, "", re->nrepl, re->ind, NULL); -+ print_prob(fplog, "", re->nrepl, re->prob); -+ -+ fprintf(fplog, "Repl number of exchanges:\n"); -+ print_ind(fplog, "", re->nrepl, re->ind, NULL); -+ print_count(fplog, "", re->nrepl, re->nexchange); -+ -+ fprintf(fplog, "Repl average number of exchanges:\n"); -+ for (i = 1; i < re->nrepl; i++) -+ { -+ if (re->nattempt[i%2] == 0) -+ { -+ re->prob[i] = 0; -+ } -+ else -+ { -+ re->prob[i] = ((real)re->nexchange[i])/re->nattempt[i%2]; -+ } -+ } -+ print_ind(fplog, "", re->nrepl, re->ind, NULL); -+ print_prob(fplog, "", re->nrepl, re->prob); -+ -+ fprintf(fplog, "\n"); -+ } -+ /* print the transition matrix */ -+ print_transition_matrix(fplog, "", re->nrepl, re->nmoves, re->nattempt); -+} -diff --git a/src/mdlib/force.c b/src/mdlib/force.c -index 75da6bd..a36cbd0 100644 ---- a/src/mdlib/force.c -+++ b/src/mdlib/force.c -@@ -67,6 +67,14 @@ - #include "mpelogging.h" - #include "gmx_omp_nthreads.h" - -+/* PLUMED */ -+#include "../../Plumed.h" -+int plumedswitch=0; -+plumed plumedmain; -+void(*plumedcmd)(plumed,const char*,const void*)=NULL; -+/* END PLUMED */ -+ -+ - void ns(FILE *fp, - t_forcerec *fr, - rvec x[], -@@ -692,6 +700,16 @@ void do_force_lowlevel(FILE *fplog, gmx_large_int_t step, - - GMX_MPE_LOG(ev_force_finish); - -+ -+/* 
PLUMED */ -+ if(plumedswitch){ -+ int plumedNeedsEnergy; -+ (*plumedcmd)(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); -+ if(!plumedNeedsEnergy) (*plumedcmd)(plumedmain,"performCalc",NULL); -+ } -+/* END PLUMED */ -+ -+ - } - - void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd) -diff --git a/src/mdlib/force.c.preplumed b/src/mdlib/force.c.preplumed -new file mode 100644 -index 0000000..75da6bd ---- /dev/null -+++ b/src/mdlib/force.c.preplumed -@@ -0,0 +1,973 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team, -+ * check out http://www.gromacs.org for more information. -+ * Copyright (c) 2012,2013, by the GROMACS development team, led by -+ * David van der Spoel, Berk Hess, Erik Lindahl, and including many -+ * others, as listed in the AUTHORS file in the top-level source -+ * directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. -+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. 
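The force.c hunk earlier in this patch wires PLUMED in through three globals (plumedswitch, plumedmain and the function pointer plumedcmd) and drives everything through PLUMED's string-command interface. A sketch of that dispatch pattern, assembled from the calls that appear in the patch (illustrative only, not a literal excerpt):

    #include "../../Plumed.h"

    extern int    plumedswitch;   /* set when mdrun is started with PLUMED enabled */
    extern plumed plumedmain;     /* handle created during mdrun setup             */
    extern void (*plumedcmd)(plumed, const char*, const void*);

    /* Called once the regular forces are done (cf. the do_force_lowlevel hook). */
    static void plumed_hook_after_forces(void)
    {
        if (plumedswitch)
        {
            int plumedNeedsEnergy = 0;
            (*plumedcmd)(plumedmain, "isEnergyNeeded", &plumedNeedsEnergy);
            if (!plumedNeedsEnergy)
            {
                /* Bias forces go straight into the arrays registered earlier with
                 * "setForces"; if the energy is needed, the caller passes it via
                 * "setEnergy" and invokes "performCalc" itself, as minimize.c does. */
                (*plumedcmd)(plumedmain, "performCalc", NULL);
            }
        }
    }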
-+ */ -+#ifdef HAVE_CONFIG_H -+#include -+#endif -+ -+#include -+#include -+#include -+#include "sysstuff.h" -+#include "typedefs.h" -+#include "macros.h" -+#include "smalloc.h" -+#include "macros.h" -+#include "physics.h" -+#include "force.h" -+#include "nonbonded.h" -+#include "names.h" -+#include "network.h" -+#include "pbc.h" -+#include "ns.h" -+#include "nrnb.h" -+#include "bondf.h" -+#include "mshift.h" -+#include "txtdump.h" -+#include "coulomb.h" -+#include "pme.h" -+#include "mdrun.h" -+#include "domdec.h" -+#include "partdec.h" -+#include "qmmm.h" -+#include "mpelogging.h" -+#include "gmx_omp_nthreads.h" -+ -+void ns(FILE *fp, -+ t_forcerec *fr, -+ rvec x[], -+ matrix box, -+ gmx_groups_t *groups, -+ t_grpopts *opts, -+ gmx_localtop_t *top, -+ t_mdatoms *md, -+ t_commrec *cr, -+ t_nrnb *nrnb, -+ real *lambda, -+ real *dvdlambda, -+ gmx_grppairener_t *grppener, -+ gmx_bool bFillGrid, -+ gmx_bool bDoLongRangeNS) -+{ -+ char *ptr; -+ int nsearch; -+ -+ GMX_MPE_LOG(ev_ns_start); -+ if (!fr->ns.nblist_initialized) -+ { -+ init_neighbor_list(fp, fr, md->homenr); -+ } -+ -+ if (fr->bTwinRange) -+ { -+ fr->nlr = 0; -+ } -+ -+ nsearch = search_neighbours(fp, fr, x, box, top, groups, cr, nrnb, md, -+ lambda, dvdlambda, grppener, -+ bFillGrid, bDoLongRangeNS, TRUE); -+ if (debug) -+ { -+ fprintf(debug, "nsearch = %d\n", nsearch); -+ } -+ -+ /* Check whether we have to do dynamic load balancing */ -+ /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0)) -+ count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr, -+ &(top->idef),opts->ngener); -+ */ -+ if (fr->ns.dump_nl > 0) -+ { -+ dump_nblist(fp, cr, fr, fr->ns.dump_nl); -+ } -+ -+ GMX_MPE_LOG(ev_ns_finish); -+} -+ -+static void reduce_thread_forces(int n, rvec *f, -+ tensor vir, -+ real *Vcorr, -+ int efpt_ind, real *dvdl, -+ int nthreads, f_thread_t *f_t) -+{ -+ int t, i; -+ -+ /* This reduction can run over any number of threads */ -+#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(t) schedule(static) -+ for (i = 0; i < n; i++) -+ { -+ for (t = 1; t < nthreads; t++) -+ { -+ rvec_inc(f[i], f_t[t].f[i]); -+ } -+ } -+ for (t = 1; t < nthreads; t++) -+ { -+ *Vcorr += f_t[t].Vcorr; -+ *dvdl += f_t[t].dvdl[efpt_ind]; -+ m_add(vir, f_t[t].vir, vir); -+ } -+} -+ -+void do_force_lowlevel(FILE *fplog, gmx_large_int_t step, -+ t_forcerec *fr, t_inputrec *ir, -+ t_idef *idef, t_commrec *cr, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ t_mdatoms *md, -+ t_grpopts *opts, -+ rvec x[], history_t *hist, -+ rvec f[], -+ rvec f_longrange[], -+ gmx_enerdata_t *enerd, -+ t_fcdata *fcd, -+ gmx_mtop_t *mtop, -+ gmx_localtop_t *top, -+ gmx_genborn_t *born, -+ t_atomtypes *atype, -+ gmx_bool bBornRadii, -+ matrix box, -+ t_lambda *fepvals, -+ real *lambda, -+ t_graph *graph, -+ t_blocka *excl, -+ rvec mu_tot[], -+ int flags, -+ float *cycles_pme) -+{ -+ int i, j, status; -+ int donb_flags; -+ gmx_bool bDoEpot, bSepDVDL, bSB; -+ int pme_flags; -+ matrix boxs; -+ rvec box_size; -+ real Vsr, Vlr, Vcorr = 0; -+ t_pbc pbc; -+ real dvdgb; -+ char buf[22]; -+ double clam_i, vlam_i; -+ real dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR]; -+ real dvdlsum; -+ -+#ifdef GMX_MPI -+ double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ -+#endif -+ -+#define PRINT_SEPDVDL(s, v, dvdlambda) if (bSepDVDL) {fprintf(fplog, sepdvdlformat, s, v, dvdlambda); } -+ -+ GMX_MPE_LOG(ev_force_start); -+ set_pbc(&pbc, fr->ePBC, box); -+ -+ /* reset free energy components */ -+ for (i = 0; i < efptNR; i++) -+ { -+ dvdl_nb[i] = 0; -+ 
dvdl_dum[i] = 0; -+ } -+ -+ /* Reset box */ -+ for (i = 0; (i < DIM); i++) -+ { -+ box_size[i] = box[i][i]; -+ } -+ -+ bSepDVDL = (fr->bSepDVDL && do_per_step(step, ir->nstlog)); -+ debug_gmx(); -+ -+ /* do QMMM first if requested */ -+ if (fr->bQMMM) -+ { -+ enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr, md); -+ } -+ -+ if (bSepDVDL) -+ { -+ fprintf(fplog, "Step %s: non-bonded V and dVdl for node %d:\n", -+ gmx_step_str(step, buf), cr->nodeid); -+ } -+ -+ /* Call the short range functions all in one go. */ -+ GMX_MPE_LOG(ev_do_fnbf_start); -+ -+#ifdef GMX_MPI -+ /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ -+#define TAKETIME FALSE -+ if (TAKETIME) -+ { -+ MPI_Barrier(cr->mpi_comm_mygroup); -+ t0 = MPI_Wtime(); -+ } -+#endif -+ -+ if (ir->nwall) -+ { -+ /* foreign lambda component for walls */ -+ dvdl = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], -+ enerd->grpp.ener[egLJSR], nrnb); -+ PRINT_SEPDVDL("Walls", 0.0, dvdl); -+ enerd->dvdl_lin[efptVDW] += dvdl; -+ } -+ -+ /* If doing GB, reset dvda and calculate the Born radii */ -+ if (ir->implicit_solvent) -+ { -+ wallcycle_sub_start(wcycle, ewcsNONBONDED); -+ -+ for (i = 0; i < born->nr; i++) -+ { -+ fr->dvda[i] = 0; -+ } -+ -+ if (bBornRadii) -+ { -+ calc_gb_rad(cr, fr, ir, top, atype, x, &(fr->gblist), born, md, nrnb); -+ } -+ -+ wallcycle_sub_stop(wcycle, ewcsNONBONDED); -+ } -+ -+ where(); -+ /* We only do non-bonded calculation with group scheme here, the verlet -+ * calls are done from do_force_cutsVERLET(). */ -+ if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) -+ { -+ donb_flags = 0; -+ /* Add short-range interactions */ -+ donb_flags |= GMX_NONBONDED_DO_SR; -+ -+ if (flags & GMX_FORCE_FORCES) -+ { -+ donb_flags |= GMX_NONBONDED_DO_FORCE; -+ } -+ if (flags & GMX_FORCE_ENERGY) -+ { -+ donb_flags |= GMX_NONBONDED_DO_POTENTIAL; -+ } -+ if (flags & GMX_FORCE_DO_LR) -+ { -+ donb_flags |= GMX_NONBONDED_DO_LR; -+ } -+ -+ wallcycle_sub_start(wcycle, ewcsNONBONDED); -+ do_nonbonded(cr, fr, x, f, f_longrange, md, excl, -+ &enerd->grpp, box_size, nrnb, -+ lambda, dvdl_nb, -1, -1, donb_flags); -+ -+ /* If we do foreign lambda and we have soft-core interactions -+ * we have to recalculate the (non-linear) energies contributions. -+ */ -+ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) -+ { -+ for (i = 0; i < enerd->n_lambda; i++) -+ { -+ for (j = 0; j < efptNR; j++) -+ { -+ lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); -+ } -+ reset_foreign_enerdata(enerd); -+ do_nonbonded(cr, fr, x, f, f_longrange, md, excl, -+ &(enerd->foreign_grpp), box_size, nrnb, -+ lam_i, dvdl_dum, -1, -1, -+ (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); -+ sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term); -+ enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; -+ } -+ } -+ wallcycle_sub_stop(wcycle, ewcsNONBONDED); -+ where(); -+ } -+ -+ /* If we are doing GB, calculate bonded forces and apply corrections -+ * to the solvation forces */ -+ /* MRS: Eventually, many need to include free energy contribution here! 
*/ -+ if (ir->implicit_solvent) -+ { -+ wallcycle_sub_start(wcycle, ewcsBONDED); -+ calc_gb_forces(cr, md, born, top, atype, x, f, fr, idef, -+ ir->gb_algorithm, ir->sa_algorithm, nrnb, bBornRadii, &pbc, graph, enerd); -+ wallcycle_sub_stop(wcycle, ewcsBONDED); -+ } -+ -+#ifdef GMX_MPI -+ if (TAKETIME) -+ { -+ t1 = MPI_Wtime(); -+ fr->t_fnbf += t1-t0; -+ } -+#endif -+ -+ if (fepvals->sc_alpha != 0) -+ { -+ enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; -+ } -+ else -+ { -+ enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; -+ } -+ -+ if (fepvals->sc_alpha != 0) -+ -+ /* even though coulomb part is linear, we already added it, beacuse we -+ need to go through the vdw calculation anyway */ -+ { -+ enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; -+ } -+ else -+ { -+ enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; -+ } -+ -+ Vsr = 0; -+ if (bSepDVDL) -+ { -+ for (i = 0; i < enerd->grpp.nener; i++) -+ { -+ Vsr += -+ (fr->bBHAM ? -+ enerd->grpp.ener[egBHAMSR][i] : -+ enerd->grpp.ener[egLJSR][i]) -+ + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i]; -+ } -+ dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL]; -+ PRINT_SEPDVDL("VdW and Coulomb SR particle-p.", Vsr, dvdlsum); -+ } -+ debug_gmx(); -+ -+ GMX_MPE_LOG(ev_do_fnbf_finish); -+ -+ if (debug) -+ { -+ pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); -+ } -+ -+ /* Shift the coordinates. Must be done before bonded forces and PPPM, -+ * but is also necessary for SHAKE and update, therefore it can NOT -+ * go when no bonded forces have to be evaluated. -+ */ -+ -+ /* Here sometimes we would not need to shift with NBFonly, -+ * but we do so anyhow for consistency of the returned coordinates. -+ */ -+ if (graph) -+ { -+ shift_self(graph, box, x); -+ if (TRICLINIC(box)) -+ { -+ inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); -+ } -+ else -+ { -+ inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); -+ } -+ } -+ /* Check whether we need to do bondeds or correct for exclusions */ -+ if (fr->bMolPBC && -+ ((flags & GMX_FORCE_BONDED) -+ || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype))) -+ { -+ /* Since all atoms are in the rectangular or triclinic unit-cell, -+ * only single box vector shifts (2 in x) are required. -+ */ -+ set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box); -+ } -+ debug_gmx(); -+ -+ if (flags & GMX_FORCE_BONDED) -+ { -+ GMX_MPE_LOG(ev_calc_bonds_start); -+ -+ wallcycle_sub_start(wcycle, ewcsBONDED); -+ calc_bonds(fplog, cr->ms, -+ idef, x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, -+ DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born, -+ flags, -+ fr->bSepDVDL && do_per_step(step, ir->nstlog), step); -+ -+ /* Check if we have to determine energy differences -+ * at foreign lambda's. -+ */ -+ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && -+ idef->ilsort != ilsortNO_FE) -+ { -+ if (idef->ilsort != ilsortFE_SORTED) -+ { -+ gmx_incons("The bonded interactions are not sorted for free energy"); -+ } -+ for (i = 0; i < enerd->n_lambda; i++) -+ { -+ reset_foreign_enerdata(enerd); -+ for (j = 0; j < efptNR; j++) -+ { -+ lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); -+ } -+ calc_bonds_lambda(fplog, idef, x, fr, &pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md, -+ fcd, DOMAINDECOMP(cr) ? 
cr->dd->gatindex : NULL); -+ sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term); -+ enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; -+ } -+ } -+ debug_gmx(); -+ GMX_MPE_LOG(ev_calc_bonds_finish); -+ wallcycle_sub_stop(wcycle, ewcsBONDED); -+ } -+ -+ where(); -+ -+ *cycles_pme = 0; -+ if (EEL_FULL(fr->eeltype)) -+ { -+ bSB = (ir->nwall == 2); -+ if (bSB) -+ { -+ copy_mat(box, boxs); -+ svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); -+ box_size[ZZ] *= ir->wall_ewald_zfac; -+ } -+ -+ clear_mat(fr->vir_el_recip); -+ -+ if (fr->bEwald) -+ { -+ Vcorr = 0; -+ dvdl = 0; -+ -+ /* With the Verlet scheme exclusion forces are calculated -+ * in the non-bonded kernel. -+ */ -+ /* The TPI molecule does not have exclusions with the rest -+ * of the system and no intra-molecular PME grid contributions -+ * will be calculated in gmx_pme_calc_energy. -+ */ -+ if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || -+ ir->ewald_geometry != eewg3D || -+ ir->epsilon_surface != 0) -+ { -+ int nthreads, t; -+ -+ wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); -+ -+ if (fr->n_tpi > 0) -+ { -+ gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); -+ } -+ -+ nthreads = gmx_omp_nthreads_get(emntBonded); -+#pragma omp parallel for num_threads(nthreads) schedule(static) -+ for (t = 0; t < nthreads; t++) -+ { -+ int s, e, i; -+ rvec *fnv; -+ tensor *vir; -+ real *Vcorrt, *dvdlt; -+ if (t == 0) -+ { -+ fnv = fr->f_novirsum; -+ vir = &fr->vir_el_recip; -+ Vcorrt = &Vcorr; -+ dvdlt = &dvdl; -+ } -+ else -+ { -+ fnv = fr->f_t[t].f; -+ vir = &fr->f_t[t].vir; -+ Vcorrt = &fr->f_t[t].Vcorr; -+ dvdlt = &fr->f_t[t].dvdl[efptCOUL]; -+ for (i = 0; i < fr->natoms_force; i++) -+ { -+ clear_rvec(fnv[i]); -+ } -+ clear_mat(*vir); -+ } -+ *dvdlt = 0; -+ *Vcorrt = -+ ewald_LRcorrection(fplog, -+ fr->excl_load[t], fr->excl_load[t+1], -+ cr, t, fr, -+ md->chargeA, -+ md->nChargePerturbed ? md->chargeB : NULL, -+ ir->cutoff_scheme != ecutsVERLET, -+ excl, x, bSB ? boxs : box, mu_tot, -+ ir->ewald_geometry, -+ ir->epsilon_surface, -+ fnv, *vir, -+ lambda[efptCOUL], dvdlt); -+ } -+ if (nthreads > 1) -+ { -+ reduce_thread_forces(fr->natoms_force, fr->f_novirsum, -+ fr->vir_el_recip, -+ &Vcorr, efptCOUL, &dvdl, -+ nthreads, fr->f_t); -+ } -+ -+ wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); -+ } -+ -+ if (fr->n_tpi == 0) -+ { -+ Vcorr += ewald_charge_correction(cr, fr, lambda[efptCOUL], box, -+ &dvdl, fr->vir_el_recip); -+ } -+ -+ PRINT_SEPDVDL("Ewald excl./charge/dip. corr.", Vcorr, dvdl); -+ enerd->dvdl_lin[efptCOUL] += dvdl; -+ } -+ -+ status = 0; -+ Vlr = 0; -+ dvdl = 0; -+ switch (fr->eeltype) -+ { -+ case eelPME: -+ case eelPMESWITCH: -+ case eelPMEUSER: -+ case eelPMEUSERSWITCH: -+ case eelP3M_AD: -+ if (cr->duty & DUTY_PME) -+ { -+ assert(fr->n_tpi >= 0); -+ if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) -+ { -+ pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE; -+ if (flags & GMX_FORCE_FORCES) -+ { -+ pme_flags |= GMX_PME_CALC_F; -+ } -+ if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)) -+ { -+ pme_flags |= GMX_PME_CALC_ENER_VIR; -+ } -+ if (fr->n_tpi > 0) -+ { -+ /* We don't calculate f, but we do want the potential */ -+ pme_flags |= GMX_PME_CALC_POT; -+ } -+ wallcycle_start(wcycle, ewcPMEMESH); -+ status = gmx_pme_do(fr->pmedata, -+ md->start, md->homenr - fr->n_tpi, -+ x, fr->f_novirsum, -+ md->chargeA, md->chargeB, -+ bSB ? boxs : box, cr, -+ DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, -+ DOMAINDECOMP(cr) ? 
dd_pme_maxshift_y(cr->dd) : 0, -+ nrnb, wcycle, -+ fr->vir_el_recip, fr->ewaldcoeff, -+ &Vlr, lambda[efptCOUL], &dvdl, -+ pme_flags); -+ *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); -+ -+ /* We should try to do as little computation after -+ * this as possible, because parallel PME synchronizes -+ * the nodes, so we want all load imbalance of the rest -+ * of the force calculation to be before the PME call. -+ * DD load balancing is done on the whole time of -+ * the force call (without PME). -+ */ -+ } -+ if (fr->n_tpi > 0) -+ { -+ /* Determine the PME grid energy of the test molecule -+ * with the PME grid potential of the other charges. -+ */ -+ gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, -+ x + md->homenr - fr->n_tpi, -+ md->chargeA + md->homenr - fr->n_tpi, -+ &Vlr); -+ } -+ PRINT_SEPDVDL("PME mesh", Vlr, dvdl); -+ } -+ break; -+ case eelEWALD: -+ Vlr = do_ewald(fplog, FALSE, ir, x, fr->f_novirsum, -+ md->chargeA, md->chargeB, -+ box_size, cr, md->homenr, -+ fr->vir_el_recip, fr->ewaldcoeff, -+ lambda[efptCOUL], &dvdl, fr->ewald_table); -+ PRINT_SEPDVDL("Ewald long-range", Vlr, dvdl); -+ break; -+ default: -+ gmx_fatal(FARGS, "No such electrostatics method implemented %s", -+ eel_names[fr->eeltype]); -+ } -+ if (status != 0) -+ { -+ gmx_fatal(FARGS, "Error %d in long range electrostatics routine %s", -+ status, EELTYPE(fr->eeltype)); -+ } -+ /* Note that with separate PME nodes we get the real energies later */ -+ enerd->dvdl_lin[efptCOUL] += dvdl; -+ enerd->term[F_COUL_RECIP] = Vlr + Vcorr; -+ if (debug) -+ { -+ fprintf(debug, "Vlr = %g, Vcorr = %g, Vlr_corr = %g\n", -+ Vlr, Vcorr, enerd->term[F_COUL_RECIP]); -+ pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM); -+ pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); -+ } -+ } -+ else -+ { -+ if (EEL_RF(fr->eeltype)) -+ { -+ /* With the Verlet scheme exclusion forces are calculated -+ * in the non-bonded kernel. 
-+ */ -+ if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC) -+ { -+ dvdl = 0; -+ enerd->term[F_RF_EXCL] = -+ RF_excl_correction(fplog, fr, graph, md, excl, x, f, -+ fr->fshift, &pbc, lambda[efptCOUL], &dvdl); -+ } -+ -+ enerd->dvdl_lin[efptCOUL] += dvdl; -+ PRINT_SEPDVDL("RF exclusion correction", -+ enerd->term[F_RF_EXCL], dvdl); -+ } -+ } -+ where(); -+ debug_gmx(); -+ -+ if (debug) -+ { -+ print_nrnb(debug, nrnb); -+ } -+ debug_gmx(); -+ -+#ifdef GMX_MPI -+ if (TAKETIME) -+ { -+ t2 = MPI_Wtime(); -+ MPI_Barrier(cr->mpi_comm_mygroup); -+ t3 = MPI_Wtime(); -+ fr->t_wait += t3-t2; -+ if (fr->timesteps == 11) -+ { -+ fprintf(stderr, "* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", -+ cr->nodeid, gmx_step_str(fr->timesteps, buf), -+ 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), -+ (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); -+ } -+ fr->timesteps++; -+ } -+#endif -+ -+ if (debug) -+ { -+ pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); -+ } -+ -+ GMX_MPE_LOG(ev_force_finish); -+ -+} -+ -+void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd) -+{ -+ int i, n2; -+ -+ for (i = 0; i < F_NRE; i++) -+ { -+ enerd->term[i] = 0; -+ enerd->foreign_term[i] = 0; -+ } -+ -+ -+ for (i = 0; i < efptNR; i++) -+ { -+ enerd->dvdl_lin[i] = 0; -+ enerd->dvdl_nonlin[i] = 0; -+ } -+ -+ n2 = ngener*ngener; -+ if (debug) -+ { -+ fprintf(debug, "Creating %d sized group matrix for energies\n", n2); -+ } -+ enerd->grpp.nener = n2; -+ enerd->foreign_grpp.nener = n2; -+ for (i = 0; (i < egNR); i++) -+ { -+ snew(enerd->grpp.ener[i], n2); -+ snew(enerd->foreign_grpp.ener[i], n2); -+ } -+ -+ if (n_lambda) -+ { -+ enerd->n_lambda = 1 + n_lambda; -+ snew(enerd->enerpart_lambda, enerd->n_lambda); -+ } -+ else -+ { -+ enerd->n_lambda = 0; -+ } -+} -+ -+void destroy_enerdata(gmx_enerdata_t *enerd) -+{ -+ int i; -+ -+ for (i = 0; (i < egNR); i++) -+ { -+ sfree(enerd->grpp.ener[i]); -+ } -+ -+ for (i = 0; (i < egNR); i++) -+ { -+ sfree(enerd->foreign_grpp.ener[i]); -+ } -+ -+ if (enerd->n_lambda) -+ { -+ sfree(enerd->enerpart_lambda); -+ } -+} -+ -+static real sum_v(int n, real v[]) -+{ -+ real t; -+ int i; -+ -+ t = 0.0; -+ for (i = 0; (i < n); i++) -+ { -+ t = t + v[i]; -+ } -+ -+ return t; -+} -+ -+void sum_epot(t_grpopts *opts, gmx_grppairener_t *grpp, real *epot) -+{ -+ int i; -+ -+ /* Accumulate energies */ -+ epot[F_COUL_SR] = sum_v(grpp->nener, grpp->ener[egCOULSR]); -+ epot[F_LJ] = sum_v(grpp->nener, grpp->ener[egLJSR]); -+ epot[F_LJ14] = sum_v(grpp->nener, grpp->ener[egLJ14]); -+ epot[F_COUL14] = sum_v(grpp->nener, grpp->ener[egCOUL14]); -+ epot[F_COUL_LR] = sum_v(grpp->nener, grpp->ener[egCOULLR]); -+ epot[F_LJ_LR] = sum_v(grpp->nener, grpp->ener[egLJLR]); -+ /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */ -+ epot[F_GBPOL] += sum_v(grpp->nener, grpp->ener[egGB]); -+ -+/* lattice part of LR doesnt belong to any group -+ * and has been added earlier -+ */ -+ epot[F_BHAM] = sum_v(grpp->nener, grpp->ener[egBHAMSR]); -+ epot[F_BHAM_LR] = sum_v(grpp->nener, grpp->ener[egBHAMLR]); -+ -+ epot[F_EPOT] = 0; -+ for (i = 0; (i < F_EPOT); i++) -+ { -+ if (i != F_DISRESVIOL && i != F_ORIRESDEV) -+ { -+ epot[F_EPOT] += epot[i]; -+ } -+ } -+} -+ -+void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals) -+{ -+ int i, j, index; -+ double dlam; -+ -+ enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW]; /* include dispersion correction */ -+ enerd->term[F_DVDL] = 0.0; -+ for (i = 0; i < efptNR; i++) -+ { -+ if 
(fepvals->separate_dvdl[i]) -+ { -+ /* could this be done more readably/compactly? */ -+ switch (i) -+ { -+ case (efptMASS): -+ index = F_DKDL; -+ break; -+ case (efptCOUL): -+ index = F_DVDL_COUL; -+ break; -+ case (efptVDW): -+ index = F_DVDL_VDW; -+ break; -+ case (efptBONDED): -+ index = F_DVDL_BONDED; -+ break; -+ case (efptRESTRAINT): -+ index = F_DVDL_RESTRAINT; -+ break; -+ default: -+ index = F_DVDL; -+ break; -+ } -+ enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; -+ if (debug) -+ { -+ fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n", -+ efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); -+ } -+ } -+ else -+ { -+ enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; -+ if (debug) -+ { -+ fprintf(debug, "dvd-%sl[%2d]: %f: non-linear %f + linear %f\n", -+ efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); -+ } -+ } -+ } -+ -+ /* Notes on the foreign lambda free energy difference evaluation: -+ * Adding the potential and ekin terms that depend linearly on lambda -+ * as delta lam * dvdl to the energy differences is exact. -+ * For the constraints this is not exact, but we have no other option -+ * without literally changing the lengths and reevaluating the energies at each step. -+ * (try to remedy this post 4.6 - MRS) -+ * For the non-bonded LR term we assume that the soft-core (if present) -+ * no longer affects the energy beyond the short-range cut-off, -+ * which is a very good approximation (except for exotic settings). -+ * (investigate how to overcome this post 4.6 - MRS) -+ */ -+ if (fepvals->separate_dvdl[efptBONDED]) -+ { -+ enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR]; -+ } -+ else -+ { -+ enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR]; -+ } -+ enerd->term[F_DVDL_CONSTR] = 0; -+ -+ for (i = 0; i < fepvals->n_lambda; i++) -+ { /* note we are iterating over fepvals here! -+ For the current lam, dlam = 0 automatically, -+ so we don't need to add anything to the -+ enerd->enerpart_lambda[0] */ -+ -+ /* we don't need to worry about dvdl_lin contributions to dE at -+ current lambda, because the contributions to the current -+ lambda are automatically zeroed */ -+ -+ for (j = 0; j < efptNR; j++) -+ { -+ /* Note that this loop is over all dhdl components, not just the separated ones */ -+ dlam = (fepvals->all_lambda[j][i]-lambda[j]); -+ enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j]; -+ if (debug) -+ { -+ fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n", -+ fepvals->all_lambda[j][i], efpt_names[j], -+ (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]), -+ dlam, enerd->dvdl_lin[j]); -+ } -+ } -+ } -+} -+ -+ -+void reset_foreign_enerdata(gmx_enerdata_t *enerd) -+{ -+ int i, j; -+ -+ /* First reset all foreign energy components. Foreign energies always called on -+ neighbor search steps */ -+ for (i = 0; (i < egNR); i++) -+ { -+ for (j = 0; (j < enerd->grpp.nener); j++) -+ { -+ enerd->foreign_grpp.ener[i][j] = 0.0; -+ } -+ } -+ -+ /* potential energy components */ -+ for (i = 0; (i <= F_EPOT); i++) -+ { -+ enerd->foreign_term[i] = 0.0; -+ } -+} -+ -+void reset_enerdata(t_grpopts *opts, -+ t_forcerec *fr, gmx_bool bNS, -+ gmx_enerdata_t *enerd, -+ gmx_bool bMaster) -+{ -+ gmx_bool bKeepLR; -+ int i, j; -+ -+ /* First reset all energy components, except for the long range terms -+ * on the master at non neighbor search steps, since the long range -+ * terms have already been summed at the last neighbor search step. 
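sum_dhdl above builds the free-energy differences to the foreign lambda points by linear extrapolation of the components that are linear in lambda; as its comment notes, this is exact for those terms, while constraints and the soft-core/long-range parts are only approximated. As a restatement (LaTeX, not part of the patch), with $\lambda_j$ the current value of component $j$ and $\lambda_j^{(i)}$ its $i$-th foreign value:

$$\Delta U_i \mathrel{+}= \sum_j \bigl(\lambda_j^{(i)} - \lambda_j\bigr)\,\frac{\partial U_{\mathrm{lin}}}{\partial \lambda_j},$$

which is exactly the dlam * dvdl_lin[j] contribution added to enerpart_lambda[i+1] in the loop above.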
-+ */ -+ bKeepLR = (fr->bTwinRange && !bNS); -+ for (i = 0; (i < egNR); i++) -+ { -+ if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR))) -+ { -+ for (j = 0; (j < enerd->grpp.nener); j++) -+ { -+ enerd->grpp.ener[i][j] = 0.0; -+ } -+ } -+ } -+ for (i = 0; i < efptNR; i++) -+ { -+ enerd->dvdl_lin[i] = 0.0; -+ enerd->dvdl_nonlin[i] = 0.0; -+ } -+ -+ /* Normal potential energy components */ -+ for (i = 0; (i <= F_EPOT); i++) -+ { -+ enerd->term[i] = 0.0; -+ } -+ /* Initialize the dVdlambda term with the long range contribution */ -+ /* Initialize the dvdl term with the long range contribution */ -+ enerd->term[F_DVDL] = 0.0; -+ enerd->term[F_DVDL_COUL] = 0.0; -+ enerd->term[F_DVDL_VDW] = 0.0; -+ enerd->term[F_DVDL_BONDED] = 0.0; -+ enerd->term[F_DVDL_RESTRAINT] = 0.0; -+ enerd->term[F_DKDL] = 0.0; -+ if (enerd->n_lambda > 0) -+ { -+ for (i = 0; i < enerd->n_lambda; i++) -+ { -+ enerd->enerpart_lambda[i] = 0.0; -+ } -+ } -+ /* reset foreign energy data - separate function since we also call it elsewhere */ -+ reset_foreign_enerdata(enerd); -+} -diff --git a/src/mdlib/minimize.c b/src/mdlib/minimize.c -index 8afe436..15fd15a 100644 ---- a/src/mdlib/minimize.c -+++ b/src/mdlib/minimize.c -@@ -83,6 +83,12 @@ - #include "gmx_omp_nthreads.h" - #include "md_logging.h" - -+/* PLUMED */ -+#include "../../Plumed.h" -+extern int plumedswitch; -+extern plumed plumedmain; -+extern void(*plumedcmd)(plumed,const char*,const void*); -+/* END PLUMED */ - - typedef struct { - t_state s; -@@ -459,6 +465,47 @@ void init_em(FILE *fplog, const char *title, - - clear_rvec(mu_tot); - calc_shifts(ems->s.box, fr->shift_vec); -+ -+ /* PLUMED */ -+ if(plumedswitch){ -+ if(cr->ms && cr->ms->nsim>1) { -+ if(MASTER(cr)) (*plumedcmd) (plumedmain,"GREX setMPIIntercomm",&cr->ms->mpi_comm_masters); -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ (*plumedcmd) (plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all); -+ }else{ -+ (*plumedcmd) (plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim); -+ } -+ } -+ (*plumedcmd) (plumedmain,"GREX init",NULL); -+ } -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ (*plumedcmd) (plumedmain,"setMPIComm",&cr->dd->mpi_comm_all); -+ }else{ -+ (*plumedcmd) (plumedmain,"setMPIComm",&cr->mpi_comm_mysim); -+ } -+ } -+ (*plumedcmd) (plumedmain,"setNatoms",&top_global->natoms); -+ (*plumedcmd) (plumedmain,"setMDEngine","gromacs"); -+ (*plumedcmd) (plumedmain,"setLog",fplog); -+ real real_delta_t; -+ real_delta_t=ir->delta_t; -+ (*plumedcmd) (plumedmain,"setTimestep",&real_delta_t); -+ (*plumedcmd) (plumedmain,"init",NULL); -+ -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ (*plumedcmd) (plumedmain,"setAtomsNlocal",&cr->dd->nat_home); -+ (*plumedcmd) (plumedmain,"setAtomsGatindex",cr->dd->gatindex); -+ }else{ -+ (*plumedcmd) (plumedmain,"setAtomsNlocal",&mdatoms->homenr); -+ (*plumedcmd) (plumedmain,"setAtomsContiguous",&mdatoms->start); -+ } -+ } -+ } -+ /* END PLUMED */ -+ - } - - static void finish_em(FILE *fplog, t_commrec *cr, gmx_mdoutf_t *outf, -@@ -738,6 +785,12 @@ static void evaluate_energy(FILE *fplog, gmx_bool bVerbose, t_commrec *cr, - em_dd_partition_system(fplog, count, cr, top_global, inputrec, - ems, top, mdatoms, fr, vsite, constr, - nrnb, wcycle); -+ /* PLUMED */ -+ if(plumedswitch){ -+ (*plumedcmd) (plumedmain,"setAtomsNlocal",&cr->dd->nat_home); -+ (*plumedcmd) (plumedmain,"setAtomsGatindex",cr->dd->gatindex); -+ } -+ /* END PLUMED */ - } - } - -@@ -745,6 +798,22 @@ static void evaluate_energy(FILE *fplog, gmx_bool bVerbose, t_commrec *cr, - /* do_force always puts the charge 
groups in the box and shifts again - * We do not unshift, so molecules are always whole in congrad.c - */ -+ /* PLUMED */ -+ int plumedNeedsEnergy=0; -+ matrix plumed_vir; -+ if(plumedswitch){ -+ long int lstep=count; (*plumedcmd)(plumedmain,"setStepLong",&count); -+ (*plumedcmd) (plumedmain,"setPositions",&ems->s.x[mdatoms->start][0]); -+ (*plumedcmd) (plumedmain,"setMasses",&mdatoms->massT[mdatoms->start]); -+ (*plumedcmd) (plumedmain,"setCharges",&mdatoms->chargeA[mdatoms->start]); -+ (*plumedcmd) (plumedmain,"setBox",&ems->s.box[0][0]); -+ (*plumedcmd) (plumedmain,"prepareCalc",NULL); -+ (*plumedcmd) (plumedmain,"setForces",&ems->f[mdatoms->start][0]); -+ (*plumedcmd) (plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); -+ clear_mat(plumed_vir); -+ (*plumedcmd) (plumedmain,"setVirial",&plumed_vir[0][0]); -+ } -+ /* END PLUMED */ - do_force(fplog, cr, inputrec, - count, nrnb, wcycle, top, top_global, &top_global->groups, - ems->s.box, ems->s.x, &ems->s.hist, -@@ -753,6 +822,19 @@ static void evaluate_energy(FILE *fplog, gmx_bool bVerbose, t_commrec *cr, - GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | - GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY | - (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0)); -+ /* PLUMED */ -+ if(plumedswitch){ -+ if(plumedNeedsEnergy) { -+ msmul(force_vir,2.0,plumed_vir); -+ (*plumedcmd) (plumedmain,"setEnergy",&enerd->term[F_EPOT]); -+ (*plumedcmd) (plumedmain,"performCalc",NULL); -+ msmul(plumed_vir,0.5,force_vir); -+ } else { -+ msmul(plumed_vir,0.5,plumed_vir); -+ m_add(force_vir,plumed_vir,force_vir); -+ } -+ } -+ /* END PLUMED */ - - /* Clear the unused shake virial and pressure */ - clear_mat(shake_vir); -diff --git a/src/mdlib/minimize.c.preplumed b/src/mdlib/minimize.c.preplumed -new file mode 100644 -index 0000000..8afe436 ---- /dev/null -+++ b/src/mdlib/minimize.c.preplumed -@@ -0,0 +1,2864 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team, -+ * check out http://www.gromacs.org for more information. -+ * Copyright (c) 2012,2013, by the GROMACS development team, led by -+ * David van der Spoel, Berk Hess, Erik Lindahl, and including many -+ * others, as listed in the AUTHORS file in the top-level source -+ * directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. -+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. 
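The evaluate_energy changes earlier in this hunk show the per-step order in which the minimizers drive PLUMED: register the step, positions, masses, charges, box and force array, call "prepareCalc", ask whether the bias needs the total energy, register a cleared virial, run do_force, and only afterwards hand over the energy and call "performCalc" (when the energy is not needed, "performCalc" has already run inside the do_force_lowlevel hook). A condensed sketch of that sequence, with the command strings taken from the patch and x, f, box, masses, charges, step, epot as stand-in variables:

    /* Per-step PLUMED driving sequence (condensed sketch, not a literal excerpt). */
    int    plumedNeedsEnergy = 0;
    matrix plumed_vir;

    (*plumedcmd)(plumedmain, "setStepLong",    &step);
    (*plumedcmd)(plumedmain, "setPositions",   &x[0][0]);
    (*plumedcmd)(plumedmain, "setMasses",      masses);
    (*plumedcmd)(plumedmain, "setCharges",     charges);
    (*plumedcmd)(plumedmain, "setBox",         &box[0][0]);
    (*plumedcmd)(plumedmain, "prepareCalc",    NULL);
    (*plumedcmd)(plumedmain, "setForces",      &f[0][0]);
    (*plumedcmd)(plumedmain, "isEnergyNeeded", &plumedNeedsEnergy);
    clear_mat(plumed_vir);
    (*plumedcmd)(plumedmain, "setVirial",      &plumed_vir[0][0]);
    /* ... do_force(...) runs here ... */
    if (plumedNeedsEnergy)
    {
        (*plumedcmd)(plumedmain, "setEnergy",   &epot);
        (*plumedcmd)(plumedmain, "performCalc", NULL);
    }
    /* the patch then folds plumed_vir back into GROMACS' force_vir */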
Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. -+ */ -+#ifdef HAVE_CONFIG_H -+#include -+#endif -+ -+#include -+#include -+#include -+#include "sysstuff.h" -+#include "string2.h" -+#include "network.h" -+#include "confio.h" -+#include "copyrite.h" -+#include "smalloc.h" -+#include "nrnb.h" -+#include "main.h" -+#include "force.h" -+#include "macros.h" -+#include "random.h" -+#include "names.h" -+#include "gmx_fatal.h" -+#include "txtdump.h" -+#include "typedefs.h" -+#include "update.h" -+#include "constr.h" -+#include "vec.h" -+#include "statutil.h" -+#include "tgroup.h" -+#include "mdebin.h" -+#include "vsite.h" -+#include "force.h" -+#include "mdrun.h" -+#include "md_support.h" -+#include "sim_util.h" -+#include "domdec.h" -+#include "partdec.h" -+#include "trnio.h" -+#include "sparsematrix.h" -+#include "mtxio.h" -+#include "mdatoms.h" -+#include "ns.h" -+#include "gmx_wallcycle.h" -+#include "mtop_util.h" -+#include "gmxfio.h" -+#include "pme.h" -+#include "bondf.h" -+#include "gmx_omp_nthreads.h" -+#include "md_logging.h" -+ -+ -+typedef struct { -+ t_state s; -+ rvec *f; -+ real epot; -+ real fnorm; -+ real fmax; -+ int a_fmax; -+} em_state_t; -+ -+static em_state_t *init_em_state() -+{ -+ em_state_t *ems; -+ -+ snew(ems, 1); -+ -+ /* does this need to be here? Should the array be declared differently (staticaly)in the state definition? */ -+ snew(ems->s.lambda, efptNR); -+ -+ return ems; -+} -+ -+static void print_em_start(FILE *fplog, t_commrec *cr, gmx_runtime_t *runtime, -+ gmx_wallcycle_t wcycle, -+ const char *name) -+{ -+ runtime_start(runtime); -+ wallcycle_start(wcycle, ewcRUN); -+ print_start(fplog, cr, runtime, name); -+} -+ -+static void em_time_end(FILE *fplog, t_commrec *cr, gmx_runtime_t *runtime, -+ gmx_wallcycle_t wcycle) -+{ -+ wallcycle_stop(wcycle, ewcRUN); -+ -+ runtime_end(runtime); -+} -+ -+static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps) -+{ -+ fprintf(out, "\n"); -+ fprintf(out, "%s:\n", minimizer); -+ fprintf(out, " Tolerance (Fmax) = %12.5e\n", ftol); -+ fprintf(out, " Number of steps = %12d\n", nsteps); -+} -+ -+static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain) -+{ -+ char buffer[2048]; -+ if (bLastStep) -+ { -+ sprintf(buffer, -+ "\nEnergy minimization reached the maximum number " -+ "of steps before the forces reached the requested " -+ "precision Fmax < %g.\n", ftol); -+ } -+ else -+ { -+ sprintf(buffer, -+ "\nEnergy minimization has stopped, but the forces have " -+ "not converged to the requested precision Fmax < %g (which " -+ "may not be possible for your system). It stopped " -+ "because the algorithm tried to make a new step whose size " -+ "was too small, or there was no change in the energy since " -+ "last step. Either way, we regard the minimization as " -+ "converged to within the available machine precision, " -+ "given your starting configuration and EM parameters.\n%s%s", -+ ftol, -+ sizeof(real) < sizeof(double) ? -+ "\nDouble precision normally gives you higher accuracy, but " -+ "this is often not needed for preparing to run molecular " -+ "dynamics.\n" : -+ "", -+ bConstrain ? 
-+ "You might need to increase your constraint accuracy, or turn\n" -+ "off constraints altogether (set constraints = none in mdp file)\n" : -+ ""); -+ } -+ fputs(wrap_lines(buffer, 78, 0, FALSE), fp); -+} -+ -+ -+ -+static void print_converged(FILE *fp, const char *alg, real ftol, -+ gmx_large_int_t count, gmx_bool bDone, gmx_large_int_t nsteps, -+ real epot, real fmax, int nfmax, real fnorm) -+{ -+ char buf[STEPSTRSIZE]; -+ -+ if (bDone) -+ { -+ fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n", -+ alg, ftol, gmx_step_str(count, buf)); -+ } -+ else if (count < nsteps) -+ { -+ fprintf(fp, "\n%s converged to machine precision in %s steps,\n" -+ "but did not reach the requested Fmax < %g.\n", -+ alg, gmx_step_str(count, buf), ftol); -+ } -+ else -+ { -+ fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n", -+ alg, ftol, gmx_step_str(count, buf)); -+ } -+ -+#ifdef GMX_DOUBLE -+ fprintf(fp, "Potential Energy = %21.14e\n", epot); -+ fprintf(fp, "Maximum force = %21.14e on atom %d\n", fmax, nfmax+1); -+ fprintf(fp, "Norm of force = %21.14e\n", fnorm); -+#else -+ fprintf(fp, "Potential Energy = %14.7e\n", epot); -+ fprintf(fp, "Maximum force = %14.7e on atom %d\n", fmax, nfmax+1); -+ fprintf(fp, "Norm of force = %14.7e\n", fnorm); -+#endif -+} -+ -+static void get_f_norm_max(t_commrec *cr, -+ t_grpopts *opts, t_mdatoms *mdatoms, rvec *f, -+ real *fnorm, real *fmax, int *a_fmax) -+{ -+ double fnorm2, *sum; -+ real fmax2, fmax2_0, fam; -+ int la_max, a_max, start, end, i, m, gf; -+ -+ /* This routine finds the largest force and returns it. -+ * On parallel machines the global max is taken. -+ */ -+ fnorm2 = 0; -+ fmax2 = 0; -+ la_max = -1; -+ gf = 0; -+ start = mdatoms->start; -+ end = mdatoms->homenr + start; -+ if (mdatoms->cFREEZE) -+ { -+ for (i = start; i < end; i++) -+ { -+ gf = mdatoms->cFREEZE[i]; -+ fam = 0; -+ for (m = 0; m < DIM; m++) -+ { -+ if (!opts->nFreeze[gf][m]) -+ { -+ fam += sqr(f[i][m]); -+ } -+ } -+ fnorm2 += fam; -+ if (fam > fmax2) -+ { -+ fmax2 = fam; -+ la_max = i; -+ } -+ } -+ } -+ else -+ { -+ for (i = start; i < end; i++) -+ { -+ fam = norm2(f[i]); -+ fnorm2 += fam; -+ if (fam > fmax2) -+ { -+ fmax2 = fam; -+ la_max = i; -+ } -+ } -+ } -+ -+ if (la_max >= 0 && DOMAINDECOMP(cr)) -+ { -+ a_max = cr->dd->gatindex[la_max]; -+ } -+ else -+ { -+ a_max = la_max; -+ } -+ if (PAR(cr)) -+ { -+ snew(sum, 2*cr->nnodes+1); -+ sum[2*cr->nodeid] = fmax2; -+ sum[2*cr->nodeid+1] = a_max; -+ sum[2*cr->nnodes] = fnorm2; -+ gmx_sumd(2*cr->nnodes+1, sum, cr); -+ fnorm2 = sum[2*cr->nnodes]; -+ /* Determine the global maximum */ -+ for (i = 0; i < cr->nnodes; i++) -+ { -+ if (sum[2*i] > fmax2) -+ { -+ fmax2 = sum[2*i]; -+ a_max = (int)(sum[2*i+1] + 0.5); -+ } -+ } -+ sfree(sum); -+ } -+ -+ if (fnorm) -+ { -+ *fnorm = sqrt(fnorm2); -+ } -+ if (fmax) -+ { -+ *fmax = sqrt(fmax2); -+ } -+ if (a_fmax) -+ { -+ *a_fmax = a_max; -+ } -+} -+ -+static void get_state_f_norm_max(t_commrec *cr, -+ t_grpopts *opts, t_mdatoms *mdatoms, -+ em_state_t *ems) -+{ -+ get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax); -+} -+ -+void init_em(FILE *fplog, const char *title, -+ t_commrec *cr, t_inputrec *ir, -+ t_state *state_global, gmx_mtop_t *top_global, -+ em_state_t *ems, gmx_localtop_t **top, -+ rvec **f, rvec **f_global, -+ t_nrnb *nrnb, rvec mu_tot, -+ t_forcerec *fr, gmx_enerdata_t **enerd, -+ t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int nfile, const t_filenm fnm[], -+ gmx_mdoutf_t **outf, 
t_mdebin **mdebin) -+{ -+ int start, homenr, i; -+ real dvdl_constr; -+ -+ if (fplog) -+ { -+ fprintf(fplog, "Initiating %s\n", title); -+ } -+ -+ state_global->ngtc = 0; -+ -+ /* Initialize lambda variables */ -+ initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, NULL); -+ -+ init_nrnb(nrnb); -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ *top = dd_init_local_top(top_global); -+ -+ dd_init_local_state(cr->dd, state_global, &ems->s); -+ -+ *f = NULL; -+ -+ /* Distribute the charge groups over the nodes from the master node */ -+ dd_partition_system(fplog, ir->init_step, cr, TRUE, 1, -+ state_global, top_global, ir, -+ &ems->s, &ems->f, mdatoms, *top, -+ fr, vsite, NULL, constr, -+ nrnb, NULL, FALSE); -+ dd_store_state(cr->dd, &ems->s); -+ -+ if (ir->nstfout) -+ { -+ snew(*f_global, top_global->natoms); -+ } -+ else -+ { -+ *f_global = NULL; -+ } -+ *graph = NULL; -+ } -+ else -+ { -+ snew(*f, top_global->natoms); -+ -+ /* Just copy the state */ -+ ems->s = *state_global; -+ snew(ems->s.x, ems->s.nalloc); -+ snew(ems->f, ems->s.nalloc); -+ for (i = 0; i < state_global->natoms; i++) -+ { -+ copy_rvec(state_global->x[i], ems->s.x[i]); -+ } -+ copy_mat(state_global->box, ems->s.box); -+ -+ if (PAR(cr) && ir->eI != eiNM) -+ { -+ /* Initialize the particle decomposition and split the topology */ -+ *top = split_system(fplog, top_global, ir, cr); -+ -+ pd_cg_range(cr, &fr->cg0, &fr->hcg); -+ } -+ else -+ { -+ *top = gmx_mtop_generate_local_top(top_global, ir); -+ } -+ *f_global = *f; -+ -+ forcerec_set_excl_load(fr, *top, cr); -+ -+ setup_bonded_threading(fr, &(*top)->idef); -+ -+ if (ir->ePBC != epbcNONE && !fr->bMolPBC) -+ { -+ *graph = mk_graph(fplog, &((*top)->idef), 0, top_global->natoms, FALSE, FALSE); -+ } -+ else -+ { -+ *graph = NULL; -+ } -+ -+ if (PARTDECOMP(cr)) -+ { -+ pd_at_range(cr, &start, &homenr); -+ homenr -= start; -+ } -+ else -+ { -+ start = 0; -+ homenr = top_global->natoms; -+ } -+ atoms2md(top_global, ir, 0, NULL, start, homenr, mdatoms); -+ update_mdatoms(mdatoms, state_global->lambda[efptFEP]); -+ -+ if (vsite) -+ { -+ set_vsite_top(vsite, *top, mdatoms, cr); -+ } -+ } -+ -+ if (constr) -+ { -+ if (ir->eConstrAlg == econtSHAKE && -+ gmx_mtop_ftype_count(top_global, F_CONSTR) > 0) -+ { -+ gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n", -+ econstr_names[econtSHAKE], econstr_names[econtLINCS]); -+ } -+ -+ if (!DOMAINDECOMP(cr)) -+ { -+ set_constraints(constr, *top, ir, mdatoms, cr); -+ } -+ -+ if (!ir->bContinuation) -+ { -+ /* Constrain the starting coordinates */ -+ dvdl_constr = 0; -+ constrain(PAR(cr) ? 
NULL : fplog, TRUE, TRUE, constr, &(*top)->idef, -+ ir, NULL, cr, -1, 0, mdatoms, -+ ems->s.x, ems->s.x, NULL, fr->bMolPBC, ems->s.box, -+ ems->s.lambda[efptFEP], &dvdl_constr, -+ NULL, NULL, nrnb, econqCoord, FALSE, 0, 0); -+ } -+ } -+ -+ if (PAR(cr)) -+ { -+ *gstat = global_stat_init(ir); -+ } -+ -+ *outf = init_mdoutf(nfile, fnm, 0, cr, ir, NULL); -+ -+ snew(*enerd, 1); -+ init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, -+ *enerd); -+ -+ if (mdebin != NULL) -+ { -+ /* Init bin for energy stuff */ -+ *mdebin = init_mdebin((*outf)->fp_ene, top_global, ir, NULL); -+ } -+ -+ clear_rvec(mu_tot); -+ calc_shifts(ems->s.box, fr->shift_vec); -+} -+ -+static void finish_em(FILE *fplog, t_commrec *cr, gmx_mdoutf_t *outf, -+ gmx_runtime_t *runtime, gmx_wallcycle_t wcycle) -+{ -+ if (!(cr->duty & DUTY_PME)) -+ { -+ /* Tell the PME only node to finish */ -+ gmx_pme_send_finish(cr); -+ } -+ -+ done_mdoutf(outf); -+ -+ em_time_end(fplog, cr, runtime, wcycle); -+} -+ -+static void swap_em_state(em_state_t *ems1, em_state_t *ems2) -+{ -+ em_state_t tmp; -+ -+ tmp = *ems1; -+ *ems1 = *ems2; -+ *ems2 = tmp; -+} -+ -+static void copy_em_coords(em_state_t *ems, t_state *state) -+{ -+ int i; -+ -+ for (i = 0; (i < state->natoms); i++) -+ { -+ copy_rvec(ems->s.x[i], state->x[i]); -+ } -+} -+ -+static void write_em_traj(FILE *fplog, t_commrec *cr, -+ gmx_mdoutf_t *outf, -+ gmx_bool bX, gmx_bool bF, const char *confout, -+ gmx_mtop_t *top_global, -+ t_inputrec *ir, gmx_large_int_t step, -+ em_state_t *state, -+ t_state *state_global, rvec *f_global) -+{ -+ int mdof_flags; -+ -+ if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr)) -+ { -+ copy_em_coords(state, state_global); -+ f_global = state->f; -+ } -+ -+ mdof_flags = 0; -+ if (bX) -+ { -+ mdof_flags |= MDOF_X; -+ } -+ if (bF) -+ { -+ mdof_flags |= MDOF_F; -+ } -+ write_traj(fplog, cr, outf, mdof_flags, -+ top_global, step, (double)step, -+ &state->s, state_global, state->f, f_global, NULL, NULL); -+ -+ if (confout != NULL && MASTER(cr)) -+ { -+ if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr)) -+ { -+ /* Make molecules whole only for confout writing */ -+ do_pbc_mtop(fplog, ir->ePBC, state_global->box, top_global, -+ state_global->x); -+ } -+ -+ write_sto_conf_mtop(confout, -+ *top_global->name, top_global, -+ state_global->x, NULL, ir->ePBC, state_global->box); -+ } -+} -+ -+static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md, -+ gmx_bool bMolPBC, -+ em_state_t *ems1, real a, rvec *f, em_state_t *ems2, -+ gmx_constr_t constr, gmx_localtop_t *top, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_large_int_t count) -+ -+{ -+ t_state *s1, *s2; -+ int i; -+ int start, end; -+ rvec *x1, *x2; -+ real dvdl_constr; -+ -+ s1 = &ems1->s; -+ s2 = &ems2->s; -+ -+ if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count) -+ { -+ gmx_incons("state mismatch in do_em_step"); -+ } -+ -+ s2->flags = s1->flags; -+ -+ if (s2->nalloc != s1->nalloc) -+ { -+ s2->nalloc = s1->nalloc; -+ srenew(s2->x, s1->nalloc); -+ srenew(ems2->f, s1->nalloc); -+ if (s2->flags & (1<cg_p, s1->nalloc); -+ } -+ } -+ -+ s2->natoms = s1->natoms; -+ copy_mat(s1->box, s2->box); -+ /* Copy free energy state */ -+ for (i = 0; i < efptNR; i++) -+ { -+ s2->lambda[i] = s1->lambda[i]; -+ } -+ copy_mat(s1->box, s2->box); -+ -+ start = md->start; -+ end = md->start + md->homenr; -+ -+ x1 = s1->x; -+ x2 = s2->x; -+ -+#pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate)) -+ { -+ int gf, i, m; -+ -+ gf = 0; -+#pragma omp for 
schedule(static) nowait -+ for (i = start; i < end; i++) -+ { -+ if (md->cFREEZE) -+ { -+ gf = md->cFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ if (ir->opts.nFreeze[gf][m]) -+ { -+ x2[i][m] = x1[i][m]; -+ } -+ else -+ { -+ x2[i][m] = x1[i][m] + a*f[i][m]; -+ } -+ } -+ } -+ -+ if (s2->flags & (1<cg_p; -+ x2 = s2->cg_p; -+#pragma omp for schedule(static) nowait -+ for (i = start; i < end; i++) -+ { -+ copy_rvec(x1[i], x2[i]); -+ } -+ } -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ s2->ddp_count = s1->ddp_count; -+ if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) -+ { -+#pragma omp barrier -+ s2->cg_gl_nalloc = s1->cg_gl_nalloc; -+ srenew(s2->cg_gl, s2->cg_gl_nalloc); -+#pragma omp barrier -+ } -+ s2->ncg_gl = s1->ncg_gl; -+#pragma omp for schedule(static) nowait -+ for (i = 0; i < s2->ncg_gl; i++) -+ { -+ s2->cg_gl[i] = s1->cg_gl[i]; -+ } -+ s2->ddp_count_cg_gl = s1->ddp_count_cg_gl; -+ } -+ } -+ -+ if (constr) -+ { -+ wallcycle_start(wcycle, ewcCONSTR); -+ dvdl_constr = 0; -+ constrain(NULL, TRUE, TRUE, constr, &top->idef, -+ ir, NULL, cr, count, 0, md, -+ s1->x, s2->x, NULL, bMolPBC, s2->box, -+ s2->lambda[efptBONDED], &dvdl_constr, -+ NULL, NULL, nrnb, econqCoord, FALSE, 0, 0); -+ wallcycle_stop(wcycle, ewcCONSTR); -+ } -+} -+ -+static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr, -+ gmx_mtop_t *top_global, t_inputrec *ir, -+ em_state_t *ems, gmx_localtop_t *top, -+ t_mdatoms *mdatoms, t_forcerec *fr, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle) -+{ -+ /* Repartition the domain decomposition */ -+ wallcycle_start(wcycle, ewcDOMDEC); -+ dd_partition_system(fplog, step, cr, FALSE, 1, -+ NULL, top_global, ir, -+ &ems->s, &ems->f, -+ mdatoms, top, fr, vsite, NULL, constr, -+ nrnb, wcycle, FALSE); -+ dd_store_state(cr->dd, &ems->s); -+ wallcycle_stop(wcycle, ewcDOMDEC); -+} -+ -+static void evaluate_energy(FILE *fplog, gmx_bool bVerbose, t_commrec *cr, -+ t_state *state_global, gmx_mtop_t *top_global, -+ em_state_t *ems, gmx_localtop_t *top, -+ t_inputrec *inputrec, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_global_stat_t gstat, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ t_fcdata *fcd, -+ t_graph *graph, t_mdatoms *mdatoms, -+ t_forcerec *fr, rvec mu_tot, -+ gmx_enerdata_t *enerd, tensor vir, tensor pres, -+ gmx_large_int_t count, gmx_bool bFirst) -+{ -+ real t; -+ gmx_bool bNS; -+ int nabnsb; -+ tensor force_vir, shake_vir, ekin; -+ real dvdl_constr, prescorr, enercorr, dvdlcorr; -+ real terminate = 0; -+ -+ /* Set the time to the initial time, the time does not change during EM */ -+ t = inputrec->init_t; -+ -+ if (bFirst || -+ (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) -+ { -+ /* This the first state or an old state used before the last ns */ -+ bNS = TRUE; -+ } -+ else -+ { -+ bNS = FALSE; -+ if (inputrec->nstlist > 0) -+ { -+ bNS = TRUE; -+ } -+ else if (inputrec->nstlist == -1) -+ { -+ nabnsb = natoms_beyond_ns_buffer(inputrec, fr, &top->cgs, NULL, ems->s.x); -+ if (PAR(cr)) -+ { -+ gmx_sumi(1, &nabnsb, cr); -+ } -+ bNS = (nabnsb > 0); -+ } -+ } -+ -+ if (vsite) -+ { -+ construct_vsites(fplog, vsite, ems->s.x, nrnb, 1, NULL, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, graph, cr, ems->s.box); -+ } -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ if (bNS) -+ { -+ /* Repartition the domain decomposition */ -+ em_dd_partition_system(fplog, count, cr, top_global, inputrec, -+ ems, top, mdatoms, fr, vsite, constr, -+ nrnb, wcycle); -+ } -+ } -+ -+ /* Calc force & energy on new trial position */ -+ /* do_force 
always puts the charge groups in the box and shifts again -+ * We do not unshift, so molecules are always whole in congrad.c -+ */ -+ do_force(fplog, cr, inputrec, -+ count, nrnb, wcycle, top, top_global, &top_global->groups, -+ ems->s.box, ems->s.x, &ems->s.hist, -+ ems->f, force_vir, mdatoms, enerd, fcd, -+ ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL, NULL, TRUE, -+ GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | -+ GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY | -+ (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0)); -+ -+ /* Clear the unused shake virial and pressure */ -+ clear_mat(shake_vir); -+ clear_mat(pres); -+ -+ /* Communicate stuff when parallel */ -+ if (PAR(cr) && inputrec->eI != eiNM) -+ { -+ wallcycle_start(wcycle, ewcMoveE); -+ -+ global_stat(fplog, gstat, cr, enerd, force_vir, shake_vir, mu_tot, -+ inputrec, NULL, NULL, NULL, 1, &terminate, -+ top_global, &ems->s, FALSE, -+ CGLO_ENERGY | -+ CGLO_PRESSURE | -+ CGLO_CONSTRAINT | -+ CGLO_FIRSTITERATE); -+ -+ wallcycle_stop(wcycle, ewcMoveE); -+ } -+ -+ /* Calculate long range corrections to pressure and energy */ -+ calc_dispcorr(fplog, inputrec, fr, count, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW], -+ pres, force_vir, &prescorr, &enercorr, &dvdlcorr); -+ enerd->term[F_DISPCORR] = enercorr; -+ enerd->term[F_EPOT] += enercorr; -+ enerd->term[F_PRES] += prescorr; -+ enerd->term[F_DVDL] += dvdlcorr; -+ -+ ems->epot = enerd->term[F_EPOT]; -+ -+ if (constr) -+ { -+ /* Project out the constraint components of the force */ -+ wallcycle_start(wcycle, ewcCONSTR); -+ dvdl_constr = 0; -+ constrain(NULL, FALSE, FALSE, constr, &top->idef, -+ inputrec, NULL, cr, count, 0, mdatoms, -+ ems->s.x, ems->f, ems->f, fr->bMolPBC, ems->s.box, -+ ems->s.lambda[efptBONDED], &dvdl_constr, -+ NULL, &shake_vir, nrnb, econqForceDispl, FALSE, 0, 0); -+ if (fr->bSepDVDL && fplog) -+ { -+ fprintf(fplog, sepdvdlformat, "Constraints", t, dvdl_constr); -+ } -+ enerd->term[F_DVDL_CONSTR] += dvdl_constr; -+ m_add(force_vir, shake_vir, vir); -+ wallcycle_stop(wcycle, ewcCONSTR); -+ } -+ else -+ { -+ copy_mat(force_vir, vir); -+ } -+ -+ clear_mat(ekin); -+ enerd->term[F_PRES] = -+ calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres); -+ -+ sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals); -+ -+ if (EI_ENERGY_MINIMIZATION(inputrec->eI)) -+ { -+ get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, ems); -+ } -+} -+ -+static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, -+ gmx_mtop_t *mtop, -+ em_state_t *s_min, em_state_t *s_b) -+{ -+ rvec *fm, *fb, *fmg; -+ t_block *cgs_gl; -+ int ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m; -+ double partsum; -+ unsigned char *grpnrFREEZE; -+ -+ if (debug) -+ { -+ fprintf(debug, "Doing reorder_partsum\n"); -+ } -+ -+ fm = s_min->f; -+ fb = s_b->f; -+ -+ cgs_gl = dd_charge_groups_global(cr->dd); -+ index = cgs_gl->index; -+ -+ /* Collect fm in a global vector fmg. -+ * This conflicts with the spirit of domain decomposition, -+ * but to fully optimize this a much more complicated algorithm is required. 
-+ */ -+ snew(fmg, mtop->natoms); -+ -+ ncg = s_min->s.ncg_gl; -+ cg_gl = s_min->s.cg_gl; -+ i = 0; -+ for (c = 0; c < ncg; c++) -+ { -+ cg = cg_gl[c]; -+ a0 = index[cg]; -+ a1 = index[cg+1]; -+ for (a = a0; a < a1; a++) -+ { -+ copy_rvec(fm[i], fmg[a]); -+ i++; -+ } -+ } -+ gmx_sum(mtop->natoms*3, fmg[0], cr); -+ -+ /* Now we will determine the part of the sum for the cgs in state s_b */ -+ ncg = s_b->s.ncg_gl; -+ cg_gl = s_b->s.cg_gl; -+ partsum = 0; -+ i = 0; -+ gf = 0; -+ grpnrFREEZE = mtop->groups.grpnr[egcFREEZE]; -+ for (c = 0; c < ncg; c++) -+ { -+ cg = cg_gl[c]; -+ a0 = index[cg]; -+ a1 = index[cg+1]; -+ for (a = a0; a < a1; a++) -+ { -+ if (mdatoms->cFREEZE && grpnrFREEZE) -+ { -+ gf = grpnrFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ if (!opts->nFreeze[gf][m]) -+ { -+ partsum += (fb[i][m] - fmg[a][m])*fb[i][m]; -+ } -+ } -+ i++; -+ } -+ } -+ -+ sfree(fmg); -+ -+ return partsum; -+} -+ -+static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, -+ gmx_mtop_t *mtop, -+ em_state_t *s_min, em_state_t *s_b) -+{ -+ rvec *fm, *fb; -+ double sum; -+ int gf, i, m; -+ -+ /* This is just the classical Polak-Ribiere calculation of beta; -+ * it looks a bit complicated since we take freeze groups into account, -+ * and might have to sum it in parallel runs. -+ */ -+ -+ if (!DOMAINDECOMP(cr) || -+ (s_min->s.ddp_count == cr->dd->ddp_count && -+ s_b->s.ddp_count == cr->dd->ddp_count)) -+ { -+ fm = s_min->f; -+ fb = s_b->f; -+ sum = 0; -+ gf = 0; -+ /* This part of code can be incorrect with DD, -+ * since the atom ordering in s_b and s_min might differ. -+ */ -+ for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++) -+ { -+ if (mdatoms->cFREEZE) -+ { -+ gf = mdatoms->cFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ if (!opts->nFreeze[gf][m]) -+ { -+ sum += (fb[i][m] - fm[i][m])*fb[i][m]; -+ } -+ } -+ } -+ } -+ else -+ { -+ /* We need to reorder cgs while summing */ -+ sum = reorder_partsum(cr, opts, mdatoms, mtop, s_min, s_b); -+ } -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &sum, cr); -+ } -+ -+ return sum/sqr(s_min->fnorm); -+} -+ -+double do_cg(FILE *fplog, t_commrec *cr, -+ int nfile, const t_filenm fnm[], -+ const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact, -+ int nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int stepout, -+ t_inputrec *inputrec, -+ gmx_mtop_t *top_global, t_fcdata *fcd, -+ t_state *state_global, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t ed, -+ t_forcerec *fr, -+ int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, -+ gmx_membed_t membed, -+ real cpt_period, real max_hours, -+ const char *deviceOptions, -+ unsigned long Flags, -+ gmx_runtime_t *runtime) -+{ -+ const char *CG = "Polak-Ribiere Conjugate Gradients"; -+ -+ em_state_t *s_min, *s_a, *s_b, *s_c; -+ gmx_localtop_t *top; -+ gmx_enerdata_t *enerd; -+ rvec *f; -+ gmx_global_stat_t gstat; -+ t_graph *graph; -+ rvec *f_global, *p, *sf, *sfm; -+ double gpa, gpb, gpc, tmp, sum[2], minstep; -+ real fnormn; -+ real stepsize; -+ real a, b, c, beta = 0.0; -+ real epot_repl = 0; -+ real pnorm; -+ t_mdebin *mdebin; -+ gmx_bool converged, foundlower; -+ rvec mu_tot; -+ gmx_bool do_log = FALSE, do_ene = FALSE, do_x, do_f; -+ tensor vir, pres; -+ int number_steps, neval = 0, nstcg = inputrec->nstcgsteep; -+ gmx_mdoutf_t *outf; -+ int i, m, gf, step, nminstep; -+ real terminate = 0; -+ -+ step = 0; -+ -+ s_min = init_em_state(); -+ s_a = init_em_state(); -+ s_b = init_em_state(); -+ s_c = init_em_state(); -+ -+ /* Init em and store the 
local state in s_min */ -+ init_em(fplog, CG, cr, inputrec, -+ state_global, top_global, s_min, &top, &f, &f_global, -+ nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, -+ nfile, fnm, &outf, &mdebin); -+ -+ /* Print to log file */ -+ print_em_start(fplog, cr, runtime, wcycle, CG); -+ -+ /* Max number of steps */ -+ number_steps = inputrec->nsteps; -+ -+ if (MASTER(cr)) -+ { -+ sp_header(stderr, CG, inputrec->em_tol, number_steps); -+ } -+ if (fplog) -+ { -+ sp_header(fplog, CG, inputrec->em_tol, number_steps); -+ } -+ -+ /* Call the force routine and some auxiliary (neighboursearching etc.) */ -+ /* do_force always puts the charge groups in the box and shifts again -+ * We do not unshift, so molecules are always whole in congrad.c -+ */ -+ evaluate_energy(fplog, bVerbose, cr, -+ state_global, top_global, s_min, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, TRUE); -+ where(); -+ -+ if (MASTER(cr)) -+ { -+ /* Copy stuff to the energy bin for easy printing etc. */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)step, -+ mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box, -+ NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ -+ print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]); -+ print_ebin(outf->fp_ene, TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ where(); -+ -+ /* Estimate/guess the initial stepsize */ -+ stepsize = inputrec->em_stepsize/s_min->fnorm; -+ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, " F-max = %12.5e on atom %d\n", -+ s_min->fmax, s_min->a_fmax+1); -+ fprintf(stderr, " F-Norm = %12.5e\n", -+ s_min->fnorm/sqrt(state_global->natoms)); -+ fprintf(stderr, "\n"); -+ /* and copy to the log file too... */ -+ fprintf(fplog, " F-max = %12.5e on atom %d\n", -+ s_min->fmax, s_min->a_fmax+1); -+ fprintf(fplog, " F-Norm = %12.5e\n", -+ s_min->fnorm/sqrt(state_global->natoms)); -+ fprintf(fplog, "\n"); -+ } -+ /* Start the loop over CG steps. -+ * Each successful step is counted, and we continue until -+ * we either converge or reach the max number of steps. -+ */ -+ converged = FALSE; -+ for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++) -+ { -+ -+ /* start taking steps in a new direction -+ * First time we enter the routine, beta=0, and the direction is -+ * simply the negative gradient. -+ */ -+ -+ /* Calculate the new direction in p, and the gradient in this direction, gpa */ -+ p = s_min->s.cg_p; -+ sf = s_min->f; -+ gpa = 0; -+ gf = 0; -+ for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++) -+ { -+ if (mdatoms->cFREEZE) -+ { -+ gf = mdatoms->cFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ if (!inputrec->opts.nFreeze[gf][m]) -+ { -+ p[i][m] = sf[i][m] + beta*p[i][m]; -+ gpa -= p[i][m]*sf[i][m]; -+ /* f is negative gradient, thus the sign */ -+ } -+ else -+ { -+ p[i][m] = 0; -+ } -+ } -+ } -+ -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpa, cr); -+ } -+ -+ /* Calculate the norm of the search vector */ -+ get_f_norm_max(cr, &(inputrec->opts), mdatoms, p, &pnorm, NULL, NULL); -+ -+ /* Just in case stepsize reaches zero due to numerical precision... */ -+ if (stepsize <= 0) -+ { -+ stepsize = inputrec->em_stepsize/pnorm; -+ } -+ -+ /* -+ * Double check the value of the derivative in the search direction. 
-+ * If it is positive it must be due to the old information in the -+ * CG formula, so just remove that and start over with beta=0. -+ * This corresponds to a steepest descent step. -+ */ -+ if (gpa > 0) -+ { -+ beta = 0; -+ step--; /* Don't count this step since we are restarting */ -+ continue; /* Go back to the beginning of the big for-loop */ -+ } -+ -+ /* Calculate minimum allowed stepsize, before the average (norm) -+ * relative change in coordinate is smaller than precision -+ */ -+ minstep = 0; -+ for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++) -+ { -+ for (m = 0; m < DIM; m++) -+ { -+ tmp = fabs(s_min->s.x[i][m]); -+ if (tmp < 1.0) -+ { -+ tmp = 1.0; -+ } -+ tmp = p[i][m]/tmp; -+ minstep += tmp*tmp; -+ } -+ } -+ /* Add up from all CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &minstep, cr); -+ } -+ -+ minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms)); -+ -+ if (stepsize < minstep) -+ { -+ converged = TRUE; -+ break; -+ } -+ -+ /* Write coordinates if necessary */ -+ do_x = do_per_step(step, inputrec->nstxout); -+ do_f = do_per_step(step, inputrec->nstfout); -+ -+ write_em_traj(fplog, cr, outf, do_x, do_f, NULL, -+ top_global, inputrec, step, -+ s_min, state_global, f_global); -+ -+ /* Take a step downhill. -+ * In theory, we should minimize the function along this direction. -+ * That is quite possible, but it turns out to take 5-10 function evaluations -+ * for each line. However, we dont really need to find the exact minimum - -+ * it is much better to start a new CG step in a modified direction as soon -+ * as we are close to it. This will save a lot of energy evaluations. -+ * -+ * In practice, we just try to take a single step. -+ * If it worked (i.e. lowered the energy), we increase the stepsize but -+ * the continue straight to the next CG step without trying to find any minimum. -+ * If it didn't work (higher energy), there must be a minimum somewhere between -+ * the old position and the new one. -+ * -+ * Due to the finite numerical accuracy, it turns out that it is a good idea -+ * to even accept a SMALL increase in energy, if the derivative is still downhill. -+ * This leads to lower final energies in the tests I've done. 
/ Erik -+ */ -+ s_a->epot = s_min->epot; -+ a = 0.0; -+ c = a + stepsize; /* reference position along line is zero */ -+ -+ if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) -+ { -+ em_dd_partition_system(fplog, step, cr, top_global, inputrec, -+ s_min, top, mdatoms, fr, vsite, constr, -+ nrnb, wcycle); -+ } -+ -+ /* Take a trial step (new coords in s_c) */ -+ do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, c, s_min->s.cg_p, s_c, -+ constr, top, nrnb, wcycle, -1); -+ -+ neval++; -+ /* Calculate energy for the trial step */ -+ evaluate_energy(fplog, bVerbose, cr, -+ state_global, top_global, s_c, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, FALSE); -+ -+ /* Calc derivative along line */ -+ p = s_c->s.cg_p; -+ sf = s_c->f; -+ gpc = 0; -+ for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++) -+ { -+ for (m = 0; m < DIM; m++) -+ { -+ gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */ -+ } -+ } -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpc, cr); -+ } -+ -+ /* This is the max amount of increase in energy we tolerate */ -+ tmp = sqrt(GMX_REAL_EPS)*fabs(s_a->epot); -+ -+ /* Accept the step if the energy is lower, or if it is not significantly higher -+ * and the line derivative is still negative. -+ */ -+ if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) -+ { -+ foundlower = TRUE; -+ /* Great, we found a better energy. Increase step for next iteration -+ * if we are still going down, decrease it otherwise -+ */ -+ if (gpc < 0) -+ { -+ stepsize *= 1.618034; /* The golden section */ -+ } -+ else -+ { -+ stepsize *= 0.618034; /* 1/golden section */ -+ } -+ } -+ else -+ { -+ /* New energy is the same or higher. We will have to do some work -+ * to find a smaller value in the interval. Take smaller step next time! -+ */ -+ foundlower = FALSE; -+ stepsize *= 0.618034; -+ } -+ -+ -+ -+ -+ /* OK, if we didn't find a lower value we will have to locate one now - there must -+ * be one in the interval [a=0,c]. -+ * The same thing is valid here, though: Don't spend dozens of iterations to find -+ * the line minimum. We try to interpolate based on the derivative at the endpoints, -+ * and only continue until we find a lower value. In most cases this means 1-2 iterations. -+ * -+ * I also have a safeguard for potentially really patological functions so we never -+ * take more than 20 steps before we give up ... -+ * -+ * If we already found a lower value we just skip this step and continue to the update. -+ */ -+ if (!foundlower) -+ { -+ nminstep = 0; -+ -+ do -+ { -+ /* Select a new trial point. -+ * If the derivatives at points a & c have different sign we interpolate to zero, -+ * otherwise just do a bisection. 
-+ */ -+ if (gpa < 0 && gpc > 0) -+ { -+ b = a + gpa*(a-c)/(gpc-gpa); -+ } -+ else -+ { -+ b = 0.5*(a+c); -+ } -+ -+ /* safeguard if interpolation close to machine accuracy causes errors: -+ * never go outside the interval -+ */ -+ if (b <= a || b >= c) -+ { -+ b = 0.5*(a+c); -+ } -+ -+ if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) -+ { -+ /* Reload the old state */ -+ em_dd_partition_system(fplog, -1, cr, top_global, inputrec, -+ s_min, top, mdatoms, fr, vsite, constr, -+ nrnb, wcycle); -+ } -+ -+ /* Take a trial step to this new point - new coords in s_b */ -+ do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, b, s_min->s.cg_p, s_b, -+ constr, top, nrnb, wcycle, -1); -+ -+ neval++; -+ /* Calculate energy for the trial step */ -+ evaluate_energy(fplog, bVerbose, cr, -+ state_global, top_global, s_b, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, FALSE); -+ -+ /* p does not change within a step, but since the domain decomposition -+ * might change, we have to use cg_p of s_b here. -+ */ -+ p = s_b->s.cg_p; -+ sf = s_b->f; -+ gpb = 0; -+ for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++) -+ { -+ for (m = 0; m < DIM; m++) -+ { -+ gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */ -+ } -+ } -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpb, cr); -+ } -+ -+ if (debug) -+ { -+ fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n", -+ s_a->epot, s_b->epot, s_c->epot, gpb); -+ } -+ -+ epot_repl = s_b->epot; -+ -+ /* Keep one of the intervals based on the value of the derivative at the new point */ -+ if (gpb > 0) -+ { -+ /* Replace c endpoint with b */ -+ swap_em_state(s_b, s_c); -+ c = b; -+ gpc = gpb; -+ } -+ else -+ { -+ /* Replace a endpoint with b */ -+ swap_em_state(s_b, s_a); -+ a = b; -+ gpa = gpb; -+ } -+ -+ /* -+ * Stop search as soon as we find a value smaller than the endpoints. -+ * Never run more than 20 steps, no matter what. -+ */ -+ nminstep++; -+ } -+ while ((epot_repl > s_a->epot || epot_repl > s_c->epot) && -+ (nminstep < 20)); -+ -+ if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS || -+ nminstep >= 20) -+ { -+ /* OK. We couldn't find a significantly lower energy. -+ * If beta==0 this was steepest descent, and then we give up. -+ * If not, set beta=0 and restart with steepest descent before quitting. -+ */ -+ if (beta == 0.0) -+ { -+ /* Converged */ -+ converged = TRUE; -+ break; -+ } -+ else -+ { -+ /* Reset memory before giving up */ -+ beta = 0.0; -+ continue; -+ } -+ } -+ -+ /* Select min energy state of A & C, put the best in B. -+ */ -+ if (s_c->epot < s_a->epot) -+ { -+ if (debug) -+ { -+ fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n", -+ s_c->epot, s_a->epot); -+ } -+ swap_em_state(s_b, s_c); -+ gpb = gpc; -+ b = c; -+ } -+ else -+ { -+ if (debug) -+ { -+ fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n", -+ s_a->epot, s_c->epot); -+ } -+ swap_em_state(s_b, s_a); -+ gpb = gpa; -+ b = a; -+ } -+ -+ } -+ else -+ { -+ if (debug) -+ { -+ fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n", -+ s_c->epot); -+ } -+ swap_em_state(s_b, s_c); -+ gpb = gpc; -+ b = c; -+ } -+ -+ /* new search direction */ -+ /* beta = 0 means forget all memory and restart with steepest descents. */ -+ if (nstcg && ((step % nstcg) == 0)) -+ { -+ beta = 0.0; -+ } -+ else -+ { -+ /* s_min->fnorm cannot be zero, because then we would have converged -+ * and broken out. 
-+ */ -+ -+ /* Polak-Ribiere update. -+ * Change to fnorm2/fnorm2_old for Fletcher-Reeves -+ */ -+ beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b); -+ } -+ /* Limit beta to prevent oscillations */ -+ if (fabs(beta) > 5.0) -+ { -+ beta = 0.0; -+ } -+ -+ -+ /* update positions */ -+ swap_em_state(s_min, s_b); -+ gpa = gpb; -+ -+ /* Print it if necessary */ -+ if (MASTER(cr)) -+ { -+ if (bVerbose) -+ { -+ fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", -+ step, s_min->epot, s_min->fnorm/sqrt(state_global->natoms), -+ s_min->fmax, s_min->a_fmax+1); -+ } -+ /* Store the new (lower) energies */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)step, -+ mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box, -+ NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ -+ do_log = do_per_step(step, inputrec->nstlog); -+ do_ene = do_per_step(step, inputrec->nstenergy); -+ if (do_log) -+ { -+ print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]); -+ } -+ print_ebin(outf->fp_ene, do_ene, FALSE, FALSE, -+ do_log ? fplog : NULL, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ -+ /* Stop when the maximum force lies below tolerance. -+ * If we have reached machine precision, converged is already set to true. -+ */ -+ converged = converged || (s_min->fmax < inputrec->em_tol); -+ -+ } /* End of the loop */ -+ -+ if (converged) -+ { -+ step--; /* we never took that last step in this case */ -+ -+ } -+ if (s_min->fmax > inputrec->em_tol) -+ { -+ if (MASTER(cr)) -+ { -+ warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE); -+ warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE); -+ } -+ converged = FALSE; -+ } -+ -+ if (MASTER(cr)) -+ { -+ /* If we printed energy and/or logfile last step (which was the last step) -+ * we don't have to do it again, but otherwise print the final values. -+ */ -+ if (!do_log) -+ { -+ /* Write final value to log since we didn't do anything the last step */ -+ print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]); -+ } -+ if (!do_ene || !do_log) -+ { -+ /* Write final energy file entries */ -+ print_ebin(outf->fp_ene, !do_ene, FALSE, FALSE, -+ !do_log ? fplog : NULL, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ } -+ -+ /* Print some stuff... */ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "\nwriting lowest energy coordinates.\n"); -+ } -+ -+ /* IMPORTANT! -+ * For accurate normal mode calculation it is imperative that we -+ * store the last conformation into the full precision binary trajectory. -+ * -+ * However, we should only do it if we did NOT already write this step -+ * above (which we did if do_x or do_f was true). 
-+ */ -+ do_x = !do_per_step(step, inputrec->nstxout); -+ do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout)); -+ -+ write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), -+ top_global, inputrec, step, -+ s_min, state_global, f_global); -+ -+ fnormn = s_min->fnorm/sqrt(state_global->natoms); -+ -+ if (MASTER(cr)) -+ { -+ print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps, -+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); -+ print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps, -+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); -+ -+ fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); -+ } -+ -+ finish_em(fplog, cr, outf, runtime, wcycle); -+ -+ /* To print the actual number of steps we needed somewhere */ -+ runtime->nsteps_done = step; -+ -+ return 0; -+} /* That's all folks */ -+ -+ -+double do_lbfgs(FILE *fplog, t_commrec *cr, -+ int nfile, const t_filenm fnm[], -+ const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact, -+ int nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int stepout, -+ t_inputrec *inputrec, -+ gmx_mtop_t *top_global, t_fcdata *fcd, -+ t_state *state, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t ed, -+ t_forcerec *fr, -+ int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, -+ gmx_membed_t membed, -+ real cpt_period, real max_hours, -+ const char *deviceOptions, -+ unsigned long Flags, -+ gmx_runtime_t *runtime) -+{ -+ static const char *LBFGS = "Low-Memory BFGS Minimizer"; -+ em_state_t ems; -+ gmx_localtop_t *top; -+ gmx_enerdata_t *enerd; -+ rvec *f; -+ gmx_global_stat_t gstat; -+ t_graph *graph; -+ rvec *f_global; -+ int ncorr, nmaxcorr, point, cp, neval, nminstep; -+ double stepsize, gpa, gpb, gpc, tmp, minstep; -+ real *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg; -+ real *xa, *xb, *xc, *fa, *fb, *fc, *xtmp, *ftmp; -+ real a, b, c, maxdelta, delta; -+ real diag, Epot0, Epot, EpotA, EpotB, EpotC; -+ real dgdx, dgdg, sq, yr, beta; -+ t_mdebin *mdebin; -+ gmx_bool converged, first; -+ rvec mu_tot; -+ real fnorm, fmax; -+ gmx_bool do_log, do_ene, do_x, do_f, foundlower, *frozen; -+ tensor vir, pres; -+ int start, end, number_steps; -+ gmx_mdoutf_t *outf; -+ int i, k, m, n, nfmax, gf, step; -+ int mdof_flags; -+ /* not used */ -+ real terminate; -+ -+ if (PAR(cr)) -+ { -+ gmx_fatal(FARGS, "Cannot do parallel L-BFGS Minimization - yet.\n"); -+ } -+ -+ if (NULL != constr) -+ { -+ gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent)."); -+ } -+ -+ n = 3*state->natoms; -+ nmaxcorr = inputrec->nbfgscorr; -+ -+ /* Allocate memory */ -+ /* Use pointers to real so we dont have to loop over both atoms and -+ * dimensions all the time... -+ * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real -+ * that point to the same memory. 
-+ */ -+ snew(xa, n); -+ snew(xb, n); -+ snew(xc, n); -+ snew(fa, n); -+ snew(fb, n); -+ snew(fc, n); -+ snew(frozen, n); -+ -+ snew(p, n); -+ snew(lastx, n); -+ snew(lastf, n); -+ snew(rho, nmaxcorr); -+ snew(alpha, nmaxcorr); -+ -+ snew(dx, nmaxcorr); -+ for (i = 0; i < nmaxcorr; i++) -+ { -+ snew(dx[i], n); -+ } -+ -+ snew(dg, nmaxcorr); -+ for (i = 0; i < nmaxcorr; i++) -+ { -+ snew(dg[i], n); -+ } -+ -+ step = 0; -+ neval = 0; -+ -+ /* Init em */ -+ init_em(fplog, LBFGS, cr, inputrec, -+ state, top_global, &ems, &top, &f, &f_global, -+ nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, -+ nfile, fnm, &outf, &mdebin); -+ /* Do_lbfgs is not completely updated like do_steep and do_cg, -+ * so we free some memory again. -+ */ -+ sfree(ems.s.x); -+ sfree(ems.f); -+ -+ xx = (real *)state->x; -+ ff = (real *)f; -+ -+ start = mdatoms->start; -+ end = mdatoms->homenr + start; -+ -+ /* Print to log file */ -+ print_em_start(fplog, cr, runtime, wcycle, LBFGS); -+ -+ do_log = do_ene = do_x = do_f = TRUE; -+ -+ /* Max number of steps */ -+ number_steps = inputrec->nsteps; -+ -+ /* Create a 3*natoms index to tell whether each degree of freedom is frozen */ -+ gf = 0; -+ for (i = start; i < end; i++) -+ { -+ if (mdatoms->cFREEZE) -+ { -+ gf = mdatoms->cFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ frozen[3*i+m] = inputrec->opts.nFreeze[gf][m]; -+ } -+ } -+ if (MASTER(cr)) -+ { -+ sp_header(stderr, LBFGS, inputrec->em_tol, number_steps); -+ } -+ if (fplog) -+ { -+ sp_header(fplog, LBFGS, inputrec->em_tol, number_steps); -+ } -+ -+ if (vsite) -+ { -+ construct_vsites(fplog, vsite, state->x, nrnb, 1, NULL, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, graph, cr, state->box); -+ } -+ -+ /* Call the force routine and some auxiliary (neighboursearching etc.) */ -+ /* do_force always puts the charge groups in the box and shifts again -+ * We do not unshift, so molecules are always whole -+ */ -+ neval++; -+ ems.s.x = state->x; -+ ems.f = f; -+ evaluate_energy(fplog, bVerbose, cr, -+ state, top_global, &ems, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, TRUE); -+ where(); -+ -+ if (MASTER(cr)) -+ { -+ /* Copy stuff to the energy bin for easy printing etc. */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)step, -+ mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box, -+ NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ -+ print_ebin_header(fplog, step, step, state->lambda[efptFEP]); -+ print_ebin(outf->fp_ene, TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ where(); -+ -+ /* This is the starting energy */ -+ Epot = enerd->term[F_EPOT]; -+ -+ fnorm = ems.fnorm; -+ fmax = ems.fmax; -+ nfmax = ems.a_fmax; -+ -+ /* Set the initial step. -+ * since it will be multiplied by the non-normalized search direction -+ * vector (force vector the first time), we scale it by the -+ * norm of the force. -+ */ -+ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr); -+ fprintf(stderr, " F-max = %12.5e on atom %d\n", fmax, nfmax+1); -+ fprintf(stderr, " F-Norm = %12.5e\n", fnorm/sqrt(state->natoms)); -+ fprintf(stderr, "\n"); -+ /* and copy to the log file too... 
*/ -+ fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr); -+ fprintf(fplog, " F-max = %12.5e on atom %d\n", fmax, nfmax+1); -+ fprintf(fplog, " F-Norm = %12.5e\n", fnorm/sqrt(state->natoms)); -+ fprintf(fplog, "\n"); -+ } -+ -+ point = 0; -+ for (i = 0; i < n; i++) -+ { -+ if (!frozen[i]) -+ { -+ dx[point][i] = ff[i]; /* Initial search direction */ -+ } -+ else -+ { -+ dx[point][i] = 0; -+ } -+ } -+ -+ stepsize = 1.0/fnorm; -+ converged = FALSE; -+ -+ /* Start the loop over BFGS steps. -+ * Each successful step is counted, and we continue until -+ * we either converge or reach the max number of steps. -+ */ -+ -+ ncorr = 0; -+ -+ /* Set the gradient from the force */ -+ converged = FALSE; -+ for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++) -+ { -+ -+ /* Write coordinates if necessary */ -+ do_x = do_per_step(step, inputrec->nstxout); -+ do_f = do_per_step(step, inputrec->nstfout); -+ -+ mdof_flags = 0; -+ if (do_x) -+ { -+ mdof_flags |= MDOF_X; -+ } -+ -+ if (do_f) -+ { -+ mdof_flags |= MDOF_F; -+ } -+ -+ write_traj(fplog, cr, outf, mdof_flags, -+ top_global, step, (real)step, state, state, f, f, NULL, NULL); -+ -+ /* Do the linesearching in the direction dx[point][0..(n-1)] */ -+ -+ /* pointer to current direction - point=0 first time here */ -+ s = dx[point]; -+ -+ /* calculate line gradient */ -+ for (gpa = 0, i = 0; i < n; i++) -+ { -+ gpa -= s[i]*ff[i]; -+ } -+ -+ /* Calculate minimum allowed stepsize, before the average (norm) -+ * relative change in coordinate is smaller than precision -+ */ -+ for (minstep = 0, i = 0; i < n; i++) -+ { -+ tmp = fabs(xx[i]); -+ if (tmp < 1.0) -+ { -+ tmp = 1.0; -+ } -+ tmp = s[i]/tmp; -+ minstep += tmp*tmp; -+ } -+ minstep = GMX_REAL_EPS/sqrt(minstep/n); -+ -+ if (stepsize < minstep) -+ { -+ converged = TRUE; -+ break; -+ } -+ -+ /* Store old forces and coordinates */ -+ for (i = 0; i < n; i++) -+ { -+ lastx[i] = xx[i]; -+ lastf[i] = ff[i]; -+ } -+ Epot0 = Epot; -+ -+ first = TRUE; -+ -+ for (i = 0; i < n; i++) -+ { -+ xa[i] = xx[i]; -+ } -+ -+ /* Take a step downhill. -+ * In theory, we should minimize the function along this direction. -+ * That is quite possible, but it turns out to take 5-10 function evaluations -+ * for each line. However, we dont really need to find the exact minimum - -+ * it is much better to start a new BFGS step in a modified direction as soon -+ * as we are close to it. This will save a lot of energy evaluations. -+ * -+ * In practice, we just try to take a single step. -+ * If it worked (i.e. lowered the energy), we increase the stepsize but -+ * the continue straight to the next BFGS step without trying to find any minimum. -+ * If it didn't work (higher energy), there must be a minimum somewhere between -+ * the old position and the new one. -+ * -+ * Due to the finite numerical accuracy, it turns out that it is a good idea -+ * to even accept a SMALL increase in energy, if the derivative is still downhill. -+ * This leads to lower final energies in the tests I've done. / Erik -+ */ -+ foundlower = FALSE; -+ EpotA = Epot0; -+ a = 0.0; -+ c = a + stepsize; /* reference position along line is zero */ -+ -+ /* Check stepsize first. We do not allow displacements -+ * larger than emstep. 
-+ */ -+ do -+ { -+ c = a + stepsize; -+ maxdelta = 0; -+ for (i = 0; i < n; i++) -+ { -+ delta = c*s[i]; -+ if (delta > maxdelta) -+ { -+ maxdelta = delta; -+ } -+ } -+ if (maxdelta > inputrec->em_stepsize) -+ { -+ stepsize *= 0.1; -+ } -+ } -+ while (maxdelta > inputrec->em_stepsize); -+ -+ /* Take a trial step */ -+ for (i = 0; i < n; i++) -+ { -+ xc[i] = lastx[i] + c*s[i]; -+ } -+ -+ neval++; -+ /* Calculate energy for the trial step */ -+ ems.s.x = (rvec *)xc; -+ ems.f = (rvec *)fc; -+ evaluate_energy(fplog, bVerbose, cr, -+ state, top_global, &ems, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, step, FALSE); -+ EpotC = ems.epot; -+ -+ /* Calc derivative along line */ -+ for (gpc = 0, i = 0; i < n; i++) -+ { -+ gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */ -+ } -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpc, cr); -+ } -+ -+ /* This is the max amount of increase in energy we tolerate */ -+ tmp = sqrt(GMX_REAL_EPS)*fabs(EpotA); -+ -+ /* Accept the step if the energy is lower, or if it is not significantly higher -+ * and the line derivative is still negative. -+ */ -+ if (EpotC < EpotA || (gpc < 0 && EpotC < (EpotA+tmp))) -+ { -+ foundlower = TRUE; -+ /* Great, we found a better energy. Increase step for next iteration -+ * if we are still going down, decrease it otherwise -+ */ -+ if (gpc < 0) -+ { -+ stepsize *= 1.618034; /* The golden section */ -+ } -+ else -+ { -+ stepsize *= 0.618034; /* 1/golden section */ -+ } -+ } -+ else -+ { -+ /* New energy is the same or higher. We will have to do some work -+ * to find a smaller value in the interval. Take smaller step next time! -+ */ -+ foundlower = FALSE; -+ stepsize *= 0.618034; -+ } -+ -+ /* OK, if we didn't find a lower value we will have to locate one now - there must -+ * be one in the interval [a=0,c]. -+ * The same thing is valid here, though: Don't spend dozens of iterations to find -+ * the line minimum. We try to interpolate based on the derivative at the endpoints, -+ * and only continue until we find a lower value. In most cases this means 1-2 iterations. -+ * -+ * I also have a safeguard for potentially really patological functions so we never -+ * take more than 20 steps before we give up ... -+ * -+ * If we already found a lower value we just skip this step and continue to the update. -+ */ -+ -+ if (!foundlower) -+ { -+ -+ nminstep = 0; -+ do -+ { -+ /* Select a new trial point. -+ * If the derivatives at points a & c have different sign we interpolate to zero, -+ * otherwise just do a bisection. 
-+ */ -+ -+ if (gpa < 0 && gpc > 0) -+ { -+ b = a + gpa*(a-c)/(gpc-gpa); -+ } -+ else -+ { -+ b = 0.5*(a+c); -+ } -+ -+ /* safeguard if interpolation close to machine accuracy causes errors: -+ * never go outside the interval -+ */ -+ if (b <= a || b >= c) -+ { -+ b = 0.5*(a+c); -+ } -+ -+ /* Take a trial step */ -+ for (i = 0; i < n; i++) -+ { -+ xb[i] = lastx[i] + b*s[i]; -+ } -+ -+ neval++; -+ /* Calculate energy for the trial step */ -+ ems.s.x = (rvec *)xb; -+ ems.f = (rvec *)fb; -+ evaluate_energy(fplog, bVerbose, cr, -+ state, top_global, &ems, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, step, FALSE); -+ EpotB = ems.epot; -+ -+ fnorm = ems.fnorm; -+ -+ for (gpb = 0, i = 0; i < n; i++) -+ { -+ gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */ -+ -+ } -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpb, cr); -+ } -+ -+ /* Keep one of the intervals based on the value of the derivative at the new point */ -+ if (gpb > 0) -+ { -+ /* Replace c endpoint with b */ -+ EpotC = EpotB; -+ c = b; -+ gpc = gpb; -+ /* swap coord pointers b/c */ -+ xtmp = xb; -+ ftmp = fb; -+ xb = xc; -+ fb = fc; -+ xc = xtmp; -+ fc = ftmp; -+ } -+ else -+ { -+ /* Replace a endpoint with b */ -+ EpotA = EpotB; -+ a = b; -+ gpa = gpb; -+ /* swap coord pointers a/b */ -+ xtmp = xb; -+ ftmp = fb; -+ xb = xa; -+ fb = fa; -+ xa = xtmp; -+ fa = ftmp; -+ } -+ -+ /* -+ * Stop search as soon as we find a value smaller than the endpoints, -+ * or if the tolerance is below machine precision. -+ * Never run more than 20 steps, no matter what. -+ */ -+ nminstep++; -+ } -+ while ((EpotB > EpotA || EpotB > EpotC) && (nminstep < 20)); -+ -+ if (fabs(EpotB-Epot0) < GMX_REAL_EPS || nminstep >= 20) -+ { -+ /* OK. We couldn't find a significantly lower energy. -+ * If ncorr==0 this was steepest descent, and then we give up. -+ * If not, reset memory to restart as steepest descent before quitting. -+ */ -+ if (ncorr == 0) -+ { -+ /* Converged */ -+ converged = TRUE; -+ break; -+ } -+ else -+ { -+ /* Reset memory */ -+ ncorr = 0; -+ /* Search in gradient direction */ -+ for (i = 0; i < n; i++) -+ { -+ dx[point][i] = ff[i]; -+ } -+ /* Reset stepsize */ -+ stepsize = 1.0/fnorm; -+ continue; -+ } -+ } -+ -+ /* Select min energy state of A & C, put the best in xx/ff/Epot -+ */ -+ if (EpotC < EpotA) -+ { -+ Epot = EpotC; -+ /* Use state C */ -+ for (i = 0; i < n; i++) -+ { -+ xx[i] = xc[i]; -+ ff[i] = fc[i]; -+ } -+ stepsize = c; -+ } -+ else -+ { -+ Epot = EpotA; -+ /* Use state A */ -+ for (i = 0; i < n; i++) -+ { -+ xx[i] = xa[i]; -+ ff[i] = fa[i]; -+ } -+ stepsize = a; -+ } -+ -+ } -+ else -+ { -+ /* found lower */ -+ Epot = EpotC; -+ /* Use state C */ -+ for (i = 0; i < n; i++) -+ { -+ xx[i] = xc[i]; -+ ff[i] = fc[i]; -+ } -+ stepsize = c; -+ } -+ -+ /* Update the memory information, and calculate a new -+ * approximation of the inverse hessian -+ */ -+ -+ /* Have new data in Epot, xx, ff */ -+ if (ncorr < nmaxcorr) -+ { -+ ncorr++; -+ } -+ -+ for (i = 0; i < n; i++) -+ { -+ dg[point][i] = lastf[i]-ff[i]; -+ dx[point][i] *= stepsize; -+ } -+ -+ dgdg = 0; -+ dgdx = 0; -+ for (i = 0; i < n; i++) -+ { -+ dgdg += dg[point][i]*dg[point][i]; -+ dgdx += dg[point][i]*dx[point][i]; -+ } -+ -+ diag = dgdx/dgdg; -+ -+ rho[point] = 1.0/dgdx; -+ point++; -+ -+ if (point >= nmaxcorr) -+ { -+ point = 0; -+ } -+ -+ /* Update */ -+ for (i = 0; i < n; i++) -+ { -+ p[i] = ff[i]; -+ } -+ -+ cp = point; -+ -+ /* Recursive update. 
First go back over the memory points */ -+ for (k = 0; k < ncorr; k++) -+ { -+ cp--; -+ if (cp < 0) -+ { -+ cp = ncorr-1; -+ } -+ -+ sq = 0; -+ for (i = 0; i < n; i++) -+ { -+ sq += dx[cp][i]*p[i]; -+ } -+ -+ alpha[cp] = rho[cp]*sq; -+ -+ for (i = 0; i < n; i++) -+ { -+ p[i] -= alpha[cp]*dg[cp][i]; -+ } -+ } -+ -+ for (i = 0; i < n; i++) -+ { -+ p[i] *= diag; -+ } -+ -+ /* And then go forward again */ -+ for (k = 0; k < ncorr; k++) -+ { -+ yr = 0; -+ for (i = 0; i < n; i++) -+ { -+ yr += p[i]*dg[cp][i]; -+ } -+ -+ beta = rho[cp]*yr; -+ beta = alpha[cp]-beta; -+ -+ for (i = 0; i < n; i++) -+ { -+ p[i] += beta*dx[cp][i]; -+ } -+ -+ cp++; -+ if (cp >= ncorr) -+ { -+ cp = 0; -+ } -+ } -+ -+ for (i = 0; i < n; i++) -+ { -+ if (!frozen[i]) -+ { -+ dx[point][i] = p[i]; -+ } -+ else -+ { -+ dx[point][i] = 0; -+ } -+ } -+ -+ stepsize = 1.0; -+ -+ /* Test whether the convergence criterion is met */ -+ get_f_norm_max(cr, &(inputrec->opts), mdatoms, f, &fnorm, &fmax, &nfmax); -+ -+ /* Print it if necessary */ -+ if (MASTER(cr)) -+ { -+ if (bVerbose) -+ { -+ fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", -+ step, Epot, fnorm/sqrt(state->natoms), fmax, nfmax+1); -+ } -+ /* Store the new (lower) energies */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)step, -+ mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box, -+ NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ do_log = do_per_step(step, inputrec->nstlog); -+ do_ene = do_per_step(step, inputrec->nstenergy); -+ if (do_log) -+ { -+ print_ebin_header(fplog, step, step, state->lambda[efptFEP]); -+ } -+ print_ebin(outf->fp_ene, do_ene, FALSE, FALSE, -+ do_log ? fplog : NULL, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ -+ /* Stop when the maximum force lies below tolerance. -+ * If we have reached machine precision, converged is already set to true. -+ */ -+ -+ converged = converged || (fmax < inputrec->em_tol); -+ -+ } /* End of the loop */ -+ -+ if (converged) -+ { -+ step--; /* we never took that last step in this case */ -+ -+ } -+ if (fmax > inputrec->em_tol) -+ { -+ if (MASTER(cr)) -+ { -+ warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE); -+ warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE); -+ } -+ converged = FALSE; -+ } -+ -+ /* If we printed energy and/or logfile last step (which was the last step) -+ * we don't have to do it again, but otherwise print the final values. -+ */ -+ if (!do_log) /* Write final value to log since we didn't do anythin last step */ -+ { -+ print_ebin_header(fplog, step, step, state->lambda[efptFEP]); -+ } -+ if (!do_ene || !do_log) /* Write final energy file entries */ -+ { -+ print_ebin(outf->fp_ene, !do_ene, FALSE, FALSE, -+ !do_log ? fplog : NULL, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ -+ /* Print some stuff... */ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "\nwriting lowest energy coordinates.\n"); -+ } -+ -+ /* IMPORTANT! -+ * For accurate normal mode calculation it is imperative that we -+ * store the last conformation into the full precision binary trajectory. -+ * -+ * However, we should only do it if we did NOT already write this step -+ * above (which we did if do_x or do_f was true). 
-+ */ -+ do_x = !do_per_step(step, inputrec->nstxout); -+ do_f = !do_per_step(step, inputrec->nstfout); -+ write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), -+ top_global, inputrec, step, -+ &ems, state, f); -+ -+ if (MASTER(cr)) -+ { -+ print_converged(stderr, LBFGS, inputrec->em_tol, step, converged, -+ number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms)); -+ print_converged(fplog, LBFGS, inputrec->em_tol, step, converged, -+ number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms)); -+ -+ fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); -+ } -+ -+ finish_em(fplog, cr, outf, runtime, wcycle); -+ -+ /* To print the actual number of steps we needed somewhere */ -+ runtime->nsteps_done = step; -+ -+ return 0; -+} /* That's all folks */ -+ -+ -+double do_steep(FILE *fplog, t_commrec *cr, -+ int nfile, const t_filenm fnm[], -+ const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact, -+ int nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int stepout, -+ t_inputrec *inputrec, -+ gmx_mtop_t *top_global, t_fcdata *fcd, -+ t_state *state_global, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t ed, -+ t_forcerec *fr, -+ int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, -+ gmx_membed_t membed, -+ real cpt_period, real max_hours, -+ const char *deviceOptions, -+ unsigned long Flags, -+ gmx_runtime_t *runtime) -+{ -+ const char *SD = "Steepest Descents"; -+ em_state_t *s_min, *s_try; -+ rvec *f_global; -+ gmx_localtop_t *top; -+ gmx_enerdata_t *enerd; -+ rvec *f; -+ gmx_global_stat_t gstat; -+ t_graph *graph; -+ real stepsize, constepsize; -+ real ustep, fnormn; -+ gmx_mdoutf_t *outf; -+ t_mdebin *mdebin; -+ gmx_bool bDone, bAbort, do_x, do_f; -+ tensor vir, pres; -+ rvec mu_tot; -+ int nsteps; -+ int count = 0; -+ int steps_accepted = 0; -+ /* not used */ -+ real terminate = 0; -+ -+ s_min = init_em_state(); -+ s_try = init_em_state(); -+ -+ /* Init em and store the local state in s_try */ -+ init_em(fplog, SD, cr, inputrec, -+ state_global, top_global, s_try, &top, &f, &f_global, -+ nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, -+ nfile, fnm, &outf, &mdebin); -+ -+ /* Print to log file */ -+ print_em_start(fplog, cr, runtime, wcycle, SD); -+ -+ /* Set variables for stepsize (in nm). This is the largest -+ * step that we are going to make in any direction. 
-+ */ -+ ustep = inputrec->em_stepsize; -+ stepsize = 0; -+ -+ /* Max number of steps */ -+ nsteps = inputrec->nsteps; -+ -+ if (MASTER(cr)) -+ { -+ /* Print to the screen */ -+ sp_header(stderr, SD, inputrec->em_tol, nsteps); -+ } -+ if (fplog) -+ { -+ sp_header(fplog, SD, inputrec->em_tol, nsteps); -+ } -+ -+ /**** HERE STARTS THE LOOP **** -+ * count is the counter for the number of steps -+ * bDone will be TRUE when the minimization has converged -+ * bAbort will be TRUE when nsteps steps have been performed or when -+ * the stepsize becomes smaller than is reasonable for machine precision -+ */ -+ count = 0; -+ bDone = FALSE; -+ bAbort = FALSE; -+ while (!bDone && !bAbort) -+ { -+ bAbort = (nsteps >= 0) && (count == nsteps); -+ -+ /* set new coordinates, except for first step */ -+ if (count > 0) -+ { -+ do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, -+ s_min, stepsize, s_min->f, s_try, -+ constr, top, nrnb, wcycle, count); -+ } -+ -+ evaluate_energy(fplog, bVerbose, cr, -+ state_global, top_global, s_try, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, count, count == 0); -+ -+ if (MASTER(cr)) -+ { -+ print_ebin_header(fplog, count, count, s_try->s.lambda[efptFEP]); -+ } -+ -+ if (count == 0) -+ { -+ s_min->epot = s_try->epot + 1; -+ } -+ -+ /* Print it if necessary */ -+ if (MASTER(cr)) -+ { -+ if (bVerbose) -+ { -+ fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c", -+ count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1, -+ (s_try->epot < s_min->epot) ? '\n' : '\r'); -+ } -+ -+ if (s_try->epot < s_min->epot) -+ { -+ /* Store the new (lower) energies */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)count, -+ mdatoms->tmass, enerd, &s_try->s, inputrec->fepvals, inputrec->expandedvals, -+ s_try->s.box, NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ print_ebin(outf->fp_ene, TRUE, -+ do_per_step(steps_accepted, inputrec->nstdisreout), -+ do_per_step(steps_accepted, inputrec->nstorireout), -+ fplog, count, count, eprNORMAL, TRUE, -+ mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ fflush(fplog); -+ } -+ } -+ -+ /* Now if the new energy is smaller than the previous... -+ * or if this is the first step! -+ * or if we did random steps! -+ */ -+ -+ if ( (count == 0) || (s_try->epot < s_min->epot) ) -+ { -+ steps_accepted++; -+ -+ /* Test whether the convergence criterion is met... */ -+ bDone = (s_try->fmax < inputrec->em_tol); -+ -+ /* Copy the arrays for force, positions and energy */ -+ /* The 'Min' array always holds the coords and forces of the minimal -+ sampled energy */ -+ swap_em_state(s_min, s_try); -+ if (count > 0) -+ { -+ ustep *= 1.2; -+ } -+ -+ /* Write to trn, if necessary */ -+ do_x = do_per_step(steps_accepted, inputrec->nstxout); -+ do_f = do_per_step(steps_accepted, inputrec->nstfout); -+ write_em_traj(fplog, cr, outf, do_x, do_f, NULL, -+ top_global, inputrec, count, -+ s_min, state_global, f_global); -+ } -+ else -+ { -+ /* If energy is not smaller make the step smaller... 
*/ -+ ustep *= 0.5; -+ -+ if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) -+ { -+ /* Reload the old state */ -+ em_dd_partition_system(fplog, count, cr, top_global, inputrec, -+ s_min, top, mdatoms, fr, vsite, constr, -+ nrnb, wcycle); -+ } -+ } -+ -+ /* Determine new step */ -+ stepsize = ustep/s_min->fmax; -+ -+ /* Check if stepsize is too small, with 1 nm as a characteristic length */ -+#ifdef GMX_DOUBLE -+ if (count == nsteps || ustep < 1e-12) -+#else -+ if (count == nsteps || ustep < 1e-6) -+#endif -+ { -+ if (MASTER(cr)) -+ { -+ warn_step(stderr, inputrec->em_tol, count == nsteps, constr != NULL); -+ warn_step(fplog, inputrec->em_tol, count == nsteps, constr != NULL); -+ } -+ bAbort = TRUE; -+ } -+ -+ count++; -+ } /* End of the loop */ -+ -+ /* Print some shit... */ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "\nwriting lowest energy coordinates.\n"); -+ } -+ write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm), -+ top_global, inputrec, count, -+ s_min, state_global, f_global); -+ -+ fnormn = s_min->fnorm/sqrt(state_global->natoms); -+ -+ if (MASTER(cr)) -+ { -+ print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps, -+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); -+ print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps, -+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); -+ } -+ -+ finish_em(fplog, cr, outf, runtime, wcycle); -+ -+ /* To print the actual number of steps we needed somewhere */ -+ inputrec->nsteps = count; -+ -+ runtime->nsteps_done = count; -+ -+ return 0; -+} /* That's all folks */ -+ -+ -+double do_nm(FILE *fplog, t_commrec *cr, -+ int nfile, const t_filenm fnm[], -+ const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact, -+ int nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int stepout, -+ t_inputrec *inputrec, -+ gmx_mtop_t *top_global, t_fcdata *fcd, -+ t_state *state_global, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t ed, -+ t_forcerec *fr, -+ int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, -+ gmx_membed_t membed, -+ real cpt_period, real max_hours, -+ const char *deviceOptions, -+ unsigned long Flags, -+ gmx_runtime_t *runtime) -+{ -+ const char *NM = "Normal Mode Analysis"; -+ gmx_mdoutf_t *outf; -+ int natoms, atom, d; -+ int nnodes, node; -+ rvec *f_global; -+ gmx_localtop_t *top; -+ gmx_enerdata_t *enerd; -+ rvec *f; -+ gmx_global_stat_t gstat; -+ t_graph *graph; -+ real t, t0, lambda, lam0; -+ gmx_bool bNS; -+ tensor vir, pres; -+ rvec mu_tot; -+ rvec *fneg, *dfdx; -+ gmx_bool bSparse; /* use sparse matrix storage format */ -+ size_t sz=0; -+ gmx_sparsematrix_t * sparse_matrix = NULL; -+ real * full_matrix = NULL; -+ em_state_t * state_work; -+ -+ /* added with respect to mdrun */ -+ int i, j, k, row, col; -+ real der_range = 10.0*sqrt(GMX_REAL_EPS); -+ real x_min; -+ real fnorm, fmax; -+ -+ if (constr != NULL) -+ { -+ gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this combination is not supported"); -+ } -+ -+ state_work = init_em_state(); -+ -+ /* Init em and store the local state in state_minimum */ -+ init_em(fplog, NM, cr, inputrec, -+ state_global, top_global, state_work, &top, -+ &f, &f_global, -+ nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, -+ nfile, fnm, &outf, NULL); -+ -+ natoms = top_global->natoms; -+ snew(fneg, natoms); -+ snew(dfdx, natoms); -+ -+#ifndef GMX_DOUBLE -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, -+ "NOTE: This version of Gromacs has been compiled in 
single precision,\n" -+ " which MIGHT not be accurate enough for normal mode analysis.\n" -+ " Gromacs now uses sparse matrix storage, so the memory requirements\n" -+ " are fairly modest even if you recompile in double precision.\n\n"); -+ } -+#endif -+ -+ /* Check if we can/should use sparse storage format. -+ * -+ * Sparse format is only useful when the Hessian itself is sparse, which it -+ * will be when we use a cutoff. -+ * For small systems (n<1000) it is easier to always use full matrix format, though. -+ */ -+ if (EEL_FULL(fr->eeltype) || fr->rlist == 0.0) -+ { -+ md_print_info(cr, fplog, "Non-cutoff electrostatics used, forcing full Hessian format.\n"); -+ bSparse = FALSE; -+ } -+ else if (top_global->natoms < 1000) -+ { -+ md_print_info(cr, fplog, "Small system size (N=%d), using full Hessian format.\n", top_global->natoms); -+ bSparse = FALSE; -+ } -+ else -+ { -+ md_print_info(cr, fplog, "Using compressed symmetric sparse Hessian format.\n"); -+ bSparse = TRUE; -+ } -+ -+ if (MASTER(cr)) -+ { -+ sz = DIM*top_global->natoms; -+ -+ fprintf(stderr, "Allocating Hessian memory...\n\n"); -+ -+ if (bSparse) -+ { -+ sparse_matrix = gmx_sparsematrix_init(sz); -+ sparse_matrix->compressed_symmetric = TRUE; -+ } -+ else -+ { -+ snew(full_matrix, sz*sz); -+ } -+ } -+ -+ /* Initial values */ -+ t0 = inputrec->init_t; -+ lam0 = inputrec->fepvals->init_lambda; -+ t = t0; -+ lambda = lam0; -+ -+ init_nrnb(nrnb); -+ -+ where(); -+ -+ /* Write start time and temperature */ -+ print_em_start(fplog, cr, runtime, wcycle, NM); -+ -+ /* fudge nr of steps to nr of atoms */ -+ inputrec->nsteps = natoms*2; -+ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "starting normal mode calculation '%s'\n%d steps.\n\n", -+ *(top_global->name), (int)inputrec->nsteps); -+ } -+ -+ nnodes = cr->nnodes; -+ -+ /* Make evaluate_energy do a single node force calculation */ -+ cr->nnodes = 1; -+ evaluate_energy(fplog, bVerbose, cr, -+ state_global, top_global, state_work, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, TRUE); -+ cr->nnodes = nnodes; -+ -+ /* if forces are not small, warn user */ -+ get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, state_work); -+ -+ md_print_info(cr, fplog, "Maximum force:%12.5e\n", state_work->fmax); -+ if (state_work->fmax > 1.0e-3) -+ { -+ md_print_info(cr, fplog, -+ "The force is probably not small enough to " -+ "ensure that you are at a minimum.\n" -+ "Be aware that negative eigenvalues may occur\n" -+ "when the resulting matrix is diagonalized.\n\n"); -+ } -+ -+ /*********************************************************** -+ * -+ * Loop over all pairs in matrix -+ * -+ * do_force called twice. 
Once with positive and -+ * once with negative displacement -+ * -+ ************************************************************/ -+ -+ /* Steps are divided one by one over the nodes */ -+ for (atom = cr->nodeid; atom < natoms; atom += nnodes) -+ { -+ -+ for (d = 0; d < DIM; d++) -+ { -+ x_min = state_work->s.x[atom][d]; -+ -+ state_work->s.x[atom][d] = x_min - der_range; -+ -+ /* Make evaluate_energy do a single node force calculation */ -+ cr->nnodes = 1; -+ evaluate_energy(fplog, bVerbose, cr, -+ state_global, top_global, state_work, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, atom*2, FALSE); -+ -+ for (i = 0; i < natoms; i++) -+ { -+ copy_rvec(state_work->f[i], fneg[i]); -+ } -+ -+ state_work->s.x[atom][d] = x_min + der_range; -+ -+ evaluate_energy(fplog, bVerbose, cr, -+ state_global, top_global, state_work, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, atom*2+1, FALSE); -+ cr->nnodes = nnodes; -+ -+ /* x is restored to original */ -+ state_work->s.x[atom][d] = x_min; -+ -+ for (j = 0; j < natoms; j++) -+ { -+ for (k = 0; (k < DIM); k++) -+ { -+ dfdx[j][k] = -+ -(state_work->f[j][k] - fneg[j][k])/(2*der_range); -+ } -+ } -+ -+ if (!MASTER(cr)) -+ { -+#ifdef GMX_MPI -+#ifdef GMX_DOUBLE -+#define mpi_type MPI_DOUBLE -+#else -+#define mpi_type MPI_FLOAT -+#endif -+ MPI_Send(dfdx[0], natoms*DIM, mpi_type, MASTERNODE(cr), cr->nodeid, -+ cr->mpi_comm_mygroup); -+#endif -+ } -+ else -+ { -+ for (node = 0; (node < nnodes && atom+node < natoms); node++) -+ { -+ if (node > 0) -+ { -+#ifdef GMX_MPI -+ MPI_Status stat; -+ MPI_Recv(dfdx[0], natoms*DIM, mpi_type, node, node, -+ cr->mpi_comm_mygroup, &stat); -+#undef mpi_type -+#endif -+ } -+ -+ row = (atom + node)*DIM + d; -+ -+ for (j = 0; j < natoms; j++) -+ { -+ for (k = 0; k < DIM; k++) -+ { -+ col = j*DIM + k; -+ -+ if (bSparse) -+ { -+ if (col >= row && dfdx[j][k] != 0.0) -+ { -+ gmx_sparsematrix_increment_value(sparse_matrix, -+ row, col, dfdx[j][k]); -+ } -+ } -+ else -+ { -+ full_matrix[row*sz+col] = dfdx[j][k]; -+ } -+ } -+ } -+ } -+ } -+ -+ if (bVerbose && fplog) -+ { -+ fflush(fplog); -+ } -+ } -+ /* write progress */ -+ if (MASTER(cr) && bVerbose) -+ { -+ fprintf(stderr, "\rFinished step %d out of %d", -+ min(atom+nnodes, natoms), natoms); -+ fflush(stderr); -+ } -+ } -+ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "\n\nWriting Hessian...\n"); -+ gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix); -+ } -+ -+ finish_em(fplog, cr, outf, runtime, wcycle); -+ -+ runtime->nsteps_done = natoms*2; -+ -+ return 0; -+} diff --git a/g/GROMACS/gromacs-5.0.4-plumed-2.1.3-mpi.patch b/g/GROMACS/gromacs-5.0.4-plumed-2.1.3-mpi.patch deleted file mode 100644 index 50df9639..00000000 --- a/g/GROMACS/gromacs-5.0.4-plumed-2.1.3-mpi.patch +++ /dev/null @@ -1,9575 +0,0 @@ -diff --git a/Plumed.cmake b/Plumed.cmake -new file mode 100644 -index 0000000..01472f0 ---- /dev/null -+++ b/Plumed.cmake -@@ -0,0 +1,3 @@ -+# PLUMED: shared installation -+set(PLUMED_LOAD /apps/all/PLUMED/2.1.3-foss-2015g/lib/plumed///src/lib/libplumed.so -ldl ) -+set(PLUMED_DEPENDENCIES /apps/all/PLUMED/2.1.3-foss-2015g/lib/plumed///src/lib/libplumed.so) -diff --git a/Plumed.h b/Plumed.h -new file mode 100644 -index 0000000..16da74a ---- /dev/null -+++ b/Plumed.h -@@ -0,0 +1,494 @@ -+/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -+ Copyright (c) 2011-2014 The plumed team -+ (see the PEOPLE file at the 
root of the distribution for a list of names) -+ -+ See http://www.plumed-code.org for more information. -+ -+ This file is part of plumed, version 2. -+ -+ plumed is free software: you can redistribute it and/or modify -+ it under the terms of the GNU Lesser General Public License as published by -+ the Free Software Foundation, either version 3 of the License, or -+ (at your option) any later version. -+ -+ plumed is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public License -+ along with plumed. If not, see . -++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ -+#ifndef __PLUMED_wrapper_Plumed_h -+#define __PLUMED_wrapper_Plumed_h -+ -+/** -+\page ReferencePlumedH Reference for interfacing MD codes with PLUMED -+ -+ Plumed.h and Plumed.c contain the external plumed interface, which is used to -+ integrate it with MD engines. This interface is very general, and is expected -+ not to change across plumed versions. Plumed.c also implements a dummy version -+ of the interface, so as to allow a code to be fully linked even if the plumed -+ library is not available yet. These files could be directly included in the official -+ host MD distribution. In this manner, it will be sufficient to link the plumed -+ library at link time (on all systems) or directly at runtime (on system where -+ dynamic loading is enabled) to include plumed features. -+ -+ Why is Plumed.c written in C and not C++? The reason is that the resulting Plumed.o -+ needs to be linked with the host MD code immediately (whereas the rest of plumed -+ could be linked a posteriori). Imagine the MD code is written in FORTRAN: when we -+ link the Plumed.o file we would like not to need any C++ library linked. In this -+ manner, we do not need to know which C++ compiler will be used to compile plumed. -+ The C++ library is only linked to the "rest" of plumed, which actually use it. -+ Anyway, Plumed.c is written in such a manner to allow its compilation also in C++ -+ (C++ is a bit stricter than C; compatibility is checked when PlumedStatic.cpp, -+ which basically includes Plumed.c, is compiled with the C++ compiler). This will -+ allow e.g. MD codes written in C++ to just incorporate Plumed.c (maybe renamed into -+ Plumed.cpp), without the need of configuring a plain C compiler. -+ -+ Plumed interface can be used from C, C++ and FORTRAN. Everything concerning plumed -+ is hidden inside a single object type, which is described in C by a structure -+ (struct \ref plumed), in C++ by a class (PLMD::Plumed) and in FORTRAN by a -+ fixed-length string (CHARACTER(LEN=32)). Obviously C++ can use both struct -+ and class interfaces, but the first should be preferred. The reference interface -+ is the C one, whereas FORTRAN and C++ interfaces are implemented as wrappers -+ around it. -+ -+ In the C++ interface, all the routines are implemented as methods of PLMD::Plumed. -+ In the C and FORTRAN interfaces, all the routines are named plumed_*, to -+ avoid potential name clashes. Notice that the entire plumed library -+ is implemented in C++, and it is hidden inside the PLMD namespace. -+ -+ Handlers to the plumed object can be converted among different representations, -+ to allow inter-operability among languages. 
In C, there are tools to convert -+ to/from FORTRAN, whereas in C++ there are tools to convert to/from FORTRAN and C. -+ -+ These handlers only contain a pointer to the real structure, so that -+ when a plumed object is brought from one language to another, -+ it brings a reference to the same environment. -+ -+ Moreover, to simplify life in all cases where a single Plumed object is -+ required for the entire simulation (which covers most of the practical -+ applications with conventional MD codes) it is possible to take advantage -+ of a global interface, which is implicitly referring to a unique global instance. -+ The global object should still be initialized and finalized properly. -+ -+ The basic method to send a message to plumed is -+\verbatim -+ (C) plumed_cmd -+ (C++) PLMD::Plumed::cmd -+ (FORTRAN) PLUMED_F_CMD -+\endverbatim -+ -+ To initialize a plumed object, use: -+\verbatim -+ (C) plumed_create -+ (C++) (constructor of PLMD::Plumed) -+ (FORTRAN) PLUMED_F_CREATE -+\endverbatim -+ -+ To finalize it, use -+\verbatim -+ (C) plumed_finalize -+ (C++) (destructor of PLMD::Plumed) -+ (FORTRAN) PLUMED_F_FINALIZE -+\endverbatim -+ -+ To access to the global-object, use -+\verbatim -+ (C) plumed_gcreate, plumed_gfinalize, plumed_gcmd -+ (C++) PLMD::Plumed::gcreate, PLMD::Plumed::gfinalize, PLMD::Plumed::gcmd -+ (FORTRAN) PLUMED_F_GCREATE, PLUMED_F_GFINALIZE, PLUMED_F_GCMD -+\endverbatim -+ -+ To check if the global object has been initialized, use -+\verbatim -+ (C) plumed_ginitialized -+ (C++) PLMD::Plumed::ginitialized -+ (FORTRAN) PLUMED_F_GINITIALIZED -+\endverbatim -+ -+ To check if plumed library is available (this is useful for runtime linking), use -+\verbatim -+ (C) plumed_installed -+ (C++) PLMD::Plumed::installed -+ (FORTRAN) PLUMED_F_INSTALLED -+\endverbatim -+ -+ To convert handlers use -+\verbatim -+ (C) plumed_c2f (C to FORTRAN) -+ (C) plumed_f2c (FORTRAN to C) -+ (C++) Plumed(plumed) constructor (C to C++) -+ (C++) operator plumed() cast (C++ to C) -+ (C++) Plumed(char*) constructor (FORTRAN to C++) -+ (C++) toFortran(char*) (C++ to FORTRAN) -+\endverbatim -+ -+\verbatim -+ FORTRAN interface -+ SUBROUTINE PLUMED_F_INSTALLED(i) -+ INTEGER, INTENT(OUT) :: i -+ SUBROUTINE PLUMED_F_GINITIALIZED(i) -+ INTEGER, INTENT(OUT) :: i -+ SUBROUTINE PLUMED_F_GCREATE() -+ SUBROUTINE PLUMED_F_GCMD(key,val) -+ CHARACTER(LEN=*), INTENT(IN) :: key -+ UNSPECIFIED_TYPE, INTENT(INOUT) :: val(*) -+ SUBROUTINE PLUMED_F_GFINALIZE() -+ SUBROUTINE PLUMED_F_GLOBAL(p) -+ CHARACTER(LEN=32), INTENT(OUT) :: p -+ SUBROUTINE PLUMED_F_CREATE(p) -+ CHARACTER(LEN=32), INTENT(OUT) :: p -+ SUBROUTINE PLUMED_F_CMD(p,key,val) -+ CHARACTER(LEN=32), INTENT(IN) :: p -+ CHARACTER(LEN=*), INTENT(IN) :: key -+ UNSPECIFIED_TYPE, INTENT(INOUT) :: val(*) -+ SUBROUTINE PLUMED_F_FINALIZE(p) -+ CHARACTER(LEN=32), INTENT(IN) :: p -+\endverbatim -+ -+ The main routine is "cmd", which accepts two arguments: -+ key is a string containing the name of the command -+ val is the argument. it is declared const so as to use allow passing const objects, but in practice plumed -+ is going to modify val in several cases (using a const_cast). -+ In some cases val can be omitted: just pass a NULL pointer (in C++, val is optional and can be omitted). -+ The set of possible keys is the real API of the plumed library, and will be expanded with time. -+ New commands will be added, but backward compatibility will be retained as long as possible. 
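To make the call sequence described above concrete, here is a minimal C driver in the style this header expects. It is an illustration only, not part of the patch: the command keys used ("setNatoms", "setMDEngine", "setTimestep", "init", "setStep", "setPositions", "setMasses", "setBox", "setForces", "setVirial", "calc") are the ones commonly documented for the PLUMED 2 interface and should be checked against the manual of the PLUMED version actually linked.

    #include "Plumed.h"   /* the wrapper header added by this hunk */

    /* Drive PLUMED for a few MD steps; all arrays are owned by the host code
     * and f is expected to already hold the plain MD forces, to which PLUMED
     * adds its bias contribution in place. */
    static void plumed_driver_sketch(int natoms, double dt,
                                     double *x, double *f, double *masses,
                                     double box[9], double virial[9])
    {
        plumed p = plumed_create();              /* (C) constructor */

        plumed_cmd(p, "setNatoms", &natoms);     /* must precede "init" */
        plumed_cmd(p, "setMDEngine", "sketch");
        plumed_cmd(p, "setTimestep", &dt);
        plumed_cmd(p, "init", NULL);             /* parses the PLUMED input file */

        for (int step = 0; step < 100; step++)
        {
            plumed_cmd(p, "setStep", &step);
            plumed_cmd(p, "setPositions", x);
            plumed_cmd(p, "setMasses", masses);
            plumed_cmd(p, "setBox", box);
            plumed_cmd(p, "setForces", f);
            plumed_cmd(p, "setVirial", virial);
            plumed_cmd(p, "calc", NULL);         /* bias forces are accumulated into f */
        }

        plumed_finalize(p);                      /* (C) destructor */
    }

The same sequence maps one-to-one onto the C++ (PLMD::Plumed::cmd) and FORTRAN (PLUMED_F_CMD) wrappers described below.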
-+ -+ To pass plumed a callback function use the following syntax (not available in FORTRAN yet) -+\verbatim -+ plumed_function_holder ff; -+ ff.p=your_function; -+ plumed_cmd(plumed,"xxxx",&ff); -+\endverbatim -+ (this is passing the your_function() function to the "xxxx" command) -+*/ -+ -+#ifdef __cplusplus -+ extern "C" { -+#endif -+ -+/* Generic function pointer */ -+typedef void (*plumed_function_pointer)(void); -+ -+/** -+ \brief Holder for function pointer. -+ -+ To pass plumed a callback function use the following syntax: -+\verbatim -+ plumed_function_holder ff; -+ ff.p=your_function; -+ plumed_cmd(plumed,"xxxx",&ff); -+\endverbatim -+ (this is going to pass the your_function() function to the "xxxx" command) -+*/ -+ -+typedef struct { -+ plumed_function_pointer p; -+} plumed_function_holder; -+ -+/** -+ \brief Main plumed object -+ -+ This is an object containing a Plumed instance, which should be used in -+ the MD engine. It should first be initialized with plumed_create(), -+ then it communicates with the MD engine using plumed_cmd(). Finally, -+ before the termination, it should be deallocated with plumed_finalize(). -+ Its interface is very simple and general, and is expected -+ not to change across plumed versions. See \ref ReferencePlumedH. -+*/ -+typedef struct { -+/** -+ \private -+ \brief Void pointer holding the real PlumedMain structure -+*/ -+ void*p; -+} plumed; -+ -+/** \relates plumed -+ \brief Constructor -+ -+ \return The constructed plumed object -+*/ -+plumed plumed_create(void); -+ -+/** \relates plumed -+ \brief Tells p to execute a command -+ -+ \param p The plumed object on which command is acting -+ \param key The name of the command to be executed -+ \param val The argument. It is declared as const to allow calls like plumed_cmd(p,"A","B"), -+ but for some choice of key it can change the content -+*/ -+void plumed_cmd(plumed p,const char*key,const void*val); -+ -+/** \relates plumed -+ \brief Destructor -+ -+ \param p The plumed object to be deallocated -+*/ -+void plumed_finalize(plumed p); -+ -+/** \relates plumed -+ \brief Check if plumed is installed (for runtime binding) -+ -+ \return 1 if plumed is installed, to 0 otherwise -+*/ -+int plumed_installed(void); -+ -+/** \relates plumed -+ \brief Retrieves an handler to the global structure. -+*/ -+plumed plumed_global(void); -+ -+/** \relates plumed -+ \brief Check if the global interface has been initialized -+ -+ \return 1 if plumed has been initialized, 0 otherwise -+*/ -+int plumed_ginitialized(void); -+ -+/* global C interface, working on a global object */ -+ -+/** \relates plumed -+ \brief Constructor for the global interface. -+ -+ \note Equivalent to plumed_create(), but initialize a static global plumed object -+*/ -+void plumed_gcreate(void); -+ -+/** \relates plumed -+ \brief Tells to the global interface to execute a command. -+ -+ \param key The name of the command to be executed -+ \param val The argument. It is declared as const to allow calls like plumed_gcmd("A","B"), -+ but for some choice of key it can change the content -+ -+ \note Equivalent to plumed_cmd(), but skipping the plumed argument -+*/ -+void plumed_gcmd(const char* key,const void* val); -+ -+/** \relates plumed -+ \brief Destructor for the global interface. 
-+ -+ \note Equivalent to plumed_finalize(), but skipping the plumed argument -+*/ -+void plumed_gfinalize(void); -+ -+/* routines to convert char handler from/to plumed objects */ -+ -+/** \related plumed -+ \brief Converts a C handler to a FORTRAN handler -+ -+ \param p The C handler -+ \param c The FORTRAN handler (a char[32]) -+*/ -+void plumed_c2f(plumed p,char* c); -+ -+/** \related plumed -+ \brief Converts a FORTRAN handler to a C handler -+ \param c The FORTRAN handler (a char[32]) -+ \return The C handler -+*/ -+plumed plumed_f2c(const char* c); -+ -+#ifdef __cplusplus -+ } -+#endif -+ -+#ifdef __cplusplus -+ -+/* this is to include the NULL pointer */ -+#include -+ -+/* C++ interface is hidden in PLMD namespace (same as plumed library) */ -+namespace PLMD { -+ -+/** -+ C++ wrapper for \ref plumed. -+ -+ This class provides a C++ interface to PLUMED. -+*/ -+ -+class Plumed{ -+ plumed main; -+/** -+ keeps track if the object was created from scratch using -+ the defaults destructor (cloned=false) or if it was imported -+ from C or FORTRAN (cloned-true). In the latter case, the -+ plumed_finalize() method is not called when destructing the object, -+ since it is expected to be finalized in the C/FORTRAN code -+*/ -+ bool cloned; -+public: -+/** -+ Check if plumed is installed (for runtime binding) -+ \return true if plumed is installed, false otherwise -+*/ -+ static bool installed(); -+/** -+ Check if global-plumed has been initialized -+ \return true if global plumed object (see global()) is initialized (i.e. if gcreate() has been -+ called), false otherwise. -+*/ -+ static bool ginitialized(); -+/** -+ Initialize global-plumed -+*/ -+ static void gcreate(); -+/** -+ Send a command to global-plumed -+ \param key The name of the command to be executed -+ \param val The argument. It is declared as const to allow calls like gcmd("A","B"), -+ but for some choice of key it can change the content -+*/ -+ static void gcmd(const char* key,const void* val); -+/** -+ Finalize global-plumed -+*/ -+ static void gfinalize(); -+/** -+ Returns the Plumed global object -+ \return The Plumed global object -+*/ -+ static Plumed global(); -+/** -+ Constructor -+*/ -+ Plumed(); -+/** -+ Clone a Plumed object from a FORTRAN char* handler -+ \param c The FORTRAN handler (a char[32]). -+ -+ \attention The Plumed object created in this manner -+ will not finalize the corresponding plumed structure. -+ It is expected that the FORTRAN code calls plumed_c_finalize for it -+*/ -+ Plumed(const char*c); -+/** -+ Clone a Plumed object from a C plumed structure -+ \param p The C plumed structure. -+ -+ \attention The Plumed object created in this manner -+ will not finalize the corresponding plumed structure. -+ It is expected that the C code calls plumed_finalize for it -+*/ -+ Plumed(plumed p); -+private: -+/** Copy constructor is disabled (private and unimplemented) -+ The problem here is that after copying it will not be clear who is -+ going to finalize the corresponding plumed structure. -+*/ -+ Plumed(const Plumed&); -+/** Assignment operator is disabled (private and unimplemented) -+ The problem here is that after copying it will not be clear who is -+ going to finalize the corresponding plumed structure. -+*/ -+ Plumed&operator=(const Plumed&); -+public: -+/** -+ Retrieve the C plumed structure for this object -+*/ -+ operator plumed()const; -+/** -+ Retrieve a FORTRAN handler for this object -+ \param c The FORTRAN handler (a char[32]). 
-+*/ -+ void toFortran(char*c)const; -+/** -+ Send a command to this plumed object -+ \param key The name of the command to be executed -+ \param val The argument. It is declared as const to allow calls like p.cmd("A","B"), -+ but for some choice of key it can change the content -+*/ -+ void cmd(const char*key,const void*val=NULL); -+/** -+ Destructor -+ -+ Destructor is virtual so as to allow correct inheritance from Plumed object. -+ To avoid linking problems with g++, I specify "inline" also here (in principle -+ it should be enough to specify it down in the definition of the function, but -+ for some reason that I do not understand g++ does not inline it properly in that -+ case and complains when Plumed.h is included but Plumed.o is not linked. Anyway, the -+ way it is done here seems to work properly). -+*/ -+ inline virtual ~Plumed(); -+}; -+ -+/* All methods are inlined so as to avoid the compilation of an extra c++ file */ -+ -+inline -+bool Plumed::installed(){ -+ return plumed_installed(); -+} -+ -+inline -+Plumed::Plumed(): -+ main(plumed_create()), -+ cloned(false) -+{} -+ -+inline -+Plumed::Plumed(const char*c): -+ main(plumed_f2c(c)), -+ cloned(true) -+{} -+ -+inline -+Plumed::Plumed(plumed p): -+ main(p), -+ cloned(true) -+{} -+ -+inline -+Plumed::operator plumed()const{ -+ return main; -+} -+ -+inline -+void Plumed::toFortran(char*c)const{ -+ plumed_c2f(main,c); -+} -+ -+inline -+void Plumed::cmd(const char*key,const void*val){ -+ plumed_cmd(main,key,val); -+} -+ -+inline -+Plumed::~Plumed(){ -+ if(!cloned)plumed_finalize(main); -+} -+ -+inline -+bool Plumed::ginitialized(){ -+ return plumed_ginitialized(); -+} -+ -+inline -+void Plumed::gcreate(){ -+ plumed_gcreate(); -+} -+ -+inline -+void Plumed::gcmd(const char* key,const void* val){ -+ plumed_gcmd(key,val); -+} -+ -+inline -+void Plumed::gfinalize(){ -+ plumed_gfinalize(); -+} -+ -+inline -+Plumed Plumed::global(){ -+ return plumed_global(); -+} -+ -+} -+ -+#endif -+ -+ -+#endif -diff --git a/Plumed.inc b/Plumed.inc -new file mode 100644 -index 0000000..e1e29a7 ---- /dev/null -+++ b/Plumed.inc -@@ -0,0 +1,3 @@ -+# PLUMED: shared installation -+PLUMED_LOAD= /apps/all/PLUMED/2.1.3-foss-2015g/lib/plumed///src/lib/libplumed.so -ldl -+PLUMED_DEPENDENCIES= /apps/all/PLUMED/2.1.3-foss-2015g/lib/plumed///src/lib/libplumed.so -diff --git a/src/gromacs/CMakeLists.txt b/src/gromacs/CMakeLists.txt -index 6db37e2..cc97aa8 100644 ---- a/src/gromacs/CMakeLists.txt -+++ b/src/gromacs/CMakeLists.txt -@@ -32,6 +32,8 @@ - # To help us fund GROMACS development, we humbly ask that you cite - # the research papers on the package. Check out http://www.gromacs.org. - -+include(${CMAKE_SOURCE_DIR}/Plumed.cmake) -+ - set(LIBGROMACS_SOURCES) - - function (gmx_install_headers DESTINATION) -@@ -189,7 +191,7 @@ target_link_libraries(libgromacs - ${TNG_IO_LIBRARIES} - ${FFT_LIBRARIES} ${LINEAR_ALGEBRA_LIBRARIES} - ${XML_LIBRARIES} -- ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS}) -+ ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS} ${PLUMED_LOAD}) - set_target_properties(libgromacs PROPERTIES - OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}" - SOVERSION ${LIBRARY_SOVERSION} -diff --git a/src/gromacs/CMakeLists.txt.preplumed b/src/gromacs/CMakeLists.txt.preplumed -new file mode 100644 -index 0000000..6db37e2 ---- /dev/null -+++ b/src/gromacs/CMakeLists.txt.preplumed -@@ -0,0 +1,232 @@ -+# -+# This file is part of the GROMACS molecular simulation package. 
-+# -+# Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by -+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+# and including many others, as listed in the AUTHORS file in the -+# top-level source directory and at http://www.gromacs.org. -+# -+# GROMACS is free software; you can redistribute it and/or -+# modify it under the terms of the GNU Lesser General Public License -+# as published by the Free Software Foundation; either version 2.1 -+# of the License, or (at your option) any later version. -+# -+# GROMACS is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# Lesser General Public License for more details. -+# -+# You should have received a copy of the GNU Lesser General Public -+# License along with GROMACS; if not, see -+# http://www.gnu.org/licenses, or write to the Free Software Foundation, -+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+# -+# If you want to redistribute modifications to GROMACS, please -+# consider that scientific software is very special. Version -+# control is crucial - bugs must be traceable. We will be happy to -+# consider code for inclusion in the official distribution, but -+# derived work must not be called official GROMACS. Details are found -+# in the README & COPYING files - if they are missing, get the -+# official version at http://www.gromacs.org. -+# -+# To help us fund GROMACS development, we humbly ask that you cite -+# the research papers on the package. Check out http://www.gromacs.org. -+ -+set(LIBGROMACS_SOURCES) -+ -+function (gmx_install_headers DESTINATION) -+ if (NOT GMX_BUILD_MDRUN_ONLY) -+ if (DESTINATION) -+ set(DESTINATION ${INCL_INSTALL_DIR}/gromacs/${DESTINATION}) -+ else() -+ set(DESTINATION ${INCL_INSTALL_DIR}/gromacs) -+ endif() -+ install(FILES ${ARGN} DESTINATION ${DESTINATION} COMPONENT development) -+ endif() -+endfunction () -+ -+if(GMX_USE_TNG) -+ option(GMX_EXTERNAL_TNG "Use external TNG instead of compiling the version shipped with GROMACS." -+ OFF) -+ # Detect TNG if GMX_EXTERNAL_TNG is explicitly ON -+ if(GMX_EXTERNAL_TNG) -+ find_package(TNG_IO 1.6.0) -+ if(NOT TNG_IO_FOUND) -+ message(FATAL_ERROR -+ "TNG >= 1.6.0 not found. " -+ "You can set GMX_EXTERNAL_TNG=OFF to compile TNG.") -+ endif() -+ include_directories(${TNG_IO_INCLUDE_DIRS}) -+ endif() -+ if(NOT GMX_EXTERNAL_TNG) -+ include(${CMAKE_SOURCE_DIR}/src/external/tng_io/BuildTNG.cmake) -+ tng_get_source_list(TNG_SOURCES TNG_IO_DEFINITIONS) -+ list(APPEND LIBGROMACS_SOURCES ${TNG_SOURCES}) -+ tng_set_source_properties(WITH_ZLIB ${HAVE_ZLIB}) -+ -+ if (HAVE_ZLIB) -+ list(APPEND GMX_EXTRA_LIBRARIES ${ZLIB_LIBRARIES}) -+ include_directories(${ZLIB_INCLUDE_DIRS}) -+ endif() -+ endif() -+else() -+ # We still need to get tng/tng_io_fwd.h from somewhere! 
-+ include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/external/tng_io/include) -+endif() -+ -+add_subdirectory(gmxlib) -+add_subdirectory(mdlib) -+add_subdirectory(gmxpreprocess) -+add_subdirectory(commandline) -+add_subdirectory(fft) -+add_subdirectory(linearalgebra) -+add_subdirectory(math) -+add_subdirectory(random) -+add_subdirectory(onlinehelp) -+add_subdirectory(options) -+add_subdirectory(timing) -+add_subdirectory(utility) -+add_subdirectory(fileio) -+add_subdirectory(swap) -+add_subdirectory(essentialdynamics) -+add_subdirectory(pulling) -+add_subdirectory(simd) -+add_subdirectory(imd) -+if (NOT GMX_BUILD_MDRUN_ONLY) -+ add_subdirectory(legacyheaders) -+ add_subdirectory(gmxana) -+ add_subdirectory(statistics) -+ add_subdirectory(analysisdata) -+ add_subdirectory(selection) -+ add_subdirectory(trajectoryanalysis) -+ add_subdirectory(tools) -+endif() -+ -+list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${MDLIB_SOURCES}) -+ -+# This would be the standard way to include thread_mpi, but -+# we want libgromacs to link the functions directly -+#if(GMX_THREAD_MPI) -+# add_subdirectory(thread_mpi) -+#endif() -+#target_link_libraries(gmx ${GMX_EXTRA_LIBRARIES} ${THREAD_MPI_LIB}) -+ -+tmpi_get_source_list(THREAD_MPI_SOURCES ${CMAKE_SOURCE_DIR}/src/external/thread_mpi/src) -+list(APPEND LIBGROMACS_SOURCES ${THREAD_MPI_SOURCES}) -+ -+file(GLOB LIBGROMACS_HEADERS *.h) -+configure_file(version.h.cmakein version.h) -+gmx_install_headers("" ${LIBGROMACS_HEADERS}) -+gmx_install_headers("" ${CMAKE_CURRENT_BINARY_DIR}/version.h) -+ -+# Add target that generates baseversion-gen.c every time make is run -+# if git version info is requested, or create it statically. -+# This code is here instead of utility/CMakeLists.txt because CMake -+# ignores set_source_file_properties from subdirectories. -+set(GENERATED_VERSION_FILE -+ ${CMAKE_CURRENT_BINARY_DIR}/utility/baseversion-gen.c) -+set(GENERATED_VERSION_FILE_SOURCE -+ ${CMAKE_CURRENT_SOURCE_DIR}/utility/baseversion-gen.c.cmakein) -+if (GMX_GIT_VERSION_INFO) -+ add_custom_target(gmx-version ALL -+ COMMAND ${CMAKE_COMMAND} -+ -D GIT_EXECUTABLE="${GIT_EXECUTABLE}" -+ -D PROJECT_VERSION="${PROJECT_VERSION}" -+ -D PROJECT_SOURCE_DIR="${PROJECT_SOURCE_DIR}" -+ -D VERSION_CMAKEIN=${GENERATED_VERSION_FILE_SOURCE} -+ -D VERSION_OUT=${GENERATED_VERSION_FILE} -+ -P ${CMAKE_SOURCE_DIR}/cmake/gmxGenerateVersionInfo.cmake -+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} -+ DEPENDS ${GENERATED_VERSION_FILE_SOURCE} -+ COMMENT "Generating git version information") -+ set_source_files_properties(${GENERATED_VERSION_FILE} -+ PROPERTIES GENERATED true) -+else() -+ set(GMX_PROJECT_VERSION_STR ${PROJECT_VERSION}) -+ configure_file(${GENERATED_VERSION_FILE_SOURCE} ${GENERATED_VERSION_FILE}) -+endif() -+list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE}) -+ -+# apply gcc 4.4.x bug workaround -+if(GMX_USE_GCC44_BUG_WORKAROUND) -+ include(gmxGCC44O3BugWorkaround) -+ gmx_apply_gcc44_bug_workaround("gmxlib/bondfree.c") -+ gmx_apply_gcc44_bug_workaround("mdlib/force.c") -+ gmx_apply_gcc44_bug_workaround("mdlib/constr.c") -+endif() -+ -+add_library(libgromacs ${LIBGROMACS_SOURCES}) -+if (GMX_GIT_VERSION_INFO) -+ add_dependencies(libgromacs gmx-version) -+endif() -+ -+# Recent versions of gcc and clang give warnings on scanner.cpp, which -+# is a generated source file. These are awkward to suppress inline, so -+# we do it in the compilation command (after testing that the compiler -+# supports the suppressions). 
Setting the properties only works after -+# the related target has been created, e.g. after when the file is -+# used with add_library(). -+include(CheckCXXCompilerFlag) -+check_cxx_compiler_flag(-Wno-unused-parameter HAS_NO_UNUSED_PARAMETER) -+if (HAS_NO_UNUSED_PARAMETER) -+ set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-unused-parameter") -+endif() -+check_cxx_compiler_flag(-Wno-deprecated-register HAS_NO_DEPRECATED_REGISTER) -+if (HAS_NO_DEPRECATED_REGISTER) -+ set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-deprecated-register") -+else() -+ check_cxx_compiler_flag(-Wno-deprecated HAS_NO_DEPRECATED) -+ if (HAS_NO_DEPRECATED) -+ set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-deprecated") -+ endif() -+endif() -+set_source_files_properties(selection/scanner.cpp PROPERTIES COMPILE_FLAGS "${_scanner_cpp_compiler_flags}") -+ -+target_link_libraries(libgromacs -+ ${EXTRAE_LIBRARIES} -+ ${GMX_GPU_LIBRARIES} -+ ${GMX_EXTRA_LIBRARIES} -+ ${TNG_IO_LIBRARIES} -+ ${FFT_LIBRARIES} ${LINEAR_ALGEBRA_LIBRARIES} -+ ${XML_LIBRARIES} -+ ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS}) -+set_target_properties(libgromacs PROPERTIES -+ OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}" -+ SOVERSION ${LIBRARY_SOVERSION} -+ VERSION ${LIBRARY_VERSION} -+ COMPILE_FLAGS "${OpenMP_C_FLAGS}") -+ -+# Only install the library in mdrun-only mode if it is actually necessary -+# for the binary -+if (NOT GMX_BUILD_MDRUN_ONLY OR BUILD_SHARED_LIBS) -+ install(TARGETS libgromacs -+ LIBRARY DESTINATION ${LIB_INSTALL_DIR} -+ RUNTIME DESTINATION ${BIN_INSTALL_DIR} -+ ARCHIVE DESTINATION ${LIB_INSTALL_DIR} -+ COMPONENT libraries) -+endif() -+ -+if (NOT GMX_BUILD_MDRUN_ONLY) -+ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libgromacs.pc.cmakein -+ ${CMAKE_CURRENT_BINARY_DIR}/libgromacs.pc @ONLY) -+ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libgromacs.pc -+ DESTINATION ${LIB_INSTALL_DIR}/pkgconfig -+ RENAME "libgromacs${GMX_LIBS_SUFFIX}.pc" -+ COMPONENT development) -+endif() -+ -+if (INSTALL_CUDART_LIB) #can be set manual by user -+ if (GMX_GPU) -+ foreach(CUDA_LIB ${CUDA_LIBRARIES}) -+ string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB}) -+ if(IS_CUDART) #libcuda should not be installed -+ #install also name-links (linker uses those) -+ file(GLOB CUDA_LIBS ${CUDA_LIB}*) -+ install(FILES ${CUDA_LIBS} DESTINATION -+ ${LIB_INSTALL_DIR} COMPONENT libraries) -+ endif() -+ endforeach() -+ else() -+ message(WARNING "INSTALL_CUDART_LIB only makes sense with GMX_GPU") -+ endif() -+endif() -diff --git a/src/gromacs/mdlib/force.c b/src/gromacs/mdlib/force.c -index 5230983..8227d5b 100644 ---- a/src/gromacs/mdlib/force.c -+++ b/src/gromacs/mdlib/force.c -@@ -67,6 +67,14 @@ - #include "gromacs/timing/wallcycle.h" - #include "gmx_fatal.h" - -+/* PLUMED */ -+#include "../../../Plumed.h" -+int plumedswitch=0; -+plumed plumedmain; -+void(*plumedcmd)(plumed,const char*,const void*)=NULL; -+/* END PLUMED */ -+ -+ - void ns(FILE *fp, - t_forcerec *fr, - matrix box, -@@ -737,6 +745,13 @@ void do_force_lowlevel(FILE *fplog, gmx_int64_t step, - pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); - } - -+ /* PLUMED */ -+ if(plumedswitch){ -+ int plumedNeedsEnergy; -+ (*plumedcmd)(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); -+ if(!plumedNeedsEnergy) (*plumedcmd)(plumedmain,"performCalc",NULL); -+ } -+ /* END PLUMED */ - } - - void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd) -diff --git a/src/gromacs/mdlib/force.c.preplumed b/src/gromacs/mdlib/force.c.preplumed -new file mode 
100644 -index 0000000..5230983 ---- /dev/null -+++ b/src/gromacs/mdlib/force.c.preplumed -@@ -0,0 +1,1018 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team. -+ * Copyright (c) 2013,2014, by the GROMACS development team, led by -+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+ * and including many others, as listed in the AUTHORS file in the -+ * top-level source directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. -+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. 
-+ */ -+#ifdef HAVE_CONFIG_H -+#include -+#endif -+ -+#include -+#include -+#include -+#include "sysstuff.h" -+#include "typedefs.h" -+#include "macros.h" -+#include "gromacs/utility/smalloc.h" -+#include "macros.h" -+#include "physics.h" -+#include "force.h" -+#include "nonbonded.h" -+#include "names.h" -+#include "network.h" -+#include "pbc.h" -+#include "ns.h" -+#include "nrnb.h" -+#include "bondf.h" -+#include "mshift.h" -+#include "txtdump.h" -+#include "coulomb.h" -+#include "pme.h" -+#include "mdrun.h" -+#include "domdec.h" -+#include "qmmm.h" -+#include "gmx_omp_nthreads.h" -+ -+#include "gromacs/timing/wallcycle.h" -+#include "gmx_fatal.h" -+ -+void ns(FILE *fp, -+ t_forcerec *fr, -+ matrix box, -+ gmx_groups_t *groups, -+ gmx_localtop_t *top, -+ t_mdatoms *md, -+ t_commrec *cr, -+ t_nrnb *nrnb, -+ gmx_bool bFillGrid, -+ gmx_bool bDoLongRangeNS) -+{ -+ char *ptr; -+ int nsearch; -+ -+ -+ if (!fr->ns.nblist_initialized) -+ { -+ init_neighbor_list(fp, fr, md->homenr); -+ } -+ -+ if (fr->bTwinRange) -+ { -+ fr->nlr = 0; -+ } -+ -+ nsearch = search_neighbours(fp, fr, box, top, groups, cr, nrnb, md, -+ bFillGrid, bDoLongRangeNS); -+ if (debug) -+ { -+ fprintf(debug, "nsearch = %d\n", nsearch); -+ } -+ -+ /* Check whether we have to do dynamic load balancing */ -+ /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0)) -+ count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr, -+ &(top->idef),opts->ngener); -+ */ -+ if (fr->ns.dump_nl > 0) -+ { -+ dump_nblist(fp, cr, fr, fr->ns.dump_nl); -+ } -+} -+ -+static void reduce_thread_forces(int n, rvec *f, -+ tensor vir_q, tensor vir_lj, -+ real *Vcorr_q, real *Vcorr_lj, -+ real *dvdl_q, real *dvdl_lj, -+ int nthreads, f_thread_t *f_t) -+{ -+ int t, i; -+ int nthreads_loop gmx_unused; -+ -+ /* This reduction can run over any number of threads */ -+ nthreads_loop = gmx_omp_nthreads_get(emntBonded); -+#pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static) -+ for (i = 0; i < n; i++) -+ { -+ for (t = 1; t < nthreads; t++) -+ { -+ rvec_inc(f[i], f_t[t].f[i]); -+ } -+ } -+ for (t = 1; t < nthreads; t++) -+ { -+ *Vcorr_q += f_t[t].Vcorr_q; -+ *Vcorr_lj += f_t[t].Vcorr_lj; -+ *dvdl_q += f_t[t].dvdl[efptCOUL]; -+ *dvdl_lj += f_t[t].dvdl[efptVDW]; -+ m_add(vir_q, f_t[t].vir_q, vir_q); -+ m_add(vir_lj, f_t[t].vir_lj, vir_lj); -+ } -+} -+ -+void gmx_print_sepdvdl(FILE *fplog, const char *s, real v, real dvdlambda) -+{ -+ fprintf(fplog, " %-30s V %12.5e dVdl %12.5e\n", s, v, dvdlambda); -+} -+ -+void do_force_lowlevel(FILE *fplog, gmx_int64_t step, -+ t_forcerec *fr, t_inputrec *ir, -+ t_idef *idef, t_commrec *cr, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ t_mdatoms *md, -+ rvec x[], history_t *hist, -+ rvec f[], -+ rvec f_longrange[], -+ gmx_enerdata_t *enerd, -+ t_fcdata *fcd, -+ gmx_localtop_t *top, -+ gmx_genborn_t *born, -+ t_atomtypes *atype, -+ gmx_bool bBornRadii, -+ matrix box, -+ t_lambda *fepvals, -+ real *lambda, -+ t_graph *graph, -+ t_blocka *excl, -+ rvec mu_tot[], -+ int flags, -+ float *cycles_pme) -+{ -+ int i, j; -+ int donb_flags; -+ gmx_bool bDoEpot, bSepDVDL, bSB; -+ int pme_flags; -+ matrix boxs; -+ rvec box_size; -+ t_pbc pbc; -+ char buf[22]; -+ double clam_i, vlam_i; -+ real dvdl_dum[efptNR], dvdl_nb[efptNR], lam_i[efptNR]; -+ real dvdl_q, dvdl_lj; -+ -+#ifdef GMX_MPI -+ double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ -+#endif -+ -+#define PRINT_SEPDVDL(s, v, dvdlambda) if (bSepDVDL) { gmx_print_sepdvdl(fplog, s, v, dvdlambda); } -+ -+ set_pbc(&pbc, fr->ePBC, box); -+ -+ /* reset 
free energy components */ -+ for (i = 0; i < efptNR; i++) -+ { -+ dvdl_nb[i] = 0; -+ dvdl_dum[i] = 0; -+ } -+ -+ /* Reset box */ -+ for (i = 0; (i < DIM); i++) -+ { -+ box_size[i] = box[i][i]; -+ } -+ -+ bSepDVDL = (fr->bSepDVDL && do_per_step(step, ir->nstlog)); -+ debug_gmx(); -+ -+ /* do QMMM first if requested */ -+ if (fr->bQMMM) -+ { -+ enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr); -+ } -+ -+ if (bSepDVDL) -+ { -+ fprintf(fplog, "Step %s: non-bonded V and dVdl for rank %d:\n", -+ gmx_step_str(step, buf), cr->nodeid); -+ } -+ -+ /* Call the short range functions all in one go. */ -+ -+#ifdef GMX_MPI -+ /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ -+#define TAKETIME FALSE -+ if (TAKETIME) -+ { -+ MPI_Barrier(cr->mpi_comm_mygroup); -+ t0 = MPI_Wtime(); -+ } -+#endif -+ -+ if (ir->nwall) -+ { -+ /* foreign lambda component for walls */ -+ real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], -+ enerd->grpp.ener[egLJSR], nrnb); -+ PRINT_SEPDVDL("Walls", 0.0, dvdl_walls); -+ enerd->dvdl_lin[efptVDW] += dvdl_walls; -+ } -+ -+ /* If doing GB, reset dvda and calculate the Born radii */ -+ if (ir->implicit_solvent) -+ { -+ wallcycle_sub_start(wcycle, ewcsNONBONDED); -+ -+ for (i = 0; i < born->nr; i++) -+ { -+ fr->dvda[i] = 0; -+ } -+ -+ if (bBornRadii) -+ { -+ calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb); -+ } -+ -+ wallcycle_sub_stop(wcycle, ewcsNONBONDED); -+ } -+ -+ where(); -+ /* We only do non-bonded calculation with group scheme here, the verlet -+ * calls are done from do_force_cutsVERLET(). */ -+ if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) -+ { -+ donb_flags = 0; -+ /* Add short-range interactions */ -+ donb_flags |= GMX_NONBONDED_DO_SR; -+ -+ /* Currently all group scheme kernels always calculate (shift-)forces */ -+ if (flags & GMX_FORCE_FORCES) -+ { -+ donb_flags |= GMX_NONBONDED_DO_FORCE; -+ } -+ if (flags & GMX_FORCE_VIRIAL) -+ { -+ donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE; -+ } -+ if (flags & GMX_FORCE_ENERGY) -+ { -+ donb_flags |= GMX_NONBONDED_DO_POTENTIAL; -+ } -+ if (flags & GMX_FORCE_DO_LR) -+ { -+ donb_flags |= GMX_NONBONDED_DO_LR; -+ } -+ -+ wallcycle_sub_start(wcycle, ewcsNONBONDED); -+ do_nonbonded(fr, x, f, f_longrange, md, excl, -+ &enerd->grpp, nrnb, -+ lambda, dvdl_nb, -1, -1, donb_flags); -+ -+ /* If we do foreign lambda and we have soft-core interactions -+ * we have to recalculate the (non-linear) energies contributions. -+ */ -+ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) -+ { -+ for (i = 0; i < enerd->n_lambda; i++) -+ { -+ for (j = 0; j < efptNR; j++) -+ { -+ lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); -+ } -+ reset_foreign_enerdata(enerd); -+ do_nonbonded(fr, x, f, f_longrange, md, excl, -+ &(enerd->foreign_grpp), nrnb, -+ lam_i, dvdl_dum, -1, -1, -+ (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); -+ sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); -+ enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; -+ } -+ } -+ wallcycle_sub_stop(wcycle, ewcsNONBONDED); -+ where(); -+ } -+ -+ /* If we are doing GB, calculate bonded forces and apply corrections -+ * to the solvation forces */ -+ /* MRS: Eventually, many need to include free energy contribution here! 
*/ -+ if (ir->implicit_solvent) -+ { -+ wallcycle_sub_start(wcycle, ewcsBONDED); -+ calc_gb_forces(cr, md, born, top, x, f, fr, idef, -+ ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd); -+ wallcycle_sub_stop(wcycle, ewcsBONDED); -+ } -+ -+#ifdef GMX_MPI -+ if (TAKETIME) -+ { -+ t1 = MPI_Wtime(); -+ fr->t_fnbf += t1-t0; -+ } -+#endif -+ -+ if (fepvals->sc_alpha != 0) -+ { -+ enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; -+ } -+ else -+ { -+ enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; -+ } -+ -+ if (fepvals->sc_alpha != 0) -+ -+ /* even though coulomb part is linear, we already added it, beacuse we -+ need to go through the vdw calculation anyway */ -+ { -+ enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; -+ } -+ else -+ { -+ enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; -+ } -+ -+ if (bSepDVDL) -+ { -+ real V_short_range = 0; -+ real dvdl_short_range = 0; -+ -+ for (i = 0; i < enerd->grpp.nener; i++) -+ { -+ V_short_range += -+ (fr->bBHAM ? -+ enerd->grpp.ener[egBHAMSR][i] : -+ enerd->grpp.ener[egLJSR][i]) -+ + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i]; -+ } -+ dvdl_short_range = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL]; -+ PRINT_SEPDVDL("VdW and Coulomb SR particle-p.", -+ V_short_range, -+ dvdl_short_range); -+ } -+ debug_gmx(); -+ -+ -+ if (debug) -+ { -+ pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); -+ } -+ -+ /* Shift the coordinates. Must be done before bonded forces and PPPM, -+ * but is also necessary for SHAKE and update, therefore it can NOT -+ * go when no bonded forces have to be evaluated. -+ */ -+ -+ /* Here sometimes we would not need to shift with NBFonly, -+ * but we do so anyhow for consistency of the returned coordinates. -+ */ -+ if (graph) -+ { -+ shift_self(graph, box, x); -+ if (TRICLINIC(box)) -+ { -+ inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); -+ } -+ else -+ { -+ inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); -+ } -+ } -+ /* Check whether we need to do bondeds or correct for exclusions */ -+ if (fr->bMolPBC && -+ ((flags & GMX_FORCE_BONDED) -+ || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))) -+ { -+ /* Since all atoms are in the rectangular or triclinic unit-cell, -+ * only single box vector shifts (2 in x) are required. -+ */ -+ set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box); -+ } -+ debug_gmx(); -+ -+ if (flags & GMX_FORCE_BONDED) -+ { -+ wallcycle_sub_start(wcycle, ewcsBONDED); -+ calc_bonds(fplog, cr->ms, -+ idef, x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, -+ DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born, -+ flags, -+ fr->bSepDVDL && do_per_step(step, ir->nstlog), step); -+ -+ /* Check if we have to determine energy differences -+ * at foreign lambda's. -+ */ -+ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && -+ idef->ilsort != ilsortNO_FE) -+ { -+ if (idef->ilsort != ilsortFE_SORTED) -+ { -+ gmx_incons("The bonded interactions are not sorted for free energy"); -+ } -+ for (i = 0; i < enerd->n_lambda; i++) -+ { -+ reset_foreign_enerdata(enerd); -+ for (j = 0; j < efptNR; j++) -+ { -+ lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); -+ } -+ calc_bonds_lambda(fplog, idef, x, fr, &pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md, -+ fcd, DOMAINDECOMP(cr) ? 
cr->dd->gatindex : NULL); -+ sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); -+ enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; -+ } -+ } -+ debug_gmx(); -+ -+ wallcycle_sub_stop(wcycle, ewcsBONDED); -+ } -+ -+ where(); -+ -+ *cycles_pme = 0; -+ if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)) -+ { -+ real Vlr = 0, Vcorr = 0; -+ real dvdl_long_range = 0; -+ int status = 0; -+ -+ bSB = (ir->nwall == 2); -+ if (bSB) -+ { -+ copy_mat(box, boxs); -+ svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); -+ box_size[ZZ] *= ir->wall_ewald_zfac; -+ } -+ } -+ -+ /* Do long-range electrostatics and/or LJ-PME, including related short-range -+ * corrections. -+ */ -+ -+ clear_mat(fr->vir_el_recip); -+ clear_mat(fr->vir_lj_recip); -+ -+ if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)) -+ { -+ real Vlr_q = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0; -+ real dvdl_long_range_q = 0, dvdl_long_range_lj = 0; -+ int status = 0; -+ -+ if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype)) -+ { -+ real dvdl_long_range_correction_q = 0; -+ real dvdl_long_range_correction_lj = 0; -+ /* With the Verlet scheme exclusion forces are calculated -+ * in the non-bonded kernel. -+ */ -+ /* The TPI molecule does not have exclusions with the rest -+ * of the system and no intra-molecular PME grid -+ * contributions will be calculated in -+ * gmx_pme_calc_energy. -+ */ -+ if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || -+ ir->ewald_geometry != eewg3D || -+ ir->epsilon_surface != 0) -+ { -+ int nthreads, t; -+ -+ wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); -+ -+ if (fr->n_tpi > 0) -+ { -+ gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); -+ } -+ -+ nthreads = gmx_omp_nthreads_get(emntBonded); -+#pragma omp parallel for num_threads(nthreads) schedule(static) -+ for (t = 0; t < nthreads; t++) -+ { -+ int s, e, i; -+ rvec *fnv; -+ tensor *vir_q, *vir_lj; -+ real *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj; -+ if (t == 0) -+ { -+ fnv = fr->f_novirsum; -+ vir_q = &fr->vir_el_recip; -+ vir_lj = &fr->vir_lj_recip; -+ Vcorrt_q = &Vcorr_q; -+ Vcorrt_lj = &Vcorr_lj; -+ dvdlt_q = &dvdl_long_range_correction_q; -+ dvdlt_lj = &dvdl_long_range_correction_lj; -+ } -+ else -+ { -+ fnv = fr->f_t[t].f; -+ vir_q = &fr->f_t[t].vir_q; -+ vir_lj = &fr->f_t[t].vir_lj; -+ Vcorrt_q = &fr->f_t[t].Vcorr_q; -+ Vcorrt_lj = &fr->f_t[t].Vcorr_lj; -+ dvdlt_q = &fr->f_t[t].dvdl[efptCOUL]; -+ dvdlt_lj = &fr->f_t[t].dvdl[efptVDW]; -+ for (i = 0; i < fr->natoms_force; i++) -+ { -+ clear_rvec(fnv[i]); -+ } -+ clear_mat(*vir_q); -+ clear_mat(*vir_lj); -+ } -+ *dvdlt_q = 0; -+ *dvdlt_lj = 0; -+ -+ ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1], -+ cr, t, fr, -+ md->chargeA, md->chargeB, -+ md->sqrt_c6A, md->sqrt_c6B, -+ md->sigmaA, md->sigmaB, -+ md->sigma3A, md->sigma3B, -+ md->nChargePerturbed || md->nTypePerturbed, -+ ir->cutoff_scheme != ecutsVERLET, -+ excl, x, bSB ? 
boxs : box, mu_tot, -+ ir->ewald_geometry, -+ ir->epsilon_surface, -+ fnv, *vir_q, *vir_lj, -+ Vcorrt_q, Vcorrt_lj, -+ lambda[efptCOUL], lambda[efptVDW], -+ dvdlt_q, dvdlt_lj); -+ } -+ if (nthreads > 1) -+ { -+ reduce_thread_forces(fr->natoms_force, fr->f_novirsum, -+ fr->vir_el_recip, fr->vir_lj_recip, -+ &Vcorr_q, &Vcorr_lj, -+ &dvdl_long_range_correction_q, -+ &dvdl_long_range_correction_lj, -+ nthreads, fr->f_t); -+ } -+ wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); -+ } -+ -+ if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0) -+ { -+ Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box, -+ &dvdl_long_range_correction_q, -+ fr->vir_el_recip); -+ } -+ -+ PRINT_SEPDVDL("Ewald excl./charge/dip. corr.", Vcorr_q, dvdl_long_range_correction_q); -+ PRINT_SEPDVDL("Ewald excl. corr. LJ", Vcorr_lj, dvdl_long_range_correction_lj); -+ enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q; -+ enerd->dvdl_lin[efptVDW] += dvdl_long_range_correction_lj; -+ } -+ -+ if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype))) -+ { -+ if (cr->duty & DUTY_PME) -+ { -+ /* Do reciprocal PME for Coulomb and/or LJ. */ -+ assert(fr->n_tpi >= 0); -+ if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) -+ { -+ pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE; -+ if (EEL_PME(fr->eeltype)) -+ { -+ pme_flags |= GMX_PME_DO_COULOMB; -+ } -+ if (EVDW_PME(fr->vdwtype)) -+ { -+ pme_flags |= GMX_PME_DO_LJ; -+ } -+ if (flags & GMX_FORCE_FORCES) -+ { -+ pme_flags |= GMX_PME_CALC_F; -+ } -+ if (flags & GMX_FORCE_VIRIAL) -+ { -+ pme_flags |= GMX_PME_CALC_ENER_VIR; -+ } -+ if (fr->n_tpi > 0) -+ { -+ /* We don't calculate f, but we do want the potential */ -+ pme_flags |= GMX_PME_CALC_POT; -+ } -+ wallcycle_start(wcycle, ewcPMEMESH); -+ status = gmx_pme_do(fr->pmedata, -+ 0, md->homenr - fr->n_tpi, -+ x, fr->f_novirsum, -+ md->chargeA, md->chargeB, -+ md->sqrt_c6A, md->sqrt_c6B, -+ md->sigmaA, md->sigmaB, -+ bSB ? boxs : box, cr, -+ DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, -+ DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, -+ nrnb, wcycle, -+ fr->vir_el_recip, fr->ewaldcoeff_q, -+ fr->vir_lj_recip, fr->ewaldcoeff_lj, -+ &Vlr_q, &Vlr_lj, -+ lambda[efptCOUL], lambda[efptVDW], -+ &dvdl_long_range_q, &dvdl_long_range_lj, pme_flags); -+ *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); -+ if (status != 0) -+ { -+ gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status); -+ } -+ /* We should try to do as little computation after -+ * this as possible, because parallel PME synchronizes -+ * the nodes, so we want all load imbalance of the -+ * rest of the force calculation to be before the PME -+ * call. DD load balancing is done on the whole time -+ * of the force call (without PME). -+ */ -+ } -+ if (fr->n_tpi > 0) -+ { -+ if (EVDW_PME(ir->vdwtype)) -+ { -+ -+ gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME"); -+ } -+ /* Determine the PME grid energy of the test molecule -+ * with the PME grid potential of the other charges. 
-+ */ -+ gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, -+ x + md->homenr - fr->n_tpi, -+ md->chargeA + md->homenr - fr->n_tpi, -+ &Vlr_q); -+ } -+ PRINT_SEPDVDL("PME mesh", Vlr_q + Vlr_lj, dvdl_long_range_q+dvdl_long_range_lj); -+ } -+ } -+ -+ if (!EEL_PME(fr->eeltype) && EEL_PME_EWALD(fr->eeltype)) -+ { -+ Vlr_q = do_ewald(ir, x, fr->f_novirsum, -+ md->chargeA, md->chargeB, -+ box_size, cr, md->homenr, -+ fr->vir_el_recip, fr->ewaldcoeff_q, -+ lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table); -+ PRINT_SEPDVDL("Ewald long-range", Vlr_q, dvdl_long_range_q); -+ } -+ -+ /* Note that with separate PME nodes we get the real energies later */ -+ enerd->dvdl_lin[efptCOUL] += dvdl_long_range_q; -+ enerd->dvdl_lin[efptVDW] += dvdl_long_range_lj; -+ enerd->term[F_COUL_RECIP] = Vlr_q + Vcorr_q; -+ enerd->term[F_LJ_RECIP] = Vlr_lj + Vcorr_lj; -+ if (debug) -+ { -+ fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", -+ Vlr_q, Vcorr_q, enerd->term[F_COUL_RECIP]); -+ pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM); -+ pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); -+ fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", -+ Vlr_lj, Vcorr_lj, enerd->term[F_LJ_RECIP]); -+ pr_rvecs(debug, 0, "vir_lj_recip after corr", fr->vir_lj_recip, DIM); -+ } -+ } -+ else -+ { -+ /* Is there a reaction-field exclusion correction needed? */ -+ if (EEL_RF(fr->eeltype) && eelRF_NEC != fr->eeltype) -+ { -+ /* With the Verlet scheme, exclusion forces are calculated -+ * in the non-bonded kernel. -+ */ -+ if (ir->cutoff_scheme != ecutsVERLET) -+ { -+ real dvdl_rf_excl = 0; -+ enerd->term[F_RF_EXCL] = -+ RF_excl_correction(fr, graph, md, excl, x, f, -+ fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl); -+ -+ enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl; -+ PRINT_SEPDVDL("RF exclusion correction", -+ enerd->term[F_RF_EXCL], dvdl_rf_excl); -+ } -+ } -+ } -+ where(); -+ debug_gmx(); -+ -+ if (debug) -+ { -+ print_nrnb(debug, nrnb); -+ } -+ debug_gmx(); -+ -+#ifdef GMX_MPI -+ if (TAKETIME) -+ { -+ t2 = MPI_Wtime(); -+ MPI_Barrier(cr->mpi_comm_mygroup); -+ t3 = MPI_Wtime(); -+ fr->t_wait += t3-t2; -+ if (fr->timesteps == 11) -+ { -+ fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", -+ cr->nodeid, gmx_step_str(fr->timesteps, buf), -+ 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), -+ (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); -+ } -+ fr->timesteps++; -+ } -+#endif -+ -+ if (debug) -+ { -+ pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); -+ } -+ -+} -+ -+void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd) -+{ -+ int i, n2; -+ -+ for (i = 0; i < F_NRE; i++) -+ { -+ enerd->term[i] = 0; -+ enerd->foreign_term[i] = 0; -+ } -+ -+ -+ for (i = 0; i < efptNR; i++) -+ { -+ enerd->dvdl_lin[i] = 0; -+ enerd->dvdl_nonlin[i] = 0; -+ } -+ -+ n2 = ngener*ngener; -+ if (debug) -+ { -+ fprintf(debug, "Creating %d sized group matrix for energies\n", n2); -+ } -+ enerd->grpp.nener = n2; -+ enerd->foreign_grpp.nener = n2; -+ for (i = 0; (i < egNR); i++) -+ { -+ snew(enerd->grpp.ener[i], n2); -+ snew(enerd->foreign_grpp.ener[i], n2); -+ } -+ -+ if (n_lambda) -+ { -+ enerd->n_lambda = 1 + n_lambda; -+ snew(enerd->enerpart_lambda, enerd->n_lambda); -+ } -+ else -+ { -+ enerd->n_lambda = 0; -+ } -+} -+ -+void destroy_enerdata(gmx_enerdata_t *enerd) -+{ -+ int i; -+ -+ for (i = 0; (i < egNR); i++) -+ { -+ sfree(enerd->grpp.ener[i]); -+ } -+ -+ for (i = 0; (i < egNR); i++) -+ { -+ 
sfree(enerd->foreign_grpp.ener[i]); -+ } -+ -+ if (enerd->n_lambda) -+ { -+ sfree(enerd->enerpart_lambda); -+ } -+} -+ -+static real sum_v(int n, real v[]) -+{ -+ real t; -+ int i; -+ -+ t = 0.0; -+ for (i = 0; (i < n); i++) -+ { -+ t = t + v[i]; -+ } -+ -+ return t; -+} -+ -+void sum_epot(gmx_grppairener_t *grpp, real *epot) -+{ -+ int i; -+ -+ /* Accumulate energies */ -+ epot[F_COUL_SR] = sum_v(grpp->nener, grpp->ener[egCOULSR]); -+ epot[F_LJ] = sum_v(grpp->nener, grpp->ener[egLJSR]); -+ epot[F_LJ14] = sum_v(grpp->nener, grpp->ener[egLJ14]); -+ epot[F_COUL14] = sum_v(grpp->nener, grpp->ener[egCOUL14]); -+ epot[F_COUL_LR] = sum_v(grpp->nener, grpp->ener[egCOULLR]); -+ epot[F_LJ_LR] = sum_v(grpp->nener, grpp->ener[egLJLR]); -+ /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */ -+ epot[F_GBPOL] += sum_v(grpp->nener, grpp->ener[egGB]); -+ -+/* lattice part of LR doesnt belong to any group -+ * and has been added earlier -+ */ -+ epot[F_BHAM] = sum_v(grpp->nener, grpp->ener[egBHAMSR]); -+ epot[F_BHAM_LR] = sum_v(grpp->nener, grpp->ener[egBHAMLR]); -+ -+ epot[F_EPOT] = 0; -+ for (i = 0; (i < F_EPOT); i++) -+ { -+ if (i != F_DISRESVIOL && i != F_ORIRESDEV) -+ { -+ epot[F_EPOT] += epot[i]; -+ } -+ } -+} -+ -+void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals) -+{ -+ int i, j, index; -+ double dlam; -+ -+ enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW]; /* include dispersion correction */ -+ enerd->term[F_DVDL] = 0.0; -+ for (i = 0; i < efptNR; i++) -+ { -+ if (fepvals->separate_dvdl[i]) -+ { -+ /* could this be done more readably/compactly? */ -+ switch (i) -+ { -+ case (efptMASS): -+ index = F_DKDL; -+ break; -+ case (efptCOUL): -+ index = F_DVDL_COUL; -+ break; -+ case (efptVDW): -+ index = F_DVDL_VDW; -+ break; -+ case (efptBONDED): -+ index = F_DVDL_BONDED; -+ break; -+ case (efptRESTRAINT): -+ index = F_DVDL_RESTRAINT; -+ break; -+ default: -+ index = F_DVDL; -+ break; -+ } -+ enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; -+ if (debug) -+ { -+ fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n", -+ efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); -+ } -+ } -+ else -+ { -+ enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; -+ if (debug) -+ { -+ fprintf(debug, "dvd-%sl[%2d]: %f: non-linear %f + linear %f\n", -+ efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); -+ } -+ } -+ } -+ -+ /* Notes on the foreign lambda free energy difference evaluation: -+ * Adding the potential and ekin terms that depend linearly on lambda -+ * as delta lam * dvdl to the energy differences is exact. -+ * For the constraints this is not exact, but we have no other option -+ * without literally changing the lengths and reevaluating the energies at each step. -+ * (try to remedy this post 4.6 - MRS) -+ * For the non-bonded LR term we assume that the soft-core (if present) -+ * no longer affects the energy beyond the short-range cut-off, -+ * which is a very good approximation (except for exotic settings). -+ * (investigate how to overcome this post 4.6 - MRS) -+ */ -+ if (fepvals->separate_dvdl[efptBONDED]) -+ { -+ enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR]; -+ } -+ else -+ { -+ enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR]; -+ } -+ enerd->term[F_DVDL_CONSTR] = 0; -+ -+ for (i = 0; i < fepvals->n_lambda; i++) -+ { -+ /* note we are iterating over fepvals here! 
-+ For the current lam, dlam = 0 automatically, -+ so we don't need to add anything to the -+ enerd->enerpart_lambda[0] */ -+ -+ /* we don't need to worry about dvdl_lin contributions to dE at -+ current lambda, because the contributions to the current -+ lambda are automatically zeroed */ -+ -+ for (j = 0; j < efptNR; j++) -+ { -+ /* Note that this loop is over all dhdl components, not just the separated ones */ -+ dlam = (fepvals->all_lambda[j][i]-lambda[j]); -+ enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j]; -+ if (debug) -+ { -+ fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n", -+ fepvals->all_lambda[j][i], efpt_names[j], -+ (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]), -+ dlam, enerd->dvdl_lin[j]); -+ } -+ } -+ } -+} -+ -+ -+void reset_foreign_enerdata(gmx_enerdata_t *enerd) -+{ -+ int i, j; -+ -+ /* First reset all foreign energy components. Foreign energies always called on -+ neighbor search steps */ -+ for (i = 0; (i < egNR); i++) -+ { -+ for (j = 0; (j < enerd->grpp.nener); j++) -+ { -+ enerd->foreign_grpp.ener[i][j] = 0.0; -+ } -+ } -+ -+ /* potential energy components */ -+ for (i = 0; (i <= F_EPOT); i++) -+ { -+ enerd->foreign_term[i] = 0.0; -+ } -+} -+ -+void reset_enerdata(t_forcerec *fr, gmx_bool bNS, -+ gmx_enerdata_t *enerd, -+ gmx_bool bMaster) -+{ -+ gmx_bool bKeepLR; -+ int i, j; -+ -+ /* First reset all energy components, except for the long range terms -+ * on the master at non neighbor search steps, since the long range -+ * terms have already been summed at the last neighbor search step. -+ */ -+ bKeepLR = (fr->bTwinRange && !bNS); -+ for (i = 0; (i < egNR); i++) -+ { -+ if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR))) -+ { -+ for (j = 0; (j < enerd->grpp.nener); j++) -+ { -+ enerd->grpp.ener[i][j] = 0.0; -+ } -+ } -+ } -+ for (i = 0; i < efptNR; i++) -+ { -+ enerd->dvdl_lin[i] = 0.0; -+ enerd->dvdl_nonlin[i] = 0.0; -+ } -+ -+ /* Normal potential energy components */ -+ for (i = 0; (i <= F_EPOT); i++) -+ { -+ enerd->term[i] = 0.0; -+ } -+ /* Initialize the dVdlambda term with the long range contribution */ -+ /* Initialize the dvdl term with the long range contribution */ -+ enerd->term[F_DVDL] = 0.0; -+ enerd->term[F_DVDL_COUL] = 0.0; -+ enerd->term[F_DVDL_VDW] = 0.0; -+ enerd->term[F_DVDL_BONDED] = 0.0; -+ enerd->term[F_DVDL_RESTRAINT] = 0.0; -+ enerd->term[F_DKDL] = 0.0; -+ if (enerd->n_lambda > 0) -+ { -+ for (i = 0; i < enerd->n_lambda; i++) -+ { -+ enerd->enerpart_lambda[i] = 0.0; -+ } -+ } -+ /* reset foreign energy data - separate function since we also call it elsewhere */ -+ reset_foreign_enerdata(enerd); -+} -diff --git a/src/gromacs/mdlib/minimize.c b/src/gromacs/mdlib/minimize.c -index 69008f5..5114fa0 100644 ---- a/src/gromacs/mdlib/minimize.c -+++ b/src/gromacs/mdlib/minimize.c -@@ -80,6 +80,13 @@ - #include "gromacs/timing/walltime_accounting.h" - #include "gromacs/imd/imd.h" - -+/* PLUMED */ -+#include "../../../Plumed.h" -+extern int plumedswitch; -+extern plumed plumedmain; -+extern void(*plumedcmd)(plumed,const char*,const void*); -+/* END PLUMED */ -+ - typedef struct { - t_state s; - rvec *f; -@@ -442,6 +449,43 @@ void init_em(FILE *fplog, const char *title, - - clear_rvec(mu_tot); - calc_shifts(ems->s.box, fr->shift_vec); -+ -+ /* PLUMED */ -+ if(plumedswitch){ -+ if(cr->ms && cr->ms->nsim>1) { -+ if(MASTER(cr)) (*plumedcmd) (plumedmain,"GREX setMPIIntercomm",&cr->ms->mpi_comm_masters); -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ (*plumedcmd) (plumedmain,"GREX 
setMPIIntracomm",&cr->dd->mpi_comm_all); -+ }else{ -+ (*plumedcmd) (plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim); -+ } -+ } -+ (*plumedcmd) (plumedmain,"GREX init",NULL); -+ } -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ (*plumedcmd) (plumedmain,"setMPIComm",&cr->dd->mpi_comm_all); -+ }else{ -+ (*plumedcmd) (plumedmain,"setMPIComm",&cr->mpi_comm_mysim); -+ } -+ } -+ (*plumedcmd) (plumedmain,"setNatoms",&top_global->natoms); -+ (*plumedcmd) (plumedmain,"setMDEngine","gromacs"); -+ (*plumedcmd) (plumedmain,"setLog",fplog); -+ real real_delta_t; -+ real_delta_t=ir->delta_t; -+ (*plumedcmd) (plumedmain,"setTimestep",&real_delta_t); -+ (*plumedcmd) (plumedmain,"init",NULL); -+ -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ (*plumedcmd) (plumedmain,"setAtomsNlocal",&cr->dd->nat_home); -+ (*plumedcmd) (plumedmain,"setAtomsGatindex",cr->dd->gatindex); -+ } -+ } -+ } -+ /* END PLUMED */ - } - - static void finish_em(t_commrec *cr, gmx_mdoutf_t outf, -@@ -737,12 +781,34 @@ static void evaluate_energy(FILE *fplog, t_commrec *cr, - em_dd_partition_system(fplog, count, cr, top_global, inputrec, - ems, top, mdatoms, fr, vsite, constr, - nrnb, wcycle); -+ /* PLUMED */ -+ if(plumedswitch){ -+ (*plumedcmd) (plumedmain,"setAtomsNlocal",&cr->dd->nat_home); -+ (*plumedcmd) (plumedmain,"setAtomsGatindex",cr->dd->gatindex); -+ } -+ /* END PLUMED */ - } - - /* Calc force & energy on new trial position */ - /* do_force always puts the charge groups in the box and shifts again - * We do not unshift, so molecules are always whole in congrad.c - */ -+ /* PLUMED */ -+ int plumedNeedsEnergy=0; -+ matrix plumed_vir; -+ if(plumedswitch){ -+ long int lstep=count; (*plumedcmd)(plumedmain,"setStepLong",&count); -+ (*plumedcmd) (plumedmain,"setPositions",&ems->s.x[0][0]); -+ (*plumedcmd) (plumedmain,"setMasses",&mdatoms->massT[0]); -+ (*plumedcmd) (plumedmain,"setCharges",&mdatoms->chargeA[0]); -+ (*plumedcmd) (plumedmain,"setBox",&ems->s.box[0][0]); -+ (*plumedcmd) (plumedmain,"prepareCalc",NULL); -+ (*plumedcmd) (plumedmain,"setForces",&ems->f[0][0]); -+ (*plumedcmd) (plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); -+ clear_mat(plumed_vir); -+ (*plumedcmd) (plumedmain,"setVirial",&plumed_vir[0][0]); -+ } -+ /* END PLUMED */ - do_force(fplog, cr, inputrec, - count, nrnb, wcycle, top, &top_global->groups, - ems->s.box, ems->s.x, &ems->s.hist, -@@ -751,6 +817,19 @@ static void evaluate_energy(FILE *fplog, t_commrec *cr, - GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | - GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY | - (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0)); -+ /* PLUMED */ -+ if(plumedswitch){ -+ if(plumedNeedsEnergy) { -+ msmul(force_vir,2.0,plumed_vir); -+ (*plumedcmd) (plumedmain,"setEnergy",&enerd->term[F_EPOT]); -+ (*plumedcmd) (plumedmain,"performCalc",NULL); -+ msmul(plumed_vir,0.5,force_vir); -+ } else { -+ msmul(plumed_vir,0.5,plumed_vir); -+ m_add(force_vir,plumed_vir,force_vir); -+ } -+ } -+ /* END PLUMED */ - - /* Clear the unused shake virial and pressure */ - clear_mat(shake_vir); -diff --git a/src/gromacs/mdlib/minimize.c.preplumed b/src/gromacs/mdlib/minimize.c.preplumed -new file mode 100644 -index 0000000..69008f5 ---- /dev/null -+++ b/src/gromacs/mdlib/minimize.c.preplumed -@@ -0,0 +1,2906 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team. 
-+ * Copyright (c) 2013,2014, by the GROMACS development team, led by -+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+ * and including many others, as listed in the AUTHORS file in the -+ * top-level source directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. -+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. -+ */ -+#ifdef HAVE_CONFIG_H -+#include -+#endif -+ -+#include -+#include -+#include -+#include "sysstuff.h" -+#include "gromacs/utility/cstringutil.h" -+#include "network.h" -+#include "gromacs/utility/smalloc.h" -+#include "nrnb.h" -+#include "main.h" -+#include "force.h" -+#include "macros.h" -+#include "names.h" -+#include "gmx_fatal.h" -+#include "txtdump.h" -+#include "typedefs.h" -+#include "update.h" -+#include "constr.h" -+#include "vec.h" -+#include "tgroup.h" -+#include "mdebin.h" -+#include "vsite.h" -+#include "force.h" -+#include "mdrun.h" -+#include "md_support.h" -+#include "sim_util.h" -+#include "domdec.h" -+#include "mdatoms.h" -+#include "ns.h" -+#include "mtop_util.h" -+#include "pme.h" -+#include "bondf.h" -+#include "gmx_omp_nthreads.h" -+#include "md_logging.h" -+ -+#include "gromacs/fileio/confio.h" -+#include "gromacs/fileio/trajectory_writing.h" -+#include "gromacs/linearalgebra/mtxio.h" -+#include "gromacs/linearalgebra/sparsematrix.h" -+#include "gromacs/timing/wallcycle.h" -+#include "gromacs/timing/walltime_accounting.h" -+#include "gromacs/imd/imd.h" -+ -+typedef struct { -+ t_state s; -+ rvec *f; -+ real epot; -+ real fnorm; -+ real fmax; -+ int a_fmax; -+} em_state_t; -+ -+static em_state_t *init_em_state() -+{ -+ em_state_t *ems; -+ -+ snew(ems, 1); -+ -+ /* does this need to be here? Should the array be declared differently (staticaly)in the state definition? 
*/ -+ snew(ems->s.lambda, efptNR); -+ -+ return ems; -+} -+ -+static void print_em_start(FILE *fplog, -+ t_commrec *cr, -+ gmx_walltime_accounting_t walltime_accounting, -+ gmx_wallcycle_t wcycle, -+ const char *name) -+{ -+ walltime_accounting_start(walltime_accounting); -+ wallcycle_start(wcycle, ewcRUN); -+ print_start(fplog, cr, walltime_accounting, name); -+} -+static void em_time_end(gmx_walltime_accounting_t walltime_accounting, -+ gmx_wallcycle_t wcycle) -+{ -+ wallcycle_stop(wcycle, ewcRUN); -+ -+ walltime_accounting_end(walltime_accounting); -+} -+ -+static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps) -+{ -+ fprintf(out, "\n"); -+ fprintf(out, "%s:\n", minimizer); -+ fprintf(out, " Tolerance (Fmax) = %12.5e\n", ftol); -+ fprintf(out, " Number of steps = %12d\n", nsteps); -+} -+ -+static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain) -+{ -+ char buffer[2048]; -+ if (bLastStep) -+ { -+ sprintf(buffer, -+ "\nEnergy minimization reached the maximum number " -+ "of steps before the forces reached the requested " -+ "precision Fmax < %g.\n", ftol); -+ } -+ else -+ { -+ sprintf(buffer, -+ "\nEnergy minimization has stopped, but the forces have " -+ "not converged to the requested precision Fmax < %g (which " -+ "may not be possible for your system). It stopped " -+ "because the algorithm tried to make a new step whose size " -+ "was too small, or there was no change in the energy since " -+ "last step. Either way, we regard the minimization as " -+ "converged to within the available machine precision, " -+ "given your starting configuration and EM parameters.\n%s%s", -+ ftol, -+ sizeof(real) < sizeof(double) ? -+ "\nDouble precision normally gives you higher accuracy, but " -+ "this is often not needed for preparing to run molecular " -+ "dynamics.\n" : -+ "", -+ bConstrain ? -+ "You might need to increase your constraint accuracy, or turn\n" -+ "off constraints altogether (set constraints = none in mdp file)\n" : -+ ""); -+ } -+ fputs(wrap_lines(buffer, 78, 0, FALSE), fp); -+} -+ -+ -+ -+static void print_converged(FILE *fp, const char *alg, real ftol, -+ gmx_int64_t count, gmx_bool bDone, gmx_int64_t nsteps, -+ real epot, real fmax, int nfmax, real fnorm) -+{ -+ char buf[STEPSTRSIZE]; -+ -+ if (bDone) -+ { -+ fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n", -+ alg, ftol, gmx_step_str(count, buf)); -+ } -+ else if (count < nsteps) -+ { -+ fprintf(fp, "\n%s converged to machine precision in %s steps,\n" -+ "but did not reach the requested Fmax < %g.\n", -+ alg, gmx_step_str(count, buf), ftol); -+ } -+ else -+ { -+ fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n", -+ alg, ftol, gmx_step_str(count, buf)); -+ } -+ -+#ifdef GMX_DOUBLE -+ fprintf(fp, "Potential Energy = %21.14e\n", epot); -+ fprintf(fp, "Maximum force = %21.14e on atom %d\n", fmax, nfmax+1); -+ fprintf(fp, "Norm of force = %21.14e\n", fnorm); -+#else -+ fprintf(fp, "Potential Energy = %14.7e\n", epot); -+ fprintf(fp, "Maximum force = %14.7e on atom %d\n", fmax, nfmax+1); -+ fprintf(fp, "Norm of force = %14.7e\n", fnorm); -+#endif -+} -+ -+static void get_f_norm_max(t_commrec *cr, -+ t_grpopts *opts, t_mdatoms *mdatoms, rvec *f, -+ real *fnorm, real *fmax, int *a_fmax) -+{ -+ double fnorm2, *sum; -+ real fmax2, fmax2_0, fam; -+ int la_max, a_max, start, end, i, m, gf; -+ -+ /* This routine finds the largest force and returns it. -+ * On parallel machines the global max is taken. 
-+ */ -+ fnorm2 = 0; -+ fmax2 = 0; -+ la_max = -1; -+ gf = 0; -+ start = 0; -+ end = mdatoms->homenr; -+ if (mdatoms->cFREEZE) -+ { -+ for (i = start; i < end; i++) -+ { -+ gf = mdatoms->cFREEZE[i]; -+ fam = 0; -+ for (m = 0; m < DIM; m++) -+ { -+ if (!opts->nFreeze[gf][m]) -+ { -+ fam += sqr(f[i][m]); -+ } -+ } -+ fnorm2 += fam; -+ if (fam > fmax2) -+ { -+ fmax2 = fam; -+ la_max = i; -+ } -+ } -+ } -+ else -+ { -+ for (i = start; i < end; i++) -+ { -+ fam = norm2(f[i]); -+ fnorm2 += fam; -+ if (fam > fmax2) -+ { -+ fmax2 = fam; -+ la_max = i; -+ } -+ } -+ } -+ -+ if (la_max >= 0 && DOMAINDECOMP(cr)) -+ { -+ a_max = cr->dd->gatindex[la_max]; -+ } -+ else -+ { -+ a_max = la_max; -+ } -+ if (PAR(cr)) -+ { -+ snew(sum, 2*cr->nnodes+1); -+ sum[2*cr->nodeid] = fmax2; -+ sum[2*cr->nodeid+1] = a_max; -+ sum[2*cr->nnodes] = fnorm2; -+ gmx_sumd(2*cr->nnodes+1, sum, cr); -+ fnorm2 = sum[2*cr->nnodes]; -+ /* Determine the global maximum */ -+ for (i = 0; i < cr->nnodes; i++) -+ { -+ if (sum[2*i] > fmax2) -+ { -+ fmax2 = sum[2*i]; -+ a_max = (int)(sum[2*i+1] + 0.5); -+ } -+ } -+ sfree(sum); -+ } -+ -+ if (fnorm) -+ { -+ *fnorm = sqrt(fnorm2); -+ } -+ if (fmax) -+ { -+ *fmax = sqrt(fmax2); -+ } -+ if (a_fmax) -+ { -+ *a_fmax = a_max; -+ } -+} -+ -+static void get_state_f_norm_max(t_commrec *cr, -+ t_grpopts *opts, t_mdatoms *mdatoms, -+ em_state_t *ems) -+{ -+ get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax); -+} -+ -+void init_em(FILE *fplog, const char *title, -+ t_commrec *cr, t_inputrec *ir, -+ t_state *state_global, gmx_mtop_t *top_global, -+ em_state_t *ems, gmx_localtop_t **top, -+ rvec **f, rvec **f_global, -+ t_nrnb *nrnb, rvec mu_tot, -+ t_forcerec *fr, gmx_enerdata_t **enerd, -+ t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int nfile, const t_filenm fnm[], -+ gmx_mdoutf_t *outf, t_mdebin **mdebin, -+ int imdport, unsigned long gmx_unused Flags, -+ gmx_wallcycle_t wcycle) -+{ -+ int i; -+ real dvdl_constr; -+ -+ if (fplog) -+ { -+ fprintf(fplog, "Initiating %s\n", title); -+ } -+ -+ state_global->ngtc = 0; -+ -+ /* Initialize lambda variables */ -+ initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, NULL); -+ -+ init_nrnb(nrnb); -+ -+ /* Interactive molecular dynamics */ -+ init_IMD(ir, cr, top_global, fplog, 1, state_global->x, -+ nfile, fnm, NULL, imdport, Flags); -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ *top = dd_init_local_top(top_global); -+ -+ dd_init_local_state(cr->dd, state_global, &ems->s); -+ -+ *f = NULL; -+ -+ /* Distribute the charge groups over the nodes from the master node */ -+ dd_partition_system(fplog, ir->init_step, cr, TRUE, 1, -+ state_global, top_global, ir, -+ &ems->s, &ems->f, mdatoms, *top, -+ fr, vsite, NULL, constr, -+ nrnb, NULL, FALSE); -+ dd_store_state(cr->dd, &ems->s); -+ -+ if (ir->nstfout) -+ { -+ snew(*f_global, top_global->natoms); -+ } -+ else -+ { -+ *f_global = NULL; -+ } -+ *graph = NULL; -+ } -+ else -+ { -+ snew(*f, top_global->natoms); -+ -+ /* Just copy the state */ -+ ems->s = *state_global; -+ snew(ems->s.x, ems->s.nalloc); -+ snew(ems->f, ems->s.nalloc); -+ for (i = 0; i < state_global->natoms; i++) -+ { -+ copy_rvec(state_global->x[i], ems->s.x[i]); -+ } -+ copy_mat(state_global->box, ems->s.box); -+ -+ *top = gmx_mtop_generate_local_top(top_global, ir); -+ *f_global = *f; -+ -+ forcerec_set_excl_load(fr, *top); -+ -+ setup_bonded_threading(fr, &(*top)->idef); -+ -+ if (ir->ePBC != epbcNONE && !fr->bMolPBC) -+ { -+ *graph = 
mk_graph(fplog, &((*top)->idef), 0, top_global->natoms, FALSE, FALSE); -+ } -+ else -+ { -+ *graph = NULL; -+ } -+ -+ atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms); -+ update_mdatoms(mdatoms, state_global->lambda[efptFEP]); -+ -+ if (vsite) -+ { -+ set_vsite_top(vsite, *top, mdatoms, cr); -+ } -+ } -+ -+ if (constr) -+ { -+ if (ir->eConstrAlg == econtSHAKE && -+ gmx_mtop_ftype_count(top_global, F_CONSTR) > 0) -+ { -+ gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n", -+ econstr_names[econtSHAKE], econstr_names[econtLINCS]); -+ } -+ -+ if (!DOMAINDECOMP(cr)) -+ { -+ set_constraints(constr, *top, ir, mdatoms, cr); -+ } -+ -+ if (!ir->bContinuation) -+ { -+ /* Constrain the starting coordinates */ -+ dvdl_constr = 0; -+ constrain(PAR(cr) ? NULL : fplog, TRUE, TRUE, constr, &(*top)->idef, -+ ir, NULL, cr, -1, 0, 1.0, mdatoms, -+ ems->s.x, ems->s.x, NULL, fr->bMolPBC, ems->s.box, -+ ems->s.lambda[efptFEP], &dvdl_constr, -+ NULL, NULL, nrnb, econqCoord, FALSE, 0, 0); -+ } -+ } -+ -+ if (PAR(cr)) -+ { -+ *gstat = global_stat_init(ir); -+ } -+ -+ *outf = init_mdoutf(fplog, nfile, fnm, 0, cr, ir, top_global, NULL, wcycle); -+ -+ snew(*enerd, 1); -+ init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, -+ *enerd); -+ -+ if (mdebin != NULL) -+ { -+ /* Init bin for energy stuff */ -+ *mdebin = init_mdebin(mdoutf_get_fp_ene(*outf), top_global, ir, NULL); -+ } -+ -+ clear_rvec(mu_tot); -+ calc_shifts(ems->s.box, fr->shift_vec); -+} -+ -+static void finish_em(t_commrec *cr, gmx_mdoutf_t outf, -+ gmx_walltime_accounting_t walltime_accounting, -+ gmx_wallcycle_t wcycle) -+{ -+ if (!(cr->duty & DUTY_PME)) -+ { -+ /* Tell the PME only node to finish */ -+ gmx_pme_send_finish(cr); -+ } -+ -+ done_mdoutf(outf); -+ -+ em_time_end(walltime_accounting, wcycle); -+} -+ -+static void swap_em_state(em_state_t *ems1, em_state_t *ems2) -+{ -+ em_state_t tmp; -+ -+ tmp = *ems1; -+ *ems1 = *ems2; -+ *ems2 = tmp; -+} -+ -+static void copy_em_coords(em_state_t *ems, t_state *state) -+{ -+ int i; -+ -+ for (i = 0; (i < state->natoms); i++) -+ { -+ copy_rvec(ems->s.x[i], state->x[i]); -+ } -+} -+ -+static void write_em_traj(FILE *fplog, t_commrec *cr, -+ gmx_mdoutf_t outf, -+ gmx_bool bX, gmx_bool bF, const char *confout, -+ gmx_mtop_t *top_global, -+ t_inputrec *ir, gmx_int64_t step, -+ em_state_t *state, -+ t_state *state_global, rvec *f_global) -+{ -+ int mdof_flags; -+ gmx_bool bIMDout = FALSE; -+ -+ -+ /* Shall we do IMD output? 
*/ -+ if (ir->bIMD) -+ { -+ bIMDout = do_per_step(step, IMD_get_step(ir->imd->setup)); -+ } -+ -+ if ((bX || bF || bIMDout || confout != NULL) && !DOMAINDECOMP(cr)) -+ { -+ copy_em_coords(state, state_global); -+ f_global = state->f; -+ } -+ -+ mdof_flags = 0; -+ if (bX) -+ { -+ mdof_flags |= MDOF_X; -+ } -+ if (bF) -+ { -+ mdof_flags |= MDOF_F; -+ } -+ -+ /* If we want IMD output, set appropriate MDOF flag */ -+ if (ir->bIMD) -+ { -+ mdof_flags |= MDOF_IMD; -+ } -+ -+ mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, -+ top_global, step, (double)step, -+ &state->s, state_global, state->f, f_global); -+ -+ if (confout != NULL && MASTER(cr)) -+ { -+ if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr)) -+ { -+ /* Make molecules whole only for confout writing */ -+ do_pbc_mtop(fplog, ir->ePBC, state_global->box, top_global, -+ state_global->x); -+ } -+ -+ write_sto_conf_mtop(confout, -+ *top_global->name, top_global, -+ state_global->x, NULL, ir->ePBC, state_global->box); -+ } -+} -+ -+static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md, -+ gmx_bool bMolPBC, -+ em_state_t *ems1, real a, rvec *f, em_state_t *ems2, -+ gmx_constr_t constr, gmx_localtop_t *top, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_int64_t count) -+ -+{ -+ t_state *s1, *s2; -+ int i; -+ int start, end; -+ rvec *x1, *x2; -+ real dvdl_constr; -+ int nthreads gmx_unused; -+ -+ s1 = &ems1->s; -+ s2 = &ems2->s; -+ -+ if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count) -+ { -+ gmx_incons("state mismatch in do_em_step"); -+ } -+ -+ s2->flags = s1->flags; -+ -+ if (s2->nalloc != s1->nalloc) -+ { -+ s2->nalloc = s1->nalloc; -+ srenew(s2->x, s1->nalloc); -+ srenew(ems2->f, s1->nalloc); -+ if (s2->flags & (1<<estCGP)) -+ { -+ srenew(s2->cg_p, s1->nalloc); -+ } -+ } -+ -+ s2->natoms = s1->natoms; -+ copy_mat(s1->box, s2->box); -+ /* Copy free energy state */ -+ for (i = 0; i < efptNR; i++) -+ { -+ s2->lambda[i] = s1->lambda[i]; -+ } -+ copy_mat(s1->box, s2->box); -+ -+ start = 0; -+ end = md->homenr; -+ -+ x1 = s1->x; -+ x2 = s2->x; -+ -+ nthreads = gmx_omp_nthreads_get(emntUpdate); -+#pragma omp parallel num_threads(nthreads) -+ { -+ int gf, i, m; -+ -+ gf = 0; -+#pragma omp for schedule(static) nowait -+ for (i = start; i < end; i++) -+ { -+ if (md->cFREEZE) -+ { -+ gf = md->cFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ if (ir->opts.nFreeze[gf][m]) -+ { -+ x2[i][m] = x1[i][m]; -+ } -+ else -+ { -+ x2[i][m] = x1[i][m] + a*f[i][m]; -+ } -+ } -+ } -+ -+ if (s2->flags & (1<<estCGP)) -+ { -+ x1 = s1->cg_p; -+ x2 = s2->cg_p; -+#pragma omp for schedule(static) nowait -+ for (i = start; i < end; i++) -+ { -+ copy_rvec(x1[i], x2[i]); -+ } -+ } -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ s2->ddp_count = s1->ddp_count; -+ if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) -+ { -+#pragma omp barrier -+ s2->cg_gl_nalloc = s1->cg_gl_nalloc; -+ srenew(s2->cg_gl, s2->cg_gl_nalloc); -+#pragma omp barrier -+ } -+ s2->ncg_gl = s1->ncg_gl; -+#pragma omp for schedule(static) nowait -+ for (i = 0; i < s2->ncg_gl; i++) -+ { -+ s2->cg_gl[i] = s1->cg_gl[i]; -+ } -+ s2->ddp_count_cg_gl = s1->ddp_count_cg_gl; -+ } -+ } -+ -+ if (constr) -+ { -+ wallcycle_start(wcycle, ewcCONSTR); -+ dvdl_constr = 0; -+ constrain(NULL, TRUE, TRUE, constr, &top->idef, -+ ir, NULL, cr, count, 0, 1.0, md, -+ s1->x, s2->x, NULL, bMolPBC, s2->box, -+ s2->lambda[efptBONDED], &dvdl_constr, -+ NULL, NULL, nrnb, econqCoord, FALSE, 0, 0); -+ wallcycle_stop(wcycle, ewcCONSTR); -+ } -+} -+ -+static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr, -+ gmx_mtop_t *top_global, t_inputrec 
*ir, -+ em_state_t *ems, gmx_localtop_t *top, -+ t_mdatoms *mdatoms, t_forcerec *fr, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle) -+{ -+ /* Repartition the domain decomposition */ -+ wallcycle_start(wcycle, ewcDOMDEC); -+ dd_partition_system(fplog, step, cr, FALSE, 1, -+ NULL, top_global, ir, -+ &ems->s, &ems->f, -+ mdatoms, top, fr, vsite, NULL, constr, -+ nrnb, wcycle, FALSE); -+ dd_store_state(cr->dd, &ems->s); -+ wallcycle_stop(wcycle, ewcDOMDEC); -+} -+ -+static void evaluate_energy(FILE *fplog, t_commrec *cr, -+ gmx_mtop_t *top_global, -+ em_state_t *ems, gmx_localtop_t *top, -+ t_inputrec *inputrec, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_global_stat_t gstat, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ t_fcdata *fcd, -+ t_graph *graph, t_mdatoms *mdatoms, -+ t_forcerec *fr, rvec mu_tot, -+ gmx_enerdata_t *enerd, tensor vir, tensor pres, -+ gmx_int64_t count, gmx_bool bFirst) -+{ -+ real t; -+ gmx_bool bNS; -+ int nabnsb; -+ tensor force_vir, shake_vir, ekin; -+ real dvdl_constr, prescorr, enercorr, dvdlcorr; -+ real terminate = 0; -+ -+ /* Set the time to the initial time, the time does not change during EM */ -+ t = inputrec->init_t; -+ -+ if (bFirst || -+ (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) -+ { -+ /* This is the first state or an old state used before the last ns */ -+ bNS = TRUE; -+ } -+ else -+ { -+ bNS = FALSE; -+ if (inputrec->nstlist > 0) -+ { -+ bNS = TRUE; -+ } -+ else if (inputrec->nstlist == -1) -+ { -+ nabnsb = natoms_beyond_ns_buffer(inputrec, fr, &top->cgs, NULL, ems->s.x); -+ if (PAR(cr)) -+ { -+ gmx_sumi(1, &nabnsb, cr); -+ } -+ bNS = (nabnsb > 0); -+ } -+ } -+ -+ if (vsite) -+ { -+ construct_vsites(vsite, ems->s.x, 1, NULL, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, cr, ems->s.box); -+ } -+ -+ if (DOMAINDECOMP(cr) && bNS) -+ { -+ /* Repartition the domain decomposition */ -+ em_dd_partition_system(fplog, count, cr, top_global, inputrec, -+ ems, top, mdatoms, fr, vsite, constr, -+ nrnb, wcycle); -+ } -+ -+ /* Calc force & energy on new trial position */ -+ /* do_force always puts the charge groups in the box and shifts again -+ * We do not unshift, so molecules are always whole in congrad.c -+ */ -+ do_force(fplog, cr, inputrec, -+ count, nrnb, wcycle, top, &top_global->groups, -+ ems->s.box, ems->s.x, &ems->s.hist, -+ ems->f, force_vir, mdatoms, enerd, fcd, -+ ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL, NULL, TRUE, -+ GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | -+ GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY | -+ (bNS ? 
GMX_FORCE_NS | GMX_FORCE_DO_LR : 0)); -+ -+ /* Clear the unused shake virial and pressure */ -+ clear_mat(shake_vir); -+ clear_mat(pres); -+ -+ /* Communicate stuff when parallel */ -+ if (PAR(cr) && inputrec->eI != eiNM) -+ { -+ wallcycle_start(wcycle, ewcMoveE); -+ -+ global_stat(fplog, gstat, cr, enerd, force_vir, shake_vir, mu_tot, -+ inputrec, NULL, NULL, NULL, 1, &terminate, -+ top_global, &ems->s, FALSE, -+ CGLO_ENERGY | -+ CGLO_PRESSURE | -+ CGLO_CONSTRAINT | -+ CGLO_FIRSTITERATE); -+ -+ wallcycle_stop(wcycle, ewcMoveE); -+ } -+ -+ /* Calculate long range corrections to pressure and energy */ -+ calc_dispcorr(fplog, inputrec, fr, count, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW], -+ pres, force_vir, &prescorr, &enercorr, &dvdlcorr); -+ enerd->term[F_DISPCORR] = enercorr; -+ enerd->term[F_EPOT] += enercorr; -+ enerd->term[F_PRES] += prescorr; -+ enerd->term[F_DVDL] += dvdlcorr; -+ -+ ems->epot = enerd->term[F_EPOT]; -+ -+ if (constr) -+ { -+ /* Project out the constraint components of the force */ -+ wallcycle_start(wcycle, ewcCONSTR); -+ dvdl_constr = 0; -+ constrain(NULL, FALSE, FALSE, constr, &top->idef, -+ inputrec, NULL, cr, count, 0, 1.0, mdatoms, -+ ems->s.x, ems->f, ems->f, fr->bMolPBC, ems->s.box, -+ ems->s.lambda[efptBONDED], &dvdl_constr, -+ NULL, &shake_vir, nrnb, econqForceDispl, FALSE, 0, 0); -+ if (fr->bSepDVDL && fplog) -+ { -+ gmx_print_sepdvdl(fplog, "Constraints", t, dvdl_constr); -+ } -+ enerd->term[F_DVDL_CONSTR] += dvdl_constr; -+ m_add(force_vir, shake_vir, vir); -+ wallcycle_stop(wcycle, ewcCONSTR); -+ } -+ else -+ { -+ copy_mat(force_vir, vir); -+ } -+ -+ clear_mat(ekin); -+ enerd->term[F_PRES] = -+ calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres); -+ -+ sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals); -+ -+ if (EI_ENERGY_MINIMIZATION(inputrec->eI)) -+ { -+ get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, ems); -+ } -+} -+ -+static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, -+ gmx_mtop_t *mtop, -+ em_state_t *s_min, em_state_t *s_b) -+{ -+ rvec *fm, *fb, *fmg; -+ t_block *cgs_gl; -+ int ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m; -+ double partsum; -+ unsigned char *grpnrFREEZE; -+ -+ if (debug) -+ { -+ fprintf(debug, "Doing reorder_partsum\n"); -+ } -+ -+ fm = s_min->f; -+ fb = s_b->f; -+ -+ cgs_gl = dd_charge_groups_global(cr->dd); -+ index = cgs_gl->index; -+ -+ /* Collect fm in a global vector fmg. -+ * This conflicts with the spirit of domain decomposition, -+ * but to fully optimize this a much more complicated algorithm is required. 
-+ */ -+ snew(fmg, mtop->natoms); -+ -+ ncg = s_min->s.ncg_gl; -+ cg_gl = s_min->s.cg_gl; -+ i = 0; -+ for (c = 0; c < ncg; c++) -+ { -+ cg = cg_gl[c]; -+ a0 = index[cg]; -+ a1 = index[cg+1]; -+ for (a = a0; a < a1; a++) -+ { -+ copy_rvec(fm[i], fmg[a]); -+ i++; -+ } -+ } -+ gmx_sum(mtop->natoms*3, fmg[0], cr); -+ -+ /* Now we will determine the part of the sum for the cgs in state s_b */ -+ ncg = s_b->s.ncg_gl; -+ cg_gl = s_b->s.cg_gl; -+ partsum = 0; -+ i = 0; -+ gf = 0; -+ grpnrFREEZE = mtop->groups.grpnr[egcFREEZE]; -+ for (c = 0; c < ncg; c++) -+ { -+ cg = cg_gl[c]; -+ a0 = index[cg]; -+ a1 = index[cg+1]; -+ for (a = a0; a < a1; a++) -+ { -+ if (mdatoms->cFREEZE && grpnrFREEZE) -+ { -+ gf = grpnrFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ if (!opts->nFreeze[gf][m]) -+ { -+ partsum += (fb[i][m] - fmg[a][m])*fb[i][m]; -+ } -+ } -+ i++; -+ } -+ } -+ -+ sfree(fmg); -+ -+ return partsum; -+} -+ -+static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, -+ gmx_mtop_t *mtop, -+ em_state_t *s_min, em_state_t *s_b) -+{ -+ rvec *fm, *fb; -+ double sum; -+ int gf, i, m; -+ -+ /* This is just the classical Polak-Ribiere calculation of beta; -+ * it looks a bit complicated since we take freeze groups into account, -+ * and might have to sum it in parallel runs. -+ */ -+ -+ if (!DOMAINDECOMP(cr) || -+ (s_min->s.ddp_count == cr->dd->ddp_count && -+ s_b->s.ddp_count == cr->dd->ddp_count)) -+ { -+ fm = s_min->f; -+ fb = s_b->f; -+ sum = 0; -+ gf = 0; -+ /* This part of code can be incorrect with DD, -+ * since the atom ordering in s_b and s_min might differ. -+ */ -+ for (i = 0; i < mdatoms->homenr; i++) -+ { -+ if (mdatoms->cFREEZE) -+ { -+ gf = mdatoms->cFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ if (!opts->nFreeze[gf][m]) -+ { -+ sum += (fb[i][m] - fm[i][m])*fb[i][m]; -+ } -+ } -+ } -+ } -+ else -+ { -+ /* We need to reorder cgs while summing */ -+ sum = reorder_partsum(cr, opts, mdatoms, mtop, s_min, s_b); -+ } -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &sum, cr); -+ } -+ -+ return sum/sqr(s_min->fnorm); -+} -+ -+double do_cg(FILE *fplog, t_commrec *cr, -+ int nfile, const t_filenm fnm[], -+ const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact, -+ int gmx_unused nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int gmx_unused stepout, -+ t_inputrec *inputrec, -+ gmx_mtop_t *top_global, t_fcdata *fcd, -+ t_state *state_global, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t gmx_unused ed, -+ t_forcerec *fr, -+ int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, -+ gmx_membed_t gmx_unused membed, -+ real gmx_unused cpt_period, real gmx_unused max_hours, -+ const char gmx_unused *deviceOptions, -+ int imdport, -+ unsigned long gmx_unused Flags, -+ gmx_walltime_accounting_t walltime_accounting) -+{ -+ const char *CG = "Polak-Ribiere Conjugate Gradients"; -+ -+ em_state_t *s_min, *s_a, *s_b, *s_c; -+ gmx_localtop_t *top; -+ gmx_enerdata_t *enerd; -+ rvec *f; -+ gmx_global_stat_t gstat; -+ t_graph *graph; -+ rvec *f_global, *p, *sf, *sfm; -+ double gpa, gpb, gpc, tmp, sum[2], minstep; -+ real fnormn; -+ real stepsize; -+ real a, b, c, beta = 0.0; -+ real epot_repl = 0; -+ real pnorm; -+ t_mdebin *mdebin; -+ gmx_bool converged, foundlower; -+ rvec mu_tot; -+ gmx_bool do_log = FALSE, do_ene = FALSE, do_x, do_f; -+ tensor vir, pres; -+ int number_steps, neval = 0, nstcg = inputrec->nstcgsteep; -+ gmx_mdoutf_t outf; -+ int i, m, gf, step, nminstep; -+ real terminate = 0; -+ 
-+ step = 0; -+ -+ s_min = init_em_state(); -+ s_a = init_em_state(); -+ s_b = init_em_state(); -+ s_c = init_em_state(); -+ -+ /* Init em and store the local state in s_min */ -+ init_em(fplog, CG, cr, inputrec, -+ state_global, top_global, s_min, &top, &f, &f_global, -+ nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, -+ nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); -+ -+ /* Print to log file */ -+ print_em_start(fplog, cr, walltime_accounting, wcycle, CG); -+ -+ /* Max number of steps */ -+ number_steps = inputrec->nsteps; -+ -+ if (MASTER(cr)) -+ { -+ sp_header(stderr, CG, inputrec->em_tol, number_steps); -+ } -+ if (fplog) -+ { -+ sp_header(fplog, CG, inputrec->em_tol, number_steps); -+ } -+ -+ /* Call the force routine and some auxiliary (neighboursearching etc.) */ -+ /* do_force always puts the charge groups in the box and shifts again -+ * We do not unshift, so molecules are always whole in congrad.c -+ */ -+ evaluate_energy(fplog, cr, -+ top_global, s_min, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, TRUE); -+ where(); -+ -+ if (MASTER(cr)) -+ { -+ /* Copy stuff to the energy bin for easy printing etc. */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)step, -+ mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box, -+ NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ -+ print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]); -+ print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ where(); -+ -+ /* Estimate/guess the initial stepsize */ -+ stepsize = inputrec->em_stepsize/s_min->fnorm; -+ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, " F-max = %12.5e on atom %d\n", -+ s_min->fmax, s_min->a_fmax+1); -+ fprintf(stderr, " F-Norm = %12.5e\n", -+ s_min->fnorm/sqrt(state_global->natoms)); -+ fprintf(stderr, "\n"); -+ /* and copy to the log file too... */ -+ fprintf(fplog, " F-max = %12.5e on atom %d\n", -+ s_min->fmax, s_min->a_fmax+1); -+ fprintf(fplog, " F-Norm = %12.5e\n", -+ s_min->fnorm/sqrt(state_global->natoms)); -+ fprintf(fplog, "\n"); -+ } -+ /* Start the loop over CG steps. -+ * Each successful step is counted, and we continue until -+ * we either converge or reach the max number of steps. -+ */ -+ converged = FALSE; -+ for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++) -+ { -+ -+ /* start taking steps in a new direction -+ * First time we enter the routine, beta=0, and the direction is -+ * simply the negative gradient. -+ */ -+ -+ /* Calculate the new direction in p, and the gradient in this direction, gpa */ -+ p = s_min->s.cg_p; -+ sf = s_min->f; -+ gpa = 0; -+ gf = 0; -+ for (i = 0; i < mdatoms->homenr; i++) -+ { -+ if (mdatoms->cFREEZE) -+ { -+ gf = mdatoms->cFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ if (!inputrec->opts.nFreeze[gf][m]) -+ { -+ p[i][m] = sf[i][m] + beta*p[i][m]; -+ gpa -= p[i][m]*sf[i][m]; -+ /* f is negative gradient, thus the sign */ -+ } -+ else -+ { -+ p[i][m] = 0; -+ } -+ } -+ } -+ -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpa, cr); -+ } -+ -+ /* Calculate the norm of the search vector */ -+ get_f_norm_max(cr, &(inputrec->opts), mdatoms, p, &pnorm, NULL, NULL); -+ -+ /* Just in case stepsize reaches zero due to numerical precision... 
*/ -+ if (stepsize <= 0) -+ { -+ stepsize = inputrec->em_stepsize/pnorm; -+ } -+ -+ /* -+ * Double check the value of the derivative in the search direction. -+ * If it is positive it must be due to the old information in the -+ * CG formula, so just remove that and start over with beta=0. -+ * This corresponds to a steepest descent step. -+ */ -+ if (gpa > 0) -+ { -+ beta = 0; -+ step--; /* Don't count this step since we are restarting */ -+ continue; /* Go back to the beginning of the big for-loop */ -+ } -+ -+ /* Calculate minimum allowed stepsize, before the average (norm) -+ * relative change in coordinate is smaller than precision -+ */ -+ minstep = 0; -+ for (i = 0; i < mdatoms->homenr; i++) -+ { -+ for (m = 0; m < DIM; m++) -+ { -+ tmp = fabs(s_min->s.x[i][m]); -+ if (tmp < 1.0) -+ { -+ tmp = 1.0; -+ } -+ tmp = p[i][m]/tmp; -+ minstep += tmp*tmp; -+ } -+ } -+ /* Add up from all CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &minstep, cr); -+ } -+ -+ minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms)); -+ -+ if (stepsize < minstep) -+ { -+ converged = TRUE; -+ break; -+ } -+ -+ /* Write coordinates if necessary */ -+ do_x = do_per_step(step, inputrec->nstxout); -+ do_f = do_per_step(step, inputrec->nstfout); -+ -+ write_em_traj(fplog, cr, outf, do_x, do_f, NULL, -+ top_global, inputrec, step, -+ s_min, state_global, f_global); -+ -+ /* Take a step downhill. -+ * In theory, we should minimize the function along this direction. -+ * That is quite possible, but it turns out to take 5-10 function evaluations -+ * for each line. However, we dont really need to find the exact minimum - -+ * it is much better to start a new CG step in a modified direction as soon -+ * as we are close to it. This will save a lot of energy evaluations. -+ * -+ * In practice, we just try to take a single step. -+ * If it worked (i.e. lowered the energy), we increase the stepsize but -+ * the continue straight to the next CG step without trying to find any minimum. -+ * If it didn't work (higher energy), there must be a minimum somewhere between -+ * the old position and the new one. -+ * -+ * Due to the finite numerical accuracy, it turns out that it is a good idea -+ * to even accept a SMALL increase in energy, if the derivative is still downhill. -+ * This leads to lower final energies in the tests I've done. 
/ Erik -+ */ -+ s_a->epot = s_min->epot; -+ a = 0.0; -+ c = a + stepsize; /* reference position along line is zero */ -+ -+ if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) -+ { -+ em_dd_partition_system(fplog, step, cr, top_global, inputrec, -+ s_min, top, mdatoms, fr, vsite, constr, -+ nrnb, wcycle); -+ } -+ -+ /* Take a trial step (new coords in s_c) */ -+ do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, c, s_min->s.cg_p, s_c, -+ constr, top, nrnb, wcycle, -1); -+ -+ neval++; -+ /* Calculate energy for the trial step */ -+ evaluate_energy(fplog, cr, -+ top_global, s_c, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, FALSE); -+ -+ /* Calc derivative along line */ -+ p = s_c->s.cg_p; -+ sf = s_c->f; -+ gpc = 0; -+ for (i = 0; i < mdatoms->homenr; i++) -+ { -+ for (m = 0; m < DIM; m++) -+ { -+ gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */ -+ } -+ } -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpc, cr); -+ } -+ -+ /* This is the max amount of increase in energy we tolerate */ -+ tmp = sqrt(GMX_REAL_EPS)*fabs(s_a->epot); -+ -+ /* Accept the step if the energy is lower, or if it is not significantly higher -+ * and the line derivative is still negative. -+ */ -+ if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) -+ { -+ foundlower = TRUE; -+ /* Great, we found a better energy. Increase step for next iteration -+ * if we are still going down, decrease it otherwise -+ */ -+ if (gpc < 0) -+ { -+ stepsize *= 1.618034; /* The golden section */ -+ } -+ else -+ { -+ stepsize *= 0.618034; /* 1/golden section */ -+ } -+ } -+ else -+ { -+ /* New energy is the same or higher. We will have to do some work -+ * to find a smaller value in the interval. Take smaller step next time! -+ */ -+ foundlower = FALSE; -+ stepsize *= 0.618034; -+ } -+ -+ -+ -+ -+ /* OK, if we didn't find a lower value we will have to locate one now - there must -+ * be one in the interval [a=0,c]. -+ * The same thing is valid here, though: Don't spend dozens of iterations to find -+ * the line minimum. We try to interpolate based on the derivative at the endpoints, -+ * and only continue until we find a lower value. In most cases this means 1-2 iterations. -+ * -+ * I also have a safeguard for potentially really patological functions so we never -+ * take more than 20 steps before we give up ... -+ * -+ * If we already found a lower value we just skip this step and continue to the update. -+ */ -+ if (!foundlower) -+ { -+ nminstep = 0; -+ -+ do -+ { -+ /* Select a new trial point. -+ * If the derivatives at points a & c have different sign we interpolate to zero, -+ * otherwise just do a bisection. 
-+ */ -+ if (gpa < 0 && gpc > 0) -+ { -+ b = a + gpa*(a-c)/(gpc-gpa); -+ } -+ else -+ { -+ b = 0.5*(a+c); -+ } -+ -+ /* safeguard if interpolation close to machine accuracy causes errors: -+ * never go outside the interval -+ */ -+ if (b <= a || b >= c) -+ { -+ b = 0.5*(a+c); -+ } -+ -+ if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) -+ { -+ /* Reload the old state */ -+ em_dd_partition_system(fplog, -1, cr, top_global, inputrec, -+ s_min, top, mdatoms, fr, vsite, constr, -+ nrnb, wcycle); -+ } -+ -+ /* Take a trial step to this new point - new coords in s_b */ -+ do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, b, s_min->s.cg_p, s_b, -+ constr, top, nrnb, wcycle, -1); -+ -+ neval++; -+ /* Calculate energy for the trial step */ -+ evaluate_energy(fplog, cr, -+ top_global, s_b, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, FALSE); -+ -+ /* p does not change within a step, but since the domain decomposition -+ * might change, we have to use cg_p of s_b here. -+ */ -+ p = s_b->s.cg_p; -+ sf = s_b->f; -+ gpb = 0; -+ for (i = 0; i < mdatoms->homenr; i++) -+ { -+ for (m = 0; m < DIM; m++) -+ { -+ gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */ -+ } -+ } -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpb, cr); -+ } -+ -+ if (debug) -+ { -+ fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n", -+ s_a->epot, s_b->epot, s_c->epot, gpb); -+ } -+ -+ epot_repl = s_b->epot; -+ -+ /* Keep one of the intervals based on the value of the derivative at the new point */ -+ if (gpb > 0) -+ { -+ /* Replace c endpoint with b */ -+ swap_em_state(s_b, s_c); -+ c = b; -+ gpc = gpb; -+ } -+ else -+ { -+ /* Replace a endpoint with b */ -+ swap_em_state(s_b, s_a); -+ a = b; -+ gpa = gpb; -+ } -+ -+ /* -+ * Stop search as soon as we find a value smaller than the endpoints. -+ * Never run more than 20 steps, no matter what. -+ */ -+ nminstep++; -+ } -+ while ((epot_repl > s_a->epot || epot_repl > s_c->epot) && -+ (nminstep < 20)); -+ -+ if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS || -+ nminstep >= 20) -+ { -+ /* OK. We couldn't find a significantly lower energy. -+ * If beta==0 this was steepest descent, and then we give up. -+ * If not, set beta=0 and restart with steepest descent before quitting. -+ */ -+ if (beta == 0.0) -+ { -+ /* Converged */ -+ converged = TRUE; -+ break; -+ } -+ else -+ { -+ /* Reset memory before giving up */ -+ beta = 0.0; -+ continue; -+ } -+ } -+ -+ /* Select min energy state of A & C, put the best in B. -+ */ -+ if (s_c->epot < s_a->epot) -+ { -+ if (debug) -+ { -+ fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n", -+ s_c->epot, s_a->epot); -+ } -+ swap_em_state(s_b, s_c); -+ gpb = gpc; -+ b = c; -+ } -+ else -+ { -+ if (debug) -+ { -+ fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n", -+ s_a->epot, s_c->epot); -+ } -+ swap_em_state(s_b, s_a); -+ gpb = gpa; -+ b = a; -+ } -+ -+ } -+ else -+ { -+ if (debug) -+ { -+ fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n", -+ s_c->epot); -+ } -+ swap_em_state(s_b, s_c); -+ gpb = gpc; -+ b = c; -+ } -+ -+ /* new search direction */ -+ /* beta = 0 means forget all memory and restart with steepest descents. */ -+ if (nstcg && ((step % nstcg) == 0)) -+ { -+ beta = 0.0; -+ } -+ else -+ { -+ /* s_min->fnorm cannot be zero, because then we would have converged -+ * and broken out. -+ */ -+ -+ /* Polak-Ribiere update. 
-+ * Change to fnorm2/fnorm2_old for Fletcher-Reeves -+ */ -+ beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b); -+ } -+ /* Limit beta to prevent oscillations */ -+ if (fabs(beta) > 5.0) -+ { -+ beta = 0.0; -+ } -+ -+ -+ /* update positions */ -+ swap_em_state(s_min, s_b); -+ gpa = gpb; -+ -+ /* Print it if necessary */ -+ if (MASTER(cr)) -+ { -+ if (bVerbose) -+ { -+ fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", -+ step, s_min->epot, s_min->fnorm/sqrt(state_global->natoms), -+ s_min->fmax, s_min->a_fmax+1); -+ } -+ /* Store the new (lower) energies */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)step, -+ mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box, -+ NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ -+ do_log = do_per_step(step, inputrec->nstlog); -+ do_ene = do_per_step(step, inputrec->nstenergy); -+ -+ /* Prepare IMD energy record, if bIMD is TRUE. */ -+ IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, step, TRUE); -+ -+ if (do_log) -+ { -+ print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]); -+ } -+ print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, -+ do_log ? fplog : NULL, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ -+ /* Send energies and positions to the IMD client if bIMD is TRUE. */ -+ if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr)) -+ { -+ IMD_send_positions(inputrec->imd); -+ } -+ -+ /* Stop when the maximum force lies below tolerance. -+ * If we have reached machine precision, converged is already set to true. -+ */ -+ converged = converged || (s_min->fmax < inputrec->em_tol); -+ -+ } /* End of the loop */ -+ -+ /* IMD cleanup, if bIMD is TRUE. */ -+ IMD_finalize(inputrec->bIMD, inputrec->imd); -+ -+ if (converged) -+ { -+ step--; /* we never took that last step in this case */ -+ -+ } -+ if (s_min->fmax > inputrec->em_tol) -+ { -+ if (MASTER(cr)) -+ { -+ warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE); -+ warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE); -+ } -+ converged = FALSE; -+ } -+ -+ if (MASTER(cr)) -+ { -+ /* If we printed energy and/or logfile last step (which was the last step) -+ * we don't have to do it again, but otherwise print the final values. -+ */ -+ if (!do_log) -+ { -+ /* Write final value to log since we didn't do anything the last step */ -+ print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]); -+ } -+ if (!do_ene || !do_log) -+ { -+ /* Write final energy file entries */ -+ print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, -+ !do_log ? fplog : NULL, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ } -+ -+ /* Print some stuff... */ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "\nwriting lowest energy coordinates.\n"); -+ } -+ -+ /* IMPORTANT! -+ * For accurate normal mode calculation it is imperative that we -+ * store the last conformation into the full precision binary trajectory. -+ * -+ * However, we should only do it if we did NOT already write this step -+ * above (which we did if do_x or do_f was true). 
-+ */ -+ do_x = !do_per_step(step, inputrec->nstxout); -+ do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout)); -+ -+ write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), -+ top_global, inputrec, step, -+ s_min, state_global, f_global); -+ -+ fnormn = s_min->fnorm/sqrt(state_global->natoms); -+ -+ if (MASTER(cr)) -+ { -+ print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps, -+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); -+ print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps, -+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); -+ -+ fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); -+ } -+ -+ finish_em(cr, outf, walltime_accounting, wcycle); -+ -+ /* To print the actual number of steps we needed somewhere */ -+ walltime_accounting_set_nsteps_done(walltime_accounting, step); -+ -+ return 0; -+} /* That's all folks */ -+ -+ -+double do_lbfgs(FILE *fplog, t_commrec *cr, -+ int nfile, const t_filenm fnm[], -+ const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact, -+ int gmx_unused nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int gmx_unused stepout, -+ t_inputrec *inputrec, -+ gmx_mtop_t *top_global, t_fcdata *fcd, -+ t_state *state, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t gmx_unused ed, -+ t_forcerec *fr, -+ int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, -+ gmx_membed_t gmx_unused membed, -+ real gmx_unused cpt_period, real gmx_unused max_hours, -+ const char gmx_unused *deviceOptions, -+ int imdport, -+ unsigned long gmx_unused Flags, -+ gmx_walltime_accounting_t walltime_accounting) -+{ -+ static const char *LBFGS = "Low-Memory BFGS Minimizer"; -+ em_state_t ems; -+ gmx_localtop_t *top; -+ gmx_enerdata_t *enerd; -+ rvec *f; -+ gmx_global_stat_t gstat; -+ t_graph *graph; -+ rvec *f_global; -+ int ncorr, nmaxcorr, point, cp, neval, nminstep; -+ double stepsize, gpa, gpb, gpc, tmp, minstep; -+ real *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg; -+ real *xa, *xb, *xc, *fa, *fb, *fc, *xtmp, *ftmp; -+ real a, b, c, maxdelta, delta; -+ real diag, Epot0, Epot, EpotA, EpotB, EpotC; -+ real dgdx, dgdg, sq, yr, beta; -+ t_mdebin *mdebin; -+ gmx_bool converged, first; -+ rvec mu_tot; -+ real fnorm, fmax; -+ gmx_bool do_log, do_ene, do_x, do_f, foundlower, *frozen; -+ tensor vir, pres; -+ int start, end, number_steps; -+ gmx_mdoutf_t outf; -+ int i, k, m, n, nfmax, gf, step; -+ int mdof_flags; -+ /* not used */ -+ real terminate; -+ -+ if (PAR(cr)) -+ { -+ gmx_fatal(FARGS, "Cannot do parallel L-BFGS Minimization - yet.\n"); -+ } -+ -+ if (NULL != constr) -+ { -+ gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent)."); -+ } -+ -+ n = 3*state->natoms; -+ nmaxcorr = inputrec->nbfgscorr; -+ -+ /* Allocate memory */ -+ /* Use pointers to real so we dont have to loop over both atoms and -+ * dimensions all the time... -+ * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real -+ * that point to the same memory. 
-+ */ -+ snew(xa, n); -+ snew(xb, n); -+ snew(xc, n); -+ snew(fa, n); -+ snew(fb, n); -+ snew(fc, n); -+ snew(frozen, n); -+ -+ snew(p, n); -+ snew(lastx, n); -+ snew(lastf, n); -+ snew(rho, nmaxcorr); -+ snew(alpha, nmaxcorr); -+ -+ snew(dx, nmaxcorr); -+ for (i = 0; i < nmaxcorr; i++) -+ { -+ snew(dx[i], n); -+ } -+ -+ snew(dg, nmaxcorr); -+ for (i = 0; i < nmaxcorr; i++) -+ { -+ snew(dg[i], n); -+ } -+ -+ step = 0; -+ neval = 0; -+ -+ /* Init em */ -+ init_em(fplog, LBFGS, cr, inputrec, -+ state, top_global, &ems, &top, &f, &f_global, -+ nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, -+ nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); -+ /* Do_lbfgs is not completely updated like do_steep and do_cg, -+ * so we free some memory again. -+ */ -+ sfree(ems.s.x); -+ sfree(ems.f); -+ -+ xx = (real *)state->x; -+ ff = (real *)f; -+ -+ start = 0; -+ end = mdatoms->homenr; -+ -+ /* Print to log file */ -+ print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS); -+ -+ do_log = do_ene = do_x = do_f = TRUE; -+ -+ /* Max number of steps */ -+ number_steps = inputrec->nsteps; -+ -+ /* Create a 3*natoms index to tell whether each degree of freedom is frozen */ -+ gf = 0; -+ for (i = start; i < end; i++) -+ { -+ if (mdatoms->cFREEZE) -+ { -+ gf = mdatoms->cFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ frozen[3*i+m] = inputrec->opts.nFreeze[gf][m]; -+ } -+ } -+ if (MASTER(cr)) -+ { -+ sp_header(stderr, LBFGS, inputrec->em_tol, number_steps); -+ } -+ if (fplog) -+ { -+ sp_header(fplog, LBFGS, inputrec->em_tol, number_steps); -+ } -+ -+ if (vsite) -+ { -+ construct_vsites(vsite, state->x, 1, NULL, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, cr, state->box); -+ } -+ -+ /* Call the force routine and some auxiliary (neighboursearching etc.) */ -+ /* do_force always puts the charge groups in the box and shifts again -+ * We do not unshift, so molecules are always whole -+ */ -+ neval++; -+ ems.s.x = state->x; -+ ems.f = f; -+ evaluate_energy(fplog, cr, -+ top_global, &ems, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, TRUE); -+ where(); -+ -+ if (MASTER(cr)) -+ { -+ /* Copy stuff to the energy bin for easy printing etc. */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)step, -+ mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box, -+ NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ -+ print_ebin_header(fplog, step, step, state->lambda[efptFEP]); -+ print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ where(); -+ -+ /* This is the starting energy */ -+ Epot = enerd->term[F_EPOT]; -+ -+ fnorm = ems.fnorm; -+ fmax = ems.fmax; -+ nfmax = ems.a_fmax; -+ -+ /* Set the initial step. -+ * since it will be multiplied by the non-normalized search direction -+ * vector (force vector the first time), we scale it by the -+ * norm of the force. -+ */ -+ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr); -+ fprintf(stderr, " F-max = %12.5e on atom %d\n", fmax, nfmax+1); -+ fprintf(stderr, " F-Norm = %12.5e\n", fnorm/sqrt(state->natoms)); -+ fprintf(stderr, "\n"); -+ /* and copy to the log file too... 
*/ -+ fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr); -+ fprintf(fplog, " F-max = %12.5e on atom %d\n", fmax, nfmax+1); -+ fprintf(fplog, " F-Norm = %12.5e\n", fnorm/sqrt(state->natoms)); -+ fprintf(fplog, "\n"); -+ } -+ -+ point = 0; -+ for (i = 0; i < n; i++) -+ { -+ if (!frozen[i]) -+ { -+ dx[point][i] = ff[i]; /* Initial search direction */ -+ } -+ else -+ { -+ dx[point][i] = 0; -+ } -+ } -+ -+ stepsize = 1.0/fnorm; -+ converged = FALSE; -+ -+ /* Start the loop over BFGS steps. -+ * Each successful step is counted, and we continue until -+ * we either converge or reach the max number of steps. -+ */ -+ -+ ncorr = 0; -+ -+ /* Set the gradient from the force */ -+ converged = FALSE; -+ for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++) -+ { -+ -+ /* Write coordinates if necessary */ -+ do_x = do_per_step(step, inputrec->nstxout); -+ do_f = do_per_step(step, inputrec->nstfout); -+ -+ mdof_flags = 0; -+ if (do_x) -+ { -+ mdof_flags |= MDOF_X; -+ } -+ -+ if (do_f) -+ { -+ mdof_flags |= MDOF_F; -+ } -+ -+ if (inputrec->bIMD) -+ { -+ mdof_flags |= MDOF_IMD; -+ } -+ -+ mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, -+ top_global, step, (real)step, state, state, f, f); -+ -+ /* Do the linesearching in the direction dx[point][0..(n-1)] */ -+ -+ /* pointer to current direction - point=0 first time here */ -+ s = dx[point]; -+ -+ /* calculate line gradient */ -+ for (gpa = 0, i = 0; i < n; i++) -+ { -+ gpa -= s[i]*ff[i]; -+ } -+ -+ /* Calculate minimum allowed stepsize, before the average (norm) -+ * relative change in coordinate is smaller than precision -+ */ -+ for (minstep = 0, i = 0; i < n; i++) -+ { -+ tmp = fabs(xx[i]); -+ if (tmp < 1.0) -+ { -+ tmp = 1.0; -+ } -+ tmp = s[i]/tmp; -+ minstep += tmp*tmp; -+ } -+ minstep = GMX_REAL_EPS/sqrt(minstep/n); -+ -+ if (stepsize < minstep) -+ { -+ converged = TRUE; -+ break; -+ } -+ -+ /* Store old forces and coordinates */ -+ for (i = 0; i < n; i++) -+ { -+ lastx[i] = xx[i]; -+ lastf[i] = ff[i]; -+ } -+ Epot0 = Epot; -+ -+ first = TRUE; -+ -+ for (i = 0; i < n; i++) -+ { -+ xa[i] = xx[i]; -+ } -+ -+ /* Take a step downhill. -+ * In theory, we should minimize the function along this direction. -+ * That is quite possible, but it turns out to take 5-10 function evaluations -+ * for each line. However, we dont really need to find the exact minimum - -+ * it is much better to start a new BFGS step in a modified direction as soon -+ * as we are close to it. This will save a lot of energy evaluations. -+ * -+ * In practice, we just try to take a single step. -+ * If it worked (i.e. lowered the energy), we increase the stepsize but -+ * the continue straight to the next BFGS step without trying to find any minimum. -+ * If it didn't work (higher energy), there must be a minimum somewhere between -+ * the old position and the new one. -+ * -+ * Due to the finite numerical accuracy, it turns out that it is a good idea -+ * to even accept a SMALL increase in energy, if the derivative is still downhill. -+ * This leads to lower final energies in the tests I've done. / Erik -+ */ -+ foundlower = FALSE; -+ EpotA = Epot0; -+ a = 0.0; -+ c = a + stepsize; /* reference position along line is zero */ -+ -+ /* Check stepsize first. We do not allow displacements -+ * larger than emstep. 
-+ */ -+ do -+ { -+ c = a + stepsize; -+ maxdelta = 0; -+ for (i = 0; i < n; i++) -+ { -+ delta = c*s[i]; -+ if (delta > maxdelta) -+ { -+ maxdelta = delta; -+ } -+ } -+ if (maxdelta > inputrec->em_stepsize) -+ { -+ stepsize *= 0.1; -+ } -+ } -+ while (maxdelta > inputrec->em_stepsize); -+ -+ /* Take a trial step */ -+ for (i = 0; i < n; i++) -+ { -+ xc[i] = lastx[i] + c*s[i]; -+ } -+ -+ neval++; -+ /* Calculate energy for the trial step */ -+ ems.s.x = (rvec *)xc; -+ ems.f = (rvec *)fc; -+ evaluate_energy(fplog, cr, -+ top_global, &ems, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, step, FALSE); -+ EpotC = ems.epot; -+ -+ /* Calc derivative along line */ -+ for (gpc = 0, i = 0; i < n; i++) -+ { -+ gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */ -+ } -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpc, cr); -+ } -+ -+ /* This is the max amount of increase in energy we tolerate */ -+ tmp = sqrt(GMX_REAL_EPS)*fabs(EpotA); -+ -+ /* Accept the step if the energy is lower, or if it is not significantly higher -+ * and the line derivative is still negative. -+ */ -+ if (EpotC < EpotA || (gpc < 0 && EpotC < (EpotA+tmp))) -+ { -+ foundlower = TRUE; -+ /* Great, we found a better energy. Increase step for next iteration -+ * if we are still going down, decrease it otherwise -+ */ -+ if (gpc < 0) -+ { -+ stepsize *= 1.618034; /* The golden section */ -+ } -+ else -+ { -+ stepsize *= 0.618034; /* 1/golden section */ -+ } -+ } -+ else -+ { -+ /* New energy is the same or higher. We will have to do some work -+ * to find a smaller value in the interval. Take smaller step next time! -+ */ -+ foundlower = FALSE; -+ stepsize *= 0.618034; -+ } -+ -+ /* OK, if we didn't find a lower value we will have to locate one now - there must -+ * be one in the interval [a=0,c]. -+ * The same thing is valid here, though: Don't spend dozens of iterations to find -+ * the line minimum. We try to interpolate based on the derivative at the endpoints, -+ * and only continue until we find a lower value. In most cases this means 1-2 iterations. -+ * -+ * I also have a safeguard for potentially really patological functions so we never -+ * take more than 20 steps before we give up ... -+ * -+ * If we already found a lower value we just skip this step and continue to the update. -+ */ -+ -+ if (!foundlower) -+ { -+ -+ nminstep = 0; -+ do -+ { -+ /* Select a new trial point. -+ * If the derivatives at points a & c have different sign we interpolate to zero, -+ * otherwise just do a bisection. 
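The bracketing rule described in the comment above (the same one used by the conjugate-gradient line search earlier in this file) is a secant step on the directional derivative with a bisection fallback. A minimal standalone sketch with illustrative names: a and c are the bracket endpoints along the search direction, and gpa, gpc are the directional derivatives at those endpoints.

    static double pick_trial_point(double a, double c, double gpa, double gpc)
    {
        double b;

        if (gpa < 0 && gpc > 0)
        {
            /* Zero of the secant through (a, gpa) and (c, gpc) */
            b = a + gpa*(a - c)/(gpc - gpa);
        }
        else
        {
            b = 0.5*(a + c);
        }
        /* Round-off can push b onto or past an endpoint; stay inside the bracket */
        if (b <= a || b >= c)
        {
            b = 0.5*(a + c);
        }
        return b;
    }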
-+ */ -+ -+ if (gpa < 0 && gpc > 0) -+ { -+ b = a + gpa*(a-c)/(gpc-gpa); -+ } -+ else -+ { -+ b = 0.5*(a+c); -+ } -+ -+ /* safeguard if interpolation close to machine accuracy causes errors: -+ * never go outside the interval -+ */ -+ if (b <= a || b >= c) -+ { -+ b = 0.5*(a+c); -+ } -+ -+ /* Take a trial step */ -+ for (i = 0; i < n; i++) -+ { -+ xb[i] = lastx[i] + b*s[i]; -+ } -+ -+ neval++; -+ /* Calculate energy for the trial step */ -+ ems.s.x = (rvec *)xb; -+ ems.f = (rvec *)fb; -+ evaluate_energy(fplog, cr, -+ top_global, &ems, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, step, FALSE); -+ EpotB = ems.epot; -+ -+ fnorm = ems.fnorm; -+ -+ for (gpb = 0, i = 0; i < n; i++) -+ { -+ gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */ -+ -+ } -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpb, cr); -+ } -+ -+ /* Keep one of the intervals based on the value of the derivative at the new point */ -+ if (gpb > 0) -+ { -+ /* Replace c endpoint with b */ -+ EpotC = EpotB; -+ c = b; -+ gpc = gpb; -+ /* swap coord pointers b/c */ -+ xtmp = xb; -+ ftmp = fb; -+ xb = xc; -+ fb = fc; -+ xc = xtmp; -+ fc = ftmp; -+ } -+ else -+ { -+ /* Replace a endpoint with b */ -+ EpotA = EpotB; -+ a = b; -+ gpa = gpb; -+ /* swap coord pointers a/b */ -+ xtmp = xb; -+ ftmp = fb; -+ xb = xa; -+ fb = fa; -+ xa = xtmp; -+ fa = ftmp; -+ } -+ -+ /* -+ * Stop search as soon as we find a value smaller than the endpoints, -+ * or if the tolerance is below machine precision. -+ * Never run more than 20 steps, no matter what. -+ */ -+ nminstep++; -+ } -+ while ((EpotB > EpotA || EpotB > EpotC) && (nminstep < 20)); -+ -+ if (fabs(EpotB-Epot0) < GMX_REAL_EPS || nminstep >= 20) -+ { -+ /* OK. We couldn't find a significantly lower energy. -+ * If ncorr==0 this was steepest descent, and then we give up. -+ * If not, reset memory to restart as steepest descent before quitting. -+ */ -+ if (ncorr == 0) -+ { -+ /* Converged */ -+ converged = TRUE; -+ break; -+ } -+ else -+ { -+ /* Reset memory */ -+ ncorr = 0; -+ /* Search in gradient direction */ -+ for (i = 0; i < n; i++) -+ { -+ dx[point][i] = ff[i]; -+ } -+ /* Reset stepsize */ -+ stepsize = 1.0/fnorm; -+ continue; -+ } -+ } -+ -+ /* Select min energy state of A & C, put the best in xx/ff/Epot -+ */ -+ if (EpotC < EpotA) -+ { -+ Epot = EpotC; -+ /* Use state C */ -+ for (i = 0; i < n; i++) -+ { -+ xx[i] = xc[i]; -+ ff[i] = fc[i]; -+ } -+ stepsize = c; -+ } -+ else -+ { -+ Epot = EpotA; -+ /* Use state A */ -+ for (i = 0; i < n; i++) -+ { -+ xx[i] = xa[i]; -+ ff[i] = fa[i]; -+ } -+ stepsize = a; -+ } -+ -+ } -+ else -+ { -+ /* found lower */ -+ Epot = EpotC; -+ /* Use state C */ -+ for (i = 0; i < n; i++) -+ { -+ xx[i] = xc[i]; -+ ff[i] = fc[i]; -+ } -+ stepsize = c; -+ } -+ -+ /* Update the memory information, and calculate a new -+ * approximation of the inverse hessian -+ */ -+ -+ /* Have new data in Epot, xx, ff */ -+ if (ncorr < nmaxcorr) -+ { -+ ncorr++; -+ } -+ -+ for (i = 0; i < n; i++) -+ { -+ dg[point][i] = lastf[i]-ff[i]; -+ dx[point][i] *= stepsize; -+ } -+ -+ dgdg = 0; -+ dgdx = 0; -+ for (i = 0; i < n; i++) -+ { -+ dgdg += dg[point][i]*dg[point][i]; -+ dgdx += dg[point][i]*dx[point][i]; -+ } -+ -+ diag = dgdx/dgdg; -+ -+ rho[point] = 1.0/dgdx; -+ point++; -+ -+ if (point >= nmaxcorr) -+ { -+ point = 0; -+ } -+ -+ /* Update */ -+ for (i = 0; i < n; i++) -+ { -+ p[i] = ff[i]; -+ } -+ -+ cp = point; -+ -+ /* Recursive update. 
First go back over the memory points */ -+ for (k = 0; k < ncorr; k++) -+ { -+ cp--; -+ if (cp < 0) -+ { -+ cp = ncorr-1; -+ } -+ -+ sq = 0; -+ for (i = 0; i < n; i++) -+ { -+ sq += dx[cp][i]*p[i]; -+ } -+ -+ alpha[cp] = rho[cp]*sq; -+ -+ for (i = 0; i < n; i++) -+ { -+ p[i] -= alpha[cp]*dg[cp][i]; -+ } -+ } -+ -+ for (i = 0; i < n; i++) -+ { -+ p[i] *= diag; -+ } -+ -+ /* And then go forward again */ -+ for (k = 0; k < ncorr; k++) -+ { -+ yr = 0; -+ for (i = 0; i < n; i++) -+ { -+ yr += p[i]*dg[cp][i]; -+ } -+ -+ beta = rho[cp]*yr; -+ beta = alpha[cp]-beta; -+ -+ for (i = 0; i < n; i++) -+ { -+ p[i] += beta*dx[cp][i]; -+ } -+ -+ cp++; -+ if (cp >= ncorr) -+ { -+ cp = 0; -+ } -+ } -+ -+ for (i = 0; i < n; i++) -+ { -+ if (!frozen[i]) -+ { -+ dx[point][i] = p[i]; -+ } -+ else -+ { -+ dx[point][i] = 0; -+ } -+ } -+ -+ stepsize = 1.0; -+ -+ /* Test whether the convergence criterion is met */ -+ get_f_norm_max(cr, &(inputrec->opts), mdatoms, f, &fnorm, &fmax, &nfmax); -+ -+ /* Print it if necessary */ -+ if (MASTER(cr)) -+ { -+ if (bVerbose) -+ { -+ fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", -+ step, Epot, fnorm/sqrt(state->natoms), fmax, nfmax+1); -+ } -+ /* Store the new (lower) energies */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)step, -+ mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box, -+ NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ do_log = do_per_step(step, inputrec->nstlog); -+ do_ene = do_per_step(step, inputrec->nstenergy); -+ if (do_log) -+ { -+ print_ebin_header(fplog, step, step, state->lambda[efptFEP]); -+ } -+ print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, -+ do_log ? fplog : NULL, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ -+ /* Send x and E to IMD client, if bIMD is TRUE. */ -+ if (do_IMD(inputrec->bIMD, step, cr, TRUE, state->box, state->x, inputrec, 0, wcycle) && MASTER(cr)) -+ { -+ IMD_send_positions(inputrec->imd); -+ } -+ -+ /* Stop when the maximum force lies below tolerance. -+ * If we have reached machine precision, converged is already set to true. -+ */ -+ -+ converged = converged || (fmax < inputrec->em_tol); -+ -+ } /* End of the loop */ -+ -+ /* IMD cleanup, if bIMD is TRUE. */ -+ IMD_finalize(inputrec->bIMD, inputrec->imd); -+ -+ if (converged) -+ { -+ step--; /* we never took that last step in this case */ -+ -+ } -+ if (fmax > inputrec->em_tol) -+ { -+ if (MASTER(cr)) -+ { -+ warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE); -+ warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE); -+ } -+ converged = FALSE; -+ } -+ -+ /* If we printed energy and/or logfile last step (which was the last step) -+ * we don't have to do it again, but otherwise print the final values. -+ */ -+ if (!do_log) /* Write final value to log since we didn't do anythin last step */ -+ { -+ print_ebin_header(fplog, step, step, state->lambda[efptFEP]); -+ } -+ if (!do_ene || !do_log) /* Write final energy file entries */ -+ { -+ print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, -+ !do_log ? fplog : NULL, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ -+ /* Print some stuff... */ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "\nwriting lowest energy coordinates.\n"); -+ } -+ -+ /* IMPORTANT! -+ * For accurate normal mode calculation it is imperative that we -+ * store the last conformation into the full precision binary trajectory. 
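For reference, the backward and forward passes over the stored correction pairs above are the standard L-BFGS two-loop recursion. A compact standalone sketch in textbook form (illustrative names, plain C arrays, no frozen-degree handling): given q initialized to the current gradient and the last m pairs s_k = x_{k+1} - x_k, y_k = g_{k+1} - g_k, it overwrites q with an approximation of H*g, so that -q is the quasi-Newton search direction.

    static void lbfgs_two_loop(int n, int m, double *q,
                               double **s, double **y,
                               const double *rho, /* rho[k] = 1/(y_k . s_k)           */
                               double *alpha,     /* scratch array of length m        */
                               double gamma)      /* initial Hessian guess H0=gamma*I */
    {
        int i, k;

        for (k = m - 1; k >= 0; k--)   /* newest pair to oldest */
        {
            double sq = 0.0;
            for (i = 0; i < n; i++)
            {
                sq += s[k][i]*q[i];
            }
            alpha[k] = rho[k]*sq;
            for (i = 0; i < n; i++)
            {
                q[i] -= alpha[k]*y[k][i];
            }
        }
        for (i = 0; i < n; i++)
        {
            q[i] *= gamma;             /* e.g. gamma = (s.y)/(y.y), the 'diag' above */
        }
        for (k = 0; k < m; k++)        /* oldest pair to newest */
        {
            double yr = 0.0;
            for (i = 0; i < n; i++)
            {
                yr += y[k][i]*q[i];
            }
            for (i = 0; i < n; i++)
            {
                q[i] += (alpha[k] - rho[k]*yr)*s[k][i];
            }
        }
    }

The code above applies the same recursion directly to the force vector (the negative gradient), so its result is already the new search direction.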
-+ * -+ * However, we should only do it if we did NOT already write this step -+ * above (which we did if do_x or do_f was true). -+ */ -+ do_x = !do_per_step(step, inputrec->nstxout); -+ do_f = !do_per_step(step, inputrec->nstfout); -+ write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), -+ top_global, inputrec, step, -+ &ems, state, f); -+ -+ if (MASTER(cr)) -+ { -+ print_converged(stderr, LBFGS, inputrec->em_tol, step, converged, -+ number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms)); -+ print_converged(fplog, LBFGS, inputrec->em_tol, step, converged, -+ number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms)); -+ -+ fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); -+ } -+ -+ finish_em(cr, outf, walltime_accounting, wcycle); -+ -+ /* To print the actual number of steps we needed somewhere */ -+ walltime_accounting_set_nsteps_done(walltime_accounting, step); -+ -+ return 0; -+} /* That's all folks */ -+ -+ -+double do_steep(FILE *fplog, t_commrec *cr, -+ int nfile, const t_filenm fnm[], -+ const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact, -+ int gmx_unused nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int gmx_unused stepout, -+ t_inputrec *inputrec, -+ gmx_mtop_t *top_global, t_fcdata *fcd, -+ t_state *state_global, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t gmx_unused ed, -+ t_forcerec *fr, -+ int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, -+ gmx_membed_t gmx_unused membed, -+ real gmx_unused cpt_period, real gmx_unused max_hours, -+ const char gmx_unused *deviceOptions, -+ int imdport, -+ unsigned long gmx_unused Flags, -+ gmx_walltime_accounting_t walltime_accounting) -+{ -+ const char *SD = "Steepest Descents"; -+ em_state_t *s_min, *s_try; -+ rvec *f_global; -+ gmx_localtop_t *top; -+ gmx_enerdata_t *enerd; -+ rvec *f; -+ gmx_global_stat_t gstat; -+ t_graph *graph; -+ real stepsize, constepsize; -+ real ustep, fnormn; -+ gmx_mdoutf_t outf; -+ t_mdebin *mdebin; -+ gmx_bool bDone, bAbort, do_x, do_f; -+ tensor vir, pres; -+ rvec mu_tot; -+ int nsteps; -+ int count = 0; -+ int steps_accepted = 0; -+ /* not used */ -+ real terminate = 0; -+ -+ s_min = init_em_state(); -+ s_try = init_em_state(); -+ -+ /* Init em and store the local state in s_try */ -+ init_em(fplog, SD, cr, inputrec, -+ state_global, top_global, s_try, &top, &f, &f_global, -+ nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, -+ nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); -+ -+ /* Print to log file */ -+ print_em_start(fplog, cr, walltime_accounting, wcycle, SD); -+ -+ /* Set variables for stepsize (in nm). This is the largest -+ * step that we are going to make in any direction. 
-+ */ -+ ustep = inputrec->em_stepsize; -+ stepsize = 0; -+ -+ /* Max number of steps */ -+ nsteps = inputrec->nsteps; -+ -+ if (MASTER(cr)) -+ { -+ /* Print to the screen */ -+ sp_header(stderr, SD, inputrec->em_tol, nsteps); -+ } -+ if (fplog) -+ { -+ sp_header(fplog, SD, inputrec->em_tol, nsteps); -+ } -+ -+ /**** HERE STARTS THE LOOP **** -+ * count is the counter for the number of steps -+ * bDone will be TRUE when the minimization has converged -+ * bAbort will be TRUE when nsteps steps have been performed or when -+ * the stepsize becomes smaller than is reasonable for machine precision -+ */ -+ count = 0; -+ bDone = FALSE; -+ bAbort = FALSE; -+ while (!bDone && !bAbort) -+ { -+ bAbort = (nsteps >= 0) && (count == nsteps); -+ -+ /* set new coordinates, except for first step */ -+ if (count > 0) -+ { -+ do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, -+ s_min, stepsize, s_min->f, s_try, -+ constr, top, nrnb, wcycle, count); -+ } -+ -+ evaluate_energy(fplog, cr, -+ top_global, s_try, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, count, count == 0); -+ -+ if (MASTER(cr)) -+ { -+ print_ebin_header(fplog, count, count, s_try->s.lambda[efptFEP]); -+ } -+ -+ if (count == 0) -+ { -+ s_min->epot = s_try->epot + 1; -+ } -+ -+ /* Print it if necessary */ -+ if (MASTER(cr)) -+ { -+ if (bVerbose) -+ { -+ fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c", -+ count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1, -+ (s_try->epot < s_min->epot) ? '\n' : '\r'); -+ } -+ -+ if (s_try->epot < s_min->epot) -+ { -+ /* Store the new (lower) energies */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)count, -+ mdatoms->tmass, enerd, &s_try->s, inputrec->fepvals, inputrec->expandedvals, -+ s_try->s.box, NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ -+ /* Prepare IMD energy record, if bIMD is TRUE. */ -+ IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, count, TRUE); -+ -+ print_ebin(mdoutf_get_fp_ene(outf), TRUE, -+ do_per_step(steps_accepted, inputrec->nstdisreout), -+ do_per_step(steps_accepted, inputrec->nstorireout), -+ fplog, count, count, eprNORMAL, TRUE, -+ mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ fflush(fplog); -+ } -+ } -+ -+ /* Now if the new energy is smaller than the previous... -+ * or if this is the first step! -+ * or if we did random steps! -+ */ -+ -+ if ( (count == 0) || (s_try->epot < s_min->epot) ) -+ { -+ steps_accepted++; -+ -+ /* Test whether the convergence criterion is met... */ -+ bDone = (s_try->fmax < inputrec->em_tol); -+ -+ /* Copy the arrays for force, positions and energy */ -+ /* The 'Min' array always holds the coords and forces of the minimal -+ sampled energy */ -+ swap_em_state(s_min, s_try); -+ if (count > 0) -+ { -+ ustep *= 1.2; -+ } -+ -+ /* Write to trn, if necessary */ -+ do_x = do_per_step(steps_accepted, inputrec->nstxout); -+ do_f = do_per_step(steps_accepted, inputrec->nstfout); -+ write_em_traj(fplog, cr, outf, do_x, do_f, NULL, -+ top_global, inputrec, count, -+ s_min, state_global, f_global); -+ } -+ else -+ { -+ /* If energy is not smaller make the step smaller... 
*/ -+ ustep *= 0.5; -+ -+ if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) -+ { -+ /* Reload the old state */ -+ em_dd_partition_system(fplog, count, cr, top_global, inputrec, -+ s_min, top, mdatoms, fr, vsite, constr, -+ nrnb, wcycle); -+ } -+ } -+ -+ /* Determine new step */ -+ stepsize = ustep/s_min->fmax; -+ -+ /* Check if stepsize is too small, with 1 nm as a characteristic length */ -+#ifdef GMX_DOUBLE -+ if (count == nsteps || ustep < 1e-12) -+#else -+ if (count == nsteps || ustep < 1e-6) -+#endif -+ { -+ if (MASTER(cr)) -+ { -+ warn_step(stderr, inputrec->em_tol, count == nsteps, constr != NULL); -+ warn_step(fplog, inputrec->em_tol, count == nsteps, constr != NULL); -+ } -+ bAbort = TRUE; -+ } -+ -+ /* Send IMD energies and positions, if bIMD is TRUE. */ -+ if (do_IMD(inputrec->bIMD, count, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr)) -+ { -+ IMD_send_positions(inputrec->imd); -+ } -+ -+ count++; -+ } /* End of the loop */ -+ -+ /* IMD cleanup, if bIMD is TRUE. */ -+ IMD_finalize(inputrec->bIMD, inputrec->imd); -+ -+ /* Print some data... */ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "\nwriting lowest energy coordinates.\n"); -+ } -+ write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm), -+ top_global, inputrec, count, -+ s_min, state_global, f_global); -+ -+ fnormn = s_min->fnorm/sqrt(state_global->natoms); -+ -+ if (MASTER(cr)) -+ { -+ print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps, -+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); -+ print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps, -+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); -+ } -+ -+ finish_em(cr, outf, walltime_accounting, wcycle); -+ -+ /* To print the actual number of steps we needed somewhere */ -+ inputrec->nsteps = count; -+ -+ walltime_accounting_set_nsteps_done(walltime_accounting, count); -+ -+ return 0; -+} /* That's all folks */ -+ -+ -+double do_nm(FILE *fplog, t_commrec *cr, -+ int nfile, const t_filenm fnm[], -+ const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact, -+ int gmx_unused nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int gmx_unused stepout, -+ t_inputrec *inputrec, -+ gmx_mtop_t *top_global, t_fcdata *fcd, -+ t_state *state_global, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t gmx_unused ed, -+ t_forcerec *fr, -+ int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, -+ gmx_membed_t gmx_unused membed, -+ real gmx_unused cpt_period, real gmx_unused max_hours, -+ const char gmx_unused *deviceOptions, -+ int imdport, -+ unsigned long gmx_unused Flags, -+ gmx_walltime_accounting_t walltime_accounting) -+{ -+ const char *NM = "Normal Mode Analysis"; -+ gmx_mdoutf_t outf; -+ int natoms, atom, d; -+ int nnodes, node; -+ rvec *f_global; -+ gmx_localtop_t *top; -+ gmx_enerdata_t *enerd; -+ rvec *f; -+ gmx_global_stat_t gstat; -+ t_graph *graph; -+ real t, t0, lambda, lam0; -+ gmx_bool bNS; -+ tensor vir, pres; -+ rvec mu_tot; -+ rvec *fneg, *dfdx; -+ gmx_bool bSparse; /* use sparse matrix storage format */ -+ size_t sz = 0; -+ gmx_sparsematrix_t * sparse_matrix = NULL; -+ real * full_matrix = NULL; -+ em_state_t * state_work; -+ -+ /* added with respect to mdrun */ -+ int i, j, k, row, col; -+ real der_range = 10.0*sqrt(GMX_REAL_EPS); -+ real x_min; -+ real fnorm, fmax; -+ -+ if (constr != NULL) -+ { -+ gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this 
combination is not supported"); -+ } -+ -+ state_work = init_em_state(); -+ -+ /* Init em and store the local state in state_minimum */ -+ init_em(fplog, NM, cr, inputrec, -+ state_global, top_global, state_work, &top, -+ &f, &f_global, -+ nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, -+ nfile, fnm, &outf, NULL, imdport, Flags, wcycle); -+ -+ natoms = top_global->natoms; -+ snew(fneg, natoms); -+ snew(dfdx, natoms); -+ -+#ifndef GMX_DOUBLE -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, -+ "NOTE: This version of Gromacs has been compiled in single precision,\n" -+ " which MIGHT not be accurate enough for normal mode analysis.\n" -+ " Gromacs now uses sparse matrix storage, so the memory requirements\n" -+ " are fairly modest even if you recompile in double precision.\n\n"); -+ } -+#endif -+ -+ /* Check if we can/should use sparse storage format. -+ * -+ * Sparse format is only useful when the Hessian itself is sparse, which it -+ * will be when we use a cutoff. -+ * For small systems (n<1000) it is easier to always use full matrix format, though. -+ */ -+ if (EEL_FULL(fr->eeltype) || fr->rlist == 0.0) -+ { -+ md_print_info(cr, fplog, "Non-cutoff electrostatics used, forcing full Hessian format.\n"); -+ bSparse = FALSE; -+ } -+ else if (top_global->natoms < 1000) -+ { -+ md_print_info(cr, fplog, "Small system size (N=%d), using full Hessian format.\n", top_global->natoms); -+ bSparse = FALSE; -+ } -+ else -+ { -+ md_print_info(cr, fplog, "Using compressed symmetric sparse Hessian format.\n"); -+ bSparse = TRUE; -+ } -+ -+ if (MASTER(cr)) -+ { -+ sz = DIM*top_global->natoms; -+ -+ fprintf(stderr, "Allocating Hessian memory...\n\n"); -+ -+ if (bSparse) -+ { -+ sparse_matrix = gmx_sparsematrix_init(sz); -+ sparse_matrix->compressed_symmetric = TRUE; -+ } -+ else -+ { -+ snew(full_matrix, sz*sz); -+ } -+ } -+ -+ /* Initial values */ -+ t0 = inputrec->init_t; -+ lam0 = inputrec->fepvals->init_lambda; -+ t = t0; -+ lambda = lam0; -+ -+ init_nrnb(nrnb); -+ -+ where(); -+ -+ /* Write start time and temperature */ -+ print_em_start(fplog, cr, walltime_accounting, wcycle, NM); -+ -+ /* fudge nr of steps to nr of atoms */ -+ inputrec->nsteps = natoms*2; -+ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "starting normal mode calculation '%s'\n%d steps.\n\n", -+ *(top_global->name), (int)inputrec->nsteps); -+ } -+ -+ nnodes = cr->nnodes; -+ -+ /* Make evaluate_energy do a single node force calculation */ -+ cr->nnodes = 1; -+ evaluate_energy(fplog, cr, -+ top_global, state_work, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, TRUE); -+ cr->nnodes = nnodes; -+ -+ /* if forces are not small, warn user */ -+ get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, state_work); -+ -+ md_print_info(cr, fplog, "Maximum force:%12.5e\n", state_work->fmax); -+ if (state_work->fmax > 1.0e-3) -+ { -+ md_print_info(cr, fplog, -+ "The force is probably not small enough to " -+ "ensure that you are at a minimum.\n" -+ "Be aware that negative eigenvalues may occur\n" -+ "when the resulting matrix is diagonalized.\n\n"); -+ } -+ -+ /*********************************************************** -+ * -+ * Loop over all pairs in matrix -+ * -+ * do_force called twice. 
Once with positive and -+ * once with negative displacement -+ * -+ ************************************************************/ -+ -+ /* Steps are divided one by one over the nodes */ -+ for (atom = cr->nodeid; atom < natoms; atom += nnodes) -+ { -+ -+ for (d = 0; d < DIM; d++) -+ { -+ x_min = state_work->s.x[atom][d]; -+ -+ state_work->s.x[atom][d] = x_min - der_range; -+ -+ /* Make evaluate_energy do a single node force calculation */ -+ cr->nnodes = 1; -+ evaluate_energy(fplog, cr, -+ top_global, state_work, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, atom*2, FALSE); -+ -+ for (i = 0; i < natoms; i++) -+ { -+ copy_rvec(state_work->f[i], fneg[i]); -+ } -+ -+ state_work->s.x[atom][d] = x_min + der_range; -+ -+ evaluate_energy(fplog, cr, -+ top_global, state_work, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, atom*2+1, FALSE); -+ cr->nnodes = nnodes; -+ -+ /* x is restored to original */ -+ state_work->s.x[atom][d] = x_min; -+ -+ for (j = 0; j < natoms; j++) -+ { -+ for (k = 0; (k < DIM); k++) -+ { -+ dfdx[j][k] = -+ -(state_work->f[j][k] - fneg[j][k])/(2*der_range); -+ } -+ } -+ -+ if (!MASTER(cr)) -+ { -+#ifdef GMX_MPI -+#ifdef GMX_DOUBLE -+#define mpi_type MPI_DOUBLE -+#else -+#define mpi_type MPI_FLOAT -+#endif -+ MPI_Send(dfdx[0], natoms*DIM, mpi_type, MASTERNODE(cr), cr->nodeid, -+ cr->mpi_comm_mygroup); -+#endif -+ } -+ else -+ { -+ for (node = 0; (node < nnodes && atom+node < natoms); node++) -+ { -+ if (node > 0) -+ { -+#ifdef GMX_MPI -+ MPI_Status stat; -+ MPI_Recv(dfdx[0], natoms*DIM, mpi_type, node, node, -+ cr->mpi_comm_mygroup, &stat); -+#undef mpi_type -+#endif -+ } -+ -+ row = (atom + node)*DIM + d; -+ -+ for (j = 0; j < natoms; j++) -+ { -+ for (k = 0; k < DIM; k++) -+ { -+ col = j*DIM + k; -+ -+ if (bSparse) -+ { -+ if (col >= row && dfdx[j][k] != 0.0) -+ { -+ gmx_sparsematrix_increment_value(sparse_matrix, -+ row, col, dfdx[j][k]); -+ } -+ } -+ else -+ { -+ full_matrix[row*sz+col] = dfdx[j][k]; -+ } -+ } -+ } -+ } -+ } -+ -+ if (bVerbose && fplog) -+ { -+ fflush(fplog); -+ } -+ } -+ /* write progress */ -+ if (MASTER(cr) && bVerbose) -+ { -+ fprintf(stderr, "\rFinished step %d out of %d", -+ min(atom+nnodes, natoms), natoms); -+ fflush(stderr); -+ } -+ } -+ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "\n\nWriting Hessian...\n"); -+ gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix); -+ } -+ -+ finish_em(cr, outf, walltime_accounting, wcycle); -+ -+ walltime_accounting_set_nsteps_done(walltime_accounting, natoms*2); -+ -+ return 0; -+} -diff --git a/src/programs/mdrun/md.c b/src/programs/mdrun/md.c -index 3d98d59..b34d23c 100644 ---- a/src/programs/mdrun/md.c -+++ b/src/programs/mdrun/md.c -@@ -96,6 +96,12 @@ - #include "gromacs/swap/swapcoords.h" - #include "gromacs/imd/imd.h" - -+/* PLUMED */ -+#include "../../../Plumed.h" -+extern int plumedswitch; -+extern plumed plumedmain; -+/* END PLUMED */ -+ - #ifdef GMX_FAHCORE - #include "corewrap.h" - #endif -@@ -224,6 +230,12 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], - /* Interactive MD */ - gmx_bool bIMDstep = FALSE; - -+ /* PLUMED */ -+ int plumedNeedsEnergy=0; -+ int plumedWantsToStop=0; -+ matrix plumed_vir; -+ /* END PLUMED */ -+ - #ifdef GMX_FAHCORE - /* Temporary addition for FAHCORE checkpointing */ - int chkpt_ret; -@@ -651,6 +663,48 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], - 
fprintf(fplog, "\n"); - } - -+ /* PLUMED */ -+ if(plumedswitch){ -+ /* detect plumed API version */ -+ int pversion=0; -+ plumed_cmd(plumedmain,"getApiVersion",&pversion); -+ /* setting kbT is only implemented with api>1) */ -+ real kbT=ir->opts.ref_t[0]*BOLTZ; -+ if(pversion>1) plumed_cmd(plumedmain,"setKbT",&kbT); -+ -+ if(cr->ms && cr->ms->nsim>1) { -+ if(MASTER(cr)) plumed_cmd(plumedmain,"GREX setMPIIntercomm",&cr->ms->mpi_comm_masters); -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all); -+ }else{ -+ plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim); -+ } -+ } -+ plumed_cmd(plumedmain,"GREX init",NULL); -+ } -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ plumed_cmd(plumedmain,"setMPIComm",&cr->dd->mpi_comm_all); -+ } -+ } -+ plumed_cmd(plumedmain,"setNatoms",&top_global->natoms); -+ plumed_cmd(plumedmain,"setMDEngine","gromacs"); -+ plumed_cmd(plumedmain,"setLog",fplog); -+ real real_delta_t; -+ real_delta_t=ir->delta_t; -+ plumed_cmd(plumedmain,"setTimestep",&real_delta_t); -+ plumed_cmd(plumedmain,"init",NULL); -+ -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ plumed_cmd(plumedmain,"setAtomsNlocal",&cr->dd->nat_home); -+ plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->gatindex); -+ } -+ } -+ } -+ /* END PLUMED */ -+ - walltime_accounting_start(walltime_accounting); - wallcycle_start(wcycle, ewcRUN); - print_start(fplog, cr, walltime_accounting, "mdrun"); -@@ -955,6 +1009,13 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], - do_verbose && !bPMETuneRunning); - wallcycle_stop(wcycle, ewcDOMDEC); - /* If using an iterative integrator, reallocate space to match the decomposition */ -+ -+ /* PLUMED */ -+ if(plumedswitch){ -+ plumed_cmd(plumedmain,"setAtomsNlocal",&cr->dd->nat_home); -+ plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->gatindex); -+ } -+ /* END PLUMED */ - } - } - -@@ -1078,12 +1139,45 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], - * This is parallellized as well, and does communication too. - * Check comments in sim_util.c - */ -+ -+ /* PLUMED */ -+ plumedNeedsEnergy=0; -+ if(plumedswitch){ -+ long int lstep=step; plumed_cmd(plumedmain,"setStepLong",&lstep); -+ plumed_cmd(plumedmain,"setPositions",&state->x[0][0]); -+ plumed_cmd(plumedmain,"setMasses",&mdatoms->massT[0]); -+ plumed_cmd(plumedmain,"setCharges",&mdatoms->chargeA[0]); -+ plumed_cmd(plumedmain,"setBox",&state->box[0][0]); -+ plumed_cmd(plumedmain,"prepareCalc",NULL); -+ plumed_cmd(plumedmain,"setStopFlag",&plumedWantsToStop); -+ plumed_cmd(plumedmain,"setForces",&f[0][0]); -+ plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); -+ clear_mat(plumed_vir); -+ plumed_cmd(plumedmain,"setVirial",&plumed_vir[0][0]); -+ } -+ /* END PLUMED */ - do_force(fplog, cr, ir, step, nrnb, wcycle, top, groups, - state->box, state->x, &state->hist, - f, force_vir, mdatoms, enerd, fcd, - state->lambda, graph, - fr, vsite, mu_tot, t, mdoutf_get_fp_field(outf), ed, bBornRadii, - (bNS ? 
GMX_FORCE_NS : 0) | force_flags); -+ /* PLUMED */ -+ if(plumedswitch){ -+ if(plumedNeedsEnergy){ -+ msmul(force_vir,2.0,plumed_vir); -+ plumed_cmd(plumedmain,"setEnergy",&enerd->term[F_EPOT]); -+ plumed_cmd(plumedmain,"performCalc",NULL); -+ msmul(plumed_vir,0.5,force_vir); -+ } else { -+ msmul(plumed_vir,0.5,plumed_vir); -+ m_add(force_vir,plumed_vir,force_vir); -+ } -+ if ((repl_ex_nst > 0) && (step > 0) && !bLastStep && -+ do_per_step(step,repl_ex_nst)) plumed_cmd(plumedmain,"GREX savePositions",NULL); -+ if(plumedWantsToStop) ir->nsteps=step_rel+1; -+ } -+ /* END PLUMED */ - } - - if (bVV && !bStartingFromCpt && !bRerunMD) -diff --git a/src/programs/mdrun/md.c.preplumed b/src/programs/mdrun/md.c.preplumed -new file mode 100644 -index 0000000..3d98d59 ---- /dev/null -+++ b/src/programs/mdrun/md.c.preplumed -@@ -0,0 +1,2058 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team. -+ * Copyright (c) 2011,2012,2013,2014, by the GROMACS development team, led by -+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+ * and including many others, as listed in the AUTHORS file in the -+ * top-level source directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. -+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. 
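As a reading aid for the do_md() hunks above (the file beginning here is the pre-patch copy of md.c that the patch keeps for reference): the added code drives PLUMED entirely through plumed_cmd(). Below is a minimal serial sketch of that call sequence, restricted to command keys that appear in the hunks; plumed_create() and plumed_finalize() from the PLUMED C API stand in for the global plumedmain object used by the patch, and all variable names are illustrative.

    #include "Plumed.h"

    /* Sketch of the per-run and per-step PLUMED calls made by the patched
     * do_md(); serial, default (double) precision, illustrative names. */
    static void plumed_md_sketch(int natoms, int nsteps, double dt,
                                 double (*x)[3], double (*f)[3],
                                 double box[3][3], double vir[3][3],
                                 double *mass, double *charge)
    {
        plumed p            = plumed_create();
        int    needs_energy = 0;
        long   step;

        plumed_cmd(p, "setNatoms", &natoms);
        plumed_cmd(p, "setMDEngine", "gromacs");
        plumed_cmd(p, "setTimestep", &dt);
        plumed_cmd(p, "init", NULL);

        for (step = 0; step < nsteps; step++)
        {
            plumed_cmd(p, "setStepLong", &step);
            plumed_cmd(p, "setPositions", &x[0][0]);
            plumed_cmd(p, "setMasses", &mass[0]);
            plumed_cmd(p, "setCharges", &charge[0]);
            plumed_cmd(p, "setBox", &box[0][0]);
            plumed_cmd(p, "prepareCalc", NULL);
            plumed_cmd(p, "setForces", &f[0][0]);
            plumed_cmd(p, "isEnergyNeeded", &needs_energy);
            plumed_cmd(p, "setVirial", &vir[0][0]);
            /* ... the MD engine evaluates forces here; if needs_energy is
             * set, pass the potential with "setEnergy" before this call ... */
            plumed_cmd(p, "performCalc", NULL);
        }
        plumed_finalize(p);
    }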
-+ */ -+#ifdef HAVE_CONFIG_H -+#include -+#endif -+ -+#include "typedefs.h" -+#include "gromacs/utility/smalloc.h" -+#include "sysstuff.h" -+#include "vec.h" -+#include "vcm.h" -+#include "mdebin.h" -+#include "nrnb.h" -+#include "calcmu.h" -+#include "index.h" -+#include "vsite.h" -+#include "update.h" -+#include "ns.h" -+#include "mdrun.h" -+#include "md_support.h" -+#include "md_logging.h" -+#include "network.h" -+#include "xvgr.h" -+#include "physics.h" -+#include "names.h" -+#include "force.h" -+#include "disre.h" -+#include "orires.h" -+#include "pme.h" -+#include "mdatoms.h" -+#include "repl_ex.h" -+#include "deform.h" -+#include "qmmm.h" -+#include "domdec.h" -+#include "domdec_network.h" -+#include "gromacs/gmxlib/topsort.h" -+#include "coulomb.h" -+#include "constr.h" -+#include "shellfc.h" -+#include "gromacs/gmxpreprocess/compute_io.h" -+#include "checkpoint.h" -+#include "mtop_util.h" -+#include "sighandler.h" -+#include "txtdump.h" -+#include "gromacs/utility/cstringutil.h" -+#include "pme_loadbal.h" -+#include "bondf.h" -+#include "membed.h" -+#include "types/nlistheuristics.h" -+#include "types/iteratedconstraints.h" -+#include "nbnxn_cuda_data_mgmt.h" -+ -+#include "gromacs/utility/gmxmpi.h" -+#include "gromacs/fileio/confio.h" -+#include "gromacs/fileio/trajectory_writing.h" -+#include "gromacs/fileio/trnio.h" -+#include "gromacs/fileio/trxio.h" -+#include "gromacs/fileio/xtcio.h" -+#include "gromacs/timing/wallcycle.h" -+#include "gromacs/timing/walltime_accounting.h" -+#include "gromacs/pulling/pull.h" -+#include "gromacs/swap/swapcoords.h" -+#include "gromacs/imd/imd.h" -+ -+#ifdef GMX_FAHCORE -+#include "corewrap.h" -+#endif -+ -+static void reset_all_counters(FILE *fplog, t_commrec *cr, -+ gmx_int64_t step, -+ gmx_int64_t *step_rel, t_inputrec *ir, -+ gmx_wallcycle_t wcycle, t_nrnb *nrnb, -+ gmx_walltime_accounting_t walltime_accounting, -+ nbnxn_cuda_ptr_t cu_nbv) -+{ -+ char sbuf[STEPSTRSIZE]; -+ -+ /* Reset all the counters related to performance over the run */ -+ md_print_warn(cr, fplog, "step %s: resetting all time and cycle counters\n", -+ gmx_step_str(step, sbuf)); -+ -+ if (cu_nbv) -+ { -+ nbnxn_cuda_reset_timings(cu_nbv); -+ } -+ -+ wallcycle_stop(wcycle, ewcRUN); -+ wallcycle_reset_all(wcycle); -+ if (DOMAINDECOMP(cr)) -+ { -+ reset_dd_statistics_counters(cr->dd); -+ } -+ init_nrnb(nrnb); -+ ir->init_step += *step_rel; -+ ir->nsteps -= *step_rel; -+ *step_rel = 0; -+ wallcycle_start(wcycle, ewcRUN); -+ walltime_accounting_start(walltime_accounting); -+ print_date_and_time(fplog, cr->nodeid, "Restarted time", gmx_gettime()); -+} -+ -+double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], -+ const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact, -+ int nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int stepout, t_inputrec *ir, -+ gmx_mtop_t *top_global, -+ t_fcdata *fcd, -+ t_state *state_global, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t ed, t_forcerec *fr, -+ int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, gmx_membed_t membed, -+ real cpt_period, real max_hours, -+ const char gmx_unused *deviceOptions, -+ int imdport, -+ unsigned long Flags, -+ gmx_walltime_accounting_t walltime_accounting) -+{ -+ gmx_mdoutf_t outf = NULL; -+ gmx_int64_t step, step_rel; -+ double elapsed_time; -+ double t, t0, lam0[efptNR]; -+ gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEner; -+ gmx_bool bNS, bNStList, bSimAnn, bStopCM, bRerunMD, bNotLastFrame = FALSE, -+ bFirstStep, bStateFromCP, 
bStateFromTPX, bInitStep, bLastStep, -+ bBornRadii, bStartingFromCpt; -+ gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE; -+ gmx_bool do_ene, do_log, do_verbose, bRerunWarnNoV = TRUE, -+ bForceUpdate = FALSE, bCPT; -+ gmx_bool bMasterState; -+ int force_flags, cglo_flags; -+ tensor force_vir, shake_vir, total_vir, tmp_vir, pres; -+ int i, m; -+ t_trxstatus *status; -+ rvec mu_tot; -+ t_vcm *vcm; -+ t_state *bufstate = NULL; -+ matrix *scale_tot, pcoupl_mu, M, ebox; -+ gmx_nlheur_t nlh; -+ t_trxframe rerun_fr; -+ gmx_repl_ex_t repl_ex = NULL; -+ int nchkpt = 1; -+ gmx_localtop_t *top; -+ t_mdebin *mdebin = NULL; -+ t_state *state = NULL; -+ rvec *f_global = NULL; -+ gmx_enerdata_t *enerd; -+ rvec *f = NULL; -+ gmx_global_stat_t gstat; -+ gmx_update_t upd = NULL; -+ t_graph *graph = NULL; -+ globsig_t gs; -+ gmx_groups_t *groups; -+ gmx_ekindata_t *ekind, *ekind_save; -+ gmx_shellfc_t shellfc; -+ int count, nconverged = 0; -+ real timestep = 0; -+ double tcount = 0; -+ gmx_bool bConverged = TRUE, bOK, bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition; -+ gmx_bool bAppend; -+ gmx_bool bResetCountersHalfMaxH = FALSE; -+ gmx_bool bVV, bIterativeCase, bFirstIterate, bTemp, bPres, bTrotter; -+ gmx_bool bUpdateDoLR; -+ real dvdl_constr; -+ rvec *cbuf = NULL; -+ matrix lastbox; -+ real veta_save, scalevir, tracevir; -+ real vetanew = 0; -+ int lamnew = 0; -+ /* for FEP */ -+ int nstfep; -+ double cycles; -+ real saved_conserved_quantity = 0; -+ real last_ekin = 0; -+ int iter_i; -+ t_extmass MassQ; -+ int **trotter_seq; -+ char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE]; -+ int handled_stop_condition = gmx_stop_cond_none; /* compare to get_stop_condition*/ -+ gmx_iterate_t iterate; -+ gmx_int64_t multisim_nsteps = -1; /* number of steps to do before first multisim -+ simulation stops. If equal to zero, don't -+ communicate any more between multisims.*/ -+ /* PME load balancing data for GPU kernels */ -+ pme_load_balancing_t pme_loadbal = NULL; -+ double cycles_pmes; -+ gmx_bool bPMETuneTry = FALSE, bPMETuneRunning = FALSE; -+ -+ /* Interactive MD */ -+ gmx_bool bIMDstep = FALSE; -+ -+#ifdef GMX_FAHCORE -+ /* Temporary addition for FAHCORE checkpointing */ -+ int chkpt_ret; -+#endif -+ -+ /* Check for special mdrun options */ -+ bRerunMD = (Flags & MD_RERUN); -+ bAppend = (Flags & MD_APPENDFILES); -+ if (Flags & MD_RESETCOUNTERSHALFWAY) -+ { -+ if (ir->nsteps > 0) -+ { -+ /* Signal to reset the counters half the simulation steps. */ -+ wcycle_set_reset_counters(wcycle, ir->nsteps/2); -+ } -+ /* Signal to reset the counters halfway the simulation time. */ -+ bResetCountersHalfMaxH = (max_hours > 0); -+ } -+ -+ /* md-vv uses averaged full step velocities for T-control -+ md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control) -+ md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */ -+ bVV = EI_VV(ir->eI); -+ if (bVV) /* to store the initial velocities while computing virial */ -+ { -+ snew(cbuf, top_global->natoms); -+ } -+ /* all the iteratative cases - only if there are constraints */ -+ bIterativeCase = ((IR_NPH_TROTTER(ir) || IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD)); -+ gmx_iterate_init(&iterate, FALSE); /* The default value of iterate->bIterationActive is set to -+ false in this step. 
The correct value, true or false, -+ is set at each step, as it depends on the frequency of temperature -+ and pressure control.*/ -+ bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir))); -+ -+ if (bRerunMD) -+ { -+ /* Since we don't know if the frames read are related in any way, -+ * rebuild the neighborlist at every step. -+ */ -+ ir->nstlist = 1; -+ ir->nstcalcenergy = 1; -+ nstglobalcomm = 1; -+ } -+ -+ check_ir_old_tpx_versions(cr, fplog, ir, top_global); -+ -+ nstglobalcomm = check_nstglobalcomm(fplog, cr, nstglobalcomm, ir); -+ bGStatEveryStep = (nstglobalcomm == 1); -+ -+ if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL) -+ { -+ fprintf(fplog, -+ "To reduce the energy communication with nstlist = -1\n" -+ "the neighbor list validity should not be checked at every step,\n" -+ "this means that exact integration is not guaranteed.\n" -+ "The neighbor list validity is checked after:\n" -+ " - 2*std.dev.(n.list life time) steps.\n" -+ "In most cases this will result in exact integration.\n" -+ "This reduces the energy communication by a factor of 2 to 3.\n" -+ "If you want less energy communication, set nstlist > 3.\n\n"); -+ } -+ -+ if (bRerunMD) -+ { -+ ir->nstxout_compressed = 0; -+ } -+ groups = &top_global->groups; -+ -+ /* Initial values */ -+ init_md(fplog, cr, ir, oenv, &t, &t0, state_global->lambda, -+ &(state_global->fep_state), lam0, -+ nrnb, top_global, &upd, -+ nfile, fnm, &outf, &mdebin, -+ force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, Flags, wcycle); -+ -+ clear_mat(total_vir); -+ clear_mat(pres); -+ /* Energy terms and groups */ -+ snew(enerd, 1); -+ init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, -+ enerd); -+ if (DOMAINDECOMP(cr)) -+ { -+ f = NULL; -+ } -+ else -+ { -+ snew(f, top_global->natoms); -+ } -+ -+ /* Kinetic energy data */ -+ snew(ekind, 1); -+ init_ekindata(fplog, top_global, &(ir->opts), ekind); -+ /* needed for iteration of constraints */ -+ snew(ekind_save, 1); -+ init_ekindata(fplog, top_global, &(ir->opts), ekind_save); -+ /* Copy the cos acceleration to the groups struct */ -+ ekind->cosacc.cos_accel = ir->cos_accel; -+ -+ gstat = global_stat_init(ir); -+ debug_gmx(); -+ -+ /* Check for polarizable models and flexible constraints */ -+ shellfc = init_shell_flexcon(fplog, -+ top_global, n_flexible_constraints(constr), -+ (ir->bContinuation || -+ (DOMAINDECOMP(cr) && !MASTER(cr))) ? 
-+ NULL : state_global->x); -+ if (shellfc && ir->nstcalcenergy != 1) -+ { -+ gmx_fatal(FARGS, "You have nstcalcenergy set to a value (%d) that is different from 1.\nThis is not supported in combinations with shell particles.\nPlease make a new tpr file.", ir->nstcalcenergy); -+ } -+ if (shellfc && DOMAINDECOMP(cr)) -+ { -+ gmx_fatal(FARGS, "Shell particles are not implemented with domain decomposition, use a single rank"); -+ } -+ if (shellfc && ir->eI == eiNM) -+ { -+ /* Currently shells don't work with Normal Modes */ -+ gmx_fatal(FARGS, "Normal Mode analysis is not supported with shells.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n"); -+ } -+ -+ if (vsite && ir->eI == eiNM) -+ { -+ /* Currently virtual sites don't work with Normal Modes */ -+ gmx_fatal(FARGS, "Normal Mode analysis is not supported with virtual sites.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n"); -+ } -+ -+ if (DEFORM(*ir)) -+ { -+ tMPI_Thread_mutex_lock(&deform_init_box_mutex); -+ set_deform_reference_box(upd, -+ deform_init_init_step_tpx, -+ deform_init_box_tpx); -+ tMPI_Thread_mutex_unlock(&deform_init_box_mutex); -+ } -+ -+ { -+ double io = compute_io(ir, top_global->natoms, groups, mdebin->ebin->nener, 1); -+ if ((io > 2000) && MASTER(cr)) -+ { -+ fprintf(stderr, -+ "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", -+ io); -+ } -+ } -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ top = dd_init_local_top(top_global); -+ -+ snew(state, 1); -+ dd_init_local_state(cr->dd, state_global, state); -+ -+ if (DDMASTER(cr->dd) && ir->nstfout) -+ { -+ snew(f_global, state_global->natoms); -+ } -+ } -+ else -+ { -+ top = gmx_mtop_generate_local_top(top_global, ir); -+ -+ forcerec_set_excl_load(fr, top); -+ -+ state = serial_init_local_state(state_global); -+ f_global = f; -+ -+ atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms); -+ -+ if (vsite) -+ { -+ set_vsite_top(vsite, top, mdatoms, cr); -+ } -+ -+ if (ir->ePBC != epbcNONE && !fr->bMolPBC) -+ { -+ graph = mk_graph(fplog, &(top->idef), 0, top_global->natoms, FALSE, FALSE); -+ } -+ -+ if (shellfc) -+ { -+ make_local_shells(cr, mdatoms, shellfc); -+ } -+ -+ setup_bonded_threading(fr, &top->idef); -+ } -+ -+ /* Set up interactive MD (IMD) */ -+ init_IMD(ir, cr, top_global, fplog, ir->nstcalcenergy, state_global->x, -+ nfile, fnm, oenv, imdport, Flags); -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ /* Distribute the charge groups over the nodes from the master node */ -+ dd_partition_system(fplog, ir->init_step, cr, TRUE, 1, -+ state_global, top_global, ir, -+ state, &f, mdatoms, top, fr, -+ vsite, shellfc, constr, -+ nrnb, wcycle, FALSE); -+ -+ } -+ -+ update_mdatoms(mdatoms, state->lambda[efptMASS]); -+ -+ if (opt2bSet("-cpi", nfile, fnm)) -+ { -+ bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr); -+ } -+ else -+ { -+ bStateFromCP = FALSE; -+ } -+ -+ if (ir->bExpanded) -+ { -+ init_expanded_ensemble(bStateFromCP, ir, &state->dfhist); -+ } -+ -+ if (MASTER(cr)) -+ { -+ if (bStateFromCP) -+ { -+ /* Update mdebin with energy history if appending to output files */ -+ if (Flags & MD_APPENDFILES) -+ { -+ restore_energyhistory_from_state(mdebin, &state_global->enerhist); -+ } -+ else -+ { -+ /* We might have read an energy history from checkpoint, -+ * free the allocated memory and reset the counts. 
-+ */ -+ done_energyhistory(&state_global->enerhist); -+ init_energyhistory(&state_global->enerhist); -+ } -+ } -+ /* Set the initial energy history in state by updating once */ -+ update_energyhistory(&state_global->enerhist, mdebin); -+ } -+ -+ /* Initialize constraints */ -+ if (constr && !DOMAINDECOMP(cr)) -+ { -+ set_constraints(constr, top, ir, mdatoms, cr); -+ } -+ -+ if (repl_ex_nst > 0 && MASTER(cr)) -+ { -+ repl_ex = init_replica_exchange(fplog, cr->ms, state_global, ir, -+ repl_ex_nst, repl_ex_nex, repl_ex_seed); -+ } -+ -+ /* PME tuning is only supported with GPUs or PME nodes and not with rerun. -+ * PME tuning is not supported with PME only for LJ and not for Coulomb. -+ */ -+ if ((Flags & MD_TUNEPME) && -+ EEL_PME(fr->eeltype) && -+ ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) && -+ !bRerunMD) -+ { -+ pme_loadbal_init(&pme_loadbal, ir, state->box, fr->ic, fr->pmedata); -+ cycles_pmes = 0; -+ if (cr->duty & DUTY_PME) -+ { -+ /* Start tuning right away, as we can't measure the load */ -+ bPMETuneRunning = TRUE; -+ } -+ else -+ { -+ /* Separate PME nodes, we can measure the PP/PME load balance */ -+ bPMETuneTry = TRUE; -+ } -+ } -+ -+ if (!ir->bContinuation && !bRerunMD) -+ { -+ if (mdatoms->cFREEZE && (state->flags & (1<homenr; i++) -+ { -+ for (m = 0; m < DIM; m++) -+ { -+ if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m]) -+ { -+ state->v[i][m] = 0; -+ } -+ } -+ } -+ } -+ -+ if (constr) -+ { -+ /* Constrain the initial coordinates and velocities */ -+ do_constrain_first(fplog, constr, ir, mdatoms, state, -+ cr, nrnb, fr, top); -+ } -+ if (vsite) -+ { -+ /* Construct the virtual sites for the initial configuration */ -+ construct_vsites(vsite, state->x, ir->delta_t, NULL, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, cr, state->box); -+ } -+ } -+ -+ debug_gmx(); -+ -+ /* set free energy calculation frequency as the minimum -+ greatest common denominator of nstdhdl, nstexpanded, and repl_ex_nst*/ -+ nstfep = ir->fepvals->nstdhdl; -+ if (ir->bExpanded) -+ { -+ nstfep = gmx_greatest_common_divisor(ir->fepvals->nstdhdl, nstfep); -+ } -+ if (repl_ex_nst > 0) -+ { -+ nstfep = gmx_greatest_common_divisor(repl_ex_nst, nstfep); -+ } -+ -+ /* I'm assuming we need global communication the first time! MRS */ -+ cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT -+ | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM : 0) -+ | (bVV ? CGLO_PRESSURE : 0) -+ | (bVV ? CGLO_CONSTRAINT : 0) -+ | (bRerunMD ? CGLO_RERUNMD : 0) -+ | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN : 0)); -+ -+ bSumEkinhOld = FALSE; -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &bSumEkinhOld, cglo_flags); -+ if (ir->eI == eiVVAK) -+ { -+ /* a second call to get the half step temperature initialized as well */ -+ /* we do the same call as above, but turn the pressure off -- internally to -+ compute_globals, this is recognized as a velocity verlet half-step -+ kinetic energy calculation. 
This minimized excess variables, but -+ perhaps loses some logic?*/ -+ -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &bSumEkinhOld, -+ cglo_flags &~(CGLO_STOPCM | CGLO_PRESSURE)); -+ } -+ -+ /* Calculate the initial half step temperature, and save the ekinh_old */ -+ if (!(Flags & MD_STARTFROMCPT)) -+ { -+ for (i = 0; (i < ir->opts.ngtc); i++) -+ { -+ copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old); -+ } -+ } -+ if (ir->eI != eiVV) -+ { -+ enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step, -+ and there is no previous step */ -+ } -+ -+ /* if using an iterative algorithm, we need to create a working directory for the state. */ -+ if (bIterativeCase) -+ { -+ bufstate = init_bufstate(state); -+ } -+ -+ /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter -+ temperature control */ -+ trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter); -+ -+ if (MASTER(cr)) -+ { -+ if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS) -+ { -+ fprintf(fplog, -+ "RMS relative constraint deviation after constraining: %.2e\n", -+ constr_rmsd(constr, FALSE)); -+ } -+ if (EI_STATE_VELOCITY(ir->eI)) -+ { -+ fprintf(fplog, "Initial temperature: %g K\n", enerd->term[F_TEMP]); -+ } -+ if (bRerunMD) -+ { -+ fprintf(stderr, "starting md rerun '%s', reading coordinates from" -+ " input trajectory '%s'\n\n", -+ *(top_global->name), opt2fn("-rerun", nfile, fnm)); -+ if (bVerbose) -+ { -+ fprintf(stderr, "Calculated time to finish depends on nsteps from " -+ "run input file,\nwhich may not correspond to the time " -+ "needed to process input trajectory.\n\n"); -+ } -+ } -+ else -+ { -+ char tbuf[20]; -+ fprintf(stderr, "starting mdrun '%s'\n", -+ *(top_global->name)); -+ if (ir->nsteps >= 0) -+ { -+ sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t); -+ } -+ else -+ { -+ sprintf(tbuf, "%s", "infinite"); -+ } -+ if (ir->init_step > 0) -+ { -+ fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n", -+ gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf, -+ gmx_step_str(ir->init_step, sbuf2), -+ ir->init_step*ir->delta_t); -+ } -+ else -+ { -+ fprintf(stderr, "%s steps, %s ps.\n", -+ gmx_step_str(ir->nsteps, sbuf), tbuf); -+ } -+ } -+ fprintf(fplog, "\n"); -+ } -+ -+ walltime_accounting_start(walltime_accounting); -+ wallcycle_start(wcycle, ewcRUN); -+ print_start(fplog, cr, walltime_accounting, "mdrun"); -+ -+ /* safest point to do file checkpointing is here. 
More general point would be immediately before integrator call */ -+#ifdef GMX_FAHCORE -+ chkpt_ret = fcCheckPointParallel( cr->nodeid, -+ NULL, 0); -+ if (chkpt_ret == 0) -+ { -+ gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 ); -+ } -+#endif -+ -+ debug_gmx(); -+ /*********************************************************** -+ * -+ * Loop over MD steps -+ * -+ ************************************************************/ -+ -+ /* if rerunMD then read coordinates and velocities from input trajectory */ -+ if (bRerunMD) -+ { -+ if (getenv("GMX_FORCE_UPDATE")) -+ { -+ bForceUpdate = TRUE; -+ } -+ -+ rerun_fr.natoms = 0; -+ if (MASTER(cr)) -+ { -+ bNotLastFrame = read_first_frame(oenv, &status, -+ opt2fn("-rerun", nfile, fnm), -+ &rerun_fr, TRX_NEED_X | TRX_READ_V); -+ if (rerun_fr.natoms != top_global->natoms) -+ { -+ gmx_fatal(FARGS, -+ "Number of atoms in trajectory (%d) does not match the " -+ "run input file (%d)\n", -+ rerun_fr.natoms, top_global->natoms); -+ } -+ if (ir->ePBC != epbcNONE) -+ { -+ if (!rerun_fr.bBox) -+ { -+ gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f does not contain a box, while pbc is used", rerun_fr.step, rerun_fr.time); -+ } -+ if (max_cutoff2(ir->ePBC, rerun_fr.box) < sqr(fr->rlistlong)) -+ { -+ gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f has too small box dimensions", rerun_fr.step, rerun_fr.time); -+ } -+ } -+ } -+ -+ if (PAR(cr)) -+ { -+ rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame); -+ } -+ -+ if (ir->ePBC != epbcNONE) -+ { -+ /* Set the shift vectors. -+ * Necessary here when have a static box different from the tpr box. -+ */ -+ calc_shifts(rerun_fr.box, fr->shift_vec); -+ } -+ } -+ -+ /* loop over MD steps or if rerunMD to end of input trajectory */ -+ bFirstStep = TRUE; -+ /* Skip the first Nose-Hoover integration when we get the state from tpx */ -+ bStateFromTPX = !bStateFromCP; -+ bInitStep = bFirstStep && (bStateFromTPX || bVV); -+ bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep; -+ bLastStep = FALSE; -+ bSumEkinhOld = FALSE; -+ bDoReplEx = FALSE; -+ bExchanged = FALSE; -+ bNeedRepartition = FALSE; -+ -+ init_global_signals(&gs, cr, ir, repl_ex_nst); -+ -+ step = ir->init_step; -+ step_rel = 0; -+ -+ if (ir->nstlist == -1) -+ { -+ init_nlistheuristics(&nlh, bGStatEveryStep, step); -+ } -+ -+ if (MULTISIM(cr) && (repl_ex_nst <= 0 )) -+ { -+ /* check how many steps are left in other sims */ -+ multisim_nsteps = get_multisim_nsteps(cr, ir->nsteps); -+ } -+ -+ -+ /* and stop now if we should */ -+ bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) || -+ ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps ))); -+ while (!bLastStep || (bRerunMD && bNotLastFrame)) -+ { -+ -+ wallcycle_start(wcycle, ewcSTEP); -+ -+ if (bRerunMD) -+ { -+ if (rerun_fr.bStep) -+ { -+ step = rerun_fr.step; -+ step_rel = step - ir->init_step; -+ } -+ if (rerun_fr.bTime) -+ { -+ t = rerun_fr.time; -+ } -+ else -+ { -+ t = step; -+ } -+ } -+ else -+ { -+ bLastStep = (step_rel == ir->nsteps); -+ t = t0 + step*ir->delta_t; -+ } -+ -+ if (ir->efep != efepNO || ir->bSimTemp) -+ { -+ /* find and set the current lambdas. If rerunning, we either read in a state, or a lambda value, -+ requiring different logic. 
*/ -+ -+ set_current_lambdas(step, ir->fepvals, bRerunMD, &rerun_fr, state_global, state, lam0); -+ bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl); -+ bDoFEP = (do_per_step(step, nstfep) && (ir->efep != efepNO)); -+ bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) -+ && (ir->bExpanded) && (step > 0) && (!bStartingFromCpt)); -+ } -+ -+ bDoReplEx = ((repl_ex_nst > 0) && (step > 0) && !bLastStep && -+ do_per_step(step, repl_ex_nst)); -+ -+ if (bSimAnn) -+ { -+ update_annealing_target_temp(&(ir->opts), t); -+ } -+ -+ if (bRerunMD) -+ { -+ if (!DOMAINDECOMP(cr) || MASTER(cr)) -+ { -+ for (i = 0; i < state_global->natoms; i++) -+ { -+ copy_rvec(rerun_fr.x[i], state_global->x[i]); -+ } -+ if (rerun_fr.bV) -+ { -+ for (i = 0; i < state_global->natoms; i++) -+ { -+ copy_rvec(rerun_fr.v[i], state_global->v[i]); -+ } -+ } -+ else -+ { -+ for (i = 0; i < state_global->natoms; i++) -+ { -+ clear_rvec(state_global->v[i]); -+ } -+ if (bRerunWarnNoV) -+ { -+ fprintf(stderr, "\nWARNING: Some frames do not contain velocities.\n" -+ " Ekin, temperature and pressure are incorrect,\n" -+ " the virial will be incorrect when constraints are present.\n" -+ "\n"); -+ bRerunWarnNoV = FALSE; -+ } -+ } -+ } -+ copy_mat(rerun_fr.box, state_global->box); -+ copy_mat(state_global->box, state->box); -+ -+ if (vsite && (Flags & MD_RERUN_VSITE)) -+ { -+ if (DOMAINDECOMP(cr)) -+ { -+ gmx_fatal(FARGS, "Vsite recalculation with -rerun is not implemented with domain decomposition, use a single rank"); -+ } -+ if (graph) -+ { -+ /* Following is necessary because the graph may get out of sync -+ * with the coordinates if we only have every N'th coordinate set -+ */ -+ mk_mshift(fplog, graph, fr->ePBC, state->box, state->x); -+ shift_self(graph, state->box, state->x); -+ } -+ construct_vsites(vsite, state->x, ir->delta_t, state->v, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, cr, state->box); -+ if (graph) -+ { -+ unshift_self(graph, state->box, state->x); -+ } -+ } -+ } -+ -+ /* Stop Center of Mass motion */ -+ bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm)); -+ -+ if (bRerunMD) -+ { -+ /* for rerun MD always do Neighbour Searching */ -+ bNS = (bFirstStep || ir->nstlist != 0); -+ bNStList = bNS; -+ } -+ else -+ { -+ /* Determine whether or not to do Neighbour Searching and LR */ -+ bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0); -+ -+ bNS = (bFirstStep || bExchanged || bNeedRepartition || bNStList || bDoFEP || -+ (ir->nstlist == -1 && nlh.nabnsb > 0)); -+ -+ if (bNS && ir->nstlist == -1) -+ { -+ set_nlistheuristics(&nlh, bFirstStep || bExchanged || bNeedRepartition || bDoFEP, step); -+ } -+ } -+ -+ /* check whether we should stop because another simulation has -+ stopped. 
*/ -+ if (MULTISIM(cr)) -+ { -+ if ( (multisim_nsteps >= 0) && (step_rel >= multisim_nsteps) && -+ (multisim_nsteps != ir->nsteps) ) -+ { -+ if (bNS) -+ { -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, -+ "Stopping simulation %d because another one has finished\n", -+ cr->ms->sim); -+ } -+ bLastStep = TRUE; -+ gs.sig[eglsCHKPT] = 1; -+ } -+ } -+ } -+ -+ /* < 0 means stop at next step, > 0 means stop at next NS step */ -+ if ( (gs.set[eglsSTOPCOND] < 0) || -+ ( (gs.set[eglsSTOPCOND] > 0) && (bNStList || ir->nstlist == 0) ) ) -+ { -+ bLastStep = TRUE; -+ } -+ -+ /* Determine whether or not to update the Born radii if doing GB */ -+ bBornRadii = bFirstStep; -+ if (ir->implicit_solvent && (step % ir->nstgbradii == 0)) -+ { -+ bBornRadii = TRUE; -+ } -+ -+ do_log = do_per_step(step, ir->nstlog) || bFirstStep || bLastStep; -+ do_verbose = bVerbose && -+ (step % stepout == 0 || bFirstStep || bLastStep); -+ -+ if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD)) -+ { -+ if (bRerunMD) -+ { -+ bMasterState = TRUE; -+ } -+ else -+ { -+ bMasterState = FALSE; -+ /* Correct the new box if it is too skewed */ -+ if (DYNAMIC_BOX(*ir)) -+ { -+ if (correct_box(fplog, step, state->box, graph)) -+ { -+ bMasterState = TRUE; -+ } -+ } -+ if (DOMAINDECOMP(cr) && bMasterState) -+ { -+ dd_collect_state(cr->dd, state, state_global); -+ } -+ } -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ /* Repartition the domain decomposition */ -+ wallcycle_start(wcycle, ewcDOMDEC); -+ dd_partition_system(fplog, step, cr, -+ bMasterState, nstglobalcomm, -+ state_global, top_global, ir, -+ state, &f, mdatoms, top, fr, -+ vsite, shellfc, constr, -+ nrnb, wcycle, -+ do_verbose && !bPMETuneRunning); -+ wallcycle_stop(wcycle, ewcDOMDEC); -+ /* If using an iterative integrator, reallocate space to match the decomposition */ -+ } -+ } -+ -+ if (MASTER(cr) && do_log) -+ { -+ print_ebin_header(fplog, step, t, state->lambda[efptFEP]); /* can we improve the information printed here? */ -+ } -+ -+ if (ir->efep != efepNO) -+ { -+ update_mdatoms(mdatoms, state->lambda[efptMASS]); -+ } -+ -+ if ((bRerunMD && rerun_fr.bV) || bExchanged) -+ { -+ -+ /* We need the kinetic energy at minus the half step for determining -+ * the full step kinetic energy and possibly for T-coupling.*/ -+ /* This may not be quite working correctly yet . . . . */ -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &bSumEkinhOld, -+ CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE); -+ } -+ clear_mat(force_vir); -+ -+ /* We write a checkpoint at this MD step when: -+ * either at an NS step when we signalled through gs, -+ * or at the last step (but not when we do not want confout), -+ * but never at the first step or with rerun. -+ */ -+ bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) || -+ (bLastStep && (Flags & MD_CONFOUT))) && -+ step > ir->init_step && !bRerunMD); -+ if (bCPT) -+ { -+ gs.set[eglsCHKPT] = 0; -+ } -+ -+ /* Determine the energy and pressure: -+ * at nstcalcenergy steps and at energy output steps (set below). -+ */ -+ if (EI_VV(ir->eI) && (!bInitStep)) -+ { -+ /* for vv, the first half of the integration actually corresponds -+ to the previous step. bCalcEner is only required to be evaluated on the 'next' step, -+ but the virial needs to be calculated on both the current step and the 'next' step. Future -+ reorganization may be able to get rid of one of the bCalcVir=TRUE steps. 
*/ -+ -+ bCalcEner = do_per_step(step-1, ir->nstcalcenergy); -+ bCalcVir = bCalcEner || -+ (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple))); -+ } -+ else -+ { -+ bCalcEner = do_per_step(step, ir->nstcalcenergy); -+ bCalcVir = bCalcEner || -+ (ir->epc != epcNO && do_per_step(step, ir->nstpcouple)); -+ } -+ -+ /* Do we need global communication ? */ -+ bGStat = (bCalcVir || bCalcEner || bStopCM || -+ do_per_step(step, nstglobalcomm) || (bVV && IR_NVT_TROTTER(ir) && do_per_step(step-1, nstglobalcomm)) || -+ (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck)); -+ -+ do_ene = (do_per_step(step, ir->nstenergy) || bLastStep); -+ -+ if (do_ene || do_log || bDoReplEx) -+ { -+ bCalcVir = TRUE; -+ bCalcEner = TRUE; -+ bGStat = TRUE; -+ } -+ -+ /* these CGLO_ options remain the same throughout the iteration */ -+ cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) | -+ (bGStat ? CGLO_GSTAT : 0) -+ ); -+ -+ force_flags = (GMX_FORCE_STATECHANGED | -+ ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) | -+ GMX_FORCE_ALLFORCES | -+ GMX_FORCE_SEPLRF | -+ (bCalcVir ? GMX_FORCE_VIRIAL : 0) | -+ (bCalcEner ? GMX_FORCE_ENERGY : 0) | -+ (bDoFEP ? GMX_FORCE_DHDL : 0) -+ ); -+ -+ if (fr->bTwinRange) -+ { -+ if (do_per_step(step, ir->nstcalclr)) -+ { -+ force_flags |= GMX_FORCE_DO_LR; -+ } -+ } -+ -+ if (shellfc) -+ { -+ /* Now is the time to relax the shells */ -+ count = relax_shell_flexcon(fplog, cr, bVerbose, step, -+ ir, bNS, force_flags, -+ top, -+ constr, enerd, fcd, -+ state, f, force_vir, mdatoms, -+ nrnb, wcycle, graph, groups, -+ shellfc, fr, bBornRadii, t, mu_tot, -+ &bConverged, vsite, -+ mdoutf_get_fp_field(outf)); -+ tcount += count; -+ -+ if (bConverged) -+ { -+ nconverged++; -+ } -+ } -+ else -+ { -+ /* The coordinates (x) are shifted (to get whole molecules) -+ * in do_force. -+ * This is parallellized as well, and does communication too. -+ * Check comments in sim_util.c -+ */ -+ do_force(fplog, cr, ir, step, nrnb, wcycle, top, groups, -+ state->box, state->x, &state->hist, -+ f, force_vir, mdatoms, enerd, fcd, -+ state->lambda, graph, -+ fr, vsite, mu_tot, t, mdoutf_get_fp_field(outf), ed, bBornRadii, -+ (bNS ? GMX_FORCE_NS : 0) | force_flags); -+ } -+ -+ if (bVV && !bStartingFromCpt && !bRerunMD) -+ /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */ -+ { -+ wallcycle_start(wcycle, ewcUPDATE); -+ if (ir->eI == eiVV && bInitStep) -+ { -+ /* if using velocity verlet with full time step Ekin, -+ * take the first half step only to compute the -+ * virial for the first step. From there, -+ * revert back to the initial coordinates -+ * so that the input is actually the initial step. -+ */ -+ copy_rvecn(state->v, cbuf, 0, state->natoms); /* should make this better for parallelizing? */ -+ } -+ else -+ { -+ /* this is for NHC in the Ekin(t+dt/2) version of vv */ -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1); -+ } -+ -+ /* If we are using twin-range interactions where the long-range component -+ * is only evaluated every nstcalclr>1 steps, we should do a special update -+ * step to combine the long-range forces on these steps. -+ * For nstcalclr=1 this is not done, since the forces would have been added -+ * directly to the short-range forces already. -+ * -+ * TODO Remove various aspects of VV+twin-range in master -+ * branch, because VV integrators did not ever support -+ * twin-range multiple time stepping with constraints. 
-+ */ -+ bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); -+ -+ update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, -+ f, bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd, -+ ekind, M, upd, bInitStep, etrtVELOCITY1, -+ cr, nrnb, constr, &top->idef); -+ -+ if (bIterativeCase && do_per_step(step-1, ir->nstpcouple) && !bInitStep) -+ { -+ gmx_iterate_init(&iterate, TRUE); -+ } -+ /* for iterations, we save these vectors, as we will be self-consistently iterating -+ the calculations */ -+ -+ /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */ -+ -+ /* save the state */ -+ if (iterate.bIterationActive) -+ { -+ copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts)); -+ } -+ -+ bFirstIterate = TRUE; -+ while (bFirstIterate || iterate.bIterationActive) -+ { -+ if (iterate.bIterationActive) -+ { -+ copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts)); -+ if (bFirstIterate && bTrotter) -+ { -+ /* The first time through, we need a decent first estimate -+ of veta(t+dt) to compute the constraints. Do -+ this by computing the box volume part of the -+ trotter integration at this time. Nothing else -+ should be changed by this routine here. If -+ !(first time), we start with the previous value -+ of veta. */ -+ -+ veta_save = state->veta; -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ0); -+ vetanew = state->veta; -+ state->veta = veta_save; -+ } -+ } -+ -+ bOK = TRUE; -+ if (!bRerunMD || rerun_fr.bV || bForceUpdate) /* Why is rerun_fr.bV here? Unclear. */ -+ { -+ wallcycle_stop(wcycle, ewcUPDATE); -+ update_constraints(fplog, step, NULL, ir, ekind, mdatoms, -+ state, fr->bMolPBC, graph, f, -+ &top->idef, shake_vir, -+ cr, nrnb, wcycle, upd, constr, -+ TRUE, bCalcVir, vetanew); -+ wallcycle_start(wcycle, ewcUPDATE); -+ -+ if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1) -+ { -+ /* Correct the virial for multiple time stepping */ -+ m_sub(shake_vir, fr->vir_twin_constr, shake_vir); -+ } -+ -+ if (!bOK) -+ { -+ gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains"); -+ } -+ -+ } -+ else if (graph) -+ { -+ /* Need to unshift here if a do_force has been -+ called in the previous step */ -+ unshift_self(graph, state->box, state->x); -+ } -+ -+ /* if VV, compute the pressure and constraints */ -+ /* For VV2, we strictly only need this if using pressure -+ * control, but we really would like to have accurate pressures -+ * printed out. -+ * Think about ways around this in the future? -+ * For now, keep this choice in comments. -+ */ -+ /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */ -+ /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/ -+ bPres = TRUE; -+ bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK)); -+ if (bCalcEner && ir->eI == eiVVAK) /*MRS: 7/9/2010 -- this still doesn't fix it?*/ -+ { -+ bSumEkinhOld = TRUE; -+ } -+ /* for vv, the first half of the integration actually corresponds to the previous step. -+ So we need information from the last step in the first half of the integration */ -+ if (bGStat || do_per_step(step-1, nstglobalcomm)) -+ { -+ wallcycle_stop(wcycle, ewcUPDATE); -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &bSumEkinhOld, -+ cglo_flags -+ | CGLO_ENERGY -+ | (bTemp ? CGLO_TEMPERATURE : 0) -+ | (bPres ? 
CGLO_PRESSURE : 0) -+ | (bPres ? CGLO_CONSTRAINT : 0) -+ | ((iterate.bIterationActive) ? CGLO_ITERATE : 0) -+ | (bFirstIterate ? CGLO_FIRSTITERATE : 0) -+ | CGLO_SCALEEKIN -+ ); -+ /* explanation of above: -+ a) We compute Ekin at the full time step -+ if 1) we are using the AveVel Ekin, and it's not the -+ initial step, or 2) if we are using AveEkin, but need the full -+ time step kinetic energy for the pressure (always true now, since we want accurate statistics). -+ b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in -+ EkinAveVel because it's needed for the pressure */ -+ wallcycle_start(wcycle, ewcUPDATE); -+ } -+ /* temperature scaling and pressure scaling to produce the extended variables at t+dt */ -+ if (!bInitStep) -+ { -+ if (bTrotter) -+ { -+ m_add(force_vir, shake_vir, total_vir); /* we need the un-dispersion corrected total vir here */ -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2); -+ } -+ else -+ { -+ if (bExchanged) -+ { -+ wallcycle_stop(wcycle, ewcUPDATE); -+ /* We need the kinetic energy at minus the half step for determining -+ * the full step kinetic energy and possibly for T-coupling.*/ -+ /* This may not be quite working correctly yet . . . . */ -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &bSumEkinhOld, -+ CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE); -+ wallcycle_start(wcycle, ewcUPDATE); -+ } -+ } -+ } -+ -+ if (iterate.bIterationActive && -+ done_iterating(cr, fplog, step, &iterate, bFirstIterate, -+ state->veta, &vetanew)) -+ { -+ break; -+ } -+ bFirstIterate = FALSE; -+ } -+ -+ if (bTrotter && !bInitStep) -+ { -+ copy_mat(shake_vir, state->svir_prev); -+ copy_mat(force_vir, state->fvir_prev); -+ if (IR_NVT_TROTTER(ir) && ir->eI == eiVV) -+ { -+ /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */ -+ enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, NULL, (ir->eI == eiVV), FALSE); -+ enerd->term[F_EKIN] = trace(ekind->ekin); -+ } -+ } -+ /* if it's the initial step, we performed this first step just to get the constraint virial */ -+ if (bInitStep && ir->eI == eiVV) -+ { -+ copy_rvecn(cbuf, state->v, 0, state->natoms); -+ } -+ wallcycle_stop(wcycle, ewcUPDATE); -+ } -+ -+ /* MRS -- now done iterating -- compute the conserved quantity */ -+ if (bVV) -+ { -+ saved_conserved_quantity = compute_conserved_from_auxiliary(ir, state, &MassQ); -+ if (ir->eI == eiVV) -+ { -+ last_ekin = enerd->term[F_EKIN]; -+ } -+ if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) -+ { -+ saved_conserved_quantity -= enerd->term[F_DISPCORR]; -+ } -+ /* sum up the foreign energy and dhdl terms for vv. currently done every step so that dhdl is correct in the .edr */ -+ if (!bRerunMD) -+ { -+ sum_dhdl(enerd, state->lambda, ir->fepvals); -+ } -+ } -+ -+ /* ######## END FIRST UPDATE STEP ############## */ -+ /* ######## If doing VV, we now have v(dt) ###### */ -+ if (bDoExpanded) -+ { -+ /* perform extended ensemble sampling in lambda - we don't -+ actually move to the new state before outputting -+ statistics, but if performing simulated tempering, we -+ do update the velocities and the tau_t. 
*/ -+ -+ lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, &state->dfhist, step, state->v, mdatoms); -+ /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */ -+ copy_df_history(&state_global->dfhist, &state->dfhist); -+ } -+ -+ /* Now we have the energies and forces corresponding to the -+ * coordinates at time t. We must output all of this before -+ * the update. -+ */ -+ do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, -+ ir, state, state_global, top_global, fr, -+ outf, mdebin, ekind, f, f_global, -+ &nchkpt, -+ bCPT, bRerunMD, bLastStep, (Flags & MD_CONFOUT), -+ bSumEkinhOld); -+ /* Check if IMD step and do IMD communication, if bIMD is TRUE. */ -+ bIMDstep = do_IMD(ir->bIMD, step, cr, bNS, state->box, state->x, ir, t, wcycle); -+ -+ /* kludge -- virial is lost with restart for NPT control. Must restart */ -+ if (bStartingFromCpt && bVV) -+ { -+ copy_mat(state->svir_prev, shake_vir); -+ copy_mat(state->fvir_prev, force_vir); -+ } -+ -+ elapsed_time = walltime_accounting_get_current_elapsed_time(walltime_accounting); -+ -+ /* Check whether everything is still allright */ -+ if (((int)gmx_get_stop_condition() > handled_stop_condition) -+#ifdef GMX_THREAD_MPI -+ && MASTER(cr) -+#endif -+ ) -+ { -+ /* this is just make gs.sig compatible with the hack -+ of sending signals around by MPI_Reduce with together with -+ other floats */ -+ if (gmx_get_stop_condition() == gmx_stop_cond_next_ns) -+ { -+ gs.sig[eglsSTOPCOND] = 1; -+ } -+ if (gmx_get_stop_condition() == gmx_stop_cond_next) -+ { -+ gs.sig[eglsSTOPCOND] = -1; -+ } -+ /* < 0 means stop at next step, > 0 means stop at next NS step */ -+ if (fplog) -+ { -+ fprintf(fplog, -+ "\n\nReceived the %s signal, stopping at the next %sstep\n\n", -+ gmx_get_signal_name(), -+ gs.sig[eglsSTOPCOND] == 1 ? "NS " : ""); -+ fflush(fplog); -+ } -+ fprintf(stderr, -+ "\n\nReceived the %s signal, stopping at the next %sstep\n\n", -+ gmx_get_signal_name(), -+ gs.sig[eglsSTOPCOND] == 1 ? "NS " : ""); -+ fflush(stderr); -+ handled_stop_condition = (int)gmx_get_stop_condition(); -+ } -+ else if (MASTER(cr) && (bNS || ir->nstlist <= 0) && -+ (max_hours > 0 && elapsed_time > max_hours*60.0*60.0*0.99) && -+ gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0) -+ { -+ /* Signal to terminate the run */ -+ gs.sig[eglsSTOPCOND] = 1; -+ if (fplog) -+ { -+ fprintf(fplog, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99); -+ } -+ fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99); -+ } -+ -+ if (bResetCountersHalfMaxH && MASTER(cr) && -+ elapsed_time > max_hours*60.0*60.0*0.495) -+ { -+ gs.sig[eglsRESETCOUNTERS] = 1; -+ } -+ -+ if (ir->nstlist == -1 && !bRerunMD) -+ { -+ /* When bGStatEveryStep=FALSE, global_stat is only called -+ * when we check the atom displacements, not at NS steps. -+ * This means that also the bonded interaction count check is not -+ * performed immediately after NS. Therefore a few MD steps could -+ * be performed with missing interactions. -+ * But wrong energies are never written to file, -+ * since energies are only written after global_stat -+ * has been called. -+ */ -+ if (step >= nlh.step_nscheck) -+ { -+ nlh.nabnsb = natoms_beyond_ns_buffer(ir, fr, &top->cgs, -+ nlh.scale_tot, state->x); -+ } -+ else -+ { -+ /* This is not necessarily true, -+ * but step_nscheck is determined quite conservatively. 
-+ */ -+ nlh.nabnsb = 0; -+ } -+ } -+ -+ /* In parallel we only have to check for checkpointing in steps -+ * where we do global communication, -+ * otherwise the other nodes don't know. -+ */ -+ if (MASTER(cr) && ((bGStat || !PAR(cr)) && -+ cpt_period >= 0 && -+ (cpt_period == 0 || -+ elapsed_time >= nchkpt*cpt_period*60.0)) && -+ gs.set[eglsCHKPT] == 0) -+ { -+ gs.sig[eglsCHKPT] = 1; -+ } -+ -+ /* at the start of step, randomize or scale the velocities (trotter done elsewhere) */ -+ if (EI_VV(ir->eI)) -+ { -+ if (!bInitStep) -+ { -+ update_tcouple(step, ir, state, ekind, &MassQ, mdatoms); -+ } -+ if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */ -+ { -+ gmx_bool bIfRandomize; -+ bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state, upd, constr); -+ /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */ -+ if (constr && bIfRandomize) -+ { -+ update_constraints(fplog, step, NULL, ir, ekind, mdatoms, -+ state, fr->bMolPBC, graph, f, -+ &top->idef, tmp_vir, -+ cr, nrnb, wcycle, upd, constr, -+ TRUE, bCalcVir, vetanew); -+ } -+ } -+ } -+ -+ if (bIterativeCase && do_per_step(step, ir->nstpcouple)) -+ { -+ gmx_iterate_init(&iterate, TRUE); -+ /* for iterations, we save these vectors, as we will be redoing the calculations */ -+ copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts)); -+ } -+ -+ bFirstIterate = TRUE; -+ while (bFirstIterate || iterate.bIterationActive) -+ { -+ /* We now restore these vectors to redo the calculation with improved extended variables */ -+ if (iterate.bIterationActive) -+ { -+ copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts)); -+ } -+ -+ /* We make the decision to break or not -after- the calculation of Ekin and Pressure, -+ so scroll down for that logic */ -+ -+ /* ######### START SECOND UPDATE STEP ################# */ -+ /* Box is changed in update() when we do pressure coupling, -+ * but we should still use the old box for energy corrections and when -+ * writing it to the energy file, so it matches the trajectory files for -+ * the same timestep above. Make a copy in a separate array. -+ */ -+ copy_mat(state->box, lastbox); -+ -+ bOK = TRUE; -+ dvdl_constr = 0; -+ -+ if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate)) -+ { -+ wallcycle_start(wcycle, ewcUPDATE); -+ /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */ -+ if (bTrotter) -+ { -+ if (iterate.bIterationActive) -+ { -+ if (bFirstIterate) -+ { -+ scalevir = 1; -+ } -+ else -+ { -+ /* we use a new value of scalevir to converge the iterations faster */ -+ scalevir = tracevir/trace(shake_vir); -+ } -+ msmul(shake_vir, scalevir, shake_vir); -+ m_add(force_vir, shake_vir, total_vir); -+ clear_mat(shake_vir); -+ } -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3); -+ /* We can only do Berendsen coupling after we have summed -+ * the kinetic energy or virial. Since the happens -+ * in global_state after update, we should only do it at -+ * step % nstlist = 1 with bGStatEveryStep=FALSE. -+ */ -+ } -+ else -+ { -+ update_tcouple(step, ir, state, ekind, &MassQ, mdatoms); -+ update_pcouple(fplog, step, ir, state, pcoupl_mu, M, bInitStep); -+ } -+ -+ if (bVV) -+ { -+ bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); -+ -+ /* velocity half-step update */ -+ update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, -+ bUpdateDoLR, fr->f_twin, bCalcVir ? 
&fr->vir_twin_constr : NULL, fcd, -+ ekind, M, upd, FALSE, etrtVELOCITY2, -+ cr, nrnb, constr, &top->idef); -+ } -+ -+ /* Above, initialize just copies ekinh into ekin, -+ * it doesn't copy position (for VV), -+ * and entire integrator for MD. -+ */ -+ -+ if (ir->eI == eiVVAK) -+ { -+ copy_rvecn(state->x, cbuf, 0, state->natoms); -+ } -+ bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); -+ -+ update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, -+ bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd, -+ ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef); -+ wallcycle_stop(wcycle, ewcUPDATE); -+ -+ update_constraints(fplog, step, &dvdl_constr, ir, ekind, mdatoms, state, -+ fr->bMolPBC, graph, f, -+ &top->idef, shake_vir, -+ cr, nrnb, wcycle, upd, constr, -+ FALSE, bCalcVir, state->veta); -+ -+ if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1) -+ { -+ /* Correct the virial for multiple time stepping */ -+ m_sub(shake_vir, fr->vir_twin_constr, shake_vir); -+ } -+ -+ if (ir->eI == eiVVAK) -+ { -+ /* erase F_EKIN and F_TEMP here? */ -+ /* just compute the kinetic energy at the half step to perform a trotter step */ -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, NULL, FALSE, lastbox, -+ top_global, &bSumEkinhOld, -+ cglo_flags | CGLO_TEMPERATURE -+ ); -+ wallcycle_start(wcycle, ewcUPDATE); -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4); -+ /* now we know the scaling, we can compute the positions again again */ -+ copy_rvecn(cbuf, state->x, 0, state->natoms); -+ -+ bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); -+ -+ update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, -+ bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd, -+ ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef); -+ wallcycle_stop(wcycle, ewcUPDATE); -+ -+ /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */ -+ /* are the small terms in the shake_vir here due -+ * to numerical errors, or are they important -+ * physically? I'm thinking they are just errors, but not completely sure. -+ * For now, will call without actually constraining, constr=NULL*/ -+ update_constraints(fplog, step, NULL, ir, ekind, mdatoms, -+ state, fr->bMolPBC, graph, f, -+ &top->idef, tmp_vir, -+ cr, nrnb, wcycle, upd, NULL, -+ FALSE, bCalcVir, -+ state->veta); -+ } -+ if (!bOK) -+ { -+ gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains"); -+ } -+ -+ if (fr->bSepDVDL && fplog && do_log) -+ { -+ gmx_print_sepdvdl(fplog, "Constraint dV/dl", 0.0, dvdl_constr); -+ } -+ if (bVV) -+ { -+ /* this factor or 2 correction is necessary -+ because half of the constraint force is removed -+ in the vv step, so we have to double it. See -+ the Redmine issue #1255. It is not yet clear -+ if the factor of 2 is exact, or just a very -+ good approximation, and this will be -+ investigated. The next step is to see if this -+ can be done adding a dhdl contribution from the -+ rattle step, but this is somewhat more -+ complicated with the current code. Will be -+ investigated, hopefully for 4.6.3. However, -+ this current solution is much better than -+ having it completely wrong. 
-+ */ -+ enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr; -+ } -+ else -+ { -+ enerd->term[F_DVDL_CONSTR] += dvdl_constr; -+ } -+ } -+ else if (graph) -+ { -+ /* Need to unshift here */ -+ unshift_self(graph, state->box, state->x); -+ } -+ -+ if (vsite != NULL) -+ { -+ wallcycle_start(wcycle, ewcVSITECONSTR); -+ if (graph != NULL) -+ { -+ shift_self(graph, state->box, state->x); -+ } -+ construct_vsites(vsite, state->x, ir->delta_t, state->v, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, cr, state->box); -+ -+ if (graph != NULL) -+ { -+ unshift_self(graph, state->box, state->x); -+ } -+ wallcycle_stop(wcycle, ewcVSITECONSTR); -+ } -+ -+ /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints ############ */ -+ /* With Leap-Frog we can skip compute_globals at -+ * non-communication steps, but we need to calculate -+ * the kinetic energy one step before communication. -+ */ -+ if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm))) -+ { -+ if (ir->nstlist == -1 && bFirstIterate) -+ { -+ gs.sig[eglsNABNSB] = nlh.nabnsb; -+ } -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, -+ bFirstIterate ? &gs : NULL, -+ (step_rel % gs.nstms == 0) && -+ (multisim_nsteps < 0 || (step_rel < multisim_nsteps)), -+ lastbox, -+ top_global, &bSumEkinhOld, -+ cglo_flags -+ | (!EI_VV(ir->eI) || bRerunMD ? CGLO_ENERGY : 0) -+ | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0) -+ | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) -+ | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0) -+ | (iterate.bIterationActive ? CGLO_ITERATE : 0) -+ | (bFirstIterate ? CGLO_FIRSTITERATE : 0) -+ | CGLO_CONSTRAINT -+ ); -+ if (ir->nstlist == -1 && bFirstIterate) -+ { -+ nlh.nabnsb = gs.set[eglsNABNSB]; -+ gs.set[eglsNABNSB] = 0; -+ } -+ } -+ /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */ -+ /* ############# END CALC EKIN AND PRESSURE ################# */ -+ -+ /* Note: this is OK, but there are some numerical precision issues with using the convergence of -+ the virial that should probably be addressed eventually. state->veta has better properies, -+ but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could -+ generate the new shake_vir, but test the veta value for convergence. This will take some thought. */ -+ -+ if (iterate.bIterationActive && -+ done_iterating(cr, fplog, step, &iterate, bFirstIterate, -+ trace(shake_vir), &tracevir)) -+ { -+ break; -+ } -+ bFirstIterate = FALSE; -+ } -+ -+ if (!bVV || bRerunMD) -+ { -+ /* sum up the foreign energy and dhdl terms for md and sd. currently done every step so that dhdl is correct in the .edr */ -+ sum_dhdl(enerd, state->lambda, ir->fepvals); -+ } -+ update_box(fplog, step, ir, mdatoms, state, f, -+ ir->nstlist == -1 ? &nlh.scale_tot : NULL, pcoupl_mu, nrnb, upd); -+ -+ /* ################# END UPDATE STEP 2 ################# */ -+ /* #### We now have r(t+dt) and v(t+dt/2) ############# */ -+ -+ /* The coordinates (x) were unshifted in update */ -+ if (!bGStat) -+ { -+ /* We will not sum ekinh_old, -+ * so signal that we still have to do it. 
-+ */ -+ bSumEkinhOld = TRUE; -+ } -+ -+ /* ######### BEGIN PREPARING EDR OUTPUT ########### */ -+ -+ /* use the directly determined last velocity, not actually the averaged half steps */ -+ if (bTrotter && ir->eI == eiVV) -+ { -+ enerd->term[F_EKIN] = last_ekin; -+ } -+ enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN]; -+ -+ if (bVV) -+ { -+ enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity; -+ } -+ else -+ { -+ enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir, state, &MassQ); -+ } -+ /* ######### END PREPARING EDR OUTPUT ########### */ -+ -+ /* Output stuff */ -+ if (MASTER(cr)) -+ { -+ gmx_bool do_dr, do_or; -+ -+ if (fplog && do_log && bDoExpanded) -+ { -+ /* only needed if doing expanded ensemble */ -+ PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : NULL, -+ &state_global->dfhist, state->fep_state, ir->nstlog, step); -+ } -+ if (!(bStartingFromCpt && (EI_VV(ir->eI)))) -+ { -+ if (bCalcEner) -+ { -+ upd_mdebin(mdebin, bDoDHDL, TRUE, -+ t, mdatoms->tmass, enerd, state, -+ ir->fepvals, ir->expandedvals, lastbox, -+ shake_vir, force_vir, total_vir, pres, -+ ekind, mu_tot, constr); -+ } -+ else -+ { -+ upd_mdebin_step(mdebin); -+ } -+ -+ do_dr = do_per_step(step, ir->nstdisreout); -+ do_or = do_per_step(step, ir->nstorireout); -+ -+ print_ebin(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or, do_log ? fplog : NULL, -+ step, t, -+ eprNORMAL, bCompact, mdebin, fcd, groups, &(ir->opts)); -+ } -+ if (ir->ePull != epullNO) -+ { -+ pull_print_output(ir->pull, step, t); -+ } -+ -+ if (do_per_step(step, ir->nstlog)) -+ { -+ if (fflush(fplog) != 0) -+ { -+ gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?"); -+ } -+ } -+ } -+ if (bDoExpanded) -+ { -+ /* Have to do this part _after_ outputting the logfile and the edr file */ -+ /* Gets written into the state at the beginning of next loop*/ -+ state->fep_state = lamnew; -+ } -+ /* Print the remaining wall clock time for the run */ -+ if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal()) && !bPMETuneRunning) -+ { -+ if (shellfc) -+ { -+ fprintf(stderr, "\n"); -+ } -+ print_time(stderr, walltime_accounting, step, ir, cr); -+ } -+ -+ /* Ion/water position swapping. -+ * Not done in last step since trajectory writing happens before this call -+ * in the MD loop and exchanges would be lost anyway. */ -+ bNeedRepartition = FALSE; -+ if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && -+ do_per_step(step, ir->swap->nstswap)) -+ { -+ bNeedRepartition = do_swapcoords(cr, step, t, ir, wcycle, -+ bRerunMD ? rerun_fr.x : state->x, -+ bRerunMD ? 
rerun_fr.box : state->box, -+ top_global, MASTER(cr) && bVerbose, bRerunMD); -+ -+ if (bNeedRepartition && DOMAINDECOMP(cr)) -+ { -+ dd_collect_state(cr->dd, state, state_global); -+ } -+ } -+ -+ /* Replica exchange */ -+ bExchanged = FALSE; -+ if (bDoReplEx) -+ { -+ bExchanged = replica_exchange(fplog, cr, repl_ex, -+ state_global, enerd, -+ state, step, t); -+ } -+ -+ if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) ) -+ { -+ dd_partition_system(fplog, step, cr, TRUE, 1, -+ state_global, top_global, ir, -+ state, &f, mdatoms, top, fr, -+ vsite, shellfc, constr, -+ nrnb, wcycle, FALSE); -+ } -+ -+ bFirstStep = FALSE; -+ bInitStep = FALSE; -+ bStartingFromCpt = FALSE; -+ -+ /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */ -+ /* With all integrators, except VV, we need to retain the pressure -+ * at the current step for coupling at the next step. -+ */ -+ if ((state->flags & (1<nstpcouple > 0 && step % ir->nstpcouple == 0))) -+ { -+ /* Store the pressure in t_state for pressure coupling -+ * at the next MD step. -+ */ -+ copy_mat(pres, state->pres_prev); -+ } -+ -+ /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */ -+ -+ if ( (membed != NULL) && (!bLastStep) ) -+ { -+ rescale_membed(step_rel, membed, state_global->x); -+ } -+ -+ if (bRerunMD) -+ { -+ if (MASTER(cr)) -+ { -+ /* read next frame from input trajectory */ -+ bNotLastFrame = read_next_frame(oenv, status, &rerun_fr); -+ } -+ -+ if (PAR(cr)) -+ { -+ rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame); -+ } -+ } -+ -+ if (!bRerunMD || !rerun_fr.bStep) -+ { -+ /* increase the MD step number */ -+ step++; -+ step_rel++; -+ } -+ -+ cycles = wallcycle_stop(wcycle, ewcSTEP); -+ if (DOMAINDECOMP(cr) && wcycle) -+ { -+ dd_cycles_add(cr->dd, cycles, ddCyclStep); -+ } -+ -+ if (bPMETuneRunning || bPMETuneTry) -+ { -+ /* PME grid + cut-off optimization with GPUs or PME nodes */ -+ -+ /* Count the total cycles over the last steps */ -+ cycles_pmes += cycles; -+ -+ /* We can only switch cut-off at NS steps */ -+ if (step % ir->nstlist == 0) -+ { -+ /* PME grid + cut-off optimization with GPUs or PME nodes */ -+ if (bPMETuneTry) -+ { -+ if (DDMASTER(cr->dd)) -+ { -+ /* PME node load is too high, start tuning */ -+ bPMETuneRunning = (dd_pme_f_ratio(cr->dd) >= 1.05); -+ } -+ dd_bcast(cr->dd, sizeof(gmx_bool), &bPMETuneRunning); -+ -+ if (bPMETuneRunning && -+ fr->nbv->bUseGPU && DOMAINDECOMP(cr) && -+ !(cr->duty & DUTY_PME)) -+ { -+ /* Lock DLB=auto to off (does nothing when DLB=yes/no). -+ * With GPUs + separate PME ranks, we don't want DLB. -+ * This could happen when we scan coarse grids and -+ * it would then never be turned off again. -+ * This would hurt performance at the final, optimal -+ * grid spacing, where DLB almost never helps. -+ * Also, DLB can limit the cut-off for PME tuning. -+ */ -+ dd_dlb_set_lock(cr->dd, TRUE); -+ } -+ -+ if (bPMETuneRunning || step_rel > ir->nstlist*50) -+ { -+ bPMETuneTry = FALSE; -+ } -+ } -+ if (bPMETuneRunning) -+ { -+ /* init_step might not be a multiple of nstlist, -+ * but the first cycle is always skipped anyhow. -+ */ -+ bPMETuneRunning = -+ pme_load_balance(pme_loadbal, cr, -+ (bVerbose && MASTER(cr)) ? 
stderr : NULL, -+ fplog, -+ ir, state, cycles_pmes, -+ fr->ic, fr->nbv, &fr->pmedata, -+ step); -+ -+ /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */ -+ fr->ewaldcoeff_q = fr->ic->ewaldcoeff_q; -+ fr->ewaldcoeff_lj = fr->ic->ewaldcoeff_lj; -+ fr->rlist = fr->ic->rlist; -+ fr->rlistlong = fr->ic->rlistlong; -+ fr->rcoulomb = fr->ic->rcoulomb; -+ fr->rvdw = fr->ic->rvdw; -+ -+ if (ir->eDispCorr != edispcNO) -+ { -+ calc_enervirdiff(NULL, ir->eDispCorr, fr); -+ } -+ -+ if (!bPMETuneRunning && -+ DOMAINDECOMP(cr) && -+ dd_dlb_is_locked(cr->dd)) -+ { -+ /* Unlock the DLB=auto, DLB is allowed to activate -+ * (but we don't expect it to activate in most cases). -+ */ -+ dd_dlb_set_lock(cr->dd, FALSE); -+ } -+ } -+ cycles_pmes = 0; -+ } -+ } -+ -+ if (step_rel == wcycle_get_reset_counters(wcycle) || -+ gs.set[eglsRESETCOUNTERS] != 0) -+ { -+ /* Reset all the counters related to performance over the run */ -+ reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, walltime_accounting, -+ fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL); -+ wcycle_set_reset_counters(wcycle, -1); -+ if (!(cr->duty & DUTY_PME)) -+ { -+ /* Tell our PME node to reset its counters */ -+ gmx_pme_send_resetcounters(cr, step); -+ } -+ /* Correct max_hours for the elapsed time */ -+ max_hours -= elapsed_time/(60.0*60.0); -+ bResetCountersHalfMaxH = FALSE; -+ gs.set[eglsRESETCOUNTERS] = 0; -+ } -+ -+ /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */ -+ IMD_prep_energies_send_positions(ir->bIMD && MASTER(cr), bIMDstep, ir->imd, enerd, step, bCalcEner, wcycle); -+ -+ } -+ /* End of main MD loop */ -+ debug_gmx(); -+ -+ /* Closing TNG files can include compressing data. Therefore it is good to do that -+ * before stopping the time measurements. */ -+ mdoutf_tng_close(outf); -+ -+ /* Stop measuring walltime */ -+ walltime_accounting_end(walltime_accounting); -+ -+ if (bRerunMD && MASTER(cr)) -+ { -+ close_trj(status); -+ } -+ -+ if (!(cr->duty & DUTY_PME)) -+ { -+ /* Tell the PME only node to finish */ -+ gmx_pme_send_finish(cr); -+ } -+ -+ if (MASTER(cr)) -+ { -+ if (ir->nstcalcenergy > 0 && !bRerunMD) -+ { -+ print_ebin(mdoutf_get_fp_ene(outf), FALSE, FALSE, FALSE, fplog, step, t, -+ eprAVER, FALSE, mdebin, fcd, groups, &(ir->opts)); -+ } -+ } -+ -+ done_mdoutf(outf); -+ debug_gmx(); -+ -+ if (ir->nstlist == -1 && nlh.nns > 0 && fplog) -+ { -+ fprintf(fplog, "Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n", nlh.s1/nlh.nns, sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns))); -+ fprintf(fplog, "Average number of atoms that crossed the half buffer length: %.1f\n\n", nlh.ab/nlh.nns); -+ } -+ -+ if (pme_loadbal != NULL) -+ { -+ pme_loadbal_done(pme_loadbal, cr, fplog, -+ fr->nbv != NULL && fr->nbv->bUseGPU); -+ } -+ -+ if (shellfc && fplog) -+ { -+ fprintf(fplog, "Fraction of iterations that converged: %.2f %%\n", -+ (nconverged*100.0)/step_rel); -+ fprintf(fplog, "Average number of force evaluations per MD step: %.2f\n\n", -+ tcount/step_rel); -+ } -+ -+ if (repl_ex_nst > 0 && MASTER(cr)) -+ { -+ print_replica_exchange_statistics(fplog, repl_ex); -+ } -+ -+ /* IMD cleanup, if bIMD is TRUE. 
*/ -+ IMD_finalize(ir->bIMD, ir->imd); -+ -+ walltime_accounting_set_nsteps_done(walltime_accounting, step_rel); -+ -+ return 0; -+} -diff --git a/src/programs/mdrun/mdrun.cpp b/src/programs/mdrun/mdrun.cpp -index 6bac3f0..e9fbf48 100644 ---- a/src/programs/mdrun/mdrun.cpp -+++ b/src/programs/mdrun/mdrun.cpp -@@ -55,6 +55,12 @@ - - #include "gromacs/commandline/pargs.h" - #include "gromacs/fileio/filenm.h" -+/* PLUMED */ -+#include "../../../Plumed.h" -+extern int plumedswitch; -+extern plumed plumedmain; -+extern void(*plumedcmd)(plumed,const char*,const void*); -+/* END PLUMED */ - - int gmx_mdrun(int argc, char *argv[]) - { -@@ -428,6 +434,7 @@ int gmx_mdrun(int argc, char *argv[]) - { efMTX, "-mtx", "nm", ffOPTWR }, - { efNDX, "-dn", "dipole", ffOPTWR }, - { efRND, "-multidir", NULL, ffOPTRDMULT}, -+ { efDAT, "-plumed", "plumed", ffOPTRD }, /* PLUMED */ - { efDAT, "-membed", "membed", ffOPTRD }, - { efTOP, "-mp", "membed", ffOPTRD }, - { efNDX, "-mn", "membed", ffOPTRD }, -@@ -780,6 +787,32 @@ int gmx_mdrun(int argc, char *argv[]) - ddxyz[YY] = (int)(realddxyz[YY] + 0.5); - ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5); - -+ /* PLUMED */ -+ plumedswitch=0; -+ if (opt2bSet("-plumed",NFILE,fnm)) plumedswitch=1; -+ if(plumedswitch){ -+ plumedcmd=plumed_cmd; -+ int plumed_is_there=0; -+ int real_precision=sizeof(real); -+ real energyUnits=1.0; -+ real lengthUnits=1.0; -+ real timeUnits=1.0; -+ -+ if(!plumed_installed()){ -+ gmx_fatal(FARGS,"Plumed is not available. Check your PLUMED_KERNEL variable."); -+ } -+ plumedmain=plumed_create(); -+ plumed_cmd(plumedmain,"setRealPrecision",&real_precision); -+ // this is not necessary for gromacs units: -+ plumed_cmd(plumedmain,"setMDEnergyUnits",&energyUnits); -+ plumed_cmd(plumedmain,"setMDLengthUnits",&lengthUnits); -+ plumed_cmd(plumedmain,"setMDTimeUnits",&timeUnits); -+ // -+ plumed_cmd(plumedmain,"setPlumedDat",ftp2fn(efDAT,NFILE,fnm)); -+ plumedswitch=1; -+ } -+ /* END PLUMED */ -+ - rc = mdrunner(&hw_opt, fplog, cr, NFILE, fnm, oenv, bVerbose, bCompact, - nstglobalcomm, ddxyz, dd_node_order, rdd, rconstr, - dddlb_opt[0], dlb_scale, ddcsx, ddcsy, ddcsz, -@@ -788,6 +821,12 @@ int gmx_mdrun(int argc, char *argv[]) - nmultisim, repl_ex_nst, repl_ex_nex, repl_ex_seed, - pforce, cpt_period, max_hours, deviceOptions, imdport, Flags); - -+ /* PLUMED */ -+ if(plumedswitch){ -+ plumed_finalize(plumedmain); -+ } -+ /* END PLUMED */ -+ - /* Log file has to be closed in mdrunner if we are appending to it - (fplog not set here) */ - if (MASTER(cr) && !bAppendFiles) -diff --git a/src/programs/mdrun/mdrun.cpp.preplumed b/src/programs/mdrun/mdrun.cpp.preplumed -new file mode 100644 -index 0000000..6bac3f0 ---- /dev/null -+++ b/src/programs/mdrun/mdrun.cpp.preplumed -@@ -0,0 +1,799 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team. -+ * Copyright (c) 2011,2012,2013,2014, by the GROMACS development team, led by -+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+ * and including many others, as listed in the AUTHORS file in the -+ * top-level source directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. 
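The mdrun.cpp hunk above drives PLUMED purely through the C interface declared in Plumed.h: plumed_installed() probes for a kernel, plumed_create()/plumed_finalize() manage the handle, and plumed_cmd() passes settings by pointer. A minimal standalone sketch of that initialization sequence, assuming a PLUMED installation whose Plumed.h is on the include path and an illustrative plumed.dat input file (the unit and precision values mirror what the patch passes for GROMACS):

#include <stdio.h>
#include "Plumed.h" /* PLUMED C interface; this include path is an assumption */

int main(void)
{
    int    real_precision = (int) sizeof(double); /* the patch passes sizeof(real) */
    double energyUnits    = 1.0; /* 1.0: GROMACS' kJ/mol, nm, ps already match PLUMED's defaults */
    double lengthUnits    = 1.0;
    double timeUnits      = 1.0;

    if (!plumed_installed())
    {
        fprintf(stderr, "No PLUMED kernel found; check the PLUMED_KERNEL variable.\n");
        return 1;
    }

    plumed p = plumed_create();
    plumed_cmd(p, "setRealPrecision", &real_precision);
    plumed_cmd(p, "setMDEnergyUnits", &energyUnits);
    plumed_cmd(p, "setMDLengthUnits", &lengthUnits);
    plumed_cmd(p, "setMDTimeUnits",   &timeUnits);
    plumed_cmd(p, "setPlumedDat",     "plumed.dat"); /* illustrative file name */
    /* A full MD engine would also pass the number of atoms, the timestep and
     * per-step coordinates/forces before asking PLUMED for the bias; only the
     * command-line/initialization half of the hook is visible in this hunk. */
    plumed_finalize(p);
    return 0;
}

In the patch itself this block is gated by plumedswitch, which is set only when the new -plumed command-line option is given, so runs that never pass -plumed behave exactly like unpatched mdrun.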
-+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. -+ */ -+#include "mdrun_main.h" -+ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include -+ -+#include "gromacs/legacyheaders/checkpoint.h" -+#include "gromacs/legacyheaders/copyrite.h" -+#include "gromacs/legacyheaders/gmx_fatal.h" -+#include "gromacs/legacyheaders/macros.h" -+#include "gromacs/legacyheaders/main.h" -+#include "gromacs/legacyheaders/mdrun.h" -+#include "gromacs/legacyheaders/network.h" -+#include "gromacs/legacyheaders/readinp.h" -+#include "gromacs/legacyheaders/typedefs.h" -+#include "gromacs/legacyheaders/types/commrec.h" -+ -+#include "gromacs/commandline/pargs.h" -+#include "gromacs/fileio/filenm.h" -+ -+int gmx_mdrun(int argc, char *argv[]) -+{ -+ const char *desc[] = { -+ "[THISMODULE] is the main computational chemistry engine", -+ "within GROMACS. Obviously, it performs Molecular Dynamics simulations,", -+ "but it can also perform Stochastic Dynamics, Energy Minimization,", -+ "test particle insertion or (re)calculation of energies.", -+ "Normal mode analysis is another option. 
In this case [TT]mdrun[tt]", -+ "builds a Hessian matrix from single conformation.", -+ "For usual Normal Modes-like calculations, make sure that", -+ "the structure provided is properly energy-minimized.", -+ "The generated matrix can be diagonalized by [gmx-nmeig].[PAR]", -+ "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])", -+ "and distributes the topology over ranks if needed.", -+ "[TT]mdrun[tt] produces at least four output files.", -+ "A single log file ([TT]-g[tt]) is written, unless the option", -+ "[TT]-seppot[tt] is used, in which case each rank writes a log file.", -+ "The trajectory file ([TT]-o[tt]), contains coordinates, velocities and", -+ "optionally forces.", -+ "The structure file ([TT]-c[tt]) contains the coordinates and", -+ "velocities of the last step.", -+ "The energy file ([TT]-e[tt]) contains energies, the temperature,", -+ "pressure, etc, a lot of these things are also printed in the log file.", -+ "Optionally coordinates can be written to a compressed trajectory file", -+ "([TT]-x[tt]).[PAR]", -+ "The option [TT]-dhdl[tt] is only used when free energy calculation is", -+ "turned on.[PAR]", -+ "A simulation can be run in parallel using two different parallelization", -+ "schemes: MPI parallelization and/or OpenMP thread parallelization.", -+ "The MPI parallelization uses multiple processes when [TT]mdrun[tt] is", -+ "compiled with a normal MPI library or threads when [TT]mdrun[tt] is", -+ "compiled with the GROMACS built-in thread-MPI library. OpenMP threads", -+ "are supported when [TT]mdrun[tt] is compiled with OpenMP. Full OpenMP support", -+ "is only available with the Verlet cut-off scheme, with the (older)", -+ "group scheme only PME-only ranks can use OpenMP parallelization.", -+ "In all cases [TT]mdrun[tt] will by default try to use all the available", -+ "hardware resources. With a normal MPI library only the options", -+ "[TT]-ntomp[tt] (with the Verlet cut-off scheme) and [TT]-ntomp_pme[tt],", -+ "for PME-only ranks, can be used to control the number of threads.", -+ "With thread-MPI there are additional options [TT]-nt[tt], which sets", -+ "the total number of threads, and [TT]-ntmpi[tt], which sets the number", -+ "of thread-MPI threads.", -+ "The number of OpenMP threads used by [TT]mdrun[tt] can also be set with", -+ "the standard environment variable, [TT]OMP_NUM_THREADS[tt].", -+ "The [TT]GMX_PME_NUM_THREADS[tt] environment variable can be used to specify", -+ "the number of threads used by the PME-only ranks.[PAR]", -+ "Note that combined MPI+OpenMP parallelization is in many cases", -+ "slower than either on its own. However, at high parallelization, using the", -+ "combination is often beneficial as it reduces the number of domains and/or", -+ "the number of MPI ranks. (Less and larger domains can improve scaling,", -+ "with separate PME ranks, using fewer MPI ranks reduces communication costs.)", -+ "OpenMP-only parallelization is typically faster than MPI-only parallelization", -+ "on a single CPU(-die). Since we currently don't have proper hardware", -+ "topology detection, [TT]mdrun[tt] compiled with thread-MPI will only", -+ "automatically use OpenMP-only parallelization when you use up to 4", -+ "threads, up to 12 threads with Intel Nehalem/Westmere, or up to 16", -+ "threads with Intel Sandy Bridge or newer CPUs. 
Otherwise MPI-only", -+ "parallelization is used (except with GPUs, see below).", -+ "[PAR]", -+ "To quickly test the performance of the new Verlet cut-off scheme", -+ "with old [TT].tpr[tt] files, either on CPUs or CPUs+GPUs, you can use", -+ "the [TT]-testverlet[tt] option. This should not be used for production,", -+ "since it can slightly modify potentials and it will remove charge groups", -+ "making analysis difficult, as the [TT].tpr[tt] file will still contain", -+ "charge groups. For production simulations it is highly recommended", -+ "to specify [TT]cutoff-scheme = Verlet[tt] in the [TT].mdp[tt] file.", -+ "[PAR]", -+ "With GPUs (only supported with the Verlet cut-off scheme), the number", -+ "of GPUs should match the number of particle-particle ranks, i.e.", -+ "excluding PME-only ranks. With thread-MPI, unless set on the command line, the number", -+ "of MPI threads will automatically be set to the number of GPUs detected.", -+ "To use a subset of the available GPUs, or to manually provide a mapping of", -+ "GPUs to PP ranks, you can use the [TT]-gpu_id[tt] option. The argument of [TT]-gpu_id[tt] is", -+ "a string of digits (without delimiter) representing device id-s of the GPUs to be used.", -+ "For example, \"[TT]02[tt]\" specifies using GPUs 0 and 2 in the first and second PP ranks per compute node", -+ "respectively. To select different sets of GPU-s", -+ "on different nodes of a compute cluster, use the [TT]GMX_GPU_ID[tt] environment", -+ "variable instead. The format for [TT]GMX_GPU_ID[tt] is identical to ", -+ "[TT]-gpu_id[tt], with the difference that an environment variable can have", -+ "different values on different compute nodes. Multiple MPI ranks on each node", -+ "can share GPUs. This is accomplished by specifying the id(s) of the GPU(s)", -+ "multiple times, e.g. \"[TT]0011[tt]\" for four ranks sharing two GPUs in this node.", -+ "This works within a single simulation, or a multi-simulation, with any form of MPI.", -+ "[PAR]", -+ "With the Verlet cut-off scheme and verlet-buffer-tolerance set,", -+ "the pair-list update interval nstlist can be chosen freely with", -+ "the option [TT]-nstlist[tt]. [TT]mdrun[tt] will then adjust", -+ "the pair-list cut-off to maintain accuracy, and not adjust nstlist.", -+ "Otherwise, by default, [TT]mdrun[tt] will try to increase the", -+ "value of nstlist set in the [TT].mdp[tt] file to improve the", -+ "performance. For CPU-only runs, nstlist might increase to 20, for", -+ "GPU runs up to 40. For medium to high parallelization or with", -+ "fast GPUs, a (user-supplied) larger nstlist value can give much", -+ "better performance.", -+ "[PAR]", -+ "When using PME with separate PME ranks or with a GPU, the two major", -+ "compute tasks, the non-bonded force calculation and the PME calculation", -+ "run on different compute resources. If this load is not balanced,", -+ "some of the resources will be idle part of time. With the Verlet", -+ "cut-off scheme this load is automatically balanced when the PME load", -+ "is too high (but not when it is too low). This is done by scaling", -+ "the Coulomb cut-off and PME grid spacing by the same amount. In the first", -+ "few hundred steps different settings are tried and the fastest is chosen", -+ "for the rest of the simulation. This does not affect the accuracy of", -+ "the results, but it does affect the decomposition of the Coulomb energy", -+ "into particle and mesh contributions. 
The auto-tuning can be turned off", -+ "with the option [TT]-notunepme[tt].", -+ "[PAR]", -+ "[TT]mdrun[tt] pins (sets affinity of) threads to specific cores,", -+ "when all (logical) cores on a compute node are used by [TT]mdrun[tt],", -+ "even when no multi-threading is used,", -+ "as this usually results in significantly better performance.", -+ "If the queuing systems or the OpenMP library pinned threads, we honor", -+ "this and don't pin again, even though the layout may be sub-optimal.", -+ "If you want to have [TT]mdrun[tt] override an already set thread affinity", -+ "or pin threads when using less cores, use [TT]-pin on[tt].", -+ "With SMT (simultaneous multithreading), e.g. Intel Hyper-Threading,", -+ "there are multiple logical cores per physical core.", -+ "The option [TT]-pinstride[tt] sets the stride in logical cores for", -+ "pinning consecutive threads. Without SMT, 1 is usually the best choice.", -+ "With Intel Hyper-Threading 2 is best when using half or less of the", -+ "logical cores, 1 otherwise. The default value of 0 do exactly that:", -+ "it minimizes the threads per logical core, to optimize performance.", -+ "If you want to run multiple [TT]mdrun[tt] jobs on the same physical node," -+ "you should set [TT]-pinstride[tt] to 1 when using all logical cores.", -+ "When running multiple [TT]mdrun[tt] (or other) simulations on the same physical", -+ "node, some simulations need to start pinning from a non-zero core", -+ "to avoid overloading cores; with [TT]-pinoffset[tt] you can specify", -+ "the offset in logical cores for pinning.", -+ "[PAR]", -+ "When [TT]mdrun[tt] is started with more than 1 rank,", -+ "parallelization with domain decomposition is used.", -+ "[PAR]", -+ "With domain decomposition, the spatial decomposition can be set", -+ "with option [TT]-dd[tt]. By default [TT]mdrun[tt] selects a good decomposition.", -+ "The user only needs to change this when the system is very inhomogeneous.", -+ "Dynamic load balancing is set with the option [TT]-dlb[tt],", -+ "which can give a significant performance improvement,", -+ "especially for inhomogeneous systems. The only disadvantage of", -+ "dynamic load balancing is that runs are no longer binary reproducible,", -+ "but in most cases this is not important.", -+ "By default the dynamic load balancing is automatically turned on", -+ "when the measured performance loss due to load imbalance is 5% or more.", -+ "At low parallelization these are the only important options", -+ "for domain decomposition.", -+ "At high parallelization the options in the next two sections", -+ "could be important for increasing the performace.", -+ "[PAR]", -+ "When PME is used with domain decomposition, separate ranks can", -+ "be assigned to do only the PME mesh calculation;", -+ "this is computationally more efficient starting at about 12 ranks,", -+ "or even fewer when OpenMP parallelization is used.", -+ "The number of PME ranks is set with option [TT]-npme[tt],", -+ "but this cannot be more than half of the ranks.", -+ "By default [TT]mdrun[tt] makes a guess for the number of PME", -+ "ranks when the number of ranks is larger than 16. With GPUs,", -+ "using separate PME ranks is not selected automatically,", -+ "since the optimal setup depends very much on the details", -+ "of the hardware. In all cases, you might gain performance", -+ "by optimizing [TT]-npme[tt]. 
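/* Editor's sketch, not part of the patch: pinning a thread to a logical
 * core chosen from an offset and a stride, in the spirit of the
 * -pinoffset/-pinstride options described above.  Linux-specific
 * (pthread_setaffinity_np); the helper name pin_thread_to_core is
 * hypothetical.  Build with: gcc -pthread pin.c -o pin
 */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static int pin_thread_to_core(int core)
{
    cpu_set_t set;

    CPU_ZERO(&set);
    CPU_SET(core, &set);
    /* Pin the calling thread; returns 0 on success, an errno value otherwise. */
    return pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &set);
}

int main(void)
{
    const int offset   = 0;  /* cf. -pinoffset: first logical core to use     */
    const int stride   = 2;  /* cf. -pinstride: 2 uses every other logical core */
    const int nthreads = 4;

    /* Print the logical core each thread of this process would get. */
    for (int t = 0; t < nthreads; t++)
    {
        printf("thread %d -> logical core %d\n", t, offset + t * stride);
    }

    /* Actually pin the calling (main) thread to its slot as a demonstration. */
    if (pin_thread_to_core(offset) != 0)
    {
        fprintf(stderr, "pinning failed\n");
    }
    return 0;
}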
Performance statistics on this issue", -+ "are written at the end of the log file.", -+ "For good load balancing at high parallelization, the PME grid x and y", -+ "dimensions should be divisible by the number of PME ranks", -+ "(the simulation will run correctly also when this is not the case).", -+ "[PAR]", -+ "This section lists all options that affect the domain decomposition.", -+ "[PAR]", -+ "Option [TT]-rdd[tt] can be used to set the required maximum distance", -+ "for inter charge-group bonded interactions.", -+ "Communication for two-body bonded interactions below the non-bonded", -+ "cut-off distance always comes for free with the non-bonded communication.", -+ "Atoms beyond the non-bonded cut-off are only communicated when they have", -+ "missing bonded interactions; this means that the extra cost is minor", -+ "and nearly indepedent of the value of [TT]-rdd[tt].", -+ "With dynamic load balancing option [TT]-rdd[tt] also sets", -+ "the lower limit for the domain decomposition cell sizes.", -+ "By default [TT]-rdd[tt] is determined by [TT]mdrun[tt] based on", -+ "the initial coordinates. The chosen value will be a balance", -+ "between interaction range and communication cost.", -+ "[PAR]", -+ "When inter charge-group bonded interactions are beyond", -+ "the bonded cut-off distance, [TT]mdrun[tt] terminates with an error message.", -+ "For pair interactions and tabulated bonds", -+ "that do not generate exclusions, this check can be turned off", -+ "with the option [TT]-noddcheck[tt].", -+ "[PAR]", -+ "When constraints are present, option [TT]-rcon[tt] influences", -+ "the cell size limit as well.", -+ "Atoms connected by NC constraints, where NC is the LINCS order plus 1,", -+ "should not be beyond the smallest cell size. A error message is", -+ "generated when this happens and the user should change the decomposition", -+ "or decrease the LINCS order and increase the number of LINCS iterations.", -+ "By default [TT]mdrun[tt] estimates the minimum cell size required for P-LINCS", -+ "in a conservative fashion. For high parallelization it can be useful", -+ "to set the distance required for P-LINCS with the option [TT]-rcon[tt].", -+ "[PAR]", -+ "The [TT]-dds[tt] option sets the minimum allowed x, y and/or z scaling", -+ "of the cells with dynamic load balancing. [TT]mdrun[tt] will ensure that", -+ "the cells can scale down by at least this factor. This option is used", -+ "for the automated spatial decomposition (when not using [TT]-dd[tt])", -+ "as well as for determining the number of grid pulses, which in turn", -+ "sets the minimum allowed cell size. Under certain circumstances", -+ "the value of [TT]-dds[tt] might need to be adjusted to account for", -+ "high or low spatial inhomogeneity of the system.", -+ "[PAR]", -+ "The option [TT]-gcom[tt] can be used to only do global communication", -+ "every n steps.", -+ "This can improve performance for highly parallel simulations", -+ "where this global communication step becomes the bottleneck.", -+ "For a global thermostat and/or barostat the temperature", -+ "and/or pressure will also only be updated every [TT]-gcom[tt] steps.", -+ "By default it is set to the minimum of nstcalcenergy and nstlist.[PAR]", -+ "With [TT]-rerun[tt] an input trajectory can be given for which ", -+ "forces and energies will be (re)calculated. 
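/* Editor's sketch, not part of the patch: the load-balancing rule of
 * thumb stated above -- for good PME load balance the grid x and y
 * dimensions should be divisible by the number of PME ranks, while the
 * run remains correct either way.  Purely illustrative; the grid sizes
 * below are made up.
 */
#include <stdio.h>

static void check_pme_grid(int nx, int ny, int npme)
{
    if (npme > 0 && (nx % npme != 0 || ny % npme != 0))
    {
        printf("grid %dx%d: not divisible by %d PME ranks "
               "(run is still correct, balance may suffer)\n", nx, ny, npme);
    }
    else
    {
        printf("grid %dx%d: evenly divisible over %d PME ranks\n",
               nx, ny, npme);
    }
}

int main(void)
{
    check_pme_grid(96, 96, 12);    /* divisible: good balance     */
    check_pme_grid(100, 96, 12);   /* x not divisible: warn       */
    return 0;
}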
Neighbor searching will be", -+ "performed for every frame, unless [TT]nstlist[tt] is zero", -+ "(see the [TT].mdp[tt] file).[PAR]", -+ "ED (essential dynamics) sampling and/or additional flooding potentials", -+ "are switched on by using the [TT]-ei[tt] flag followed by an [TT].edi[tt]", -+ "file. The [TT].edi[tt] file can be produced with the [TT]make_edi[tt] tool", -+ "or by using options in the essdyn menu of the WHAT IF program.", -+ "[TT]mdrun[tt] produces a [TT].xvg[tt] output file that", -+ "contains projections of positions, velocities and forces onto selected", -+ "eigenvectors.[PAR]", -+ "When user-defined potential functions have been selected in the", -+ "[TT].mdp[tt] file the [TT]-table[tt] option is used to pass [TT]mdrun[tt]", -+ "a formatted table with potential functions. The file is read from", -+ "either the current directory or from the [TT]GMXLIB[tt] directory.", -+ "A number of pre-formatted tables are presented in the [TT]GMXLIB[tt] dir,", -+ "for 6-8, 6-9, 6-10, 6-11, 6-12 Lennard-Jones potentials with", -+ "normal Coulomb.", -+ "When pair interactions are present, a separate table for pair interaction", -+ "functions is read using the [TT]-tablep[tt] option.[PAR]", -+ "When tabulated bonded functions are present in the topology,", -+ "interaction functions are read using the [TT]-tableb[tt] option.", -+ "For each different tabulated interaction type the table file name is", -+ "modified in a different way: before the file extension an underscore is", -+ "appended, then a 'b' for bonds, an 'a' for angles or a 'd' for dihedrals", -+ "and finally the table number of the interaction type.[PAR]", -+ "The options [TT]-px[tt] and [TT]-pf[tt] are used for writing pull COM", -+ "coordinates and forces when pulling is selected", -+ "in the [TT].mdp[tt] file.[PAR]", -+ "With [TT]-multi[tt] or [TT]-multidir[tt], multiple systems can be ", -+ "simulated in parallel.", -+ "As many input files/directories are required as the number of systems. ", -+ "The [TT]-multidir[tt] option takes a list of directories (one for each ", -+ "system) and runs in each of them, using the input/output file names, ", -+ "such as specified by e.g. the [TT]-s[tt] option, relative to these ", -+ "directories.", -+ "With [TT]-multi[tt], the system number is appended to the run input ", -+ "and each output filename, for instance [TT]topol.tpr[tt] becomes", -+ "[TT]topol0.tpr[tt], [TT]topol1.tpr[tt] etc.", -+ "The number of ranks per system is the total number of ranks", -+ "divided by the number of systems.", -+ "One use of this option is for NMR refinement: when distance", -+ "or orientation restraints are present these can be ensemble averaged", -+ "over all the systems.[PAR]", -+ "With [TT]-replex[tt] replica exchange is attempted every given number", -+ "of steps. The number of replicas is set with the [TT]-multi[tt] or ", -+ "[TT]-multidir[tt] option, described above.", -+ "All run input files should use a different coupling temperature,", -+ "the order of the files is not important. The random seed is set with", -+ "[TT]-reseed[tt]. The velocities are scaled and neighbor searching", -+ "is performed after every exchange.[PAR]", -+ "Finally some experimental algorithms can be tested when the", -+ "appropriate options have been given. Currently under", -+ "investigation are: polarizability.", -+ "[PAR]", -+ "The option [TT]-membed[tt] does what used to be g_membed, i.e. embed", -+ "a protein into a membrane. The data file should contain the options", -+ "that where passed to g_membed before. 
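/* Editor's sketch, not part of the patch: the -tableb naming convention
 * described above -- an underscore, a type letter ('b' for bonds, 'a'
 * for angles, 'd' for dihedrals) and the table number are inserted
 * before the file extension.  The helper name make_table_name is
 * hypothetical.
 */
#include <stdio.h>
#include <string.h>

static void make_table_name(const char *base, char type, int number,
                            char *out, size_t outlen)
{
    const char *dot  = strrchr(base, '.');
    size_t      stem = dot ? (size_t)(dot - base) : strlen(base);

    /* e.g. "table.xvg", 'd', 3 -> "table_d3.xvg" */
    snprintf(out, outlen, "%.*s_%c%d%s",
             (int)stem, base, type, number, dot ? dot : "");
}

int main(void)
{
    char name[128];

    make_table_name("table.xvg", 'b', 0, name, sizeof(name));
    printf("%s\n", name);   /* table_b0.xvg */

    make_table_name("table.xvg", 'd', 3, name, sizeof(name));
    printf("%s\n", name);   /* table_d3.xvg */
    return 0;
}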
The [TT]-mn[tt] and [TT]-mp[tt]", -+ "both apply to this as well.", -+ "[PAR]", -+ "The option [TT]-pforce[tt] is useful when you suspect a simulation", -+ "crashes due to too large forces. With this option coordinates and", -+ "forces of atoms with a force larger than a certain value will", -+ "be printed to stderr.", -+ "[PAR]", -+ "Checkpoints containing the complete state of the system are written", -+ "at regular intervals (option [TT]-cpt[tt]) to the file [TT]-cpo[tt],", -+ "unless option [TT]-cpt[tt] is set to -1.", -+ "The previous checkpoint is backed up to [TT]state_prev.cpt[tt] to", -+ "make sure that a recent state of the system is always available,", -+ "even when the simulation is terminated while writing a checkpoint.", -+ "With [TT]-cpnum[tt] all checkpoint files are kept and appended", -+ "with the step number.", -+ "A simulation can be continued by reading the full state from file", -+ "with option [TT]-cpi[tt]. This option is intelligent in the way that", -+ "if no checkpoint file is found, Gromacs just assumes a normal run and", -+ "starts from the first step of the [TT].tpr[tt] file. By default the output", -+ "will be appending to the existing output files. The checkpoint file", -+ "contains checksums of all output files, such that you will never", -+ "loose data when some output files are modified, corrupt or removed.", -+ "There are three scenarios with [TT]-cpi[tt]:[PAR]", -+ "[TT]*[tt] no files with matching names are present: new output files are written[PAR]", -+ "[TT]*[tt] all files are present with names and checksums matching those stored", -+ "in the checkpoint file: files are appended[PAR]", -+ "[TT]*[tt] otherwise no files are modified and a fatal error is generated[PAR]", -+ "With [TT]-noappend[tt] new output files are opened and the simulation", -+ "part number is added to all output file names.", -+ "Note that in all cases the checkpoint file itself is not renamed", -+ "and will be overwritten, unless its name does not match", -+ "the [TT]-cpo[tt] option.", -+ "[PAR]", -+ "With checkpointing the output is appended to previously written", -+ "output files, unless [TT]-noappend[tt] is used or none of the previous", -+ "output files are present (except for the checkpoint file).", -+ "The integrity of the files to be appended is verified using checksums", -+ "which are stored in the checkpoint file. This ensures that output can", -+ "not be mixed up or corrupted due to file appending. When only some", -+ "of the previous output files are present, a fatal error is generated", -+ "and no old output files are modified and no new output files are opened.", -+ "The result with appending will be the same as from a single run.", -+ "The contents will be binary identical, unless you use a different number", -+ "of ranks or dynamic load balancing or the FFT library uses optimizations", -+ "through timing.", -+ "[PAR]", -+ "With option [TT]-maxh[tt] a simulation is terminated and a checkpoint", -+ "file is written at the first neighbor search step where the run time", -+ "exceeds [TT]-maxh[tt]*0.99 hours.", -+ "[PAR]", -+ "When [TT]mdrun[tt] receives a TERM signal, it will set nsteps to the current", -+ "step plus one. When [TT]mdrun[tt] receives an INT signal (e.g. 
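/* Editor's sketch, not part of the patch: the three -cpi checkpoint
 * scenarios listed above, written out as a small decision helper.  The
 * enum and function names are hypothetical, purely to make the rule
 * explicit: no matching output files -> new files; all present with
 * matching checksums -> append; anything else -> fatal error, nothing
 * is touched.
 */
#include <stdio.h>
#include <stdbool.h>

typedef enum {
    CPI_NEW_OUTPUT,   /* no matching output files: start new files      */
    CPI_APPEND,       /* all files present, checksums match: append     */
    CPI_FATAL         /* anything else: refuse to touch existing files  */
} cpi_action_t;

static cpi_action_t decide_cpi_action(int nfiles_expected,
                                      int nfiles_present,
                                      bool checksums_match)
{
    if (nfiles_present == 0)
    {
        return CPI_NEW_OUTPUT;
    }
    if (nfiles_present == nfiles_expected && checksums_match)
    {
        return CPI_APPEND;
    }
    return CPI_FATAL;
}

int main(void)
{
    printf("%d %d %d\n",
           decide_cpi_action(4, 0, false),   /* 0: new output files */
           decide_cpi_action(4, 4, true),    /* 1: append           */
           decide_cpi_action(4, 2, true));   /* 2: fatal error      */
    return 0;
}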
when ctrl+C is", -+ "pressed), it will stop after the next neighbor search step ", -+ "(with nstlist=0 at the next step).", -+ "In both cases all the usual output will be written to file.", -+ "When running with MPI, a signal to one of the [TT]mdrun[tt] ranks", -+ "is sufficient, this signal should not be sent to mpirun or", -+ "the [TT]mdrun[tt] process that is the parent of the others.", -+ "[PAR]", -+ "Interactive molecular dynamics (IMD) can be activated by using at least one", -+ "of the three IMD switches: The [TT]-imdterm[tt] switch allows to terminate the", -+ "simulation from the molecular viewer (e.g. VMD). With [TT]-imdwait[tt],", -+ "[TT]mdrun[tt] pauses whenever no IMD client is connected. Pulling from the", -+ "IMD remote can be turned on by [TT]-imdpull[tt].", -+ "The port [TT]mdrun[tt] listens to can be altered by [TT]-imdport[tt].The", -+ "file pointed to by [TT]-if[tt] contains atom indices and forces if IMD", -+ "pulling is used." -+ "[PAR]", -+ "When [TT]mdrun[tt] is started with MPI, it does not run niced by default." -+ }; -+ t_commrec *cr; -+ t_filenm fnm[] = { -+ { efTPX, NULL, NULL, ffREAD }, -+ { efTRN, "-o", NULL, ffWRITE }, -+ { efCOMPRESSED, "-x", NULL, ffOPTWR }, -+ { efCPT, "-cpi", NULL, ffOPTRD }, -+ { efCPT, "-cpo", NULL, ffOPTWR }, -+ { efSTO, "-c", "confout", ffWRITE }, -+ { efEDR, "-e", "ener", ffWRITE }, -+ { efLOG, "-g", "md", ffWRITE }, -+ { efXVG, "-dhdl", "dhdl", ffOPTWR }, -+ { efXVG, "-field", "field", ffOPTWR }, -+ { efXVG, "-table", "table", ffOPTRD }, -+ { efXVG, "-tabletf", "tabletf", ffOPTRD }, -+ { efXVG, "-tablep", "tablep", ffOPTRD }, -+ { efXVG, "-tableb", "table", ffOPTRD }, -+ { efTRX, "-rerun", "rerun", ffOPTRD }, -+ { efXVG, "-tpi", "tpi", ffOPTWR }, -+ { efXVG, "-tpid", "tpidist", ffOPTWR }, -+ { efEDI, "-ei", "sam", ffOPTRD }, -+ { efXVG, "-eo", "edsam", ffOPTWR }, -+ { efXVG, "-devout", "deviatie", ffOPTWR }, -+ { efXVG, "-runav", "runaver", ffOPTWR }, -+ { efXVG, "-px", "pullx", ffOPTWR }, -+ { efXVG, "-pf", "pullf", ffOPTWR }, -+ { efXVG, "-ro", "rotation", ffOPTWR }, -+ { efLOG, "-ra", "rotangles", ffOPTWR }, -+ { efLOG, "-rs", "rotslabs", ffOPTWR }, -+ { efLOG, "-rt", "rottorque", ffOPTWR }, -+ { efMTX, "-mtx", "nm", ffOPTWR }, -+ { efNDX, "-dn", "dipole", ffOPTWR }, -+ { efRND, "-multidir", NULL, ffOPTRDMULT}, -+ { efDAT, "-membed", "membed", ffOPTRD }, -+ { efTOP, "-mp", "membed", ffOPTRD }, -+ { efNDX, "-mn", "membed", ffOPTRD }, -+ { efXVG, "-if", "imdforces", ffOPTWR }, -+ { efXVG, "-swap", "swapions", ffOPTWR } -+ }; -+#define NFILE asize(fnm) -+ -+ /* Command line options ! 
*/ -+ gmx_bool bDDBondCheck = TRUE; -+ gmx_bool bDDBondComm = TRUE; -+ gmx_bool bTunePME = TRUE; -+ gmx_bool bTestVerlet = FALSE; -+ gmx_bool bVerbose = FALSE; -+ gmx_bool bCompact = TRUE; -+ gmx_bool bSepPot = FALSE; -+ gmx_bool bRerunVSite = FALSE; -+ gmx_bool bConfout = TRUE; -+ gmx_bool bReproducible = FALSE; -+ gmx_bool bIMDwait = FALSE; -+ gmx_bool bIMDterm = FALSE; -+ gmx_bool bIMDpull = FALSE; -+ -+ int npme = -1; -+ int nstlist = 0; -+ int nmultisim = 0; -+ int nstglobalcomm = -1; -+ int repl_ex_nst = 0; -+ int repl_ex_seed = -1; -+ int repl_ex_nex = 0; -+ int nstepout = 100; -+ int resetstep = -1; -+ gmx_int64_t nsteps = -2; /* the value -2 means that the mdp option will be used */ -+ int imdport = 8888; /* can be almost anything, 8888 is easy to remember */ -+ -+ rvec realddxyz = {0, 0, 0}; -+ const char *ddno_opt[ddnoNR+1] = -+ { NULL, "interleave", "pp_pme", "cartesian", NULL }; -+ const char *dddlb_opt[] = -+ { NULL, "auto", "no", "yes", NULL }; -+ const char *thread_aff_opt[threadaffNR+1] = -+ { NULL, "auto", "on", "off", NULL }; -+ const char *nbpu_opt[] = -+ { NULL, "auto", "cpu", "gpu", "gpu_cpu", NULL }; -+ real rdd = 0.0, rconstr = 0.0, dlb_scale = 0.8, pforce = -1; -+ char *ddcsx = NULL, *ddcsy = NULL, *ddcsz = NULL; -+ real cpt_period = 15.0, max_hours = -1; -+ gmx_bool bAppendFiles = TRUE; -+ gmx_bool bKeepAndNumCPT = FALSE; -+ gmx_bool bResetCountersHalfWay = FALSE; -+ output_env_t oenv = NULL; -+ const char *deviceOptions = ""; -+ -+ /* Non transparent initialization of a complex gmx_hw_opt_t struct. -+ * But unfortunately we are not allowed to call a function here, -+ * since declarations follow below. -+ */ -+ gmx_hw_opt_t hw_opt = { -+ 0, 0, 0, 0, threadaffSEL, 0, 0, -+ { NULL, FALSE, 0, NULL } -+ }; -+ -+ t_pargs pa[] = { -+ -+ { "-dd", FALSE, etRVEC, {&realddxyz}, -+ "Domain decomposition grid, 0 is optimize" }, -+ { "-ddorder", FALSE, etENUM, {ddno_opt}, -+ "DD rank order" }, -+ { "-npme", FALSE, etINT, {&npme}, -+ "Number of separate ranks to be used for PME, -1 is guess" }, -+ { "-nt", FALSE, etINT, {&hw_opt.nthreads_tot}, -+ "Total number of threads to start (0 is guess)" }, -+ { "-ntmpi", FALSE, etINT, {&hw_opt.nthreads_tmpi}, -+ "Number of thread-MPI threads to start (0 is guess)" }, -+ { "-ntomp", FALSE, etINT, {&hw_opt.nthreads_omp}, -+ "Number of OpenMP threads per MPI rank to start (0 is guess)" }, -+ { "-ntomp_pme", FALSE, etINT, {&hw_opt.nthreads_omp_pme}, -+ "Number of OpenMP threads per MPI rank to start (0 is -ntomp)" }, -+ { "-pin", FALSE, etENUM, {thread_aff_opt}, -+ "Set thread affinities" }, -+ { "-pinoffset", FALSE, etINT, {&hw_opt.core_pinning_offset}, -+ "The starting logical core number for pinning to cores; used to avoid pinning threads from different mdrun instances to the same core" }, -+ { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride}, -+ "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" }, -+ { "-gpu_id", FALSE, etSTR, {&hw_opt.gpu_opt.gpu_id}, -+ "List of GPU device id-s to use, specifies the per-node PP rank to GPU mapping" }, -+ { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck}, -+ "Check for all bonded interactions with DD" }, -+ { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm}, -+ "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" }, -+ { "-rdd", FALSE, etREAL, {&rdd}, -+ "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" }, -+ { "-rcon", FALSE, etREAL, {&rconstr}, -+ "Maximum distance for 
P-LINCS (nm), 0 is estimate" }, -+ { "-dlb", FALSE, etENUM, {dddlb_opt}, -+ "Dynamic load balancing (with DD)" }, -+ { "-dds", FALSE, etREAL, {&dlb_scale}, -+ "Fraction in (0,1) by whose reciprocal the initial DD cell size will be increased in order to " -+ "provide a margin in which dynamic load balancing can act while preserving the minimum cell size." }, -+ { "-ddcsx", FALSE, etSTR, {&ddcsx}, -+ "HIDDENA string containing a vector of the relative sizes in the x " -+ "direction of the corresponding DD cells. Only effective with static " -+ "load balancing." }, -+ { "-ddcsy", FALSE, etSTR, {&ddcsy}, -+ "HIDDENA string containing a vector of the relative sizes in the y " -+ "direction of the corresponding DD cells. Only effective with static " -+ "load balancing." }, -+ { "-ddcsz", FALSE, etSTR, {&ddcsz}, -+ "HIDDENA string containing a vector of the relative sizes in the z " -+ "direction of the corresponding DD cells. Only effective with static " -+ "load balancing." }, -+ { "-gcom", FALSE, etINT, {&nstglobalcomm}, -+ "Global communication frequency" }, -+ { "-nb", FALSE, etENUM, {&nbpu_opt}, -+ "Calculate non-bonded interactions on" }, -+ { "-nstlist", FALSE, etINT, {&nstlist}, -+ "Set nstlist when using a Verlet buffer tolerance (0 is guess)" }, -+ { "-tunepme", FALSE, etBOOL, {&bTunePME}, -+ "Optimize PME load between PP/PME ranks or GPU/CPU" }, -+ { "-testverlet", FALSE, etBOOL, {&bTestVerlet}, -+ "Test the Verlet non-bonded scheme" }, -+ { "-v", FALSE, etBOOL, {&bVerbose}, -+ "Be loud and noisy" }, -+ { "-compact", FALSE, etBOOL, {&bCompact}, -+ "Write a compact log file" }, -+ { "-seppot", FALSE, etBOOL, {&bSepPot}, -+ "Write separate V and dVdl terms for each interaction type and rank to the log file(s)" }, -+ { "-pforce", FALSE, etREAL, {&pforce}, -+ "Print all forces larger than this (kJ/mol nm)" }, -+ { "-reprod", FALSE, etBOOL, {&bReproducible}, -+ "Try to avoid optimizations that affect binary reproducibility" }, -+ { "-cpt", FALSE, etREAL, {&cpt_period}, -+ "Checkpoint interval (minutes)" }, -+ { "-cpnum", FALSE, etBOOL, {&bKeepAndNumCPT}, -+ "Keep and number checkpoint files" }, -+ { "-append", FALSE, etBOOL, {&bAppendFiles}, -+ "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" }, -+ { "-nsteps", FALSE, etINT64, {&nsteps}, -+ "Run this number of steps, overrides .mdp file option" }, -+ { "-maxh", FALSE, etREAL, {&max_hours}, -+ "Terminate after 0.99 times this time (hours)" }, -+ { "-multi", FALSE, etINT, {&nmultisim}, -+ "Do multiple simulations in parallel" }, -+ { "-replex", FALSE, etINT, {&repl_ex_nst}, -+ "Attempt replica exchange periodically with this period (steps)" }, -+ { "-nex", FALSE, etINT, {&repl_ex_nex}, -+ "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion). -nex zero or not specified gives neighbor replica exchange." 
}, -+ { "-reseed", FALSE, etINT, {&repl_ex_seed}, -+ "Seed for replica exchange, -1 is generate a seed" }, -+ { "-imdport", FALSE, etINT, {&imdport}, -+ "HIDDENIMD listening port" }, -+ { "-imdwait", FALSE, etBOOL, {&bIMDwait}, -+ "HIDDENPause the simulation while no IMD client is connected" }, -+ { "-imdterm", FALSE, etBOOL, {&bIMDterm}, -+ "HIDDENAllow termination of the simulation from IMD client" }, -+ { "-imdpull", FALSE, etBOOL, {&bIMDpull}, -+ "HIDDENAllow pulling in the simulation from IMD client" }, -+ { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite}, -+ "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" }, -+ { "-confout", FALSE, etBOOL, {&bConfout}, -+ "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" }, -+ { "-stepout", FALSE, etINT, {&nstepout}, -+ "HIDDENFrequency of writing the remaining wall clock time for the run" }, -+ { "-resetstep", FALSE, etINT, {&resetstep}, -+ "HIDDENReset cycle counters after these many time steps" }, -+ { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay}, -+ "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" } -+ }; -+ unsigned long Flags, PCA_Flags; -+ ivec ddxyz; -+ int dd_node_order; -+ gmx_bool bAddPart; -+ FILE *fplog, *fpmulti; -+ int sim_part, sim_part_fn; -+ const char *part_suffix = ".part"; -+ char suffix[STRLEN]; -+ int rc; -+ char **multidir = NULL; -+ -+ -+ cr = init_commrec(); -+ -+ PCA_Flags = (PCA_CAN_SET_DEFFNM | (MASTER(cr) ? 0 : PCA_QUIET)); -+ -+ /* Comment this in to do fexist calls only on master -+ * works not with rerun or tables at the moment -+ * also comment out the version of init_forcerec in md.c -+ * with NULL instead of opt2fn -+ */ -+ /* -+ if (!MASTER(cr)) -+ { -+ PCA_Flags |= PCA_NOT_READ_NODE; -+ } -+ */ -+ -+ if (!parse_common_args(&argc, argv, PCA_Flags, NFILE, fnm, asize(pa), pa, -+ asize(desc), desc, 0, NULL, &oenv)) -+ { -+ return 0; -+ } -+ -+ -+ /* we set these early because they might be used in init_multisystem() -+ Note that there is the potential for npme>nnodes until the number of -+ threads is set later on, if there's thread parallelization. That shouldn't -+ lead to problems. */ -+ dd_node_order = nenum(ddno_opt); -+ cr->npmenodes = npme; -+ -+ hw_opt.thread_affinity = nenum(thread_aff_opt); -+ -+ /* now check the -multi and -multidir option */ -+ if (opt2bSet("-multidir", NFILE, fnm)) -+ { -+ if (nmultisim > 0) -+ { -+ gmx_fatal(FARGS, "mdrun -multi and -multidir options are mutually exclusive."); -+ } -+ nmultisim = opt2fns(&multidir, "-multidir", NFILE, fnm); -+ } -+ -+ -+ if (repl_ex_nst != 0 && nmultisim < 2) -+ { -+ gmx_fatal(FARGS, "Need at least two replicas for replica exchange (option -multi)"); -+ } -+ -+ if (repl_ex_nex < 0) -+ { -+ gmx_fatal(FARGS, "Replica exchange number of exchanges needs to be positive"); -+ } -+ -+ if (nmultisim > 1) -+ { -+#ifndef GMX_THREAD_MPI -+ gmx_bool bParFn = (multidir == NULL); -+ init_multisystem(cr, nmultisim, multidir, NFILE, fnm, bParFn); -+#else -+ gmx_fatal(FARGS, "mdrun -multi is not supported with the thread library. 
" -+ "Please compile GROMACS with MPI support"); -+#endif -+ } -+ -+ bAddPart = !bAppendFiles; -+ -+ /* Check if there is ANY checkpoint file available */ -+ sim_part = 1; -+ sim_part_fn = sim_part; -+ if (opt2bSet("-cpi", NFILE, fnm)) -+ { -+ if (bSepPot && bAppendFiles) -+ { -+ gmx_fatal(FARGS, "Output file appending is not supported with -seppot"); -+ } -+ -+ bAppendFiles = -+ read_checkpoint_simulation_part(opt2fn_master("-cpi", NFILE, -+ fnm, cr), -+ &sim_part_fn, NULL, cr, -+ bAppendFiles, NFILE, fnm, -+ part_suffix, &bAddPart); -+ if (sim_part_fn == 0 && MULTIMASTER(cr)) -+ { -+ fprintf(stdout, "No previous checkpoint file present, assuming this is a new run.\n"); -+ } -+ else -+ { -+ sim_part = sim_part_fn + 1; -+ } -+ -+ if (MULTISIM(cr) && MASTER(cr)) -+ { -+ if (MULTIMASTER(cr)) -+ { -+ /* Log file is not yet available, so if there's a -+ * problem we can only write to stderr. */ -+ fpmulti = stderr; -+ } -+ else -+ { -+ fpmulti = NULL; -+ } -+ check_multi_int(fpmulti, cr->ms, sim_part, "simulation part", TRUE); -+ } -+ } -+ else -+ { -+ bAppendFiles = FALSE; -+ } -+ -+ if (!bAppendFiles) -+ { -+ sim_part_fn = sim_part; -+ } -+ -+ if (bAddPart) -+ { -+ /* Rename all output files (except checkpoint files) */ -+ /* create new part name first (zero-filled) */ -+ sprintf(suffix, "%s%04d", part_suffix, sim_part_fn); -+ -+ add_suffix_to_output_names(fnm, NFILE, suffix); -+ if (MULTIMASTER(cr)) -+ { -+ fprintf(stdout, "Checkpoint file is from part %d, new output files will be suffixed '%s'.\n", sim_part-1, suffix); -+ } -+ } -+ -+ Flags = opt2bSet("-rerun", NFILE, fnm) ? MD_RERUN : 0; -+ Flags = Flags | (bSepPot ? MD_SEPPOT : 0); -+ Flags = Flags | (bDDBondCheck ? MD_DDBONDCHECK : 0); -+ Flags = Flags | (bDDBondComm ? MD_DDBONDCOMM : 0); -+ Flags = Flags | (bTunePME ? MD_TUNEPME : 0); -+ Flags = Flags | (bTestVerlet ? MD_TESTVERLET : 0); -+ Flags = Flags | (bConfout ? MD_CONFOUT : 0); -+ Flags = Flags | (bRerunVSite ? MD_RERUN_VSITE : 0); -+ Flags = Flags | (bReproducible ? MD_REPRODUCIBLE : 0); -+ Flags = Flags | (bAppendFiles ? MD_APPENDFILES : 0); -+ Flags = Flags | (opt2parg_bSet("-append", asize(pa), pa) ? MD_APPENDFILESSET : 0); -+ Flags = Flags | (bKeepAndNumCPT ? MD_KEEPANDNUMCPT : 0); -+ Flags = Flags | (sim_part > 1 ? MD_STARTFROMCPT : 0); -+ Flags = Flags | (bResetCountersHalfWay ? MD_RESETCOUNTERSHALFWAY : 0); -+ Flags = Flags | (bIMDwait ? MD_IMDWAIT : 0); -+ Flags = Flags | (bIMDterm ? MD_IMDTERM : 0); -+ Flags = Flags | (bIMDpull ? MD_IMDPULL : 0); -+ -+ /* We postpone opening the log file if we are appending, so we can -+ first truncate the old log file and append to the correct position -+ there instead. 
*/ -+ if ((MASTER(cr) || bSepPot) && !bAppendFiles) -+ { -+ gmx_log_open(ftp2fn(efLOG, NFILE, fnm), cr, -+ !bSepPot, Flags & MD_APPENDFILES, &fplog); -+ please_cite(fplog, "Hess2008b"); -+ please_cite(fplog, "Spoel2005a"); -+ please_cite(fplog, "Lindahl2001a"); -+ please_cite(fplog, "Berendsen95a"); -+ } -+ else if (!MASTER(cr) && bSepPot) -+ { -+ gmx_log_open(ftp2fn(efLOG, NFILE, fnm), cr, !bSepPot, Flags, &fplog); -+ } -+ else -+ { -+ fplog = NULL; -+ } -+ -+ ddxyz[XX] = (int)(realddxyz[XX] + 0.5); -+ ddxyz[YY] = (int)(realddxyz[YY] + 0.5); -+ ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5); -+ -+ rc = mdrunner(&hw_opt, fplog, cr, NFILE, fnm, oenv, bVerbose, bCompact, -+ nstglobalcomm, ddxyz, dd_node_order, rdd, rconstr, -+ dddlb_opt[0], dlb_scale, ddcsx, ddcsy, ddcsz, -+ nbpu_opt[0], nstlist, -+ nsteps, nstepout, resetstep, -+ nmultisim, repl_ex_nst, repl_ex_nex, repl_ex_seed, -+ pforce, cpt_period, max_hours, deviceOptions, imdport, Flags); -+ -+ /* Log file has to be closed in mdrunner if we are appending to it -+ (fplog not set here) */ -+ if (MASTER(cr) && !bAppendFiles) -+ { -+ gmx_log_close(fplog); -+ } -+ -+ return rc; -+} -diff --git a/src/programs/mdrun/repl_ex.c b/src/programs/mdrun/repl_ex.c -index 46a9bc0..cfb0b7f 100644 ---- a/src/programs/mdrun/repl_ex.c -+++ b/src/programs/mdrun/repl_ex.c -@@ -51,6 +51,12 @@ - #include "domdec.h" - #include "gromacs/random/random.h" - -+/* PLUMED */ -+#include "../../../Plumed.h" -+extern int plumedswitch; -+extern plumed plumedmain; -+/* END PLUMED */ -+ - #define PROBABILITYCUTOFF 100 - /* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */ - -@@ -112,14 +118,16 @@ static gmx_bool repl_quantity(const gmx_multisim_t *ms, - qall[re->repl] = q; - gmx_sum_sim(ms->nsim, qall, ms); - -- bDiff = FALSE; -- for (s = 1; s < ms->nsim; s++) -- { -- if (qall[s] != qall[0]) -- { -+ /* PLUMED */ -+ //bDiff = FALSE; -+ //for (s = 1; s < ms->nsim; s++) -+ //{ -+ // if (qall[s] != qall[0]) -+ // { - bDiff = TRUE; -- } -- } -+ // } -+ //} -+ /* END PLUMED */ - - if (bDiff) - { -@@ -269,6 +277,10 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog, - re->ind[i] = i; - } - -+ /* PLUMED */ -+ // plumed2: check if we want alternative patterns (i.e. for bias-exchange metaD) -+ // in those cases replicas can share the same temperature. -+ /* - if (re->type < ereENDSINGLE) - { - -@@ -277,11 +289,12 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog, - for (j = i+1; j < re->nrepl; j++) - { - if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]]) -- { -+ {*/ - /* Unordered replicas are supposed to work, but there - * is still an issues somewhere. - * Note that at this point still re->ind[i]=i. - */ -+ /* - gmx_fatal(FARGS, "Replicas with indices %d < %d have %ss %g > %g, please order your replicas on increasing %s", - i, j, - erename[re->type], -@@ -299,6 +312,8 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog, - } - } - } -+ */ -+ /* END PLUMED */ - - /* keep track of all the swaps, starting with the initial placement. 
*/ - snew(re->allswaps, re->nrepl); -@@ -982,6 +997,10 @@ test_for_replica_exchange(FILE *fplog, - pind[i] = re->ind[i]; - } - -+ /* PLUMED */ -+ int plumed_test_exchange_pattern=0; -+ /* END PLUMED */ -+ - if (bMultiEx) - { - /* multiple random switch exchange */ -@@ -1057,6 +1076,31 @@ test_for_replica_exchange(FILE *fplog, - /* standard nearest neighbor replica exchange */ - - m = (step / re->nst) % 2; -+ /* PLUMED */ -+ if(plumedswitch){ -+ int partner=re->repl; -+ plumed_cmd(plumedmain,"getExchangesFlag",&plumed_test_exchange_pattern); -+ if(plumed_test_exchange_pattern>0){ -+ int *list; -+ snew(list,re->nrepl); -+ plumed_cmd(plumedmain,"setNumberOfReplicas",&(re->nrepl)); -+ plumed_cmd(plumedmain,"getExchangesList",list); -+ for(i=0; inrepl; i++) re->ind[i]=list[i]; -+ sfree(list); -+ } -+ -+ for(i=1; inrepl; i++) { -+ if (i % 2 != m) continue; -+ a = re->ind[i-1]; -+ b = re->ind[i]; -+ if(re->repl==a) partner=b; -+ if(re->repl==b) partner=a; -+ } -+ plumed_cmd(plumedmain,"GREX setPartner",&partner); -+ plumed_cmd(plumedmain,"GREX calculate",NULL); -+ plumed_cmd(plumedmain,"GREX shareAllDeltaBias",NULL); -+ } -+ /* END PLUMED */ - for (i = 1; i < re->nrepl; i++) - { - a = re->ind[i-1]; -@@ -1066,6 +1110,18 @@ test_for_replica_exchange(FILE *fplog, - if (i % 2 == m) - { - delta = calc_delta(fplog, bPrint, re, a, b, a, b); -+ /* PLUMED */ -+ if(plumedswitch){ -+ real adb,bdb,dplumed; -+ char buf[300]; -+ sprintf(buf,"GREX getDeltaBias %d",a); plumed_cmd(plumedmain,buf,&adb); -+ sprintf(buf,"GREX getDeltaBias %d",b); plumed_cmd(plumedmain,buf,&bdb); -+ dplumed=adb*re->beta[a]+bdb*re->beta[b]; -+ delta+=dplumed; -+ if (bPrint) -+ fprintf(fplog,"dplumed = %10.3e dE_Term = %10.3e (kT)\n",dplumed,delta); -+ } -+ /* END PLUMED */ - if (delta <= 0) - { - /* accepted */ -@@ -1092,11 +1148,22 @@ test_for_replica_exchange(FILE *fplog, - - if (bEx[i]) - { -+ /* PLUMED */ -+ if(!plumed_test_exchange_pattern) { -+ /* standard neighbour swapping */ - /* swap these two */ - tmp = pind[i-1]; - pind[i-1] = pind[i]; - pind[i] = tmp; - re->nexchange[i]++; /* statistics for back compatibility */ -+ } else { -+ /* alternative swapping patterns */ -+ tmp = pind[a]; -+ pind[a] = pind[b]; -+ pind[b] = tmp; -+ re->nexchange[i]++; /* statistics for back compatibility */ -+ } -+ /* END PLUMED */ - } - } - else -@@ -1112,6 +1179,15 @@ test_for_replica_exchange(FILE *fplog, - re->nattempt[m]++; - } - -+ /* PLUMED */ -+ if(plumed_test_exchange_pattern>0) { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->ind[i] = i; -+ } -+ } -+ /* END PLUMED */ -+ - /* record which moves were made and accepted */ - for (i = 0; i < re->nrepl; i++) - { -@@ -1316,6 +1392,10 @@ gmx_bool replica_exchange(FILE *fplog, const t_commrec *cr, struct gmx_repl_ex * - /* The order in which multiple exchanges will occur. */ - gmx_bool bThisReplicaExchanged = FALSE; - -+ /* PLUMED */ -+ if(plumedswitch)plumed_cmd(plumedmain,"GREX prepare",NULL); -+ /* END PLUMED */ -+ - if (MASTER(cr)) - { - replica_id = re->repl; -diff --git a/src/programs/mdrun/repl_ex.c.preplumed b/src/programs/mdrun/repl_ex.c.preplumed -new file mode 100644 -index 0000000..46a9bc0 ---- /dev/null -+++ b/src/programs/mdrun/repl_ex.c.preplumed -@@ -0,0 +1,1439 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team. 
-+ * Copyright (c) 2011,2012,2013,2014, by the GROMACS development team, led by -+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+ * and including many others, as listed in the AUTHORS file in the -+ * top-level source directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. -+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. -+ */ -+#ifdef HAVE_CONFIG_H -+#include -+#endif -+ -+#include -+#include "repl_ex.h" -+#include "network.h" -+#include "gromacs/random/random.h" -+#include "gromacs/utility/smalloc.h" -+#include "physics.h" -+#include "copyrite.h" -+#include "macros.h" -+#include "vec.h" -+#include "names.h" -+#include "domdec.h" -+#include "gromacs/random/random.h" -+ -+#define PROBABILITYCUTOFF 100 -+/* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */ -+ -+enum { -+ ereTEMP, ereLAMBDA, ereENDSINGLE, ereTL, ereNR -+}; -+const char *erename[ereNR] = { "temperature", "lambda", "end_single_marker", "temperature and lambda"}; -+/* end_single_marker merely notes the end of single variable replica exchange. All types higher than -+ it are multiple replica exchange methods */ -+/* Eventually, should add 'pressure', 'temperature and pressure', 'lambda_and_pressure', 'temperature_lambda_pressure'?; -+ Let's wait until we feel better about the pressure control methods giving exact ensembles. 
Right now, we assume constant pressure */ -+ -+typedef struct gmx_repl_ex -+{ -+ int repl; -+ int nrepl; -+ real temp; -+ int type; -+ real **q; -+ gmx_bool bNPT; -+ real *pres; -+ int *ind; -+ int *allswaps; -+ int nst; -+ int nex; -+ int seed; -+ int nattempt[2]; -+ real *prob_sum; -+ int **nmoves; -+ int *nexchange; -+ gmx_rng_t rng; -+ -+ /* these are helper arrays for replica exchange; allocated here so they -+ don't have to be allocated each time */ -+ int *destinations; -+ int **cyclic; -+ int **order; -+ int *tmpswap; -+ gmx_bool *incycle; -+ gmx_bool *bEx; -+ -+ /* helper arrays to hold the quantities that are exchanged */ -+ real *prob; -+ real *Epot; -+ real *beta; -+ real *Vol; -+ real **de; -+ -+} t_gmx_repl_ex; -+ -+static gmx_bool repl_quantity(const gmx_multisim_t *ms, -+ struct gmx_repl_ex *re, int ere, real q) -+{ -+ real *qall; -+ gmx_bool bDiff; -+ int i, s; -+ -+ snew(qall, ms->nsim); -+ qall[re->repl] = q; -+ gmx_sum_sim(ms->nsim, qall, ms); -+ -+ bDiff = FALSE; -+ for (s = 1; s < ms->nsim; s++) -+ { -+ if (qall[s] != qall[0]) -+ { -+ bDiff = TRUE; -+ } -+ } -+ -+ if (bDiff) -+ { -+ /* Set the replica exchange type and quantities */ -+ re->type = ere; -+ -+ snew(re->q[ere], re->nrepl); -+ for (s = 0; s < ms->nsim; s++) -+ { -+ re->q[ere][s] = qall[s]; -+ } -+ } -+ sfree(qall); -+ return bDiff; -+} -+ -+gmx_repl_ex_t init_replica_exchange(FILE *fplog, -+ const gmx_multisim_t *ms, -+ const t_state *state, -+ const t_inputrec *ir, -+ int nst, int nex, int init_seed) -+{ -+ real temp, pres; -+ int i, j, k; -+ struct gmx_repl_ex *re; -+ gmx_bool bTemp; -+ gmx_bool bLambda = FALSE; -+ -+ fprintf(fplog, "\nInitializing Replica Exchange\n"); -+ -+ if (ms == NULL || ms->nsim == 1) -+ { -+ gmx_fatal(FARGS, "Nothing to exchange with only one replica, maybe you forgot to set the -multi option of mdrun?"); -+ } -+ if (!EI_DYNAMICS(ir->eI)) -+ { -+ gmx_fatal(FARGS, "Replica exchange is only supported by dynamical simulations"); -+ /* Note that PAR(cr) is defined by cr->nnodes > 1, which is -+ * distinct from MULTISIM(cr). A multi-simulation only runs -+ * with real MPI parallelism, but this does not imply PAR(cr) -+ * is true! -+ * -+ * Since we are using a dynamical integrator, the only -+ * decomposition is DD, so PAR(cr) and DOMAINDECOMP(cr) are -+ * synonymous. The only way for cr->nnodes > 1 to be true is -+ * if we are using DD. 
*/ -+ } -+ -+ snew(re, 1); -+ -+ re->repl = ms->sim; -+ re->nrepl = ms->nsim; -+ snew(re->q, ereENDSINGLE); -+ -+ fprintf(fplog, "Repl There are %d replicas:\n", re->nrepl); -+ -+ check_multi_int(fplog, ms, state->natoms, "the number of atoms", FALSE); -+ check_multi_int(fplog, ms, ir->eI, "the integrator", FALSE); -+ check_multi_int64(fplog, ms, ir->init_step+ir->nsteps, "init_step+nsteps", FALSE); -+ check_multi_int64(fplog, ms, (ir->init_step+nst-1)/nst, -+ "first exchange step: init_step/-replex", FALSE); -+ check_multi_int(fplog, ms, ir->etc, "the temperature coupling", FALSE); -+ check_multi_int(fplog, ms, ir->opts.ngtc, -+ "the number of temperature coupling groups", FALSE); -+ check_multi_int(fplog, ms, ir->epc, "the pressure coupling", FALSE); -+ check_multi_int(fplog, ms, ir->efep, "free energy", FALSE); -+ check_multi_int(fplog, ms, ir->fepvals->n_lambda, "number of lambda states", FALSE); -+ -+ re->temp = ir->opts.ref_t[0]; -+ for (i = 1; (i < ir->opts.ngtc); i++) -+ { -+ if (ir->opts.ref_t[i] != re->temp) -+ { -+ fprintf(fplog, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); -+ fprintf(stderr, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); -+ } -+ } -+ -+ re->type = -1; -+ bTemp = repl_quantity(ms, re, ereTEMP, re->temp); -+ if (ir->efep != efepNO) -+ { -+ bLambda = repl_quantity(ms, re, ereLAMBDA, (real)ir->fepvals->init_fep_state); -+ } -+ if (re->type == -1) /* nothing was assigned */ -+ { -+ gmx_fatal(FARGS, "The properties of the %d systems are all the same, there is nothing to exchange", re->nrepl); -+ } -+ if (bLambda && bTemp) -+ { -+ re->type = ereTL; -+ } -+ -+ if (bTemp) -+ { -+ please_cite(fplog, "Sugita1999a"); -+ if (ir->epc != epcNO) -+ { -+ re->bNPT = TRUE; -+ fprintf(fplog, "Repl Using Constant Pressure REMD.\n"); -+ please_cite(fplog, "Okabe2001a"); -+ } -+ if (ir->etc == etcBERENDSEN) -+ { -+ gmx_fatal(FARGS, "REMD with the %s thermostat does not produce correct potential energy distributions, consider using the %s thermostat instead", -+ ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE)); -+ } -+ } -+ if (bLambda) -+ { -+ if (ir->fepvals->delta_lambda != 0) /* check this? */ -+ { -+ gmx_fatal(FARGS, "delta_lambda is not zero"); -+ } -+ } -+ if (re->bNPT) -+ { -+ snew(re->pres, re->nrepl); -+ if (ir->epct == epctSURFACETENSION) -+ { -+ pres = ir->ref_p[ZZ][ZZ]; -+ } -+ else -+ { -+ pres = 0; -+ j = 0; -+ for (i = 0; i < DIM; i++) -+ { -+ if (ir->compress[i][i] != 0) -+ { -+ pres += ir->ref_p[i][i]; -+ j++; -+ } -+ } -+ pres /= j; -+ } -+ re->pres[re->repl] = pres; -+ gmx_sum_sim(re->nrepl, re->pres, ms); -+ } -+ -+ /* Make an index for increasing replica order */ -+ /* only makes sense if one or the other is varying, not both! -+ if both are varying, we trust the order the person gave. */ -+ snew(re->ind, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->ind[i] = i; -+ } -+ -+ if (re->type < ereENDSINGLE) -+ { -+ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ for (j = i+1; j < re->nrepl; j++) -+ { -+ if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]]) -+ { -+ /* Unordered replicas are supposed to work, but there -+ * is still an issues somewhere. -+ * Note that at this point still re->ind[i]=i. 
-+ */ -+ gmx_fatal(FARGS, "Replicas with indices %d < %d have %ss %g > %g, please order your replicas on increasing %s", -+ i, j, -+ erename[re->type], -+ re->q[re->type][i], re->q[re->type][j], -+ erename[re->type]); -+ -+ k = re->ind[i]; -+ re->ind[i] = re->ind[j]; -+ re->ind[j] = k; -+ } -+ else if (re->q[re->type][re->ind[j]] == re->q[re->type][re->ind[i]]) -+ { -+ gmx_fatal(FARGS, "Two replicas have identical %ss", erename[re->type]); -+ } -+ } -+ } -+ } -+ -+ /* keep track of all the swaps, starting with the initial placement. */ -+ snew(re->allswaps, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->allswaps[i] = re->ind[i]; -+ } -+ -+ switch (re->type) -+ { -+ case ereTEMP: -+ fprintf(fplog, "\nReplica exchange in temperature\n"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %5.1f", re->q[re->type][re->ind[i]]); -+ } -+ fprintf(fplog, "\n"); -+ break; -+ case ereLAMBDA: -+ fprintf(fplog, "\nReplica exchange in lambda\n"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %3d", (int)re->q[re->type][re->ind[i]]); -+ } -+ fprintf(fplog, "\n"); -+ break; -+ case ereTL: -+ fprintf(fplog, "\nReplica exchange in temperature and lambda state\n"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %5.1f", re->q[ereTEMP][re->ind[i]]); -+ } -+ fprintf(fplog, "\n"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %5d", (int)re->q[ereLAMBDA][re->ind[i]]); -+ } -+ fprintf(fplog, "\n"); -+ break; -+ default: -+ gmx_incons("Unknown replica exchange quantity"); -+ } -+ if (re->bNPT) -+ { -+ fprintf(fplog, "\nRepl p"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %5.2f", re->pres[re->ind[i]]); -+ } -+ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ if ((i > 0) && (re->pres[re->ind[i]] < re->pres[re->ind[i-1]])) -+ { -+ fprintf(fplog, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n"); -+ fprintf(stderr, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n"); -+ } -+ } -+ } -+ re->nst = nst; -+ if (init_seed == -1) -+ { -+ if (MASTERSIM(ms)) -+ { -+ re->seed = (int)gmx_rng_make_seed(); -+ } -+ else -+ { -+ re->seed = 0; -+ } -+ gmx_sumi_sim(1, &(re->seed), ms); -+ } -+ else -+ { -+ re->seed = init_seed; -+ } -+ fprintf(fplog, "\nReplica exchange interval: %d\n", re->nst); -+ fprintf(fplog, "\nReplica random seed: %d\n", re->seed); -+ re->rng = gmx_rng_init(re->seed); -+ -+ re->nattempt[0] = 0; -+ re->nattempt[1] = 0; -+ -+ snew(re->prob_sum, re->nrepl); -+ snew(re->nexchange, re->nrepl); -+ snew(re->nmoves, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ snew(re->nmoves[i], re->nrepl); -+ } -+ fprintf(fplog, "Replica exchange information below: x=exchange, pr=probability\n"); -+ -+ /* generate space for the helper functions so we don't have to snew each time */ -+ -+ snew(re->destinations, re->nrepl); -+ snew(re->incycle, re->nrepl); -+ snew(re->tmpswap, re->nrepl); -+ snew(re->cyclic, re->nrepl); -+ snew(re->order, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ snew(re->cyclic[i], re->nrepl); -+ snew(re->order[i], re->nrepl); -+ } -+ /* allocate space for the functions storing the data for the replicas */ -+ /* not all of these arrays needed in all cases, but they don't take -+ up much space, since the max size is nrepl**2 */ -+ snew(re->prob, re->nrepl); -+ snew(re->bEx, re->nrepl); -+ snew(re->beta, re->nrepl); -+ snew(re->Vol, re->nrepl); -+ snew(re->Epot, re->nrepl); -+ snew(re->de, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ snew(re->de[i], re->nrepl); 
-+ } -+ re->nex = nex; -+ return re; -+} -+ -+static void exchange_reals(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, real *v, int n) -+{ -+ real *buf; -+ int i; -+ -+ if (v) -+ { -+ snew(buf, n); -+#ifdef GMX_MPI -+ /* -+ MPI_Sendrecv(v, n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, -+ buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, -+ ms->mpi_comm_masters,MPI_STATUS_IGNORE); -+ */ -+ { -+ MPI_Request mpi_req; -+ -+ MPI_Isend(v, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, &mpi_req); -+ MPI_Recv(buf, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, MPI_STATUS_IGNORE); -+ MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); -+ } -+#endif -+ for (i = 0; i < n; i++) -+ { -+ v[i] = buf[i]; -+ } -+ sfree(buf); -+ } -+} -+ -+ -+static void exchange_ints(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, int *v, int n) -+{ -+ int *buf; -+ int i; -+ -+ if (v) -+ { -+ snew(buf, n); -+#ifdef GMX_MPI -+ /* -+ MPI_Sendrecv(v, n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0, -+ buf,n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0, -+ ms->mpi_comm_masters,MPI_STATUS_IGNORE); -+ */ -+ { -+ MPI_Request mpi_req; -+ -+ MPI_Isend(v, n*sizeof(int), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, &mpi_req); -+ MPI_Recv(buf, n*sizeof(int), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, MPI_STATUS_IGNORE); -+ MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); -+ } -+#endif -+ for (i = 0; i < n; i++) -+ { -+ v[i] = buf[i]; -+ } -+ sfree(buf); -+ } -+} -+ -+static void exchange_doubles(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, double *v, int n) -+{ -+ double *buf; -+ int i; -+ -+ if (v) -+ { -+ snew(buf, n); -+#ifdef GMX_MPI -+ /* -+ MPI_Sendrecv(v, n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, -+ buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, -+ ms->mpi_comm_masters,MPI_STATUS_IGNORE); -+ */ -+ { -+ MPI_Request mpi_req; -+ -+ MPI_Isend(v, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, &mpi_req); -+ MPI_Recv(buf, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, MPI_STATUS_IGNORE); -+ MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); -+ } -+#endif -+ for (i = 0; i < n; i++) -+ { -+ v[i] = buf[i]; -+ } -+ sfree(buf); -+ } -+} -+ -+static void exchange_rvecs(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, rvec *v, int n) -+{ -+ rvec *buf; -+ int i; -+ -+ if (v) -+ { -+ snew(buf, n); -+#ifdef GMX_MPI -+ /* -+ MPI_Sendrecv(v[0], n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, -+ buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, -+ ms->mpi_comm_masters,MPI_STATUS_IGNORE); -+ */ -+ { -+ MPI_Request mpi_req; -+ -+ MPI_Isend(v[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, &mpi_req); -+ MPI_Recv(buf[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, MPI_STATUS_IGNORE); -+ MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); -+ } -+#endif -+ for (i = 0; i < n; i++) -+ { -+ copy_rvec(buf[i], v[i]); -+ } -+ sfree(buf); -+ } -+} -+ -+static void exchange_state(const gmx_multisim_t *ms, int b, t_state *state) -+{ -+ /* When t_state changes, this code should be updated. 
*/ -+ int ngtc, nnhpres; -+ ngtc = state->ngtc * state->nhchainlength; -+ nnhpres = state->nnhpres* state->nhchainlength; -+ exchange_rvecs(ms, b, state->box, DIM); -+ exchange_rvecs(ms, b, state->box_rel, DIM); -+ exchange_rvecs(ms, b, state->boxv, DIM); -+ exchange_reals(ms, b, &(state->veta), 1); -+ exchange_reals(ms, b, &(state->vol0), 1); -+ exchange_rvecs(ms, b, state->svir_prev, DIM); -+ exchange_rvecs(ms, b, state->fvir_prev, DIM); -+ exchange_rvecs(ms, b, state->pres_prev, DIM); -+ exchange_doubles(ms, b, state->nosehoover_xi, ngtc); -+ exchange_doubles(ms, b, state->nosehoover_vxi, ngtc); -+ exchange_doubles(ms, b, state->nhpres_xi, nnhpres); -+ exchange_doubles(ms, b, state->nhpres_vxi, nnhpres); -+ exchange_doubles(ms, b, state->therm_integral, state->ngtc); -+ exchange_rvecs(ms, b, state->x, state->natoms); -+ exchange_rvecs(ms, b, state->v, state->natoms); -+ exchange_rvecs(ms, b, state->sd_X, state->natoms); -+} -+ -+static void copy_rvecs(rvec *s, rvec *d, int n) -+{ -+ int i; -+ -+ if (d != NULL) -+ { -+ for (i = 0; i < n; i++) -+ { -+ copy_rvec(s[i], d[i]); -+ } -+ } -+} -+ -+static void copy_doubles(const double *s, double *d, int n) -+{ -+ int i; -+ -+ if (d != NULL) -+ { -+ for (i = 0; i < n; i++) -+ { -+ d[i] = s[i]; -+ } -+ } -+} -+ -+static void copy_reals(const real *s, real *d, int n) -+{ -+ int i; -+ -+ if (d != NULL) -+ { -+ for (i = 0; i < n; i++) -+ { -+ d[i] = s[i]; -+ } -+ } -+} -+ -+static void copy_ints(const int *s, int *d, int n) -+{ -+ int i; -+ -+ if (d != NULL) -+ { -+ for (i = 0; i < n; i++) -+ { -+ d[i] = s[i]; -+ } -+ } -+} -+ -+#define scopy_rvecs(v, n) copy_rvecs(state->v, state_local->v, n); -+#define scopy_doubles(v, n) copy_doubles(state->v, state_local->v, n); -+#define scopy_reals(v, n) copy_reals(state->v, state_local->v, n); -+#define scopy_ints(v, n) copy_ints(state->v, state_local->v, n); -+ -+static void copy_state_nonatomdata(t_state *state, t_state *state_local) -+{ -+ /* When t_state changes, this code should be updated. 
*/ -+ int ngtc, nnhpres; -+ ngtc = state->ngtc * state->nhchainlength; -+ nnhpres = state->nnhpres* state->nhchainlength; -+ scopy_rvecs(box, DIM); -+ scopy_rvecs(box_rel, DIM); -+ scopy_rvecs(boxv, DIM); -+ state_local->veta = state->veta; -+ state_local->vol0 = state->vol0; -+ scopy_rvecs(svir_prev, DIM); -+ scopy_rvecs(fvir_prev, DIM); -+ scopy_rvecs(pres_prev, DIM); -+ scopy_doubles(nosehoover_xi, ngtc); -+ scopy_doubles(nosehoover_vxi, ngtc); -+ scopy_doubles(nhpres_xi, nnhpres); -+ scopy_doubles(nhpres_vxi, nnhpres); -+ scopy_doubles(therm_integral, state->ngtc); -+ scopy_rvecs(x, state->natoms); -+ scopy_rvecs(v, state->natoms); -+ scopy_rvecs(sd_X, state->natoms); -+ copy_ints(&(state->fep_state), &(state_local->fep_state), 1); -+ scopy_reals(lambda, efptNR); -+} -+ -+static void scale_velocities(t_state *state, real fac) -+{ -+ int i; -+ -+ if (state->v) -+ { -+ for (i = 0; i < state->natoms; i++) -+ { -+ svmul(fac, state->v[i], state->v[i]); -+ } -+ } -+} -+ -+static void print_transition_matrix(FILE *fplog, int n, int **nmoves, int *nattempt) -+{ -+ int i, j, ntot; -+ float Tprint; -+ -+ ntot = nattempt[0] + nattempt[1]; -+ fprintf(fplog, "\n"); -+ fprintf(fplog, "Repl"); -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, " "); /* put the title closer to the center */ -+ } -+ fprintf(fplog, "Empirical Transition Matrix\n"); -+ -+ fprintf(fplog, "Repl"); -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, "%8d", (i+1)); -+ } -+ fprintf(fplog, "\n"); -+ -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, "Repl"); -+ for (j = 0; j < n; j++) -+ { -+ Tprint = 0.0; -+ if (nmoves[i][j] > 0) -+ { -+ Tprint = nmoves[i][j]/(2.0*ntot); -+ } -+ fprintf(fplog, "%8.4f", Tprint); -+ } -+ fprintf(fplog, "%3d\n", i); -+ } -+} -+ -+static void print_ind(FILE *fplog, const char *leg, int n, int *ind, gmx_bool *bEx) -+{ -+ int i; -+ -+ fprintf(fplog, "Repl %2s %2d", leg, ind[0]); -+ for (i = 1; i < n; i++) -+ { -+ fprintf(fplog, " %c %2d", (bEx != 0 && bEx[i]) ? 'x' : ' ', ind[i]); -+ } -+ fprintf(fplog, "\n"); -+} -+ -+static void print_allswitchind(FILE *fplog, int n, int *pind, int *allswaps, int *tmpswap) -+{ -+ int i; -+ -+ for (i = 0; i < n; i++) -+ { -+ tmpswap[i] = allswaps[i]; -+ } -+ for (i = 0; i < n; i++) -+ { -+ allswaps[i] = tmpswap[pind[i]]; -+ } -+ -+ fprintf(fplog, "\nAccepted Exchanges: "); -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, "%d ", pind[i]); -+ } -+ fprintf(fplog, "\n"); -+ -+ /* the "Order After Exchange" is the state label corresponding to the configuration that -+ started in state listed in order, i.e. -+ -+ 3 0 1 2 -+ -+ means that the: -+ configuration starting in simulation 3 is now in simulation 0, -+ configuration starting in simulation 0 is now in simulation 1, -+ configuration starting in simulation 1 is now in simulation 2, -+ configuration starting in simulation 2 is now in simulation 3 -+ */ -+ fprintf(fplog, "Order After Exchange: "); -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, "%d ", allswaps[i]); -+ } -+ fprintf(fplog, "\n\n"); -+} -+ -+static void print_prob(FILE *fplog, const char *leg, int n, real *prob) -+{ -+ int i; -+ char buf[8]; -+ -+ fprintf(fplog, "Repl %2s ", leg); -+ for (i = 1; i < n; i++) -+ { -+ if (prob[i] >= 0) -+ { -+ sprintf(buf, "%4.2f", prob[i]); -+ fprintf(fplog, " %3s", buf[0] == '1' ? 
"1.0" : buf+1); -+ } -+ else -+ { -+ fprintf(fplog, " "); -+ } -+ } -+ fprintf(fplog, "\n"); -+} -+ -+static void print_count(FILE *fplog, const char *leg, int n, int *count) -+{ -+ int i; -+ -+ fprintf(fplog, "Repl %2s ", leg); -+ for (i = 1; i < n; i++) -+ { -+ fprintf(fplog, " %4d", count[i]); -+ } -+ fprintf(fplog, "\n"); -+} -+ -+static real calc_delta(FILE *fplog, gmx_bool bPrint, struct gmx_repl_ex *re, int a, int b, int ap, int bp) -+{ -+ -+ real ediff, dpV, delta = 0; -+ real *Epot = re->Epot; -+ real *Vol = re->Vol; -+ real **de = re->de; -+ real *beta = re->beta; -+ -+ /* Two cases; we are permuted and not. In all cases, setting ap = a and bp = b will reduce -+ to the non permuted case */ -+ -+ switch (re->type) -+ { -+ case ereTEMP: -+ /* -+ * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439 -+ */ -+ ediff = Epot[b] - Epot[a]; -+ delta = -(beta[bp] - beta[ap])*ediff; -+ break; -+ case ereLAMBDA: -+ /* two cases: when we are permuted, and not. */ -+ /* non-permuted: -+ ediff = E_new - E_old -+ = [H_b(x_a) + H_a(x_b)] - [H_b(x_b) + H_a(x_a)] -+ = [H_b(x_a) - H_a(x_a)] + [H_a(x_b) - H_b(x_b)] -+ = de[b][a] + de[a][b] */ -+ -+ /* permuted: -+ ediff = E_new - E_old -+ = [H_bp(x_a) + H_ap(x_b)] - [H_bp(x_b) + H_ap(x_a)] -+ = [H_bp(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_bp(x_b)] -+ = [H_bp(x_a) - H_a(x_a) + H_a(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_b(x_b) + H_b(x_b) - H_bp(x_b)] -+ = [H_bp(x_a) - H_a(x_a)] - [H_ap(x_a) - H_a(x_a)] + [H_ap(x_b) - H_b(x_b)] - H_bp(x_b) - H_b(x_b)] -+ = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]) */ -+ /* but, in the current code implementation, we flip configurations, not indices . . . -+ So let's examine that. -+ = [H_b(x_ap) - H_a(x_a)] - [H_a(x_ap) - H_a(x_a)] + [H_a(x_bp) - H_b(x_b)] - H_b(x_bp) - H_b(x_b)] -+ = [H_b(x_ap) - H_a(x_ap)] + [H_a(x_bp) - H_b(x_pb)] -+ = (de[b][ap] - de[a][ap]) + (de[a][bp] - de[b][bp] -+ So, if we exchange b<=> bp and a<=> ap, we return to the same result. -+ So the simple solution is to flip the -+ position of perturbed and original indices in the tests. -+ */ -+ -+ ediff = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]); -+ delta = ediff*beta[a]; /* assume all same temperature in this case */ -+ break; -+ case ereTL: -+ /* not permuted: */ -+ /* delta = reduced E_new - reduced E_old -+ = [beta_b H_b(x_a) + beta_a H_a(x_b)] - [beta_b H_b(x_b) + beta_a H_a(x_a)] -+ = [beta_b H_b(x_a) - beta_a H_a(x_a)] + [beta_a H_a(x_b) - beta_b H_b(x_b)] -+ = [beta_b dH_b(x_a) + beta_b H_a(x_a) - beta_a H_a(x_a)] + -+ [beta_a dH_a(x_b) + beta_a H_b(x_b) - beta_b H_b(x_b)] -+ = [beta_b dH_b(x_a) + [beta_a dH_a(x_b) + -+ beta_b (H_a(x_a) - H_b(x_b)]) - beta_a (H_a(x_a) - H_b(x_b)) -+ = beta_b dH_b(x_a) + beta_a dH_a(x_b) - (beta_b - beta_a)(H_b(x_b) - H_a(x_a) */ -+ /* delta = beta[b]*de[b][a] + beta[a]*de[a][b] - (beta[b] - beta[a])*(Epot[b] - Epot[a]; */ -+ /* permuted (big breath!) 
*/ -+ /* delta = reduced E_new - reduced E_old -+ = [beta_bp H_bp(x_a) + beta_ap H_ap(x_b)] - [beta_bp H_bp(x_b) + beta_ap H_ap(x_a)] -+ = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] -+ = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] -+ - beta_pb H_a(x_a) + beta_ap H_a(x_a) + beta_pb H_a(x_a) - beta_ap H_a(x_a) -+ - beta_ap H_b(x_b) + beta_bp H_b(x_b) + beta_ap H_b(x_b) - beta_bp H_b(x_b) -+ = [(beta_bp H_bp(x_a) - beta_bp H_a(x_a)) - (beta_ap H_ap(x_a) - beta_ap H_a(x_a))] + -+ [(beta_ap H_ap(x_b) - beta_ap H_b(x_b)) - (beta_bp H_bp(x_b) - beta_bp H_b(x_b))] -+ + beta_pb H_a(x_a) - beta_ap H_a(x_a) + beta_ap H_b(x_b) - beta_bp H_b(x_b) -+ = [beta_bp (H_bp(x_a) - H_a(x_a)) - beta_ap (H_ap(x_a) - H_a(x_a))] + -+ [beta_ap (H_ap(x_b) - H_b(x_b)) - beta_bp (H_bp(x_b) - H_b(x_b))] -+ + beta_pb (H_a(x_a) - H_b(x_b)) - beta_ap (H_a(x_a) - H_b(x_b)) -+ = ([beta_bp de[bp][a] - beta_ap de[ap][a]) + beta_ap de[ap][b] - beta_bp de[bp][b]) -+ + (beta_pb-beta_ap)(H_a(x_a) - H_b(x_b)) */ -+ delta = beta[bp]*(de[bp][a] - de[bp][b]) + beta[ap]*(de[ap][b] - de[ap][a]) - (beta[bp]-beta[ap])*(Epot[b]-Epot[a]); -+ break; -+ default: -+ gmx_incons("Unknown replica exchange quantity"); -+ } -+ if (bPrint) -+ { -+ fprintf(fplog, "Repl %d <-> %d dE_term = %10.3e (kT)\n", a, b, delta); -+ } -+ if (re->bNPT) -+ { -+ /* revist the calculation for 5.0. Might be some improvements. */ -+ dpV = (beta[ap]*re->pres[ap]-beta[bp]*re->pres[bp])*(Vol[b]-Vol[a])/PRESFAC; -+ if (bPrint) -+ { -+ fprintf(fplog, " dpV = %10.3e d = %10.3e\n", dpV, delta + dpV); -+ } -+ delta += dpV; -+ } -+ return delta; -+} -+ -+static void -+test_for_replica_exchange(FILE *fplog, -+ const gmx_multisim_t *ms, -+ struct gmx_repl_ex *re, -+ gmx_enerdata_t *enerd, -+ real vol, -+ gmx_int64_t step, -+ real time) -+{ -+ int m, i, j, a, b, ap, bp, i0, i1, tmp; -+ real ediff = 0, delta = 0, dpV = 0; -+ gmx_bool bPrint, bMultiEx; -+ gmx_bool *bEx = re->bEx; -+ real *prob = re->prob; -+ int *pind = re->destinations; /* permuted index */ -+ gmx_bool bEpot = FALSE; -+ gmx_bool bDLambda = FALSE; -+ gmx_bool bVol = FALSE; -+ gmx_rng_t rng; -+ -+ bMultiEx = (re->nex > 1); /* multiple exchanges at each state */ -+ fprintf(fplog, "Replica exchange at step " "%"GMX_PRId64 " time %.5f\n", step, time); -+ -+ if (re->bNPT) -+ { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->Vol[i] = 0; -+ } -+ bVol = TRUE; -+ re->Vol[re->repl] = vol; -+ } -+ if ((re->type == ereTEMP || re->type == ereTL)) -+ { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->Epot[i] = 0; -+ } -+ bEpot = TRUE; -+ re->Epot[re->repl] = enerd->term[F_EPOT]; -+ /* temperatures of different states*/ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->beta[i] = 1.0/(re->q[ereTEMP][i]*BOLTZ); -+ } -+ } -+ else -+ { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->beta[i] = 1.0/(re->temp*BOLTZ); /* we have a single temperature */ -+ } -+ } -+ if (re->type == ereLAMBDA || re->type == ereTL) -+ { -+ bDLambda = TRUE; -+ /* lambda differences. 
*/ -+ /* de[i][j] is the energy of the jth simulation in the ith Hamiltonian -+ minus the energy of the jth simulation in the jth Hamiltonian */ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ for (j = 0; j < re->nrepl; j++) -+ { -+ re->de[i][j] = 0; -+ } -+ } -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->de[i][re->repl] = (enerd->enerpart_lambda[(int)re->q[ereLAMBDA][i]+1]-enerd->enerpart_lambda[0]); -+ } -+ } -+ -+ /* now actually do the communication */ -+ if (bVol) -+ { -+ gmx_sum_sim(re->nrepl, re->Vol, ms); -+ } -+ if (bEpot) -+ { -+ gmx_sum_sim(re->nrepl, re->Epot, ms); -+ } -+ if (bDLambda) -+ { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ gmx_sum_sim(re->nrepl, re->de[i], ms); -+ } -+ } -+ -+ /* make a duplicate set of indices for shuffling */ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ pind[i] = re->ind[i]; -+ } -+ -+ if (bMultiEx) -+ { -+ /* multiple random switch exchange */ -+ int nself = 0; -+ for (i = 0; i < re->nex + nself; i++) -+ { -+ double rnd[2]; -+ -+ gmx_rng_cycle_2uniform(step, i*2, re->seed, RND_SEED_REPLEX, rnd); -+ /* randomly select a pair */ -+ /* in theory, could reduce this by identifying only which switches had a nonneglibible -+ probability of occurring (log p > -100) and only operate on those switches */ -+ /* find out which state it is from, and what label that state currently has. Likely -+ more work that useful. */ -+ i0 = (int)(re->nrepl*rnd[0]); -+ i1 = (int)(re->nrepl*rnd[1]); -+ if (i0 == i1) -+ { -+ nself++; -+ continue; /* self-exchange, back up and do it again */ -+ } -+ -+ a = re->ind[i0]; /* what are the indices of these states? */ -+ b = re->ind[i1]; -+ ap = pind[i0]; -+ bp = pind[i1]; -+ -+ bPrint = FALSE; /* too noisy */ -+ /* calculate the energy difference */ -+ /* if the code changes to flip the STATES, rather than the configurations, -+ use the commented version of the code */ -+ /* delta = calc_delta(fplog,bPrint,re,a,b,ap,bp); */ -+ delta = calc_delta(fplog, bPrint, re, ap, bp, a, b); -+ -+ /* we actually only use the first space in the prob and bEx array, -+ since there are actually many switches between pairs. 
*/ -+ -+ if (delta <= 0) -+ { -+ /* accepted */ -+ prob[0] = 1; -+ bEx[0] = TRUE; -+ } -+ else -+ { -+ if (delta > PROBABILITYCUTOFF) -+ { -+ prob[0] = 0; -+ } -+ else -+ { -+ prob[0] = exp(-delta); -+ } -+ /* roll a number to determine if accepted */ -+ gmx_rng_cycle_2uniform(step, i*2+1, re->seed, RND_SEED_REPLEX, rnd); -+ bEx[0] = rnd[0] < prob[0]; -+ } -+ re->prob_sum[0] += prob[0]; -+ -+ if (bEx[0]) -+ { -+ /* swap the states */ -+ tmp = pind[i0]; -+ pind[i0] = pind[i1]; -+ pind[i1] = tmp; -+ } -+ } -+ re->nattempt[0]++; /* keep track of total permutation trials here */ -+ print_allswitchind(fplog, re->nrepl, pind, re->allswaps, re->tmpswap); -+ } -+ else -+ { -+ /* standard nearest neighbor replica exchange */ -+ -+ m = (step / re->nst) % 2; -+ for (i = 1; i < re->nrepl; i++) -+ { -+ a = re->ind[i-1]; -+ b = re->ind[i]; -+ -+ bPrint = (re->repl == a || re->repl == b); -+ if (i % 2 == m) -+ { -+ delta = calc_delta(fplog, bPrint, re, a, b, a, b); -+ if (delta <= 0) -+ { -+ /* accepted */ -+ prob[i] = 1; -+ bEx[i] = TRUE; -+ } -+ else -+ { -+ double rnd[2]; -+ -+ if (delta > PROBABILITYCUTOFF) -+ { -+ prob[i] = 0; -+ } -+ else -+ { -+ prob[i] = exp(-delta); -+ } -+ /* roll a number to determine if accepted */ -+ gmx_rng_cycle_2uniform(step, i, re->seed, RND_SEED_REPLEX, rnd); -+ bEx[i] = rnd[0] < prob[i]; -+ } -+ re->prob_sum[i] += prob[i]; -+ -+ if (bEx[i]) -+ { -+ /* swap these two */ -+ tmp = pind[i-1]; -+ pind[i-1] = pind[i]; -+ pind[i] = tmp; -+ re->nexchange[i]++; /* statistics for back compatibility */ -+ } -+ } -+ else -+ { -+ prob[i] = -1; -+ bEx[i] = FALSE; -+ } -+ } -+ /* print some statistics */ -+ print_ind(fplog, "ex", re->nrepl, re->ind, bEx); -+ print_prob(fplog, "pr", re->nrepl, prob); -+ fprintf(fplog, "\n"); -+ re->nattempt[m]++; -+ } -+ -+ /* record which moves were made and accepted */ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->nmoves[re->ind[i]][pind[i]] += 1; -+ re->nmoves[pind[i]][re->ind[i]] += 1; -+ } -+ fflush(fplog); /* make sure we can see what the last exchange was */ -+} -+ -+static void write_debug_x(t_state *state) -+{ -+ int i; -+ -+ if (debug) -+ { -+ for (i = 0; i < state->natoms; i += 10) -+ { -+ fprintf(debug, "dx %5d %10.5f %10.5f %10.5f\n", i, state->x[i][XX], state->x[i][YY], state->x[i][ZZ]); -+ } -+ } -+} -+ -+static void -+cyclic_decomposition(const int *destinations, -+ int **cyclic, -+ gmx_bool *incycle, -+ const int nrepl, -+ int *nswap) -+{ -+ -+ int i, j, c, p; -+ int maxlen = 1; -+ for (i = 0; i < nrepl; i++) -+ { -+ incycle[i] = FALSE; -+ } -+ for (i = 0; i < nrepl; i++) /* one cycle for each replica */ -+ { -+ if (incycle[i]) -+ { -+ cyclic[i][0] = -1; -+ continue; -+ } -+ cyclic[i][0] = i; -+ incycle[i] = TRUE; -+ c = 1; -+ p = i; -+ for (j = 0; j < nrepl; j++) /* potentially all cycles are part, but we will break first */ -+ { -+ p = destinations[p]; /* start permuting */ -+ if (p == i) -+ { -+ cyclic[i][c] = -1; -+ if (c > maxlen) -+ { -+ maxlen = c; -+ } -+ break; /* we've reached the original element, the cycle is complete, and we marked the end. 
*/ -+ } -+ else -+ { -+ cyclic[i][c] = p; /* each permutation gives a new member of the cycle */ -+ incycle[p] = TRUE; -+ c++; -+ } -+ } -+ } -+ *nswap = maxlen - 1; -+ -+ if (debug) -+ { -+ for (i = 0; i < nrepl; i++) -+ { -+ fprintf(debug, "Cycle %d:", i); -+ for (j = 0; j < nrepl; j++) -+ { -+ if (cyclic[i][j] < 0) -+ { -+ break; -+ } -+ fprintf(debug, "%2d", cyclic[i][j]); -+ } -+ fprintf(debug, "\n"); -+ } -+ fflush(debug); -+ } -+} -+ -+static void -+compute_exchange_order(FILE *fplog, -+ int **cyclic, -+ int **order, -+ const int nrepl, -+ const int maxswap) -+{ -+ int i, j; -+ -+ for (j = 0; j < maxswap; j++) -+ { -+ for (i = 0; i < nrepl; i++) -+ { -+ if (cyclic[i][j+1] >= 0) -+ { -+ order[cyclic[i][j+1]][j] = cyclic[i][j]; -+ order[cyclic[i][j]][j] = cyclic[i][j+1]; -+ } -+ } -+ for (i = 0; i < nrepl; i++) -+ { -+ if (order[i][j] < 0) -+ { -+ order[i][j] = i; /* if it's not exchanging, it should stay this round*/ -+ } -+ } -+ } -+ -+ if (debug) -+ { -+ fprintf(fplog, "Replica Exchange Order\n"); -+ for (i = 0; i < nrepl; i++) -+ { -+ fprintf(fplog, "Replica %d:", i); -+ for (j = 0; j < maxswap; j++) -+ { -+ if (order[i][j] < 0) -+ { -+ break; -+ } -+ fprintf(debug, "%2d", order[i][j]); -+ } -+ fprintf(fplog, "\n"); -+ } -+ fflush(fplog); -+ } -+} -+ -+static void -+prepare_to_do_exchange(FILE *fplog, -+ struct gmx_repl_ex *re, -+ const int replica_id, -+ int *maxswap, -+ gmx_bool *bThisReplicaExchanged) -+{ -+ int i, j; -+ /* Hold the cyclic decomposition of the (multiple) replica -+ * exchange. */ -+ gmx_bool bAnyReplicaExchanged = FALSE; -+ *bThisReplicaExchanged = FALSE; -+ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ if (re->destinations[i] != re->ind[i]) -+ { -+ /* only mark as exchanged if the index has been shuffled */ -+ bAnyReplicaExchanged = TRUE; -+ break; -+ } -+ } -+ if (bAnyReplicaExchanged) -+ { -+ /* reinitialize the placeholder arrays */ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ for (j = 0; j < re->nrepl; j++) -+ { -+ re->cyclic[i][j] = -1; -+ re->order[i][j] = -1; -+ } -+ } -+ -+ /* Identify the cyclic decomposition of the permutation (very -+ * fast if neighbor replica exchange). */ -+ cyclic_decomposition(re->destinations, re->cyclic, re->incycle, re->nrepl, maxswap); -+ -+ /* Now translate the decomposition into a replica exchange -+ * order at each step. */ -+ compute_exchange_order(fplog, re->cyclic, re->order, re->nrepl, *maxswap); -+ -+ /* Did this replica do any exchange at any point? */ -+ for (j = 0; j < *maxswap; j++) -+ { -+ if (replica_id != re->order[replica_id][j]) -+ { -+ *bThisReplicaExchanged = TRUE; -+ break; -+ } -+ } -+ } -+} -+ -+gmx_bool replica_exchange(FILE *fplog, const t_commrec *cr, struct gmx_repl_ex *re, -+ t_state *state, gmx_enerdata_t *enerd, -+ t_state *state_local, gmx_int64_t step, real time) -+{ -+ int i, j; -+ int replica_id = 0; -+ int exchange_partner; -+ int maxswap = 0; -+ /* Number of rounds of exchanges needed to deal with any multiple -+ * exchanges. */ -+ /* Where each replica ends up after the exchange attempt(s). */ -+ /* The order in which multiple exchanges will occur. */ -+ gmx_bool bThisReplicaExchanged = FALSE; -+ -+ if (MASTER(cr)) -+ { -+ replica_id = re->repl; -+ test_for_replica_exchange(fplog, cr->ms, re, enerd, det(state_local->box), step, time); -+ prepare_to_do_exchange(fplog, re, replica_id, &maxswap, &bThisReplicaExchanged); -+ } -+ /* Do intra-simulation broadcast so all processors belonging to -+ * each simulation know whether they need to participate in -+ * collecting the state. 
Otherwise, they might as well get on with -+ * the next thing to do. */ -+ if (DOMAINDECOMP(cr)) -+ { -+#ifdef GMX_MPI -+ MPI_Bcast(&bThisReplicaExchanged, sizeof(gmx_bool), MPI_BYTE, MASTERRANK(cr), -+ cr->mpi_comm_mygroup); -+#endif -+ } -+ -+ if (bThisReplicaExchanged) -+ { -+ /* Exchange the states */ -+ /* Collect the global state on the master node */ -+ if (DOMAINDECOMP(cr)) -+ { -+ dd_collect_state(cr->dd, state_local, state); -+ } -+ else -+ { -+ copy_state_nonatomdata(state_local, state); -+ } -+ -+ if (MASTER(cr)) -+ { -+ /* There will be only one swap cycle with standard replica -+ * exchange, but there may be multiple swap cycles if we -+ * allow multiple swaps. */ -+ -+ for (j = 0; j < maxswap; j++) -+ { -+ exchange_partner = re->order[replica_id][j]; -+ -+ if (exchange_partner != replica_id) -+ { -+ /* Exchange the global states between the master nodes */ -+ if (debug) -+ { -+ fprintf(debug, "Exchanging %d with %d\n", replica_id, exchange_partner); -+ } -+ exchange_state(cr->ms, exchange_partner, state); -+ } -+ } -+ /* For temperature-type replica exchange, we need to scale -+ * the velocities. */ -+ if (re->type == ereTEMP || re->type == ereTL) -+ { -+ scale_velocities(state, sqrt(re->q[ereTEMP][replica_id]/re->q[ereTEMP][re->destinations[replica_id]])); -+ } -+ -+ } -+ -+ /* With domain decomposition the global state is distributed later */ -+ if (!DOMAINDECOMP(cr)) -+ { -+ /* Copy the global state to the local state data structure */ -+ copy_state_nonatomdata(state, state_local); -+ } -+ } -+ -+ return bThisReplicaExchanged; -+} -+ -+void print_replica_exchange_statistics(FILE *fplog, struct gmx_repl_ex *re) -+{ -+ int i; -+ -+ fprintf(fplog, "\nReplica exchange statistics\n"); -+ -+ if (re->nex == 0) -+ { -+ fprintf(fplog, "Repl %d attempts, %d odd, %d even\n", -+ re->nattempt[0]+re->nattempt[1], re->nattempt[1], re->nattempt[0]); -+ -+ fprintf(fplog, "Repl average probabilities:\n"); -+ for (i = 1; i < re->nrepl; i++) -+ { -+ if (re->nattempt[i%2] == 0) -+ { -+ re->prob[i] = 0; -+ } -+ else -+ { -+ re->prob[i] = re->prob_sum[i]/re->nattempt[i%2]; -+ } -+ } -+ print_ind(fplog, "", re->nrepl, re->ind, NULL); -+ print_prob(fplog, "", re->nrepl, re->prob); -+ -+ fprintf(fplog, "Repl number of exchanges:\n"); -+ print_ind(fplog, "", re->nrepl, re->ind, NULL); -+ print_count(fplog, "", re->nrepl, re->nexchange); -+ -+ fprintf(fplog, "Repl average number of exchanges:\n"); -+ for (i = 1; i < re->nrepl; i++) -+ { -+ if (re->nattempt[i%2] == 0) -+ { -+ re->prob[i] = 0; -+ } -+ else -+ { -+ re->prob[i] = ((real)re->nexchange[i])/re->nattempt[i%2]; -+ } -+ } -+ print_ind(fplog, "", re->nrepl, re->ind, NULL); -+ print_prob(fplog, "", re->nrepl, re->prob); -+ -+ fprintf(fplog, "\n"); -+ } -+ /* print the transition matrix */ -+ print_transition_matrix(fplog, re->nrepl, re->nmoves, re->nattempt); -+} diff --git a/g/GROMACS/gromacs-5.0.4-plumed-2.1.3.patch b/g/GROMACS/gromacs-5.0.4-plumed-2.1.3.patch deleted file mode 100644 index 9faba3c3..00000000 --- a/g/GROMACS/gromacs-5.0.4-plumed-2.1.3.patch +++ /dev/null @@ -1,9575 +0,0 @@ -diff --git a/Plumed.cmake b/Plumed.cmake -new file mode 100644 -index 0000000..f66e115 ---- /dev/null -+++ b/Plumed.cmake -@@ -0,0 +1,3 @@ -+# PLUMED: shared installation -+set(PLUMED_LOAD /home/jas02/software/PLUMED/2.1.3-foss-2015b/lib/plumed///src/lib/libplumed.so -ldl ) -+set(PLUMED_DEPENDENCIES /home/jas02/software/PLUMED/2.1.3-foss-2015b/lib/plumed///src/lib/libplumed.so) -diff --git a/Plumed.h b/Plumed.h -new file mode 100644 -index 0000000..16da74a 
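For orientation, the exchange test implemented by calc_delta() and test_for_replica_exchange() above reduces, for plain temperature exchange (ereTEMP), to a Metropolis criterion on delta = -(beta_b - beta_a)*(E_b - E_a), with exp(-delta) skipped once delta exceeds PROBABILITYCUTOFF. The standalone sketch below restates that rule outside GROMACS; the helper names, the stand-in RNG and the numeric constants are illustrative assumptions, not part of this patch.

    /* Minimal sketch of the temperature replica-exchange acceptance rule
     * (Okabe et al., Chem. Phys. Lett. 335 (2001) 435-439), assuming GROMACS
     * units (kJ/mol, K). accept_swap() and uniform01() are illustrative names. */
    #include <math.h>
    #include <stdlib.h>

    static double uniform01(void)
    {
        return rand() / (RAND_MAX + 1.0);   /* stand-in for the counter-based RNG used above */
    }

    /* Return 1 if two replicas with potential energies Ea/Eb at temperatures
     * Ta/Tb should swap configurations. */
    static int accept_swap(double Ea, double Eb, double Ta, double Tb)
    {
        const double kB    = 0.0083144621;                        /* kJ/(mol K), cf. BOLTZ */
        double       delta = -(1.0/(kB*Tb) - 1.0/(kB*Ta)) * (Eb - Ea);

        if (delta <= 0)
        {
            return 1;                                             /* downhill: always accept */
        }
        if (delta > 100.0)                                        /* cf. PROBABILITYCUTOFF */
        {
            return 0;
        }
        return uniform01() < exp(-delta);                         /* Metropolis step */
    }

After an accepted swap, replica_exchange() above rescales the velocities of the incoming configuration via scale_velocities() by the square root of the temperature ratio, so that they match the local thermostat temperature.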
---- /dev/null -+++ b/Plumed.h -@@ -0,0 +1,494 @@ -+/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -+ Copyright (c) 2011-2014 The plumed team -+ (see the PEOPLE file at the root of the distribution for a list of names) -+ -+ See http://www.plumed-code.org for more information. -+ -+ This file is part of plumed, version 2. -+ -+ plumed is free software: you can redistribute it and/or modify -+ it under the terms of the GNU Lesser General Public License as published by -+ the Free Software Foundation, either version 3 of the License, or -+ (at your option) any later version. -+ -+ plumed is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public License -+ along with plumed. If not, see . -++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ -+#ifndef __PLUMED_wrapper_Plumed_h -+#define __PLUMED_wrapper_Plumed_h -+ -+/** -+\page ReferencePlumedH Reference for interfacing MD codes with PLUMED -+ -+ Plumed.h and Plumed.c contain the external plumed interface, which is used to -+ integrate it with MD engines. This interface is very general, and is expected -+ not to change across plumed versions. Plumed.c also implements a dummy version -+ of the interface, so as to allow a code to be fully linked even if the plumed -+ library is not available yet. These files could be directly included in the official -+ host MD distribution. In this manner, it will be sufficient to link the plumed -+ library at link time (on all systems) or directly at runtime (on system where -+ dynamic loading is enabled) to include plumed features. -+ -+ Why is Plumed.c written in C and not C++? The reason is that the resulting Plumed.o -+ needs to be linked with the host MD code immediately (whereas the rest of plumed -+ could be linked a posteriori). Imagine the MD code is written in FORTRAN: when we -+ link the Plumed.o file we would like not to need any C++ library linked. In this -+ manner, we do not need to know which C++ compiler will be used to compile plumed. -+ The C++ library is only linked to the "rest" of plumed, which actually use it. -+ Anyway, Plumed.c is written in such a manner to allow its compilation also in C++ -+ (C++ is a bit stricter than C; compatibility is checked when PlumedStatic.cpp, -+ which basically includes Plumed.c, is compiled with the C++ compiler). This will -+ allow e.g. MD codes written in C++ to just incorporate Plumed.c (maybe renamed into -+ Plumed.cpp), without the need of configuring a plain C compiler. -+ -+ Plumed interface can be used from C, C++ and FORTRAN. Everything concerning plumed -+ is hidden inside a single object type, which is described in C by a structure -+ (struct \ref plumed), in C++ by a class (PLMD::Plumed) and in FORTRAN by a -+ fixed-length string (CHARACTER(LEN=32)). Obviously C++ can use both struct -+ and class interfaces, but the first should be preferred. The reference interface -+ is the C one, whereas FORTRAN and C++ interfaces are implemented as wrappers -+ around it. -+ -+ In the C++ interface, all the routines are implemented as methods of PLMD::Plumed. -+ In the C and FORTRAN interfaces, all the routines are named plumed_*, to -+ avoid potential name clashes. 
Notice that the entire plumed library -+ is implemented in C++, and it is hidden inside the PLMD namespace. -+ -+ Handlers to the plumed object can be converted among different representations, -+ to allow inter-operability among languages. In C, there are tools to convert -+ to/from FORTRAN, whereas in C++ there are tools to convert to/from FORTRAN and C. -+ -+ These handlers only contain a pointer to the real structure, so that -+ when a plumed object is brought from one language to another, -+ it brings a reference to the same environment. -+ -+ Moreover, to simplify life in all cases where a single Plumed object is -+ required for the entire simulation (which covers most of the practical -+ applications with conventional MD codes) it is possible to take advantage -+ of a global interface, which is implicitly referring to a unique global instance. -+ The global object should still be initialized and finalized properly. -+ -+ The basic method to send a message to plumed is -+\verbatim -+ (C) plumed_cmd -+ (C++) PLMD::Plumed::cmd -+ (FORTRAN) PLUMED_F_CMD -+\endverbatim -+ -+ To initialize a plumed object, use: -+\verbatim -+ (C) plumed_create -+ (C++) (constructor of PLMD::Plumed) -+ (FORTRAN) PLUMED_F_CREATE -+\endverbatim -+ -+ To finalize it, use -+\verbatim -+ (C) plumed_finalize -+ (C++) (destructor of PLMD::Plumed) -+ (FORTRAN) PLUMED_F_FINALIZE -+\endverbatim -+ -+ To access to the global-object, use -+\verbatim -+ (C) plumed_gcreate, plumed_gfinalize, plumed_gcmd -+ (C++) PLMD::Plumed::gcreate, PLMD::Plumed::gfinalize, PLMD::Plumed::gcmd -+ (FORTRAN) PLUMED_F_GCREATE, PLUMED_F_GFINALIZE, PLUMED_F_GCMD -+\endverbatim -+ -+ To check if the global object has been initialized, use -+\verbatim -+ (C) plumed_ginitialized -+ (C++) PLMD::Plumed::ginitialized -+ (FORTRAN) PLUMED_F_GINITIALIZED -+\endverbatim -+ -+ To check if plumed library is available (this is useful for runtime linking), use -+\verbatim -+ (C) plumed_installed -+ (C++) PLMD::Plumed::installed -+ (FORTRAN) PLUMED_F_INSTALLED -+\endverbatim -+ -+ To convert handlers use -+\verbatim -+ (C) plumed_c2f (C to FORTRAN) -+ (C) plumed_f2c (FORTRAN to C) -+ (C++) Plumed(plumed) constructor (C to C++) -+ (C++) operator plumed() cast (C++ to C) -+ (C++) Plumed(char*) constructor (FORTRAN to C++) -+ (C++) toFortran(char*) (C++ to FORTRAN) -+\endverbatim -+ -+\verbatim -+ FORTRAN interface -+ SUBROUTINE PLUMED_F_INSTALLED(i) -+ INTEGER, INTENT(OUT) :: i -+ SUBROUTINE PLUMED_F_GINITIALIZED(i) -+ INTEGER, INTENT(OUT) :: i -+ SUBROUTINE PLUMED_F_GCREATE() -+ SUBROUTINE PLUMED_F_GCMD(key,val) -+ CHARACTER(LEN=*), INTENT(IN) :: key -+ UNSPECIFIED_TYPE, INTENT(INOUT) :: val(*) -+ SUBROUTINE PLUMED_F_GFINALIZE() -+ SUBROUTINE PLUMED_F_GLOBAL(p) -+ CHARACTER(LEN=32), INTENT(OUT) :: p -+ SUBROUTINE PLUMED_F_CREATE(p) -+ CHARACTER(LEN=32), INTENT(OUT) :: p -+ SUBROUTINE PLUMED_F_CMD(p,key,val) -+ CHARACTER(LEN=32), INTENT(IN) :: p -+ CHARACTER(LEN=*), INTENT(IN) :: key -+ UNSPECIFIED_TYPE, INTENT(INOUT) :: val(*) -+ SUBROUTINE PLUMED_F_FINALIZE(p) -+ CHARACTER(LEN=32), INTENT(IN) :: p -+\endverbatim -+ -+ The main routine is "cmd", which accepts two arguments: -+ key is a string containing the name of the command -+ val is the argument. it is declared const so as to use allow passing const objects, but in practice plumed -+ is going to modify val in several cases (using a const_cast). -+ In some cases val can be omitted: just pass a NULL pointer (in C++, val is optional and can be omitted). 
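A compact usage sketch of the C interface documented here: create one plumed object, drive it exclusively through cmd(), and finalize it at the end. The command strings below ("setNatoms", "setTimestep", "init") follow PLUMED's usual cmd vocabulary but are assumptions in this context, not taken from this patch; the global plumed_g* variants follow the same pattern without an explicit handle.

    /* Hypothetical minimal driver for the C interface declared in this header.
     * Command keys are assumed from PLUMED's documented vocabulary. */
    #include <stdio.h>
    #include "Plumed.h"

    int plumed_demo(int natoms, double timestep_ps)
    {
        if (!plumed_installed())                      /* runtime-binding check */
        {
            fprintf(stderr, "no PLUMED kernel available\n");
            return 1;
        }
        plumed p = plumed_create();                   /* constructor */
        plumed_cmd(p, "setNatoms",   &natoms);        /* hand over setup data ... */
        plumed_cmd(p, "setTimestep", &timestep_ps);
        plumed_cmd(p, "init",        NULL);           /* ... then initialize */
        /* per MD step: setStep/setPositions/setForces/setBox ..., then "calc" */
        plumed_finalize(p);                           /* destructor */
        return 0;
    }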
-+ The set of possible keys is the real API of the plumed library, and will be expanded with time. -+ New commands will be added, but backward compatibility will be retained as long as possible. -+ -+ To pass plumed a callback function use the following syntax (not available in FORTRAN yet) -+\verbatim -+ plumed_function_holder ff; -+ ff.p=your_function; -+ plumed_cmd(plumed,"xxxx",&ff); -+\endverbatim -+ (this is passing the your_function() function to the "xxxx" command) -+*/ -+ -+#ifdef __cplusplus -+ extern "C" { -+#endif -+ -+/* Generic function pointer */ -+typedef void (*plumed_function_pointer)(void); -+ -+/** -+ \brief Holder for function pointer. -+ -+ To pass plumed a callback function use the following syntax: -+\verbatim -+ plumed_function_holder ff; -+ ff.p=your_function; -+ plumed_cmd(plumed,"xxxx",&ff); -+\endverbatim -+ (this is going to pass the your_function() function to the "xxxx" command) -+*/ -+ -+typedef struct { -+ plumed_function_pointer p; -+} plumed_function_holder; -+ -+/** -+ \brief Main plumed object -+ -+ This is an object containing a Plumed instance, which should be used in -+ the MD engine. It should first be initialized with plumed_create(), -+ then it communicates with the MD engine using plumed_cmd(). Finally, -+ before the termination, it should be deallocated with plumed_finalize(). -+ Its interface is very simple and general, and is expected -+ not to change across plumed versions. See \ref ReferencePlumedH. -+*/ -+typedef struct { -+/** -+ \private -+ \brief Void pointer holding the real PlumedMain structure -+*/ -+ void*p; -+} plumed; -+ -+/** \relates plumed -+ \brief Constructor -+ -+ \return The constructed plumed object -+*/ -+plumed plumed_create(void); -+ -+/** \relates plumed -+ \brief Tells p to execute a command -+ -+ \param p The plumed object on which command is acting -+ \param key The name of the command to be executed -+ \param val The argument. It is declared as const to allow calls like plumed_cmd(p,"A","B"), -+ but for some choice of key it can change the content -+*/ -+void plumed_cmd(plumed p,const char*key,const void*val); -+ -+/** \relates plumed -+ \brief Destructor -+ -+ \param p The plumed object to be deallocated -+*/ -+void plumed_finalize(plumed p); -+ -+/** \relates plumed -+ \brief Check if plumed is installed (for runtime binding) -+ -+ \return 1 if plumed is installed, to 0 otherwise -+*/ -+int plumed_installed(void); -+ -+/** \relates plumed -+ \brief Retrieves an handler to the global structure. -+*/ -+plumed plumed_global(void); -+ -+/** \relates plumed -+ \brief Check if the global interface has been initialized -+ -+ \return 1 if plumed has been initialized, 0 otherwise -+*/ -+int plumed_ginitialized(void); -+ -+/* global C interface, working on a global object */ -+ -+/** \relates plumed -+ \brief Constructor for the global interface. -+ -+ \note Equivalent to plumed_create(), but initialize a static global plumed object -+*/ -+void plumed_gcreate(void); -+ -+/** \relates plumed -+ \brief Tells to the global interface to execute a command. -+ -+ \param key The name of the command to be executed -+ \param val The argument. It is declared as const to allow calls like plumed_gcmd("A","B"), -+ but for some choice of key it can change the content -+ -+ \note Equivalent to plumed_cmd(), but skipping the plumed argument -+*/ -+void plumed_gcmd(const char* key,const void* val); -+ -+/** \relates plumed -+ \brief Destructor for the global interface. 
-+ -+ \note Equivalent to plumed_finalize(), but skipping the plumed argument -+*/ -+void plumed_gfinalize(void); -+ -+/* routines to convert char handler from/to plumed objects */ -+ -+/** \related plumed -+ \brief Converts a C handler to a FORTRAN handler -+ -+ \param p The C handler -+ \param c The FORTRAN handler (a char[32]) -+*/ -+void plumed_c2f(plumed p,char* c); -+ -+/** \related plumed -+ \brief Converts a FORTRAN handler to a C handler -+ \param c The FORTRAN handler (a char[32]) -+ \return The C handler -+*/ -+plumed plumed_f2c(const char* c); -+ -+#ifdef __cplusplus -+ } -+#endif -+ -+#ifdef __cplusplus -+ -+/* this is to include the NULL pointer */ -+#include -+ -+/* C++ interface is hidden in PLMD namespace (same as plumed library) */ -+namespace PLMD { -+ -+/** -+ C++ wrapper for \ref plumed. -+ -+ This class provides a C++ interface to PLUMED. -+*/ -+ -+class Plumed{ -+ plumed main; -+/** -+ keeps track if the object was created from scratch using -+ the defaults destructor (cloned=false) or if it was imported -+ from C or FORTRAN (cloned-true). In the latter case, the -+ plumed_finalize() method is not called when destructing the object, -+ since it is expected to be finalized in the C/FORTRAN code -+*/ -+ bool cloned; -+public: -+/** -+ Check if plumed is installed (for runtime binding) -+ \return true if plumed is installed, false otherwise -+*/ -+ static bool installed(); -+/** -+ Check if global-plumed has been initialized -+ \return true if global plumed object (see global()) is initialized (i.e. if gcreate() has been -+ called), false otherwise. -+*/ -+ static bool ginitialized(); -+/** -+ Initialize global-plumed -+*/ -+ static void gcreate(); -+/** -+ Send a command to global-plumed -+ \param key The name of the command to be executed -+ \param val The argument. It is declared as const to allow calls like gcmd("A","B"), -+ but for some choice of key it can change the content -+*/ -+ static void gcmd(const char* key,const void* val); -+/** -+ Finalize global-plumed -+*/ -+ static void gfinalize(); -+/** -+ Returns the Plumed global object -+ \return The Plumed global object -+*/ -+ static Plumed global(); -+/** -+ Constructor -+*/ -+ Plumed(); -+/** -+ Clone a Plumed object from a FORTRAN char* handler -+ \param c The FORTRAN handler (a char[32]). -+ -+ \attention The Plumed object created in this manner -+ will not finalize the corresponding plumed structure. -+ It is expected that the FORTRAN code calls plumed_c_finalize for it -+*/ -+ Plumed(const char*c); -+/** -+ Clone a Plumed object from a C plumed structure -+ \param p The C plumed structure. -+ -+ \attention The Plumed object created in this manner -+ will not finalize the corresponding plumed structure. -+ It is expected that the C code calls plumed_finalize for it -+*/ -+ Plumed(plumed p); -+private: -+/** Copy constructor is disabled (private and unimplemented) -+ The problem here is that after copying it will not be clear who is -+ going to finalize the corresponding plumed structure. -+*/ -+ Plumed(const Plumed&); -+/** Assignment operator is disabled (private and unimplemented) -+ The problem here is that after copying it will not be clear who is -+ going to finalize the corresponding plumed structure. -+*/ -+ Plumed&operator=(const Plumed&); -+public: -+/** -+ Retrieve the C plumed structure for this object -+*/ -+ operator plumed()const; -+/** -+ Retrieve a FORTRAN handler for this object -+ \param c The FORTRAN handler (a char[32]). 
-+*/ -+ void toFortran(char*c)const; -+/** -+ Send a command to this plumed object -+ \param key The name of the command to be executed -+ \param val The argument. It is declared as const to allow calls like p.cmd("A","B"), -+ but for some choice of key it can change the content -+*/ -+ void cmd(const char*key,const void*val=NULL); -+/** -+ Destructor -+ -+ Destructor is virtual so as to allow correct inheritance from Plumed object. -+ To avoid linking problems with g++, I specify "inline" also here (in principle -+ it should be enough to specify it down in the definition of the function, but -+ for some reason that I do not understand g++ does not inline it properly in that -+ case and complains when Plumed.h is included but Plumed.o is not linked. Anyway, the -+ way it is done here seems to work properly). -+*/ -+ inline virtual ~Plumed(); -+}; -+ -+/* All methods are inlined so as to avoid the compilation of an extra c++ file */ -+ -+inline -+bool Plumed::installed(){ -+ return plumed_installed(); -+} -+ -+inline -+Plumed::Plumed(): -+ main(plumed_create()), -+ cloned(false) -+{} -+ -+inline -+Plumed::Plumed(const char*c): -+ main(plumed_f2c(c)), -+ cloned(true) -+{} -+ -+inline -+Plumed::Plumed(plumed p): -+ main(p), -+ cloned(true) -+{} -+ -+inline -+Plumed::operator plumed()const{ -+ return main; -+} -+ -+inline -+void Plumed::toFortran(char*c)const{ -+ plumed_c2f(main,c); -+} -+ -+inline -+void Plumed::cmd(const char*key,const void*val){ -+ plumed_cmd(main,key,val); -+} -+ -+inline -+Plumed::~Plumed(){ -+ if(!cloned)plumed_finalize(main); -+} -+ -+inline -+bool Plumed::ginitialized(){ -+ return plumed_ginitialized(); -+} -+ -+inline -+void Plumed::gcreate(){ -+ plumed_gcreate(); -+} -+ -+inline -+void Plumed::gcmd(const char* key,const void* val){ -+ plumed_gcmd(key,val); -+} -+ -+inline -+void Plumed::gfinalize(){ -+ plumed_gfinalize(); -+} -+ -+inline -+Plumed Plumed::global(){ -+ return plumed_global(); -+} -+ -+} -+ -+#endif -+ -+ -+#endif -diff --git a/Plumed.inc b/Plumed.inc -new file mode 100644 -index 0000000..cd6097a ---- /dev/null -+++ b/Plumed.inc -@@ -0,0 +1,3 @@ -+# PLUMED: shared installation -+PLUMED_LOAD= /home/jas02/software/PLUMED/2.1.3-foss-2015b/lib/plumed///src/lib/libplumed.so -ldl -+PLUMED_DEPENDENCIES= /home/jas02/software/PLUMED/2.1.3-foss-2015b/lib/plumed///src/lib/libplumed.so -diff --git a/src/gromacs/CMakeLists.txt b/src/gromacs/CMakeLists.txt -index 6db37e2..cc97aa8 100644 ---- a/src/gromacs/CMakeLists.txt -+++ b/src/gromacs/CMakeLists.txt -@@ -32,6 +32,8 @@ - # To help us fund GROMACS development, we humbly ask that you cite - # the research papers on the package. Check out http://www.gromacs.org. - -+include(${CMAKE_SOURCE_DIR}/Plumed.cmake) -+ - set(LIBGROMACS_SOURCES) - - function (gmx_install_headers DESTINATION) -@@ -189,7 +191,7 @@ target_link_libraries(libgromacs - ${TNG_IO_LIBRARIES} - ${FFT_LIBRARIES} ${LINEAR_ALGEBRA_LIBRARIES} - ${XML_LIBRARIES} -- ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS}) -+ ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS} ${PLUMED_LOAD}) - set_target_properties(libgromacs PROPERTIES - OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}" - SOVERSION ${LIBRARY_SOVERSION} -diff --git a/src/gromacs/CMakeLists.txt.preplumed b/src/gromacs/CMakeLists.txt.preplumed -new file mode 100644 -index 0000000..6db37e2 ---- /dev/null -+++ b/src/gromacs/CMakeLists.txt.preplumed -@@ -0,0 +1,232 @@ -+# -+# This file is part of the GROMACS molecular simulation package. 
-+# -+# Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by -+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+# and including many others, as listed in the AUTHORS file in the -+# top-level source directory and at http://www.gromacs.org. -+# -+# GROMACS is free software; you can redistribute it and/or -+# modify it under the terms of the GNU Lesser General Public License -+# as published by the Free Software Foundation; either version 2.1 -+# of the License, or (at your option) any later version. -+# -+# GROMACS is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# Lesser General Public License for more details. -+# -+# You should have received a copy of the GNU Lesser General Public -+# License along with GROMACS; if not, see -+# http://www.gnu.org/licenses, or write to the Free Software Foundation, -+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+# -+# If you want to redistribute modifications to GROMACS, please -+# consider that scientific software is very special. Version -+# control is crucial - bugs must be traceable. We will be happy to -+# consider code for inclusion in the official distribution, but -+# derived work must not be called official GROMACS. Details are found -+# in the README & COPYING files - if they are missing, get the -+# official version at http://www.gromacs.org. -+# -+# To help us fund GROMACS development, we humbly ask that you cite -+# the research papers on the package. Check out http://www.gromacs.org. -+ -+set(LIBGROMACS_SOURCES) -+ -+function (gmx_install_headers DESTINATION) -+ if (NOT GMX_BUILD_MDRUN_ONLY) -+ if (DESTINATION) -+ set(DESTINATION ${INCL_INSTALL_DIR}/gromacs/${DESTINATION}) -+ else() -+ set(DESTINATION ${INCL_INSTALL_DIR}/gromacs) -+ endif() -+ install(FILES ${ARGN} DESTINATION ${DESTINATION} COMPONENT development) -+ endif() -+endfunction () -+ -+if(GMX_USE_TNG) -+ option(GMX_EXTERNAL_TNG "Use external TNG instead of compiling the version shipped with GROMACS." -+ OFF) -+ # Detect TNG if GMX_EXTERNAL_TNG is explicitly ON -+ if(GMX_EXTERNAL_TNG) -+ find_package(TNG_IO 1.6.0) -+ if(NOT TNG_IO_FOUND) -+ message(FATAL_ERROR -+ "TNG >= 1.6.0 not found. " -+ "You can set GMX_EXTERNAL_TNG=OFF to compile TNG.") -+ endif() -+ include_directories(${TNG_IO_INCLUDE_DIRS}) -+ endif() -+ if(NOT GMX_EXTERNAL_TNG) -+ include(${CMAKE_SOURCE_DIR}/src/external/tng_io/BuildTNG.cmake) -+ tng_get_source_list(TNG_SOURCES TNG_IO_DEFINITIONS) -+ list(APPEND LIBGROMACS_SOURCES ${TNG_SOURCES}) -+ tng_set_source_properties(WITH_ZLIB ${HAVE_ZLIB}) -+ -+ if (HAVE_ZLIB) -+ list(APPEND GMX_EXTRA_LIBRARIES ${ZLIB_LIBRARIES}) -+ include_directories(${ZLIB_INCLUDE_DIRS}) -+ endif() -+ endif() -+else() -+ # We still need to get tng/tng_io_fwd.h from somewhere! 
-+ include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/external/tng_io/include) -+endif() -+ -+add_subdirectory(gmxlib) -+add_subdirectory(mdlib) -+add_subdirectory(gmxpreprocess) -+add_subdirectory(commandline) -+add_subdirectory(fft) -+add_subdirectory(linearalgebra) -+add_subdirectory(math) -+add_subdirectory(random) -+add_subdirectory(onlinehelp) -+add_subdirectory(options) -+add_subdirectory(timing) -+add_subdirectory(utility) -+add_subdirectory(fileio) -+add_subdirectory(swap) -+add_subdirectory(essentialdynamics) -+add_subdirectory(pulling) -+add_subdirectory(simd) -+add_subdirectory(imd) -+if (NOT GMX_BUILD_MDRUN_ONLY) -+ add_subdirectory(legacyheaders) -+ add_subdirectory(gmxana) -+ add_subdirectory(statistics) -+ add_subdirectory(analysisdata) -+ add_subdirectory(selection) -+ add_subdirectory(trajectoryanalysis) -+ add_subdirectory(tools) -+endif() -+ -+list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${MDLIB_SOURCES}) -+ -+# This would be the standard way to include thread_mpi, but -+# we want libgromacs to link the functions directly -+#if(GMX_THREAD_MPI) -+# add_subdirectory(thread_mpi) -+#endif() -+#target_link_libraries(gmx ${GMX_EXTRA_LIBRARIES} ${THREAD_MPI_LIB}) -+ -+tmpi_get_source_list(THREAD_MPI_SOURCES ${CMAKE_SOURCE_DIR}/src/external/thread_mpi/src) -+list(APPEND LIBGROMACS_SOURCES ${THREAD_MPI_SOURCES}) -+ -+file(GLOB LIBGROMACS_HEADERS *.h) -+configure_file(version.h.cmakein version.h) -+gmx_install_headers("" ${LIBGROMACS_HEADERS}) -+gmx_install_headers("" ${CMAKE_CURRENT_BINARY_DIR}/version.h) -+ -+# Add target that generates baseversion-gen.c every time make is run -+# if git version info is requested, or create it statically. -+# This code is here instead of utility/CMakeLists.txt because CMake -+# ignores set_source_file_properties from subdirectories. -+set(GENERATED_VERSION_FILE -+ ${CMAKE_CURRENT_BINARY_DIR}/utility/baseversion-gen.c) -+set(GENERATED_VERSION_FILE_SOURCE -+ ${CMAKE_CURRENT_SOURCE_DIR}/utility/baseversion-gen.c.cmakein) -+if (GMX_GIT_VERSION_INFO) -+ add_custom_target(gmx-version ALL -+ COMMAND ${CMAKE_COMMAND} -+ -D GIT_EXECUTABLE="${GIT_EXECUTABLE}" -+ -D PROJECT_VERSION="${PROJECT_VERSION}" -+ -D PROJECT_SOURCE_DIR="${PROJECT_SOURCE_DIR}" -+ -D VERSION_CMAKEIN=${GENERATED_VERSION_FILE_SOURCE} -+ -D VERSION_OUT=${GENERATED_VERSION_FILE} -+ -P ${CMAKE_SOURCE_DIR}/cmake/gmxGenerateVersionInfo.cmake -+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} -+ DEPENDS ${GENERATED_VERSION_FILE_SOURCE} -+ COMMENT "Generating git version information") -+ set_source_files_properties(${GENERATED_VERSION_FILE} -+ PROPERTIES GENERATED true) -+else() -+ set(GMX_PROJECT_VERSION_STR ${PROJECT_VERSION}) -+ configure_file(${GENERATED_VERSION_FILE_SOURCE} ${GENERATED_VERSION_FILE}) -+endif() -+list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE}) -+ -+# apply gcc 4.4.x bug workaround -+if(GMX_USE_GCC44_BUG_WORKAROUND) -+ include(gmxGCC44O3BugWorkaround) -+ gmx_apply_gcc44_bug_workaround("gmxlib/bondfree.c") -+ gmx_apply_gcc44_bug_workaround("mdlib/force.c") -+ gmx_apply_gcc44_bug_workaround("mdlib/constr.c") -+endif() -+ -+add_library(libgromacs ${LIBGROMACS_SOURCES}) -+if (GMX_GIT_VERSION_INFO) -+ add_dependencies(libgromacs gmx-version) -+endif() -+ -+# Recent versions of gcc and clang give warnings on scanner.cpp, which -+# is a generated source file. These are awkward to suppress inline, so -+# we do it in the compilation command (after testing that the compiler -+# supports the suppressions). 
Setting the properties only works after -+# the related target has been created, e.g. after when the file is -+# used with add_library(). -+include(CheckCXXCompilerFlag) -+check_cxx_compiler_flag(-Wno-unused-parameter HAS_NO_UNUSED_PARAMETER) -+if (HAS_NO_UNUSED_PARAMETER) -+ set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-unused-parameter") -+endif() -+check_cxx_compiler_flag(-Wno-deprecated-register HAS_NO_DEPRECATED_REGISTER) -+if (HAS_NO_DEPRECATED_REGISTER) -+ set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-deprecated-register") -+else() -+ check_cxx_compiler_flag(-Wno-deprecated HAS_NO_DEPRECATED) -+ if (HAS_NO_DEPRECATED) -+ set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-deprecated") -+ endif() -+endif() -+set_source_files_properties(selection/scanner.cpp PROPERTIES COMPILE_FLAGS "${_scanner_cpp_compiler_flags}") -+ -+target_link_libraries(libgromacs -+ ${EXTRAE_LIBRARIES} -+ ${GMX_GPU_LIBRARIES} -+ ${GMX_EXTRA_LIBRARIES} -+ ${TNG_IO_LIBRARIES} -+ ${FFT_LIBRARIES} ${LINEAR_ALGEBRA_LIBRARIES} -+ ${XML_LIBRARIES} -+ ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS}) -+set_target_properties(libgromacs PROPERTIES -+ OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}" -+ SOVERSION ${LIBRARY_SOVERSION} -+ VERSION ${LIBRARY_VERSION} -+ COMPILE_FLAGS "${OpenMP_C_FLAGS}") -+ -+# Only install the library in mdrun-only mode if it is actually necessary -+# for the binary -+if (NOT GMX_BUILD_MDRUN_ONLY OR BUILD_SHARED_LIBS) -+ install(TARGETS libgromacs -+ LIBRARY DESTINATION ${LIB_INSTALL_DIR} -+ RUNTIME DESTINATION ${BIN_INSTALL_DIR} -+ ARCHIVE DESTINATION ${LIB_INSTALL_DIR} -+ COMPONENT libraries) -+endif() -+ -+if (NOT GMX_BUILD_MDRUN_ONLY) -+ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libgromacs.pc.cmakein -+ ${CMAKE_CURRENT_BINARY_DIR}/libgromacs.pc @ONLY) -+ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libgromacs.pc -+ DESTINATION ${LIB_INSTALL_DIR}/pkgconfig -+ RENAME "libgromacs${GMX_LIBS_SUFFIX}.pc" -+ COMPONENT development) -+endif() -+ -+if (INSTALL_CUDART_LIB) #can be set manual by user -+ if (GMX_GPU) -+ foreach(CUDA_LIB ${CUDA_LIBRARIES}) -+ string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB}) -+ if(IS_CUDART) #libcuda should not be installed -+ #install also name-links (linker uses those) -+ file(GLOB CUDA_LIBS ${CUDA_LIB}*) -+ install(FILES ${CUDA_LIBS} DESTINATION -+ ${LIB_INSTALL_DIR} COMPONENT libraries) -+ endif() -+ endforeach() -+ else() -+ message(WARNING "INSTALL_CUDART_LIB only makes sense with GMX_GPU") -+ endif() -+endif() -diff --git a/src/gromacs/mdlib/force.c b/src/gromacs/mdlib/force.c -index 5230983..8227d5b 100644 ---- a/src/gromacs/mdlib/force.c -+++ b/src/gromacs/mdlib/force.c -@@ -67,6 +67,14 @@ - #include "gromacs/timing/wallcycle.h" - #include "gmx_fatal.h" - -+/* PLUMED */ -+#include "../../../Plumed.h" -+int plumedswitch=0; -+plumed plumedmain; -+void(*plumedcmd)(plumed,const char*,const void*)=NULL; -+/* END PLUMED */ -+ -+ - void ns(FILE *fp, - t_forcerec *fr, - matrix box, -@@ -737,6 +745,13 @@ void do_force_lowlevel(FILE *fplog, gmx_int64_t step, - pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); - } - -+ /* PLUMED */ -+ if(plumedswitch){ -+ int plumedNeedsEnergy; -+ (*plumedcmd)(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); -+ if(!plumedNeedsEnergy) (*plumedcmd)(plumedmain,"performCalc",NULL); -+ } -+ /* END PLUMED */ - } - - void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd) -diff --git a/src/gromacs/mdlib/force.c.preplumed b/src/gromacs/mdlib/force.c.preplumed -new file mode 
100644 -index 0000000..5230983 ---- /dev/null -+++ b/src/gromacs/mdlib/force.c.preplumed -@@ -0,0 +1,1018 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team. -+ * Copyright (c) 2013,2014, by the GROMACS development team, led by -+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+ * and including many others, as listed in the AUTHORS file in the -+ * top-level source directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. -+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. 
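The force.c hunk above is the actual coupling point of this patch: it introduces the globals plumedswitch, plumedmain and plumedcmd (initialized elsewhere in the patched mdrun, outside this excerpt) and, at the end of do_force_lowlevel(), asks PLUMED whether the bias needs the potential energy; if not, the bias forces are computed immediately via "performCalc". The branch where the energy is needed is handled later by the patched caller. The sketch below shows that deferred pattern under the assumption that PLUMED's "setEnergy" key is used to pass the total potential energy; that part is not visible in this excerpt.

    /* Sketch of the deferred branch of the hook shown above; the function name
     * and the "setEnergy" key are assumptions, the other keys mirror the hunk. */
    #include "Plumed.h"    /* adjust path as in the hunk above */

    extern int    plumedswitch;
    extern plumed plumedmain;
    extern void (*plumedcmd)(plumed, const char *, const void *);

    static void plumed_calc_with_energy(double epot)
    {
        if (plumedswitch)
        {
            int needs_energy = 0;
            (*plumedcmd)(plumedmain, "isEnergyNeeded", &needs_energy);
            if (needs_energy)
            {
                (*plumedcmd)(plumedmain, "setEnergy",   &epot);   /* assumed key */
                (*plumedcmd)(plumedmain, "performCalc", NULL);
            }
        }
    }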
-+ */ -+#ifdef HAVE_CONFIG_H -+#include -+#endif -+ -+#include -+#include -+#include -+#include "sysstuff.h" -+#include "typedefs.h" -+#include "macros.h" -+#include "gromacs/utility/smalloc.h" -+#include "macros.h" -+#include "physics.h" -+#include "force.h" -+#include "nonbonded.h" -+#include "names.h" -+#include "network.h" -+#include "pbc.h" -+#include "ns.h" -+#include "nrnb.h" -+#include "bondf.h" -+#include "mshift.h" -+#include "txtdump.h" -+#include "coulomb.h" -+#include "pme.h" -+#include "mdrun.h" -+#include "domdec.h" -+#include "qmmm.h" -+#include "gmx_omp_nthreads.h" -+ -+#include "gromacs/timing/wallcycle.h" -+#include "gmx_fatal.h" -+ -+void ns(FILE *fp, -+ t_forcerec *fr, -+ matrix box, -+ gmx_groups_t *groups, -+ gmx_localtop_t *top, -+ t_mdatoms *md, -+ t_commrec *cr, -+ t_nrnb *nrnb, -+ gmx_bool bFillGrid, -+ gmx_bool bDoLongRangeNS) -+{ -+ char *ptr; -+ int nsearch; -+ -+ -+ if (!fr->ns.nblist_initialized) -+ { -+ init_neighbor_list(fp, fr, md->homenr); -+ } -+ -+ if (fr->bTwinRange) -+ { -+ fr->nlr = 0; -+ } -+ -+ nsearch = search_neighbours(fp, fr, box, top, groups, cr, nrnb, md, -+ bFillGrid, bDoLongRangeNS); -+ if (debug) -+ { -+ fprintf(debug, "nsearch = %d\n", nsearch); -+ } -+ -+ /* Check whether we have to do dynamic load balancing */ -+ /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0)) -+ count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr, -+ &(top->idef),opts->ngener); -+ */ -+ if (fr->ns.dump_nl > 0) -+ { -+ dump_nblist(fp, cr, fr, fr->ns.dump_nl); -+ } -+} -+ -+static void reduce_thread_forces(int n, rvec *f, -+ tensor vir_q, tensor vir_lj, -+ real *Vcorr_q, real *Vcorr_lj, -+ real *dvdl_q, real *dvdl_lj, -+ int nthreads, f_thread_t *f_t) -+{ -+ int t, i; -+ int nthreads_loop gmx_unused; -+ -+ /* This reduction can run over any number of threads */ -+ nthreads_loop = gmx_omp_nthreads_get(emntBonded); -+#pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static) -+ for (i = 0; i < n; i++) -+ { -+ for (t = 1; t < nthreads; t++) -+ { -+ rvec_inc(f[i], f_t[t].f[i]); -+ } -+ } -+ for (t = 1; t < nthreads; t++) -+ { -+ *Vcorr_q += f_t[t].Vcorr_q; -+ *Vcorr_lj += f_t[t].Vcorr_lj; -+ *dvdl_q += f_t[t].dvdl[efptCOUL]; -+ *dvdl_lj += f_t[t].dvdl[efptVDW]; -+ m_add(vir_q, f_t[t].vir_q, vir_q); -+ m_add(vir_lj, f_t[t].vir_lj, vir_lj); -+ } -+} -+ -+void gmx_print_sepdvdl(FILE *fplog, const char *s, real v, real dvdlambda) -+{ -+ fprintf(fplog, " %-30s V %12.5e dVdl %12.5e\n", s, v, dvdlambda); -+} -+ -+void do_force_lowlevel(FILE *fplog, gmx_int64_t step, -+ t_forcerec *fr, t_inputrec *ir, -+ t_idef *idef, t_commrec *cr, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ t_mdatoms *md, -+ rvec x[], history_t *hist, -+ rvec f[], -+ rvec f_longrange[], -+ gmx_enerdata_t *enerd, -+ t_fcdata *fcd, -+ gmx_localtop_t *top, -+ gmx_genborn_t *born, -+ t_atomtypes *atype, -+ gmx_bool bBornRadii, -+ matrix box, -+ t_lambda *fepvals, -+ real *lambda, -+ t_graph *graph, -+ t_blocka *excl, -+ rvec mu_tot[], -+ int flags, -+ float *cycles_pme) -+{ -+ int i, j; -+ int donb_flags; -+ gmx_bool bDoEpot, bSepDVDL, bSB; -+ int pme_flags; -+ matrix boxs; -+ rvec box_size; -+ t_pbc pbc; -+ char buf[22]; -+ double clam_i, vlam_i; -+ real dvdl_dum[efptNR], dvdl_nb[efptNR], lam_i[efptNR]; -+ real dvdl_q, dvdl_lj; -+ -+#ifdef GMX_MPI -+ double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ -+#endif -+ -+#define PRINT_SEPDVDL(s, v, dvdlambda) if (bSepDVDL) { gmx_print_sepdvdl(fplog, s, v, dvdlambda); } -+ -+ set_pbc(&pbc, fr->ePBC, box); -+ -+ /* reset 
free energy components */ -+ for (i = 0; i < efptNR; i++) -+ { -+ dvdl_nb[i] = 0; -+ dvdl_dum[i] = 0; -+ } -+ -+ /* Reset box */ -+ for (i = 0; (i < DIM); i++) -+ { -+ box_size[i] = box[i][i]; -+ } -+ -+ bSepDVDL = (fr->bSepDVDL && do_per_step(step, ir->nstlog)); -+ debug_gmx(); -+ -+ /* do QMMM first if requested */ -+ if (fr->bQMMM) -+ { -+ enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr); -+ } -+ -+ if (bSepDVDL) -+ { -+ fprintf(fplog, "Step %s: non-bonded V and dVdl for rank %d:\n", -+ gmx_step_str(step, buf), cr->nodeid); -+ } -+ -+ /* Call the short range functions all in one go. */ -+ -+#ifdef GMX_MPI -+ /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ -+#define TAKETIME FALSE -+ if (TAKETIME) -+ { -+ MPI_Barrier(cr->mpi_comm_mygroup); -+ t0 = MPI_Wtime(); -+ } -+#endif -+ -+ if (ir->nwall) -+ { -+ /* foreign lambda component for walls */ -+ real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], -+ enerd->grpp.ener[egLJSR], nrnb); -+ PRINT_SEPDVDL("Walls", 0.0, dvdl_walls); -+ enerd->dvdl_lin[efptVDW] += dvdl_walls; -+ } -+ -+ /* If doing GB, reset dvda and calculate the Born radii */ -+ if (ir->implicit_solvent) -+ { -+ wallcycle_sub_start(wcycle, ewcsNONBONDED); -+ -+ for (i = 0; i < born->nr; i++) -+ { -+ fr->dvda[i] = 0; -+ } -+ -+ if (bBornRadii) -+ { -+ calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb); -+ } -+ -+ wallcycle_sub_stop(wcycle, ewcsNONBONDED); -+ } -+ -+ where(); -+ /* We only do non-bonded calculation with group scheme here, the verlet -+ * calls are done from do_force_cutsVERLET(). */ -+ if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) -+ { -+ donb_flags = 0; -+ /* Add short-range interactions */ -+ donb_flags |= GMX_NONBONDED_DO_SR; -+ -+ /* Currently all group scheme kernels always calculate (shift-)forces */ -+ if (flags & GMX_FORCE_FORCES) -+ { -+ donb_flags |= GMX_NONBONDED_DO_FORCE; -+ } -+ if (flags & GMX_FORCE_VIRIAL) -+ { -+ donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE; -+ } -+ if (flags & GMX_FORCE_ENERGY) -+ { -+ donb_flags |= GMX_NONBONDED_DO_POTENTIAL; -+ } -+ if (flags & GMX_FORCE_DO_LR) -+ { -+ donb_flags |= GMX_NONBONDED_DO_LR; -+ } -+ -+ wallcycle_sub_start(wcycle, ewcsNONBONDED); -+ do_nonbonded(fr, x, f, f_longrange, md, excl, -+ &enerd->grpp, nrnb, -+ lambda, dvdl_nb, -1, -1, donb_flags); -+ -+ /* If we do foreign lambda and we have soft-core interactions -+ * we have to recalculate the (non-linear) energies contributions. -+ */ -+ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) -+ { -+ for (i = 0; i < enerd->n_lambda; i++) -+ { -+ for (j = 0; j < efptNR; j++) -+ { -+ lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); -+ } -+ reset_foreign_enerdata(enerd); -+ do_nonbonded(fr, x, f, f_longrange, md, excl, -+ &(enerd->foreign_grpp), nrnb, -+ lam_i, dvdl_dum, -1, -1, -+ (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); -+ sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); -+ enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; -+ } -+ } -+ wallcycle_sub_stop(wcycle, ewcsNONBONDED); -+ where(); -+ } -+ -+ /* If we are doing GB, calculate bonded forces and apply corrections -+ * to the solvation forces */ -+ /* MRS: Eventually, many need to include free energy contribution here! 
*/ -+ if (ir->implicit_solvent) -+ { -+ wallcycle_sub_start(wcycle, ewcsBONDED); -+ calc_gb_forces(cr, md, born, top, x, f, fr, idef, -+ ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd); -+ wallcycle_sub_stop(wcycle, ewcsBONDED); -+ } -+ -+#ifdef GMX_MPI -+ if (TAKETIME) -+ { -+ t1 = MPI_Wtime(); -+ fr->t_fnbf += t1-t0; -+ } -+#endif -+ -+ if (fepvals->sc_alpha != 0) -+ { -+ enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; -+ } -+ else -+ { -+ enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; -+ } -+ -+ if (fepvals->sc_alpha != 0) -+ -+ /* even though coulomb part is linear, we already added it, beacuse we -+ need to go through the vdw calculation anyway */ -+ { -+ enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; -+ } -+ else -+ { -+ enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; -+ } -+ -+ if (bSepDVDL) -+ { -+ real V_short_range = 0; -+ real dvdl_short_range = 0; -+ -+ for (i = 0; i < enerd->grpp.nener; i++) -+ { -+ V_short_range += -+ (fr->bBHAM ? -+ enerd->grpp.ener[egBHAMSR][i] : -+ enerd->grpp.ener[egLJSR][i]) -+ + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i]; -+ } -+ dvdl_short_range = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL]; -+ PRINT_SEPDVDL("VdW and Coulomb SR particle-p.", -+ V_short_range, -+ dvdl_short_range); -+ } -+ debug_gmx(); -+ -+ -+ if (debug) -+ { -+ pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); -+ } -+ -+ /* Shift the coordinates. Must be done before bonded forces and PPPM, -+ * but is also necessary for SHAKE and update, therefore it can NOT -+ * go when no bonded forces have to be evaluated. -+ */ -+ -+ /* Here sometimes we would not need to shift with NBFonly, -+ * but we do so anyhow for consistency of the returned coordinates. -+ */ -+ if (graph) -+ { -+ shift_self(graph, box, x); -+ if (TRICLINIC(box)) -+ { -+ inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); -+ } -+ else -+ { -+ inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); -+ } -+ } -+ /* Check whether we need to do bondeds or correct for exclusions */ -+ if (fr->bMolPBC && -+ ((flags & GMX_FORCE_BONDED) -+ || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))) -+ { -+ /* Since all atoms are in the rectangular or triclinic unit-cell, -+ * only single box vector shifts (2 in x) are required. -+ */ -+ set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box); -+ } -+ debug_gmx(); -+ -+ if (flags & GMX_FORCE_BONDED) -+ { -+ wallcycle_sub_start(wcycle, ewcsBONDED); -+ calc_bonds(fplog, cr->ms, -+ idef, x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, -+ DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born, -+ flags, -+ fr->bSepDVDL && do_per_step(step, ir->nstlog), step); -+ -+ /* Check if we have to determine energy differences -+ * at foreign lambda's. -+ */ -+ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && -+ idef->ilsort != ilsortNO_FE) -+ { -+ if (idef->ilsort != ilsortFE_SORTED) -+ { -+ gmx_incons("The bonded interactions are not sorted for free energy"); -+ } -+ for (i = 0; i < enerd->n_lambda; i++) -+ { -+ reset_foreign_enerdata(enerd); -+ for (j = 0; j < efptNR; j++) -+ { -+ lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); -+ } -+ calc_bonds_lambda(fplog, idef, x, fr, &pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md, -+ fcd, DOMAINDECOMP(cr) ? 
cr->dd->gatindex : NULL); -+ sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); -+ enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; -+ } -+ } -+ debug_gmx(); -+ -+ wallcycle_sub_stop(wcycle, ewcsBONDED); -+ } -+ -+ where(); -+ -+ *cycles_pme = 0; -+ if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)) -+ { -+ real Vlr = 0, Vcorr = 0; -+ real dvdl_long_range = 0; -+ int status = 0; -+ -+ bSB = (ir->nwall == 2); -+ if (bSB) -+ { -+ copy_mat(box, boxs); -+ svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); -+ box_size[ZZ] *= ir->wall_ewald_zfac; -+ } -+ } -+ -+ /* Do long-range electrostatics and/or LJ-PME, including related short-range -+ * corrections. -+ */ -+ -+ clear_mat(fr->vir_el_recip); -+ clear_mat(fr->vir_lj_recip); -+ -+ if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)) -+ { -+ real Vlr_q = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0; -+ real dvdl_long_range_q = 0, dvdl_long_range_lj = 0; -+ int status = 0; -+ -+ if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype)) -+ { -+ real dvdl_long_range_correction_q = 0; -+ real dvdl_long_range_correction_lj = 0; -+ /* With the Verlet scheme exclusion forces are calculated -+ * in the non-bonded kernel. -+ */ -+ /* The TPI molecule does not have exclusions with the rest -+ * of the system and no intra-molecular PME grid -+ * contributions will be calculated in -+ * gmx_pme_calc_energy. -+ */ -+ if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || -+ ir->ewald_geometry != eewg3D || -+ ir->epsilon_surface != 0) -+ { -+ int nthreads, t; -+ -+ wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); -+ -+ if (fr->n_tpi > 0) -+ { -+ gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); -+ } -+ -+ nthreads = gmx_omp_nthreads_get(emntBonded); -+#pragma omp parallel for num_threads(nthreads) schedule(static) -+ for (t = 0; t < nthreads; t++) -+ { -+ int s, e, i; -+ rvec *fnv; -+ tensor *vir_q, *vir_lj; -+ real *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj; -+ if (t == 0) -+ { -+ fnv = fr->f_novirsum; -+ vir_q = &fr->vir_el_recip; -+ vir_lj = &fr->vir_lj_recip; -+ Vcorrt_q = &Vcorr_q; -+ Vcorrt_lj = &Vcorr_lj; -+ dvdlt_q = &dvdl_long_range_correction_q; -+ dvdlt_lj = &dvdl_long_range_correction_lj; -+ } -+ else -+ { -+ fnv = fr->f_t[t].f; -+ vir_q = &fr->f_t[t].vir_q; -+ vir_lj = &fr->f_t[t].vir_lj; -+ Vcorrt_q = &fr->f_t[t].Vcorr_q; -+ Vcorrt_lj = &fr->f_t[t].Vcorr_lj; -+ dvdlt_q = &fr->f_t[t].dvdl[efptCOUL]; -+ dvdlt_lj = &fr->f_t[t].dvdl[efptVDW]; -+ for (i = 0; i < fr->natoms_force; i++) -+ { -+ clear_rvec(fnv[i]); -+ } -+ clear_mat(*vir_q); -+ clear_mat(*vir_lj); -+ } -+ *dvdlt_q = 0; -+ *dvdlt_lj = 0; -+ -+ ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1], -+ cr, t, fr, -+ md->chargeA, md->chargeB, -+ md->sqrt_c6A, md->sqrt_c6B, -+ md->sigmaA, md->sigmaB, -+ md->sigma3A, md->sigma3B, -+ md->nChargePerturbed || md->nTypePerturbed, -+ ir->cutoff_scheme != ecutsVERLET, -+ excl, x, bSB ? 
boxs : box, mu_tot, -+ ir->ewald_geometry, -+ ir->epsilon_surface, -+ fnv, *vir_q, *vir_lj, -+ Vcorrt_q, Vcorrt_lj, -+ lambda[efptCOUL], lambda[efptVDW], -+ dvdlt_q, dvdlt_lj); -+ } -+ if (nthreads > 1) -+ { -+ reduce_thread_forces(fr->natoms_force, fr->f_novirsum, -+ fr->vir_el_recip, fr->vir_lj_recip, -+ &Vcorr_q, &Vcorr_lj, -+ &dvdl_long_range_correction_q, -+ &dvdl_long_range_correction_lj, -+ nthreads, fr->f_t); -+ } -+ wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); -+ } -+ -+ if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0) -+ { -+ Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box, -+ &dvdl_long_range_correction_q, -+ fr->vir_el_recip); -+ } -+ -+ PRINT_SEPDVDL("Ewald excl./charge/dip. corr.", Vcorr_q, dvdl_long_range_correction_q); -+ PRINT_SEPDVDL("Ewald excl. corr. LJ", Vcorr_lj, dvdl_long_range_correction_lj); -+ enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q; -+ enerd->dvdl_lin[efptVDW] += dvdl_long_range_correction_lj; -+ } -+ -+ if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype))) -+ { -+ if (cr->duty & DUTY_PME) -+ { -+ /* Do reciprocal PME for Coulomb and/or LJ. */ -+ assert(fr->n_tpi >= 0); -+ if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) -+ { -+ pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE; -+ if (EEL_PME(fr->eeltype)) -+ { -+ pme_flags |= GMX_PME_DO_COULOMB; -+ } -+ if (EVDW_PME(fr->vdwtype)) -+ { -+ pme_flags |= GMX_PME_DO_LJ; -+ } -+ if (flags & GMX_FORCE_FORCES) -+ { -+ pme_flags |= GMX_PME_CALC_F; -+ } -+ if (flags & GMX_FORCE_VIRIAL) -+ { -+ pme_flags |= GMX_PME_CALC_ENER_VIR; -+ } -+ if (fr->n_tpi > 0) -+ { -+ /* We don't calculate f, but we do want the potential */ -+ pme_flags |= GMX_PME_CALC_POT; -+ } -+ wallcycle_start(wcycle, ewcPMEMESH); -+ status = gmx_pme_do(fr->pmedata, -+ 0, md->homenr - fr->n_tpi, -+ x, fr->f_novirsum, -+ md->chargeA, md->chargeB, -+ md->sqrt_c6A, md->sqrt_c6B, -+ md->sigmaA, md->sigmaB, -+ bSB ? boxs : box, cr, -+ DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, -+ DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, -+ nrnb, wcycle, -+ fr->vir_el_recip, fr->ewaldcoeff_q, -+ fr->vir_lj_recip, fr->ewaldcoeff_lj, -+ &Vlr_q, &Vlr_lj, -+ lambda[efptCOUL], lambda[efptVDW], -+ &dvdl_long_range_q, &dvdl_long_range_lj, pme_flags); -+ *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); -+ if (status != 0) -+ { -+ gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status); -+ } -+ /* We should try to do as little computation after -+ * this as possible, because parallel PME synchronizes -+ * the nodes, so we want all load imbalance of the -+ * rest of the force calculation to be before the PME -+ * call. DD load balancing is done on the whole time -+ * of the force call (without PME). -+ */ -+ } -+ if (fr->n_tpi > 0) -+ { -+ if (EVDW_PME(ir->vdwtype)) -+ { -+ -+ gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME"); -+ } -+ /* Determine the PME grid energy of the test molecule -+ * with the PME grid potential of the other charges. 
-+ */ -+ gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, -+ x + md->homenr - fr->n_tpi, -+ md->chargeA + md->homenr - fr->n_tpi, -+ &Vlr_q); -+ } -+ PRINT_SEPDVDL("PME mesh", Vlr_q + Vlr_lj, dvdl_long_range_q+dvdl_long_range_lj); -+ } -+ } -+ -+ if (!EEL_PME(fr->eeltype) && EEL_PME_EWALD(fr->eeltype)) -+ { -+ Vlr_q = do_ewald(ir, x, fr->f_novirsum, -+ md->chargeA, md->chargeB, -+ box_size, cr, md->homenr, -+ fr->vir_el_recip, fr->ewaldcoeff_q, -+ lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table); -+ PRINT_SEPDVDL("Ewald long-range", Vlr_q, dvdl_long_range_q); -+ } -+ -+ /* Note that with separate PME nodes we get the real energies later */ -+ enerd->dvdl_lin[efptCOUL] += dvdl_long_range_q; -+ enerd->dvdl_lin[efptVDW] += dvdl_long_range_lj; -+ enerd->term[F_COUL_RECIP] = Vlr_q + Vcorr_q; -+ enerd->term[F_LJ_RECIP] = Vlr_lj + Vcorr_lj; -+ if (debug) -+ { -+ fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", -+ Vlr_q, Vcorr_q, enerd->term[F_COUL_RECIP]); -+ pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM); -+ pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); -+ fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", -+ Vlr_lj, Vcorr_lj, enerd->term[F_LJ_RECIP]); -+ pr_rvecs(debug, 0, "vir_lj_recip after corr", fr->vir_lj_recip, DIM); -+ } -+ } -+ else -+ { -+ /* Is there a reaction-field exclusion correction needed? */ -+ if (EEL_RF(fr->eeltype) && eelRF_NEC != fr->eeltype) -+ { -+ /* With the Verlet scheme, exclusion forces are calculated -+ * in the non-bonded kernel. -+ */ -+ if (ir->cutoff_scheme != ecutsVERLET) -+ { -+ real dvdl_rf_excl = 0; -+ enerd->term[F_RF_EXCL] = -+ RF_excl_correction(fr, graph, md, excl, x, f, -+ fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl); -+ -+ enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl; -+ PRINT_SEPDVDL("RF exclusion correction", -+ enerd->term[F_RF_EXCL], dvdl_rf_excl); -+ } -+ } -+ } -+ where(); -+ debug_gmx(); -+ -+ if (debug) -+ { -+ print_nrnb(debug, nrnb); -+ } -+ debug_gmx(); -+ -+#ifdef GMX_MPI -+ if (TAKETIME) -+ { -+ t2 = MPI_Wtime(); -+ MPI_Barrier(cr->mpi_comm_mygroup); -+ t3 = MPI_Wtime(); -+ fr->t_wait += t3-t2; -+ if (fr->timesteps == 11) -+ { -+ fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", -+ cr->nodeid, gmx_step_str(fr->timesteps, buf), -+ 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), -+ (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); -+ } -+ fr->timesteps++; -+ } -+#endif -+ -+ if (debug) -+ { -+ pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); -+ } -+ -+} -+ -+void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd) -+{ -+ int i, n2; -+ -+ for (i = 0; i < F_NRE; i++) -+ { -+ enerd->term[i] = 0; -+ enerd->foreign_term[i] = 0; -+ } -+ -+ -+ for (i = 0; i < efptNR; i++) -+ { -+ enerd->dvdl_lin[i] = 0; -+ enerd->dvdl_nonlin[i] = 0; -+ } -+ -+ n2 = ngener*ngener; -+ if (debug) -+ { -+ fprintf(debug, "Creating %d sized group matrix for energies\n", n2); -+ } -+ enerd->grpp.nener = n2; -+ enerd->foreign_grpp.nener = n2; -+ for (i = 0; (i < egNR); i++) -+ { -+ snew(enerd->grpp.ener[i], n2); -+ snew(enerd->foreign_grpp.ener[i], n2); -+ } -+ -+ if (n_lambda) -+ { -+ enerd->n_lambda = 1 + n_lambda; -+ snew(enerd->enerpart_lambda, enerd->n_lambda); -+ } -+ else -+ { -+ enerd->n_lambda = 0; -+ } -+} -+ -+void destroy_enerdata(gmx_enerdata_t *enerd) -+{ -+ int i; -+ -+ for (i = 0; (i < egNR); i++) -+ { -+ sfree(enerd->grpp.ener[i]); -+ } -+ -+ for (i = 0; (i < egNR); i++) -+ { -+ 
sfree(enerd->foreign_grpp.ener[i]); -+ } -+ -+ if (enerd->n_lambda) -+ { -+ sfree(enerd->enerpart_lambda); -+ } -+} -+ -+static real sum_v(int n, real v[]) -+{ -+ real t; -+ int i; -+ -+ t = 0.0; -+ for (i = 0; (i < n); i++) -+ { -+ t = t + v[i]; -+ } -+ -+ return t; -+} -+ -+void sum_epot(gmx_grppairener_t *grpp, real *epot) -+{ -+ int i; -+ -+ /* Accumulate energies */ -+ epot[F_COUL_SR] = sum_v(grpp->nener, grpp->ener[egCOULSR]); -+ epot[F_LJ] = sum_v(grpp->nener, grpp->ener[egLJSR]); -+ epot[F_LJ14] = sum_v(grpp->nener, grpp->ener[egLJ14]); -+ epot[F_COUL14] = sum_v(grpp->nener, grpp->ener[egCOUL14]); -+ epot[F_COUL_LR] = sum_v(grpp->nener, grpp->ener[egCOULLR]); -+ epot[F_LJ_LR] = sum_v(grpp->nener, grpp->ener[egLJLR]); -+ /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */ -+ epot[F_GBPOL] += sum_v(grpp->nener, grpp->ener[egGB]); -+ -+/* lattice part of LR doesnt belong to any group -+ * and has been added earlier -+ */ -+ epot[F_BHAM] = sum_v(grpp->nener, grpp->ener[egBHAMSR]); -+ epot[F_BHAM_LR] = sum_v(grpp->nener, grpp->ener[egBHAMLR]); -+ -+ epot[F_EPOT] = 0; -+ for (i = 0; (i < F_EPOT); i++) -+ { -+ if (i != F_DISRESVIOL && i != F_ORIRESDEV) -+ { -+ epot[F_EPOT] += epot[i]; -+ } -+ } -+} -+ -+void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals) -+{ -+ int i, j, index; -+ double dlam; -+ -+ enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW]; /* include dispersion correction */ -+ enerd->term[F_DVDL] = 0.0; -+ for (i = 0; i < efptNR; i++) -+ { -+ if (fepvals->separate_dvdl[i]) -+ { -+ /* could this be done more readably/compactly? */ -+ switch (i) -+ { -+ case (efptMASS): -+ index = F_DKDL; -+ break; -+ case (efptCOUL): -+ index = F_DVDL_COUL; -+ break; -+ case (efptVDW): -+ index = F_DVDL_VDW; -+ break; -+ case (efptBONDED): -+ index = F_DVDL_BONDED; -+ break; -+ case (efptRESTRAINT): -+ index = F_DVDL_RESTRAINT; -+ break; -+ default: -+ index = F_DVDL; -+ break; -+ } -+ enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; -+ if (debug) -+ { -+ fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n", -+ efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); -+ } -+ } -+ else -+ { -+ enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; -+ if (debug) -+ { -+ fprintf(debug, "dvd-%sl[%2d]: %f: non-linear %f + linear %f\n", -+ efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); -+ } -+ } -+ } -+ -+ /* Notes on the foreign lambda free energy difference evaluation: -+ * Adding the potential and ekin terms that depend linearly on lambda -+ * as delta lam * dvdl to the energy differences is exact. -+ * For the constraints this is not exact, but we have no other option -+ * without literally changing the lengths and reevaluating the energies at each step. -+ * (try to remedy this post 4.6 - MRS) -+ * For the non-bonded LR term we assume that the soft-core (if present) -+ * no longer affects the energy beyond the short-range cut-off, -+ * which is a very good approximation (except for exotic settings). -+ * (investigate how to overcome this post 4.6 - MRS) -+ */ -+ if (fepvals->separate_dvdl[efptBONDED]) -+ { -+ enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR]; -+ } -+ else -+ { -+ enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR]; -+ } -+ enerd->term[F_DVDL_CONSTR] = 0; -+ -+ for (i = 0; i < fepvals->n_lambda; i++) -+ { -+ /* note we are iterating over fepvals here! 
-+ For the current lam, dlam = 0 automatically, -+ so we don't need to add anything to the -+ enerd->enerpart_lambda[0] */ -+ -+ /* we don't need to worry about dvdl_lin contributions to dE at -+ current lambda, because the contributions to the current -+ lambda are automatically zeroed */ -+ -+ for (j = 0; j < efptNR; j++) -+ { -+ /* Note that this loop is over all dhdl components, not just the separated ones */ -+ dlam = (fepvals->all_lambda[j][i]-lambda[j]); -+ enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j]; -+ if (debug) -+ { -+ fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n", -+ fepvals->all_lambda[j][i], efpt_names[j], -+ (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]), -+ dlam, enerd->dvdl_lin[j]); -+ } -+ } -+ } -+} -+ -+ -+void reset_foreign_enerdata(gmx_enerdata_t *enerd) -+{ -+ int i, j; -+ -+ /* First reset all foreign energy components. Foreign energies always called on -+ neighbor search steps */ -+ for (i = 0; (i < egNR); i++) -+ { -+ for (j = 0; (j < enerd->grpp.nener); j++) -+ { -+ enerd->foreign_grpp.ener[i][j] = 0.0; -+ } -+ } -+ -+ /* potential energy components */ -+ for (i = 0; (i <= F_EPOT); i++) -+ { -+ enerd->foreign_term[i] = 0.0; -+ } -+} -+ -+void reset_enerdata(t_forcerec *fr, gmx_bool bNS, -+ gmx_enerdata_t *enerd, -+ gmx_bool bMaster) -+{ -+ gmx_bool bKeepLR; -+ int i, j; -+ -+ /* First reset all energy components, except for the long range terms -+ * on the master at non neighbor search steps, since the long range -+ * terms have already been summed at the last neighbor search step. -+ */ -+ bKeepLR = (fr->bTwinRange && !bNS); -+ for (i = 0; (i < egNR); i++) -+ { -+ if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR))) -+ { -+ for (j = 0; (j < enerd->grpp.nener); j++) -+ { -+ enerd->grpp.ener[i][j] = 0.0; -+ } -+ } -+ } -+ for (i = 0; i < efptNR; i++) -+ { -+ enerd->dvdl_lin[i] = 0.0; -+ enerd->dvdl_nonlin[i] = 0.0; -+ } -+ -+ /* Normal potential energy components */ -+ for (i = 0; (i <= F_EPOT); i++) -+ { -+ enerd->term[i] = 0.0; -+ } -+ /* Initialize the dVdlambda term with the long range contribution */ -+ /* Initialize the dvdl term with the long range contribution */ -+ enerd->term[F_DVDL] = 0.0; -+ enerd->term[F_DVDL_COUL] = 0.0; -+ enerd->term[F_DVDL_VDW] = 0.0; -+ enerd->term[F_DVDL_BONDED] = 0.0; -+ enerd->term[F_DVDL_RESTRAINT] = 0.0; -+ enerd->term[F_DKDL] = 0.0; -+ if (enerd->n_lambda > 0) -+ { -+ for (i = 0; i < enerd->n_lambda; i++) -+ { -+ enerd->enerpart_lambda[i] = 0.0; -+ } -+ } -+ /* reset foreign energy data - separate function since we also call it elsewhere */ -+ reset_foreign_enerdata(enerd); -+} -diff --git a/src/gromacs/mdlib/minimize.c b/src/gromacs/mdlib/minimize.c -index 69008f5..5114fa0 100644 ---- a/src/gromacs/mdlib/minimize.c -+++ b/src/gromacs/mdlib/minimize.c -@@ -80,6 +80,13 @@ - #include "gromacs/timing/walltime_accounting.h" - #include "gromacs/imd/imd.h" - -+/* PLUMED */ -+#include "../../../Plumed.h" -+extern int plumedswitch; -+extern plumed plumedmain; -+extern void(*plumedcmd)(plumed,const char*,const void*); -+/* END PLUMED */ -+ - typedef struct { - t_state s; - rvec *f; -@@ -442,6 +449,43 @@ void init_em(FILE *fplog, const char *title, - - clear_rvec(mu_tot); - calc_shifts(ems->s.box, fr->shift_vec); -+ -+ /* PLUMED */ -+ if(plumedswitch){ -+ if(cr->ms && cr->ms->nsim>1) { -+ if(MASTER(cr)) (*plumedcmd) (plumedmain,"GREX setMPIIntercomm",&cr->ms->mpi_comm_masters); -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ (*plumedcmd) (plumedmain,"GREX 
setMPIIntracomm",&cr->dd->mpi_comm_all); -+ }else{ -+ (*plumedcmd) (plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim); -+ } -+ } -+ (*plumedcmd) (plumedmain,"GREX init",NULL); -+ } -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ (*plumedcmd) (plumedmain,"setMPIComm",&cr->dd->mpi_comm_all); -+ }else{ -+ (*plumedcmd) (plumedmain,"setMPIComm",&cr->mpi_comm_mysim); -+ } -+ } -+ (*plumedcmd) (plumedmain,"setNatoms",&top_global->natoms); -+ (*plumedcmd) (plumedmain,"setMDEngine","gromacs"); -+ (*plumedcmd) (plumedmain,"setLog",fplog); -+ real real_delta_t; -+ real_delta_t=ir->delta_t; -+ (*plumedcmd) (plumedmain,"setTimestep",&real_delta_t); -+ (*plumedcmd) (plumedmain,"init",NULL); -+ -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ (*plumedcmd) (plumedmain,"setAtomsNlocal",&cr->dd->nat_home); -+ (*plumedcmd) (plumedmain,"setAtomsGatindex",cr->dd->gatindex); -+ } -+ } -+ } -+ /* END PLUMED */ - } - - static void finish_em(t_commrec *cr, gmx_mdoutf_t outf, -@@ -737,12 +781,34 @@ static void evaluate_energy(FILE *fplog, t_commrec *cr, - em_dd_partition_system(fplog, count, cr, top_global, inputrec, - ems, top, mdatoms, fr, vsite, constr, - nrnb, wcycle); -+ /* PLUMED */ -+ if(plumedswitch){ -+ (*plumedcmd) (plumedmain,"setAtomsNlocal",&cr->dd->nat_home); -+ (*plumedcmd) (plumedmain,"setAtomsGatindex",cr->dd->gatindex); -+ } -+ /* END PLUMED */ - } - - /* Calc force & energy on new trial position */ - /* do_force always puts the charge groups in the box and shifts again - * We do not unshift, so molecules are always whole in congrad.c - */ -+ /* PLUMED */ -+ int plumedNeedsEnergy=0; -+ matrix plumed_vir; -+ if(plumedswitch){ -+ long int lstep=count; (*plumedcmd)(plumedmain,"setStepLong",&count); -+ (*plumedcmd) (plumedmain,"setPositions",&ems->s.x[0][0]); -+ (*plumedcmd) (plumedmain,"setMasses",&mdatoms->massT[0]); -+ (*plumedcmd) (plumedmain,"setCharges",&mdatoms->chargeA[0]); -+ (*plumedcmd) (plumedmain,"setBox",&ems->s.box[0][0]); -+ (*plumedcmd) (plumedmain,"prepareCalc",NULL); -+ (*plumedcmd) (plumedmain,"setForces",&ems->f[0][0]); -+ (*plumedcmd) (plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); -+ clear_mat(plumed_vir); -+ (*plumedcmd) (plumedmain,"setVirial",&plumed_vir[0][0]); -+ } -+ /* END PLUMED */ - do_force(fplog, cr, inputrec, - count, nrnb, wcycle, top, &top_global->groups, - ems->s.box, ems->s.x, &ems->s.hist, -@@ -751,6 +817,19 @@ static void evaluate_energy(FILE *fplog, t_commrec *cr, - GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | - GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY | - (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0)); -+ /* PLUMED */ -+ if(plumedswitch){ -+ if(plumedNeedsEnergy) { -+ msmul(force_vir,2.0,plumed_vir); -+ (*plumedcmd) (plumedmain,"setEnergy",&enerd->term[F_EPOT]); -+ (*plumedcmd) (plumedmain,"performCalc",NULL); -+ msmul(plumed_vir,0.5,force_vir); -+ } else { -+ msmul(plumed_vir,0.5,plumed_vir); -+ m_add(force_vir,plumed_vir,force_vir); -+ } -+ } -+ /* END PLUMED */ - - /* Clear the unused shake virial and pressure */ - clear_mat(shake_vir); -diff --git a/src/gromacs/mdlib/minimize.c.preplumed b/src/gromacs/mdlib/minimize.c.preplumed -new file mode 100644 -index 0000000..69008f5 ---- /dev/null -+++ b/src/gromacs/mdlib/minimize.c.preplumed -@@ -0,0 +1,2906 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team. 
-+ * Copyright (c) 2013,2014, by the GROMACS development team, led by -+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+ * and including many others, as listed in the AUTHORS file in the -+ * top-level source directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. -+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. -+ */ -+#ifdef HAVE_CONFIG_H -+#include <config.h> -+#endif -+ -+#include <string.h> -+#include <time.h> -+#include <math.h> -+#include "sysstuff.h" -+#include "gromacs/utility/cstringutil.h" -+#include "network.h" -+#include "gromacs/utility/smalloc.h" -+#include "nrnb.h" -+#include "main.h" -+#include "force.h" -+#include "macros.h" -+#include "names.h" -+#include "gmx_fatal.h" -+#include "txtdump.h" -+#include "typedefs.h" -+#include "update.h" -+#include "constr.h" -+#include "vec.h" -+#include "tgroup.h" -+#include "mdebin.h" -+#include "vsite.h" -+#include "force.h" -+#include "mdrun.h" -+#include "md_support.h" -+#include "sim_util.h" -+#include "domdec.h" -+#include "mdatoms.h" -+#include "ns.h" -+#include "mtop_util.h" -+#include "pme.h" -+#include "bondf.h" -+#include "gmx_omp_nthreads.h" -+#include "md_logging.h" -+ -+#include "gromacs/fileio/confio.h" -+#include "gromacs/fileio/trajectory_writing.h" -+#include "gromacs/linearalgebra/mtxio.h" -+#include "gromacs/linearalgebra/sparsematrix.h" -+#include "gromacs/timing/wallcycle.h" -+#include "gromacs/timing/walltime_accounting.h" -+#include "gromacs/imd/imd.h" -+ -+typedef struct { -+ t_state s; -+ rvec *f; -+ real epot; -+ real fnorm; -+ real fmax; -+ int a_fmax; -+} em_state_t; -+ -+static em_state_t *init_em_state() -+{ -+ em_state_t *ems; -+ -+ snew(ems, 1); -+ -+ /* does this need to be here? Should the array be declared differently (staticaly)in the state definition?
*/ -+ snew(ems->s.lambda, efptNR); -+ -+ return ems; -+} -+ -+static void print_em_start(FILE *fplog, -+ t_commrec *cr, -+ gmx_walltime_accounting_t walltime_accounting, -+ gmx_wallcycle_t wcycle, -+ const char *name) -+{ -+ walltime_accounting_start(walltime_accounting); -+ wallcycle_start(wcycle, ewcRUN); -+ print_start(fplog, cr, walltime_accounting, name); -+} -+static void em_time_end(gmx_walltime_accounting_t walltime_accounting, -+ gmx_wallcycle_t wcycle) -+{ -+ wallcycle_stop(wcycle, ewcRUN); -+ -+ walltime_accounting_end(walltime_accounting); -+} -+ -+static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps) -+{ -+ fprintf(out, "\n"); -+ fprintf(out, "%s:\n", minimizer); -+ fprintf(out, " Tolerance (Fmax) = %12.5e\n", ftol); -+ fprintf(out, " Number of steps = %12d\n", nsteps); -+} -+ -+static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain) -+{ -+ char buffer[2048]; -+ if (bLastStep) -+ { -+ sprintf(buffer, -+ "\nEnergy minimization reached the maximum number " -+ "of steps before the forces reached the requested " -+ "precision Fmax < %g.\n", ftol); -+ } -+ else -+ { -+ sprintf(buffer, -+ "\nEnergy minimization has stopped, but the forces have " -+ "not converged to the requested precision Fmax < %g (which " -+ "may not be possible for your system). It stopped " -+ "because the algorithm tried to make a new step whose size " -+ "was too small, or there was no change in the energy since " -+ "last step. Either way, we regard the minimization as " -+ "converged to within the available machine precision, " -+ "given your starting configuration and EM parameters.\n%s%s", -+ ftol, -+ sizeof(real) < sizeof(double) ? -+ "\nDouble precision normally gives you higher accuracy, but " -+ "this is often not needed for preparing to run molecular " -+ "dynamics.\n" : -+ "", -+ bConstrain ? -+ "You might need to increase your constraint accuracy, or turn\n" -+ "off constraints altogether (set constraints = none in mdp file)\n" : -+ ""); -+ } -+ fputs(wrap_lines(buffer, 78, 0, FALSE), fp); -+} -+ -+ -+ -+static void print_converged(FILE *fp, const char *alg, real ftol, -+ gmx_int64_t count, gmx_bool bDone, gmx_int64_t nsteps, -+ real epot, real fmax, int nfmax, real fnorm) -+{ -+ char buf[STEPSTRSIZE]; -+ -+ if (bDone) -+ { -+ fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n", -+ alg, ftol, gmx_step_str(count, buf)); -+ } -+ else if (count < nsteps) -+ { -+ fprintf(fp, "\n%s converged to machine precision in %s steps,\n" -+ "but did not reach the requested Fmax < %g.\n", -+ alg, gmx_step_str(count, buf), ftol); -+ } -+ else -+ { -+ fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n", -+ alg, ftol, gmx_step_str(count, buf)); -+ } -+ -+#ifdef GMX_DOUBLE -+ fprintf(fp, "Potential Energy = %21.14e\n", epot); -+ fprintf(fp, "Maximum force = %21.14e on atom %d\n", fmax, nfmax+1); -+ fprintf(fp, "Norm of force = %21.14e\n", fnorm); -+#else -+ fprintf(fp, "Potential Energy = %14.7e\n", epot); -+ fprintf(fp, "Maximum force = %14.7e on atom %d\n", fmax, nfmax+1); -+ fprintf(fp, "Norm of force = %14.7e\n", fnorm); -+#endif -+} -+ -+static void get_f_norm_max(t_commrec *cr, -+ t_grpopts *opts, t_mdatoms *mdatoms, rvec *f, -+ real *fnorm, real *fmax, int *a_fmax) -+{ -+ double fnorm2, *sum; -+ real fmax2, fmax2_0, fam; -+ int la_max, a_max, start, end, i, m, gf; -+ -+ /* This routine finds the largest force and returns it. -+ * On parallel machines the global max is taken. 
-+ */ -+ fnorm2 = 0; -+ fmax2 = 0; -+ la_max = -1; -+ gf = 0; -+ start = 0; -+ end = mdatoms->homenr; -+ if (mdatoms->cFREEZE) -+ { -+ for (i = start; i < end; i++) -+ { -+ gf = mdatoms->cFREEZE[i]; -+ fam = 0; -+ for (m = 0; m < DIM; m++) -+ { -+ if (!opts->nFreeze[gf][m]) -+ { -+ fam += sqr(f[i][m]); -+ } -+ } -+ fnorm2 += fam; -+ if (fam > fmax2) -+ { -+ fmax2 = fam; -+ la_max = i; -+ } -+ } -+ } -+ else -+ { -+ for (i = start; i < end; i++) -+ { -+ fam = norm2(f[i]); -+ fnorm2 += fam; -+ if (fam > fmax2) -+ { -+ fmax2 = fam; -+ la_max = i; -+ } -+ } -+ } -+ -+ if (la_max >= 0 && DOMAINDECOMP(cr)) -+ { -+ a_max = cr->dd->gatindex[la_max]; -+ } -+ else -+ { -+ a_max = la_max; -+ } -+ if (PAR(cr)) -+ { -+ snew(sum, 2*cr->nnodes+1); -+ sum[2*cr->nodeid] = fmax2; -+ sum[2*cr->nodeid+1] = a_max; -+ sum[2*cr->nnodes] = fnorm2; -+ gmx_sumd(2*cr->nnodes+1, sum, cr); -+ fnorm2 = sum[2*cr->nnodes]; -+ /* Determine the global maximum */ -+ for (i = 0; i < cr->nnodes; i++) -+ { -+ if (sum[2*i] > fmax2) -+ { -+ fmax2 = sum[2*i]; -+ a_max = (int)(sum[2*i+1] + 0.5); -+ } -+ } -+ sfree(sum); -+ } -+ -+ if (fnorm) -+ { -+ *fnorm = sqrt(fnorm2); -+ } -+ if (fmax) -+ { -+ *fmax = sqrt(fmax2); -+ } -+ if (a_fmax) -+ { -+ *a_fmax = a_max; -+ } -+} -+ -+static void get_state_f_norm_max(t_commrec *cr, -+ t_grpopts *opts, t_mdatoms *mdatoms, -+ em_state_t *ems) -+{ -+ get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax); -+} -+ -+void init_em(FILE *fplog, const char *title, -+ t_commrec *cr, t_inputrec *ir, -+ t_state *state_global, gmx_mtop_t *top_global, -+ em_state_t *ems, gmx_localtop_t **top, -+ rvec **f, rvec **f_global, -+ t_nrnb *nrnb, rvec mu_tot, -+ t_forcerec *fr, gmx_enerdata_t **enerd, -+ t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int nfile, const t_filenm fnm[], -+ gmx_mdoutf_t *outf, t_mdebin **mdebin, -+ int imdport, unsigned long gmx_unused Flags, -+ gmx_wallcycle_t wcycle) -+{ -+ int i; -+ real dvdl_constr; -+ -+ if (fplog) -+ { -+ fprintf(fplog, "Initiating %s\n", title); -+ } -+ -+ state_global->ngtc = 0; -+ -+ /* Initialize lambda variables */ -+ initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, NULL); -+ -+ init_nrnb(nrnb); -+ -+ /* Interactive molecular dynamics */ -+ init_IMD(ir, cr, top_global, fplog, 1, state_global->x, -+ nfile, fnm, NULL, imdport, Flags); -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ *top = dd_init_local_top(top_global); -+ -+ dd_init_local_state(cr->dd, state_global, &ems->s); -+ -+ *f = NULL; -+ -+ /* Distribute the charge groups over the nodes from the master node */ -+ dd_partition_system(fplog, ir->init_step, cr, TRUE, 1, -+ state_global, top_global, ir, -+ &ems->s, &ems->f, mdatoms, *top, -+ fr, vsite, NULL, constr, -+ nrnb, NULL, FALSE); -+ dd_store_state(cr->dd, &ems->s); -+ -+ if (ir->nstfout) -+ { -+ snew(*f_global, top_global->natoms); -+ } -+ else -+ { -+ *f_global = NULL; -+ } -+ *graph = NULL; -+ } -+ else -+ { -+ snew(*f, top_global->natoms); -+ -+ /* Just copy the state */ -+ ems->s = *state_global; -+ snew(ems->s.x, ems->s.nalloc); -+ snew(ems->f, ems->s.nalloc); -+ for (i = 0; i < state_global->natoms; i++) -+ { -+ copy_rvec(state_global->x[i], ems->s.x[i]); -+ } -+ copy_mat(state_global->box, ems->s.box); -+ -+ *top = gmx_mtop_generate_local_top(top_global, ir); -+ *f_global = *f; -+ -+ forcerec_set_excl_load(fr, *top); -+ -+ setup_bonded_threading(fr, &(*top)->idef); -+ -+ if (ir->ePBC != epbcNONE && !fr->bMolPBC) -+ { -+ *graph = 
mk_graph(fplog, &((*top)->idef), 0, top_global->natoms, FALSE, FALSE); -+ } -+ else -+ { -+ *graph = NULL; -+ } -+ -+ atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms); -+ update_mdatoms(mdatoms, state_global->lambda[efptFEP]); -+ -+ if (vsite) -+ { -+ set_vsite_top(vsite, *top, mdatoms, cr); -+ } -+ } -+ -+ if (constr) -+ { -+ if (ir->eConstrAlg == econtSHAKE && -+ gmx_mtop_ftype_count(top_global, F_CONSTR) > 0) -+ { -+ gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n", -+ econstr_names[econtSHAKE], econstr_names[econtLINCS]); -+ } -+ -+ if (!DOMAINDECOMP(cr)) -+ { -+ set_constraints(constr, *top, ir, mdatoms, cr); -+ } -+ -+ if (!ir->bContinuation) -+ { -+ /* Constrain the starting coordinates */ -+ dvdl_constr = 0; -+ constrain(PAR(cr) ? NULL : fplog, TRUE, TRUE, constr, &(*top)->idef, -+ ir, NULL, cr, -1, 0, 1.0, mdatoms, -+ ems->s.x, ems->s.x, NULL, fr->bMolPBC, ems->s.box, -+ ems->s.lambda[efptFEP], &dvdl_constr, -+ NULL, NULL, nrnb, econqCoord, FALSE, 0, 0); -+ } -+ } -+ -+ if (PAR(cr)) -+ { -+ *gstat = global_stat_init(ir); -+ } -+ -+ *outf = init_mdoutf(fplog, nfile, fnm, 0, cr, ir, top_global, NULL, wcycle); -+ -+ snew(*enerd, 1); -+ init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, -+ *enerd); -+ -+ if (mdebin != NULL) -+ { -+ /* Init bin for energy stuff */ -+ *mdebin = init_mdebin(mdoutf_get_fp_ene(*outf), top_global, ir, NULL); -+ } -+ -+ clear_rvec(mu_tot); -+ calc_shifts(ems->s.box, fr->shift_vec); -+} -+ -+static void finish_em(t_commrec *cr, gmx_mdoutf_t outf, -+ gmx_walltime_accounting_t walltime_accounting, -+ gmx_wallcycle_t wcycle) -+{ -+ if (!(cr->duty & DUTY_PME)) -+ { -+ /* Tell the PME only node to finish */ -+ gmx_pme_send_finish(cr); -+ } -+ -+ done_mdoutf(outf); -+ -+ em_time_end(walltime_accounting, wcycle); -+} -+ -+static void swap_em_state(em_state_t *ems1, em_state_t *ems2) -+{ -+ em_state_t tmp; -+ -+ tmp = *ems1; -+ *ems1 = *ems2; -+ *ems2 = tmp; -+} -+ -+static void copy_em_coords(em_state_t *ems, t_state *state) -+{ -+ int i; -+ -+ for (i = 0; (i < state->natoms); i++) -+ { -+ copy_rvec(ems->s.x[i], state->x[i]); -+ } -+} -+ -+static void write_em_traj(FILE *fplog, t_commrec *cr, -+ gmx_mdoutf_t outf, -+ gmx_bool bX, gmx_bool bF, const char *confout, -+ gmx_mtop_t *top_global, -+ t_inputrec *ir, gmx_int64_t step, -+ em_state_t *state, -+ t_state *state_global, rvec *f_global) -+{ -+ int mdof_flags; -+ gmx_bool bIMDout = FALSE; -+ -+ -+ /* Shall we do IMD output? 
*/ -+ if (ir->bIMD) -+ { -+ bIMDout = do_per_step(step, IMD_get_step(ir->imd->setup)); -+ } -+ -+ if ((bX || bF || bIMDout || confout != NULL) && !DOMAINDECOMP(cr)) -+ { -+ copy_em_coords(state, state_global); -+ f_global = state->f; -+ } -+ -+ mdof_flags = 0; -+ if (bX) -+ { -+ mdof_flags |= MDOF_X; -+ } -+ if (bF) -+ { -+ mdof_flags |= MDOF_F; -+ } -+ -+ /* If we want IMD output, set appropriate MDOF flag */ -+ if (ir->bIMD) -+ { -+ mdof_flags |= MDOF_IMD; -+ } -+ -+ mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, -+ top_global, step, (double)step, -+ &state->s, state_global, state->f, f_global); -+ -+ if (confout != NULL && MASTER(cr)) -+ { -+ if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr)) -+ { -+ /* Make molecules whole only for confout writing */ -+ do_pbc_mtop(fplog, ir->ePBC, state_global->box, top_global, -+ state_global->x); -+ } -+ -+ write_sto_conf_mtop(confout, -+ *top_global->name, top_global, -+ state_global->x, NULL, ir->ePBC, state_global->box); -+ } -+} -+ -+static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md, -+ gmx_bool bMolPBC, -+ em_state_t *ems1, real a, rvec *f, em_state_t *ems2, -+ gmx_constr_t constr, gmx_localtop_t *top, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_int64_t count) -+ -+{ -+ t_state *s1, *s2; -+ int i; -+ int start, end; -+ rvec *x1, *x2; -+ real dvdl_constr; -+ int nthreads gmx_unused; -+ -+ s1 = &ems1->s; -+ s2 = &ems2->s; -+ -+ if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count) -+ { -+ gmx_incons("state mismatch in do_em_step"); -+ } -+ -+ s2->flags = s1->flags; -+ -+ if (s2->nalloc != s1->nalloc) -+ { -+ s2->nalloc = s1->nalloc; -+ srenew(s2->x, s1->nalloc); -+ srenew(ems2->f, s1->nalloc); -+ if (s2->flags & (1<<estCGP)) -+ { -+ srenew(s2->cg_p, s1->nalloc); -+ } -+ } -+ -+ s2->natoms = s1->natoms; -+ copy_mat(s1->box, s2->box); -+ /* Copy free energy state */ -+ for (i = 0; i < efptNR; i++) -+ { -+ s2->lambda[i] = s1->lambda[i]; -+ } -+ copy_mat(s1->box, s2->box); -+ -+ start = 0; -+ end = md->homenr; -+ -+ x1 = s1->x; -+ x2 = s2->x; -+ -+ nthreads = gmx_omp_nthreads_get(emntUpdate); -+#pragma omp parallel num_threads(nthreads) -+ { -+ int gf, i, m; -+ -+ gf = 0; -+#pragma omp for schedule(static) nowait -+ for (i = start; i < end; i++) -+ { -+ if (md->cFREEZE) -+ { -+ gf = md->cFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ if (ir->opts.nFreeze[gf][m]) -+ { -+ x2[i][m] = x1[i][m]; -+ } -+ else -+ { -+ x2[i][m] = x1[i][m] + a*f[i][m]; -+ } -+ } -+ } -+ -+ if (s2->flags & (1<<estCGP)) -+ { -+ x1 = s1->cg_p; -+ x2 = s2->cg_p; -+#pragma omp for schedule(static) nowait -+ for (i = start; i < end; i++) -+ { -+ copy_rvec(x1[i], x2[i]); -+ } -+ } -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ s2->ddp_count = s1->ddp_count; -+ if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) -+ { -+#pragma omp barrier -+ s2->cg_gl_nalloc = s1->cg_gl_nalloc; -+ srenew(s2->cg_gl, s2->cg_gl_nalloc); -+#pragma omp barrier -+ } -+ s2->ncg_gl = s1->ncg_gl; -+#pragma omp for schedule(static) nowait -+ for (i = 0; i < s2->ncg_gl; i++) -+ { -+ s2->cg_gl[i] = s1->cg_gl[i]; -+ } -+ s2->ddp_count_cg_gl = s1->ddp_count_cg_gl; -+ } -+ } -+ -+ if (constr) -+ { -+ wallcycle_start(wcycle, ewcCONSTR); -+ dvdl_constr = 0; -+ constrain(NULL, TRUE, TRUE, constr, &top->idef, -+ ir, NULL, cr, count, 0, 1.0, md, -+ s1->x, s2->x, NULL, bMolPBC, s2->box, -+ s2->lambda[efptBONDED], &dvdl_constr, -+ NULL, NULL, nrnb, econqCoord, FALSE, 0, 0); -+ wallcycle_stop(wcycle, ewcCONSTR); -+ } -+} -+ -+static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr, -+ gmx_mtop_t *top_global, t_inputrec
*ir, -+ em_state_t *ems, gmx_localtop_t *top, -+ t_mdatoms *mdatoms, t_forcerec *fr, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle) -+{ -+ /* Repartition the domain decomposition */ -+ wallcycle_start(wcycle, ewcDOMDEC); -+ dd_partition_system(fplog, step, cr, FALSE, 1, -+ NULL, top_global, ir, -+ &ems->s, &ems->f, -+ mdatoms, top, fr, vsite, NULL, constr, -+ nrnb, wcycle, FALSE); -+ dd_store_state(cr->dd, &ems->s); -+ wallcycle_stop(wcycle, ewcDOMDEC); -+} -+ -+static void evaluate_energy(FILE *fplog, t_commrec *cr, -+ gmx_mtop_t *top_global, -+ em_state_t *ems, gmx_localtop_t *top, -+ t_inputrec *inputrec, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_global_stat_t gstat, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ t_fcdata *fcd, -+ t_graph *graph, t_mdatoms *mdatoms, -+ t_forcerec *fr, rvec mu_tot, -+ gmx_enerdata_t *enerd, tensor vir, tensor pres, -+ gmx_int64_t count, gmx_bool bFirst) -+{ -+ real t; -+ gmx_bool bNS; -+ int nabnsb; -+ tensor force_vir, shake_vir, ekin; -+ real dvdl_constr, prescorr, enercorr, dvdlcorr; -+ real terminate = 0; -+ -+ /* Set the time to the initial time, the time does not change during EM */ -+ t = inputrec->init_t; -+ -+ if (bFirst || -+ (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) -+ { -+ /* This is the first state or an old state used before the last ns */ -+ bNS = TRUE; -+ } -+ else -+ { -+ bNS = FALSE; -+ if (inputrec->nstlist > 0) -+ { -+ bNS = TRUE; -+ } -+ else if (inputrec->nstlist == -1) -+ { -+ nabnsb = natoms_beyond_ns_buffer(inputrec, fr, &top->cgs, NULL, ems->s.x); -+ if (PAR(cr)) -+ { -+ gmx_sumi(1, &nabnsb, cr); -+ } -+ bNS = (nabnsb > 0); -+ } -+ } -+ -+ if (vsite) -+ { -+ construct_vsites(vsite, ems->s.x, 1, NULL, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, cr, ems->s.box); -+ } -+ -+ if (DOMAINDECOMP(cr) && bNS) -+ { -+ /* Repartition the domain decomposition */ -+ em_dd_partition_system(fplog, count, cr, top_global, inputrec, -+ ems, top, mdatoms, fr, vsite, constr, -+ nrnb, wcycle); -+ } -+ -+ /* Calc force & energy on new trial position */ -+ /* do_force always puts the charge groups in the box and shifts again -+ * We do not unshift, so molecules are always whole in congrad.c -+ */ -+ do_force(fplog, cr, inputrec, -+ count, nrnb, wcycle, top, &top_global->groups, -+ ems->s.box, ems->s.x, &ems->s.hist, -+ ems->f, force_vir, mdatoms, enerd, fcd, -+ ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL, NULL, TRUE, -+ GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | -+ GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY | -+ (bNS ? 
GMX_FORCE_NS | GMX_FORCE_DO_LR : 0)); -+ -+ /* Clear the unused shake virial and pressure */ -+ clear_mat(shake_vir); -+ clear_mat(pres); -+ -+ /* Communicate stuff when parallel */ -+ if (PAR(cr) && inputrec->eI != eiNM) -+ { -+ wallcycle_start(wcycle, ewcMoveE); -+ -+ global_stat(fplog, gstat, cr, enerd, force_vir, shake_vir, mu_tot, -+ inputrec, NULL, NULL, NULL, 1, &terminate, -+ top_global, &ems->s, FALSE, -+ CGLO_ENERGY | -+ CGLO_PRESSURE | -+ CGLO_CONSTRAINT | -+ CGLO_FIRSTITERATE); -+ -+ wallcycle_stop(wcycle, ewcMoveE); -+ } -+ -+ /* Calculate long range corrections to pressure and energy */ -+ calc_dispcorr(fplog, inputrec, fr, count, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW], -+ pres, force_vir, &prescorr, &enercorr, &dvdlcorr); -+ enerd->term[F_DISPCORR] = enercorr; -+ enerd->term[F_EPOT] += enercorr; -+ enerd->term[F_PRES] += prescorr; -+ enerd->term[F_DVDL] += dvdlcorr; -+ -+ ems->epot = enerd->term[F_EPOT]; -+ -+ if (constr) -+ { -+ /* Project out the constraint components of the force */ -+ wallcycle_start(wcycle, ewcCONSTR); -+ dvdl_constr = 0; -+ constrain(NULL, FALSE, FALSE, constr, &top->idef, -+ inputrec, NULL, cr, count, 0, 1.0, mdatoms, -+ ems->s.x, ems->f, ems->f, fr->bMolPBC, ems->s.box, -+ ems->s.lambda[efptBONDED], &dvdl_constr, -+ NULL, &shake_vir, nrnb, econqForceDispl, FALSE, 0, 0); -+ if (fr->bSepDVDL && fplog) -+ { -+ gmx_print_sepdvdl(fplog, "Constraints", t, dvdl_constr); -+ } -+ enerd->term[F_DVDL_CONSTR] += dvdl_constr; -+ m_add(force_vir, shake_vir, vir); -+ wallcycle_stop(wcycle, ewcCONSTR); -+ } -+ else -+ { -+ copy_mat(force_vir, vir); -+ } -+ -+ clear_mat(ekin); -+ enerd->term[F_PRES] = -+ calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres); -+ -+ sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals); -+ -+ if (EI_ENERGY_MINIMIZATION(inputrec->eI)) -+ { -+ get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, ems); -+ } -+} -+ -+static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, -+ gmx_mtop_t *mtop, -+ em_state_t *s_min, em_state_t *s_b) -+{ -+ rvec *fm, *fb, *fmg; -+ t_block *cgs_gl; -+ int ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m; -+ double partsum; -+ unsigned char *grpnrFREEZE; -+ -+ if (debug) -+ { -+ fprintf(debug, "Doing reorder_partsum\n"); -+ } -+ -+ fm = s_min->f; -+ fb = s_b->f; -+ -+ cgs_gl = dd_charge_groups_global(cr->dd); -+ index = cgs_gl->index; -+ -+ /* Collect fm in a global vector fmg. -+ * This conflicts with the spirit of domain decomposition, -+ * but to fully optimize this a much more complicated algorithm is required. 
-+ */ -+ snew(fmg, mtop->natoms); -+ -+ ncg = s_min->s.ncg_gl; -+ cg_gl = s_min->s.cg_gl; -+ i = 0; -+ for (c = 0; c < ncg; c++) -+ { -+ cg = cg_gl[c]; -+ a0 = index[cg]; -+ a1 = index[cg+1]; -+ for (a = a0; a < a1; a++) -+ { -+ copy_rvec(fm[i], fmg[a]); -+ i++; -+ } -+ } -+ gmx_sum(mtop->natoms*3, fmg[0], cr); -+ -+ /* Now we will determine the part of the sum for the cgs in state s_b */ -+ ncg = s_b->s.ncg_gl; -+ cg_gl = s_b->s.cg_gl; -+ partsum = 0; -+ i = 0; -+ gf = 0; -+ grpnrFREEZE = mtop->groups.grpnr[egcFREEZE]; -+ for (c = 0; c < ncg; c++) -+ { -+ cg = cg_gl[c]; -+ a0 = index[cg]; -+ a1 = index[cg+1]; -+ for (a = a0; a < a1; a++) -+ { -+ if (mdatoms->cFREEZE && grpnrFREEZE) -+ { -+ gf = grpnrFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ if (!opts->nFreeze[gf][m]) -+ { -+ partsum += (fb[i][m] - fmg[a][m])*fb[i][m]; -+ } -+ } -+ i++; -+ } -+ } -+ -+ sfree(fmg); -+ -+ return partsum; -+} -+ -+static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, -+ gmx_mtop_t *mtop, -+ em_state_t *s_min, em_state_t *s_b) -+{ -+ rvec *fm, *fb; -+ double sum; -+ int gf, i, m; -+ -+ /* This is just the classical Polak-Ribiere calculation of beta; -+ * it looks a bit complicated since we take freeze groups into account, -+ * and might have to sum it in parallel runs. -+ */ -+ -+ if (!DOMAINDECOMP(cr) || -+ (s_min->s.ddp_count == cr->dd->ddp_count && -+ s_b->s.ddp_count == cr->dd->ddp_count)) -+ { -+ fm = s_min->f; -+ fb = s_b->f; -+ sum = 0; -+ gf = 0; -+ /* This part of code can be incorrect with DD, -+ * since the atom ordering in s_b and s_min might differ. -+ */ -+ for (i = 0; i < mdatoms->homenr; i++) -+ { -+ if (mdatoms->cFREEZE) -+ { -+ gf = mdatoms->cFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ if (!opts->nFreeze[gf][m]) -+ { -+ sum += (fb[i][m] - fm[i][m])*fb[i][m]; -+ } -+ } -+ } -+ } -+ else -+ { -+ /* We need to reorder cgs while summing */ -+ sum = reorder_partsum(cr, opts, mdatoms, mtop, s_min, s_b); -+ } -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &sum, cr); -+ } -+ -+ return sum/sqr(s_min->fnorm); -+} -+ -+double do_cg(FILE *fplog, t_commrec *cr, -+ int nfile, const t_filenm fnm[], -+ const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact, -+ int gmx_unused nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int gmx_unused stepout, -+ t_inputrec *inputrec, -+ gmx_mtop_t *top_global, t_fcdata *fcd, -+ t_state *state_global, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t gmx_unused ed, -+ t_forcerec *fr, -+ int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, -+ gmx_membed_t gmx_unused membed, -+ real gmx_unused cpt_period, real gmx_unused max_hours, -+ const char gmx_unused *deviceOptions, -+ int imdport, -+ unsigned long gmx_unused Flags, -+ gmx_walltime_accounting_t walltime_accounting) -+{ -+ const char *CG = "Polak-Ribiere Conjugate Gradients"; -+ -+ em_state_t *s_min, *s_a, *s_b, *s_c; -+ gmx_localtop_t *top; -+ gmx_enerdata_t *enerd; -+ rvec *f; -+ gmx_global_stat_t gstat; -+ t_graph *graph; -+ rvec *f_global, *p, *sf, *sfm; -+ double gpa, gpb, gpc, tmp, sum[2], minstep; -+ real fnormn; -+ real stepsize; -+ real a, b, c, beta = 0.0; -+ real epot_repl = 0; -+ real pnorm; -+ t_mdebin *mdebin; -+ gmx_bool converged, foundlower; -+ rvec mu_tot; -+ gmx_bool do_log = FALSE, do_ene = FALSE, do_x, do_f; -+ tensor vir, pres; -+ int number_steps, neval = 0, nstcg = inputrec->nstcgsteep; -+ gmx_mdoutf_t outf; -+ int i, m, gf, step, nminstep; -+ real terminate = 0; -+ 
-+ step = 0; -+ -+ s_min = init_em_state(); -+ s_a = init_em_state(); -+ s_b = init_em_state(); -+ s_c = init_em_state(); -+ -+ /* Init em and store the local state in s_min */ -+ init_em(fplog, CG, cr, inputrec, -+ state_global, top_global, s_min, &top, &f, &f_global, -+ nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, -+ nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); -+ -+ /* Print to log file */ -+ print_em_start(fplog, cr, walltime_accounting, wcycle, CG); -+ -+ /* Max number of steps */ -+ number_steps = inputrec->nsteps; -+ -+ if (MASTER(cr)) -+ { -+ sp_header(stderr, CG, inputrec->em_tol, number_steps); -+ } -+ if (fplog) -+ { -+ sp_header(fplog, CG, inputrec->em_tol, number_steps); -+ } -+ -+ /* Call the force routine and some auxiliary (neighboursearching etc.) */ -+ /* do_force always puts the charge groups in the box and shifts again -+ * We do not unshift, so molecules are always whole in congrad.c -+ */ -+ evaluate_energy(fplog, cr, -+ top_global, s_min, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, TRUE); -+ where(); -+ -+ if (MASTER(cr)) -+ { -+ /* Copy stuff to the energy bin for easy printing etc. */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)step, -+ mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box, -+ NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ -+ print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]); -+ print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ where(); -+ -+ /* Estimate/guess the initial stepsize */ -+ stepsize = inputrec->em_stepsize/s_min->fnorm; -+ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, " F-max = %12.5e on atom %d\n", -+ s_min->fmax, s_min->a_fmax+1); -+ fprintf(stderr, " F-Norm = %12.5e\n", -+ s_min->fnorm/sqrt(state_global->natoms)); -+ fprintf(stderr, "\n"); -+ /* and copy to the log file too... */ -+ fprintf(fplog, " F-max = %12.5e on atom %d\n", -+ s_min->fmax, s_min->a_fmax+1); -+ fprintf(fplog, " F-Norm = %12.5e\n", -+ s_min->fnorm/sqrt(state_global->natoms)); -+ fprintf(fplog, "\n"); -+ } -+ /* Start the loop over CG steps. -+ * Each successful step is counted, and we continue until -+ * we either converge or reach the max number of steps. -+ */ -+ converged = FALSE; -+ for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++) -+ { -+ -+ /* start taking steps in a new direction -+ * First time we enter the routine, beta=0, and the direction is -+ * simply the negative gradient. -+ */ -+ -+ /* Calculate the new direction in p, and the gradient in this direction, gpa */ -+ p = s_min->s.cg_p; -+ sf = s_min->f; -+ gpa = 0; -+ gf = 0; -+ for (i = 0; i < mdatoms->homenr; i++) -+ { -+ if (mdatoms->cFREEZE) -+ { -+ gf = mdatoms->cFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ if (!inputrec->opts.nFreeze[gf][m]) -+ { -+ p[i][m] = sf[i][m] + beta*p[i][m]; -+ gpa -= p[i][m]*sf[i][m]; -+ /* f is negative gradient, thus the sign */ -+ } -+ else -+ { -+ p[i][m] = 0; -+ } -+ } -+ } -+ -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpa, cr); -+ } -+ -+ /* Calculate the norm of the search vector */ -+ get_f_norm_max(cr, &(inputrec->opts), mdatoms, p, &pnorm, NULL, NULL); -+ -+ /* Just in case stepsize reaches zero due to numerical precision... 
*/ -+ if (stepsize <= 0) -+ { -+ stepsize = inputrec->em_stepsize/pnorm; -+ } -+ -+ /* -+ * Double check the value of the derivative in the search direction. -+ * If it is positive it must be due to the old information in the -+ * CG formula, so just remove that and start over with beta=0. -+ * This corresponds to a steepest descent step. -+ */ -+ if (gpa > 0) -+ { -+ beta = 0; -+ step--; /* Don't count this step since we are restarting */ -+ continue; /* Go back to the beginning of the big for-loop */ -+ } -+ -+ /* Calculate minimum allowed stepsize, before the average (norm) -+ * relative change in coordinate is smaller than precision -+ */ -+ minstep = 0; -+ for (i = 0; i < mdatoms->homenr; i++) -+ { -+ for (m = 0; m < DIM; m++) -+ { -+ tmp = fabs(s_min->s.x[i][m]); -+ if (tmp < 1.0) -+ { -+ tmp = 1.0; -+ } -+ tmp = p[i][m]/tmp; -+ minstep += tmp*tmp; -+ } -+ } -+ /* Add up from all CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &minstep, cr); -+ } -+ -+ minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms)); -+ -+ if (stepsize < minstep) -+ { -+ converged = TRUE; -+ break; -+ } -+ -+ /* Write coordinates if necessary */ -+ do_x = do_per_step(step, inputrec->nstxout); -+ do_f = do_per_step(step, inputrec->nstfout); -+ -+ write_em_traj(fplog, cr, outf, do_x, do_f, NULL, -+ top_global, inputrec, step, -+ s_min, state_global, f_global); -+ -+ /* Take a step downhill. -+ * In theory, we should minimize the function along this direction. -+ * That is quite possible, but it turns out to take 5-10 function evaluations -+ * for each line. However, we dont really need to find the exact minimum - -+ * it is much better to start a new CG step in a modified direction as soon -+ * as we are close to it. This will save a lot of energy evaluations. -+ * -+ * In practice, we just try to take a single step. -+ * If it worked (i.e. lowered the energy), we increase the stepsize but -+ * the continue straight to the next CG step without trying to find any minimum. -+ * If it didn't work (higher energy), there must be a minimum somewhere between -+ * the old position and the new one. -+ * -+ * Due to the finite numerical accuracy, it turns out that it is a good idea -+ * to even accept a SMALL increase in energy, if the derivative is still downhill. -+ * This leads to lower final energies in the tests I've done. 
/ Erik -+ */ -+ s_a->epot = s_min->epot; -+ a = 0.0; -+ c = a + stepsize; /* reference position along line is zero */ -+ -+ if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) -+ { -+ em_dd_partition_system(fplog, step, cr, top_global, inputrec, -+ s_min, top, mdatoms, fr, vsite, constr, -+ nrnb, wcycle); -+ } -+ -+ /* Take a trial step (new coords in s_c) */ -+ do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, c, s_min->s.cg_p, s_c, -+ constr, top, nrnb, wcycle, -1); -+ -+ neval++; -+ /* Calculate energy for the trial step */ -+ evaluate_energy(fplog, cr, -+ top_global, s_c, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, FALSE); -+ -+ /* Calc derivative along line */ -+ p = s_c->s.cg_p; -+ sf = s_c->f; -+ gpc = 0; -+ for (i = 0; i < mdatoms->homenr; i++) -+ { -+ for (m = 0; m < DIM; m++) -+ { -+ gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */ -+ } -+ } -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpc, cr); -+ } -+ -+ /* This is the max amount of increase in energy we tolerate */ -+ tmp = sqrt(GMX_REAL_EPS)*fabs(s_a->epot); -+ -+ /* Accept the step if the energy is lower, or if it is not significantly higher -+ * and the line derivative is still negative. -+ */ -+ if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) -+ { -+ foundlower = TRUE; -+ /* Great, we found a better energy. Increase step for next iteration -+ * if we are still going down, decrease it otherwise -+ */ -+ if (gpc < 0) -+ { -+ stepsize *= 1.618034; /* The golden section */ -+ } -+ else -+ { -+ stepsize *= 0.618034; /* 1/golden section */ -+ } -+ } -+ else -+ { -+ /* New energy is the same or higher. We will have to do some work -+ * to find a smaller value in the interval. Take smaller step next time! -+ */ -+ foundlower = FALSE; -+ stepsize *= 0.618034; -+ } -+ -+ -+ -+ -+ /* OK, if we didn't find a lower value we will have to locate one now - there must -+ * be one in the interval [a=0,c]. -+ * The same thing is valid here, though: Don't spend dozens of iterations to find -+ * the line minimum. We try to interpolate based on the derivative at the endpoints, -+ * and only continue until we find a lower value. In most cases this means 1-2 iterations. -+ * -+ * I also have a safeguard for potentially really patological functions so we never -+ * take more than 20 steps before we give up ... -+ * -+ * If we already found a lower value we just skip this step and continue to the update. -+ */ -+ if (!foundlower) -+ { -+ nminstep = 0; -+ -+ do -+ { -+ /* Select a new trial point. -+ * If the derivatives at points a & c have different sign we interpolate to zero, -+ * otherwise just do a bisection. 
-+ */ -+ if (gpa < 0 && gpc > 0) -+ { -+ b = a + gpa*(a-c)/(gpc-gpa); -+ } -+ else -+ { -+ b = 0.5*(a+c); -+ } -+ -+ /* safeguard if interpolation close to machine accuracy causes errors: -+ * never go outside the interval -+ */ -+ if (b <= a || b >= c) -+ { -+ b = 0.5*(a+c); -+ } -+ -+ if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) -+ { -+ /* Reload the old state */ -+ em_dd_partition_system(fplog, -1, cr, top_global, inputrec, -+ s_min, top, mdatoms, fr, vsite, constr, -+ nrnb, wcycle); -+ } -+ -+ /* Take a trial step to this new point - new coords in s_b */ -+ do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, b, s_min->s.cg_p, s_b, -+ constr, top, nrnb, wcycle, -1); -+ -+ neval++; -+ /* Calculate energy for the trial step */ -+ evaluate_energy(fplog, cr, -+ top_global, s_b, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, FALSE); -+ -+ /* p does not change within a step, but since the domain decomposition -+ * might change, we have to use cg_p of s_b here. -+ */ -+ p = s_b->s.cg_p; -+ sf = s_b->f; -+ gpb = 0; -+ for (i = 0; i < mdatoms->homenr; i++) -+ { -+ for (m = 0; m < DIM; m++) -+ { -+ gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */ -+ } -+ } -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpb, cr); -+ } -+ -+ if (debug) -+ { -+ fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n", -+ s_a->epot, s_b->epot, s_c->epot, gpb); -+ } -+ -+ epot_repl = s_b->epot; -+ -+ /* Keep one of the intervals based on the value of the derivative at the new point */ -+ if (gpb > 0) -+ { -+ /* Replace c endpoint with b */ -+ swap_em_state(s_b, s_c); -+ c = b; -+ gpc = gpb; -+ } -+ else -+ { -+ /* Replace a endpoint with b */ -+ swap_em_state(s_b, s_a); -+ a = b; -+ gpa = gpb; -+ } -+ -+ /* -+ * Stop search as soon as we find a value smaller than the endpoints. -+ * Never run more than 20 steps, no matter what. -+ */ -+ nminstep++; -+ } -+ while ((epot_repl > s_a->epot || epot_repl > s_c->epot) && -+ (nminstep < 20)); -+ -+ if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS || -+ nminstep >= 20) -+ { -+ /* OK. We couldn't find a significantly lower energy. -+ * If beta==0 this was steepest descent, and then we give up. -+ * If not, set beta=0 and restart with steepest descent before quitting. -+ */ -+ if (beta == 0.0) -+ { -+ /* Converged */ -+ converged = TRUE; -+ break; -+ } -+ else -+ { -+ /* Reset memory before giving up */ -+ beta = 0.0; -+ continue; -+ } -+ } -+ -+ /* Select min energy state of A & C, put the best in B. -+ */ -+ if (s_c->epot < s_a->epot) -+ { -+ if (debug) -+ { -+ fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n", -+ s_c->epot, s_a->epot); -+ } -+ swap_em_state(s_b, s_c); -+ gpb = gpc; -+ b = c; -+ } -+ else -+ { -+ if (debug) -+ { -+ fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n", -+ s_a->epot, s_c->epot); -+ } -+ swap_em_state(s_b, s_a); -+ gpb = gpa; -+ b = a; -+ } -+ -+ } -+ else -+ { -+ if (debug) -+ { -+ fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n", -+ s_c->epot); -+ } -+ swap_em_state(s_b, s_c); -+ gpb = gpc; -+ b = c; -+ } -+ -+ /* new search direction */ -+ /* beta = 0 means forget all memory and restart with steepest descents. */ -+ if (nstcg && ((step % nstcg) == 0)) -+ { -+ beta = 0.0; -+ } -+ else -+ { -+ /* s_min->fnorm cannot be zero, because then we would have converged -+ * and broken out. -+ */ -+ -+ /* Polak-Ribiere update. 
-+ * Change to fnorm2/fnorm2_old for Fletcher-Reeves -+ */ -+ beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b); -+ } -+ /* Limit beta to prevent oscillations */ -+ if (fabs(beta) > 5.0) -+ { -+ beta = 0.0; -+ } -+ -+ -+ /* update positions */ -+ swap_em_state(s_min, s_b); -+ gpa = gpb; -+ -+ /* Print it if necessary */ -+ if (MASTER(cr)) -+ { -+ if (bVerbose) -+ { -+ fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", -+ step, s_min->epot, s_min->fnorm/sqrt(state_global->natoms), -+ s_min->fmax, s_min->a_fmax+1); -+ } -+ /* Store the new (lower) energies */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)step, -+ mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box, -+ NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ -+ do_log = do_per_step(step, inputrec->nstlog); -+ do_ene = do_per_step(step, inputrec->nstenergy); -+ -+ /* Prepare IMD energy record, if bIMD is TRUE. */ -+ IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, step, TRUE); -+ -+ if (do_log) -+ { -+ print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]); -+ } -+ print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, -+ do_log ? fplog : NULL, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ -+ /* Send energies and positions to the IMD client if bIMD is TRUE. */ -+ if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr)) -+ { -+ IMD_send_positions(inputrec->imd); -+ } -+ -+ /* Stop when the maximum force lies below tolerance. -+ * If we have reached machine precision, converged is already set to true. -+ */ -+ converged = converged || (s_min->fmax < inputrec->em_tol); -+ -+ } /* End of the loop */ -+ -+ /* IMD cleanup, if bIMD is TRUE. */ -+ IMD_finalize(inputrec->bIMD, inputrec->imd); -+ -+ if (converged) -+ { -+ step--; /* we never took that last step in this case */ -+ -+ } -+ if (s_min->fmax > inputrec->em_tol) -+ { -+ if (MASTER(cr)) -+ { -+ warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE); -+ warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE); -+ } -+ converged = FALSE; -+ } -+ -+ if (MASTER(cr)) -+ { -+ /* If we printed energy and/or logfile last step (which was the last step) -+ * we don't have to do it again, but otherwise print the final values. -+ */ -+ if (!do_log) -+ { -+ /* Write final value to log since we didn't do anything the last step */ -+ print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]); -+ } -+ if (!do_ene || !do_log) -+ { -+ /* Write final energy file entries */ -+ print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, -+ !do_log ? fplog : NULL, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ } -+ -+ /* Print some stuff... */ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "\nwriting lowest energy coordinates.\n"); -+ } -+ -+ /* IMPORTANT! -+ * For accurate normal mode calculation it is imperative that we -+ * store the last conformation into the full precision binary trajectory. -+ * -+ * However, we should only do it if we did NOT already write this step -+ * above (which we did if do_x or do_f was true). 
-+ */ -+ do_x = !do_per_step(step, inputrec->nstxout); -+ do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout)); -+ -+ write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), -+ top_global, inputrec, step, -+ s_min, state_global, f_global); -+ -+ fnormn = s_min->fnorm/sqrt(state_global->natoms); -+ -+ if (MASTER(cr)) -+ { -+ print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps, -+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); -+ print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps, -+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); -+ -+ fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); -+ } -+ -+ finish_em(cr, outf, walltime_accounting, wcycle); -+ -+ /* To print the actual number of steps we needed somewhere */ -+ walltime_accounting_set_nsteps_done(walltime_accounting, step); -+ -+ return 0; -+} /* That's all folks */ -+ -+ -+double do_lbfgs(FILE *fplog, t_commrec *cr, -+ int nfile, const t_filenm fnm[], -+ const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact, -+ int gmx_unused nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int gmx_unused stepout, -+ t_inputrec *inputrec, -+ gmx_mtop_t *top_global, t_fcdata *fcd, -+ t_state *state, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t gmx_unused ed, -+ t_forcerec *fr, -+ int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, -+ gmx_membed_t gmx_unused membed, -+ real gmx_unused cpt_period, real gmx_unused max_hours, -+ const char gmx_unused *deviceOptions, -+ int imdport, -+ unsigned long gmx_unused Flags, -+ gmx_walltime_accounting_t walltime_accounting) -+{ -+ static const char *LBFGS = "Low-Memory BFGS Minimizer"; -+ em_state_t ems; -+ gmx_localtop_t *top; -+ gmx_enerdata_t *enerd; -+ rvec *f; -+ gmx_global_stat_t gstat; -+ t_graph *graph; -+ rvec *f_global; -+ int ncorr, nmaxcorr, point, cp, neval, nminstep; -+ double stepsize, gpa, gpb, gpc, tmp, minstep; -+ real *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg; -+ real *xa, *xb, *xc, *fa, *fb, *fc, *xtmp, *ftmp; -+ real a, b, c, maxdelta, delta; -+ real diag, Epot0, Epot, EpotA, EpotB, EpotC; -+ real dgdx, dgdg, sq, yr, beta; -+ t_mdebin *mdebin; -+ gmx_bool converged, first; -+ rvec mu_tot; -+ real fnorm, fmax; -+ gmx_bool do_log, do_ene, do_x, do_f, foundlower, *frozen; -+ tensor vir, pres; -+ int start, end, number_steps; -+ gmx_mdoutf_t outf; -+ int i, k, m, n, nfmax, gf, step; -+ int mdof_flags; -+ /* not used */ -+ real terminate; -+ -+ if (PAR(cr)) -+ { -+ gmx_fatal(FARGS, "Cannot do parallel L-BFGS Minimization - yet.\n"); -+ } -+ -+ if (NULL != constr) -+ { -+ gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent)."); -+ } -+ -+ n = 3*state->natoms; -+ nmaxcorr = inputrec->nbfgscorr; -+ -+ /* Allocate memory */ -+ /* Use pointers to real so we dont have to loop over both atoms and -+ * dimensions all the time... -+ * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real -+ * that point to the same memory. 
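do_lbfgs works on flat arrays of reals rather than on rvec triplets: because rvec is a plain real[3] and arrays of rvec are contiguous, a cast such as xx = (real *)state->x lets the same memory be addressed either per atom and dimension or as one vector of length 3*natoms. A self-contained illustration of that aliasing (types are redeclared here only for the sketch; a single-precision build is assumed):

    #include <stdio.h>

    typedef float real;          /* double in a GMX_DOUBLE build; float assumed here */
    typedef real  rvec[3];

    int main(void)
    {
        rvec  x[2] = { {1, 2, 3}, {4, 5, 6} };
        real *xx   = (real *)x;                  /* flat view of the same memory */
        int   i;

        for (i = 0; i < 2*3; i++)
        {
            /* xx[i] and x[i/3][i%3] refer to the same object */
            printf("xx[%d] = %g = x[%d][%d]\n", i, xx[i], i/3, i%3);
        }
        return 0;
    }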
-+ */ -+ snew(xa, n); -+ snew(xb, n); -+ snew(xc, n); -+ snew(fa, n); -+ snew(fb, n); -+ snew(fc, n); -+ snew(frozen, n); -+ -+ snew(p, n); -+ snew(lastx, n); -+ snew(lastf, n); -+ snew(rho, nmaxcorr); -+ snew(alpha, nmaxcorr); -+ -+ snew(dx, nmaxcorr); -+ for (i = 0; i < nmaxcorr; i++) -+ { -+ snew(dx[i], n); -+ } -+ -+ snew(dg, nmaxcorr); -+ for (i = 0; i < nmaxcorr; i++) -+ { -+ snew(dg[i], n); -+ } -+ -+ step = 0; -+ neval = 0; -+ -+ /* Init em */ -+ init_em(fplog, LBFGS, cr, inputrec, -+ state, top_global, &ems, &top, &f, &f_global, -+ nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, -+ nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); -+ /* Do_lbfgs is not completely updated like do_steep and do_cg, -+ * so we free some memory again. -+ */ -+ sfree(ems.s.x); -+ sfree(ems.f); -+ -+ xx = (real *)state->x; -+ ff = (real *)f; -+ -+ start = 0; -+ end = mdatoms->homenr; -+ -+ /* Print to log file */ -+ print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS); -+ -+ do_log = do_ene = do_x = do_f = TRUE; -+ -+ /* Max number of steps */ -+ number_steps = inputrec->nsteps; -+ -+ /* Create a 3*natoms index to tell whether each degree of freedom is frozen */ -+ gf = 0; -+ for (i = start; i < end; i++) -+ { -+ if (mdatoms->cFREEZE) -+ { -+ gf = mdatoms->cFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ frozen[3*i+m] = inputrec->opts.nFreeze[gf][m]; -+ } -+ } -+ if (MASTER(cr)) -+ { -+ sp_header(stderr, LBFGS, inputrec->em_tol, number_steps); -+ } -+ if (fplog) -+ { -+ sp_header(fplog, LBFGS, inputrec->em_tol, number_steps); -+ } -+ -+ if (vsite) -+ { -+ construct_vsites(vsite, state->x, 1, NULL, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, cr, state->box); -+ } -+ -+ /* Call the force routine and some auxiliary (neighboursearching etc.) */ -+ /* do_force always puts the charge groups in the box and shifts again -+ * We do not unshift, so molecules are always whole -+ */ -+ neval++; -+ ems.s.x = state->x; -+ ems.f = f; -+ evaluate_energy(fplog, cr, -+ top_global, &ems, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, TRUE); -+ where(); -+ -+ if (MASTER(cr)) -+ { -+ /* Copy stuff to the energy bin for easy printing etc. */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)step, -+ mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box, -+ NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ -+ print_ebin_header(fplog, step, step, state->lambda[efptFEP]); -+ print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ where(); -+ -+ /* This is the starting energy */ -+ Epot = enerd->term[F_EPOT]; -+ -+ fnorm = ems.fnorm; -+ fmax = ems.fmax; -+ nfmax = ems.a_fmax; -+ -+ /* Set the initial step. -+ * since it will be multiplied by the non-normalized search direction -+ * vector (force vector the first time), we scale it by the -+ * norm of the force. -+ */ -+ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr); -+ fprintf(stderr, " F-max = %12.5e on atom %d\n", fmax, nfmax+1); -+ fprintf(stderr, " F-Norm = %12.5e\n", fnorm/sqrt(state->natoms)); -+ fprintf(stderr, "\n"); -+ /* and copy to the log file too... 
*/ -+ fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr); -+ fprintf(fplog, " F-max = %12.5e on atom %d\n", fmax, nfmax+1); -+ fprintf(fplog, " F-Norm = %12.5e\n", fnorm/sqrt(state->natoms)); -+ fprintf(fplog, "\n"); -+ } -+ -+ point = 0; -+ for (i = 0; i < n; i++) -+ { -+ if (!frozen[i]) -+ { -+ dx[point][i] = ff[i]; /* Initial search direction */ -+ } -+ else -+ { -+ dx[point][i] = 0; -+ } -+ } -+ -+ stepsize = 1.0/fnorm; -+ converged = FALSE; -+ -+ /* Start the loop over BFGS steps. -+ * Each successful step is counted, and we continue until -+ * we either converge or reach the max number of steps. -+ */ -+ -+ ncorr = 0; -+ -+ /* Set the gradient from the force */ -+ converged = FALSE; -+ for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++) -+ { -+ -+ /* Write coordinates if necessary */ -+ do_x = do_per_step(step, inputrec->nstxout); -+ do_f = do_per_step(step, inputrec->nstfout); -+ -+ mdof_flags = 0; -+ if (do_x) -+ { -+ mdof_flags |= MDOF_X; -+ } -+ -+ if (do_f) -+ { -+ mdof_flags |= MDOF_F; -+ } -+ -+ if (inputrec->bIMD) -+ { -+ mdof_flags |= MDOF_IMD; -+ } -+ -+ mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, -+ top_global, step, (real)step, state, state, f, f); -+ -+ /* Do the linesearching in the direction dx[point][0..(n-1)] */ -+ -+ /* pointer to current direction - point=0 first time here */ -+ s = dx[point]; -+ -+ /* calculate line gradient */ -+ for (gpa = 0, i = 0; i < n; i++) -+ { -+ gpa -= s[i]*ff[i]; -+ } -+ -+ /* Calculate minimum allowed stepsize, before the average (norm) -+ * relative change in coordinate is smaller than precision -+ */ -+ for (minstep = 0, i = 0; i < n; i++) -+ { -+ tmp = fabs(xx[i]); -+ if (tmp < 1.0) -+ { -+ tmp = 1.0; -+ } -+ tmp = s[i]/tmp; -+ minstep += tmp*tmp; -+ } -+ minstep = GMX_REAL_EPS/sqrt(minstep/n); -+ -+ if (stepsize < minstep) -+ { -+ converged = TRUE; -+ break; -+ } -+ -+ /* Store old forces and coordinates */ -+ for (i = 0; i < n; i++) -+ { -+ lastx[i] = xx[i]; -+ lastf[i] = ff[i]; -+ } -+ Epot0 = Epot; -+ -+ first = TRUE; -+ -+ for (i = 0; i < n; i++) -+ { -+ xa[i] = xx[i]; -+ } -+ -+ /* Take a step downhill. -+ * In theory, we should minimize the function along this direction. -+ * That is quite possible, but it turns out to take 5-10 function evaluations -+ * for each line. However, we dont really need to find the exact minimum - -+ * it is much better to start a new BFGS step in a modified direction as soon -+ * as we are close to it. This will save a lot of energy evaluations. -+ * -+ * In practice, we just try to take a single step. -+ * If it worked (i.e. lowered the energy), we increase the stepsize but -+ * the continue straight to the next BFGS step without trying to find any minimum. -+ * If it didn't work (higher energy), there must be a minimum somewhere between -+ * the old position and the new one. -+ * -+ * Due to the finite numerical accuracy, it turns out that it is a good idea -+ * to even accept a SMALL increase in energy, if the derivative is still downhill. -+ * This leads to lower final energies in the tests I've done. / Erik -+ */ -+ foundlower = FALSE; -+ EpotA = Epot0; -+ a = 0.0; -+ c = a + stepsize; /* reference position along line is zero */ -+ -+ /* Check stepsize first. We do not allow displacements -+ * larger than emstep. 
-+ */ -+ do -+ { -+ c = a + stepsize; -+ maxdelta = 0; -+ for (i = 0; i < n; i++) -+ { -+ delta = c*s[i]; -+ if (delta > maxdelta) -+ { -+ maxdelta = delta; -+ } -+ } -+ if (maxdelta > inputrec->em_stepsize) -+ { -+ stepsize *= 0.1; -+ } -+ } -+ while (maxdelta > inputrec->em_stepsize); -+ -+ /* Take a trial step */ -+ for (i = 0; i < n; i++) -+ { -+ xc[i] = lastx[i] + c*s[i]; -+ } -+ -+ neval++; -+ /* Calculate energy for the trial step */ -+ ems.s.x = (rvec *)xc; -+ ems.f = (rvec *)fc; -+ evaluate_energy(fplog, cr, -+ top_global, &ems, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, step, FALSE); -+ EpotC = ems.epot; -+ -+ /* Calc derivative along line */ -+ for (gpc = 0, i = 0; i < n; i++) -+ { -+ gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */ -+ } -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpc, cr); -+ } -+ -+ /* This is the max amount of increase in energy we tolerate */ -+ tmp = sqrt(GMX_REAL_EPS)*fabs(EpotA); -+ -+ /* Accept the step if the energy is lower, or if it is not significantly higher -+ * and the line derivative is still negative. -+ */ -+ if (EpotC < EpotA || (gpc < 0 && EpotC < (EpotA+tmp))) -+ { -+ foundlower = TRUE; -+ /* Great, we found a better energy. Increase step for next iteration -+ * if we are still going down, decrease it otherwise -+ */ -+ if (gpc < 0) -+ { -+ stepsize *= 1.618034; /* The golden section */ -+ } -+ else -+ { -+ stepsize *= 0.618034; /* 1/golden section */ -+ } -+ } -+ else -+ { -+ /* New energy is the same or higher. We will have to do some work -+ * to find a smaller value in the interval. Take smaller step next time! -+ */ -+ foundlower = FALSE; -+ stepsize *= 0.618034; -+ } -+ -+ /* OK, if we didn't find a lower value we will have to locate one now - there must -+ * be one in the interval [a=0,c]. -+ * The same thing is valid here, though: Don't spend dozens of iterations to find -+ * the line minimum. We try to interpolate based on the derivative at the endpoints, -+ * and only continue until we find a lower value. In most cases this means 1-2 iterations. -+ * -+ * I also have a safeguard for potentially really patological functions so we never -+ * take more than 20 steps before we give up ... -+ * -+ * If we already found a lower value we just skip this step and continue to the update. -+ */ -+ -+ if (!foundlower) -+ { -+ -+ nminstep = 0; -+ do -+ { -+ /* Select a new trial point. -+ * If the derivatives at points a & c have different sign we interpolate to zero, -+ * otherwise just do a bisection. 
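The bracketing loop that follows picks its trial point from the directional derivatives at the two ends of the interval: when they have opposite signs, the zero of a linear model of the derivative is interpolated, otherwise the interval is bisected, and a guard keeps the point strictly inside (a, c). The same rule appears in do_cg above. Condensed into a stand-alone helper (the name is illustrative):

    /* Pick a trial point b in (a, c) from the directional derivatives gpa, gpc
     * at the interval ends; forces are negative gradients upstream, so a
     * negative value means "still downhill".
     */
    static double trial_point(double a, double c, double gpa, double gpc)
    {
        double b;

        if (gpa < 0 && gpc > 0)
        {
            b = a + gpa*(a - c)/(gpc - gpa);   /* secant step toward the derivative's zero */
        }
        else
        {
            b = 0.5*(a + c);                   /* no sign change: plain bisection */
        }
        if (b <= a || b >= c)                  /* round-off guard: stay inside the bracket */
        {
            b = 0.5*(a + c);
        }
        return b;
    }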
-+ */ -+ -+ if (gpa < 0 && gpc > 0) -+ { -+ b = a + gpa*(a-c)/(gpc-gpa); -+ } -+ else -+ { -+ b = 0.5*(a+c); -+ } -+ -+ /* safeguard if interpolation close to machine accuracy causes errors: -+ * never go outside the interval -+ */ -+ if (b <= a || b >= c) -+ { -+ b = 0.5*(a+c); -+ } -+ -+ /* Take a trial step */ -+ for (i = 0; i < n; i++) -+ { -+ xb[i] = lastx[i] + b*s[i]; -+ } -+ -+ neval++; -+ /* Calculate energy for the trial step */ -+ ems.s.x = (rvec *)xb; -+ ems.f = (rvec *)fb; -+ evaluate_energy(fplog, cr, -+ top_global, &ems, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, step, FALSE); -+ EpotB = ems.epot; -+ -+ fnorm = ems.fnorm; -+ -+ for (gpb = 0, i = 0; i < n; i++) -+ { -+ gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */ -+ -+ } -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpb, cr); -+ } -+ -+ /* Keep one of the intervals based on the value of the derivative at the new point */ -+ if (gpb > 0) -+ { -+ /* Replace c endpoint with b */ -+ EpotC = EpotB; -+ c = b; -+ gpc = gpb; -+ /* swap coord pointers b/c */ -+ xtmp = xb; -+ ftmp = fb; -+ xb = xc; -+ fb = fc; -+ xc = xtmp; -+ fc = ftmp; -+ } -+ else -+ { -+ /* Replace a endpoint with b */ -+ EpotA = EpotB; -+ a = b; -+ gpa = gpb; -+ /* swap coord pointers a/b */ -+ xtmp = xb; -+ ftmp = fb; -+ xb = xa; -+ fb = fa; -+ xa = xtmp; -+ fa = ftmp; -+ } -+ -+ /* -+ * Stop search as soon as we find a value smaller than the endpoints, -+ * or if the tolerance is below machine precision. -+ * Never run more than 20 steps, no matter what. -+ */ -+ nminstep++; -+ } -+ while ((EpotB > EpotA || EpotB > EpotC) && (nminstep < 20)); -+ -+ if (fabs(EpotB-Epot0) < GMX_REAL_EPS || nminstep >= 20) -+ { -+ /* OK. We couldn't find a significantly lower energy. -+ * If ncorr==0 this was steepest descent, and then we give up. -+ * If not, reset memory to restart as steepest descent before quitting. -+ */ -+ if (ncorr == 0) -+ { -+ /* Converged */ -+ converged = TRUE; -+ break; -+ } -+ else -+ { -+ /* Reset memory */ -+ ncorr = 0; -+ /* Search in gradient direction */ -+ for (i = 0; i < n; i++) -+ { -+ dx[point][i] = ff[i]; -+ } -+ /* Reset stepsize */ -+ stepsize = 1.0/fnorm; -+ continue; -+ } -+ } -+ -+ /* Select min energy state of A & C, put the best in xx/ff/Epot -+ */ -+ if (EpotC < EpotA) -+ { -+ Epot = EpotC; -+ /* Use state C */ -+ for (i = 0; i < n; i++) -+ { -+ xx[i] = xc[i]; -+ ff[i] = fc[i]; -+ } -+ stepsize = c; -+ } -+ else -+ { -+ Epot = EpotA; -+ /* Use state A */ -+ for (i = 0; i < n; i++) -+ { -+ xx[i] = xa[i]; -+ ff[i] = fa[i]; -+ } -+ stepsize = a; -+ } -+ -+ } -+ else -+ { -+ /* found lower */ -+ Epot = EpotC; -+ /* Use state C */ -+ for (i = 0; i < n; i++) -+ { -+ xx[i] = xc[i]; -+ ff[i] = fc[i]; -+ } -+ stepsize = c; -+ } -+ -+ /* Update the memory information, and calculate a new -+ * approximation of the inverse hessian -+ */ -+ -+ /* Have new data in Epot, xx, ff */ -+ if (ncorr < nmaxcorr) -+ { -+ ncorr++; -+ } -+ -+ for (i = 0; i < n; i++) -+ { -+ dg[point][i] = lastf[i]-ff[i]; -+ dx[point][i] *= stepsize; -+ } -+ -+ dgdg = 0; -+ dgdx = 0; -+ for (i = 0; i < n; i++) -+ { -+ dgdg += dg[point][i]*dg[point][i]; -+ dgdx += dg[point][i]*dx[point][i]; -+ } -+ -+ diag = dgdx/dgdg; -+ -+ rho[point] = 1.0/dgdx; -+ point++; -+ -+ if (point >= nmaxcorr) -+ { -+ point = 0; -+ } -+ -+ /* Update */ -+ for (i = 0; i < n; i++) -+ { -+ p[i] = ff[i]; -+ } -+ -+ cp = point; -+ -+ /* Recursive update. 
First go back over the memory points */ -+ for (k = 0; k < ncorr; k++) -+ { -+ cp--; -+ if (cp < 0) -+ { -+ cp = ncorr-1; -+ } -+ -+ sq = 0; -+ for (i = 0; i < n; i++) -+ { -+ sq += dx[cp][i]*p[i]; -+ } -+ -+ alpha[cp] = rho[cp]*sq; -+ -+ for (i = 0; i < n; i++) -+ { -+ p[i] -= alpha[cp]*dg[cp][i]; -+ } -+ } -+ -+ for (i = 0; i < n; i++) -+ { -+ p[i] *= diag; -+ } -+ -+ /* And then go forward again */ -+ for (k = 0; k < ncorr; k++) -+ { -+ yr = 0; -+ for (i = 0; i < n; i++) -+ { -+ yr += p[i]*dg[cp][i]; -+ } -+ -+ beta = rho[cp]*yr; -+ beta = alpha[cp]-beta; -+ -+ for (i = 0; i < n; i++) -+ { -+ p[i] += beta*dx[cp][i]; -+ } -+ -+ cp++; -+ if (cp >= ncorr) -+ { -+ cp = 0; -+ } -+ } -+ -+ for (i = 0; i < n; i++) -+ { -+ if (!frozen[i]) -+ { -+ dx[point][i] = p[i]; -+ } -+ else -+ { -+ dx[point][i] = 0; -+ } -+ } -+ -+ stepsize = 1.0; -+ -+ /* Test whether the convergence criterion is met */ -+ get_f_norm_max(cr, &(inputrec->opts), mdatoms, f, &fnorm, &fmax, &nfmax); -+ -+ /* Print it if necessary */ -+ if (MASTER(cr)) -+ { -+ if (bVerbose) -+ { -+ fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", -+ step, Epot, fnorm/sqrt(state->natoms), fmax, nfmax+1); -+ } -+ /* Store the new (lower) energies */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)step, -+ mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box, -+ NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ do_log = do_per_step(step, inputrec->nstlog); -+ do_ene = do_per_step(step, inputrec->nstenergy); -+ if (do_log) -+ { -+ print_ebin_header(fplog, step, step, state->lambda[efptFEP]); -+ } -+ print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, -+ do_log ? fplog : NULL, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ -+ /* Send x and E to IMD client, if bIMD is TRUE. */ -+ if (do_IMD(inputrec->bIMD, step, cr, TRUE, state->box, state->x, inputrec, 0, wcycle) && MASTER(cr)) -+ { -+ IMD_send_positions(inputrec->imd); -+ } -+ -+ /* Stop when the maximum force lies below tolerance. -+ * If we have reached machine precision, converged is already set to true. -+ */ -+ -+ converged = converged || (fmax < inputrec->em_tol); -+ -+ } /* End of the loop */ -+ -+ /* IMD cleanup, if bIMD is TRUE. */ -+ IMD_finalize(inputrec->bIMD, inputrec->imd); -+ -+ if (converged) -+ { -+ step--; /* we never took that last step in this case */ -+ -+ } -+ if (fmax > inputrec->em_tol) -+ { -+ if (MASTER(cr)) -+ { -+ warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE); -+ warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE); -+ } -+ converged = FALSE; -+ } -+ -+ /* If we printed energy and/or logfile last step (which was the last step) -+ * we don't have to do it again, but otherwise print the final values. -+ */ -+ if (!do_log) /* Write final value to log since we didn't do anythin last step */ -+ { -+ print_ebin_header(fplog, step, step, state->lambda[efptFEP]); -+ } -+ if (!do_ene || !do_log) /* Write final energy file entries */ -+ { -+ print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, -+ !do_log ? fplog : NULL, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ -+ /* Print some stuff... */ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "\nwriting lowest energy coordinates.\n"); -+ } -+ -+ /* IMPORTANT! -+ * For accurate normal mode calculation it is imperative that we -+ * store the last conformation into the full precision binary trajectory. 
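The backward and forward passes above over the stored (dx, dg) pairs are the standard L-BFGS two-loop recursion; the only differences from the textbook form are that the code starts from the force (the negative gradient), so no final sign flip is needed, and that the pairs live in a circular buffer indexed by cp. A textbook-style sketch in gradient form, with the memory simply ordered oldest to newest (names are illustrative; h0 corresponds to the diag = (s.y)/(y.y) computed above):

    /* Standard L-BFGS two-loop recursion: returns d = -H*g.
     * s[k] are stored position differences, y[k] gradient differences,
     * rho[k] = 1/(y[k].s[k]), h0 the initial diagonal Hessian guess,
     * alpha scratch of length ncorr.
     */
    static void lbfgs_direction(int ncorr, int n, const double *g,
                                double **s, double **y, const double *rho,
                                double *alpha, double h0, double *d)
    {
        int i, k;

        for (i = 0; i < n; i++)
        {
            d[i] = g[i];                    /* working vector q of the textbook recursion */
        }
        for (k = ncorr-1; k >= 0; k--)      /* backward pass, newest pair first */
        {
            double sq = 0;

            for (i = 0; i < n; i++)
            {
                sq += s[k][i]*d[i];
            }
            alpha[k] = rho[k]*sq;
            for (i = 0; i < n; i++)
            {
                d[i] -= alpha[k]*y[k][i];
            }
        }
        for (i = 0; i < n; i++)
        {
            d[i] *= h0;                     /* apply the initial Hessian estimate */
        }
        for (k = 0; k < ncorr; k++)         /* forward pass, oldest pair first */
        {
            double yr = 0;

            for (i = 0; i < n; i++)
            {
                yr += y[k][i]*d[i];
            }
            for (i = 0; i < n; i++)
            {
                d[i] += (alpha[k] - rho[k]*yr)*s[k][i];
            }
        }
        for (i = 0; i < n; i++)
        {
            d[i] = -d[i];                   /* quasi-Newton descent direction */
        }
    }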
-+ * -+ * However, we should only do it if we did NOT already write this step -+ * above (which we did if do_x or do_f was true). -+ */ -+ do_x = !do_per_step(step, inputrec->nstxout); -+ do_f = !do_per_step(step, inputrec->nstfout); -+ write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), -+ top_global, inputrec, step, -+ &ems, state, f); -+ -+ if (MASTER(cr)) -+ { -+ print_converged(stderr, LBFGS, inputrec->em_tol, step, converged, -+ number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms)); -+ print_converged(fplog, LBFGS, inputrec->em_tol, step, converged, -+ number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms)); -+ -+ fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); -+ } -+ -+ finish_em(cr, outf, walltime_accounting, wcycle); -+ -+ /* To print the actual number of steps we needed somewhere */ -+ walltime_accounting_set_nsteps_done(walltime_accounting, step); -+ -+ return 0; -+} /* That's all folks */ -+ -+ -+double do_steep(FILE *fplog, t_commrec *cr, -+ int nfile, const t_filenm fnm[], -+ const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact, -+ int gmx_unused nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int gmx_unused stepout, -+ t_inputrec *inputrec, -+ gmx_mtop_t *top_global, t_fcdata *fcd, -+ t_state *state_global, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t gmx_unused ed, -+ t_forcerec *fr, -+ int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, -+ gmx_membed_t gmx_unused membed, -+ real gmx_unused cpt_period, real gmx_unused max_hours, -+ const char gmx_unused *deviceOptions, -+ int imdport, -+ unsigned long gmx_unused Flags, -+ gmx_walltime_accounting_t walltime_accounting) -+{ -+ const char *SD = "Steepest Descents"; -+ em_state_t *s_min, *s_try; -+ rvec *f_global; -+ gmx_localtop_t *top; -+ gmx_enerdata_t *enerd; -+ rvec *f; -+ gmx_global_stat_t gstat; -+ t_graph *graph; -+ real stepsize, constepsize; -+ real ustep, fnormn; -+ gmx_mdoutf_t outf; -+ t_mdebin *mdebin; -+ gmx_bool bDone, bAbort, do_x, do_f; -+ tensor vir, pres; -+ rvec mu_tot; -+ int nsteps; -+ int count = 0; -+ int steps_accepted = 0; -+ /* not used */ -+ real terminate = 0; -+ -+ s_min = init_em_state(); -+ s_try = init_em_state(); -+ -+ /* Init em and store the local state in s_try */ -+ init_em(fplog, SD, cr, inputrec, -+ state_global, top_global, s_try, &top, &f, &f_global, -+ nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, -+ nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); -+ -+ /* Print to log file */ -+ print_em_start(fplog, cr, walltime_accounting, wcycle, SD); -+ -+ /* Set variables for stepsize (in nm). This is the largest -+ * step that we are going to make in any direction. 
-+ */ -+ ustep = inputrec->em_stepsize; -+ stepsize = 0; -+ -+ /* Max number of steps */ -+ nsteps = inputrec->nsteps; -+ -+ if (MASTER(cr)) -+ { -+ /* Print to the screen */ -+ sp_header(stderr, SD, inputrec->em_tol, nsteps); -+ } -+ if (fplog) -+ { -+ sp_header(fplog, SD, inputrec->em_tol, nsteps); -+ } -+ -+ /**** HERE STARTS THE LOOP **** -+ * count is the counter for the number of steps -+ * bDone will be TRUE when the minimization has converged -+ * bAbort will be TRUE when nsteps steps have been performed or when -+ * the stepsize becomes smaller than is reasonable for machine precision -+ */ -+ count = 0; -+ bDone = FALSE; -+ bAbort = FALSE; -+ while (!bDone && !bAbort) -+ { -+ bAbort = (nsteps >= 0) && (count == nsteps); -+ -+ /* set new coordinates, except for first step */ -+ if (count > 0) -+ { -+ do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, -+ s_min, stepsize, s_min->f, s_try, -+ constr, top, nrnb, wcycle, count); -+ } -+ -+ evaluate_energy(fplog, cr, -+ top_global, s_try, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, count, count == 0); -+ -+ if (MASTER(cr)) -+ { -+ print_ebin_header(fplog, count, count, s_try->s.lambda[efptFEP]); -+ } -+ -+ if (count == 0) -+ { -+ s_min->epot = s_try->epot + 1; -+ } -+ -+ /* Print it if necessary */ -+ if (MASTER(cr)) -+ { -+ if (bVerbose) -+ { -+ fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c", -+ count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1, -+ (s_try->epot < s_min->epot) ? '\n' : '\r'); -+ } -+ -+ if (s_try->epot < s_min->epot) -+ { -+ /* Store the new (lower) energies */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)count, -+ mdatoms->tmass, enerd, &s_try->s, inputrec->fepvals, inputrec->expandedvals, -+ s_try->s.box, NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ -+ /* Prepare IMD energy record, if bIMD is TRUE. */ -+ IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, count, TRUE); -+ -+ print_ebin(mdoutf_get_fp_ene(outf), TRUE, -+ do_per_step(steps_accepted, inputrec->nstdisreout), -+ do_per_step(steps_accepted, inputrec->nstorireout), -+ fplog, count, count, eprNORMAL, TRUE, -+ mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ fflush(fplog); -+ } -+ } -+ -+ /* Now if the new energy is smaller than the previous... -+ * or if this is the first step! -+ * or if we did random steps! -+ */ -+ -+ if ( (count == 0) || (s_try->epot < s_min->epot) ) -+ { -+ steps_accepted++; -+ -+ /* Test whether the convergence criterion is met... */ -+ bDone = (s_try->fmax < inputrec->em_tol); -+ -+ /* Copy the arrays for force, positions and energy */ -+ /* The 'Min' array always holds the coords and forces of the minimal -+ sampled energy */ -+ swap_em_state(s_min, s_try); -+ if (count > 0) -+ { -+ ustep *= 1.2; -+ } -+ -+ /* Write to trn, if necessary */ -+ do_x = do_per_step(steps_accepted, inputrec->nstxout); -+ do_f = do_per_step(steps_accepted, inputrec->nstfout); -+ write_em_traj(fplog, cr, outf, do_x, do_f, NULL, -+ top_global, inputrec, count, -+ s_min, state_global, f_global); -+ } -+ else -+ { -+ /* If energy is not smaller make the step smaller... 
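The branch below is the step-size policy of the steepest-descent minimizer: the trial displacement is at most ustep along the force, an energy decrease keeps the state and grows ustep by 1.2, an increase discards the trial and halves ustep, and the run stops once the maximum force drops below the tolerance. The same policy on a toy one-dimensional energy, purely as an illustration:

    #include <math.h>
    #include <stdio.h>

    static double energy(double x) { return (x - 1.0)*(x - 1.0); }
    static double force(double x)  { return -2.0*(x - 1.0); }      /* -dE/dx */

    int main(void)
    {
        double x = 5.0, ustep = 0.01, ftol = 1e-6;
        double e_min = energy(x);
        int    count;

        for (count = 0; count < 1000 && fabs(force(x)) > ftol; count++)
        {
            double f     = force(x);
            double x_try = x + ustep*(f/fabs(f));    /* step of size ustep downhill */
            double e_try = energy(x_try);

            if (e_try < e_min)           /* accept: keep state, be bolder next time */
            {
                x      = x_try;
                e_min  = e_try;
                ustep *= 1.2;
            }
            else                         /* reject: stay put, shorten the step */
            {
                ustep *= 0.5;
            }
        }
        printf("x = %g after %d steps\n", x, count);
        return 0;
    }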
*/ -+ ustep *= 0.5; -+ -+ if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) -+ { -+ /* Reload the old state */ -+ em_dd_partition_system(fplog, count, cr, top_global, inputrec, -+ s_min, top, mdatoms, fr, vsite, constr, -+ nrnb, wcycle); -+ } -+ } -+ -+ /* Determine new step */ -+ stepsize = ustep/s_min->fmax; -+ -+ /* Check if stepsize is too small, with 1 nm as a characteristic length */ -+#ifdef GMX_DOUBLE -+ if (count == nsteps || ustep < 1e-12) -+#else -+ if (count == nsteps || ustep < 1e-6) -+#endif -+ { -+ if (MASTER(cr)) -+ { -+ warn_step(stderr, inputrec->em_tol, count == nsteps, constr != NULL); -+ warn_step(fplog, inputrec->em_tol, count == nsteps, constr != NULL); -+ } -+ bAbort = TRUE; -+ } -+ -+ /* Send IMD energies and positions, if bIMD is TRUE. */ -+ if (do_IMD(inputrec->bIMD, count, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr)) -+ { -+ IMD_send_positions(inputrec->imd); -+ } -+ -+ count++; -+ } /* End of the loop */ -+ -+ /* IMD cleanup, if bIMD is TRUE. */ -+ IMD_finalize(inputrec->bIMD, inputrec->imd); -+ -+ /* Print some data... */ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "\nwriting lowest energy coordinates.\n"); -+ } -+ write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm), -+ top_global, inputrec, count, -+ s_min, state_global, f_global); -+ -+ fnormn = s_min->fnorm/sqrt(state_global->natoms); -+ -+ if (MASTER(cr)) -+ { -+ print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps, -+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); -+ print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps, -+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); -+ } -+ -+ finish_em(cr, outf, walltime_accounting, wcycle); -+ -+ /* To print the actual number of steps we needed somewhere */ -+ inputrec->nsteps = count; -+ -+ walltime_accounting_set_nsteps_done(walltime_accounting, count); -+ -+ return 0; -+} /* That's all folks */ -+ -+ -+double do_nm(FILE *fplog, t_commrec *cr, -+ int nfile, const t_filenm fnm[], -+ const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact, -+ int gmx_unused nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int gmx_unused stepout, -+ t_inputrec *inputrec, -+ gmx_mtop_t *top_global, t_fcdata *fcd, -+ t_state *state_global, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t gmx_unused ed, -+ t_forcerec *fr, -+ int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, -+ gmx_membed_t gmx_unused membed, -+ real gmx_unused cpt_period, real gmx_unused max_hours, -+ const char gmx_unused *deviceOptions, -+ int imdport, -+ unsigned long gmx_unused Flags, -+ gmx_walltime_accounting_t walltime_accounting) -+{ -+ const char *NM = "Normal Mode Analysis"; -+ gmx_mdoutf_t outf; -+ int natoms, atom, d; -+ int nnodes, node; -+ rvec *f_global; -+ gmx_localtop_t *top; -+ gmx_enerdata_t *enerd; -+ rvec *f; -+ gmx_global_stat_t gstat; -+ t_graph *graph; -+ real t, t0, lambda, lam0; -+ gmx_bool bNS; -+ tensor vir, pres; -+ rvec mu_tot; -+ rvec *fneg, *dfdx; -+ gmx_bool bSparse; /* use sparse matrix storage format */ -+ size_t sz = 0; -+ gmx_sparsematrix_t * sparse_matrix = NULL; -+ real * full_matrix = NULL; -+ em_state_t * state_work; -+ -+ /* added with respect to mdrun */ -+ int i, j, k, row, col; -+ real der_range = 10.0*sqrt(GMX_REAL_EPS); -+ real x_min; -+ real fnorm, fmax; -+ -+ if (constr != NULL) -+ { -+ gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this 
combination is not supported"); -+ } -+ -+ state_work = init_em_state(); -+ -+ /* Init em and store the local state in state_minimum */ -+ init_em(fplog, NM, cr, inputrec, -+ state_global, top_global, state_work, &top, -+ &f, &f_global, -+ nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, -+ nfile, fnm, &outf, NULL, imdport, Flags, wcycle); -+ -+ natoms = top_global->natoms; -+ snew(fneg, natoms); -+ snew(dfdx, natoms); -+ -+#ifndef GMX_DOUBLE -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, -+ "NOTE: This version of Gromacs has been compiled in single precision,\n" -+ " which MIGHT not be accurate enough for normal mode analysis.\n" -+ " Gromacs now uses sparse matrix storage, so the memory requirements\n" -+ " are fairly modest even if you recompile in double precision.\n\n"); -+ } -+#endif -+ -+ /* Check if we can/should use sparse storage format. -+ * -+ * Sparse format is only useful when the Hessian itself is sparse, which it -+ * will be when we use a cutoff. -+ * For small systems (n<1000) it is easier to always use full matrix format, though. -+ */ -+ if (EEL_FULL(fr->eeltype) || fr->rlist == 0.0) -+ { -+ md_print_info(cr, fplog, "Non-cutoff electrostatics used, forcing full Hessian format.\n"); -+ bSparse = FALSE; -+ } -+ else if (top_global->natoms < 1000) -+ { -+ md_print_info(cr, fplog, "Small system size (N=%d), using full Hessian format.\n", top_global->natoms); -+ bSparse = FALSE; -+ } -+ else -+ { -+ md_print_info(cr, fplog, "Using compressed symmetric sparse Hessian format.\n"); -+ bSparse = TRUE; -+ } -+ -+ if (MASTER(cr)) -+ { -+ sz = DIM*top_global->natoms; -+ -+ fprintf(stderr, "Allocating Hessian memory...\n\n"); -+ -+ if (bSparse) -+ { -+ sparse_matrix = gmx_sparsematrix_init(sz); -+ sparse_matrix->compressed_symmetric = TRUE; -+ } -+ else -+ { -+ snew(full_matrix, sz*sz); -+ } -+ } -+ -+ /* Initial values */ -+ t0 = inputrec->init_t; -+ lam0 = inputrec->fepvals->init_lambda; -+ t = t0; -+ lambda = lam0; -+ -+ init_nrnb(nrnb); -+ -+ where(); -+ -+ /* Write start time and temperature */ -+ print_em_start(fplog, cr, walltime_accounting, wcycle, NM); -+ -+ /* fudge nr of steps to nr of atoms */ -+ inputrec->nsteps = natoms*2; -+ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "starting normal mode calculation '%s'\n%d steps.\n\n", -+ *(top_global->name), (int)inputrec->nsteps); -+ } -+ -+ nnodes = cr->nnodes; -+ -+ /* Make evaluate_energy do a single node force calculation */ -+ cr->nnodes = 1; -+ evaluate_energy(fplog, cr, -+ top_global, state_work, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, TRUE); -+ cr->nnodes = nnodes; -+ -+ /* if forces are not small, warn user */ -+ get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, state_work); -+ -+ md_print_info(cr, fplog, "Maximum force:%12.5e\n", state_work->fmax); -+ if (state_work->fmax > 1.0e-3) -+ { -+ md_print_info(cr, fplog, -+ "The force is probably not small enough to " -+ "ensure that you are at a minimum.\n" -+ "Be aware that negative eigenvalues may occur\n" -+ "when the resulting matrix is diagonalized.\n\n"); -+ } -+ -+ /*********************************************************** -+ * -+ * Loop over all pairs in matrix -+ * -+ * do_force called twice. 
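The double loop that follows builds the Hessian one row at a time: each Cartesian degree of freedom is displaced by plus and minus der_range, the forces from the two evaluations are differenced, and because the force is the negative gradient the matrix element is -(f_plus - f_minus)/(2h). The per-row arithmetic, extracted into a stand-alone helper (names are placeholders, not GROMACS API):

    /* Fill one Hessian row from forces evaluated at x+h and x-h along a single
     * degree of freedom. ndof = 3*natoms; f is the negative gradient, hence
     * the extra minus sign in the central difference.
     */
    static void hessian_row(int ndof, const double *f_plus, const double *f_minus,
                            double h, double *row)
    {
        int col;

        for (col = 0; col < ndof; col++)
        {
            row[col] = -(f_plus[col] - f_minus[col])/(2.0*h);
        }
    }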
Once with positive and -+ * once with negative displacement -+ * -+ ************************************************************/ -+ -+ /* Steps are divided one by one over the nodes */ -+ for (atom = cr->nodeid; atom < natoms; atom += nnodes) -+ { -+ -+ for (d = 0; d < DIM; d++) -+ { -+ x_min = state_work->s.x[atom][d]; -+ -+ state_work->s.x[atom][d] = x_min - der_range; -+ -+ /* Make evaluate_energy do a single node force calculation */ -+ cr->nnodes = 1; -+ evaluate_energy(fplog, cr, -+ top_global, state_work, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, atom*2, FALSE); -+ -+ for (i = 0; i < natoms; i++) -+ { -+ copy_rvec(state_work->f[i], fneg[i]); -+ } -+ -+ state_work->s.x[atom][d] = x_min + der_range; -+ -+ evaluate_energy(fplog, cr, -+ top_global, state_work, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, atom*2+1, FALSE); -+ cr->nnodes = nnodes; -+ -+ /* x is restored to original */ -+ state_work->s.x[atom][d] = x_min; -+ -+ for (j = 0; j < natoms; j++) -+ { -+ for (k = 0; (k < DIM); k++) -+ { -+ dfdx[j][k] = -+ -(state_work->f[j][k] - fneg[j][k])/(2*der_range); -+ } -+ } -+ -+ if (!MASTER(cr)) -+ { -+#ifdef GMX_MPI -+#ifdef GMX_DOUBLE -+#define mpi_type MPI_DOUBLE -+#else -+#define mpi_type MPI_FLOAT -+#endif -+ MPI_Send(dfdx[0], natoms*DIM, mpi_type, MASTERNODE(cr), cr->nodeid, -+ cr->mpi_comm_mygroup); -+#endif -+ } -+ else -+ { -+ for (node = 0; (node < nnodes && atom+node < natoms); node++) -+ { -+ if (node > 0) -+ { -+#ifdef GMX_MPI -+ MPI_Status stat; -+ MPI_Recv(dfdx[0], natoms*DIM, mpi_type, node, node, -+ cr->mpi_comm_mygroup, &stat); -+#undef mpi_type -+#endif -+ } -+ -+ row = (atom + node)*DIM + d; -+ -+ for (j = 0; j < natoms; j++) -+ { -+ for (k = 0; k < DIM; k++) -+ { -+ col = j*DIM + k; -+ -+ if (bSparse) -+ { -+ if (col >= row && dfdx[j][k] != 0.0) -+ { -+ gmx_sparsematrix_increment_value(sparse_matrix, -+ row, col, dfdx[j][k]); -+ } -+ } -+ else -+ { -+ full_matrix[row*sz+col] = dfdx[j][k]; -+ } -+ } -+ } -+ } -+ } -+ -+ if (bVerbose && fplog) -+ { -+ fflush(fplog); -+ } -+ } -+ /* write progress */ -+ if (MASTER(cr) && bVerbose) -+ { -+ fprintf(stderr, "\rFinished step %d out of %d", -+ min(atom+nnodes, natoms), natoms); -+ fflush(stderr); -+ } -+ } -+ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "\n\nWriting Hessian...\n"); -+ gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix); -+ } -+ -+ finish_em(cr, outf, walltime_accounting, wcycle); -+ -+ walltime_accounting_set_nsteps_done(walltime_accounting, natoms*2); -+ -+ return 0; -+} -diff --git a/src/programs/mdrun/md.c b/src/programs/mdrun/md.c -index 3d98d59..b34d23c 100644 ---- a/src/programs/mdrun/md.c -+++ b/src/programs/mdrun/md.c -@@ -96,6 +96,12 @@ - #include "gromacs/swap/swapcoords.h" - #include "gromacs/imd/imd.h" - -+/* PLUMED */ -+#include "../../../Plumed.h" -+extern int plumedswitch; -+extern plumed plumedmain; -+/* END PLUMED */ -+ - #ifdef GMX_FAHCORE - #include "corewrap.h" - #endif -@@ -224,6 +230,12 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], - /* Interactive MD */ - gmx_bool bIMDstep = FALSE; - -+ /* PLUMED */ -+ int plumedNeedsEnergy=0; -+ int plumedWantsToStop=0; -+ matrix plumed_vir; -+ /* END PLUMED */ -+ - #ifdef GMX_FAHCORE - /* Temporary addition for FAHCORE checkpointing */ - int chkpt_ret; -@@ -651,6 +663,48 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], - 
fprintf(fplog, "\n"); - } - -+ /* PLUMED */ -+ if(plumedswitch){ -+ /* detect plumed API version */ -+ int pversion=0; -+ plumed_cmd(plumedmain,"getApiVersion",&pversion); -+ /* setting kbT is only implemented with api>1) */ -+ real kbT=ir->opts.ref_t[0]*BOLTZ; -+ if(pversion>1) plumed_cmd(plumedmain,"setKbT",&kbT); -+ -+ if(cr->ms && cr->ms->nsim>1) { -+ if(MASTER(cr)) plumed_cmd(plumedmain,"GREX setMPIIntercomm",&cr->ms->mpi_comm_masters); -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all); -+ }else{ -+ plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim); -+ } -+ } -+ plumed_cmd(plumedmain,"GREX init",NULL); -+ } -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ plumed_cmd(plumedmain,"setMPIComm",&cr->dd->mpi_comm_all); -+ } -+ } -+ plumed_cmd(plumedmain,"setNatoms",&top_global->natoms); -+ plumed_cmd(plumedmain,"setMDEngine","gromacs"); -+ plumed_cmd(plumedmain,"setLog",fplog); -+ real real_delta_t; -+ real_delta_t=ir->delta_t; -+ plumed_cmd(plumedmain,"setTimestep",&real_delta_t); -+ plumed_cmd(plumedmain,"init",NULL); -+ -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ plumed_cmd(plumedmain,"setAtomsNlocal",&cr->dd->nat_home); -+ plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->gatindex); -+ } -+ } -+ } -+ /* END PLUMED */ -+ - walltime_accounting_start(walltime_accounting); - wallcycle_start(wcycle, ewcRUN); - print_start(fplog, cr, walltime_accounting, "mdrun"); -@@ -955,6 +1009,13 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], - do_verbose && !bPMETuneRunning); - wallcycle_stop(wcycle, ewcDOMDEC); - /* If using an iterative integrator, reallocate space to match the decomposition */ -+ -+ /* PLUMED */ -+ if(plumedswitch){ -+ plumed_cmd(plumedmain,"setAtomsNlocal",&cr->dd->nat_home); -+ plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->gatindex); -+ } -+ /* END PLUMED */ - } - } - -@@ -1078,12 +1139,45 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], - * This is parallellized as well, and does communication too. - * Check comments in sim_util.c - */ -+ -+ /* PLUMED */ -+ plumedNeedsEnergy=0; -+ if(plumedswitch){ -+ long int lstep=step; plumed_cmd(plumedmain,"setStepLong",&lstep); -+ plumed_cmd(plumedmain,"setPositions",&state->x[0][0]); -+ plumed_cmd(plumedmain,"setMasses",&mdatoms->massT[0]); -+ plumed_cmd(plumedmain,"setCharges",&mdatoms->chargeA[0]); -+ plumed_cmd(plumedmain,"setBox",&state->box[0][0]); -+ plumed_cmd(plumedmain,"prepareCalc",NULL); -+ plumed_cmd(plumedmain,"setStopFlag",&plumedWantsToStop); -+ plumed_cmd(plumedmain,"setForces",&f[0][0]); -+ plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); -+ clear_mat(plumed_vir); -+ plumed_cmd(plumedmain,"setVirial",&plumed_vir[0][0]); -+ } -+ /* END PLUMED */ - do_force(fplog, cr, ir, step, nrnb, wcycle, top, groups, - state->box, state->x, &state->hist, - f, force_vir, mdatoms, enerd, fcd, - state->lambda, graph, - fr, vsite, mu_tot, t, mdoutf_get_fp_field(outf), ed, bBornRadii, - (bNS ? 
GMX_FORCE_NS : 0) | force_flags); -+ /* PLUMED */ -+ if(plumedswitch){ -+ if(plumedNeedsEnergy){ -+ msmul(force_vir,2.0,plumed_vir); -+ plumed_cmd(plumedmain,"setEnergy",&enerd->term[F_EPOT]); -+ plumed_cmd(plumedmain,"performCalc",NULL); -+ msmul(plumed_vir,0.5,force_vir); -+ } else { -+ msmul(plumed_vir,0.5,plumed_vir); -+ m_add(force_vir,plumed_vir,force_vir); -+ } -+ if ((repl_ex_nst > 0) && (step > 0) && !bLastStep && -+ do_per_step(step,repl_ex_nst)) plumed_cmd(plumedmain,"GREX savePositions",NULL); -+ if(plumedWantsToStop) ir->nsteps=step_rel+1; -+ } -+ /* END PLUMED */ - } - - if (bVV && !bStartingFromCpt && !bRerunMD) -diff --git a/src/programs/mdrun/md.c.preplumed b/src/programs/mdrun/md.c.preplumed -new file mode 100644 -index 0000000..3d98d59 ---- /dev/null -+++ b/src/programs/mdrun/md.c.preplumed -@@ -0,0 +1,2058 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team. -+ * Copyright (c) 2011,2012,2013,2014, by the GROMACS development team, led by -+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+ * and including many others, as listed in the AUTHORS file in the -+ * top-level source directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. -+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. 
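All of the PLUMED coupling added to do_md above goes through plumed_cmd() key/value calls from the PLUMED C interface: a one-time setup (setNatoms, setMDEngine, setTimestep, init) followed, each step, by handing over positions, masses, charges, box, forces and virial around the force calculation so the plugin can add its bias forces in place. A stripped-down serial sketch of that call sequence; the data, file names and step loop are placeholders, and the MPI, replica-exchange and domain-decomposition commands used in the patch are left out:

    #include <stdio.h>
    #include "Plumed.h"                     /* PLUMED C interface, as used by the patch */

    #define NATOMS 3

    int main(void)
    {
        double pos[3*NATOMS]    = {0.0, 0.0, 0.0,  0.1, 0.0, 0.0,  0.0, 0.1, 0.0};
        double forces[3*NATOMS] = {0.0};
        double masses[NATOMS]   = {12.0, 1.0, 1.0};
        double charges[NATOMS]  = {-0.4, 0.2, 0.2};
        double box[9]           = {0.0};    /* all zeros: no periodic box in this sketch */
        double virial[9]        = {0.0};
        double dt               = 0.002;    /* ps, illustrative */
        int    natoms           = NATOMS;
        int    step;

        plumed p = plumed_create();
        plumed_cmd(p, "setNatoms",    &natoms);
        plumed_cmd(p, "setMDEngine",  "sketch");        /* the patch passes "gromacs" */
        plumed_cmd(p, "setTimestep",  &dt);
        plumed_cmd(p, "setPlumedDat", "plumed.dat");    /* assumed input file name */
        plumed_cmd(p, "setLogFile",   "plumed.log");
        plumed_cmd(p, "init",         NULL);

        for (step = 0; step < 10; step++)
        {
            /* hand PLUMED the current state; it adds its bias to forces/virial */
            plumed_cmd(p, "setStep",      &step);
            plumed_cmd(p, "setPositions", pos);
            plumed_cmd(p, "setMasses",    masses);
            plumed_cmd(p, "setCharges",   charges);
            plumed_cmd(p, "setBox",       box);
            plumed_cmd(p, "setForces",    forces);
            plumed_cmd(p, "setVirial",    virial);
            plumed_cmd(p, "calc",         NULL);        /* prepareCalc + performCalc in one go */
        }

        plumed_finalize(p);
        return 0;
    }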
-+ */ -+#ifdef HAVE_CONFIG_H -+#include -+#endif -+ -+#include "typedefs.h" -+#include "gromacs/utility/smalloc.h" -+#include "sysstuff.h" -+#include "vec.h" -+#include "vcm.h" -+#include "mdebin.h" -+#include "nrnb.h" -+#include "calcmu.h" -+#include "index.h" -+#include "vsite.h" -+#include "update.h" -+#include "ns.h" -+#include "mdrun.h" -+#include "md_support.h" -+#include "md_logging.h" -+#include "network.h" -+#include "xvgr.h" -+#include "physics.h" -+#include "names.h" -+#include "force.h" -+#include "disre.h" -+#include "orires.h" -+#include "pme.h" -+#include "mdatoms.h" -+#include "repl_ex.h" -+#include "deform.h" -+#include "qmmm.h" -+#include "domdec.h" -+#include "domdec_network.h" -+#include "gromacs/gmxlib/topsort.h" -+#include "coulomb.h" -+#include "constr.h" -+#include "shellfc.h" -+#include "gromacs/gmxpreprocess/compute_io.h" -+#include "checkpoint.h" -+#include "mtop_util.h" -+#include "sighandler.h" -+#include "txtdump.h" -+#include "gromacs/utility/cstringutil.h" -+#include "pme_loadbal.h" -+#include "bondf.h" -+#include "membed.h" -+#include "types/nlistheuristics.h" -+#include "types/iteratedconstraints.h" -+#include "nbnxn_cuda_data_mgmt.h" -+ -+#include "gromacs/utility/gmxmpi.h" -+#include "gromacs/fileio/confio.h" -+#include "gromacs/fileio/trajectory_writing.h" -+#include "gromacs/fileio/trnio.h" -+#include "gromacs/fileio/trxio.h" -+#include "gromacs/fileio/xtcio.h" -+#include "gromacs/timing/wallcycle.h" -+#include "gromacs/timing/walltime_accounting.h" -+#include "gromacs/pulling/pull.h" -+#include "gromacs/swap/swapcoords.h" -+#include "gromacs/imd/imd.h" -+ -+#ifdef GMX_FAHCORE -+#include "corewrap.h" -+#endif -+ -+static void reset_all_counters(FILE *fplog, t_commrec *cr, -+ gmx_int64_t step, -+ gmx_int64_t *step_rel, t_inputrec *ir, -+ gmx_wallcycle_t wcycle, t_nrnb *nrnb, -+ gmx_walltime_accounting_t walltime_accounting, -+ nbnxn_cuda_ptr_t cu_nbv) -+{ -+ char sbuf[STEPSTRSIZE]; -+ -+ /* Reset all the counters related to performance over the run */ -+ md_print_warn(cr, fplog, "step %s: resetting all time and cycle counters\n", -+ gmx_step_str(step, sbuf)); -+ -+ if (cu_nbv) -+ { -+ nbnxn_cuda_reset_timings(cu_nbv); -+ } -+ -+ wallcycle_stop(wcycle, ewcRUN); -+ wallcycle_reset_all(wcycle); -+ if (DOMAINDECOMP(cr)) -+ { -+ reset_dd_statistics_counters(cr->dd); -+ } -+ init_nrnb(nrnb); -+ ir->init_step += *step_rel; -+ ir->nsteps -= *step_rel; -+ *step_rel = 0; -+ wallcycle_start(wcycle, ewcRUN); -+ walltime_accounting_start(walltime_accounting); -+ print_date_and_time(fplog, cr->nodeid, "Restarted time", gmx_gettime()); -+} -+ -+double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], -+ const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact, -+ int nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int stepout, t_inputrec *ir, -+ gmx_mtop_t *top_global, -+ t_fcdata *fcd, -+ t_state *state_global, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t ed, t_forcerec *fr, -+ int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, gmx_membed_t membed, -+ real cpt_period, real max_hours, -+ const char gmx_unused *deviceOptions, -+ int imdport, -+ unsigned long Flags, -+ gmx_walltime_accounting_t walltime_accounting) -+{ -+ gmx_mdoutf_t outf = NULL; -+ gmx_int64_t step, step_rel; -+ double elapsed_time; -+ double t, t0, lam0[efptNR]; -+ gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEner; -+ gmx_bool bNS, bNStList, bSimAnn, bStopCM, bRerunMD, bNotLastFrame = FALSE, -+ bFirstStep, bStateFromCP, 
bStateFromTPX, bInitStep, bLastStep, -+ bBornRadii, bStartingFromCpt; -+ gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE; -+ gmx_bool do_ene, do_log, do_verbose, bRerunWarnNoV = TRUE, -+ bForceUpdate = FALSE, bCPT; -+ gmx_bool bMasterState; -+ int force_flags, cglo_flags; -+ tensor force_vir, shake_vir, total_vir, tmp_vir, pres; -+ int i, m; -+ t_trxstatus *status; -+ rvec mu_tot; -+ t_vcm *vcm; -+ t_state *bufstate = NULL; -+ matrix *scale_tot, pcoupl_mu, M, ebox; -+ gmx_nlheur_t nlh; -+ t_trxframe rerun_fr; -+ gmx_repl_ex_t repl_ex = NULL; -+ int nchkpt = 1; -+ gmx_localtop_t *top; -+ t_mdebin *mdebin = NULL; -+ t_state *state = NULL; -+ rvec *f_global = NULL; -+ gmx_enerdata_t *enerd; -+ rvec *f = NULL; -+ gmx_global_stat_t gstat; -+ gmx_update_t upd = NULL; -+ t_graph *graph = NULL; -+ globsig_t gs; -+ gmx_groups_t *groups; -+ gmx_ekindata_t *ekind, *ekind_save; -+ gmx_shellfc_t shellfc; -+ int count, nconverged = 0; -+ real timestep = 0; -+ double tcount = 0; -+ gmx_bool bConverged = TRUE, bOK, bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition; -+ gmx_bool bAppend; -+ gmx_bool bResetCountersHalfMaxH = FALSE; -+ gmx_bool bVV, bIterativeCase, bFirstIterate, bTemp, bPres, bTrotter; -+ gmx_bool bUpdateDoLR; -+ real dvdl_constr; -+ rvec *cbuf = NULL; -+ matrix lastbox; -+ real veta_save, scalevir, tracevir; -+ real vetanew = 0; -+ int lamnew = 0; -+ /* for FEP */ -+ int nstfep; -+ double cycles; -+ real saved_conserved_quantity = 0; -+ real last_ekin = 0; -+ int iter_i; -+ t_extmass MassQ; -+ int **trotter_seq; -+ char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE]; -+ int handled_stop_condition = gmx_stop_cond_none; /* compare to get_stop_condition*/ -+ gmx_iterate_t iterate; -+ gmx_int64_t multisim_nsteps = -1; /* number of steps to do before first multisim -+ simulation stops. If equal to zero, don't -+ communicate any more between multisims.*/ -+ /* PME load balancing data for GPU kernels */ -+ pme_load_balancing_t pme_loadbal = NULL; -+ double cycles_pmes; -+ gmx_bool bPMETuneTry = FALSE, bPMETuneRunning = FALSE; -+ -+ /* Interactive MD */ -+ gmx_bool bIMDstep = FALSE; -+ -+#ifdef GMX_FAHCORE -+ /* Temporary addition for FAHCORE checkpointing */ -+ int chkpt_ret; -+#endif -+ -+ /* Check for special mdrun options */ -+ bRerunMD = (Flags & MD_RERUN); -+ bAppend = (Flags & MD_APPENDFILES); -+ if (Flags & MD_RESETCOUNTERSHALFWAY) -+ { -+ if (ir->nsteps > 0) -+ { -+ /* Signal to reset the counters half the simulation steps. */ -+ wcycle_set_reset_counters(wcycle, ir->nsteps/2); -+ } -+ /* Signal to reset the counters halfway the simulation time. */ -+ bResetCountersHalfMaxH = (max_hours > 0); -+ } -+ -+ /* md-vv uses averaged full step velocities for T-control -+ md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control) -+ md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */ -+ bVV = EI_VV(ir->eI); -+ if (bVV) /* to store the initial velocities while computing virial */ -+ { -+ snew(cbuf, top_global->natoms); -+ } -+ /* all the iteratative cases - only if there are constraints */ -+ bIterativeCase = ((IR_NPH_TROTTER(ir) || IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD)); -+ gmx_iterate_init(&iterate, FALSE); /* The default value of iterate->bIterationActive is set to -+ false in this step. 
The correct value, true or false, -+ is set at each step, as it depends on the frequency of temperature -+ and pressure control.*/ -+ bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir))); -+ -+ if (bRerunMD) -+ { -+ /* Since we don't know if the frames read are related in any way, -+ * rebuild the neighborlist at every step. -+ */ -+ ir->nstlist = 1; -+ ir->nstcalcenergy = 1; -+ nstglobalcomm = 1; -+ } -+ -+ check_ir_old_tpx_versions(cr, fplog, ir, top_global); -+ -+ nstglobalcomm = check_nstglobalcomm(fplog, cr, nstglobalcomm, ir); -+ bGStatEveryStep = (nstglobalcomm == 1); -+ -+ if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL) -+ { -+ fprintf(fplog, -+ "To reduce the energy communication with nstlist = -1\n" -+ "the neighbor list validity should not be checked at every step,\n" -+ "this means that exact integration is not guaranteed.\n" -+ "The neighbor list validity is checked after:\n" -+ " - 2*std.dev.(n.list life time) steps.\n" -+ "In most cases this will result in exact integration.\n" -+ "This reduces the energy communication by a factor of 2 to 3.\n" -+ "If you want less energy communication, set nstlist > 3.\n\n"); -+ } -+ -+ if (bRerunMD) -+ { -+ ir->nstxout_compressed = 0; -+ } -+ groups = &top_global->groups; -+ -+ /* Initial values */ -+ init_md(fplog, cr, ir, oenv, &t, &t0, state_global->lambda, -+ &(state_global->fep_state), lam0, -+ nrnb, top_global, &upd, -+ nfile, fnm, &outf, &mdebin, -+ force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, Flags, wcycle); -+ -+ clear_mat(total_vir); -+ clear_mat(pres); -+ /* Energy terms and groups */ -+ snew(enerd, 1); -+ init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, -+ enerd); -+ if (DOMAINDECOMP(cr)) -+ { -+ f = NULL; -+ } -+ else -+ { -+ snew(f, top_global->natoms); -+ } -+ -+ /* Kinetic energy data */ -+ snew(ekind, 1); -+ init_ekindata(fplog, top_global, &(ir->opts), ekind); -+ /* needed for iteration of constraints */ -+ snew(ekind_save, 1); -+ init_ekindata(fplog, top_global, &(ir->opts), ekind_save); -+ /* Copy the cos acceleration to the groups struct */ -+ ekind->cosacc.cos_accel = ir->cos_accel; -+ -+ gstat = global_stat_init(ir); -+ debug_gmx(); -+ -+ /* Check for polarizable models and flexible constraints */ -+ shellfc = init_shell_flexcon(fplog, -+ top_global, n_flexible_constraints(constr), -+ (ir->bContinuation || -+ (DOMAINDECOMP(cr) && !MASTER(cr))) ? 
-+ NULL : state_global->x); -+ if (shellfc && ir->nstcalcenergy != 1) -+ { -+ gmx_fatal(FARGS, "You have nstcalcenergy set to a value (%d) that is different from 1.\nThis is not supported in combinations with shell particles.\nPlease make a new tpr file.", ir->nstcalcenergy); -+ } -+ if (shellfc && DOMAINDECOMP(cr)) -+ { -+ gmx_fatal(FARGS, "Shell particles are not implemented with domain decomposition, use a single rank"); -+ } -+ if (shellfc && ir->eI == eiNM) -+ { -+ /* Currently shells don't work with Normal Modes */ -+ gmx_fatal(FARGS, "Normal Mode analysis is not supported with shells.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n"); -+ } -+ -+ if (vsite && ir->eI == eiNM) -+ { -+ /* Currently virtual sites don't work with Normal Modes */ -+ gmx_fatal(FARGS, "Normal Mode analysis is not supported with virtual sites.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n"); -+ } -+ -+ if (DEFORM(*ir)) -+ { -+ tMPI_Thread_mutex_lock(&deform_init_box_mutex); -+ set_deform_reference_box(upd, -+ deform_init_init_step_tpx, -+ deform_init_box_tpx); -+ tMPI_Thread_mutex_unlock(&deform_init_box_mutex); -+ } -+ -+ { -+ double io = compute_io(ir, top_global->natoms, groups, mdebin->ebin->nener, 1); -+ if ((io > 2000) && MASTER(cr)) -+ { -+ fprintf(stderr, -+ "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", -+ io); -+ } -+ } -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ top = dd_init_local_top(top_global); -+ -+ snew(state, 1); -+ dd_init_local_state(cr->dd, state_global, state); -+ -+ if (DDMASTER(cr->dd) && ir->nstfout) -+ { -+ snew(f_global, state_global->natoms); -+ } -+ } -+ else -+ { -+ top = gmx_mtop_generate_local_top(top_global, ir); -+ -+ forcerec_set_excl_load(fr, top); -+ -+ state = serial_init_local_state(state_global); -+ f_global = f; -+ -+ atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms); -+ -+ if (vsite) -+ { -+ set_vsite_top(vsite, top, mdatoms, cr); -+ } -+ -+ if (ir->ePBC != epbcNONE && !fr->bMolPBC) -+ { -+ graph = mk_graph(fplog, &(top->idef), 0, top_global->natoms, FALSE, FALSE); -+ } -+ -+ if (shellfc) -+ { -+ make_local_shells(cr, mdatoms, shellfc); -+ } -+ -+ setup_bonded_threading(fr, &top->idef); -+ } -+ -+ /* Set up interactive MD (IMD) */ -+ init_IMD(ir, cr, top_global, fplog, ir->nstcalcenergy, state_global->x, -+ nfile, fnm, oenv, imdport, Flags); -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ /* Distribute the charge groups over the nodes from the master node */ -+ dd_partition_system(fplog, ir->init_step, cr, TRUE, 1, -+ state_global, top_global, ir, -+ state, &f, mdatoms, top, fr, -+ vsite, shellfc, constr, -+ nrnb, wcycle, FALSE); -+ -+ } -+ -+ update_mdatoms(mdatoms, state->lambda[efptMASS]); -+ -+ if (opt2bSet("-cpi", nfile, fnm)) -+ { -+ bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr); -+ } -+ else -+ { -+ bStateFromCP = FALSE; -+ } -+ -+ if (ir->bExpanded) -+ { -+ init_expanded_ensemble(bStateFromCP, ir, &state->dfhist); -+ } -+ -+ if (MASTER(cr)) -+ { -+ if (bStateFromCP) -+ { -+ /* Update mdebin with energy history if appending to output files */ -+ if (Flags & MD_APPENDFILES) -+ { -+ restore_energyhistory_from_state(mdebin, &state_global->enerhist); -+ } -+ else -+ { -+ /* We might have read an energy history from checkpoint, -+ * free the allocated memory and reset the counts. 
-+ */ -+ done_energyhistory(&state_global->enerhist); -+ init_energyhistory(&state_global->enerhist); -+ } -+ } -+ /* Set the initial energy history in state by updating once */ -+ update_energyhistory(&state_global->enerhist, mdebin); -+ } -+ -+ /* Initialize constraints */ -+ if (constr && !DOMAINDECOMP(cr)) -+ { -+ set_constraints(constr, top, ir, mdatoms, cr); -+ } -+ -+ if (repl_ex_nst > 0 && MASTER(cr)) -+ { -+ repl_ex = init_replica_exchange(fplog, cr->ms, state_global, ir, -+ repl_ex_nst, repl_ex_nex, repl_ex_seed); -+ } -+ -+ /* PME tuning is only supported with GPUs or PME nodes and not with rerun. -+ * PME tuning is not supported with PME only for LJ and not for Coulomb. -+ */ -+ if ((Flags & MD_TUNEPME) && -+ EEL_PME(fr->eeltype) && -+ ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) && -+ !bRerunMD) -+ { -+ pme_loadbal_init(&pme_loadbal, ir, state->box, fr->ic, fr->pmedata); -+ cycles_pmes = 0; -+ if (cr->duty & DUTY_PME) -+ { -+ /* Start tuning right away, as we can't measure the load */ -+ bPMETuneRunning = TRUE; -+ } -+ else -+ { -+ /* Separate PME nodes, we can measure the PP/PME load balance */ -+ bPMETuneTry = TRUE; -+ } -+ } -+ -+ if (!ir->bContinuation && !bRerunMD) -+ { -+ if (mdatoms->cFREEZE && (state->flags & (1<homenr; i++) -+ { -+ for (m = 0; m < DIM; m++) -+ { -+ if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m]) -+ { -+ state->v[i][m] = 0; -+ } -+ } -+ } -+ } -+ -+ if (constr) -+ { -+ /* Constrain the initial coordinates and velocities */ -+ do_constrain_first(fplog, constr, ir, mdatoms, state, -+ cr, nrnb, fr, top); -+ } -+ if (vsite) -+ { -+ /* Construct the virtual sites for the initial configuration */ -+ construct_vsites(vsite, state->x, ir->delta_t, NULL, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, cr, state->box); -+ } -+ } -+ -+ debug_gmx(); -+ -+ /* set free energy calculation frequency as the minimum -+ greatest common denominator of nstdhdl, nstexpanded, and repl_ex_nst*/ -+ nstfep = ir->fepvals->nstdhdl; -+ if (ir->bExpanded) -+ { -+ nstfep = gmx_greatest_common_divisor(ir->fepvals->nstdhdl, nstfep); -+ } -+ if (repl_ex_nst > 0) -+ { -+ nstfep = gmx_greatest_common_divisor(repl_ex_nst, nstfep); -+ } -+ -+ /* I'm assuming we need global communication the first time! MRS */ -+ cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT -+ | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM : 0) -+ | (bVV ? CGLO_PRESSURE : 0) -+ | (bVV ? CGLO_CONSTRAINT : 0) -+ | (bRerunMD ? CGLO_RERUNMD : 0) -+ | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN : 0)); -+ -+ bSumEkinhOld = FALSE; -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &bSumEkinhOld, cglo_flags); -+ if (ir->eI == eiVVAK) -+ { -+ /* a second call to get the half step temperature initialized as well */ -+ /* we do the same call as above, but turn the pressure off -- internally to -+ compute_globals, this is recognized as a velocity verlet half-step -+ kinetic energy calculation. 
This minimized excess variables, but -+ perhaps loses some logic?*/ -+ -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &bSumEkinhOld, -+ cglo_flags &~(CGLO_STOPCM | CGLO_PRESSURE)); -+ } -+ -+ /* Calculate the initial half step temperature, and save the ekinh_old */ -+ if (!(Flags & MD_STARTFROMCPT)) -+ { -+ for (i = 0; (i < ir->opts.ngtc); i++) -+ { -+ copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old); -+ } -+ } -+ if (ir->eI != eiVV) -+ { -+ enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step, -+ and there is no previous step */ -+ } -+ -+ /* if using an iterative algorithm, we need to create a working directory for the state. */ -+ if (bIterativeCase) -+ { -+ bufstate = init_bufstate(state); -+ } -+ -+ /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter -+ temperature control */ -+ trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter); -+ -+ if (MASTER(cr)) -+ { -+ if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS) -+ { -+ fprintf(fplog, -+ "RMS relative constraint deviation after constraining: %.2e\n", -+ constr_rmsd(constr, FALSE)); -+ } -+ if (EI_STATE_VELOCITY(ir->eI)) -+ { -+ fprintf(fplog, "Initial temperature: %g K\n", enerd->term[F_TEMP]); -+ } -+ if (bRerunMD) -+ { -+ fprintf(stderr, "starting md rerun '%s', reading coordinates from" -+ " input trajectory '%s'\n\n", -+ *(top_global->name), opt2fn("-rerun", nfile, fnm)); -+ if (bVerbose) -+ { -+ fprintf(stderr, "Calculated time to finish depends on nsteps from " -+ "run input file,\nwhich may not correspond to the time " -+ "needed to process input trajectory.\n\n"); -+ } -+ } -+ else -+ { -+ char tbuf[20]; -+ fprintf(stderr, "starting mdrun '%s'\n", -+ *(top_global->name)); -+ if (ir->nsteps >= 0) -+ { -+ sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t); -+ } -+ else -+ { -+ sprintf(tbuf, "%s", "infinite"); -+ } -+ if (ir->init_step > 0) -+ { -+ fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n", -+ gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf, -+ gmx_step_str(ir->init_step, sbuf2), -+ ir->init_step*ir->delta_t); -+ } -+ else -+ { -+ fprintf(stderr, "%s steps, %s ps.\n", -+ gmx_step_str(ir->nsteps, sbuf), tbuf); -+ } -+ } -+ fprintf(fplog, "\n"); -+ } -+ -+ walltime_accounting_start(walltime_accounting); -+ wallcycle_start(wcycle, ewcRUN); -+ print_start(fplog, cr, walltime_accounting, "mdrun"); -+ -+ /* safest point to do file checkpointing is here. 
More general point would be immediately before integrator call */ -+#ifdef GMX_FAHCORE -+ chkpt_ret = fcCheckPointParallel( cr->nodeid, -+ NULL, 0); -+ if (chkpt_ret == 0) -+ { -+ gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 ); -+ } -+#endif -+ -+ debug_gmx(); -+ /*********************************************************** -+ * -+ * Loop over MD steps -+ * -+ ************************************************************/ -+ -+ /* if rerunMD then read coordinates and velocities from input trajectory */ -+ if (bRerunMD) -+ { -+ if (getenv("GMX_FORCE_UPDATE")) -+ { -+ bForceUpdate = TRUE; -+ } -+ -+ rerun_fr.natoms = 0; -+ if (MASTER(cr)) -+ { -+ bNotLastFrame = read_first_frame(oenv, &status, -+ opt2fn("-rerun", nfile, fnm), -+ &rerun_fr, TRX_NEED_X | TRX_READ_V); -+ if (rerun_fr.natoms != top_global->natoms) -+ { -+ gmx_fatal(FARGS, -+ "Number of atoms in trajectory (%d) does not match the " -+ "run input file (%d)\n", -+ rerun_fr.natoms, top_global->natoms); -+ } -+ if (ir->ePBC != epbcNONE) -+ { -+ if (!rerun_fr.bBox) -+ { -+ gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f does not contain a box, while pbc is used", rerun_fr.step, rerun_fr.time); -+ } -+ if (max_cutoff2(ir->ePBC, rerun_fr.box) < sqr(fr->rlistlong)) -+ { -+ gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f has too small box dimensions", rerun_fr.step, rerun_fr.time); -+ } -+ } -+ } -+ -+ if (PAR(cr)) -+ { -+ rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame); -+ } -+ -+ if (ir->ePBC != epbcNONE) -+ { -+ /* Set the shift vectors. -+ * Necessary here when have a static box different from the tpr box. -+ */ -+ calc_shifts(rerun_fr.box, fr->shift_vec); -+ } -+ } -+ -+ /* loop over MD steps or if rerunMD to end of input trajectory */ -+ bFirstStep = TRUE; -+ /* Skip the first Nose-Hoover integration when we get the state from tpx */ -+ bStateFromTPX = !bStateFromCP; -+ bInitStep = bFirstStep && (bStateFromTPX || bVV); -+ bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep; -+ bLastStep = FALSE; -+ bSumEkinhOld = FALSE; -+ bDoReplEx = FALSE; -+ bExchanged = FALSE; -+ bNeedRepartition = FALSE; -+ -+ init_global_signals(&gs, cr, ir, repl_ex_nst); -+ -+ step = ir->init_step; -+ step_rel = 0; -+ -+ if (ir->nstlist == -1) -+ { -+ init_nlistheuristics(&nlh, bGStatEveryStep, step); -+ } -+ -+ if (MULTISIM(cr) && (repl_ex_nst <= 0 )) -+ { -+ /* check how many steps are left in other sims */ -+ multisim_nsteps = get_multisim_nsteps(cr, ir->nsteps); -+ } -+ -+ -+ /* and stop now if we should */ -+ bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) || -+ ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps ))); -+ while (!bLastStep || (bRerunMD && bNotLastFrame)) -+ { -+ -+ wallcycle_start(wcycle, ewcSTEP); -+ -+ if (bRerunMD) -+ { -+ if (rerun_fr.bStep) -+ { -+ step = rerun_fr.step; -+ step_rel = step - ir->init_step; -+ } -+ if (rerun_fr.bTime) -+ { -+ t = rerun_fr.time; -+ } -+ else -+ { -+ t = step; -+ } -+ } -+ else -+ { -+ bLastStep = (step_rel == ir->nsteps); -+ t = t0 + step*ir->delta_t; -+ } -+ -+ if (ir->efep != efepNO || ir->bSimTemp) -+ { -+ /* find and set the current lambdas. If rerunning, we either read in a state, or a lambda value, -+ requiring different logic. 
*/ -+ -+ set_current_lambdas(step, ir->fepvals, bRerunMD, &rerun_fr, state_global, state, lam0); -+ bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl); -+ bDoFEP = (do_per_step(step, nstfep) && (ir->efep != efepNO)); -+ bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) -+ && (ir->bExpanded) && (step > 0) && (!bStartingFromCpt)); -+ } -+ -+ bDoReplEx = ((repl_ex_nst > 0) && (step > 0) && !bLastStep && -+ do_per_step(step, repl_ex_nst)); -+ -+ if (bSimAnn) -+ { -+ update_annealing_target_temp(&(ir->opts), t); -+ } -+ -+ if (bRerunMD) -+ { -+ if (!DOMAINDECOMP(cr) || MASTER(cr)) -+ { -+ for (i = 0; i < state_global->natoms; i++) -+ { -+ copy_rvec(rerun_fr.x[i], state_global->x[i]); -+ } -+ if (rerun_fr.bV) -+ { -+ for (i = 0; i < state_global->natoms; i++) -+ { -+ copy_rvec(rerun_fr.v[i], state_global->v[i]); -+ } -+ } -+ else -+ { -+ for (i = 0; i < state_global->natoms; i++) -+ { -+ clear_rvec(state_global->v[i]); -+ } -+ if (bRerunWarnNoV) -+ { -+ fprintf(stderr, "\nWARNING: Some frames do not contain velocities.\n" -+ " Ekin, temperature and pressure are incorrect,\n" -+ " the virial will be incorrect when constraints are present.\n" -+ "\n"); -+ bRerunWarnNoV = FALSE; -+ } -+ } -+ } -+ copy_mat(rerun_fr.box, state_global->box); -+ copy_mat(state_global->box, state->box); -+ -+ if (vsite && (Flags & MD_RERUN_VSITE)) -+ { -+ if (DOMAINDECOMP(cr)) -+ { -+ gmx_fatal(FARGS, "Vsite recalculation with -rerun is not implemented with domain decomposition, use a single rank"); -+ } -+ if (graph) -+ { -+ /* Following is necessary because the graph may get out of sync -+ * with the coordinates if we only have every N'th coordinate set -+ */ -+ mk_mshift(fplog, graph, fr->ePBC, state->box, state->x); -+ shift_self(graph, state->box, state->x); -+ } -+ construct_vsites(vsite, state->x, ir->delta_t, state->v, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, cr, state->box); -+ if (graph) -+ { -+ unshift_self(graph, state->box, state->x); -+ } -+ } -+ } -+ -+ /* Stop Center of Mass motion */ -+ bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm)); -+ -+ if (bRerunMD) -+ { -+ /* for rerun MD always do Neighbour Searching */ -+ bNS = (bFirstStep || ir->nstlist != 0); -+ bNStList = bNS; -+ } -+ else -+ { -+ /* Determine whether or not to do Neighbour Searching and LR */ -+ bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0); -+ -+ bNS = (bFirstStep || bExchanged || bNeedRepartition || bNStList || bDoFEP || -+ (ir->nstlist == -1 && nlh.nabnsb > 0)); -+ -+ if (bNS && ir->nstlist == -1) -+ { -+ set_nlistheuristics(&nlh, bFirstStep || bExchanged || bNeedRepartition || bDoFEP, step); -+ } -+ } -+ -+ /* check whether we should stop because another simulation has -+ stopped. 
*/ -+ if (MULTISIM(cr)) -+ { -+ if ( (multisim_nsteps >= 0) && (step_rel >= multisim_nsteps) && -+ (multisim_nsteps != ir->nsteps) ) -+ { -+ if (bNS) -+ { -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, -+ "Stopping simulation %d because another one has finished\n", -+ cr->ms->sim); -+ } -+ bLastStep = TRUE; -+ gs.sig[eglsCHKPT] = 1; -+ } -+ } -+ } -+ -+ /* < 0 means stop at next step, > 0 means stop at next NS step */ -+ if ( (gs.set[eglsSTOPCOND] < 0) || -+ ( (gs.set[eglsSTOPCOND] > 0) && (bNStList || ir->nstlist == 0) ) ) -+ { -+ bLastStep = TRUE; -+ } -+ -+ /* Determine whether or not to update the Born radii if doing GB */ -+ bBornRadii = bFirstStep; -+ if (ir->implicit_solvent && (step % ir->nstgbradii == 0)) -+ { -+ bBornRadii = TRUE; -+ } -+ -+ do_log = do_per_step(step, ir->nstlog) || bFirstStep || bLastStep; -+ do_verbose = bVerbose && -+ (step % stepout == 0 || bFirstStep || bLastStep); -+ -+ if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD)) -+ { -+ if (bRerunMD) -+ { -+ bMasterState = TRUE; -+ } -+ else -+ { -+ bMasterState = FALSE; -+ /* Correct the new box if it is too skewed */ -+ if (DYNAMIC_BOX(*ir)) -+ { -+ if (correct_box(fplog, step, state->box, graph)) -+ { -+ bMasterState = TRUE; -+ } -+ } -+ if (DOMAINDECOMP(cr) && bMasterState) -+ { -+ dd_collect_state(cr->dd, state, state_global); -+ } -+ } -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ /* Repartition the domain decomposition */ -+ wallcycle_start(wcycle, ewcDOMDEC); -+ dd_partition_system(fplog, step, cr, -+ bMasterState, nstglobalcomm, -+ state_global, top_global, ir, -+ state, &f, mdatoms, top, fr, -+ vsite, shellfc, constr, -+ nrnb, wcycle, -+ do_verbose && !bPMETuneRunning); -+ wallcycle_stop(wcycle, ewcDOMDEC); -+ /* If using an iterative integrator, reallocate space to match the decomposition */ -+ } -+ } -+ -+ if (MASTER(cr) && do_log) -+ { -+ print_ebin_header(fplog, step, t, state->lambda[efptFEP]); /* can we improve the information printed here? */ -+ } -+ -+ if (ir->efep != efepNO) -+ { -+ update_mdatoms(mdatoms, state->lambda[efptMASS]); -+ } -+ -+ if ((bRerunMD && rerun_fr.bV) || bExchanged) -+ { -+ -+ /* We need the kinetic energy at minus the half step for determining -+ * the full step kinetic energy and possibly for T-coupling.*/ -+ /* This may not be quite working correctly yet . . . . */ -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &bSumEkinhOld, -+ CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE); -+ } -+ clear_mat(force_vir); -+ -+ /* We write a checkpoint at this MD step when: -+ * either at an NS step when we signalled through gs, -+ * or at the last step (but not when we do not want confout), -+ * but never at the first step or with rerun. -+ */ -+ bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) || -+ (bLastStep && (Flags & MD_CONFOUT))) && -+ step > ir->init_step && !bRerunMD); -+ if (bCPT) -+ { -+ gs.set[eglsCHKPT] = 0; -+ } -+ -+ /* Determine the energy and pressure: -+ * at nstcalcenergy steps and at energy output steps (set below). -+ */ -+ if (EI_VV(ir->eI) && (!bInitStep)) -+ { -+ /* for vv, the first half of the integration actually corresponds -+ to the previous step. bCalcEner is only required to be evaluated on the 'next' step, -+ but the virial needs to be calculated on both the current step and the 'next' step. Future -+ reorganization may be able to get rid of one of the bCalcVir=TRUE steps. 
*/ -+ -+ bCalcEner = do_per_step(step-1, ir->nstcalcenergy); -+ bCalcVir = bCalcEner || -+ (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple))); -+ } -+ else -+ { -+ bCalcEner = do_per_step(step, ir->nstcalcenergy); -+ bCalcVir = bCalcEner || -+ (ir->epc != epcNO && do_per_step(step, ir->nstpcouple)); -+ } -+ -+ /* Do we need global communication ? */ -+ bGStat = (bCalcVir || bCalcEner || bStopCM || -+ do_per_step(step, nstglobalcomm) || (bVV && IR_NVT_TROTTER(ir) && do_per_step(step-1, nstglobalcomm)) || -+ (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck)); -+ -+ do_ene = (do_per_step(step, ir->nstenergy) || bLastStep); -+ -+ if (do_ene || do_log || bDoReplEx) -+ { -+ bCalcVir = TRUE; -+ bCalcEner = TRUE; -+ bGStat = TRUE; -+ } -+ -+ /* these CGLO_ options remain the same throughout the iteration */ -+ cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) | -+ (bGStat ? CGLO_GSTAT : 0) -+ ); -+ -+ force_flags = (GMX_FORCE_STATECHANGED | -+ ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) | -+ GMX_FORCE_ALLFORCES | -+ GMX_FORCE_SEPLRF | -+ (bCalcVir ? GMX_FORCE_VIRIAL : 0) | -+ (bCalcEner ? GMX_FORCE_ENERGY : 0) | -+ (bDoFEP ? GMX_FORCE_DHDL : 0) -+ ); -+ -+ if (fr->bTwinRange) -+ { -+ if (do_per_step(step, ir->nstcalclr)) -+ { -+ force_flags |= GMX_FORCE_DO_LR; -+ } -+ } -+ -+ if (shellfc) -+ { -+ /* Now is the time to relax the shells */ -+ count = relax_shell_flexcon(fplog, cr, bVerbose, step, -+ ir, bNS, force_flags, -+ top, -+ constr, enerd, fcd, -+ state, f, force_vir, mdatoms, -+ nrnb, wcycle, graph, groups, -+ shellfc, fr, bBornRadii, t, mu_tot, -+ &bConverged, vsite, -+ mdoutf_get_fp_field(outf)); -+ tcount += count; -+ -+ if (bConverged) -+ { -+ nconverged++; -+ } -+ } -+ else -+ { -+ /* The coordinates (x) are shifted (to get whole molecules) -+ * in do_force. -+ * This is parallellized as well, and does communication too. -+ * Check comments in sim_util.c -+ */ -+ do_force(fplog, cr, ir, step, nrnb, wcycle, top, groups, -+ state->box, state->x, &state->hist, -+ f, force_vir, mdatoms, enerd, fcd, -+ state->lambda, graph, -+ fr, vsite, mu_tot, t, mdoutf_get_fp_field(outf), ed, bBornRadii, -+ (bNS ? GMX_FORCE_NS : 0) | force_flags); -+ } -+ -+ if (bVV && !bStartingFromCpt && !bRerunMD) -+ /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */ -+ { -+ wallcycle_start(wcycle, ewcUPDATE); -+ if (ir->eI == eiVV && bInitStep) -+ { -+ /* if using velocity verlet with full time step Ekin, -+ * take the first half step only to compute the -+ * virial for the first step. From there, -+ * revert back to the initial coordinates -+ * so that the input is actually the initial step. -+ */ -+ copy_rvecn(state->v, cbuf, 0, state->natoms); /* should make this better for parallelizing? */ -+ } -+ else -+ { -+ /* this is for NHC in the Ekin(t+dt/2) version of vv */ -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1); -+ } -+ -+ /* If we are using twin-range interactions where the long-range component -+ * is only evaluated every nstcalclr>1 steps, we should do a special update -+ * step to combine the long-range forces on these steps. -+ * For nstcalclr=1 this is not done, since the forces would have been added -+ * directly to the short-range forces already. -+ * -+ * TODO Remove various aspects of VV+twin-range in master -+ * branch, because VV integrators did not ever support -+ * twin-range multiple time stepping with constraints. 
-+ */ -+ bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); -+ -+ update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, -+ f, bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd, -+ ekind, M, upd, bInitStep, etrtVELOCITY1, -+ cr, nrnb, constr, &top->idef); -+ -+ if (bIterativeCase && do_per_step(step-1, ir->nstpcouple) && !bInitStep) -+ { -+ gmx_iterate_init(&iterate, TRUE); -+ } -+ /* for iterations, we save these vectors, as we will be self-consistently iterating -+ the calculations */ -+ -+ /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */ -+ -+ /* save the state */ -+ if (iterate.bIterationActive) -+ { -+ copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts)); -+ } -+ -+ bFirstIterate = TRUE; -+ while (bFirstIterate || iterate.bIterationActive) -+ { -+ if (iterate.bIterationActive) -+ { -+ copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts)); -+ if (bFirstIterate && bTrotter) -+ { -+ /* The first time through, we need a decent first estimate -+ of veta(t+dt) to compute the constraints. Do -+ this by computing the box volume part of the -+ trotter integration at this time. Nothing else -+ should be changed by this routine here. If -+ !(first time), we start with the previous value -+ of veta. */ -+ -+ veta_save = state->veta; -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ0); -+ vetanew = state->veta; -+ state->veta = veta_save; -+ } -+ } -+ -+ bOK = TRUE; -+ if (!bRerunMD || rerun_fr.bV || bForceUpdate) /* Why is rerun_fr.bV here? Unclear. */ -+ { -+ wallcycle_stop(wcycle, ewcUPDATE); -+ update_constraints(fplog, step, NULL, ir, ekind, mdatoms, -+ state, fr->bMolPBC, graph, f, -+ &top->idef, shake_vir, -+ cr, nrnb, wcycle, upd, constr, -+ TRUE, bCalcVir, vetanew); -+ wallcycle_start(wcycle, ewcUPDATE); -+ -+ if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1) -+ { -+ /* Correct the virial for multiple time stepping */ -+ m_sub(shake_vir, fr->vir_twin_constr, shake_vir); -+ } -+ -+ if (!bOK) -+ { -+ gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains"); -+ } -+ -+ } -+ else if (graph) -+ { -+ /* Need to unshift here if a do_force has been -+ called in the previous step */ -+ unshift_self(graph, state->box, state->x); -+ } -+ -+ /* if VV, compute the pressure and constraints */ -+ /* For VV2, we strictly only need this if using pressure -+ * control, but we really would like to have accurate pressures -+ * printed out. -+ * Think about ways around this in the future? -+ * For now, keep this choice in comments. -+ */ -+ /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */ -+ /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/ -+ bPres = TRUE; -+ bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK)); -+ if (bCalcEner && ir->eI == eiVVAK) /*MRS: 7/9/2010 -- this still doesn't fix it?*/ -+ { -+ bSumEkinhOld = TRUE; -+ } -+ /* for vv, the first half of the integration actually corresponds to the previous step. -+ So we need information from the last step in the first half of the integration */ -+ if (bGStat || do_per_step(step-1, nstglobalcomm)) -+ { -+ wallcycle_stop(wcycle, ewcUPDATE); -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &bSumEkinhOld, -+ cglo_flags -+ | CGLO_ENERGY -+ | (bTemp ? CGLO_TEMPERATURE : 0) -+ | (bPres ? 
CGLO_PRESSURE : 0) -+ | (bPres ? CGLO_CONSTRAINT : 0) -+ | ((iterate.bIterationActive) ? CGLO_ITERATE : 0) -+ | (bFirstIterate ? CGLO_FIRSTITERATE : 0) -+ | CGLO_SCALEEKIN -+ ); -+ /* explanation of above: -+ a) We compute Ekin at the full time step -+ if 1) we are using the AveVel Ekin, and it's not the -+ initial step, or 2) if we are using AveEkin, but need the full -+ time step kinetic energy for the pressure (always true now, since we want accurate statistics). -+ b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in -+ EkinAveVel because it's needed for the pressure */ -+ wallcycle_start(wcycle, ewcUPDATE); -+ } -+ /* temperature scaling and pressure scaling to produce the extended variables at t+dt */ -+ if (!bInitStep) -+ { -+ if (bTrotter) -+ { -+ m_add(force_vir, shake_vir, total_vir); /* we need the un-dispersion corrected total vir here */ -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2); -+ } -+ else -+ { -+ if (bExchanged) -+ { -+ wallcycle_stop(wcycle, ewcUPDATE); -+ /* We need the kinetic energy at minus the half step for determining -+ * the full step kinetic energy and possibly for T-coupling.*/ -+ /* This may not be quite working correctly yet . . . . */ -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &bSumEkinhOld, -+ CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE); -+ wallcycle_start(wcycle, ewcUPDATE); -+ } -+ } -+ } -+ -+ if (iterate.bIterationActive && -+ done_iterating(cr, fplog, step, &iterate, bFirstIterate, -+ state->veta, &vetanew)) -+ { -+ break; -+ } -+ bFirstIterate = FALSE; -+ } -+ -+ if (bTrotter && !bInitStep) -+ { -+ copy_mat(shake_vir, state->svir_prev); -+ copy_mat(force_vir, state->fvir_prev); -+ if (IR_NVT_TROTTER(ir) && ir->eI == eiVV) -+ { -+ /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */ -+ enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, NULL, (ir->eI == eiVV), FALSE); -+ enerd->term[F_EKIN] = trace(ekind->ekin); -+ } -+ } -+ /* if it's the initial step, we performed this first step just to get the constraint virial */ -+ if (bInitStep && ir->eI == eiVV) -+ { -+ copy_rvecn(cbuf, state->v, 0, state->natoms); -+ } -+ wallcycle_stop(wcycle, ewcUPDATE); -+ } -+ -+ /* MRS -- now done iterating -- compute the conserved quantity */ -+ if (bVV) -+ { -+ saved_conserved_quantity = compute_conserved_from_auxiliary(ir, state, &MassQ); -+ if (ir->eI == eiVV) -+ { -+ last_ekin = enerd->term[F_EKIN]; -+ } -+ if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) -+ { -+ saved_conserved_quantity -= enerd->term[F_DISPCORR]; -+ } -+ /* sum up the foreign energy and dhdl terms for vv. currently done every step so that dhdl is correct in the .edr */ -+ if (!bRerunMD) -+ { -+ sum_dhdl(enerd, state->lambda, ir->fepvals); -+ } -+ } -+ -+ /* ######## END FIRST UPDATE STEP ############## */ -+ /* ######## If doing VV, we now have v(dt) ###### */ -+ if (bDoExpanded) -+ { -+ /* perform extended ensemble sampling in lambda - we don't -+ actually move to the new state before outputting -+ statistics, but if performing simulated tempering, we -+ do update the velocities and the tau_t. 
*/ -+ -+ lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, &state->dfhist, step, state->v, mdatoms); -+ /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */ -+ copy_df_history(&state_global->dfhist, &state->dfhist); -+ } -+ -+ /* Now we have the energies and forces corresponding to the -+ * coordinates at time t. We must output all of this before -+ * the update. -+ */ -+ do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, -+ ir, state, state_global, top_global, fr, -+ outf, mdebin, ekind, f, f_global, -+ &nchkpt, -+ bCPT, bRerunMD, bLastStep, (Flags & MD_CONFOUT), -+ bSumEkinhOld); -+ /* Check if IMD step and do IMD communication, if bIMD is TRUE. */ -+ bIMDstep = do_IMD(ir->bIMD, step, cr, bNS, state->box, state->x, ir, t, wcycle); -+ -+ /* kludge -- virial is lost with restart for NPT control. Must restart */ -+ if (bStartingFromCpt && bVV) -+ { -+ copy_mat(state->svir_prev, shake_vir); -+ copy_mat(state->fvir_prev, force_vir); -+ } -+ -+ elapsed_time = walltime_accounting_get_current_elapsed_time(walltime_accounting); -+ -+ /* Check whether everything is still allright */ -+ if (((int)gmx_get_stop_condition() > handled_stop_condition) -+#ifdef GMX_THREAD_MPI -+ && MASTER(cr) -+#endif -+ ) -+ { -+ /* this is just make gs.sig compatible with the hack -+ of sending signals around by MPI_Reduce with together with -+ other floats */ -+ if (gmx_get_stop_condition() == gmx_stop_cond_next_ns) -+ { -+ gs.sig[eglsSTOPCOND] = 1; -+ } -+ if (gmx_get_stop_condition() == gmx_stop_cond_next) -+ { -+ gs.sig[eglsSTOPCOND] = -1; -+ } -+ /* < 0 means stop at next step, > 0 means stop at next NS step */ -+ if (fplog) -+ { -+ fprintf(fplog, -+ "\n\nReceived the %s signal, stopping at the next %sstep\n\n", -+ gmx_get_signal_name(), -+ gs.sig[eglsSTOPCOND] == 1 ? "NS " : ""); -+ fflush(fplog); -+ } -+ fprintf(stderr, -+ "\n\nReceived the %s signal, stopping at the next %sstep\n\n", -+ gmx_get_signal_name(), -+ gs.sig[eglsSTOPCOND] == 1 ? "NS " : ""); -+ fflush(stderr); -+ handled_stop_condition = (int)gmx_get_stop_condition(); -+ } -+ else if (MASTER(cr) && (bNS || ir->nstlist <= 0) && -+ (max_hours > 0 && elapsed_time > max_hours*60.0*60.0*0.99) && -+ gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0) -+ { -+ /* Signal to terminate the run */ -+ gs.sig[eglsSTOPCOND] = 1; -+ if (fplog) -+ { -+ fprintf(fplog, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99); -+ } -+ fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99); -+ } -+ -+ if (bResetCountersHalfMaxH && MASTER(cr) && -+ elapsed_time > max_hours*60.0*60.0*0.495) -+ { -+ gs.sig[eglsRESETCOUNTERS] = 1; -+ } -+ -+ if (ir->nstlist == -1 && !bRerunMD) -+ { -+ /* When bGStatEveryStep=FALSE, global_stat is only called -+ * when we check the atom displacements, not at NS steps. -+ * This means that also the bonded interaction count check is not -+ * performed immediately after NS. Therefore a few MD steps could -+ * be performed with missing interactions. -+ * But wrong energies are never written to file, -+ * since energies are only written after global_stat -+ * has been called. -+ */ -+ if (step >= nlh.step_nscheck) -+ { -+ nlh.nabnsb = natoms_beyond_ns_buffer(ir, fr, &top->cgs, -+ nlh.scale_tot, state->x); -+ } -+ else -+ { -+ /* This is not necessarily true, -+ * but step_nscheck is determined quite conservatively. 
-+ */ -+ nlh.nabnsb = 0; -+ } -+ } -+ -+ /* In parallel we only have to check for checkpointing in steps -+ * where we do global communication, -+ * otherwise the other nodes don't know. -+ */ -+ if (MASTER(cr) && ((bGStat || !PAR(cr)) && -+ cpt_period >= 0 && -+ (cpt_period == 0 || -+ elapsed_time >= nchkpt*cpt_period*60.0)) && -+ gs.set[eglsCHKPT] == 0) -+ { -+ gs.sig[eglsCHKPT] = 1; -+ } -+ -+ /* at the start of step, randomize or scale the velocities (trotter done elsewhere) */ -+ if (EI_VV(ir->eI)) -+ { -+ if (!bInitStep) -+ { -+ update_tcouple(step, ir, state, ekind, &MassQ, mdatoms); -+ } -+ if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */ -+ { -+ gmx_bool bIfRandomize; -+ bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state, upd, constr); -+ /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */ -+ if (constr && bIfRandomize) -+ { -+ update_constraints(fplog, step, NULL, ir, ekind, mdatoms, -+ state, fr->bMolPBC, graph, f, -+ &top->idef, tmp_vir, -+ cr, nrnb, wcycle, upd, constr, -+ TRUE, bCalcVir, vetanew); -+ } -+ } -+ } -+ -+ if (bIterativeCase && do_per_step(step, ir->nstpcouple)) -+ { -+ gmx_iterate_init(&iterate, TRUE); -+ /* for iterations, we save these vectors, as we will be redoing the calculations */ -+ copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts)); -+ } -+ -+ bFirstIterate = TRUE; -+ while (bFirstIterate || iterate.bIterationActive) -+ { -+ /* We now restore these vectors to redo the calculation with improved extended variables */ -+ if (iterate.bIterationActive) -+ { -+ copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts)); -+ } -+ -+ /* We make the decision to break or not -after- the calculation of Ekin and Pressure, -+ so scroll down for that logic */ -+ -+ /* ######### START SECOND UPDATE STEP ################# */ -+ /* Box is changed in update() when we do pressure coupling, -+ * but we should still use the old box for energy corrections and when -+ * writing it to the energy file, so it matches the trajectory files for -+ * the same timestep above. Make a copy in a separate array. -+ */ -+ copy_mat(state->box, lastbox); -+ -+ bOK = TRUE; -+ dvdl_constr = 0; -+ -+ if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate)) -+ { -+ wallcycle_start(wcycle, ewcUPDATE); -+ /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */ -+ if (bTrotter) -+ { -+ if (iterate.bIterationActive) -+ { -+ if (bFirstIterate) -+ { -+ scalevir = 1; -+ } -+ else -+ { -+ /* we use a new value of scalevir to converge the iterations faster */ -+ scalevir = tracevir/trace(shake_vir); -+ } -+ msmul(shake_vir, scalevir, shake_vir); -+ m_add(force_vir, shake_vir, total_vir); -+ clear_mat(shake_vir); -+ } -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3); -+ /* We can only do Berendsen coupling after we have summed -+ * the kinetic energy or virial. Since the happens -+ * in global_state after update, we should only do it at -+ * step % nstlist = 1 with bGStatEveryStep=FALSE. -+ */ -+ } -+ else -+ { -+ update_tcouple(step, ir, state, ekind, &MassQ, mdatoms); -+ update_pcouple(fplog, step, ir, state, pcoupl_mu, M, bInitStep); -+ } -+ -+ if (bVV) -+ { -+ bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); -+ -+ /* velocity half-step update */ -+ update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, -+ bUpdateDoLR, fr->f_twin, bCalcVir ? 
&fr->vir_twin_constr : NULL, fcd, -+ ekind, M, upd, FALSE, etrtVELOCITY2, -+ cr, nrnb, constr, &top->idef); -+ } -+ -+ /* Above, initialize just copies ekinh into ekin, -+ * it doesn't copy position (for VV), -+ * and entire integrator for MD. -+ */ -+ -+ if (ir->eI == eiVVAK) -+ { -+ copy_rvecn(state->x, cbuf, 0, state->natoms); -+ } -+ bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); -+ -+ update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, -+ bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd, -+ ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef); -+ wallcycle_stop(wcycle, ewcUPDATE); -+ -+ update_constraints(fplog, step, &dvdl_constr, ir, ekind, mdatoms, state, -+ fr->bMolPBC, graph, f, -+ &top->idef, shake_vir, -+ cr, nrnb, wcycle, upd, constr, -+ FALSE, bCalcVir, state->veta); -+ -+ if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1) -+ { -+ /* Correct the virial for multiple time stepping */ -+ m_sub(shake_vir, fr->vir_twin_constr, shake_vir); -+ } -+ -+ if (ir->eI == eiVVAK) -+ { -+ /* erase F_EKIN and F_TEMP here? */ -+ /* just compute the kinetic energy at the half step to perform a trotter step */ -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, NULL, FALSE, lastbox, -+ top_global, &bSumEkinhOld, -+ cglo_flags | CGLO_TEMPERATURE -+ ); -+ wallcycle_start(wcycle, ewcUPDATE); -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4); -+ /* now we know the scaling, we can compute the positions again again */ -+ copy_rvecn(cbuf, state->x, 0, state->natoms); -+ -+ bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); -+ -+ update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, -+ bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd, -+ ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef); -+ wallcycle_stop(wcycle, ewcUPDATE); -+ -+ /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */ -+ /* are the small terms in the shake_vir here due -+ * to numerical errors, or are they important -+ * physically? I'm thinking they are just errors, but not completely sure. -+ * For now, will call without actually constraining, constr=NULL*/ -+ update_constraints(fplog, step, NULL, ir, ekind, mdatoms, -+ state, fr->bMolPBC, graph, f, -+ &top->idef, tmp_vir, -+ cr, nrnb, wcycle, upd, NULL, -+ FALSE, bCalcVir, -+ state->veta); -+ } -+ if (!bOK) -+ { -+ gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains"); -+ } -+ -+ if (fr->bSepDVDL && fplog && do_log) -+ { -+ gmx_print_sepdvdl(fplog, "Constraint dV/dl", 0.0, dvdl_constr); -+ } -+ if (bVV) -+ { -+ /* this factor or 2 correction is necessary -+ because half of the constraint force is removed -+ in the vv step, so we have to double it. See -+ the Redmine issue #1255. It is not yet clear -+ if the factor of 2 is exact, or just a very -+ good approximation, and this will be -+ investigated. The next step is to see if this -+ can be done adding a dhdl contribution from the -+ rattle step, but this is somewhat more -+ complicated with the current code. Will be -+ investigated, hopefully for 4.6.3. However, -+ this current solution is much better than -+ having it completely wrong. 
-+ */ -+ enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr; -+ } -+ else -+ { -+ enerd->term[F_DVDL_CONSTR] += dvdl_constr; -+ } -+ } -+ else if (graph) -+ { -+ /* Need to unshift here */ -+ unshift_self(graph, state->box, state->x); -+ } -+ -+ if (vsite != NULL) -+ { -+ wallcycle_start(wcycle, ewcVSITECONSTR); -+ if (graph != NULL) -+ { -+ shift_self(graph, state->box, state->x); -+ } -+ construct_vsites(vsite, state->x, ir->delta_t, state->v, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, cr, state->box); -+ -+ if (graph != NULL) -+ { -+ unshift_self(graph, state->box, state->x); -+ } -+ wallcycle_stop(wcycle, ewcVSITECONSTR); -+ } -+ -+ /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints ############ */ -+ /* With Leap-Frog we can skip compute_globals at -+ * non-communication steps, but we need to calculate -+ * the kinetic energy one step before communication. -+ */ -+ if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm))) -+ { -+ if (ir->nstlist == -1 && bFirstIterate) -+ { -+ gs.sig[eglsNABNSB] = nlh.nabnsb; -+ } -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, -+ bFirstIterate ? &gs : NULL, -+ (step_rel % gs.nstms == 0) && -+ (multisim_nsteps < 0 || (step_rel < multisim_nsteps)), -+ lastbox, -+ top_global, &bSumEkinhOld, -+ cglo_flags -+ | (!EI_VV(ir->eI) || bRerunMD ? CGLO_ENERGY : 0) -+ | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0) -+ | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) -+ | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0) -+ | (iterate.bIterationActive ? CGLO_ITERATE : 0) -+ | (bFirstIterate ? CGLO_FIRSTITERATE : 0) -+ | CGLO_CONSTRAINT -+ ); -+ if (ir->nstlist == -1 && bFirstIterate) -+ { -+ nlh.nabnsb = gs.set[eglsNABNSB]; -+ gs.set[eglsNABNSB] = 0; -+ } -+ } -+ /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */ -+ /* ############# END CALC EKIN AND PRESSURE ################# */ -+ -+ /* Note: this is OK, but there are some numerical precision issues with using the convergence of -+ the virial that should probably be addressed eventually. state->veta has better properies, -+ but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could -+ generate the new shake_vir, but test the veta value for convergence. This will take some thought. */ -+ -+ if (iterate.bIterationActive && -+ done_iterating(cr, fplog, step, &iterate, bFirstIterate, -+ trace(shake_vir), &tracevir)) -+ { -+ break; -+ } -+ bFirstIterate = FALSE; -+ } -+ -+ if (!bVV || bRerunMD) -+ { -+ /* sum up the foreign energy and dhdl terms for md and sd. currently done every step so that dhdl is correct in the .edr */ -+ sum_dhdl(enerd, state->lambda, ir->fepvals); -+ } -+ update_box(fplog, step, ir, mdatoms, state, f, -+ ir->nstlist == -1 ? &nlh.scale_tot : NULL, pcoupl_mu, nrnb, upd); -+ -+ /* ################# END UPDATE STEP 2 ################# */ -+ /* #### We now have r(t+dt) and v(t+dt/2) ############# */ -+ -+ /* The coordinates (x) were unshifted in update */ -+ if (!bGStat) -+ { -+ /* We will not sum ekinh_old, -+ * so signal that we still have to do it. 
-+ */ -+ bSumEkinhOld = TRUE; -+ } -+ -+ /* ######### BEGIN PREPARING EDR OUTPUT ########### */ -+ -+ /* use the directly determined last velocity, not actually the averaged half steps */ -+ if (bTrotter && ir->eI == eiVV) -+ { -+ enerd->term[F_EKIN] = last_ekin; -+ } -+ enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN]; -+ -+ if (bVV) -+ { -+ enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity; -+ } -+ else -+ { -+ enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir, state, &MassQ); -+ } -+ /* ######### END PREPARING EDR OUTPUT ########### */ -+ -+ /* Output stuff */ -+ if (MASTER(cr)) -+ { -+ gmx_bool do_dr, do_or; -+ -+ if (fplog && do_log && bDoExpanded) -+ { -+ /* only needed if doing expanded ensemble */ -+ PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : NULL, -+ &state_global->dfhist, state->fep_state, ir->nstlog, step); -+ } -+ if (!(bStartingFromCpt && (EI_VV(ir->eI)))) -+ { -+ if (bCalcEner) -+ { -+ upd_mdebin(mdebin, bDoDHDL, TRUE, -+ t, mdatoms->tmass, enerd, state, -+ ir->fepvals, ir->expandedvals, lastbox, -+ shake_vir, force_vir, total_vir, pres, -+ ekind, mu_tot, constr); -+ } -+ else -+ { -+ upd_mdebin_step(mdebin); -+ } -+ -+ do_dr = do_per_step(step, ir->nstdisreout); -+ do_or = do_per_step(step, ir->nstorireout); -+ -+ print_ebin(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or, do_log ? fplog : NULL, -+ step, t, -+ eprNORMAL, bCompact, mdebin, fcd, groups, &(ir->opts)); -+ } -+ if (ir->ePull != epullNO) -+ { -+ pull_print_output(ir->pull, step, t); -+ } -+ -+ if (do_per_step(step, ir->nstlog)) -+ { -+ if (fflush(fplog) != 0) -+ { -+ gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?"); -+ } -+ } -+ } -+ if (bDoExpanded) -+ { -+ /* Have to do this part _after_ outputting the logfile and the edr file */ -+ /* Gets written into the state at the beginning of next loop*/ -+ state->fep_state = lamnew; -+ } -+ /* Print the remaining wall clock time for the run */ -+ if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal()) && !bPMETuneRunning) -+ { -+ if (shellfc) -+ { -+ fprintf(stderr, "\n"); -+ } -+ print_time(stderr, walltime_accounting, step, ir, cr); -+ } -+ -+ /* Ion/water position swapping. -+ * Not done in last step since trajectory writing happens before this call -+ * in the MD loop and exchanges would be lost anyway. */ -+ bNeedRepartition = FALSE; -+ if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && -+ do_per_step(step, ir->swap->nstswap)) -+ { -+ bNeedRepartition = do_swapcoords(cr, step, t, ir, wcycle, -+ bRerunMD ? rerun_fr.x : state->x, -+ bRerunMD ? 
rerun_fr.box : state->box, -+ top_global, MASTER(cr) && bVerbose, bRerunMD); -+ -+ if (bNeedRepartition && DOMAINDECOMP(cr)) -+ { -+ dd_collect_state(cr->dd, state, state_global); -+ } -+ } -+ -+ /* Replica exchange */ -+ bExchanged = FALSE; -+ if (bDoReplEx) -+ { -+ bExchanged = replica_exchange(fplog, cr, repl_ex, -+ state_global, enerd, -+ state, step, t); -+ } -+ -+ if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) ) -+ { -+ dd_partition_system(fplog, step, cr, TRUE, 1, -+ state_global, top_global, ir, -+ state, &f, mdatoms, top, fr, -+ vsite, shellfc, constr, -+ nrnb, wcycle, FALSE); -+ } -+ -+ bFirstStep = FALSE; -+ bInitStep = FALSE; -+ bStartingFromCpt = FALSE; -+ -+ /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */ -+ /* With all integrators, except VV, we need to retain the pressure -+ * at the current step for coupling at the next step. -+ */ -+ if ((state->flags & (1<nstpcouple > 0 && step % ir->nstpcouple == 0))) -+ { -+ /* Store the pressure in t_state for pressure coupling -+ * at the next MD step. -+ */ -+ copy_mat(pres, state->pres_prev); -+ } -+ -+ /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */ -+ -+ if ( (membed != NULL) && (!bLastStep) ) -+ { -+ rescale_membed(step_rel, membed, state_global->x); -+ } -+ -+ if (bRerunMD) -+ { -+ if (MASTER(cr)) -+ { -+ /* read next frame from input trajectory */ -+ bNotLastFrame = read_next_frame(oenv, status, &rerun_fr); -+ } -+ -+ if (PAR(cr)) -+ { -+ rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame); -+ } -+ } -+ -+ if (!bRerunMD || !rerun_fr.bStep) -+ { -+ /* increase the MD step number */ -+ step++; -+ step_rel++; -+ } -+ -+ cycles = wallcycle_stop(wcycle, ewcSTEP); -+ if (DOMAINDECOMP(cr) && wcycle) -+ { -+ dd_cycles_add(cr->dd, cycles, ddCyclStep); -+ } -+ -+ if (bPMETuneRunning || bPMETuneTry) -+ { -+ /* PME grid + cut-off optimization with GPUs or PME nodes */ -+ -+ /* Count the total cycles over the last steps */ -+ cycles_pmes += cycles; -+ -+ /* We can only switch cut-off at NS steps */ -+ if (step % ir->nstlist == 0) -+ { -+ /* PME grid + cut-off optimization with GPUs or PME nodes */ -+ if (bPMETuneTry) -+ { -+ if (DDMASTER(cr->dd)) -+ { -+ /* PME node load is too high, start tuning */ -+ bPMETuneRunning = (dd_pme_f_ratio(cr->dd) >= 1.05); -+ } -+ dd_bcast(cr->dd, sizeof(gmx_bool), &bPMETuneRunning); -+ -+ if (bPMETuneRunning && -+ fr->nbv->bUseGPU && DOMAINDECOMP(cr) && -+ !(cr->duty & DUTY_PME)) -+ { -+ /* Lock DLB=auto to off (does nothing when DLB=yes/no). -+ * With GPUs + separate PME ranks, we don't want DLB. -+ * This could happen when we scan coarse grids and -+ * it would then never be turned off again. -+ * This would hurt performance at the final, optimal -+ * grid spacing, where DLB almost never helps. -+ * Also, DLB can limit the cut-off for PME tuning. -+ */ -+ dd_dlb_set_lock(cr->dd, TRUE); -+ } -+ -+ if (bPMETuneRunning || step_rel > ir->nstlist*50) -+ { -+ bPMETuneTry = FALSE; -+ } -+ } -+ if (bPMETuneRunning) -+ { -+ /* init_step might not be a multiple of nstlist, -+ * but the first cycle is always skipped anyhow. -+ */ -+ bPMETuneRunning = -+ pme_load_balance(pme_loadbal, cr, -+ (bVerbose && MASTER(cr)) ? 
stderr : NULL, -+ fplog, -+ ir, state, cycles_pmes, -+ fr->ic, fr->nbv, &fr->pmedata, -+ step); -+ -+ /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */ -+ fr->ewaldcoeff_q = fr->ic->ewaldcoeff_q; -+ fr->ewaldcoeff_lj = fr->ic->ewaldcoeff_lj; -+ fr->rlist = fr->ic->rlist; -+ fr->rlistlong = fr->ic->rlistlong; -+ fr->rcoulomb = fr->ic->rcoulomb; -+ fr->rvdw = fr->ic->rvdw; -+ -+ if (ir->eDispCorr != edispcNO) -+ { -+ calc_enervirdiff(NULL, ir->eDispCorr, fr); -+ } -+ -+ if (!bPMETuneRunning && -+ DOMAINDECOMP(cr) && -+ dd_dlb_is_locked(cr->dd)) -+ { -+ /* Unlock the DLB=auto, DLB is allowed to activate -+ * (but we don't expect it to activate in most cases). -+ */ -+ dd_dlb_set_lock(cr->dd, FALSE); -+ } -+ } -+ cycles_pmes = 0; -+ } -+ } -+ -+ if (step_rel == wcycle_get_reset_counters(wcycle) || -+ gs.set[eglsRESETCOUNTERS] != 0) -+ { -+ /* Reset all the counters related to performance over the run */ -+ reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, walltime_accounting, -+ fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL); -+ wcycle_set_reset_counters(wcycle, -1); -+ if (!(cr->duty & DUTY_PME)) -+ { -+ /* Tell our PME node to reset its counters */ -+ gmx_pme_send_resetcounters(cr, step); -+ } -+ /* Correct max_hours for the elapsed time */ -+ max_hours -= elapsed_time/(60.0*60.0); -+ bResetCountersHalfMaxH = FALSE; -+ gs.set[eglsRESETCOUNTERS] = 0; -+ } -+ -+ /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */ -+ IMD_prep_energies_send_positions(ir->bIMD && MASTER(cr), bIMDstep, ir->imd, enerd, step, bCalcEner, wcycle); -+ -+ } -+ /* End of main MD loop */ -+ debug_gmx(); -+ -+ /* Closing TNG files can include compressing data. Therefore it is good to do that -+ * before stopping the time measurements. */ -+ mdoutf_tng_close(outf); -+ -+ /* Stop measuring walltime */ -+ walltime_accounting_end(walltime_accounting); -+ -+ if (bRerunMD && MASTER(cr)) -+ { -+ close_trj(status); -+ } -+ -+ if (!(cr->duty & DUTY_PME)) -+ { -+ /* Tell the PME only node to finish */ -+ gmx_pme_send_finish(cr); -+ } -+ -+ if (MASTER(cr)) -+ { -+ if (ir->nstcalcenergy > 0 && !bRerunMD) -+ { -+ print_ebin(mdoutf_get_fp_ene(outf), FALSE, FALSE, FALSE, fplog, step, t, -+ eprAVER, FALSE, mdebin, fcd, groups, &(ir->opts)); -+ } -+ } -+ -+ done_mdoutf(outf); -+ debug_gmx(); -+ -+ if (ir->nstlist == -1 && nlh.nns > 0 && fplog) -+ { -+ fprintf(fplog, "Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n", nlh.s1/nlh.nns, sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns))); -+ fprintf(fplog, "Average number of atoms that crossed the half buffer length: %.1f\n\n", nlh.ab/nlh.nns); -+ } -+ -+ if (pme_loadbal != NULL) -+ { -+ pme_loadbal_done(pme_loadbal, cr, fplog, -+ fr->nbv != NULL && fr->nbv->bUseGPU); -+ } -+ -+ if (shellfc && fplog) -+ { -+ fprintf(fplog, "Fraction of iterations that converged: %.2f %%\n", -+ (nconverged*100.0)/step_rel); -+ fprintf(fplog, "Average number of force evaluations per MD step: %.2f\n\n", -+ tcount/step_rel); -+ } -+ -+ if (repl_ex_nst > 0 && MASTER(cr)) -+ { -+ print_replica_exchange_statistics(fplog, repl_ex); -+ } -+ -+ /* IMD cleanup, if bIMD is TRUE. 
*/ -+ IMD_finalize(ir->bIMD, ir->imd); -+ -+ walltime_accounting_set_nsteps_done(walltime_accounting, step_rel); -+ -+ return 0; -+} -diff --git a/src/programs/mdrun/mdrun.cpp b/src/programs/mdrun/mdrun.cpp -index 6bac3f0..e9fbf48 100644 ---- a/src/programs/mdrun/mdrun.cpp -+++ b/src/programs/mdrun/mdrun.cpp -@@ -55,6 +55,12 @@ - - #include "gromacs/commandline/pargs.h" - #include "gromacs/fileio/filenm.h" -+/* PLUMED */ -+#include "../../../Plumed.h" -+extern int plumedswitch; -+extern plumed plumedmain; -+extern void(*plumedcmd)(plumed,const char*,const void*); -+/* END PLUMED */ - - int gmx_mdrun(int argc, char *argv[]) - { -@@ -428,6 +434,7 @@ int gmx_mdrun(int argc, char *argv[]) - { efMTX, "-mtx", "nm", ffOPTWR }, - { efNDX, "-dn", "dipole", ffOPTWR }, - { efRND, "-multidir", NULL, ffOPTRDMULT}, -+ { efDAT, "-plumed", "plumed", ffOPTRD }, /* PLUMED */ - { efDAT, "-membed", "membed", ffOPTRD }, - { efTOP, "-mp", "membed", ffOPTRD }, - { efNDX, "-mn", "membed", ffOPTRD }, -@@ -780,6 +787,32 @@ int gmx_mdrun(int argc, char *argv[]) - ddxyz[YY] = (int)(realddxyz[YY] + 0.5); - ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5); - -+ /* PLUMED */ -+ plumedswitch=0; -+ if (opt2bSet("-plumed",NFILE,fnm)) plumedswitch=1; -+ if(plumedswitch){ -+ plumedcmd=plumed_cmd; -+ int plumed_is_there=0; -+ int real_precision=sizeof(real); -+ real energyUnits=1.0; -+ real lengthUnits=1.0; -+ real timeUnits=1.0; -+ -+ if(!plumed_installed()){ -+ gmx_fatal(FARGS,"Plumed is not available. Check your PLUMED_KERNEL variable."); -+ } -+ plumedmain=plumed_create(); -+ plumed_cmd(plumedmain,"setRealPrecision",&real_precision); -+ // this is not necessary for gromacs units: -+ plumed_cmd(plumedmain,"setMDEnergyUnits",&energyUnits); -+ plumed_cmd(plumedmain,"setMDLengthUnits",&lengthUnits); -+ plumed_cmd(plumedmain,"setMDTimeUnits",&timeUnits); -+ // -+ plumed_cmd(plumedmain,"setPlumedDat",ftp2fn(efDAT,NFILE,fnm)); -+ plumedswitch=1; -+ } -+ /* END PLUMED */ -+ - rc = mdrunner(&hw_opt, fplog, cr, NFILE, fnm, oenv, bVerbose, bCompact, - nstglobalcomm, ddxyz, dd_node_order, rdd, rconstr, - dddlb_opt[0], dlb_scale, ddcsx, ddcsy, ddcsz, -@@ -788,6 +821,12 @@ int gmx_mdrun(int argc, char *argv[]) - nmultisim, repl_ex_nst, repl_ex_nex, repl_ex_seed, - pforce, cpt_period, max_hours, deviceOptions, imdport, Flags); - -+ /* PLUMED */ -+ if(plumedswitch){ -+ plumed_finalize(plumedmain); -+ } -+ /* END PLUMED */ -+ - /* Log file has to be closed in mdrunner if we are appending to it - (fplog not set here) */ - if (MASTER(cr) && !bAppendFiles) -diff --git a/src/programs/mdrun/mdrun.cpp.preplumed b/src/programs/mdrun/mdrun.cpp.preplumed -new file mode 100644 -index 0000000..6bac3f0 ---- /dev/null -+++ b/src/programs/mdrun/mdrun.cpp.preplumed -@@ -0,0 +1,799 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team. -+ * Copyright (c) 2011,2012,2013,2014, by the GROMACS development team, led by -+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+ * and including many others, as listed in the AUTHORS file in the -+ * top-level source directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. 
-+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. -+ */ -+#include "mdrun_main.h" -+ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include -+ -+#include "gromacs/legacyheaders/checkpoint.h" -+#include "gromacs/legacyheaders/copyrite.h" -+#include "gromacs/legacyheaders/gmx_fatal.h" -+#include "gromacs/legacyheaders/macros.h" -+#include "gromacs/legacyheaders/main.h" -+#include "gromacs/legacyheaders/mdrun.h" -+#include "gromacs/legacyheaders/network.h" -+#include "gromacs/legacyheaders/readinp.h" -+#include "gromacs/legacyheaders/typedefs.h" -+#include "gromacs/legacyheaders/types/commrec.h" -+ -+#include "gromacs/commandline/pargs.h" -+#include "gromacs/fileio/filenm.h" -+ -+int gmx_mdrun(int argc, char *argv[]) -+{ -+ const char *desc[] = { -+ "[THISMODULE] is the main computational chemistry engine", -+ "within GROMACS. Obviously, it performs Molecular Dynamics simulations,", -+ "but it can also perform Stochastic Dynamics, Energy Minimization,", -+ "test particle insertion or (re)calculation of energies.", -+ "Normal mode analysis is another option. 
In this case [TT]mdrun[tt]", -+ "builds a Hessian matrix from single conformation.", -+ "For usual Normal Modes-like calculations, make sure that", -+ "the structure provided is properly energy-minimized.", -+ "The generated matrix can be diagonalized by [gmx-nmeig].[PAR]", -+ "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])", -+ "and distributes the topology over ranks if needed.", -+ "[TT]mdrun[tt] produces at least four output files.", -+ "A single log file ([TT]-g[tt]) is written, unless the option", -+ "[TT]-seppot[tt] is used, in which case each rank writes a log file.", -+ "The trajectory file ([TT]-o[tt]), contains coordinates, velocities and", -+ "optionally forces.", -+ "The structure file ([TT]-c[tt]) contains the coordinates and", -+ "velocities of the last step.", -+ "The energy file ([TT]-e[tt]) contains energies, the temperature,", -+ "pressure, etc, a lot of these things are also printed in the log file.", -+ "Optionally coordinates can be written to a compressed trajectory file", -+ "([TT]-x[tt]).[PAR]", -+ "The option [TT]-dhdl[tt] is only used when free energy calculation is", -+ "turned on.[PAR]", -+ "A simulation can be run in parallel using two different parallelization", -+ "schemes: MPI parallelization and/or OpenMP thread parallelization.", -+ "The MPI parallelization uses multiple processes when [TT]mdrun[tt] is", -+ "compiled with a normal MPI library or threads when [TT]mdrun[tt] is", -+ "compiled with the GROMACS built-in thread-MPI library. OpenMP threads", -+ "are supported when [TT]mdrun[tt] is compiled with OpenMP. Full OpenMP support", -+ "is only available with the Verlet cut-off scheme, with the (older)", -+ "group scheme only PME-only ranks can use OpenMP parallelization.", -+ "In all cases [TT]mdrun[tt] will by default try to use all the available", -+ "hardware resources. With a normal MPI library only the options", -+ "[TT]-ntomp[tt] (with the Verlet cut-off scheme) and [TT]-ntomp_pme[tt],", -+ "for PME-only ranks, can be used to control the number of threads.", -+ "With thread-MPI there are additional options [TT]-nt[tt], which sets", -+ "the total number of threads, and [TT]-ntmpi[tt], which sets the number", -+ "of thread-MPI threads.", -+ "The number of OpenMP threads used by [TT]mdrun[tt] can also be set with", -+ "the standard environment variable, [TT]OMP_NUM_THREADS[tt].", -+ "The [TT]GMX_PME_NUM_THREADS[tt] environment variable can be used to specify", -+ "the number of threads used by the PME-only ranks.[PAR]", -+ "Note that combined MPI+OpenMP parallelization is in many cases", -+ "slower than either on its own. However, at high parallelization, using the", -+ "combination is often beneficial as it reduces the number of domains and/or", -+ "the number of MPI ranks. (Less and larger domains can improve scaling,", -+ "with separate PME ranks, using fewer MPI ranks reduces communication costs.)", -+ "OpenMP-only parallelization is typically faster than MPI-only parallelization", -+ "on a single CPU(-die). Since we currently don't have proper hardware", -+ "topology detection, [TT]mdrun[tt] compiled with thread-MPI will only", -+ "automatically use OpenMP-only parallelization when you use up to 4", -+ "threads, up to 12 threads with Intel Nehalem/Westmere, or up to 16", -+ "threads with Intel Sandy Bridge or newer CPUs. 
Otherwise MPI-only", -+ "parallelization is used (except with GPUs, see below).", -+ "[PAR]", -+ "To quickly test the performance of the new Verlet cut-off scheme", -+ "with old [TT].tpr[tt] files, either on CPUs or CPUs+GPUs, you can use", -+ "the [TT]-testverlet[tt] option. This should not be used for production,", -+ "since it can slightly modify potentials and it will remove charge groups", -+ "making analysis difficult, as the [TT].tpr[tt] file will still contain", -+ "charge groups. For production simulations it is highly recommended", -+ "to specify [TT]cutoff-scheme = Verlet[tt] in the [TT].mdp[tt] file.", -+ "[PAR]", -+ "With GPUs (only supported with the Verlet cut-off scheme), the number", -+ "of GPUs should match the number of particle-particle ranks, i.e.", -+ "excluding PME-only ranks. With thread-MPI, unless set on the command line, the number", -+ "of MPI threads will automatically be set to the number of GPUs detected.", -+ "To use a subset of the available GPUs, or to manually provide a mapping of", -+ "GPUs to PP ranks, you can use the [TT]-gpu_id[tt] option. The argument of [TT]-gpu_id[tt] is", -+ "a string of digits (without delimiter) representing device id-s of the GPUs to be used.", -+ "For example, \"[TT]02[tt]\" specifies using GPUs 0 and 2 in the first and second PP ranks per compute node", -+ "respectively. To select different sets of GPU-s", -+ "on different nodes of a compute cluster, use the [TT]GMX_GPU_ID[tt] environment", -+ "variable instead. The format for [TT]GMX_GPU_ID[tt] is identical to ", -+ "[TT]-gpu_id[tt], with the difference that an environment variable can have", -+ "different values on different compute nodes. Multiple MPI ranks on each node", -+ "can share GPUs. This is accomplished by specifying the id(s) of the GPU(s)", -+ "multiple times, e.g. \"[TT]0011[tt]\" for four ranks sharing two GPUs in this node.", -+ "This works within a single simulation, or a multi-simulation, with any form of MPI.", -+ "[PAR]", -+ "With the Verlet cut-off scheme and verlet-buffer-tolerance set,", -+ "the pair-list update interval nstlist can be chosen freely with", -+ "the option [TT]-nstlist[tt]. [TT]mdrun[tt] will then adjust", -+ "the pair-list cut-off to maintain accuracy, and not adjust nstlist.", -+ "Otherwise, by default, [TT]mdrun[tt] will try to increase the", -+ "value of nstlist set in the [TT].mdp[tt] file to improve the", -+ "performance. For CPU-only runs, nstlist might increase to 20, for", -+ "GPU runs up to 40. For medium to high parallelization or with", -+ "fast GPUs, a (user-supplied) larger nstlist value can give much", -+ "better performance.", -+ "[PAR]", -+ "When using PME with separate PME ranks or with a GPU, the two major", -+ "compute tasks, the non-bonded force calculation and the PME calculation", -+ "run on different compute resources. If this load is not balanced,", -+ "some of the resources will be idle part of time. With the Verlet", -+ "cut-off scheme this load is automatically balanced when the PME load", -+ "is too high (but not when it is too low). This is done by scaling", -+ "the Coulomb cut-off and PME grid spacing by the same amount. In the first", -+ "few hundred steps different settings are tried and the fastest is chosen", -+ "for the rest of the simulation. This does not affect the accuracy of", -+ "the results, but it does affect the decomposition of the Coulomb energy", -+ "into particle and mesh contributions. 
The auto-tuning can be turned off", -+ "with the option [TT]-notunepme[tt].", -+ "[PAR]", -+ "[TT]mdrun[tt] pins (sets affinity of) threads to specific cores,", -+ "when all (logical) cores on a compute node are used by [TT]mdrun[tt],", -+ "even when no multi-threading is used,", -+ "as this usually results in significantly better performance.", -+ "If the queuing systems or the OpenMP library pinned threads, we honor", -+ "this and don't pin again, even though the layout may be sub-optimal.", -+ "If you want to have [TT]mdrun[tt] override an already set thread affinity", -+ "or pin threads when using less cores, use [TT]-pin on[tt].", -+ "With SMT (simultaneous multithreading), e.g. Intel Hyper-Threading,", -+ "there are multiple logical cores per physical core.", -+ "The option [TT]-pinstride[tt] sets the stride in logical cores for", -+ "pinning consecutive threads. Without SMT, 1 is usually the best choice.", -+ "With Intel Hyper-Threading 2 is best when using half or less of the", -+ "logical cores, 1 otherwise. The default value of 0 do exactly that:", -+ "it minimizes the threads per logical core, to optimize performance.", -+ "If you want to run multiple [TT]mdrun[tt] jobs on the same physical node," -+ "you should set [TT]-pinstride[tt] to 1 when using all logical cores.", -+ "When running multiple [TT]mdrun[tt] (or other) simulations on the same physical", -+ "node, some simulations need to start pinning from a non-zero core", -+ "to avoid overloading cores; with [TT]-pinoffset[tt] you can specify", -+ "the offset in logical cores for pinning.", -+ "[PAR]", -+ "When [TT]mdrun[tt] is started with more than 1 rank,", -+ "parallelization with domain decomposition is used.", -+ "[PAR]", -+ "With domain decomposition, the spatial decomposition can be set", -+ "with option [TT]-dd[tt]. By default [TT]mdrun[tt] selects a good decomposition.", -+ "The user only needs to change this when the system is very inhomogeneous.", -+ "Dynamic load balancing is set with the option [TT]-dlb[tt],", -+ "which can give a significant performance improvement,", -+ "especially for inhomogeneous systems. The only disadvantage of", -+ "dynamic load balancing is that runs are no longer binary reproducible,", -+ "but in most cases this is not important.", -+ "By default the dynamic load balancing is automatically turned on", -+ "when the measured performance loss due to load imbalance is 5% or more.", -+ "At low parallelization these are the only important options", -+ "for domain decomposition.", -+ "At high parallelization the options in the next two sections", -+ "could be important for increasing the performace.", -+ "[PAR]", -+ "When PME is used with domain decomposition, separate ranks can", -+ "be assigned to do only the PME mesh calculation;", -+ "this is computationally more efficient starting at about 12 ranks,", -+ "or even fewer when OpenMP parallelization is used.", -+ "The number of PME ranks is set with option [TT]-npme[tt],", -+ "but this cannot be more than half of the ranks.", -+ "By default [TT]mdrun[tt] makes a guess for the number of PME", -+ "ranks when the number of ranks is larger than 16. With GPUs,", -+ "using separate PME ranks is not selected automatically,", -+ "since the optimal setup depends very much on the details", -+ "of the hardware. In all cases, you might gain performance", -+ "by optimizing [TT]-npme[tt]. 
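The [TT]-pinoffset[tt]/[TT]-pinstride[tt] behaviour described above amounts to pinning thread i to logical core offset + i*stride. The following is a simplified, Linux-only sketch of that idea using pthread affinity calls; it is not GROMACS's affinity code and the helper name is invented for the example.

    /* Simplified Linux-only sketch (not GROMACS's affinity code): pin the calling
     * thread to logical core  offset + index*stride , mirroring -pinoffset/-pinstride. */
    #define _GNU_SOURCE
    #include <pthread.h>
    #include <sched.h>
    #include <stdio.h>

    static int pin_to_core(int thread_index, int offset, int stride)
    {
        cpu_set_t set;
        int       core = offset + thread_index * stride;

        CPU_ZERO(&set);
        CPU_SET(core, &set);
        return pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
    }

    int main(void)
    {
        /* e.g. a second mdrun-like job on the node: -pinoffset 8 -pinstride 1 */
        if (pin_to_core(0, 8, 1) != 0)
        {
            fprintf(stderr, "could not set affinity\n");
            return 1;
        }
        printf("pinned to logical core 8\n");
        return 0;
    }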
Performance statistics on this issue", -+ "are written at the end of the log file.", -+ "For good load balancing at high parallelization, the PME grid x and y", -+ "dimensions should be divisible by the number of PME ranks", -+ "(the simulation will run correctly also when this is not the case).", -+ "[PAR]", -+ "This section lists all options that affect the domain decomposition.", -+ "[PAR]", -+ "Option [TT]-rdd[tt] can be used to set the required maximum distance", -+ "for inter charge-group bonded interactions.", -+ "Communication for two-body bonded interactions below the non-bonded", -+ "cut-off distance always comes for free with the non-bonded communication.", -+ "Atoms beyond the non-bonded cut-off are only communicated when they have", -+ "missing bonded interactions; this means that the extra cost is minor", -+ "and nearly independent of the value of [TT]-rdd[tt].", -+ "With dynamic load balancing option [TT]-rdd[tt] also sets", -+ "the lower limit for the domain decomposition cell sizes.", -+ "By default [TT]-rdd[tt] is determined by [TT]mdrun[tt] based on", -+ "the initial coordinates. The chosen value will be a balance", -+ "between interaction range and communication cost.", -+ "[PAR]", -+ "When inter charge-group bonded interactions are beyond", -+ "the bonded cut-off distance, [TT]mdrun[tt] terminates with an error message.", -+ "For pair interactions and tabulated bonds", -+ "that do not generate exclusions, this check can be turned off", -+ "with the option [TT]-noddcheck[tt].", -+ "[PAR]", -+ "When constraints are present, option [TT]-rcon[tt] influences", -+ "the cell size limit as well.", -+ "Atoms connected by NC constraints, where NC is the LINCS order plus 1,", -+ "should not be beyond the smallest cell size. An error message is", -+ "generated when this happens and the user should change the decomposition", -+ "or decrease the LINCS order and increase the number of LINCS iterations.", -+ "By default [TT]mdrun[tt] estimates the minimum cell size required for P-LINCS", -+ "in a conservative fashion. For high parallelization it can be useful", -+ "to set the distance required for P-LINCS with the option [TT]-rcon[tt].", -+ "[PAR]", -+ "The [TT]-dds[tt] option sets the minimum allowed x, y and/or z scaling", -+ "of the cells with dynamic load balancing. [TT]mdrun[tt] will ensure that", -+ "the cells can scale down by at least this factor. This option is used", -+ "for the automated spatial decomposition (when not using [TT]-dd[tt])", -+ "as well as for determining the number of grid pulses, which in turn", -+ "sets the minimum allowed cell size. Under certain circumstances", -+ "the value of [TT]-dds[tt] might need to be adjusted to account for", -+ "high or low spatial inhomogeneity of the system.", -+ "[PAR]", -+ "The option [TT]-gcom[tt] can be used to only do global communication", -+ "every n steps.", -+ "This can improve performance for highly parallel simulations", -+ "where this global communication step becomes the bottleneck.", -+ "For a global thermostat and/or barostat the temperature", -+ "and/or pressure will also only be updated every [TT]-gcom[tt] steps.", -+ "By default it is set to the minimum of nstcalcenergy and nstlist.[PAR]", -+ "With [TT]-rerun[tt] an input trajectory can be given for which ", -+ "forces and energies will be (re)calculated.
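The [TT]-gcom[tt] default described just above (fall back to the minimum of nstcalcenergy and nstlist when no interval is given) can be written out as a tiny helper. This is only an illustration of the stated rule, not GROMACS code; the function name is invented for the example.

    /* Minimal sketch of the -gcom default: when no interval is given (<= 0),
     * fall back to min(nstcalcenergy, nstlist); a step then does global
     * communication only when it is a multiple of that interval. */
    #include <stdio.h>

    static int effective_gcom(int nstglobalcomm, int nstcalcenergy, int nstlist)
    {
        if (nstglobalcomm <= 0)
        {
            nstglobalcomm = (nstcalcenergy < nstlist) ? nstcalcenergy : nstlist;
        }
        return nstglobalcomm;
    }

    int main(void)
    {
        int  gcom = effective_gcom(-1, 100, 20);   /* -gcom not set on the command line */
        long step;

        for (step = 0; step <= 60; step++)
        {
            if (step % gcom == 0)
            {
                printf("step %ld: global communication\n", step);
            }
        }
        return 0;
    }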
Neighbor searching will be", -+ "performed for every frame, unless [TT]nstlist[tt] is zero", -+ "(see the [TT].mdp[tt] file).[PAR]", -+ "ED (essential dynamics) sampling and/or additional flooding potentials", -+ "are switched on by using the [TT]-ei[tt] flag followed by an [TT].edi[tt]", -+ "file. The [TT].edi[tt] file can be produced with the [TT]make_edi[tt] tool", -+ "or by using options in the essdyn menu of the WHAT IF program.", -+ "[TT]mdrun[tt] produces a [TT].xvg[tt] output file that", -+ "contains projections of positions, velocities and forces onto selected", -+ "eigenvectors.[PAR]", -+ "When user-defined potential functions have been selected in the", -+ "[TT].mdp[tt] file the [TT]-table[tt] option is used to pass [TT]mdrun[tt]", -+ "a formatted table with potential functions. The file is read from", -+ "either the current directory or from the [TT]GMXLIB[tt] directory.", -+ "A number of pre-formatted tables are presented in the [TT]GMXLIB[tt] dir,", -+ "for 6-8, 6-9, 6-10, 6-11, 6-12 Lennard-Jones potentials with", -+ "normal Coulomb.", -+ "When pair interactions are present, a separate table for pair interaction", -+ "functions is read using the [TT]-tablep[tt] option.[PAR]", -+ "When tabulated bonded functions are present in the topology,", -+ "interaction functions are read using the [TT]-tableb[tt] option.", -+ "For each different tabulated interaction type the table file name is", -+ "modified in a different way: before the file extension an underscore is", -+ "appended, then a 'b' for bonds, an 'a' for angles or a 'd' for dihedrals", -+ "and finally the table number of the interaction type.[PAR]", -+ "The options [TT]-px[tt] and [TT]-pf[tt] are used for writing pull COM", -+ "coordinates and forces when pulling is selected", -+ "in the [TT].mdp[tt] file.[PAR]", -+ "With [TT]-multi[tt] or [TT]-multidir[tt], multiple systems can be ", -+ "simulated in parallel.", -+ "As many input files/directories are required as the number of systems. ", -+ "The [TT]-multidir[tt] option takes a list of directories (one for each ", -+ "system) and runs in each of them, using the input/output file names, ", -+ "such as specified by e.g. the [TT]-s[tt] option, relative to these ", -+ "directories.", -+ "With [TT]-multi[tt], the system number is appended to the run input ", -+ "and each output filename, for instance [TT]topol.tpr[tt] becomes", -+ "[TT]topol0.tpr[tt], [TT]topol1.tpr[tt] etc.", -+ "The number of ranks per system is the total number of ranks", -+ "divided by the number of systems.", -+ "One use of this option is for NMR refinement: when distance", -+ "or orientation restraints are present these can be ensemble averaged", -+ "over all the systems.[PAR]", -+ "With [TT]-replex[tt] replica exchange is attempted every given number", -+ "of steps. The number of replicas is set with the [TT]-multi[tt] or ", -+ "[TT]-multidir[tt] option, described above.", -+ "All run input files should use a different coupling temperature,", -+ "the order of the files is not important. The random seed is set with", -+ "[TT]-reseed[tt]. The velocities are scaled and neighbor searching", -+ "is performed after every exchange.[PAR]", -+ "Finally some experimental algorithms can be tested when the", -+ "appropriate options have been given. Currently under", -+ "investigation are: polarizability.", -+ "[PAR]", -+ "The option [TT]-membed[tt] does what used to be g_membed, i.e. embed", -+ "a protein into a membrane. The data file should contain the options", -+ "that where passed to g_membed before. 
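The [TT]-multi[tt] file-naming rule mentioned above (the system number is appended to each input/output name, so [TT]topol.tpr[tt] becomes [TT]topol0.tpr[tt], [TT]topol1.tpr[tt], ...) can be sketched as a short filename helper. This is an illustration only, not GROMACS's own name-suffixing routine; the helper name is made up.

    /* Illustrative sketch: insert a -multi style system number before the file
     * extension, so "topol.tpr" becomes "topol0.tpr", "topol1.tpr", ... */
    #include <stdio.h>
    #include <string.h>

    static void multi_name(const char *fn, int sim, char *out, size_t outlen)
    {
        const char *dot = strrchr(fn, '.');

        if (dot == NULL)
        {
            snprintf(out, outlen, "%s%d", fn, sim);
        }
        else
        {
            snprintf(out, outlen, "%.*s%d%s", (int)(dot - fn), fn, sim, dot);
        }
    }

    int main(void)
    {
        char buf[256];
        int  sim;

        for (sim = 0; sim < 3; sim++)
        {
            multi_name("topol.tpr", sim, buf, sizeof(buf));
            printf("%s\n", buf);
        }
        return 0;
    }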
The [TT]-mn[tt] and [TT]-mp[tt]", -+ "both apply to this as well.", -+ "[PAR]", -+ "The option [TT]-pforce[tt] is useful when you suspect a simulation", -+ "crashes due to too large forces. With this option coordinates and", -+ "forces of atoms with a force larger than a certain value will", -+ "be printed to stderr.", -+ "[PAR]", -+ "Checkpoints containing the complete state of the system are written", -+ "at regular intervals (option [TT]-cpt[tt]) to the file [TT]-cpo[tt],", -+ "unless option [TT]-cpt[tt] is set to -1.", -+ "The previous checkpoint is backed up to [TT]state_prev.cpt[tt] to", -+ "make sure that a recent state of the system is always available,", -+ "even when the simulation is terminated while writing a checkpoint.", -+ "With [TT]-cpnum[tt] all checkpoint files are kept and appended", -+ "with the step number.", -+ "A simulation can be continued by reading the full state from file", -+ "with option [TT]-cpi[tt]. This option is intelligent in the way that", -+ "if no checkpoint file is found, Gromacs just assumes a normal run and", -+ "starts from the first step of the [TT].tpr[tt] file. By default the output", -+ "will be appending to the existing output files. The checkpoint file", -+ "contains checksums of all output files, such that you will never", -+ "loose data when some output files are modified, corrupt or removed.", -+ "There are three scenarios with [TT]-cpi[tt]:[PAR]", -+ "[TT]*[tt] no files with matching names are present: new output files are written[PAR]", -+ "[TT]*[tt] all files are present with names and checksums matching those stored", -+ "in the checkpoint file: files are appended[PAR]", -+ "[TT]*[tt] otherwise no files are modified and a fatal error is generated[PAR]", -+ "With [TT]-noappend[tt] new output files are opened and the simulation", -+ "part number is added to all output file names.", -+ "Note that in all cases the checkpoint file itself is not renamed", -+ "and will be overwritten, unless its name does not match", -+ "the [TT]-cpo[tt] option.", -+ "[PAR]", -+ "With checkpointing the output is appended to previously written", -+ "output files, unless [TT]-noappend[tt] is used or none of the previous", -+ "output files are present (except for the checkpoint file).", -+ "The integrity of the files to be appended is verified using checksums", -+ "which are stored in the checkpoint file. This ensures that output can", -+ "not be mixed up or corrupted due to file appending. When only some", -+ "of the previous output files are present, a fatal error is generated", -+ "and no old output files are modified and no new output files are opened.", -+ "The result with appending will be the same as from a single run.", -+ "The contents will be binary identical, unless you use a different number", -+ "of ranks or dynamic load balancing or the FFT library uses optimizations", -+ "through timing.", -+ "[PAR]", -+ "With option [TT]-maxh[tt] a simulation is terminated and a checkpoint", -+ "file is written at the first neighbor search step where the run time", -+ "exceeds [TT]-maxh[tt]*0.99 hours.", -+ "[PAR]", -+ "When [TT]mdrun[tt] receives a TERM signal, it will set nsteps to the current", -+ "step plus one. When [TT]mdrun[tt] receives an INT signal (e.g. 
when ctrl+C is", -+ "pressed), it will stop after the next neighbor search step ", -+ "(with nstlist=0 at the next step).", -+ "In both cases all the usual output will be written to file.", -+ "When running with MPI, a signal to one of the [TT]mdrun[tt] ranks", -+ "is sufficient, this signal should not be sent to mpirun or", -+ "the [TT]mdrun[tt] process that is the parent of the others.", -+ "[PAR]", -+ "Interactive molecular dynamics (IMD) can be activated by using at least one", -+ "of the three IMD switches: The [TT]-imdterm[tt] switch allows to terminate the", -+ "simulation from the molecular viewer (e.g. VMD). With [TT]-imdwait[tt],", -+ "[TT]mdrun[tt] pauses whenever no IMD client is connected. Pulling from the", -+ "IMD remote can be turned on by [TT]-imdpull[tt].", -+ "The port [TT]mdrun[tt] listens to can be altered by [TT]-imdport[tt].The", -+ "file pointed to by [TT]-if[tt] contains atom indices and forces if IMD", -+ "pulling is used." -+ "[PAR]", -+ "When [TT]mdrun[tt] is started with MPI, it does not run niced by default." -+ }; -+ t_commrec *cr; -+ t_filenm fnm[] = { -+ { efTPX, NULL, NULL, ffREAD }, -+ { efTRN, "-o", NULL, ffWRITE }, -+ { efCOMPRESSED, "-x", NULL, ffOPTWR }, -+ { efCPT, "-cpi", NULL, ffOPTRD }, -+ { efCPT, "-cpo", NULL, ffOPTWR }, -+ { efSTO, "-c", "confout", ffWRITE }, -+ { efEDR, "-e", "ener", ffWRITE }, -+ { efLOG, "-g", "md", ffWRITE }, -+ { efXVG, "-dhdl", "dhdl", ffOPTWR }, -+ { efXVG, "-field", "field", ffOPTWR }, -+ { efXVG, "-table", "table", ffOPTRD }, -+ { efXVG, "-tabletf", "tabletf", ffOPTRD }, -+ { efXVG, "-tablep", "tablep", ffOPTRD }, -+ { efXVG, "-tableb", "table", ffOPTRD }, -+ { efTRX, "-rerun", "rerun", ffOPTRD }, -+ { efXVG, "-tpi", "tpi", ffOPTWR }, -+ { efXVG, "-tpid", "tpidist", ffOPTWR }, -+ { efEDI, "-ei", "sam", ffOPTRD }, -+ { efXVG, "-eo", "edsam", ffOPTWR }, -+ { efXVG, "-devout", "deviatie", ffOPTWR }, -+ { efXVG, "-runav", "runaver", ffOPTWR }, -+ { efXVG, "-px", "pullx", ffOPTWR }, -+ { efXVG, "-pf", "pullf", ffOPTWR }, -+ { efXVG, "-ro", "rotation", ffOPTWR }, -+ { efLOG, "-ra", "rotangles", ffOPTWR }, -+ { efLOG, "-rs", "rotslabs", ffOPTWR }, -+ { efLOG, "-rt", "rottorque", ffOPTWR }, -+ { efMTX, "-mtx", "nm", ffOPTWR }, -+ { efNDX, "-dn", "dipole", ffOPTWR }, -+ { efRND, "-multidir", NULL, ffOPTRDMULT}, -+ { efDAT, "-membed", "membed", ffOPTRD }, -+ { efTOP, "-mp", "membed", ffOPTRD }, -+ { efNDX, "-mn", "membed", ffOPTRD }, -+ { efXVG, "-if", "imdforces", ffOPTWR }, -+ { efXVG, "-swap", "swapions", ffOPTWR } -+ }; -+#define NFILE asize(fnm) -+ -+ /* Command line options ! 
*/ -+ gmx_bool bDDBondCheck = TRUE; -+ gmx_bool bDDBondComm = TRUE; -+ gmx_bool bTunePME = TRUE; -+ gmx_bool bTestVerlet = FALSE; -+ gmx_bool bVerbose = FALSE; -+ gmx_bool bCompact = TRUE; -+ gmx_bool bSepPot = FALSE; -+ gmx_bool bRerunVSite = FALSE; -+ gmx_bool bConfout = TRUE; -+ gmx_bool bReproducible = FALSE; -+ gmx_bool bIMDwait = FALSE; -+ gmx_bool bIMDterm = FALSE; -+ gmx_bool bIMDpull = FALSE; -+ -+ int npme = -1; -+ int nstlist = 0; -+ int nmultisim = 0; -+ int nstglobalcomm = -1; -+ int repl_ex_nst = 0; -+ int repl_ex_seed = -1; -+ int repl_ex_nex = 0; -+ int nstepout = 100; -+ int resetstep = -1; -+ gmx_int64_t nsteps = -2; /* the value -2 means that the mdp option will be used */ -+ int imdport = 8888; /* can be almost anything, 8888 is easy to remember */ -+ -+ rvec realddxyz = {0, 0, 0}; -+ const char *ddno_opt[ddnoNR+1] = -+ { NULL, "interleave", "pp_pme", "cartesian", NULL }; -+ const char *dddlb_opt[] = -+ { NULL, "auto", "no", "yes", NULL }; -+ const char *thread_aff_opt[threadaffNR+1] = -+ { NULL, "auto", "on", "off", NULL }; -+ const char *nbpu_opt[] = -+ { NULL, "auto", "cpu", "gpu", "gpu_cpu", NULL }; -+ real rdd = 0.0, rconstr = 0.0, dlb_scale = 0.8, pforce = -1; -+ char *ddcsx = NULL, *ddcsy = NULL, *ddcsz = NULL; -+ real cpt_period = 15.0, max_hours = -1; -+ gmx_bool bAppendFiles = TRUE; -+ gmx_bool bKeepAndNumCPT = FALSE; -+ gmx_bool bResetCountersHalfWay = FALSE; -+ output_env_t oenv = NULL; -+ const char *deviceOptions = ""; -+ -+ /* Non transparent initialization of a complex gmx_hw_opt_t struct. -+ * But unfortunately we are not allowed to call a function here, -+ * since declarations follow below. -+ */ -+ gmx_hw_opt_t hw_opt = { -+ 0, 0, 0, 0, threadaffSEL, 0, 0, -+ { NULL, FALSE, 0, NULL } -+ }; -+ -+ t_pargs pa[] = { -+ -+ { "-dd", FALSE, etRVEC, {&realddxyz}, -+ "Domain decomposition grid, 0 is optimize" }, -+ { "-ddorder", FALSE, etENUM, {ddno_opt}, -+ "DD rank order" }, -+ { "-npme", FALSE, etINT, {&npme}, -+ "Number of separate ranks to be used for PME, -1 is guess" }, -+ { "-nt", FALSE, etINT, {&hw_opt.nthreads_tot}, -+ "Total number of threads to start (0 is guess)" }, -+ { "-ntmpi", FALSE, etINT, {&hw_opt.nthreads_tmpi}, -+ "Number of thread-MPI threads to start (0 is guess)" }, -+ { "-ntomp", FALSE, etINT, {&hw_opt.nthreads_omp}, -+ "Number of OpenMP threads per MPI rank to start (0 is guess)" }, -+ { "-ntomp_pme", FALSE, etINT, {&hw_opt.nthreads_omp_pme}, -+ "Number of OpenMP threads per MPI rank to start (0 is -ntomp)" }, -+ { "-pin", FALSE, etENUM, {thread_aff_opt}, -+ "Set thread affinities" }, -+ { "-pinoffset", FALSE, etINT, {&hw_opt.core_pinning_offset}, -+ "The starting logical core number for pinning to cores; used to avoid pinning threads from different mdrun instances to the same core" }, -+ { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride}, -+ "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" }, -+ { "-gpu_id", FALSE, etSTR, {&hw_opt.gpu_opt.gpu_id}, -+ "List of GPU device id-s to use, specifies the per-node PP rank to GPU mapping" }, -+ { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck}, -+ "Check for all bonded interactions with DD" }, -+ { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm}, -+ "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" }, -+ { "-rdd", FALSE, etREAL, {&rdd}, -+ "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" }, -+ { "-rcon", FALSE, etREAL, {&rconstr}, -+ "Maximum distance for 
P-LINCS (nm), 0 is estimate" }, -+ { "-dlb", FALSE, etENUM, {dddlb_opt}, -+ "Dynamic load balancing (with DD)" }, -+ { "-dds", FALSE, etREAL, {&dlb_scale}, -+ "Fraction in (0,1) by whose reciprocal the initial DD cell size will be increased in order to " -+ "provide a margin in which dynamic load balancing can act while preserving the minimum cell size." }, -+ { "-ddcsx", FALSE, etSTR, {&ddcsx}, -+ "HIDDENA string containing a vector of the relative sizes in the x " -+ "direction of the corresponding DD cells. Only effective with static " -+ "load balancing." }, -+ { "-ddcsy", FALSE, etSTR, {&ddcsy}, -+ "HIDDENA string containing a vector of the relative sizes in the y " -+ "direction of the corresponding DD cells. Only effective with static " -+ "load balancing." }, -+ { "-ddcsz", FALSE, etSTR, {&ddcsz}, -+ "HIDDENA string containing a vector of the relative sizes in the z " -+ "direction of the corresponding DD cells. Only effective with static " -+ "load balancing." }, -+ { "-gcom", FALSE, etINT, {&nstglobalcomm}, -+ "Global communication frequency" }, -+ { "-nb", FALSE, etENUM, {&nbpu_opt}, -+ "Calculate non-bonded interactions on" }, -+ { "-nstlist", FALSE, etINT, {&nstlist}, -+ "Set nstlist when using a Verlet buffer tolerance (0 is guess)" }, -+ { "-tunepme", FALSE, etBOOL, {&bTunePME}, -+ "Optimize PME load between PP/PME ranks or GPU/CPU" }, -+ { "-testverlet", FALSE, etBOOL, {&bTestVerlet}, -+ "Test the Verlet non-bonded scheme" }, -+ { "-v", FALSE, etBOOL, {&bVerbose}, -+ "Be loud and noisy" }, -+ { "-compact", FALSE, etBOOL, {&bCompact}, -+ "Write a compact log file" }, -+ { "-seppot", FALSE, etBOOL, {&bSepPot}, -+ "Write separate V and dVdl terms for each interaction type and rank to the log file(s)" }, -+ { "-pforce", FALSE, etREAL, {&pforce}, -+ "Print all forces larger than this (kJ/mol nm)" }, -+ { "-reprod", FALSE, etBOOL, {&bReproducible}, -+ "Try to avoid optimizations that affect binary reproducibility" }, -+ { "-cpt", FALSE, etREAL, {&cpt_period}, -+ "Checkpoint interval (minutes)" }, -+ { "-cpnum", FALSE, etBOOL, {&bKeepAndNumCPT}, -+ "Keep and number checkpoint files" }, -+ { "-append", FALSE, etBOOL, {&bAppendFiles}, -+ "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" }, -+ { "-nsteps", FALSE, etINT64, {&nsteps}, -+ "Run this number of steps, overrides .mdp file option" }, -+ { "-maxh", FALSE, etREAL, {&max_hours}, -+ "Terminate after 0.99 times this time (hours)" }, -+ { "-multi", FALSE, etINT, {&nmultisim}, -+ "Do multiple simulations in parallel" }, -+ { "-replex", FALSE, etINT, {&repl_ex_nst}, -+ "Attempt replica exchange periodically with this period (steps)" }, -+ { "-nex", FALSE, etINT, {&repl_ex_nex}, -+ "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion). -nex zero or not specified gives neighbor replica exchange." 
}, -+ { "-reseed", FALSE, etINT, {&repl_ex_seed}, -+ "Seed for replica exchange, -1 is generate a seed" }, -+ { "-imdport", FALSE, etINT, {&imdport}, -+ "HIDDENIMD listening port" }, -+ { "-imdwait", FALSE, etBOOL, {&bIMDwait}, -+ "HIDDENPause the simulation while no IMD client is connected" }, -+ { "-imdterm", FALSE, etBOOL, {&bIMDterm}, -+ "HIDDENAllow termination of the simulation from IMD client" }, -+ { "-imdpull", FALSE, etBOOL, {&bIMDpull}, -+ "HIDDENAllow pulling in the simulation from IMD client" }, -+ { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite}, -+ "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" }, -+ { "-confout", FALSE, etBOOL, {&bConfout}, -+ "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" }, -+ { "-stepout", FALSE, etINT, {&nstepout}, -+ "HIDDENFrequency of writing the remaining wall clock time for the run" }, -+ { "-resetstep", FALSE, etINT, {&resetstep}, -+ "HIDDENReset cycle counters after these many time steps" }, -+ { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay}, -+ "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" } -+ }; -+ unsigned long Flags, PCA_Flags; -+ ivec ddxyz; -+ int dd_node_order; -+ gmx_bool bAddPart; -+ FILE *fplog, *fpmulti; -+ int sim_part, sim_part_fn; -+ const char *part_suffix = ".part"; -+ char suffix[STRLEN]; -+ int rc; -+ char **multidir = NULL; -+ -+ -+ cr = init_commrec(); -+ -+ PCA_Flags = (PCA_CAN_SET_DEFFNM | (MASTER(cr) ? 0 : PCA_QUIET)); -+ -+ /* Comment this in to do fexist calls only on master -+ * works not with rerun or tables at the moment -+ * also comment out the version of init_forcerec in md.c -+ * with NULL instead of opt2fn -+ */ -+ /* -+ if (!MASTER(cr)) -+ { -+ PCA_Flags |= PCA_NOT_READ_NODE; -+ } -+ */ -+ -+ if (!parse_common_args(&argc, argv, PCA_Flags, NFILE, fnm, asize(pa), pa, -+ asize(desc), desc, 0, NULL, &oenv)) -+ { -+ return 0; -+ } -+ -+ -+ /* we set these early because they might be used in init_multisystem() -+ Note that there is the potential for npme>nnodes until the number of -+ threads is set later on, if there's thread parallelization. That shouldn't -+ lead to problems. */ -+ dd_node_order = nenum(ddno_opt); -+ cr->npmenodes = npme; -+ -+ hw_opt.thread_affinity = nenum(thread_aff_opt); -+ -+ /* now check the -multi and -multidir option */ -+ if (opt2bSet("-multidir", NFILE, fnm)) -+ { -+ if (nmultisim > 0) -+ { -+ gmx_fatal(FARGS, "mdrun -multi and -multidir options are mutually exclusive."); -+ } -+ nmultisim = opt2fns(&multidir, "-multidir", NFILE, fnm); -+ } -+ -+ -+ if (repl_ex_nst != 0 && nmultisim < 2) -+ { -+ gmx_fatal(FARGS, "Need at least two replicas for replica exchange (option -multi)"); -+ } -+ -+ if (repl_ex_nex < 0) -+ { -+ gmx_fatal(FARGS, "Replica exchange number of exchanges needs to be positive"); -+ } -+ -+ if (nmultisim > 1) -+ { -+#ifndef GMX_THREAD_MPI -+ gmx_bool bParFn = (multidir == NULL); -+ init_multisystem(cr, nmultisim, multidir, NFILE, fnm, bParFn); -+#else -+ gmx_fatal(FARGS, "mdrun -multi is not supported with the thread library. 
" -+ "Please compile GROMACS with MPI support"); -+#endif -+ } -+ -+ bAddPart = !bAppendFiles; -+ -+ /* Check if there is ANY checkpoint file available */ -+ sim_part = 1; -+ sim_part_fn = sim_part; -+ if (opt2bSet("-cpi", NFILE, fnm)) -+ { -+ if (bSepPot && bAppendFiles) -+ { -+ gmx_fatal(FARGS, "Output file appending is not supported with -seppot"); -+ } -+ -+ bAppendFiles = -+ read_checkpoint_simulation_part(opt2fn_master("-cpi", NFILE, -+ fnm, cr), -+ &sim_part_fn, NULL, cr, -+ bAppendFiles, NFILE, fnm, -+ part_suffix, &bAddPart); -+ if (sim_part_fn == 0 && MULTIMASTER(cr)) -+ { -+ fprintf(stdout, "No previous checkpoint file present, assuming this is a new run.\n"); -+ } -+ else -+ { -+ sim_part = sim_part_fn + 1; -+ } -+ -+ if (MULTISIM(cr) && MASTER(cr)) -+ { -+ if (MULTIMASTER(cr)) -+ { -+ /* Log file is not yet available, so if there's a -+ * problem we can only write to stderr. */ -+ fpmulti = stderr; -+ } -+ else -+ { -+ fpmulti = NULL; -+ } -+ check_multi_int(fpmulti, cr->ms, sim_part, "simulation part", TRUE); -+ } -+ } -+ else -+ { -+ bAppendFiles = FALSE; -+ } -+ -+ if (!bAppendFiles) -+ { -+ sim_part_fn = sim_part; -+ } -+ -+ if (bAddPart) -+ { -+ /* Rename all output files (except checkpoint files) */ -+ /* create new part name first (zero-filled) */ -+ sprintf(suffix, "%s%04d", part_suffix, sim_part_fn); -+ -+ add_suffix_to_output_names(fnm, NFILE, suffix); -+ if (MULTIMASTER(cr)) -+ { -+ fprintf(stdout, "Checkpoint file is from part %d, new output files will be suffixed '%s'.\n", sim_part-1, suffix); -+ } -+ } -+ -+ Flags = opt2bSet("-rerun", NFILE, fnm) ? MD_RERUN : 0; -+ Flags = Flags | (bSepPot ? MD_SEPPOT : 0); -+ Flags = Flags | (bDDBondCheck ? MD_DDBONDCHECK : 0); -+ Flags = Flags | (bDDBondComm ? MD_DDBONDCOMM : 0); -+ Flags = Flags | (bTunePME ? MD_TUNEPME : 0); -+ Flags = Flags | (bTestVerlet ? MD_TESTVERLET : 0); -+ Flags = Flags | (bConfout ? MD_CONFOUT : 0); -+ Flags = Flags | (bRerunVSite ? MD_RERUN_VSITE : 0); -+ Flags = Flags | (bReproducible ? MD_REPRODUCIBLE : 0); -+ Flags = Flags | (bAppendFiles ? MD_APPENDFILES : 0); -+ Flags = Flags | (opt2parg_bSet("-append", asize(pa), pa) ? MD_APPENDFILESSET : 0); -+ Flags = Flags | (bKeepAndNumCPT ? MD_KEEPANDNUMCPT : 0); -+ Flags = Flags | (sim_part > 1 ? MD_STARTFROMCPT : 0); -+ Flags = Flags | (bResetCountersHalfWay ? MD_RESETCOUNTERSHALFWAY : 0); -+ Flags = Flags | (bIMDwait ? MD_IMDWAIT : 0); -+ Flags = Flags | (bIMDterm ? MD_IMDTERM : 0); -+ Flags = Flags | (bIMDpull ? MD_IMDPULL : 0); -+ -+ /* We postpone opening the log file if we are appending, so we can -+ first truncate the old log file and append to the correct position -+ there instead. 
*/ -+ if ((MASTER(cr) || bSepPot) && !bAppendFiles) -+ { -+ gmx_log_open(ftp2fn(efLOG, NFILE, fnm), cr, -+ !bSepPot, Flags & MD_APPENDFILES, &fplog); -+ please_cite(fplog, "Hess2008b"); -+ please_cite(fplog, "Spoel2005a"); -+ please_cite(fplog, "Lindahl2001a"); -+ please_cite(fplog, "Berendsen95a"); -+ } -+ else if (!MASTER(cr) && bSepPot) -+ { -+ gmx_log_open(ftp2fn(efLOG, NFILE, fnm), cr, !bSepPot, Flags, &fplog); -+ } -+ else -+ { -+ fplog = NULL; -+ } -+ -+ ddxyz[XX] = (int)(realddxyz[XX] + 0.5); -+ ddxyz[YY] = (int)(realddxyz[YY] + 0.5); -+ ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5); -+ -+ rc = mdrunner(&hw_opt, fplog, cr, NFILE, fnm, oenv, bVerbose, bCompact, -+ nstglobalcomm, ddxyz, dd_node_order, rdd, rconstr, -+ dddlb_opt[0], dlb_scale, ddcsx, ddcsy, ddcsz, -+ nbpu_opt[0], nstlist, -+ nsteps, nstepout, resetstep, -+ nmultisim, repl_ex_nst, repl_ex_nex, repl_ex_seed, -+ pforce, cpt_period, max_hours, deviceOptions, imdport, Flags); -+ -+ /* Log file has to be closed in mdrunner if we are appending to it -+ (fplog not set here) */ -+ if (MASTER(cr) && !bAppendFiles) -+ { -+ gmx_log_close(fplog); -+ } -+ -+ return rc; -+} -diff --git a/src/programs/mdrun/repl_ex.c b/src/programs/mdrun/repl_ex.c -index 46a9bc0..cfb0b7f 100644 ---- a/src/programs/mdrun/repl_ex.c -+++ b/src/programs/mdrun/repl_ex.c -@@ -51,6 +51,12 @@ - #include "domdec.h" - #include "gromacs/random/random.h" - -+/* PLUMED */ -+#include "../../../Plumed.h" -+extern int plumedswitch; -+extern plumed plumedmain; -+/* END PLUMED */ -+ - #define PROBABILITYCUTOFF 100 - /* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */ - -@@ -112,14 +118,16 @@ static gmx_bool repl_quantity(const gmx_multisim_t *ms, - qall[re->repl] = q; - gmx_sum_sim(ms->nsim, qall, ms); - -- bDiff = FALSE; -- for (s = 1; s < ms->nsim; s++) -- { -- if (qall[s] != qall[0]) -- { -+ /* PLUMED */ -+ //bDiff = FALSE; -+ //for (s = 1; s < ms->nsim; s++) -+ //{ -+ // if (qall[s] != qall[0]) -+ // { - bDiff = TRUE; -- } -- } -+ // } -+ //} -+ /* END PLUMED */ - - if (bDiff) - { -@@ -269,6 +277,10 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog, - re->ind[i] = i; - } - -+ /* PLUMED */ -+ // plumed2: check if we want alternative patterns (i.e. for bias-exchange metaD) -+ // in those cases replicas can share the same temperature. -+ /* - if (re->type < ereENDSINGLE) - { - -@@ -277,11 +289,12 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog, - for (j = i+1; j < re->nrepl; j++) - { - if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]]) -- { -+ {*/ - /* Unordered replicas are supposed to work, but there - * is still an issues somewhere. - * Note that at this point still re->ind[i]=i. - */ -+ /* - gmx_fatal(FARGS, "Replicas with indices %d < %d have %ss %g > %g, please order your replicas on increasing %s", - i, j, - erename[re->type], -@@ -299,6 +312,8 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog, - } - } - } -+ */ -+ /* END PLUMED */ - - /* keep track of all the swaps, starting with the initial placement. 
*/ - snew(re->allswaps, re->nrepl); -@@ -982,6 +997,10 @@ test_for_replica_exchange(FILE *fplog, - pind[i] = re->ind[i]; - } - -+ /* PLUMED */ -+ int plumed_test_exchange_pattern=0; -+ /* END PLUMED */ -+ - if (bMultiEx) - { - /* multiple random switch exchange */ -@@ -1057,6 +1076,31 @@ test_for_replica_exchange(FILE *fplog, - /* standard nearest neighbor replica exchange */ - - m = (step / re->nst) % 2; -+ /* PLUMED */ -+ if(plumedswitch){ -+ int partner=re->repl; -+ plumed_cmd(plumedmain,"getExchangesFlag",&plumed_test_exchange_pattern); -+ if(plumed_test_exchange_pattern>0){ -+ int *list; -+ snew(list,re->nrepl); -+ plumed_cmd(plumedmain,"setNumberOfReplicas",&(re->nrepl)); -+ plumed_cmd(plumedmain,"getExchangesList",list); -+ for(i=0; inrepl; i++) re->ind[i]=list[i]; -+ sfree(list); -+ } -+ -+ for(i=1; inrepl; i++) { -+ if (i % 2 != m) continue; -+ a = re->ind[i-1]; -+ b = re->ind[i]; -+ if(re->repl==a) partner=b; -+ if(re->repl==b) partner=a; -+ } -+ plumed_cmd(plumedmain,"GREX setPartner",&partner); -+ plumed_cmd(plumedmain,"GREX calculate",NULL); -+ plumed_cmd(plumedmain,"GREX shareAllDeltaBias",NULL); -+ } -+ /* END PLUMED */ - for (i = 1; i < re->nrepl; i++) - { - a = re->ind[i-1]; -@@ -1066,6 +1110,18 @@ test_for_replica_exchange(FILE *fplog, - if (i % 2 == m) - { - delta = calc_delta(fplog, bPrint, re, a, b, a, b); -+ /* PLUMED */ -+ if(plumedswitch){ -+ real adb,bdb,dplumed; -+ char buf[300]; -+ sprintf(buf,"GREX getDeltaBias %d",a); plumed_cmd(plumedmain,buf,&adb); -+ sprintf(buf,"GREX getDeltaBias %d",b); plumed_cmd(plumedmain,buf,&bdb); -+ dplumed=adb*re->beta[a]+bdb*re->beta[b]; -+ delta+=dplumed; -+ if (bPrint) -+ fprintf(fplog,"dplumed = %10.3e dE_Term = %10.3e (kT)\n",dplumed,delta); -+ } -+ /* END PLUMED */ - if (delta <= 0) - { - /* accepted */ -@@ -1092,11 +1148,22 @@ test_for_replica_exchange(FILE *fplog, - - if (bEx[i]) - { -+ /* PLUMED */ -+ if(!plumed_test_exchange_pattern) { -+ /* standard neighbour swapping */ - /* swap these two */ - tmp = pind[i-1]; - pind[i-1] = pind[i]; - pind[i] = tmp; - re->nexchange[i]++; /* statistics for back compatibility */ -+ } else { -+ /* alternative swapping patterns */ -+ tmp = pind[a]; -+ pind[a] = pind[b]; -+ pind[b] = tmp; -+ re->nexchange[i]++; /* statistics for back compatibility */ -+ } -+ /* END PLUMED */ - } - } - else -@@ -1112,6 +1179,15 @@ test_for_replica_exchange(FILE *fplog, - re->nattempt[m]++; - } - -+ /* PLUMED */ -+ if(plumed_test_exchange_pattern>0) { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->ind[i] = i; -+ } -+ } -+ /* END PLUMED */ -+ - /* record which moves were made and accepted */ - for (i = 0; i < re->nrepl; i++) - { -@@ -1316,6 +1392,10 @@ gmx_bool replica_exchange(FILE *fplog, const t_commrec *cr, struct gmx_repl_ex * - /* The order in which multiple exchanges will occur. */ - gmx_bool bThisReplicaExchanged = FALSE; - -+ /* PLUMED */ -+ if(plumedswitch)plumed_cmd(plumedmain,"GREX prepare",NULL); -+ /* END PLUMED */ -+ - if (MASTER(cr)) - { - replica_id = re->repl; -diff --git a/src/programs/mdrun/repl_ex.c.preplumed b/src/programs/mdrun/repl_ex.c.preplumed -new file mode 100644 -index 0000000..46a9bc0 ---- /dev/null -+++ b/src/programs/mdrun/repl_ex.c.preplumed -@@ -0,0 +1,1439 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team. 
-+ * Copyright (c) 2011,2012,2013,2014, by the GROMACS development team, led by -+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+ * and including many others, as listed in the AUTHORS file in the -+ * top-level source directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. -+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. -+ */ -+#ifdef HAVE_CONFIG_H -+#include -+#endif -+ -+#include -+#include "repl_ex.h" -+#include "network.h" -+#include "gromacs/random/random.h" -+#include "gromacs/utility/smalloc.h" -+#include "physics.h" -+#include "copyrite.h" -+#include "macros.h" -+#include "vec.h" -+#include "names.h" -+#include "domdec.h" -+#include "gromacs/random/random.h" -+ -+#define PROBABILITYCUTOFF 100 -+/* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */ -+ -+enum { -+ ereTEMP, ereLAMBDA, ereENDSINGLE, ereTL, ereNR -+}; -+const char *erename[ereNR] = { "temperature", "lambda", "end_single_marker", "temperature and lambda"}; -+/* end_single_marker merely notes the end of single variable replica exchange. All types higher than -+ it are multiple replica exchange methods */ -+/* Eventually, should add 'pressure', 'temperature and pressure', 'lambda_and_pressure', 'temperature_lambda_pressure'?; -+ Let's wait until we feel better about the pressure control methods giving exact ensembles. 
Right now, we assume constant pressure */ -+ -+typedef struct gmx_repl_ex -+{ -+ int repl; -+ int nrepl; -+ real temp; -+ int type; -+ real **q; -+ gmx_bool bNPT; -+ real *pres; -+ int *ind; -+ int *allswaps; -+ int nst; -+ int nex; -+ int seed; -+ int nattempt[2]; -+ real *prob_sum; -+ int **nmoves; -+ int *nexchange; -+ gmx_rng_t rng; -+ -+ /* these are helper arrays for replica exchange; allocated here so they -+ don't have to be allocated each time */ -+ int *destinations; -+ int **cyclic; -+ int **order; -+ int *tmpswap; -+ gmx_bool *incycle; -+ gmx_bool *bEx; -+ -+ /* helper arrays to hold the quantities that are exchanged */ -+ real *prob; -+ real *Epot; -+ real *beta; -+ real *Vol; -+ real **de; -+ -+} t_gmx_repl_ex; -+ -+static gmx_bool repl_quantity(const gmx_multisim_t *ms, -+ struct gmx_repl_ex *re, int ere, real q) -+{ -+ real *qall; -+ gmx_bool bDiff; -+ int i, s; -+ -+ snew(qall, ms->nsim); -+ qall[re->repl] = q; -+ gmx_sum_sim(ms->nsim, qall, ms); -+ -+ bDiff = FALSE; -+ for (s = 1; s < ms->nsim; s++) -+ { -+ if (qall[s] != qall[0]) -+ { -+ bDiff = TRUE; -+ } -+ } -+ -+ if (bDiff) -+ { -+ /* Set the replica exchange type and quantities */ -+ re->type = ere; -+ -+ snew(re->q[ere], re->nrepl); -+ for (s = 0; s < ms->nsim; s++) -+ { -+ re->q[ere][s] = qall[s]; -+ } -+ } -+ sfree(qall); -+ return bDiff; -+} -+ -+gmx_repl_ex_t init_replica_exchange(FILE *fplog, -+ const gmx_multisim_t *ms, -+ const t_state *state, -+ const t_inputrec *ir, -+ int nst, int nex, int init_seed) -+{ -+ real temp, pres; -+ int i, j, k; -+ struct gmx_repl_ex *re; -+ gmx_bool bTemp; -+ gmx_bool bLambda = FALSE; -+ -+ fprintf(fplog, "\nInitializing Replica Exchange\n"); -+ -+ if (ms == NULL || ms->nsim == 1) -+ { -+ gmx_fatal(FARGS, "Nothing to exchange with only one replica, maybe you forgot to set the -multi option of mdrun?"); -+ } -+ if (!EI_DYNAMICS(ir->eI)) -+ { -+ gmx_fatal(FARGS, "Replica exchange is only supported by dynamical simulations"); -+ /* Note that PAR(cr) is defined by cr->nnodes > 1, which is -+ * distinct from MULTISIM(cr). A multi-simulation only runs -+ * with real MPI parallelism, but this does not imply PAR(cr) -+ * is true! -+ * -+ * Since we are using a dynamical integrator, the only -+ * decomposition is DD, so PAR(cr) and DOMAINDECOMP(cr) are -+ * synonymous. The only way for cr->nnodes > 1 to be true is -+ * if we are using DD. 
*/ -+ } -+ -+ snew(re, 1); -+ -+ re->repl = ms->sim; -+ re->nrepl = ms->nsim; -+ snew(re->q, ereENDSINGLE); -+ -+ fprintf(fplog, "Repl There are %d replicas:\n", re->nrepl); -+ -+ check_multi_int(fplog, ms, state->natoms, "the number of atoms", FALSE); -+ check_multi_int(fplog, ms, ir->eI, "the integrator", FALSE); -+ check_multi_int64(fplog, ms, ir->init_step+ir->nsteps, "init_step+nsteps", FALSE); -+ check_multi_int64(fplog, ms, (ir->init_step+nst-1)/nst, -+ "first exchange step: init_step/-replex", FALSE); -+ check_multi_int(fplog, ms, ir->etc, "the temperature coupling", FALSE); -+ check_multi_int(fplog, ms, ir->opts.ngtc, -+ "the number of temperature coupling groups", FALSE); -+ check_multi_int(fplog, ms, ir->epc, "the pressure coupling", FALSE); -+ check_multi_int(fplog, ms, ir->efep, "free energy", FALSE); -+ check_multi_int(fplog, ms, ir->fepvals->n_lambda, "number of lambda states", FALSE); -+ -+ re->temp = ir->opts.ref_t[0]; -+ for (i = 1; (i < ir->opts.ngtc); i++) -+ { -+ if (ir->opts.ref_t[i] != re->temp) -+ { -+ fprintf(fplog, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); -+ fprintf(stderr, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); -+ } -+ } -+ -+ re->type = -1; -+ bTemp = repl_quantity(ms, re, ereTEMP, re->temp); -+ if (ir->efep != efepNO) -+ { -+ bLambda = repl_quantity(ms, re, ereLAMBDA, (real)ir->fepvals->init_fep_state); -+ } -+ if (re->type == -1) /* nothing was assigned */ -+ { -+ gmx_fatal(FARGS, "The properties of the %d systems are all the same, there is nothing to exchange", re->nrepl); -+ } -+ if (bLambda && bTemp) -+ { -+ re->type = ereTL; -+ } -+ -+ if (bTemp) -+ { -+ please_cite(fplog, "Sugita1999a"); -+ if (ir->epc != epcNO) -+ { -+ re->bNPT = TRUE; -+ fprintf(fplog, "Repl Using Constant Pressure REMD.\n"); -+ please_cite(fplog, "Okabe2001a"); -+ } -+ if (ir->etc == etcBERENDSEN) -+ { -+ gmx_fatal(FARGS, "REMD with the %s thermostat does not produce correct potential energy distributions, consider using the %s thermostat instead", -+ ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE)); -+ } -+ } -+ if (bLambda) -+ { -+ if (ir->fepvals->delta_lambda != 0) /* check this? */ -+ { -+ gmx_fatal(FARGS, "delta_lambda is not zero"); -+ } -+ } -+ if (re->bNPT) -+ { -+ snew(re->pres, re->nrepl); -+ if (ir->epct == epctSURFACETENSION) -+ { -+ pres = ir->ref_p[ZZ][ZZ]; -+ } -+ else -+ { -+ pres = 0; -+ j = 0; -+ for (i = 0; i < DIM; i++) -+ { -+ if (ir->compress[i][i] != 0) -+ { -+ pres += ir->ref_p[i][i]; -+ j++; -+ } -+ } -+ pres /= j; -+ } -+ re->pres[re->repl] = pres; -+ gmx_sum_sim(re->nrepl, re->pres, ms); -+ } -+ -+ /* Make an index for increasing replica order */ -+ /* only makes sense if one or the other is varying, not both! -+ if both are varying, we trust the order the person gave. */ -+ snew(re->ind, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->ind[i] = i; -+ } -+ -+ if (re->type < ereENDSINGLE) -+ { -+ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ for (j = i+1; j < re->nrepl; j++) -+ { -+ if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]]) -+ { -+ /* Unordered replicas are supposed to work, but there -+ * is still an issues somewhere. -+ * Note that at this point still re->ind[i]=i. 
-+ */ -+ gmx_fatal(FARGS, "Replicas with indices %d < %d have %ss %g > %g, please order your replicas on increasing %s", -+ i, j, -+ erename[re->type], -+ re->q[re->type][i], re->q[re->type][j], -+ erename[re->type]); -+ -+ k = re->ind[i]; -+ re->ind[i] = re->ind[j]; -+ re->ind[j] = k; -+ } -+ else if (re->q[re->type][re->ind[j]] == re->q[re->type][re->ind[i]]) -+ { -+ gmx_fatal(FARGS, "Two replicas have identical %ss", erename[re->type]); -+ } -+ } -+ } -+ } -+ -+ /* keep track of all the swaps, starting with the initial placement. */ -+ snew(re->allswaps, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->allswaps[i] = re->ind[i]; -+ } -+ -+ switch (re->type) -+ { -+ case ereTEMP: -+ fprintf(fplog, "\nReplica exchange in temperature\n"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %5.1f", re->q[re->type][re->ind[i]]); -+ } -+ fprintf(fplog, "\n"); -+ break; -+ case ereLAMBDA: -+ fprintf(fplog, "\nReplica exchange in lambda\n"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %3d", (int)re->q[re->type][re->ind[i]]); -+ } -+ fprintf(fplog, "\n"); -+ break; -+ case ereTL: -+ fprintf(fplog, "\nReplica exchange in temperature and lambda state\n"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %5.1f", re->q[ereTEMP][re->ind[i]]); -+ } -+ fprintf(fplog, "\n"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %5d", (int)re->q[ereLAMBDA][re->ind[i]]); -+ } -+ fprintf(fplog, "\n"); -+ break; -+ default: -+ gmx_incons("Unknown replica exchange quantity"); -+ } -+ if (re->bNPT) -+ { -+ fprintf(fplog, "\nRepl p"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %5.2f", re->pres[re->ind[i]]); -+ } -+ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ if ((i > 0) && (re->pres[re->ind[i]] < re->pres[re->ind[i-1]])) -+ { -+ fprintf(fplog, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n"); -+ fprintf(stderr, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n"); -+ } -+ } -+ } -+ re->nst = nst; -+ if (init_seed == -1) -+ { -+ if (MASTERSIM(ms)) -+ { -+ re->seed = (int)gmx_rng_make_seed(); -+ } -+ else -+ { -+ re->seed = 0; -+ } -+ gmx_sumi_sim(1, &(re->seed), ms); -+ } -+ else -+ { -+ re->seed = init_seed; -+ } -+ fprintf(fplog, "\nReplica exchange interval: %d\n", re->nst); -+ fprintf(fplog, "\nReplica random seed: %d\n", re->seed); -+ re->rng = gmx_rng_init(re->seed); -+ -+ re->nattempt[0] = 0; -+ re->nattempt[1] = 0; -+ -+ snew(re->prob_sum, re->nrepl); -+ snew(re->nexchange, re->nrepl); -+ snew(re->nmoves, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ snew(re->nmoves[i], re->nrepl); -+ } -+ fprintf(fplog, "Replica exchange information below: x=exchange, pr=probability\n"); -+ -+ /* generate space for the helper functions so we don't have to snew each time */ -+ -+ snew(re->destinations, re->nrepl); -+ snew(re->incycle, re->nrepl); -+ snew(re->tmpswap, re->nrepl); -+ snew(re->cyclic, re->nrepl); -+ snew(re->order, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ snew(re->cyclic[i], re->nrepl); -+ snew(re->order[i], re->nrepl); -+ } -+ /* allocate space for the functions storing the data for the replicas */ -+ /* not all of these arrays needed in all cases, but they don't take -+ up much space, since the max size is nrepl**2 */ -+ snew(re->prob, re->nrepl); -+ snew(re->bEx, re->nrepl); -+ snew(re->beta, re->nrepl); -+ snew(re->Vol, re->nrepl); -+ snew(re->Epot, re->nrepl); -+ snew(re->de, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ snew(re->de[i], re->nrepl); 
-+ } -+ re->nex = nex; -+ return re; -+} -+ -+static void exchange_reals(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, real *v, int n) -+{ -+ real *buf; -+ int i; -+ -+ if (v) -+ { -+ snew(buf, n); -+#ifdef GMX_MPI -+ /* -+ MPI_Sendrecv(v, n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, -+ buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, -+ ms->mpi_comm_masters,MPI_STATUS_IGNORE); -+ */ -+ { -+ MPI_Request mpi_req; -+ -+ MPI_Isend(v, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, &mpi_req); -+ MPI_Recv(buf, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, MPI_STATUS_IGNORE); -+ MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); -+ } -+#endif -+ for (i = 0; i < n; i++) -+ { -+ v[i] = buf[i]; -+ } -+ sfree(buf); -+ } -+} -+ -+ -+static void exchange_ints(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, int *v, int n) -+{ -+ int *buf; -+ int i; -+ -+ if (v) -+ { -+ snew(buf, n); -+#ifdef GMX_MPI -+ /* -+ MPI_Sendrecv(v, n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0, -+ buf,n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0, -+ ms->mpi_comm_masters,MPI_STATUS_IGNORE); -+ */ -+ { -+ MPI_Request mpi_req; -+ -+ MPI_Isend(v, n*sizeof(int), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, &mpi_req); -+ MPI_Recv(buf, n*sizeof(int), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, MPI_STATUS_IGNORE); -+ MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); -+ } -+#endif -+ for (i = 0; i < n; i++) -+ { -+ v[i] = buf[i]; -+ } -+ sfree(buf); -+ } -+} -+ -+static void exchange_doubles(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, double *v, int n) -+{ -+ double *buf; -+ int i; -+ -+ if (v) -+ { -+ snew(buf, n); -+#ifdef GMX_MPI -+ /* -+ MPI_Sendrecv(v, n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, -+ buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, -+ ms->mpi_comm_masters,MPI_STATUS_IGNORE); -+ */ -+ { -+ MPI_Request mpi_req; -+ -+ MPI_Isend(v, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, &mpi_req); -+ MPI_Recv(buf, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, MPI_STATUS_IGNORE); -+ MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); -+ } -+#endif -+ for (i = 0; i < n; i++) -+ { -+ v[i] = buf[i]; -+ } -+ sfree(buf); -+ } -+} -+ -+static void exchange_rvecs(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, rvec *v, int n) -+{ -+ rvec *buf; -+ int i; -+ -+ if (v) -+ { -+ snew(buf, n); -+#ifdef GMX_MPI -+ /* -+ MPI_Sendrecv(v[0], n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, -+ buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, -+ ms->mpi_comm_masters,MPI_STATUS_IGNORE); -+ */ -+ { -+ MPI_Request mpi_req; -+ -+ MPI_Isend(v[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, &mpi_req); -+ MPI_Recv(buf[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, MPI_STATUS_IGNORE); -+ MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); -+ } -+#endif -+ for (i = 0; i < n; i++) -+ { -+ copy_rvec(buf[i], v[i]); -+ } -+ sfree(buf); -+ } -+} -+ -+static void exchange_state(const gmx_multisim_t *ms, int b, t_state *state) -+{ -+ /* When t_state changes, this code should be updated. 
*/ -+ int ngtc, nnhpres; -+ ngtc = state->ngtc * state->nhchainlength; -+ nnhpres = state->nnhpres* state->nhchainlength; -+ exchange_rvecs(ms, b, state->box, DIM); -+ exchange_rvecs(ms, b, state->box_rel, DIM); -+ exchange_rvecs(ms, b, state->boxv, DIM); -+ exchange_reals(ms, b, &(state->veta), 1); -+ exchange_reals(ms, b, &(state->vol0), 1); -+ exchange_rvecs(ms, b, state->svir_prev, DIM); -+ exchange_rvecs(ms, b, state->fvir_prev, DIM); -+ exchange_rvecs(ms, b, state->pres_prev, DIM); -+ exchange_doubles(ms, b, state->nosehoover_xi, ngtc); -+ exchange_doubles(ms, b, state->nosehoover_vxi, ngtc); -+ exchange_doubles(ms, b, state->nhpres_xi, nnhpres); -+ exchange_doubles(ms, b, state->nhpres_vxi, nnhpres); -+ exchange_doubles(ms, b, state->therm_integral, state->ngtc); -+ exchange_rvecs(ms, b, state->x, state->natoms); -+ exchange_rvecs(ms, b, state->v, state->natoms); -+ exchange_rvecs(ms, b, state->sd_X, state->natoms); -+} -+ -+static void copy_rvecs(rvec *s, rvec *d, int n) -+{ -+ int i; -+ -+ if (d != NULL) -+ { -+ for (i = 0; i < n; i++) -+ { -+ copy_rvec(s[i], d[i]); -+ } -+ } -+} -+ -+static void copy_doubles(const double *s, double *d, int n) -+{ -+ int i; -+ -+ if (d != NULL) -+ { -+ for (i = 0; i < n; i++) -+ { -+ d[i] = s[i]; -+ } -+ } -+} -+ -+static void copy_reals(const real *s, real *d, int n) -+{ -+ int i; -+ -+ if (d != NULL) -+ { -+ for (i = 0; i < n; i++) -+ { -+ d[i] = s[i]; -+ } -+ } -+} -+ -+static void copy_ints(const int *s, int *d, int n) -+{ -+ int i; -+ -+ if (d != NULL) -+ { -+ for (i = 0; i < n; i++) -+ { -+ d[i] = s[i]; -+ } -+ } -+} -+ -+#define scopy_rvecs(v, n) copy_rvecs(state->v, state_local->v, n); -+#define scopy_doubles(v, n) copy_doubles(state->v, state_local->v, n); -+#define scopy_reals(v, n) copy_reals(state->v, state_local->v, n); -+#define scopy_ints(v, n) copy_ints(state->v, state_local->v, n); -+ -+static void copy_state_nonatomdata(t_state *state, t_state *state_local) -+{ -+ /* When t_state changes, this code should be updated. 
*/ -+ int ngtc, nnhpres; -+ ngtc = state->ngtc * state->nhchainlength; -+ nnhpres = state->nnhpres* state->nhchainlength; -+ scopy_rvecs(box, DIM); -+ scopy_rvecs(box_rel, DIM); -+ scopy_rvecs(boxv, DIM); -+ state_local->veta = state->veta; -+ state_local->vol0 = state->vol0; -+ scopy_rvecs(svir_prev, DIM); -+ scopy_rvecs(fvir_prev, DIM); -+ scopy_rvecs(pres_prev, DIM); -+ scopy_doubles(nosehoover_xi, ngtc); -+ scopy_doubles(nosehoover_vxi, ngtc); -+ scopy_doubles(nhpres_xi, nnhpres); -+ scopy_doubles(nhpres_vxi, nnhpres); -+ scopy_doubles(therm_integral, state->ngtc); -+ scopy_rvecs(x, state->natoms); -+ scopy_rvecs(v, state->natoms); -+ scopy_rvecs(sd_X, state->natoms); -+ copy_ints(&(state->fep_state), &(state_local->fep_state), 1); -+ scopy_reals(lambda, efptNR); -+} -+ -+static void scale_velocities(t_state *state, real fac) -+{ -+ int i; -+ -+ if (state->v) -+ { -+ for (i = 0; i < state->natoms; i++) -+ { -+ svmul(fac, state->v[i], state->v[i]); -+ } -+ } -+} -+ -+static void print_transition_matrix(FILE *fplog, int n, int **nmoves, int *nattempt) -+{ -+ int i, j, ntot; -+ float Tprint; -+ -+ ntot = nattempt[0] + nattempt[1]; -+ fprintf(fplog, "\n"); -+ fprintf(fplog, "Repl"); -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, " "); /* put the title closer to the center */ -+ } -+ fprintf(fplog, "Empirical Transition Matrix\n"); -+ -+ fprintf(fplog, "Repl"); -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, "%8d", (i+1)); -+ } -+ fprintf(fplog, "\n"); -+ -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, "Repl"); -+ for (j = 0; j < n; j++) -+ { -+ Tprint = 0.0; -+ if (nmoves[i][j] > 0) -+ { -+ Tprint = nmoves[i][j]/(2.0*ntot); -+ } -+ fprintf(fplog, "%8.4f", Tprint); -+ } -+ fprintf(fplog, "%3d\n", i); -+ } -+} -+ -+static void print_ind(FILE *fplog, const char *leg, int n, int *ind, gmx_bool *bEx) -+{ -+ int i; -+ -+ fprintf(fplog, "Repl %2s %2d", leg, ind[0]); -+ for (i = 1; i < n; i++) -+ { -+ fprintf(fplog, " %c %2d", (bEx != 0 && bEx[i]) ? 'x' : ' ', ind[i]); -+ } -+ fprintf(fplog, "\n"); -+} -+ -+static void print_allswitchind(FILE *fplog, int n, int *pind, int *allswaps, int *tmpswap) -+{ -+ int i; -+ -+ for (i = 0; i < n; i++) -+ { -+ tmpswap[i] = allswaps[i]; -+ } -+ for (i = 0; i < n; i++) -+ { -+ allswaps[i] = tmpswap[pind[i]]; -+ } -+ -+ fprintf(fplog, "\nAccepted Exchanges: "); -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, "%d ", pind[i]); -+ } -+ fprintf(fplog, "\n"); -+ -+ /* the "Order After Exchange" is the state label corresponding to the configuration that -+ started in state listed in order, i.e. -+ -+ 3 0 1 2 -+ -+ means that the: -+ configuration starting in simulation 3 is now in simulation 0, -+ configuration starting in simulation 0 is now in simulation 1, -+ configuration starting in simulation 1 is now in simulation 2, -+ configuration starting in simulation 2 is now in simulation 3 -+ */ -+ fprintf(fplog, "Order After Exchange: "); -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, "%d ", allswaps[i]); -+ } -+ fprintf(fplog, "\n\n"); -+} -+ -+static void print_prob(FILE *fplog, const char *leg, int n, real *prob) -+{ -+ int i; -+ char buf[8]; -+ -+ fprintf(fplog, "Repl %2s ", leg); -+ for (i = 1; i < n; i++) -+ { -+ if (prob[i] >= 0) -+ { -+ sprintf(buf, "%4.2f", prob[i]); -+ fprintf(fplog, " %3s", buf[0] == '1' ? 
"1.0" : buf+1); -+ } -+ else -+ { -+ fprintf(fplog, " "); -+ } -+ } -+ fprintf(fplog, "\n"); -+} -+ -+static void print_count(FILE *fplog, const char *leg, int n, int *count) -+{ -+ int i; -+ -+ fprintf(fplog, "Repl %2s ", leg); -+ for (i = 1; i < n; i++) -+ { -+ fprintf(fplog, " %4d", count[i]); -+ } -+ fprintf(fplog, "\n"); -+} -+ -+static real calc_delta(FILE *fplog, gmx_bool bPrint, struct gmx_repl_ex *re, int a, int b, int ap, int bp) -+{ -+ -+ real ediff, dpV, delta = 0; -+ real *Epot = re->Epot; -+ real *Vol = re->Vol; -+ real **de = re->de; -+ real *beta = re->beta; -+ -+ /* Two cases; we are permuted and not. In all cases, setting ap = a and bp = b will reduce -+ to the non permuted case */ -+ -+ switch (re->type) -+ { -+ case ereTEMP: -+ /* -+ * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439 -+ */ -+ ediff = Epot[b] - Epot[a]; -+ delta = -(beta[bp] - beta[ap])*ediff; -+ break; -+ case ereLAMBDA: -+ /* two cases: when we are permuted, and not. */ -+ /* non-permuted: -+ ediff = E_new - E_old -+ = [H_b(x_a) + H_a(x_b)] - [H_b(x_b) + H_a(x_a)] -+ = [H_b(x_a) - H_a(x_a)] + [H_a(x_b) - H_b(x_b)] -+ = de[b][a] + de[a][b] */ -+ -+ /* permuted: -+ ediff = E_new - E_old -+ = [H_bp(x_a) + H_ap(x_b)] - [H_bp(x_b) + H_ap(x_a)] -+ = [H_bp(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_bp(x_b)] -+ = [H_bp(x_a) - H_a(x_a) + H_a(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_b(x_b) + H_b(x_b) - H_bp(x_b)] -+ = [H_bp(x_a) - H_a(x_a)] - [H_ap(x_a) - H_a(x_a)] + [H_ap(x_b) - H_b(x_b)] - H_bp(x_b) - H_b(x_b)] -+ = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]) */ -+ /* but, in the current code implementation, we flip configurations, not indices . . . -+ So let's examine that. -+ = [H_b(x_ap) - H_a(x_a)] - [H_a(x_ap) - H_a(x_a)] + [H_a(x_bp) - H_b(x_b)] - H_b(x_bp) - H_b(x_b)] -+ = [H_b(x_ap) - H_a(x_ap)] + [H_a(x_bp) - H_b(x_pb)] -+ = (de[b][ap] - de[a][ap]) + (de[a][bp] - de[b][bp] -+ So, if we exchange b<=> bp and a<=> ap, we return to the same result. -+ So the simple solution is to flip the -+ position of perturbed and original indices in the tests. -+ */ -+ -+ ediff = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]); -+ delta = ediff*beta[a]; /* assume all same temperature in this case */ -+ break; -+ case ereTL: -+ /* not permuted: */ -+ /* delta = reduced E_new - reduced E_old -+ = [beta_b H_b(x_a) + beta_a H_a(x_b)] - [beta_b H_b(x_b) + beta_a H_a(x_a)] -+ = [beta_b H_b(x_a) - beta_a H_a(x_a)] + [beta_a H_a(x_b) - beta_b H_b(x_b)] -+ = [beta_b dH_b(x_a) + beta_b H_a(x_a) - beta_a H_a(x_a)] + -+ [beta_a dH_a(x_b) + beta_a H_b(x_b) - beta_b H_b(x_b)] -+ = [beta_b dH_b(x_a) + [beta_a dH_a(x_b) + -+ beta_b (H_a(x_a) - H_b(x_b)]) - beta_a (H_a(x_a) - H_b(x_b)) -+ = beta_b dH_b(x_a) + beta_a dH_a(x_b) - (beta_b - beta_a)(H_b(x_b) - H_a(x_a) */ -+ /* delta = beta[b]*de[b][a] + beta[a]*de[a][b] - (beta[b] - beta[a])*(Epot[b] - Epot[a]; */ -+ /* permuted (big breath!) 
*/ -+ /* delta = reduced E_new - reduced E_old -+ = [beta_bp H_bp(x_a) + beta_ap H_ap(x_b)] - [beta_bp H_bp(x_b) + beta_ap H_ap(x_a)] -+ = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] -+ = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] -+ - beta_pb H_a(x_a) + beta_ap H_a(x_a) + beta_pb H_a(x_a) - beta_ap H_a(x_a) -+ - beta_ap H_b(x_b) + beta_bp H_b(x_b) + beta_ap H_b(x_b) - beta_bp H_b(x_b) -+ = [(beta_bp H_bp(x_a) - beta_bp H_a(x_a)) - (beta_ap H_ap(x_a) - beta_ap H_a(x_a))] + -+ [(beta_ap H_ap(x_b) - beta_ap H_b(x_b)) - (beta_bp H_bp(x_b) - beta_bp H_b(x_b))] -+ + beta_pb H_a(x_a) - beta_ap H_a(x_a) + beta_ap H_b(x_b) - beta_bp H_b(x_b) -+ = [beta_bp (H_bp(x_a) - H_a(x_a)) - beta_ap (H_ap(x_a) - H_a(x_a))] + -+ [beta_ap (H_ap(x_b) - H_b(x_b)) - beta_bp (H_bp(x_b) - H_b(x_b))] -+ + beta_pb (H_a(x_a) - H_b(x_b)) - beta_ap (H_a(x_a) - H_b(x_b)) -+ = ([beta_bp de[bp][a] - beta_ap de[ap][a]) + beta_ap de[ap][b] - beta_bp de[bp][b]) -+ + (beta_pb-beta_ap)(H_a(x_a) - H_b(x_b)) */ -+ delta = beta[bp]*(de[bp][a] - de[bp][b]) + beta[ap]*(de[ap][b] - de[ap][a]) - (beta[bp]-beta[ap])*(Epot[b]-Epot[a]); -+ break; -+ default: -+ gmx_incons("Unknown replica exchange quantity"); -+ } -+ if (bPrint) -+ { -+ fprintf(fplog, "Repl %d <-> %d dE_term = %10.3e (kT)\n", a, b, delta); -+ } -+ if (re->bNPT) -+ { -+ /* revist the calculation for 5.0. Might be some improvements. */ -+ dpV = (beta[ap]*re->pres[ap]-beta[bp]*re->pres[bp])*(Vol[b]-Vol[a])/PRESFAC; -+ if (bPrint) -+ { -+ fprintf(fplog, " dpV = %10.3e d = %10.3e\n", dpV, delta + dpV); -+ } -+ delta += dpV; -+ } -+ return delta; -+} -+ -+static void -+test_for_replica_exchange(FILE *fplog, -+ const gmx_multisim_t *ms, -+ struct gmx_repl_ex *re, -+ gmx_enerdata_t *enerd, -+ real vol, -+ gmx_int64_t step, -+ real time) -+{ -+ int m, i, j, a, b, ap, bp, i0, i1, tmp; -+ real ediff = 0, delta = 0, dpV = 0; -+ gmx_bool bPrint, bMultiEx; -+ gmx_bool *bEx = re->bEx; -+ real *prob = re->prob; -+ int *pind = re->destinations; /* permuted index */ -+ gmx_bool bEpot = FALSE; -+ gmx_bool bDLambda = FALSE; -+ gmx_bool bVol = FALSE; -+ gmx_rng_t rng; -+ -+ bMultiEx = (re->nex > 1); /* multiple exchanges at each state */ -+ fprintf(fplog, "Replica exchange at step " "%"GMX_PRId64 " time %.5f\n", step, time); -+ -+ if (re->bNPT) -+ { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->Vol[i] = 0; -+ } -+ bVol = TRUE; -+ re->Vol[re->repl] = vol; -+ } -+ if ((re->type == ereTEMP || re->type == ereTL)) -+ { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->Epot[i] = 0; -+ } -+ bEpot = TRUE; -+ re->Epot[re->repl] = enerd->term[F_EPOT]; -+ /* temperatures of different states*/ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->beta[i] = 1.0/(re->q[ereTEMP][i]*BOLTZ); -+ } -+ } -+ else -+ { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->beta[i] = 1.0/(re->temp*BOLTZ); /* we have a single temperature */ -+ } -+ } -+ if (re->type == ereLAMBDA || re->type == ereTL) -+ { -+ bDLambda = TRUE; -+ /* lambda differences. 
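For the common temperature-only case handled by calc_delta() above (ereTEMP), the exchange quantity reduces to the standard parallel-tempering expression from Okabe et al., delta = (beta_a - beta_b)*(Epot_b - Epot_a), and a swap is accepted with probability min(1, exp(-delta)). A self-contained sketch of just that case follows; the function names, the Boltzmann constant value, and the numbers in main() are illustrative and not taken from the GROMACS sources.

    #include <math.h>
    #include <stdio.h>

    /* Boltzmann constant in GROMACS energy units, kJ/(mol K) -- approximate
     * value, assumed here only to make the example runnable. */
    #define BOLTZ 0.0083144621

    /* Exchange quantity for two temperature replicas:
     *   delta = (beta_a - beta_b) * (Epot_b - Epot_a)
     * which is what calc_delta() evaluates in the ereTEMP branch. */
    static double temp_exchange_delta(double temp_a, double temp_b,
                                      double epot_a, double epot_b)
    {
        double beta_a = 1.0 / (BOLTZ * temp_a);
        double beta_b = 1.0 / (BOLTZ * temp_b);

        return (beta_a - beta_b) * (epot_b - epot_a);
    }

    int main(void)
    {
        /* Illustrative numbers: neighbouring replicas at 300 K and 310 K. */
        double delta = temp_exchange_delta(300.0, 310.0, -5.000e4, -4.990e4);

        printf("delta = %.3f (in units of kT), p(accept) = %.3f\n",
               delta, delta <= 0 ? 1.0 : exp(-delta));
        return 0;
    }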
*/ -+ /* de[i][j] is the energy of the jth simulation in the ith Hamiltonian -+ minus the energy of the jth simulation in the jth Hamiltonian */ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ for (j = 0; j < re->nrepl; j++) -+ { -+ re->de[i][j] = 0; -+ } -+ } -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->de[i][re->repl] = (enerd->enerpart_lambda[(int)re->q[ereLAMBDA][i]+1]-enerd->enerpart_lambda[0]); -+ } -+ } -+ -+ /* now actually do the communication */ -+ if (bVol) -+ { -+ gmx_sum_sim(re->nrepl, re->Vol, ms); -+ } -+ if (bEpot) -+ { -+ gmx_sum_sim(re->nrepl, re->Epot, ms); -+ } -+ if (bDLambda) -+ { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ gmx_sum_sim(re->nrepl, re->de[i], ms); -+ } -+ } -+ -+ /* make a duplicate set of indices for shuffling */ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ pind[i] = re->ind[i]; -+ } -+ -+ if (bMultiEx) -+ { -+ /* multiple random switch exchange */ -+ int nself = 0; -+ for (i = 0; i < re->nex + nself; i++) -+ { -+ double rnd[2]; -+ -+ gmx_rng_cycle_2uniform(step, i*2, re->seed, RND_SEED_REPLEX, rnd); -+ /* randomly select a pair */ -+ /* in theory, could reduce this by identifying only which switches had a nonneglibible -+ probability of occurring (log p > -100) and only operate on those switches */ -+ /* find out which state it is from, and what label that state currently has. Likely -+ more work that useful. */ -+ i0 = (int)(re->nrepl*rnd[0]); -+ i1 = (int)(re->nrepl*rnd[1]); -+ if (i0 == i1) -+ { -+ nself++; -+ continue; /* self-exchange, back up and do it again */ -+ } -+ -+ a = re->ind[i0]; /* what are the indices of these states? */ -+ b = re->ind[i1]; -+ ap = pind[i0]; -+ bp = pind[i1]; -+ -+ bPrint = FALSE; /* too noisy */ -+ /* calculate the energy difference */ -+ /* if the code changes to flip the STATES, rather than the configurations, -+ use the commented version of the code */ -+ /* delta = calc_delta(fplog,bPrint,re,a,b,ap,bp); */ -+ delta = calc_delta(fplog, bPrint, re, ap, bp, a, b); -+ -+ /* we actually only use the first space in the prob and bEx array, -+ since there are actually many switches between pairs. 
*/ -+ -+ if (delta <= 0) -+ { -+ /* accepted */ -+ prob[0] = 1; -+ bEx[0] = TRUE; -+ } -+ else -+ { -+ if (delta > PROBABILITYCUTOFF) -+ { -+ prob[0] = 0; -+ } -+ else -+ { -+ prob[0] = exp(-delta); -+ } -+ /* roll a number to determine if accepted */ -+ gmx_rng_cycle_2uniform(step, i*2+1, re->seed, RND_SEED_REPLEX, rnd); -+ bEx[0] = rnd[0] < prob[0]; -+ } -+ re->prob_sum[0] += prob[0]; -+ -+ if (bEx[0]) -+ { -+ /* swap the states */ -+ tmp = pind[i0]; -+ pind[i0] = pind[i1]; -+ pind[i1] = tmp; -+ } -+ } -+ re->nattempt[0]++; /* keep track of total permutation trials here */ -+ print_allswitchind(fplog, re->nrepl, pind, re->allswaps, re->tmpswap); -+ } -+ else -+ { -+ /* standard nearest neighbor replica exchange */ -+ -+ m = (step / re->nst) % 2; -+ for (i = 1; i < re->nrepl; i++) -+ { -+ a = re->ind[i-1]; -+ b = re->ind[i]; -+ -+ bPrint = (re->repl == a || re->repl == b); -+ if (i % 2 == m) -+ { -+ delta = calc_delta(fplog, bPrint, re, a, b, a, b); -+ if (delta <= 0) -+ { -+ /* accepted */ -+ prob[i] = 1; -+ bEx[i] = TRUE; -+ } -+ else -+ { -+ double rnd[2]; -+ -+ if (delta > PROBABILITYCUTOFF) -+ { -+ prob[i] = 0; -+ } -+ else -+ { -+ prob[i] = exp(-delta); -+ } -+ /* roll a number to determine if accepted */ -+ gmx_rng_cycle_2uniform(step, i, re->seed, RND_SEED_REPLEX, rnd); -+ bEx[i] = rnd[0] < prob[i]; -+ } -+ re->prob_sum[i] += prob[i]; -+ -+ if (bEx[i]) -+ { -+ /* swap these two */ -+ tmp = pind[i-1]; -+ pind[i-1] = pind[i]; -+ pind[i] = tmp; -+ re->nexchange[i]++; /* statistics for back compatibility */ -+ } -+ } -+ else -+ { -+ prob[i] = -1; -+ bEx[i] = FALSE; -+ } -+ } -+ /* print some statistics */ -+ print_ind(fplog, "ex", re->nrepl, re->ind, bEx); -+ print_prob(fplog, "pr", re->nrepl, prob); -+ fprintf(fplog, "\n"); -+ re->nattempt[m]++; -+ } -+ -+ /* record which moves were made and accepted */ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->nmoves[re->ind[i]][pind[i]] += 1; -+ re->nmoves[pind[i]][re->ind[i]] += 1; -+ } -+ fflush(fplog); /* make sure we can see what the last exchange was */ -+} -+ -+static void write_debug_x(t_state *state) -+{ -+ int i; -+ -+ if (debug) -+ { -+ for (i = 0; i < state->natoms; i += 10) -+ { -+ fprintf(debug, "dx %5d %10.5f %10.5f %10.5f\n", i, state->x[i][XX], state->x[i][YY], state->x[i][ZZ]); -+ } -+ } -+} -+ -+static void -+cyclic_decomposition(const int *destinations, -+ int **cyclic, -+ gmx_bool *incycle, -+ const int nrepl, -+ int *nswap) -+{ -+ -+ int i, j, c, p; -+ int maxlen = 1; -+ for (i = 0; i < nrepl; i++) -+ { -+ incycle[i] = FALSE; -+ } -+ for (i = 0; i < nrepl; i++) /* one cycle for each replica */ -+ { -+ if (incycle[i]) -+ { -+ cyclic[i][0] = -1; -+ continue; -+ } -+ cyclic[i][0] = i; -+ incycle[i] = TRUE; -+ c = 1; -+ p = i; -+ for (j = 0; j < nrepl; j++) /* potentially all cycles are part, but we will break first */ -+ { -+ p = destinations[p]; /* start permuting */ -+ if (p == i) -+ { -+ cyclic[i][c] = -1; -+ if (c > maxlen) -+ { -+ maxlen = c; -+ } -+ break; /* we've reached the original element, the cycle is complete, and we marked the end. 
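The acceptance test that appears in both the multiple-exchange loop and the nearest-neighbour loop above is an ordinary Metropolis criterion, with a cutoff so that exp() is never evaluated for hopeless moves. A compact restatement, with rand() standing in for the counter-based gmx_rng_cycle_2uniform() draw used in the real code; the names and the cutoff value below are assumptions of this sketch.

    #include <math.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Beyond this reduced-energy difference exp(-delta) is treated as zero;
     * the GROMACS source guards with a similar constant (PROBABILITYCUTOFF).
     * The exact value here is an assumption. */
    #define PROB_CUTOFF 100.0

    /* Metropolis test: accept downhill moves outright, uphill moves with
     * probability exp(-delta).  `uniform` is a uniform random number in [0,1). */
    static int accept_exchange(double delta, double uniform, double *prob)
    {
        if (delta <= 0)
        {
            *prob = 1.0;
            return 1;
        }
        *prob = (delta > PROB_CUTOFF) ? 0.0 : exp(-delta);
        return uniform < *prob;
    }

    int main(void)
    {
        double prob;
        int    accepted = accept_exchange(1.3,
                                          (double)rand() / ((double)RAND_MAX + 1.0),
                                          &prob);
        printf("p(accept) = %.3f, accepted = %d\n", prob, accepted);
        return 0;
    }

Returning the probability separately mirrors how the original code accumulates prob_sum[] for the statistics printed at the end of the run.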
*/ -+ } -+ else -+ { -+ cyclic[i][c] = p; /* each permutation gives a new member of the cycle */ -+ incycle[p] = TRUE; -+ c++; -+ } -+ } -+ } -+ *nswap = maxlen - 1; -+ -+ if (debug) -+ { -+ for (i = 0; i < nrepl; i++) -+ { -+ fprintf(debug, "Cycle %d:", i); -+ for (j = 0; j < nrepl; j++) -+ { -+ if (cyclic[i][j] < 0) -+ { -+ break; -+ } -+ fprintf(debug, "%2d", cyclic[i][j]); -+ } -+ fprintf(debug, "\n"); -+ } -+ fflush(debug); -+ } -+} -+ -+static void -+compute_exchange_order(FILE *fplog, -+ int **cyclic, -+ int **order, -+ const int nrepl, -+ const int maxswap) -+{ -+ int i, j; -+ -+ for (j = 0; j < maxswap; j++) -+ { -+ for (i = 0; i < nrepl; i++) -+ { -+ if (cyclic[i][j+1] >= 0) -+ { -+ order[cyclic[i][j+1]][j] = cyclic[i][j]; -+ order[cyclic[i][j]][j] = cyclic[i][j+1]; -+ } -+ } -+ for (i = 0; i < nrepl; i++) -+ { -+ if (order[i][j] < 0) -+ { -+ order[i][j] = i; /* if it's not exchanging, it should stay this round*/ -+ } -+ } -+ } -+ -+ if (debug) -+ { -+ fprintf(fplog, "Replica Exchange Order\n"); -+ for (i = 0; i < nrepl; i++) -+ { -+ fprintf(fplog, "Replica %d:", i); -+ for (j = 0; j < maxswap; j++) -+ { -+ if (order[i][j] < 0) -+ { -+ break; -+ } -+ fprintf(debug, "%2d", order[i][j]); -+ } -+ fprintf(fplog, "\n"); -+ } -+ fflush(fplog); -+ } -+} -+ -+static void -+prepare_to_do_exchange(FILE *fplog, -+ struct gmx_repl_ex *re, -+ const int replica_id, -+ int *maxswap, -+ gmx_bool *bThisReplicaExchanged) -+{ -+ int i, j; -+ /* Hold the cyclic decomposition of the (multiple) replica -+ * exchange. */ -+ gmx_bool bAnyReplicaExchanged = FALSE; -+ *bThisReplicaExchanged = FALSE; -+ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ if (re->destinations[i] != re->ind[i]) -+ { -+ /* only mark as exchanged if the index has been shuffled */ -+ bAnyReplicaExchanged = TRUE; -+ break; -+ } -+ } -+ if (bAnyReplicaExchanged) -+ { -+ /* reinitialize the placeholder arrays */ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ for (j = 0; j < re->nrepl; j++) -+ { -+ re->cyclic[i][j] = -1; -+ re->order[i][j] = -1; -+ } -+ } -+ -+ /* Identify the cyclic decomposition of the permutation (very -+ * fast if neighbor replica exchange). */ -+ cyclic_decomposition(re->destinations, re->cyclic, re->incycle, re->nrepl, maxswap); -+ -+ /* Now translate the decomposition into a replica exchange -+ * order at each step. */ -+ compute_exchange_order(fplog, re->cyclic, re->order, re->nrepl, *maxswap); -+ -+ /* Did this replica do any exchange at any point? */ -+ for (j = 0; j < *maxswap; j++) -+ { -+ if (replica_id != re->order[replica_id][j]) -+ { -+ *bThisReplicaExchanged = TRUE; -+ break; -+ } -+ } -+ } -+} -+ -+gmx_bool replica_exchange(FILE *fplog, const t_commrec *cr, struct gmx_repl_ex *re, -+ t_state *state, gmx_enerdata_t *enerd, -+ t_state *state_local, gmx_int64_t step, real time) -+{ -+ int i, j; -+ int replica_id = 0; -+ int exchange_partner; -+ int maxswap = 0; -+ /* Number of rounds of exchanges needed to deal with any multiple -+ * exchanges. */ -+ /* Where each replica ends up after the exchange attempt(s). */ -+ /* The order in which multiple exchanges will occur. */ -+ gmx_bool bThisReplicaExchanged = FALSE; -+ -+ if (MASTER(cr)) -+ { -+ replica_id = re->repl; -+ test_for_replica_exchange(fplog, cr->ms, re, enerd, det(state_local->box), step, time); -+ prepare_to_do_exchange(fplog, re, replica_id, &maxswap, &bThisReplicaExchanged); -+ } -+ /* Do intra-simulation broadcast so all processors belonging to -+ * each simulation know whether they need to participate in -+ * collecting the state. 
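The cyclic_decomposition()/compute_exchange_order() pair shown above turns the shuffled destination array into an explicit swap schedule: each disjoint cycle of the permutation with length L is realised by L-1 rounds of pairwise state exchanges, and maxswap is the largest such count. A toy illustration of the decomposition step, using fixed-size arrays instead of the GROMACS bookkeeping structures (the permutation chosen here is arbitrary):

    #include <stdio.h>

    #define NREPL 4

    int main(void)
    {
        /* Example permutation: replica 0 -> 2, 1 -> 0, 2 -> 1, 3 -> 3 */
        int destinations[NREPL] = {2, 0, 1, 3};
        int incycle[NREPL]      = {0};
        int i, p, len, maxswap  = 0;

        for (i = 0; i < NREPL; i++)
        {
            if (incycle[i])
            {
                continue;           /* already part of an earlier cycle */
            }
            printf("cycle:");
            len = 0;
            p   = i;
            do
            {
                printf(" %d", p);
                incycle[p] = 1;
                p          = destinations[p];
                len++;
            }
            while (p != i);
            printf("\n");
            if (len - 1 > maxswap)
            {
                maxswap = len - 1;  /* a cycle of length L needs L-1 swap rounds */
            }
        }
        printf("swap rounds needed: %d\n", maxswap);
        return 0;
    }

With nearest-neighbour exchange the permutation only contains transpositions, so maxswap is 1 and a single round suffices, which is why the code comment above calls the decomposition "very fast if neighbor replica exchange".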
Otherwise, they might as well get on with -+ * the next thing to do. */ -+ if (DOMAINDECOMP(cr)) -+ { -+#ifdef GMX_MPI -+ MPI_Bcast(&bThisReplicaExchanged, sizeof(gmx_bool), MPI_BYTE, MASTERRANK(cr), -+ cr->mpi_comm_mygroup); -+#endif -+ } -+ -+ if (bThisReplicaExchanged) -+ { -+ /* Exchange the states */ -+ /* Collect the global state on the master node */ -+ if (DOMAINDECOMP(cr)) -+ { -+ dd_collect_state(cr->dd, state_local, state); -+ } -+ else -+ { -+ copy_state_nonatomdata(state_local, state); -+ } -+ -+ if (MASTER(cr)) -+ { -+ /* There will be only one swap cycle with standard replica -+ * exchange, but there may be multiple swap cycles if we -+ * allow multiple swaps. */ -+ -+ for (j = 0; j < maxswap; j++) -+ { -+ exchange_partner = re->order[replica_id][j]; -+ -+ if (exchange_partner != replica_id) -+ { -+ /* Exchange the global states between the master nodes */ -+ if (debug) -+ { -+ fprintf(debug, "Exchanging %d with %d\n", replica_id, exchange_partner); -+ } -+ exchange_state(cr->ms, exchange_partner, state); -+ } -+ } -+ /* For temperature-type replica exchange, we need to scale -+ * the velocities. */ -+ if (re->type == ereTEMP || re->type == ereTL) -+ { -+ scale_velocities(state, sqrt(re->q[ereTEMP][replica_id]/re->q[ereTEMP][re->destinations[replica_id]])); -+ } -+ -+ } -+ -+ /* With domain decomposition the global state is distributed later */ -+ if (!DOMAINDECOMP(cr)) -+ { -+ /* Copy the global state to the local state data structure */ -+ copy_state_nonatomdata(state, state_local); -+ } -+ } -+ -+ return bThisReplicaExchanged; -+} -+ -+void print_replica_exchange_statistics(FILE *fplog, struct gmx_repl_ex *re) -+{ -+ int i; -+ -+ fprintf(fplog, "\nReplica exchange statistics\n"); -+ -+ if (re->nex == 0) -+ { -+ fprintf(fplog, "Repl %d attempts, %d odd, %d even\n", -+ re->nattempt[0]+re->nattempt[1], re->nattempt[1], re->nattempt[0]); -+ -+ fprintf(fplog, "Repl average probabilities:\n"); -+ for (i = 1; i < re->nrepl; i++) -+ { -+ if (re->nattempt[i%2] == 0) -+ { -+ re->prob[i] = 0; -+ } -+ else -+ { -+ re->prob[i] = re->prob_sum[i]/re->nattempt[i%2]; -+ } -+ } -+ print_ind(fplog, "", re->nrepl, re->ind, NULL); -+ print_prob(fplog, "", re->nrepl, re->prob); -+ -+ fprintf(fplog, "Repl number of exchanges:\n"); -+ print_ind(fplog, "", re->nrepl, re->ind, NULL); -+ print_count(fplog, "", re->nrepl, re->nexchange); -+ -+ fprintf(fplog, "Repl average number of exchanges:\n"); -+ for (i = 1; i < re->nrepl; i++) -+ { -+ if (re->nattempt[i%2] == 0) -+ { -+ re->prob[i] = 0; -+ } -+ else -+ { -+ re->prob[i] = ((real)re->nexchange[i])/re->nattempt[i%2]; -+ } -+ } -+ print_ind(fplog, "", re->nrepl, re->ind, NULL); -+ print_prob(fplog, "", re->nrepl, re->prob); -+ -+ fprintf(fplog, "\n"); -+ } -+ /* print the transition matrix */ -+ print_transition_matrix(fplog, re->nrepl, re->nmoves, re->nattempt); -+} diff --git a/g/GROMACS/gromacs-5.1.4-plumed-2.3.0-mpi.patch b/g/GROMACS/gromacs-5.1.4-plumed-2.3.0-mpi.patch deleted file mode 100644 index e91a0aef..00000000 --- a/g/GROMACS/gromacs-5.1.4-plumed-2.3.0-mpi.patch +++ /dev/null @@ -1,9575 +0,0 @@ -diff --git a/Plumed.cmake b/Plumed.cmake -new file mode 100644 -index 0000000..01472f0 ---- /dev/null -+++ b/Plumed.cmake -@@ -0,0 +1,3 @@ -+# PLUMED: shared installation -+set(PLUMED_LOAD /apps/all/PLUMED/2.3.0-foss-2017a/lib/plumed///src/lib/libplumed.so -ldl ) -+set(PLUMED_DEPENDENCIES /apps/all/PLUMED/2.3.0-foss-2017a/lib/plumed///src/lib/libplumed.so) -diff --git a/Plumed.h b/Plumed.h -new file mode 100644 -index 0000000..16da74a ---- 
/dev/null -+++ b/Plumed.h -@@ -0,0 +1,494 @@ -+/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -+ Copyright (c) 2011-2014 The plumed team -+ (see the PEOPLE file at the root of the distribution for a list of names) -+ -+ See http://www.plumed-code.org for more information. -+ -+ This file is part of plumed, version 2. -+ -+ plumed is free software: you can redistribute it and/or modify -+ it under the terms of the GNU Lesser General Public License as published by -+ the Free Software Foundation, either version 3 of the License, or -+ (at your option) any later version. -+ -+ plumed is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public License -+ along with plumed. If not, see . -++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ -+#ifndef __PLUMED_wrapper_Plumed_h -+#define __PLUMED_wrapper_Plumed_h -+ -+/** -+\page ReferencePlumedH Reference for interfacing MD codes with PLUMED -+ -+ Plumed.h and Plumed.c contain the external plumed interface, which is used to -+ integrate it with MD engines. This interface is very general, and is expected -+ not to change across plumed versions. Plumed.c also implements a dummy version -+ of the interface, so as to allow a code to be fully linked even if the plumed -+ library is not available yet. These files could be directly included in the official -+ host MD distribution. In this manner, it will be sufficient to link the plumed -+ library at link time (on all systems) or directly at runtime (on system where -+ dynamic loading is enabled) to include plumed features. -+ -+ Why is Plumed.c written in C and not C++? The reason is that the resulting Plumed.o -+ needs to be linked with the host MD code immediately (whereas the rest of plumed -+ could be linked a posteriori). Imagine the MD code is written in FORTRAN: when we -+ link the Plumed.o file we would like not to need any C++ library linked. In this -+ manner, we do not need to know which C++ compiler will be used to compile plumed. -+ The C++ library is only linked to the "rest" of plumed, which actually use it. -+ Anyway, Plumed.c is written in such a manner to allow its compilation also in C++ -+ (C++ is a bit stricter than C; compatibility is checked when PlumedStatic.cpp, -+ which basically includes Plumed.c, is compiled with the C++ compiler). This will -+ allow e.g. MD codes written in C++ to just incorporate Plumed.c (maybe renamed into -+ Plumed.cpp), without the need of configuring a plain C compiler. -+ -+ Plumed interface can be used from C, C++ and FORTRAN. Everything concerning plumed -+ is hidden inside a single object type, which is described in C by a structure -+ (struct \ref plumed), in C++ by a class (PLMD::Plumed) and in FORTRAN by a -+ fixed-length string (CHARACTER(LEN=32)). Obviously C++ can use both struct -+ and class interfaces, but the first should be preferred. The reference interface -+ is the C one, whereas FORTRAN and C++ interfaces are implemented as wrappers -+ around it. -+ -+ In the C++ interface, all the routines are implemented as methods of PLMD::Plumed. -+ In the C and FORTRAN interfaces, all the routines are named plumed_*, to -+ avoid potential name clashes. 
Notice that the entire plumed library -+ is implemented in C++, and it is hidden inside the PLMD namespace. -+ -+ Handlers to the plumed object can be converted among different representations, -+ to allow inter-operability among languages. In C, there are tools to convert -+ to/from FORTRAN, whereas in C++ there are tools to convert to/from FORTRAN and C. -+ -+ These handlers only contain a pointer to the real structure, so that -+ when a plumed object is brought from one language to another, -+ it brings a reference to the same environment. -+ -+ Moreover, to simplify life in all cases where a single Plumed object is -+ required for the entire simulation (which covers most of the practical -+ applications with conventional MD codes) it is possible to take advantage -+ of a global interface, which is implicitly referring to a unique global instance. -+ The global object should still be initialized and finalized properly. -+ -+ The basic method to send a message to plumed is -+\verbatim -+ (C) plumed_cmd -+ (C++) PLMD::Plumed::cmd -+ (FORTRAN) PLUMED_F_CMD -+\endverbatim -+ -+ To initialize a plumed object, use: -+\verbatim -+ (C) plumed_create -+ (C++) (constructor of PLMD::Plumed) -+ (FORTRAN) PLUMED_F_CREATE -+\endverbatim -+ -+ To finalize it, use -+\verbatim -+ (C) plumed_finalize -+ (C++) (destructor of PLMD::Plumed) -+ (FORTRAN) PLUMED_F_FINALIZE -+\endverbatim -+ -+ To access to the global-object, use -+\verbatim -+ (C) plumed_gcreate, plumed_gfinalize, plumed_gcmd -+ (C++) PLMD::Plumed::gcreate, PLMD::Plumed::gfinalize, PLMD::Plumed::gcmd -+ (FORTRAN) PLUMED_F_GCREATE, PLUMED_F_GFINALIZE, PLUMED_F_GCMD -+\endverbatim -+ -+ To check if the global object has been initialized, use -+\verbatim -+ (C) plumed_ginitialized -+ (C++) PLMD::Plumed::ginitialized -+ (FORTRAN) PLUMED_F_GINITIALIZED -+\endverbatim -+ -+ To check if plumed library is available (this is useful for runtime linking), use -+\verbatim -+ (C) plumed_installed -+ (C++) PLMD::Plumed::installed -+ (FORTRAN) PLUMED_F_INSTALLED -+\endverbatim -+ -+ To convert handlers use -+\verbatim -+ (C) plumed_c2f (C to FORTRAN) -+ (C) plumed_f2c (FORTRAN to C) -+ (C++) Plumed(plumed) constructor (C to C++) -+ (C++) operator plumed() cast (C++ to C) -+ (C++) Plumed(char*) constructor (FORTRAN to C++) -+ (C++) toFortran(char*) (C++ to FORTRAN) -+\endverbatim -+ -+\verbatim -+ FORTRAN interface -+ SUBROUTINE PLUMED_F_INSTALLED(i) -+ INTEGER, INTENT(OUT) :: i -+ SUBROUTINE PLUMED_F_GINITIALIZED(i) -+ INTEGER, INTENT(OUT) :: i -+ SUBROUTINE PLUMED_F_GCREATE() -+ SUBROUTINE PLUMED_F_GCMD(key,val) -+ CHARACTER(LEN=*), INTENT(IN) :: key -+ UNSPECIFIED_TYPE, INTENT(INOUT) :: val(*) -+ SUBROUTINE PLUMED_F_GFINALIZE() -+ SUBROUTINE PLUMED_F_GLOBAL(p) -+ CHARACTER(LEN=32), INTENT(OUT) :: p -+ SUBROUTINE PLUMED_F_CREATE(p) -+ CHARACTER(LEN=32), INTENT(OUT) :: p -+ SUBROUTINE PLUMED_F_CMD(p,key,val) -+ CHARACTER(LEN=32), INTENT(IN) :: p -+ CHARACTER(LEN=*), INTENT(IN) :: key -+ UNSPECIFIED_TYPE, INTENT(INOUT) :: val(*) -+ SUBROUTINE PLUMED_F_FINALIZE(p) -+ CHARACTER(LEN=32), INTENT(IN) :: p -+\endverbatim -+ -+ The main routine is "cmd", which accepts two arguments: -+ key is a string containing the name of the command -+ val is the argument. it is declared const so as to use allow passing const objects, but in practice plumed -+ is going to modify val in several cases (using a const_cast). -+ In some cases val can be omitted: just pass a NULL pointer (in C++, val is optional and can be omitted). 
-+ The set of possible keys is the real API of the plumed library, and will be expanded with time. -+ New commands will be added, but backward compatibility will be retained as long as possible. -+ -+ To pass plumed a callback function use the following syntax (not available in FORTRAN yet) -+\verbatim -+ plumed_function_holder ff; -+ ff.p=your_function; -+ plumed_cmd(plumed,"xxxx",&ff); -+\endverbatim -+ (this is passing the your_function() function to the "xxxx" command) -+*/ -+ -+#ifdef __cplusplus -+ extern "C" { -+#endif -+ -+/* Generic function pointer */ -+typedef void (*plumed_function_pointer)(void); -+ -+/** -+ \brief Holder for function pointer. -+ -+ To pass plumed a callback function use the following syntax: -+\verbatim -+ plumed_function_holder ff; -+ ff.p=your_function; -+ plumed_cmd(plumed,"xxxx",&ff); -+\endverbatim -+ (this is going to pass the your_function() function to the "xxxx" command) -+*/ -+ -+typedef struct { -+ plumed_function_pointer p; -+} plumed_function_holder; -+ -+/** -+ \brief Main plumed object -+ -+ This is an object containing a Plumed instance, which should be used in -+ the MD engine. It should first be initialized with plumed_create(), -+ then it communicates with the MD engine using plumed_cmd(). Finally, -+ before the termination, it should be deallocated with plumed_finalize(). -+ Its interface is very simple and general, and is expected -+ not to change across plumed versions. See \ref ReferencePlumedH. -+*/ -+typedef struct { -+/** -+ \private -+ \brief Void pointer holding the real PlumedMain structure -+*/ -+ void*p; -+} plumed; -+ -+/** \relates plumed -+ \brief Constructor -+ -+ \return The constructed plumed object -+*/ -+plumed plumed_create(void); -+ -+/** \relates plumed -+ \brief Tells p to execute a command -+ -+ \param p The plumed object on which command is acting -+ \param key The name of the command to be executed -+ \param val The argument. It is declared as const to allow calls like plumed_cmd(p,"A","B"), -+ but for some choice of key it can change the content -+*/ -+void plumed_cmd(plumed p,const char*key,const void*val); -+ -+/** \relates plumed -+ \brief Destructor -+ -+ \param p The plumed object to be deallocated -+*/ -+void plumed_finalize(plumed p); -+ -+/** \relates plumed -+ \brief Check if plumed is installed (for runtime binding) -+ -+ \return 1 if plumed is installed, to 0 otherwise -+*/ -+int plumed_installed(void); -+ -+/** \relates plumed -+ \brief Retrieves an handler to the global structure. -+*/ -+plumed plumed_global(void); -+ -+/** \relates plumed -+ \brief Check if the global interface has been initialized -+ -+ \return 1 if plumed has been initialized, 0 otherwise -+*/ -+int plumed_ginitialized(void); -+ -+/* global C interface, working on a global object */ -+ -+/** \relates plumed -+ \brief Constructor for the global interface. -+ -+ \note Equivalent to plumed_create(), but initialize a static global plumed object -+*/ -+void plumed_gcreate(void); -+ -+/** \relates plumed -+ \brief Tells to the global interface to execute a command. -+ -+ \param key The name of the command to be executed -+ \param val The argument. It is declared as const to allow calls like plumed_gcmd("A","B"), -+ but for some choice of key it can change the content -+ -+ \note Equivalent to plumed_cmd(), but skipping the plumed argument -+*/ -+void plumed_gcmd(const char* key,const void* val); -+ -+/** \relates plumed -+ \brief Destructor for the global interface. 
-+ -+ \note Equivalent to plumed_finalize(), but skipping the plumed argument -+*/ -+void plumed_gfinalize(void); -+ -+/* routines to convert char handler from/to plumed objects */ -+ -+/** \related plumed -+ \brief Converts a C handler to a FORTRAN handler -+ -+ \param p The C handler -+ \param c The FORTRAN handler (a char[32]) -+*/ -+void plumed_c2f(plumed p,char* c); -+ -+/** \related plumed -+ \brief Converts a FORTRAN handler to a C handler -+ \param c The FORTRAN handler (a char[32]) -+ \return The C handler -+*/ -+plumed plumed_f2c(const char* c); -+ -+#ifdef __cplusplus -+ } -+#endif -+ -+#ifdef __cplusplus -+ -+/* this is to include the NULL pointer */ -+#include -+ -+/* C++ interface is hidden in PLMD namespace (same as plumed library) */ -+namespace PLMD { -+ -+/** -+ C++ wrapper for \ref plumed. -+ -+ This class provides a C++ interface to PLUMED. -+*/ -+ -+class Plumed{ -+ plumed main; -+/** -+ keeps track if the object was created from scratch using -+ the defaults destructor (cloned=false) or if it was imported -+ from C or FORTRAN (cloned-true). In the latter case, the -+ plumed_finalize() method is not called when destructing the object, -+ since it is expected to be finalized in the C/FORTRAN code -+*/ -+ bool cloned; -+public: -+/** -+ Check if plumed is installed (for runtime binding) -+ \return true if plumed is installed, false otherwise -+*/ -+ static bool installed(); -+/** -+ Check if global-plumed has been initialized -+ \return true if global plumed object (see global()) is initialized (i.e. if gcreate() has been -+ called), false otherwise. -+*/ -+ static bool ginitialized(); -+/** -+ Initialize global-plumed -+*/ -+ static void gcreate(); -+/** -+ Send a command to global-plumed -+ \param key The name of the command to be executed -+ \param val The argument. It is declared as const to allow calls like gcmd("A","B"), -+ but for some choice of key it can change the content -+*/ -+ static void gcmd(const char* key,const void* val); -+/** -+ Finalize global-plumed -+*/ -+ static void gfinalize(); -+/** -+ Returns the Plumed global object -+ \return The Plumed global object -+*/ -+ static Plumed global(); -+/** -+ Constructor -+*/ -+ Plumed(); -+/** -+ Clone a Plumed object from a FORTRAN char* handler -+ \param c The FORTRAN handler (a char[32]). -+ -+ \attention The Plumed object created in this manner -+ will not finalize the corresponding plumed structure. -+ It is expected that the FORTRAN code calls plumed_c_finalize for it -+*/ -+ Plumed(const char*c); -+/** -+ Clone a Plumed object from a C plumed structure -+ \param p The C plumed structure. -+ -+ \attention The Plumed object created in this manner -+ will not finalize the corresponding plumed structure. -+ It is expected that the C code calls plumed_finalize for it -+*/ -+ Plumed(plumed p); -+private: -+/** Copy constructor is disabled (private and unimplemented) -+ The problem here is that after copying it will not be clear who is -+ going to finalize the corresponding plumed structure. -+*/ -+ Plumed(const Plumed&); -+/** Assignment operator is disabled (private and unimplemented) -+ The problem here is that after copying it will not be clear who is -+ going to finalize the corresponding plumed structure. -+*/ -+ Plumed&operator=(const Plumed&); -+public: -+/** -+ Retrieve the C plumed structure for this object -+*/ -+ operator plumed()const; -+/** -+ Retrieve a FORTRAN handler for this object -+ \param c The FORTRAN handler (a char[32]). 
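The Plumed.h header reproduced in this hunk documents a deliberately small C interface: create an object, drive it with string-keyed cmd() calls, and finalize it. A minimal sketch of that calling pattern follows; the command keys and the natoms value are only representative placeholders, since (as the header itself notes) the set of valid keys is defined by the PLUMED library rather than by this wrapper.

    #include <stdio.h>
    #include "Plumed.h"   /* the wrapper header added at the GROMACS source root by this patch */

    int main(void)
    {
        int natoms = 1000;   /* illustrative value */

        if (!plumed_installed())
        {
            fprintf(stderr, "PLUMED kernel not available at runtime\n");
            return 1;
        }

        plumed p = plumed_create();

        /* All communication goes through string-keyed commands; the keys used
         * here ("setNatoms", "init") are shown only as examples of the pattern. */
        plumed_cmd(p, "setNatoms", &natoms);
        plumed_cmd(p, "init", NULL);

        plumed_finalize(p);
        return 0;
    }

The same pattern is available through the PLMD::Plumed C++ wrapper (p.cmd(key, val)) and the PLUMED_F_* FORTRAN entry points declared in the header.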
-+*/ -+ void toFortran(char*c)const; -+/** -+ Send a command to this plumed object -+ \param key The name of the command to be executed -+ \param val The argument. It is declared as const to allow calls like p.cmd("A","B"), -+ but for some choice of key it can change the content -+*/ -+ void cmd(const char*key,const void*val=NULL); -+/** -+ Destructor -+ -+ Destructor is virtual so as to allow correct inheritance from Plumed object. -+ To avoid linking problems with g++, I specify "inline" also here (in principle -+ it should be enough to specify it down in the definition of the function, but -+ for some reason that I do not understand g++ does not inline it properly in that -+ case and complains when Plumed.h is included but Plumed.o is not linked. Anyway, the -+ way it is done here seems to work properly). -+*/ -+ inline virtual ~Plumed(); -+}; -+ -+/* All methods are inlined so as to avoid the compilation of an extra c++ file */ -+ -+inline -+bool Plumed::installed(){ -+ return plumed_installed(); -+} -+ -+inline -+Plumed::Plumed(): -+ main(plumed_create()), -+ cloned(false) -+{} -+ -+inline -+Plumed::Plumed(const char*c): -+ main(plumed_f2c(c)), -+ cloned(true) -+{} -+ -+inline -+Plumed::Plumed(plumed p): -+ main(p), -+ cloned(true) -+{} -+ -+inline -+Plumed::operator plumed()const{ -+ return main; -+} -+ -+inline -+void Plumed::toFortran(char*c)const{ -+ plumed_c2f(main,c); -+} -+ -+inline -+void Plumed::cmd(const char*key,const void*val){ -+ plumed_cmd(main,key,val); -+} -+ -+inline -+Plumed::~Plumed(){ -+ if(!cloned)plumed_finalize(main); -+} -+ -+inline -+bool Plumed::ginitialized(){ -+ return plumed_ginitialized(); -+} -+ -+inline -+void Plumed::gcreate(){ -+ plumed_gcreate(); -+} -+ -+inline -+void Plumed::gcmd(const char* key,const void* val){ -+ plumed_gcmd(key,val); -+} -+ -+inline -+void Plumed::gfinalize(){ -+ plumed_gfinalize(); -+} -+ -+inline -+Plumed Plumed::global(){ -+ return plumed_global(); -+} -+ -+} -+ -+#endif -+ -+ -+#endif -diff --git a/Plumed.inc b/Plumed.inc -new file mode 100644 -index 0000000..e1e29a7 ---- /dev/null -+++ b/Plumed.inc -@@ -0,0 +1,3 @@ -+# PLUMED: shared installation -+PLUMED_LOAD= /apps/all/PLUMED/2.3.0-foss-2017a/lib/plumed///src/lib/libplumed.so -ldl -+PLUMED_DEPENDENCIES= /apps/all/PLUMED/2.3.0-foss-2017a/lib/plumed///src/lib/libplumed.so -diff --git a/src/gromacs/CMakeLists.txt b/src/gromacs/CMakeLists.txt -index 6db37e2..cc97aa8 100644 ---- a/src/gromacs/CMakeLists.txt -+++ b/src/gromacs/CMakeLists.txt -@@ -32,6 +32,8 @@ - # To help us fund GROMACS development, we humbly ask that you cite - # the research papers on the package. Check out http://www.gromacs.org. - -+include(${CMAKE_SOURCE_DIR}/Plumed.cmake) -+ - set(LIBGROMACS_SOURCES) - - function (gmx_install_headers DESTINATION) -@@ -189,7 +191,7 @@ target_link_libraries(libgromacs - ${TNG_IO_LIBRARIES} - ${FFT_LIBRARIES} ${LINEAR_ALGEBRA_LIBRARIES} - ${XML_LIBRARIES} -- ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS}) -+ ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS} ${PLUMED_LOAD}) - set_target_properties(libgromacs PROPERTIES - OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}" - SOVERSION ${LIBRARY_SOVERSION} -diff --git a/src/gromacs/CMakeLists.txt.preplumed b/src/gromacs/CMakeLists.txt.preplumed -new file mode 100644 -index 0000000..6db37e2 ---- /dev/null -+++ b/src/gromacs/CMakeLists.txt.preplumed -@@ -0,0 +1,232 @@ -+# -+# This file is part of the GROMACS molecular simulation package. 
-+# -+# Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by -+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+# and including many others, as listed in the AUTHORS file in the -+# top-level source directory and at http://www.gromacs.org. -+# -+# GROMACS is free software; you can redistribute it and/or -+# modify it under the terms of the GNU Lesser General Public License -+# as published by the Free Software Foundation; either version 2.1 -+# of the License, or (at your option) any later version. -+# -+# GROMACS is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# Lesser General Public License for more details. -+# -+# You should have received a copy of the GNU Lesser General Public -+# License along with GROMACS; if not, see -+# http://www.gnu.org/licenses, or write to the Free Software Foundation, -+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+# -+# If you want to redistribute modifications to GROMACS, please -+# consider that scientific software is very special. Version -+# control is crucial - bugs must be traceable. We will be happy to -+# consider code for inclusion in the official distribution, but -+# derived work must not be called official GROMACS. Details are found -+# in the README & COPYING files - if they are missing, get the -+# official version at http://www.gromacs.org. -+# -+# To help us fund GROMACS development, we humbly ask that you cite -+# the research papers on the package. Check out http://www.gromacs.org. -+ -+set(LIBGROMACS_SOURCES) -+ -+function (gmx_install_headers DESTINATION) -+ if (NOT GMX_BUILD_MDRUN_ONLY) -+ if (DESTINATION) -+ set(DESTINATION ${INCL_INSTALL_DIR}/gromacs/${DESTINATION}) -+ else() -+ set(DESTINATION ${INCL_INSTALL_DIR}/gromacs) -+ endif() -+ install(FILES ${ARGN} DESTINATION ${DESTINATION} COMPONENT development) -+ endif() -+endfunction () -+ -+if(GMX_USE_TNG) -+ option(GMX_EXTERNAL_TNG "Use external TNG instead of compiling the version shipped with GROMACS." -+ OFF) -+ # Detect TNG if GMX_EXTERNAL_TNG is explicitly ON -+ if(GMX_EXTERNAL_TNG) -+ find_package(TNG_IO 1.6.0) -+ if(NOT TNG_IO_FOUND) -+ message(FATAL_ERROR -+ "TNG >= 1.6.0 not found. " -+ "You can set GMX_EXTERNAL_TNG=OFF to compile TNG.") -+ endif() -+ include_directories(${TNG_IO_INCLUDE_DIRS}) -+ endif() -+ if(NOT GMX_EXTERNAL_TNG) -+ include(${CMAKE_SOURCE_DIR}/src/external/tng_io/BuildTNG.cmake) -+ tng_get_source_list(TNG_SOURCES TNG_IO_DEFINITIONS) -+ list(APPEND LIBGROMACS_SOURCES ${TNG_SOURCES}) -+ tng_set_source_properties(WITH_ZLIB ${HAVE_ZLIB}) -+ -+ if (HAVE_ZLIB) -+ list(APPEND GMX_EXTRA_LIBRARIES ${ZLIB_LIBRARIES}) -+ include_directories(${ZLIB_INCLUDE_DIRS}) -+ endif() -+ endif() -+else() -+ # We still need to get tng/tng_io_fwd.h from somewhere! 
-+ include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/external/tng_io/include) -+endif() -+ -+add_subdirectory(gmxlib) -+add_subdirectory(mdlib) -+add_subdirectory(gmxpreprocess) -+add_subdirectory(commandline) -+add_subdirectory(fft) -+add_subdirectory(linearalgebra) -+add_subdirectory(math) -+add_subdirectory(random) -+add_subdirectory(onlinehelp) -+add_subdirectory(options) -+add_subdirectory(timing) -+add_subdirectory(utility) -+add_subdirectory(fileio) -+add_subdirectory(swap) -+add_subdirectory(essentialdynamics) -+add_subdirectory(pulling) -+add_subdirectory(simd) -+add_subdirectory(imd) -+if (NOT GMX_BUILD_MDRUN_ONLY) -+ add_subdirectory(legacyheaders) -+ add_subdirectory(gmxana) -+ add_subdirectory(statistics) -+ add_subdirectory(analysisdata) -+ add_subdirectory(selection) -+ add_subdirectory(trajectoryanalysis) -+ add_subdirectory(tools) -+endif() -+ -+list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${MDLIB_SOURCES}) -+ -+# This would be the standard way to include thread_mpi, but -+# we want libgromacs to link the functions directly -+#if(GMX_THREAD_MPI) -+# add_subdirectory(thread_mpi) -+#endif() -+#target_link_libraries(gmx ${GMX_EXTRA_LIBRARIES} ${THREAD_MPI_LIB}) -+ -+tmpi_get_source_list(THREAD_MPI_SOURCES ${CMAKE_SOURCE_DIR}/src/external/thread_mpi/src) -+list(APPEND LIBGROMACS_SOURCES ${THREAD_MPI_SOURCES}) -+ -+file(GLOB LIBGROMACS_HEADERS *.h) -+configure_file(version.h.cmakein version.h) -+gmx_install_headers("" ${LIBGROMACS_HEADERS}) -+gmx_install_headers("" ${CMAKE_CURRENT_BINARY_DIR}/version.h) -+ -+# Add target that generates baseversion-gen.c every time make is run -+# if git version info is requested, or create it statically. -+# This code is here instead of utility/CMakeLists.txt because CMake -+# ignores set_source_file_properties from subdirectories. -+set(GENERATED_VERSION_FILE -+ ${CMAKE_CURRENT_BINARY_DIR}/utility/baseversion-gen.c) -+set(GENERATED_VERSION_FILE_SOURCE -+ ${CMAKE_CURRENT_SOURCE_DIR}/utility/baseversion-gen.c.cmakein) -+if (GMX_GIT_VERSION_INFO) -+ add_custom_target(gmx-version ALL -+ COMMAND ${CMAKE_COMMAND} -+ -D GIT_EXECUTABLE="${GIT_EXECUTABLE}" -+ -D PROJECT_VERSION="${PROJECT_VERSION}" -+ -D PROJECT_SOURCE_DIR="${PROJECT_SOURCE_DIR}" -+ -D VERSION_CMAKEIN=${GENERATED_VERSION_FILE_SOURCE} -+ -D VERSION_OUT=${GENERATED_VERSION_FILE} -+ -P ${CMAKE_SOURCE_DIR}/cmake/gmxGenerateVersionInfo.cmake -+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} -+ DEPENDS ${GENERATED_VERSION_FILE_SOURCE} -+ COMMENT "Generating git version information") -+ set_source_files_properties(${GENERATED_VERSION_FILE} -+ PROPERTIES GENERATED true) -+else() -+ set(GMX_PROJECT_VERSION_STR ${PROJECT_VERSION}) -+ configure_file(${GENERATED_VERSION_FILE_SOURCE} ${GENERATED_VERSION_FILE}) -+endif() -+list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE}) -+ -+# apply gcc 4.4.x bug workaround -+if(GMX_USE_GCC44_BUG_WORKAROUND) -+ include(gmxGCC44O3BugWorkaround) -+ gmx_apply_gcc44_bug_workaround("gmxlib/bondfree.c") -+ gmx_apply_gcc44_bug_workaround("mdlib/force.c") -+ gmx_apply_gcc44_bug_workaround("mdlib/constr.c") -+endif() -+ -+add_library(libgromacs ${LIBGROMACS_SOURCES}) -+if (GMX_GIT_VERSION_INFO) -+ add_dependencies(libgromacs gmx-version) -+endif() -+ -+# Recent versions of gcc and clang give warnings on scanner.cpp, which -+# is a generated source file. These are awkward to suppress inline, so -+# we do it in the compilation command (after testing that the compiler -+# supports the suppressions). 
Setting the properties only works after -+# the related target has been created, e.g. after when the file is -+# used with add_library(). -+include(CheckCXXCompilerFlag) -+check_cxx_compiler_flag(-Wno-unused-parameter HAS_NO_UNUSED_PARAMETER) -+if (HAS_NO_UNUSED_PARAMETER) -+ set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-unused-parameter") -+endif() -+check_cxx_compiler_flag(-Wno-deprecated-register HAS_NO_DEPRECATED_REGISTER) -+if (HAS_NO_DEPRECATED_REGISTER) -+ set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-deprecated-register") -+else() -+ check_cxx_compiler_flag(-Wno-deprecated HAS_NO_DEPRECATED) -+ if (HAS_NO_DEPRECATED) -+ set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-deprecated") -+ endif() -+endif() -+set_source_files_properties(selection/scanner.cpp PROPERTIES COMPILE_FLAGS "${_scanner_cpp_compiler_flags}") -+ -+target_link_libraries(libgromacs -+ ${EXTRAE_LIBRARIES} -+ ${GMX_GPU_LIBRARIES} -+ ${GMX_EXTRA_LIBRARIES} -+ ${TNG_IO_LIBRARIES} -+ ${FFT_LIBRARIES} ${LINEAR_ALGEBRA_LIBRARIES} -+ ${XML_LIBRARIES} -+ ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS}) -+set_target_properties(libgromacs PROPERTIES -+ OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}" -+ SOVERSION ${LIBRARY_SOVERSION} -+ VERSION ${LIBRARY_VERSION} -+ COMPILE_FLAGS "${OpenMP_C_FLAGS}") -+ -+# Only install the library in mdrun-only mode if it is actually necessary -+# for the binary -+if (NOT GMX_BUILD_MDRUN_ONLY OR BUILD_SHARED_LIBS) -+ install(TARGETS libgromacs -+ LIBRARY DESTINATION ${LIB_INSTALL_DIR} -+ RUNTIME DESTINATION ${BIN_INSTALL_DIR} -+ ARCHIVE DESTINATION ${LIB_INSTALL_DIR} -+ COMPONENT libraries) -+endif() -+ -+if (NOT GMX_BUILD_MDRUN_ONLY) -+ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libgromacs.pc.cmakein -+ ${CMAKE_CURRENT_BINARY_DIR}/libgromacs.pc @ONLY) -+ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libgromacs.pc -+ DESTINATION ${LIB_INSTALL_DIR}/pkgconfig -+ RENAME "libgromacs${GMX_LIBS_SUFFIX}.pc" -+ COMPONENT development) -+endif() -+ -+if (INSTALL_CUDART_LIB) #can be set manual by user -+ if (GMX_GPU) -+ foreach(CUDA_LIB ${CUDA_LIBRARIES}) -+ string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB}) -+ if(IS_CUDART) #libcuda should not be installed -+ #install also name-links (linker uses those) -+ file(GLOB CUDA_LIBS ${CUDA_LIB}*) -+ install(FILES ${CUDA_LIBS} DESTINATION -+ ${LIB_INSTALL_DIR} COMPONENT libraries) -+ endif() -+ endforeach() -+ else() -+ message(WARNING "INSTALL_CUDART_LIB only makes sense with GMX_GPU") -+ endif() -+endif() -diff --git a/src/gromacs/mdlib/force.c b/src/gromacs/mdlib/force.c -index 5230983..8227d5b 100644 ---- a/src/gromacs/mdlib/force.c -+++ b/src/gromacs/mdlib/force.c -@@ -67,6 +67,14 @@ - #include "gromacs/timing/wallcycle.h" - #include "gmx_fatal.h" - -+/* PLUMED */ -+#include "../../../Plumed.h" -+int plumedswitch=0; -+plumed plumedmain; -+void(*plumedcmd)(plumed,const char*,const void*)=NULL; -+/* END PLUMED */ -+ -+ - void ns(FILE *fp, - t_forcerec *fr, - matrix box, -@@ -737,6 +745,13 @@ void do_force_lowlevel(FILE *fplog, gmx_int64_t step, - pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); - } - -+ /* PLUMED */ -+ if(plumedswitch){ -+ int plumedNeedsEnergy; -+ (*plumedcmd)(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); -+ if(!plumedNeedsEnergy) (*plumedcmd)(plumedmain,"performCalc",NULL); -+ } -+ /* END PLUMED */ - } - - void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd) -diff --git a/src/gromacs/mdlib/force.c.preplumed b/src/gromacs/mdlib/force.c.preplumed -new file mode 
100644 -index 0000000..5230983 ---- /dev/null -+++ b/src/gromacs/mdlib/force.c.preplumed -@@ -0,0 +1,1018 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team. -+ * Copyright (c) 2013,2014, by the GROMACS development team, led by -+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+ * and including many others, as listed in the AUTHORS file in the -+ * top-level source directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. -+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. 
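Stepping back from the verbatim listing: the only coupling this patch adds to the force loop is what the force.c hunk above shows -- a global plumed handle, an on/off switch, and a function pointer to plumed_cmd(), queried at the end of do_force_lowlevel(). A stripped-down restatement of that hook is below; the wrapper function name is made up for this sketch, while the globals and command keys are the ones visible in the diff and are assumed to be initialised elsewhere in the full patch (not shown in this excerpt).

    #include <stddef.h>
    #include "Plumed.h"

    /* Globals added by the plumed patch (declared in force.c in the hunk above). */
    int    plumedswitch = 0;                                   /* 0 = PLUMED disabled */
    plumed plumedmain;                                         /* handle set up at start-up */
    void (*plumedcmd)(plumed, const char *, const void *) = NULL;

    /* Stand-in for the tail of do_force_lowlevel(): once short-range and bonded
     * forces are done, trigger the PLUMED bias calculation -- but only if PLUMED
     * does not first need the summed potential energy. */
    static void call_plumed_after_forces(void)
    {
        if (plumedswitch)
        {
            int plumedNeedsEnergy = 0;
            (*plumedcmd)(plumedmain, "isEnergyNeeded", &plumedNeedsEnergy);
            if (!plumedNeedsEnergy)
            {
                (*plumedcmd)(plumedmain, "performCalc", NULL);
            }
        }
    }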
-+ */ -+#ifdef HAVE_CONFIG_H -+#include -+#endif -+ -+#include -+#include -+#include -+#include "sysstuff.h" -+#include "typedefs.h" -+#include "macros.h" -+#include "gromacs/utility/smalloc.h" -+#include "macros.h" -+#include "physics.h" -+#include "force.h" -+#include "nonbonded.h" -+#include "names.h" -+#include "network.h" -+#include "pbc.h" -+#include "ns.h" -+#include "nrnb.h" -+#include "bondf.h" -+#include "mshift.h" -+#include "txtdump.h" -+#include "coulomb.h" -+#include "pme.h" -+#include "mdrun.h" -+#include "domdec.h" -+#include "qmmm.h" -+#include "gmx_omp_nthreads.h" -+ -+#include "gromacs/timing/wallcycle.h" -+#include "gmx_fatal.h" -+ -+void ns(FILE *fp, -+ t_forcerec *fr, -+ matrix box, -+ gmx_groups_t *groups, -+ gmx_localtop_t *top, -+ t_mdatoms *md, -+ t_commrec *cr, -+ t_nrnb *nrnb, -+ gmx_bool bFillGrid, -+ gmx_bool bDoLongRangeNS) -+{ -+ char *ptr; -+ int nsearch; -+ -+ -+ if (!fr->ns.nblist_initialized) -+ { -+ init_neighbor_list(fp, fr, md->homenr); -+ } -+ -+ if (fr->bTwinRange) -+ { -+ fr->nlr = 0; -+ } -+ -+ nsearch = search_neighbours(fp, fr, box, top, groups, cr, nrnb, md, -+ bFillGrid, bDoLongRangeNS); -+ if (debug) -+ { -+ fprintf(debug, "nsearch = %d\n", nsearch); -+ } -+ -+ /* Check whether we have to do dynamic load balancing */ -+ /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0)) -+ count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr, -+ &(top->idef),opts->ngener); -+ */ -+ if (fr->ns.dump_nl > 0) -+ { -+ dump_nblist(fp, cr, fr, fr->ns.dump_nl); -+ } -+} -+ -+static void reduce_thread_forces(int n, rvec *f, -+ tensor vir_q, tensor vir_lj, -+ real *Vcorr_q, real *Vcorr_lj, -+ real *dvdl_q, real *dvdl_lj, -+ int nthreads, f_thread_t *f_t) -+{ -+ int t, i; -+ int nthreads_loop gmx_unused; -+ -+ /* This reduction can run over any number of threads */ -+ nthreads_loop = gmx_omp_nthreads_get(emntBonded); -+#pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static) -+ for (i = 0; i < n; i++) -+ { -+ for (t = 1; t < nthreads; t++) -+ { -+ rvec_inc(f[i], f_t[t].f[i]); -+ } -+ } -+ for (t = 1; t < nthreads; t++) -+ { -+ *Vcorr_q += f_t[t].Vcorr_q; -+ *Vcorr_lj += f_t[t].Vcorr_lj; -+ *dvdl_q += f_t[t].dvdl[efptCOUL]; -+ *dvdl_lj += f_t[t].dvdl[efptVDW]; -+ m_add(vir_q, f_t[t].vir_q, vir_q); -+ m_add(vir_lj, f_t[t].vir_lj, vir_lj); -+ } -+} -+ -+void gmx_print_sepdvdl(FILE *fplog, const char *s, real v, real dvdlambda) -+{ -+ fprintf(fplog, " %-30s V %12.5e dVdl %12.5e\n", s, v, dvdlambda); -+} -+ -+void do_force_lowlevel(FILE *fplog, gmx_int64_t step, -+ t_forcerec *fr, t_inputrec *ir, -+ t_idef *idef, t_commrec *cr, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ t_mdatoms *md, -+ rvec x[], history_t *hist, -+ rvec f[], -+ rvec f_longrange[], -+ gmx_enerdata_t *enerd, -+ t_fcdata *fcd, -+ gmx_localtop_t *top, -+ gmx_genborn_t *born, -+ t_atomtypes *atype, -+ gmx_bool bBornRadii, -+ matrix box, -+ t_lambda *fepvals, -+ real *lambda, -+ t_graph *graph, -+ t_blocka *excl, -+ rvec mu_tot[], -+ int flags, -+ float *cycles_pme) -+{ -+ int i, j; -+ int donb_flags; -+ gmx_bool bDoEpot, bSepDVDL, bSB; -+ int pme_flags; -+ matrix boxs; -+ rvec box_size; -+ t_pbc pbc; -+ char buf[22]; -+ double clam_i, vlam_i; -+ real dvdl_dum[efptNR], dvdl_nb[efptNR], lam_i[efptNR]; -+ real dvdl_q, dvdl_lj; -+ -+#ifdef GMX_MPI -+ double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ -+#endif -+ -+#define PRINT_SEPDVDL(s, v, dvdlambda) if (bSepDVDL) { gmx_print_sepdvdl(fplog, s, v, dvdlambda); } -+ -+ set_pbc(&pbc, fr->ePBC, box); -+ -+ /* reset 
free energy components */ -+ for (i = 0; i < efptNR; i++) -+ { -+ dvdl_nb[i] = 0; -+ dvdl_dum[i] = 0; -+ } -+ -+ /* Reset box */ -+ for (i = 0; (i < DIM); i++) -+ { -+ box_size[i] = box[i][i]; -+ } -+ -+ bSepDVDL = (fr->bSepDVDL && do_per_step(step, ir->nstlog)); -+ debug_gmx(); -+ -+ /* do QMMM first if requested */ -+ if (fr->bQMMM) -+ { -+ enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr); -+ } -+ -+ if (bSepDVDL) -+ { -+ fprintf(fplog, "Step %s: non-bonded V and dVdl for rank %d:\n", -+ gmx_step_str(step, buf), cr->nodeid); -+ } -+ -+ /* Call the short range functions all in one go. */ -+ -+#ifdef GMX_MPI -+ /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ -+#define TAKETIME FALSE -+ if (TAKETIME) -+ { -+ MPI_Barrier(cr->mpi_comm_mygroup); -+ t0 = MPI_Wtime(); -+ } -+#endif -+ -+ if (ir->nwall) -+ { -+ /* foreign lambda component for walls */ -+ real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], -+ enerd->grpp.ener[egLJSR], nrnb); -+ PRINT_SEPDVDL("Walls", 0.0, dvdl_walls); -+ enerd->dvdl_lin[efptVDW] += dvdl_walls; -+ } -+ -+ /* If doing GB, reset dvda and calculate the Born radii */ -+ if (ir->implicit_solvent) -+ { -+ wallcycle_sub_start(wcycle, ewcsNONBONDED); -+ -+ for (i = 0; i < born->nr; i++) -+ { -+ fr->dvda[i] = 0; -+ } -+ -+ if (bBornRadii) -+ { -+ calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb); -+ } -+ -+ wallcycle_sub_stop(wcycle, ewcsNONBONDED); -+ } -+ -+ where(); -+ /* We only do non-bonded calculation with group scheme here, the verlet -+ * calls are done from do_force_cutsVERLET(). */ -+ if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) -+ { -+ donb_flags = 0; -+ /* Add short-range interactions */ -+ donb_flags |= GMX_NONBONDED_DO_SR; -+ -+ /* Currently all group scheme kernels always calculate (shift-)forces */ -+ if (flags & GMX_FORCE_FORCES) -+ { -+ donb_flags |= GMX_NONBONDED_DO_FORCE; -+ } -+ if (flags & GMX_FORCE_VIRIAL) -+ { -+ donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE; -+ } -+ if (flags & GMX_FORCE_ENERGY) -+ { -+ donb_flags |= GMX_NONBONDED_DO_POTENTIAL; -+ } -+ if (flags & GMX_FORCE_DO_LR) -+ { -+ donb_flags |= GMX_NONBONDED_DO_LR; -+ } -+ -+ wallcycle_sub_start(wcycle, ewcsNONBONDED); -+ do_nonbonded(fr, x, f, f_longrange, md, excl, -+ &enerd->grpp, nrnb, -+ lambda, dvdl_nb, -1, -1, donb_flags); -+ -+ /* If we do foreign lambda and we have soft-core interactions -+ * we have to recalculate the (non-linear) energies contributions. -+ */ -+ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) -+ { -+ for (i = 0; i < enerd->n_lambda; i++) -+ { -+ for (j = 0; j < efptNR; j++) -+ { -+ lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); -+ } -+ reset_foreign_enerdata(enerd); -+ do_nonbonded(fr, x, f, f_longrange, md, excl, -+ &(enerd->foreign_grpp), nrnb, -+ lam_i, dvdl_dum, -1, -1, -+ (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); -+ sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); -+ enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; -+ } -+ } -+ wallcycle_sub_stop(wcycle, ewcsNONBONDED); -+ where(); -+ } -+ -+ /* If we are doing GB, calculate bonded forces and apply corrections -+ * to the solvation forces */ -+ /* MRS: Eventually, many need to include free energy contribution here! 
*/ -+ if (ir->implicit_solvent) -+ { -+ wallcycle_sub_start(wcycle, ewcsBONDED); -+ calc_gb_forces(cr, md, born, top, x, f, fr, idef, -+ ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd); -+ wallcycle_sub_stop(wcycle, ewcsBONDED); -+ } -+ -+#ifdef GMX_MPI -+ if (TAKETIME) -+ { -+ t1 = MPI_Wtime(); -+ fr->t_fnbf += t1-t0; -+ } -+#endif -+ -+ if (fepvals->sc_alpha != 0) -+ { -+ enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; -+ } -+ else -+ { -+ enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; -+ } -+ -+ if (fepvals->sc_alpha != 0) -+ -+ /* even though coulomb part is linear, we already added it, beacuse we -+ need to go through the vdw calculation anyway */ -+ { -+ enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; -+ } -+ else -+ { -+ enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; -+ } -+ -+ if (bSepDVDL) -+ { -+ real V_short_range = 0; -+ real dvdl_short_range = 0; -+ -+ for (i = 0; i < enerd->grpp.nener; i++) -+ { -+ V_short_range += -+ (fr->bBHAM ? -+ enerd->grpp.ener[egBHAMSR][i] : -+ enerd->grpp.ener[egLJSR][i]) -+ + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i]; -+ } -+ dvdl_short_range = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL]; -+ PRINT_SEPDVDL("VdW and Coulomb SR particle-p.", -+ V_short_range, -+ dvdl_short_range); -+ } -+ debug_gmx(); -+ -+ -+ if (debug) -+ { -+ pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); -+ } -+ -+ /* Shift the coordinates. Must be done before bonded forces and PPPM, -+ * but is also necessary for SHAKE and update, therefore it can NOT -+ * go when no bonded forces have to be evaluated. -+ */ -+ -+ /* Here sometimes we would not need to shift with NBFonly, -+ * but we do so anyhow for consistency of the returned coordinates. -+ */ -+ if (graph) -+ { -+ shift_self(graph, box, x); -+ if (TRICLINIC(box)) -+ { -+ inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); -+ } -+ else -+ { -+ inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); -+ } -+ } -+ /* Check whether we need to do bondeds or correct for exclusions */ -+ if (fr->bMolPBC && -+ ((flags & GMX_FORCE_BONDED) -+ || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))) -+ { -+ /* Since all atoms are in the rectangular or triclinic unit-cell, -+ * only single box vector shifts (2 in x) are required. -+ */ -+ set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box); -+ } -+ debug_gmx(); -+ -+ if (flags & GMX_FORCE_BONDED) -+ { -+ wallcycle_sub_start(wcycle, ewcsBONDED); -+ calc_bonds(fplog, cr->ms, -+ idef, x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, -+ DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born, -+ flags, -+ fr->bSepDVDL && do_per_step(step, ir->nstlog), step); -+ -+ /* Check if we have to determine energy differences -+ * at foreign lambda's. -+ */ -+ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && -+ idef->ilsort != ilsortNO_FE) -+ { -+ if (idef->ilsort != ilsortFE_SORTED) -+ { -+ gmx_incons("The bonded interactions are not sorted for free energy"); -+ } -+ for (i = 0; i < enerd->n_lambda; i++) -+ { -+ reset_foreign_enerdata(enerd); -+ for (j = 0; j < efptNR; j++) -+ { -+ lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); -+ } -+ calc_bonds_lambda(fplog, idef, x, fr, &pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md, -+ fcd, DOMAINDECOMP(cr) ? 
cr->dd->gatindex : NULL); -+ sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); -+ enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; -+ } -+ } -+ debug_gmx(); -+ -+ wallcycle_sub_stop(wcycle, ewcsBONDED); -+ } -+ -+ where(); -+ -+ *cycles_pme = 0; -+ if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)) -+ { -+ real Vlr = 0, Vcorr = 0; -+ real dvdl_long_range = 0; -+ int status = 0; -+ -+ bSB = (ir->nwall == 2); -+ if (bSB) -+ { -+ copy_mat(box, boxs); -+ svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); -+ box_size[ZZ] *= ir->wall_ewald_zfac; -+ } -+ } -+ -+ /* Do long-range electrostatics and/or LJ-PME, including related short-range -+ * corrections. -+ */ -+ -+ clear_mat(fr->vir_el_recip); -+ clear_mat(fr->vir_lj_recip); -+ -+ if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)) -+ { -+ real Vlr_q = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0; -+ real dvdl_long_range_q = 0, dvdl_long_range_lj = 0; -+ int status = 0; -+ -+ if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype)) -+ { -+ real dvdl_long_range_correction_q = 0; -+ real dvdl_long_range_correction_lj = 0; -+ /* With the Verlet scheme exclusion forces are calculated -+ * in the non-bonded kernel. -+ */ -+ /* The TPI molecule does not have exclusions with the rest -+ * of the system and no intra-molecular PME grid -+ * contributions will be calculated in -+ * gmx_pme_calc_energy. -+ */ -+ if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || -+ ir->ewald_geometry != eewg3D || -+ ir->epsilon_surface != 0) -+ { -+ int nthreads, t; -+ -+ wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); -+ -+ if (fr->n_tpi > 0) -+ { -+ gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); -+ } -+ -+ nthreads = gmx_omp_nthreads_get(emntBonded); -+#pragma omp parallel for num_threads(nthreads) schedule(static) -+ for (t = 0; t < nthreads; t++) -+ { -+ int s, e, i; -+ rvec *fnv; -+ tensor *vir_q, *vir_lj; -+ real *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj; -+ if (t == 0) -+ { -+ fnv = fr->f_novirsum; -+ vir_q = &fr->vir_el_recip; -+ vir_lj = &fr->vir_lj_recip; -+ Vcorrt_q = &Vcorr_q; -+ Vcorrt_lj = &Vcorr_lj; -+ dvdlt_q = &dvdl_long_range_correction_q; -+ dvdlt_lj = &dvdl_long_range_correction_lj; -+ } -+ else -+ { -+ fnv = fr->f_t[t].f; -+ vir_q = &fr->f_t[t].vir_q; -+ vir_lj = &fr->f_t[t].vir_lj; -+ Vcorrt_q = &fr->f_t[t].Vcorr_q; -+ Vcorrt_lj = &fr->f_t[t].Vcorr_lj; -+ dvdlt_q = &fr->f_t[t].dvdl[efptCOUL]; -+ dvdlt_lj = &fr->f_t[t].dvdl[efptVDW]; -+ for (i = 0; i < fr->natoms_force; i++) -+ { -+ clear_rvec(fnv[i]); -+ } -+ clear_mat(*vir_q); -+ clear_mat(*vir_lj); -+ } -+ *dvdlt_q = 0; -+ *dvdlt_lj = 0; -+ -+ ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1], -+ cr, t, fr, -+ md->chargeA, md->chargeB, -+ md->sqrt_c6A, md->sqrt_c6B, -+ md->sigmaA, md->sigmaB, -+ md->sigma3A, md->sigma3B, -+ md->nChargePerturbed || md->nTypePerturbed, -+ ir->cutoff_scheme != ecutsVERLET, -+ excl, x, bSB ? 
boxs : box, mu_tot, -+ ir->ewald_geometry, -+ ir->epsilon_surface, -+ fnv, *vir_q, *vir_lj, -+ Vcorrt_q, Vcorrt_lj, -+ lambda[efptCOUL], lambda[efptVDW], -+ dvdlt_q, dvdlt_lj); -+ } -+ if (nthreads > 1) -+ { -+ reduce_thread_forces(fr->natoms_force, fr->f_novirsum, -+ fr->vir_el_recip, fr->vir_lj_recip, -+ &Vcorr_q, &Vcorr_lj, -+ &dvdl_long_range_correction_q, -+ &dvdl_long_range_correction_lj, -+ nthreads, fr->f_t); -+ } -+ wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); -+ } -+ -+ if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0) -+ { -+ Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box, -+ &dvdl_long_range_correction_q, -+ fr->vir_el_recip); -+ } -+ -+ PRINT_SEPDVDL("Ewald excl./charge/dip. corr.", Vcorr_q, dvdl_long_range_correction_q); -+ PRINT_SEPDVDL("Ewald excl. corr. LJ", Vcorr_lj, dvdl_long_range_correction_lj); -+ enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q; -+ enerd->dvdl_lin[efptVDW] += dvdl_long_range_correction_lj; -+ } -+ -+ if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype))) -+ { -+ if (cr->duty & DUTY_PME) -+ { -+ /* Do reciprocal PME for Coulomb and/or LJ. */ -+ assert(fr->n_tpi >= 0); -+ if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) -+ { -+ pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE; -+ if (EEL_PME(fr->eeltype)) -+ { -+ pme_flags |= GMX_PME_DO_COULOMB; -+ } -+ if (EVDW_PME(fr->vdwtype)) -+ { -+ pme_flags |= GMX_PME_DO_LJ; -+ } -+ if (flags & GMX_FORCE_FORCES) -+ { -+ pme_flags |= GMX_PME_CALC_F; -+ } -+ if (flags & GMX_FORCE_VIRIAL) -+ { -+ pme_flags |= GMX_PME_CALC_ENER_VIR; -+ } -+ if (fr->n_tpi > 0) -+ { -+ /* We don't calculate f, but we do want the potential */ -+ pme_flags |= GMX_PME_CALC_POT; -+ } -+ wallcycle_start(wcycle, ewcPMEMESH); -+ status = gmx_pme_do(fr->pmedata, -+ 0, md->homenr - fr->n_tpi, -+ x, fr->f_novirsum, -+ md->chargeA, md->chargeB, -+ md->sqrt_c6A, md->sqrt_c6B, -+ md->sigmaA, md->sigmaB, -+ bSB ? boxs : box, cr, -+ DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, -+ DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, -+ nrnb, wcycle, -+ fr->vir_el_recip, fr->ewaldcoeff_q, -+ fr->vir_lj_recip, fr->ewaldcoeff_lj, -+ &Vlr_q, &Vlr_lj, -+ lambda[efptCOUL], lambda[efptVDW], -+ &dvdl_long_range_q, &dvdl_long_range_lj, pme_flags); -+ *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); -+ if (status != 0) -+ { -+ gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status); -+ } -+ /* We should try to do as little computation after -+ * this as possible, because parallel PME synchronizes -+ * the nodes, so we want all load imbalance of the -+ * rest of the force calculation to be before the PME -+ * call. DD load balancing is done on the whole time -+ * of the force call (without PME). -+ */ -+ } -+ if (fr->n_tpi > 0) -+ { -+ if (EVDW_PME(ir->vdwtype)) -+ { -+ -+ gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME"); -+ } -+ /* Determine the PME grid energy of the test molecule -+ * with the PME grid potential of the other charges. 
-+ */ -+ gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, -+ x + md->homenr - fr->n_tpi, -+ md->chargeA + md->homenr - fr->n_tpi, -+ &Vlr_q); -+ } -+ PRINT_SEPDVDL("PME mesh", Vlr_q + Vlr_lj, dvdl_long_range_q+dvdl_long_range_lj); -+ } -+ } -+ -+ if (!EEL_PME(fr->eeltype) && EEL_PME_EWALD(fr->eeltype)) -+ { -+ Vlr_q = do_ewald(ir, x, fr->f_novirsum, -+ md->chargeA, md->chargeB, -+ box_size, cr, md->homenr, -+ fr->vir_el_recip, fr->ewaldcoeff_q, -+ lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table); -+ PRINT_SEPDVDL("Ewald long-range", Vlr_q, dvdl_long_range_q); -+ } -+ -+ /* Note that with separate PME nodes we get the real energies later */ -+ enerd->dvdl_lin[efptCOUL] += dvdl_long_range_q; -+ enerd->dvdl_lin[efptVDW] += dvdl_long_range_lj; -+ enerd->term[F_COUL_RECIP] = Vlr_q + Vcorr_q; -+ enerd->term[F_LJ_RECIP] = Vlr_lj + Vcorr_lj; -+ if (debug) -+ { -+ fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", -+ Vlr_q, Vcorr_q, enerd->term[F_COUL_RECIP]); -+ pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM); -+ pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); -+ fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", -+ Vlr_lj, Vcorr_lj, enerd->term[F_LJ_RECIP]); -+ pr_rvecs(debug, 0, "vir_lj_recip after corr", fr->vir_lj_recip, DIM); -+ } -+ } -+ else -+ { -+ /* Is there a reaction-field exclusion correction needed? */ -+ if (EEL_RF(fr->eeltype) && eelRF_NEC != fr->eeltype) -+ { -+ /* With the Verlet scheme, exclusion forces are calculated -+ * in the non-bonded kernel. -+ */ -+ if (ir->cutoff_scheme != ecutsVERLET) -+ { -+ real dvdl_rf_excl = 0; -+ enerd->term[F_RF_EXCL] = -+ RF_excl_correction(fr, graph, md, excl, x, f, -+ fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl); -+ -+ enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl; -+ PRINT_SEPDVDL("RF exclusion correction", -+ enerd->term[F_RF_EXCL], dvdl_rf_excl); -+ } -+ } -+ } -+ where(); -+ debug_gmx(); -+ -+ if (debug) -+ { -+ print_nrnb(debug, nrnb); -+ } -+ debug_gmx(); -+ -+#ifdef GMX_MPI -+ if (TAKETIME) -+ { -+ t2 = MPI_Wtime(); -+ MPI_Barrier(cr->mpi_comm_mygroup); -+ t3 = MPI_Wtime(); -+ fr->t_wait += t3-t2; -+ if (fr->timesteps == 11) -+ { -+ fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", -+ cr->nodeid, gmx_step_str(fr->timesteps, buf), -+ 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), -+ (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); -+ } -+ fr->timesteps++; -+ } -+#endif -+ -+ if (debug) -+ { -+ pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); -+ } -+ -+} -+ -+void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd) -+{ -+ int i, n2; -+ -+ for (i = 0; i < F_NRE; i++) -+ { -+ enerd->term[i] = 0; -+ enerd->foreign_term[i] = 0; -+ } -+ -+ -+ for (i = 0; i < efptNR; i++) -+ { -+ enerd->dvdl_lin[i] = 0; -+ enerd->dvdl_nonlin[i] = 0; -+ } -+ -+ n2 = ngener*ngener; -+ if (debug) -+ { -+ fprintf(debug, "Creating %d sized group matrix for energies\n", n2); -+ } -+ enerd->grpp.nener = n2; -+ enerd->foreign_grpp.nener = n2; -+ for (i = 0; (i < egNR); i++) -+ { -+ snew(enerd->grpp.ener[i], n2); -+ snew(enerd->foreign_grpp.ener[i], n2); -+ } -+ -+ if (n_lambda) -+ { -+ enerd->n_lambda = 1 + n_lambda; -+ snew(enerd->enerpart_lambda, enerd->n_lambda); -+ } -+ else -+ { -+ enerd->n_lambda = 0; -+ } -+} -+ -+void destroy_enerdata(gmx_enerdata_t *enerd) -+{ -+ int i; -+ -+ for (i = 0; (i < egNR); i++) -+ { -+ sfree(enerd->grpp.ener[i]); -+ } -+ -+ for (i = 0; (i < egNR); i++) -+ { -+ 
sfree(enerd->foreign_grpp.ener[i]); -+ } -+ -+ if (enerd->n_lambda) -+ { -+ sfree(enerd->enerpart_lambda); -+ } -+} -+ -+static real sum_v(int n, real v[]) -+{ -+ real t; -+ int i; -+ -+ t = 0.0; -+ for (i = 0; (i < n); i++) -+ { -+ t = t + v[i]; -+ } -+ -+ return t; -+} -+ -+void sum_epot(gmx_grppairener_t *grpp, real *epot) -+{ -+ int i; -+ -+ /* Accumulate energies */ -+ epot[F_COUL_SR] = sum_v(grpp->nener, grpp->ener[egCOULSR]); -+ epot[F_LJ] = sum_v(grpp->nener, grpp->ener[egLJSR]); -+ epot[F_LJ14] = sum_v(grpp->nener, grpp->ener[egLJ14]); -+ epot[F_COUL14] = sum_v(grpp->nener, grpp->ener[egCOUL14]); -+ epot[F_COUL_LR] = sum_v(grpp->nener, grpp->ener[egCOULLR]); -+ epot[F_LJ_LR] = sum_v(grpp->nener, grpp->ener[egLJLR]); -+ /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */ -+ epot[F_GBPOL] += sum_v(grpp->nener, grpp->ener[egGB]); -+ -+/* lattice part of LR doesnt belong to any group -+ * and has been added earlier -+ */ -+ epot[F_BHAM] = sum_v(grpp->nener, grpp->ener[egBHAMSR]); -+ epot[F_BHAM_LR] = sum_v(grpp->nener, grpp->ener[egBHAMLR]); -+ -+ epot[F_EPOT] = 0; -+ for (i = 0; (i < F_EPOT); i++) -+ { -+ if (i != F_DISRESVIOL && i != F_ORIRESDEV) -+ { -+ epot[F_EPOT] += epot[i]; -+ } -+ } -+} -+ -+void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals) -+{ -+ int i, j, index; -+ double dlam; -+ -+ enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW]; /* include dispersion correction */ -+ enerd->term[F_DVDL] = 0.0; -+ for (i = 0; i < efptNR; i++) -+ { -+ if (fepvals->separate_dvdl[i]) -+ { -+ /* could this be done more readably/compactly? */ -+ switch (i) -+ { -+ case (efptMASS): -+ index = F_DKDL; -+ break; -+ case (efptCOUL): -+ index = F_DVDL_COUL; -+ break; -+ case (efptVDW): -+ index = F_DVDL_VDW; -+ break; -+ case (efptBONDED): -+ index = F_DVDL_BONDED; -+ break; -+ case (efptRESTRAINT): -+ index = F_DVDL_RESTRAINT; -+ break; -+ default: -+ index = F_DVDL; -+ break; -+ } -+ enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; -+ if (debug) -+ { -+ fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n", -+ efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); -+ } -+ } -+ else -+ { -+ enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; -+ if (debug) -+ { -+ fprintf(debug, "dvd-%sl[%2d]: %f: non-linear %f + linear %f\n", -+ efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); -+ } -+ } -+ } -+ -+ /* Notes on the foreign lambda free energy difference evaluation: -+ * Adding the potential and ekin terms that depend linearly on lambda -+ * as delta lam * dvdl to the energy differences is exact. -+ * For the constraints this is not exact, but we have no other option -+ * without literally changing the lengths and reevaluating the energies at each step. -+ * (try to remedy this post 4.6 - MRS) -+ * For the non-bonded LR term we assume that the soft-core (if present) -+ * no longer affects the energy beyond the short-range cut-off, -+ * which is a very good approximation (except for exotic settings). -+ * (investigate how to overcome this post 4.6 - MRS) -+ */ -+ if (fepvals->separate_dvdl[efptBONDED]) -+ { -+ enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR]; -+ } -+ else -+ { -+ enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR]; -+ } -+ enerd->term[F_DVDL_CONSTR] = 0; -+ -+ for (i = 0; i < fepvals->n_lambda; i++) -+ { -+ /* note we are iterating over fepvals here! 
-+ For the current lam, dlam = 0 automatically, -+ so we don't need to add anything to the -+ enerd->enerpart_lambda[0] */ -+ -+ /* we don't need to worry about dvdl_lin contributions to dE at -+ current lambda, because the contributions to the current -+ lambda are automatically zeroed */ -+ -+ for (j = 0; j < efptNR; j++) -+ { -+ /* Note that this loop is over all dhdl components, not just the separated ones */ -+ dlam = (fepvals->all_lambda[j][i]-lambda[j]); -+ enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j]; -+ if (debug) -+ { -+ fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n", -+ fepvals->all_lambda[j][i], efpt_names[j], -+ (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]), -+ dlam, enerd->dvdl_lin[j]); -+ } -+ } -+ } -+} -+ -+ -+void reset_foreign_enerdata(gmx_enerdata_t *enerd) -+{ -+ int i, j; -+ -+ /* First reset all foreign energy components. Foreign energies always called on -+ neighbor search steps */ -+ for (i = 0; (i < egNR); i++) -+ { -+ for (j = 0; (j < enerd->grpp.nener); j++) -+ { -+ enerd->foreign_grpp.ener[i][j] = 0.0; -+ } -+ } -+ -+ /* potential energy components */ -+ for (i = 0; (i <= F_EPOT); i++) -+ { -+ enerd->foreign_term[i] = 0.0; -+ } -+} -+ -+void reset_enerdata(t_forcerec *fr, gmx_bool bNS, -+ gmx_enerdata_t *enerd, -+ gmx_bool bMaster) -+{ -+ gmx_bool bKeepLR; -+ int i, j; -+ -+ /* First reset all energy components, except for the long range terms -+ * on the master at non neighbor search steps, since the long range -+ * terms have already been summed at the last neighbor search step. -+ */ -+ bKeepLR = (fr->bTwinRange && !bNS); -+ for (i = 0; (i < egNR); i++) -+ { -+ if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR))) -+ { -+ for (j = 0; (j < enerd->grpp.nener); j++) -+ { -+ enerd->grpp.ener[i][j] = 0.0; -+ } -+ } -+ } -+ for (i = 0; i < efptNR; i++) -+ { -+ enerd->dvdl_lin[i] = 0.0; -+ enerd->dvdl_nonlin[i] = 0.0; -+ } -+ -+ /* Normal potential energy components */ -+ for (i = 0; (i <= F_EPOT); i++) -+ { -+ enerd->term[i] = 0.0; -+ } -+ /* Initialize the dVdlambda term with the long range contribution */ -+ /* Initialize the dvdl term with the long range contribution */ -+ enerd->term[F_DVDL] = 0.0; -+ enerd->term[F_DVDL_COUL] = 0.0; -+ enerd->term[F_DVDL_VDW] = 0.0; -+ enerd->term[F_DVDL_BONDED] = 0.0; -+ enerd->term[F_DVDL_RESTRAINT] = 0.0; -+ enerd->term[F_DKDL] = 0.0; -+ if (enerd->n_lambda > 0) -+ { -+ for (i = 0; i < enerd->n_lambda; i++) -+ { -+ enerd->enerpart_lambda[i] = 0.0; -+ } -+ } -+ /* reset foreign energy data - separate function since we also call it elsewhere */ -+ reset_foreign_enerdata(enerd); -+} -diff --git a/src/gromacs/mdlib/minimize.c b/src/gromacs/mdlib/minimize.c -index 69008f5..5114fa0 100644 ---- a/src/gromacs/mdlib/minimize.c -+++ b/src/gromacs/mdlib/minimize.c -@@ -80,6 +80,13 @@ - #include "gromacs/timing/walltime_accounting.h" - #include "gromacs/imd/imd.h" - -+/* PLUMED */ -+#include "../../../Plumed.h" -+extern int plumedswitch; -+extern plumed plumedmain; -+extern void(*plumedcmd)(plumed,const char*,const void*); -+/* END PLUMED */ -+ - typedef struct { - t_state s; - rvec *f; -@@ -442,6 +449,43 @@ void init_em(FILE *fplog, const char *title, - - clear_rvec(mu_tot); - calc_shifts(ems->s.box, fr->shift_vec); -+ -+ /* PLUMED */ -+ if(plumedswitch){ -+ if(cr->ms && cr->ms->nsim>1) { -+ if(MASTER(cr)) (*plumedcmd) (plumedmain,"GREX setMPIIntercomm",&cr->ms->mpi_comm_masters); -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ (*plumedcmd) (plumedmain,"GREX 
setMPIIntracomm",&cr->dd->mpi_comm_all); -+ }else{ -+ (*plumedcmd) (plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim); -+ } -+ } -+ (*plumedcmd) (plumedmain,"GREX init",NULL); -+ } -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ (*plumedcmd) (plumedmain,"setMPIComm",&cr->dd->mpi_comm_all); -+ }else{ -+ (*plumedcmd) (plumedmain,"setMPIComm",&cr->mpi_comm_mysim); -+ } -+ } -+ (*plumedcmd) (plumedmain,"setNatoms",&top_global->natoms); -+ (*plumedcmd) (plumedmain,"setMDEngine","gromacs"); -+ (*plumedcmd) (plumedmain,"setLog",fplog); -+ real real_delta_t; -+ real_delta_t=ir->delta_t; -+ (*plumedcmd) (plumedmain,"setTimestep",&real_delta_t); -+ (*plumedcmd) (plumedmain,"init",NULL); -+ -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ (*plumedcmd) (plumedmain,"setAtomsNlocal",&cr->dd->nat_home); -+ (*plumedcmd) (plumedmain,"setAtomsGatindex",cr->dd->gatindex); -+ } -+ } -+ } -+ /* END PLUMED */ - } - - static void finish_em(t_commrec *cr, gmx_mdoutf_t outf, -@@ -737,12 +781,34 @@ static void evaluate_energy(FILE *fplog, t_commrec *cr, - em_dd_partition_system(fplog, count, cr, top_global, inputrec, - ems, top, mdatoms, fr, vsite, constr, - nrnb, wcycle); -+ /* PLUMED */ -+ if(plumedswitch){ -+ (*plumedcmd) (plumedmain,"setAtomsNlocal",&cr->dd->nat_home); -+ (*plumedcmd) (plumedmain,"setAtomsGatindex",cr->dd->gatindex); -+ } -+ /* END PLUMED */ - } - - /* Calc force & energy on new trial position */ - /* do_force always puts the charge groups in the box and shifts again - * We do not unshift, so molecules are always whole in congrad.c - */ -+ /* PLUMED */ -+ int plumedNeedsEnergy=0; -+ matrix plumed_vir; -+ if(plumedswitch){ -+ long int lstep=count; (*plumedcmd)(plumedmain,"setStepLong",&count); -+ (*plumedcmd) (plumedmain,"setPositions",&ems->s.x[0][0]); -+ (*plumedcmd) (plumedmain,"setMasses",&mdatoms->massT[0]); -+ (*plumedcmd) (plumedmain,"setCharges",&mdatoms->chargeA[0]); -+ (*plumedcmd) (plumedmain,"setBox",&ems->s.box[0][0]); -+ (*plumedcmd) (plumedmain,"prepareCalc",NULL); -+ (*plumedcmd) (plumedmain,"setForces",&ems->f[0][0]); -+ (*plumedcmd) (plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); -+ clear_mat(plumed_vir); -+ (*plumedcmd) (plumedmain,"setVirial",&plumed_vir[0][0]); -+ } -+ /* END PLUMED */ - do_force(fplog, cr, inputrec, - count, nrnb, wcycle, top, &top_global->groups, - ems->s.box, ems->s.x, &ems->s.hist, -@@ -751,6 +817,19 @@ static void evaluate_energy(FILE *fplog, t_commrec *cr, - GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | - GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY | - (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0)); -+ /* PLUMED */ -+ if(plumedswitch){ -+ if(plumedNeedsEnergy) { -+ msmul(force_vir,2.0,plumed_vir); -+ (*plumedcmd) (plumedmain,"setEnergy",&enerd->term[F_EPOT]); -+ (*plumedcmd) (plumedmain,"performCalc",NULL); -+ msmul(plumed_vir,0.5,force_vir); -+ } else { -+ msmul(plumed_vir,0.5,plumed_vir); -+ m_add(force_vir,plumed_vir,force_vir); -+ } -+ } -+ /* END PLUMED */ - - /* Clear the unused shake virial and pressure */ - clear_mat(shake_vir); -diff --git a/src/gromacs/mdlib/minimize.c.preplumed b/src/gromacs/mdlib/minimize.c.preplumed -new file mode 100644 -index 0000000..69008f5 ---- /dev/null -+++ b/src/gromacs/mdlib/minimize.c.preplumed -@@ -0,0 +1,2906 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team. 
-+ * Copyright (c) 2013,2014, by the GROMACS development team, led by -+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+ * and including many others, as listed in the AUTHORS file in the -+ * top-level source directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. -+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. -+ */ -+#ifdef HAVE_CONFIG_H -+#include -+#endif -+ -+#include -+#include -+#include -+#include "sysstuff.h" -+#include "gromacs/utility/cstringutil.h" -+#include "network.h" -+#include "gromacs/utility/smalloc.h" -+#include "nrnb.h" -+#include "main.h" -+#include "force.h" -+#include "macros.h" -+#include "names.h" -+#include "gmx_fatal.h" -+#include "txtdump.h" -+#include "typedefs.h" -+#include "update.h" -+#include "constr.h" -+#include "vec.h" -+#include "tgroup.h" -+#include "mdebin.h" -+#include "vsite.h" -+#include "force.h" -+#include "mdrun.h" -+#include "md_support.h" -+#include "sim_util.h" -+#include "domdec.h" -+#include "mdatoms.h" -+#include "ns.h" -+#include "mtop_util.h" -+#include "pme.h" -+#include "bondf.h" -+#include "gmx_omp_nthreads.h" -+#include "md_logging.h" -+ -+#include "gromacs/fileio/confio.h" -+#include "gromacs/fileio/trajectory_writing.h" -+#include "gromacs/linearalgebra/mtxio.h" -+#include "gromacs/linearalgebra/sparsematrix.h" -+#include "gromacs/timing/wallcycle.h" -+#include "gromacs/timing/walltime_accounting.h" -+#include "gromacs/imd/imd.h" -+ -+typedef struct { -+ t_state s; -+ rvec *f; -+ real epot; -+ real fnorm; -+ real fmax; -+ int a_fmax; -+} em_state_t; -+ -+static em_state_t *init_em_state() -+{ -+ em_state_t *ems; -+ -+ snew(ems, 1); -+ -+ /* does this need to be here? Should the array be declared differently (staticaly)in the state definition? 
*/ -+ snew(ems->s.lambda, efptNR); -+ -+ return ems; -+} -+ -+static void print_em_start(FILE *fplog, -+ t_commrec *cr, -+ gmx_walltime_accounting_t walltime_accounting, -+ gmx_wallcycle_t wcycle, -+ const char *name) -+{ -+ walltime_accounting_start(walltime_accounting); -+ wallcycle_start(wcycle, ewcRUN); -+ print_start(fplog, cr, walltime_accounting, name); -+} -+static void em_time_end(gmx_walltime_accounting_t walltime_accounting, -+ gmx_wallcycle_t wcycle) -+{ -+ wallcycle_stop(wcycle, ewcRUN); -+ -+ walltime_accounting_end(walltime_accounting); -+} -+ -+static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps) -+{ -+ fprintf(out, "\n"); -+ fprintf(out, "%s:\n", minimizer); -+ fprintf(out, " Tolerance (Fmax) = %12.5e\n", ftol); -+ fprintf(out, " Number of steps = %12d\n", nsteps); -+} -+ -+static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain) -+{ -+ char buffer[2048]; -+ if (bLastStep) -+ { -+ sprintf(buffer, -+ "\nEnergy minimization reached the maximum number " -+ "of steps before the forces reached the requested " -+ "precision Fmax < %g.\n", ftol); -+ } -+ else -+ { -+ sprintf(buffer, -+ "\nEnergy minimization has stopped, but the forces have " -+ "not converged to the requested precision Fmax < %g (which " -+ "may not be possible for your system). It stopped " -+ "because the algorithm tried to make a new step whose size " -+ "was too small, or there was no change in the energy since " -+ "last step. Either way, we regard the minimization as " -+ "converged to within the available machine precision, " -+ "given your starting configuration and EM parameters.\n%s%s", -+ ftol, -+ sizeof(real) < sizeof(double) ? -+ "\nDouble precision normally gives you higher accuracy, but " -+ "this is often not needed for preparing to run molecular " -+ "dynamics.\n" : -+ "", -+ bConstrain ? -+ "You might need to increase your constraint accuracy, or turn\n" -+ "off constraints altogether (set constraints = none in mdp file)\n" : -+ ""); -+ } -+ fputs(wrap_lines(buffer, 78, 0, FALSE), fp); -+} -+ -+ -+ -+static void print_converged(FILE *fp, const char *alg, real ftol, -+ gmx_int64_t count, gmx_bool bDone, gmx_int64_t nsteps, -+ real epot, real fmax, int nfmax, real fnorm) -+{ -+ char buf[STEPSTRSIZE]; -+ -+ if (bDone) -+ { -+ fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n", -+ alg, ftol, gmx_step_str(count, buf)); -+ } -+ else if (count < nsteps) -+ { -+ fprintf(fp, "\n%s converged to machine precision in %s steps,\n" -+ "but did not reach the requested Fmax < %g.\n", -+ alg, gmx_step_str(count, buf), ftol); -+ } -+ else -+ { -+ fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n", -+ alg, ftol, gmx_step_str(count, buf)); -+ } -+ -+#ifdef GMX_DOUBLE -+ fprintf(fp, "Potential Energy = %21.14e\n", epot); -+ fprintf(fp, "Maximum force = %21.14e on atom %d\n", fmax, nfmax+1); -+ fprintf(fp, "Norm of force = %21.14e\n", fnorm); -+#else -+ fprintf(fp, "Potential Energy = %14.7e\n", epot); -+ fprintf(fp, "Maximum force = %14.7e on atom %d\n", fmax, nfmax+1); -+ fprintf(fp, "Norm of force = %14.7e\n", fnorm); -+#endif -+} -+ -+static void get_f_norm_max(t_commrec *cr, -+ t_grpopts *opts, t_mdatoms *mdatoms, rvec *f, -+ real *fnorm, real *fmax, int *a_fmax) -+{ -+ double fnorm2, *sum; -+ real fmax2, fmax2_0, fam; -+ int la_max, a_max, start, end, i, m, gf; -+ -+ /* This routine finds the largest force and returns it. -+ * On parallel machines the global max is taken. 
-+ */ -+ fnorm2 = 0; -+ fmax2 = 0; -+ la_max = -1; -+ gf = 0; -+ start = 0; -+ end = mdatoms->homenr; -+ if (mdatoms->cFREEZE) -+ { -+ for (i = start; i < end; i++) -+ { -+ gf = mdatoms->cFREEZE[i]; -+ fam = 0; -+ for (m = 0; m < DIM; m++) -+ { -+ if (!opts->nFreeze[gf][m]) -+ { -+ fam += sqr(f[i][m]); -+ } -+ } -+ fnorm2 += fam; -+ if (fam > fmax2) -+ { -+ fmax2 = fam; -+ la_max = i; -+ } -+ } -+ } -+ else -+ { -+ for (i = start; i < end; i++) -+ { -+ fam = norm2(f[i]); -+ fnorm2 += fam; -+ if (fam > fmax2) -+ { -+ fmax2 = fam; -+ la_max = i; -+ } -+ } -+ } -+ -+ if (la_max >= 0 && DOMAINDECOMP(cr)) -+ { -+ a_max = cr->dd->gatindex[la_max]; -+ } -+ else -+ { -+ a_max = la_max; -+ } -+ if (PAR(cr)) -+ { -+ snew(sum, 2*cr->nnodes+1); -+ sum[2*cr->nodeid] = fmax2; -+ sum[2*cr->nodeid+1] = a_max; -+ sum[2*cr->nnodes] = fnorm2; -+ gmx_sumd(2*cr->nnodes+1, sum, cr); -+ fnorm2 = sum[2*cr->nnodes]; -+ /* Determine the global maximum */ -+ for (i = 0; i < cr->nnodes; i++) -+ { -+ if (sum[2*i] > fmax2) -+ { -+ fmax2 = sum[2*i]; -+ a_max = (int)(sum[2*i+1] + 0.5); -+ } -+ } -+ sfree(sum); -+ } -+ -+ if (fnorm) -+ { -+ *fnorm = sqrt(fnorm2); -+ } -+ if (fmax) -+ { -+ *fmax = sqrt(fmax2); -+ } -+ if (a_fmax) -+ { -+ *a_fmax = a_max; -+ } -+} -+ -+static void get_state_f_norm_max(t_commrec *cr, -+ t_grpopts *opts, t_mdatoms *mdatoms, -+ em_state_t *ems) -+{ -+ get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax); -+} -+ -+void init_em(FILE *fplog, const char *title, -+ t_commrec *cr, t_inputrec *ir, -+ t_state *state_global, gmx_mtop_t *top_global, -+ em_state_t *ems, gmx_localtop_t **top, -+ rvec **f, rvec **f_global, -+ t_nrnb *nrnb, rvec mu_tot, -+ t_forcerec *fr, gmx_enerdata_t **enerd, -+ t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int nfile, const t_filenm fnm[], -+ gmx_mdoutf_t *outf, t_mdebin **mdebin, -+ int imdport, unsigned long gmx_unused Flags, -+ gmx_wallcycle_t wcycle) -+{ -+ int i; -+ real dvdl_constr; -+ -+ if (fplog) -+ { -+ fprintf(fplog, "Initiating %s\n", title); -+ } -+ -+ state_global->ngtc = 0; -+ -+ /* Initialize lambda variables */ -+ initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, NULL); -+ -+ init_nrnb(nrnb); -+ -+ /* Interactive molecular dynamics */ -+ init_IMD(ir, cr, top_global, fplog, 1, state_global->x, -+ nfile, fnm, NULL, imdport, Flags); -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ *top = dd_init_local_top(top_global); -+ -+ dd_init_local_state(cr->dd, state_global, &ems->s); -+ -+ *f = NULL; -+ -+ /* Distribute the charge groups over the nodes from the master node */ -+ dd_partition_system(fplog, ir->init_step, cr, TRUE, 1, -+ state_global, top_global, ir, -+ &ems->s, &ems->f, mdatoms, *top, -+ fr, vsite, NULL, constr, -+ nrnb, NULL, FALSE); -+ dd_store_state(cr->dd, &ems->s); -+ -+ if (ir->nstfout) -+ { -+ snew(*f_global, top_global->natoms); -+ } -+ else -+ { -+ *f_global = NULL; -+ } -+ *graph = NULL; -+ } -+ else -+ { -+ snew(*f, top_global->natoms); -+ -+ /* Just copy the state */ -+ ems->s = *state_global; -+ snew(ems->s.x, ems->s.nalloc); -+ snew(ems->f, ems->s.nalloc); -+ for (i = 0; i < state_global->natoms; i++) -+ { -+ copy_rvec(state_global->x[i], ems->s.x[i]); -+ } -+ copy_mat(state_global->box, ems->s.box); -+ -+ *top = gmx_mtop_generate_local_top(top_global, ir); -+ *f_global = *f; -+ -+ forcerec_set_excl_load(fr, *top); -+ -+ setup_bonded_threading(fr, &(*top)->idef); -+ -+ if (ir->ePBC != epbcNONE && !fr->bMolPBC) -+ { -+ *graph = 
mk_graph(fplog, &((*top)->idef), 0, top_global->natoms, FALSE, FALSE); -+ } -+ else -+ { -+ *graph = NULL; -+ } -+ -+ atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms); -+ update_mdatoms(mdatoms, state_global->lambda[efptFEP]); -+ -+ if (vsite) -+ { -+ set_vsite_top(vsite, *top, mdatoms, cr); -+ } -+ } -+ -+ if (constr) -+ { -+ if (ir->eConstrAlg == econtSHAKE && -+ gmx_mtop_ftype_count(top_global, F_CONSTR) > 0) -+ { -+ gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n", -+ econstr_names[econtSHAKE], econstr_names[econtLINCS]); -+ } -+ -+ if (!DOMAINDECOMP(cr)) -+ { -+ set_constraints(constr, *top, ir, mdatoms, cr); -+ } -+ -+ if (!ir->bContinuation) -+ { -+ /* Constrain the starting coordinates */ -+ dvdl_constr = 0; -+ constrain(PAR(cr) ? NULL : fplog, TRUE, TRUE, constr, &(*top)->idef, -+ ir, NULL, cr, -1, 0, 1.0, mdatoms, -+ ems->s.x, ems->s.x, NULL, fr->bMolPBC, ems->s.box, -+ ems->s.lambda[efptFEP], &dvdl_constr, -+ NULL, NULL, nrnb, econqCoord, FALSE, 0, 0); -+ } -+ } -+ -+ if (PAR(cr)) -+ { -+ *gstat = global_stat_init(ir); -+ } -+ -+ *outf = init_mdoutf(fplog, nfile, fnm, 0, cr, ir, top_global, NULL, wcycle); -+ -+ snew(*enerd, 1); -+ init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, -+ *enerd); -+ -+ if (mdebin != NULL) -+ { -+ /* Init bin for energy stuff */ -+ *mdebin = init_mdebin(mdoutf_get_fp_ene(*outf), top_global, ir, NULL); -+ } -+ -+ clear_rvec(mu_tot); -+ calc_shifts(ems->s.box, fr->shift_vec); -+} -+ -+static void finish_em(t_commrec *cr, gmx_mdoutf_t outf, -+ gmx_walltime_accounting_t walltime_accounting, -+ gmx_wallcycle_t wcycle) -+{ -+ if (!(cr->duty & DUTY_PME)) -+ { -+ /* Tell the PME only node to finish */ -+ gmx_pme_send_finish(cr); -+ } -+ -+ done_mdoutf(outf); -+ -+ em_time_end(walltime_accounting, wcycle); -+} -+ -+static void swap_em_state(em_state_t *ems1, em_state_t *ems2) -+{ -+ em_state_t tmp; -+ -+ tmp = *ems1; -+ *ems1 = *ems2; -+ *ems2 = tmp; -+} -+ -+static void copy_em_coords(em_state_t *ems, t_state *state) -+{ -+ int i; -+ -+ for (i = 0; (i < state->natoms); i++) -+ { -+ copy_rvec(ems->s.x[i], state->x[i]); -+ } -+} -+ -+static void write_em_traj(FILE *fplog, t_commrec *cr, -+ gmx_mdoutf_t outf, -+ gmx_bool bX, gmx_bool bF, const char *confout, -+ gmx_mtop_t *top_global, -+ t_inputrec *ir, gmx_int64_t step, -+ em_state_t *state, -+ t_state *state_global, rvec *f_global) -+{ -+ int mdof_flags; -+ gmx_bool bIMDout = FALSE; -+ -+ -+ /* Shall we do IMD output? 
*/ -+ if (ir->bIMD) -+ { -+ bIMDout = do_per_step(step, IMD_get_step(ir->imd->setup)); -+ } -+ -+ if ((bX || bF || bIMDout || confout != NULL) && !DOMAINDECOMP(cr)) -+ { -+ copy_em_coords(state, state_global); -+ f_global = state->f; -+ } -+ -+ mdof_flags = 0; -+ if (bX) -+ { -+ mdof_flags |= MDOF_X; -+ } -+ if (bF) -+ { -+ mdof_flags |= MDOF_F; -+ } -+ -+ /* If we want IMD output, set appropriate MDOF flag */ -+ if (ir->bIMD) -+ { -+ mdof_flags |= MDOF_IMD; -+ } -+ -+ mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, -+ top_global, step, (double)step, -+ &state->s, state_global, state->f, f_global); -+ -+ if (confout != NULL && MASTER(cr)) -+ { -+ if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr)) -+ { -+ /* Make molecules whole only for confout writing */ -+ do_pbc_mtop(fplog, ir->ePBC, state_global->box, top_global, -+ state_global->x); -+ } -+ -+ write_sto_conf_mtop(confout, -+ *top_global->name, top_global, -+ state_global->x, NULL, ir->ePBC, state_global->box); -+ } -+} -+ -+static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md, -+ gmx_bool bMolPBC, -+ em_state_t *ems1, real a, rvec *f, em_state_t *ems2, -+ gmx_constr_t constr, gmx_localtop_t *top, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_int64_t count) -+ -+{ -+ t_state *s1, *s2; -+ int i; -+ int start, end; -+ rvec *x1, *x2; -+ real dvdl_constr; -+ int nthreads gmx_unused; -+ -+ s1 = &ems1->s; -+ s2 = &ems2->s; -+ -+ if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count) -+ { -+ gmx_incons("state mismatch in do_em_step"); -+ } -+ -+ s2->flags = s1->flags; -+ -+ if (s2->nalloc != s1->nalloc) -+ { -+ s2->nalloc = s1->nalloc; -+ srenew(s2->x, s1->nalloc); -+ srenew(ems2->f, s1->nalloc); -+ if (s2->flags & (1<cg_p, s1->nalloc); -+ } -+ } -+ -+ s2->natoms = s1->natoms; -+ copy_mat(s1->box, s2->box); -+ /* Copy free energy state */ -+ for (i = 0; i < efptNR; i++) -+ { -+ s2->lambda[i] = s1->lambda[i]; -+ } -+ copy_mat(s1->box, s2->box); -+ -+ start = 0; -+ end = md->homenr; -+ -+ x1 = s1->x; -+ x2 = s2->x; -+ -+ nthreads = gmx_omp_nthreads_get(emntUpdate); -+#pragma omp parallel num_threads(nthreads) -+ { -+ int gf, i, m; -+ -+ gf = 0; -+#pragma omp for schedule(static) nowait -+ for (i = start; i < end; i++) -+ { -+ if (md->cFREEZE) -+ { -+ gf = md->cFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ if (ir->opts.nFreeze[gf][m]) -+ { -+ x2[i][m] = x1[i][m]; -+ } -+ else -+ { -+ x2[i][m] = x1[i][m] + a*f[i][m]; -+ } -+ } -+ } -+ -+ if (s2->flags & (1<cg_p; -+ x2 = s2->cg_p; -+#pragma omp for schedule(static) nowait -+ for (i = start; i < end; i++) -+ { -+ copy_rvec(x1[i], x2[i]); -+ } -+ } -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ s2->ddp_count = s1->ddp_count; -+ if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) -+ { -+#pragma omp barrier -+ s2->cg_gl_nalloc = s1->cg_gl_nalloc; -+ srenew(s2->cg_gl, s2->cg_gl_nalloc); -+#pragma omp barrier -+ } -+ s2->ncg_gl = s1->ncg_gl; -+#pragma omp for schedule(static) nowait -+ for (i = 0; i < s2->ncg_gl; i++) -+ { -+ s2->cg_gl[i] = s1->cg_gl[i]; -+ } -+ s2->ddp_count_cg_gl = s1->ddp_count_cg_gl; -+ } -+ } -+ -+ if (constr) -+ { -+ wallcycle_start(wcycle, ewcCONSTR); -+ dvdl_constr = 0; -+ constrain(NULL, TRUE, TRUE, constr, &top->idef, -+ ir, NULL, cr, count, 0, 1.0, md, -+ s1->x, s2->x, NULL, bMolPBC, s2->box, -+ s2->lambda[efptBONDED], &dvdl_constr, -+ NULL, NULL, nrnb, econqCoord, FALSE, 0, 0); -+ wallcycle_stop(wcycle, ewcCONSTR); -+ } -+} -+ -+static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr, -+ gmx_mtop_t *top_global, t_inputrec 
*ir, -+ em_state_t *ems, gmx_localtop_t *top, -+ t_mdatoms *mdatoms, t_forcerec *fr, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle) -+{ -+ /* Repartition the domain decomposition */ -+ wallcycle_start(wcycle, ewcDOMDEC); -+ dd_partition_system(fplog, step, cr, FALSE, 1, -+ NULL, top_global, ir, -+ &ems->s, &ems->f, -+ mdatoms, top, fr, vsite, NULL, constr, -+ nrnb, wcycle, FALSE); -+ dd_store_state(cr->dd, &ems->s); -+ wallcycle_stop(wcycle, ewcDOMDEC); -+} -+ -+static void evaluate_energy(FILE *fplog, t_commrec *cr, -+ gmx_mtop_t *top_global, -+ em_state_t *ems, gmx_localtop_t *top, -+ t_inputrec *inputrec, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_global_stat_t gstat, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ t_fcdata *fcd, -+ t_graph *graph, t_mdatoms *mdatoms, -+ t_forcerec *fr, rvec mu_tot, -+ gmx_enerdata_t *enerd, tensor vir, tensor pres, -+ gmx_int64_t count, gmx_bool bFirst) -+{ -+ real t; -+ gmx_bool bNS; -+ int nabnsb; -+ tensor force_vir, shake_vir, ekin; -+ real dvdl_constr, prescorr, enercorr, dvdlcorr; -+ real terminate = 0; -+ -+ /* Set the time to the initial time, the time does not change during EM */ -+ t = inputrec->init_t; -+ -+ if (bFirst || -+ (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) -+ { -+ /* This is the first state or an old state used before the last ns */ -+ bNS = TRUE; -+ } -+ else -+ { -+ bNS = FALSE; -+ if (inputrec->nstlist > 0) -+ { -+ bNS = TRUE; -+ } -+ else if (inputrec->nstlist == -1) -+ { -+ nabnsb = natoms_beyond_ns_buffer(inputrec, fr, &top->cgs, NULL, ems->s.x); -+ if (PAR(cr)) -+ { -+ gmx_sumi(1, &nabnsb, cr); -+ } -+ bNS = (nabnsb > 0); -+ } -+ } -+ -+ if (vsite) -+ { -+ construct_vsites(vsite, ems->s.x, 1, NULL, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, cr, ems->s.box); -+ } -+ -+ if (DOMAINDECOMP(cr) && bNS) -+ { -+ /* Repartition the domain decomposition */ -+ em_dd_partition_system(fplog, count, cr, top_global, inputrec, -+ ems, top, mdatoms, fr, vsite, constr, -+ nrnb, wcycle); -+ } -+ -+ /* Calc force & energy on new trial position */ -+ /* do_force always puts the charge groups in the box and shifts again -+ * We do not unshift, so molecules are always whole in congrad.c -+ */ -+ do_force(fplog, cr, inputrec, -+ count, nrnb, wcycle, top, &top_global->groups, -+ ems->s.box, ems->s.x, &ems->s.hist, -+ ems->f, force_vir, mdatoms, enerd, fcd, -+ ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL, NULL, TRUE, -+ GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | -+ GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY | -+ (bNS ? 
GMX_FORCE_NS | GMX_FORCE_DO_LR : 0)); -+ -+ /* Clear the unused shake virial and pressure */ -+ clear_mat(shake_vir); -+ clear_mat(pres); -+ -+ /* Communicate stuff when parallel */ -+ if (PAR(cr) && inputrec->eI != eiNM) -+ { -+ wallcycle_start(wcycle, ewcMoveE); -+ -+ global_stat(fplog, gstat, cr, enerd, force_vir, shake_vir, mu_tot, -+ inputrec, NULL, NULL, NULL, 1, &terminate, -+ top_global, &ems->s, FALSE, -+ CGLO_ENERGY | -+ CGLO_PRESSURE | -+ CGLO_CONSTRAINT | -+ CGLO_FIRSTITERATE); -+ -+ wallcycle_stop(wcycle, ewcMoveE); -+ } -+ -+ /* Calculate long range corrections to pressure and energy */ -+ calc_dispcorr(fplog, inputrec, fr, count, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW], -+ pres, force_vir, &prescorr, &enercorr, &dvdlcorr); -+ enerd->term[F_DISPCORR] = enercorr; -+ enerd->term[F_EPOT] += enercorr; -+ enerd->term[F_PRES] += prescorr; -+ enerd->term[F_DVDL] += dvdlcorr; -+ -+ ems->epot = enerd->term[F_EPOT]; -+ -+ if (constr) -+ { -+ /* Project out the constraint components of the force */ -+ wallcycle_start(wcycle, ewcCONSTR); -+ dvdl_constr = 0; -+ constrain(NULL, FALSE, FALSE, constr, &top->idef, -+ inputrec, NULL, cr, count, 0, 1.0, mdatoms, -+ ems->s.x, ems->f, ems->f, fr->bMolPBC, ems->s.box, -+ ems->s.lambda[efptBONDED], &dvdl_constr, -+ NULL, &shake_vir, nrnb, econqForceDispl, FALSE, 0, 0); -+ if (fr->bSepDVDL && fplog) -+ { -+ gmx_print_sepdvdl(fplog, "Constraints", t, dvdl_constr); -+ } -+ enerd->term[F_DVDL_CONSTR] += dvdl_constr; -+ m_add(force_vir, shake_vir, vir); -+ wallcycle_stop(wcycle, ewcCONSTR); -+ } -+ else -+ { -+ copy_mat(force_vir, vir); -+ } -+ -+ clear_mat(ekin); -+ enerd->term[F_PRES] = -+ calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres); -+ -+ sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals); -+ -+ if (EI_ENERGY_MINIMIZATION(inputrec->eI)) -+ { -+ get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, ems); -+ } -+} -+ -+static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, -+ gmx_mtop_t *mtop, -+ em_state_t *s_min, em_state_t *s_b) -+{ -+ rvec *fm, *fb, *fmg; -+ t_block *cgs_gl; -+ int ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m; -+ double partsum; -+ unsigned char *grpnrFREEZE; -+ -+ if (debug) -+ { -+ fprintf(debug, "Doing reorder_partsum\n"); -+ } -+ -+ fm = s_min->f; -+ fb = s_b->f; -+ -+ cgs_gl = dd_charge_groups_global(cr->dd); -+ index = cgs_gl->index; -+ -+ /* Collect fm in a global vector fmg. -+ * This conflicts with the spirit of domain decomposition, -+ * but to fully optimize this a much more complicated algorithm is required. 
-+ */ -+ snew(fmg, mtop->natoms); -+ -+ ncg = s_min->s.ncg_gl; -+ cg_gl = s_min->s.cg_gl; -+ i = 0; -+ for (c = 0; c < ncg; c++) -+ { -+ cg = cg_gl[c]; -+ a0 = index[cg]; -+ a1 = index[cg+1]; -+ for (a = a0; a < a1; a++) -+ { -+ copy_rvec(fm[i], fmg[a]); -+ i++; -+ } -+ } -+ gmx_sum(mtop->natoms*3, fmg[0], cr); -+ -+ /* Now we will determine the part of the sum for the cgs in state s_b */ -+ ncg = s_b->s.ncg_gl; -+ cg_gl = s_b->s.cg_gl; -+ partsum = 0; -+ i = 0; -+ gf = 0; -+ grpnrFREEZE = mtop->groups.grpnr[egcFREEZE]; -+ for (c = 0; c < ncg; c++) -+ { -+ cg = cg_gl[c]; -+ a0 = index[cg]; -+ a1 = index[cg+1]; -+ for (a = a0; a < a1; a++) -+ { -+ if (mdatoms->cFREEZE && grpnrFREEZE) -+ { -+ gf = grpnrFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ if (!opts->nFreeze[gf][m]) -+ { -+ partsum += (fb[i][m] - fmg[a][m])*fb[i][m]; -+ } -+ } -+ i++; -+ } -+ } -+ -+ sfree(fmg); -+ -+ return partsum; -+} -+ -+static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, -+ gmx_mtop_t *mtop, -+ em_state_t *s_min, em_state_t *s_b) -+{ -+ rvec *fm, *fb; -+ double sum; -+ int gf, i, m; -+ -+ /* This is just the classical Polak-Ribiere calculation of beta; -+ * it looks a bit complicated since we take freeze groups into account, -+ * and might have to sum it in parallel runs. -+ */ -+ -+ if (!DOMAINDECOMP(cr) || -+ (s_min->s.ddp_count == cr->dd->ddp_count && -+ s_b->s.ddp_count == cr->dd->ddp_count)) -+ { -+ fm = s_min->f; -+ fb = s_b->f; -+ sum = 0; -+ gf = 0; -+ /* This part of code can be incorrect with DD, -+ * since the atom ordering in s_b and s_min might differ. -+ */ -+ for (i = 0; i < mdatoms->homenr; i++) -+ { -+ if (mdatoms->cFREEZE) -+ { -+ gf = mdatoms->cFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ if (!opts->nFreeze[gf][m]) -+ { -+ sum += (fb[i][m] - fm[i][m])*fb[i][m]; -+ } -+ } -+ } -+ } -+ else -+ { -+ /* We need to reorder cgs while summing */ -+ sum = reorder_partsum(cr, opts, mdatoms, mtop, s_min, s_b); -+ } -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &sum, cr); -+ } -+ -+ return sum/sqr(s_min->fnorm); -+} -+ -+double do_cg(FILE *fplog, t_commrec *cr, -+ int nfile, const t_filenm fnm[], -+ const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact, -+ int gmx_unused nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int gmx_unused stepout, -+ t_inputrec *inputrec, -+ gmx_mtop_t *top_global, t_fcdata *fcd, -+ t_state *state_global, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t gmx_unused ed, -+ t_forcerec *fr, -+ int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, -+ gmx_membed_t gmx_unused membed, -+ real gmx_unused cpt_period, real gmx_unused max_hours, -+ const char gmx_unused *deviceOptions, -+ int imdport, -+ unsigned long gmx_unused Flags, -+ gmx_walltime_accounting_t walltime_accounting) -+{ -+ const char *CG = "Polak-Ribiere Conjugate Gradients"; -+ -+ em_state_t *s_min, *s_a, *s_b, *s_c; -+ gmx_localtop_t *top; -+ gmx_enerdata_t *enerd; -+ rvec *f; -+ gmx_global_stat_t gstat; -+ t_graph *graph; -+ rvec *f_global, *p, *sf, *sfm; -+ double gpa, gpb, gpc, tmp, sum[2], minstep; -+ real fnormn; -+ real stepsize; -+ real a, b, c, beta = 0.0; -+ real epot_repl = 0; -+ real pnorm; -+ t_mdebin *mdebin; -+ gmx_bool converged, foundlower; -+ rvec mu_tot; -+ gmx_bool do_log = FALSE, do_ene = FALSE, do_x, do_f; -+ tensor vir, pres; -+ int number_steps, neval = 0, nstcg = inputrec->nstcgsteep; -+ gmx_mdoutf_t outf; -+ int i, m, gf, step, nminstep; -+ real terminate = 0; -+ 
-+ step = 0; -+ -+ s_min = init_em_state(); -+ s_a = init_em_state(); -+ s_b = init_em_state(); -+ s_c = init_em_state(); -+ -+ /* Init em and store the local state in s_min */ -+ init_em(fplog, CG, cr, inputrec, -+ state_global, top_global, s_min, &top, &f, &f_global, -+ nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, -+ nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); -+ -+ /* Print to log file */ -+ print_em_start(fplog, cr, walltime_accounting, wcycle, CG); -+ -+ /* Max number of steps */ -+ number_steps = inputrec->nsteps; -+ -+ if (MASTER(cr)) -+ { -+ sp_header(stderr, CG, inputrec->em_tol, number_steps); -+ } -+ if (fplog) -+ { -+ sp_header(fplog, CG, inputrec->em_tol, number_steps); -+ } -+ -+ /* Call the force routine and some auxiliary (neighboursearching etc.) */ -+ /* do_force always puts the charge groups in the box and shifts again -+ * We do not unshift, so molecules are always whole in congrad.c -+ */ -+ evaluate_energy(fplog, cr, -+ top_global, s_min, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, TRUE); -+ where(); -+ -+ if (MASTER(cr)) -+ { -+ /* Copy stuff to the energy bin for easy printing etc. */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)step, -+ mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box, -+ NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ -+ print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]); -+ print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ where(); -+ -+ /* Estimate/guess the initial stepsize */ -+ stepsize = inputrec->em_stepsize/s_min->fnorm; -+ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, " F-max = %12.5e on atom %d\n", -+ s_min->fmax, s_min->a_fmax+1); -+ fprintf(stderr, " F-Norm = %12.5e\n", -+ s_min->fnorm/sqrt(state_global->natoms)); -+ fprintf(stderr, "\n"); -+ /* and copy to the log file too... */ -+ fprintf(fplog, " F-max = %12.5e on atom %d\n", -+ s_min->fmax, s_min->a_fmax+1); -+ fprintf(fplog, " F-Norm = %12.5e\n", -+ s_min->fnorm/sqrt(state_global->natoms)); -+ fprintf(fplog, "\n"); -+ } -+ /* Start the loop over CG steps. -+ * Each successful step is counted, and we continue until -+ * we either converge or reach the max number of steps. -+ */ -+ converged = FALSE; -+ for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++) -+ { -+ -+ /* start taking steps in a new direction -+ * First time we enter the routine, beta=0, and the direction is -+ * simply the negative gradient. -+ */ -+ -+ /* Calculate the new direction in p, and the gradient in this direction, gpa */ -+ p = s_min->s.cg_p; -+ sf = s_min->f; -+ gpa = 0; -+ gf = 0; -+ for (i = 0; i < mdatoms->homenr; i++) -+ { -+ if (mdatoms->cFREEZE) -+ { -+ gf = mdatoms->cFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ if (!inputrec->opts.nFreeze[gf][m]) -+ { -+ p[i][m] = sf[i][m] + beta*p[i][m]; -+ gpa -= p[i][m]*sf[i][m]; -+ /* f is negative gradient, thus the sign */ -+ } -+ else -+ { -+ p[i][m] = 0; -+ } -+ } -+ } -+ -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpa, cr); -+ } -+ -+ /* Calculate the norm of the search vector */ -+ get_f_norm_max(cr, &(inputrec->opts), mdatoms, p, &pnorm, NULL, NULL); -+ -+ /* Just in case stepsize reaches zero due to numerical precision... 
*/ -+ if (stepsize <= 0) -+ { -+ stepsize = inputrec->em_stepsize/pnorm; -+ } -+ -+ /* -+ * Double check the value of the derivative in the search direction. -+ * If it is positive it must be due to the old information in the -+ * CG formula, so just remove that and start over with beta=0. -+ * This corresponds to a steepest descent step. -+ */ -+ if (gpa > 0) -+ { -+ beta = 0; -+ step--; /* Don't count this step since we are restarting */ -+ continue; /* Go back to the beginning of the big for-loop */ -+ } -+ -+ /* Calculate minimum allowed stepsize, before the average (norm) -+ * relative change in coordinate is smaller than precision -+ */ -+ minstep = 0; -+ for (i = 0; i < mdatoms->homenr; i++) -+ { -+ for (m = 0; m < DIM; m++) -+ { -+ tmp = fabs(s_min->s.x[i][m]); -+ if (tmp < 1.0) -+ { -+ tmp = 1.0; -+ } -+ tmp = p[i][m]/tmp; -+ minstep += tmp*tmp; -+ } -+ } -+ /* Add up from all CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &minstep, cr); -+ } -+ -+ minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms)); -+ -+ if (stepsize < minstep) -+ { -+ converged = TRUE; -+ break; -+ } -+ -+ /* Write coordinates if necessary */ -+ do_x = do_per_step(step, inputrec->nstxout); -+ do_f = do_per_step(step, inputrec->nstfout); -+ -+ write_em_traj(fplog, cr, outf, do_x, do_f, NULL, -+ top_global, inputrec, step, -+ s_min, state_global, f_global); -+ -+ /* Take a step downhill. -+ * In theory, we should minimize the function along this direction. -+ * That is quite possible, but it turns out to take 5-10 function evaluations -+ * for each line. However, we dont really need to find the exact minimum - -+ * it is much better to start a new CG step in a modified direction as soon -+ * as we are close to it. This will save a lot of energy evaluations. -+ * -+ * In practice, we just try to take a single step. -+ * If it worked (i.e. lowered the energy), we increase the stepsize but -+ * the continue straight to the next CG step without trying to find any minimum. -+ * If it didn't work (higher energy), there must be a minimum somewhere between -+ * the old position and the new one. -+ * -+ * Due to the finite numerical accuracy, it turns out that it is a good idea -+ * to even accept a SMALL increase in energy, if the derivative is still downhill. -+ * This leads to lower final energies in the tests I've done. 
/ Erik -+ */ -+ s_a->epot = s_min->epot; -+ a = 0.0; -+ c = a + stepsize; /* reference position along line is zero */ -+ -+ if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) -+ { -+ em_dd_partition_system(fplog, step, cr, top_global, inputrec, -+ s_min, top, mdatoms, fr, vsite, constr, -+ nrnb, wcycle); -+ } -+ -+ /* Take a trial step (new coords in s_c) */ -+ do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, c, s_min->s.cg_p, s_c, -+ constr, top, nrnb, wcycle, -1); -+ -+ neval++; -+ /* Calculate energy for the trial step */ -+ evaluate_energy(fplog, cr, -+ top_global, s_c, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, FALSE); -+ -+ /* Calc derivative along line */ -+ p = s_c->s.cg_p; -+ sf = s_c->f; -+ gpc = 0; -+ for (i = 0; i < mdatoms->homenr; i++) -+ { -+ for (m = 0; m < DIM; m++) -+ { -+ gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */ -+ } -+ } -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpc, cr); -+ } -+ -+ /* This is the max amount of increase in energy we tolerate */ -+ tmp = sqrt(GMX_REAL_EPS)*fabs(s_a->epot); -+ -+ /* Accept the step if the energy is lower, or if it is not significantly higher -+ * and the line derivative is still negative. -+ */ -+ if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) -+ { -+ foundlower = TRUE; -+ /* Great, we found a better energy. Increase step for next iteration -+ * if we are still going down, decrease it otherwise -+ */ -+ if (gpc < 0) -+ { -+ stepsize *= 1.618034; /* The golden section */ -+ } -+ else -+ { -+ stepsize *= 0.618034; /* 1/golden section */ -+ } -+ } -+ else -+ { -+ /* New energy is the same or higher. We will have to do some work -+ * to find a smaller value in the interval. Take smaller step next time! -+ */ -+ foundlower = FALSE; -+ stepsize *= 0.618034; -+ } -+ -+ -+ -+ -+ /* OK, if we didn't find a lower value we will have to locate one now - there must -+ * be one in the interval [a=0,c]. -+ * The same thing is valid here, though: Don't spend dozens of iterations to find -+ * the line minimum. We try to interpolate based on the derivative at the endpoints, -+ * and only continue until we find a lower value. In most cases this means 1-2 iterations. -+ * -+ * I also have a safeguard for potentially really patological functions so we never -+ * take more than 20 steps before we give up ... -+ * -+ * If we already found a lower value we just skip this step and continue to the update. -+ */ -+ if (!foundlower) -+ { -+ nminstep = 0; -+ -+ do -+ { -+ /* Select a new trial point. -+ * If the derivatives at points a & c have different sign we interpolate to zero, -+ * otherwise just do a bisection. 
-+ */ -+ if (gpa < 0 && gpc > 0) -+ { -+ b = a + gpa*(a-c)/(gpc-gpa); -+ } -+ else -+ { -+ b = 0.5*(a+c); -+ } -+ -+ /* safeguard if interpolation close to machine accuracy causes errors: -+ * never go outside the interval -+ */ -+ if (b <= a || b >= c) -+ { -+ b = 0.5*(a+c); -+ } -+ -+ if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) -+ { -+ /* Reload the old state */ -+ em_dd_partition_system(fplog, -1, cr, top_global, inputrec, -+ s_min, top, mdatoms, fr, vsite, constr, -+ nrnb, wcycle); -+ } -+ -+ /* Take a trial step to this new point - new coords in s_b */ -+ do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, b, s_min->s.cg_p, s_b, -+ constr, top, nrnb, wcycle, -1); -+ -+ neval++; -+ /* Calculate energy for the trial step */ -+ evaluate_energy(fplog, cr, -+ top_global, s_b, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, FALSE); -+ -+ /* p does not change within a step, but since the domain decomposition -+ * might change, we have to use cg_p of s_b here. -+ */ -+ p = s_b->s.cg_p; -+ sf = s_b->f; -+ gpb = 0; -+ for (i = 0; i < mdatoms->homenr; i++) -+ { -+ for (m = 0; m < DIM; m++) -+ { -+ gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */ -+ } -+ } -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpb, cr); -+ } -+ -+ if (debug) -+ { -+ fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n", -+ s_a->epot, s_b->epot, s_c->epot, gpb); -+ } -+ -+ epot_repl = s_b->epot; -+ -+ /* Keep one of the intervals based on the value of the derivative at the new point */ -+ if (gpb > 0) -+ { -+ /* Replace c endpoint with b */ -+ swap_em_state(s_b, s_c); -+ c = b; -+ gpc = gpb; -+ } -+ else -+ { -+ /* Replace a endpoint with b */ -+ swap_em_state(s_b, s_a); -+ a = b; -+ gpa = gpb; -+ } -+ -+ /* -+ * Stop search as soon as we find a value smaller than the endpoints. -+ * Never run more than 20 steps, no matter what. -+ */ -+ nminstep++; -+ } -+ while ((epot_repl > s_a->epot || epot_repl > s_c->epot) && -+ (nminstep < 20)); -+ -+ if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS || -+ nminstep >= 20) -+ { -+ /* OK. We couldn't find a significantly lower energy. -+ * If beta==0 this was steepest descent, and then we give up. -+ * If not, set beta=0 and restart with steepest descent before quitting. -+ */ -+ if (beta == 0.0) -+ { -+ /* Converged */ -+ converged = TRUE; -+ break; -+ } -+ else -+ { -+ /* Reset memory before giving up */ -+ beta = 0.0; -+ continue; -+ } -+ } -+ -+ /* Select min energy state of A & C, put the best in B. -+ */ -+ if (s_c->epot < s_a->epot) -+ { -+ if (debug) -+ { -+ fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n", -+ s_c->epot, s_a->epot); -+ } -+ swap_em_state(s_b, s_c); -+ gpb = gpc; -+ b = c; -+ } -+ else -+ { -+ if (debug) -+ { -+ fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n", -+ s_a->epot, s_c->epot); -+ } -+ swap_em_state(s_b, s_a); -+ gpb = gpa; -+ b = a; -+ } -+ -+ } -+ else -+ { -+ if (debug) -+ { -+ fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n", -+ s_c->epot); -+ } -+ swap_em_state(s_b, s_c); -+ gpb = gpc; -+ b = c; -+ } -+ -+ /* new search direction */ -+ /* beta = 0 means forget all memory and restart with steepest descents. */ -+ if (nstcg && ((step % nstcg) == 0)) -+ { -+ beta = 0.0; -+ } -+ else -+ { -+ /* s_min->fnorm cannot be zero, because then we would have converged -+ * and broken out. -+ */ -+ -+ /* Polak-Ribiere update. 
-+ * Change to fnorm2/fnorm2_old for Fletcher-Reeves -+ */ -+ beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b); -+ } -+ /* Limit beta to prevent oscillations */ -+ if (fabs(beta) > 5.0) -+ { -+ beta = 0.0; -+ } -+ -+ -+ /* update positions */ -+ swap_em_state(s_min, s_b); -+ gpa = gpb; -+ -+ /* Print it if necessary */ -+ if (MASTER(cr)) -+ { -+ if (bVerbose) -+ { -+ fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", -+ step, s_min->epot, s_min->fnorm/sqrt(state_global->natoms), -+ s_min->fmax, s_min->a_fmax+1); -+ } -+ /* Store the new (lower) energies */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)step, -+ mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box, -+ NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ -+ do_log = do_per_step(step, inputrec->nstlog); -+ do_ene = do_per_step(step, inputrec->nstenergy); -+ -+ /* Prepare IMD energy record, if bIMD is TRUE. */ -+ IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, step, TRUE); -+ -+ if (do_log) -+ { -+ print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]); -+ } -+ print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, -+ do_log ? fplog : NULL, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ -+ /* Send energies and positions to the IMD client if bIMD is TRUE. */ -+ if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr)) -+ { -+ IMD_send_positions(inputrec->imd); -+ } -+ -+ /* Stop when the maximum force lies below tolerance. -+ * If we have reached machine precision, converged is already set to true. -+ */ -+ converged = converged || (s_min->fmax < inputrec->em_tol); -+ -+ } /* End of the loop */ -+ -+ /* IMD cleanup, if bIMD is TRUE. */ -+ IMD_finalize(inputrec->bIMD, inputrec->imd); -+ -+ if (converged) -+ { -+ step--; /* we never took that last step in this case */ -+ -+ } -+ if (s_min->fmax > inputrec->em_tol) -+ { -+ if (MASTER(cr)) -+ { -+ warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE); -+ warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE); -+ } -+ converged = FALSE; -+ } -+ -+ if (MASTER(cr)) -+ { -+ /* If we printed energy and/or logfile last step (which was the last step) -+ * we don't have to do it again, but otherwise print the final values. -+ */ -+ if (!do_log) -+ { -+ /* Write final value to log since we didn't do anything the last step */ -+ print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]); -+ } -+ if (!do_ene || !do_log) -+ { -+ /* Write final energy file entries */ -+ print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, -+ !do_log ? fplog : NULL, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ } -+ -+ /* Print some stuff... */ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "\nwriting lowest energy coordinates.\n"); -+ } -+ -+ /* IMPORTANT! -+ * For accurate normal mode calculation it is imperative that we -+ * store the last conformation into the full precision binary trajectory. -+ * -+ * However, we should only do it if we did NOT already write this step -+ * above (which we did if do_x or do_f was true). 
-+ */ -+ do_x = !do_per_step(step, inputrec->nstxout); -+ do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout)); -+ -+ write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), -+ top_global, inputrec, step, -+ s_min, state_global, f_global); -+ -+ fnormn = s_min->fnorm/sqrt(state_global->natoms); -+ -+ if (MASTER(cr)) -+ { -+ print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps, -+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); -+ print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps, -+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); -+ -+ fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); -+ } -+ -+ finish_em(cr, outf, walltime_accounting, wcycle); -+ -+ /* To print the actual number of steps we needed somewhere */ -+ walltime_accounting_set_nsteps_done(walltime_accounting, step); -+ -+ return 0; -+} /* That's all folks */ -+ -+ -+double do_lbfgs(FILE *fplog, t_commrec *cr, -+ int nfile, const t_filenm fnm[], -+ const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact, -+ int gmx_unused nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int gmx_unused stepout, -+ t_inputrec *inputrec, -+ gmx_mtop_t *top_global, t_fcdata *fcd, -+ t_state *state, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t gmx_unused ed, -+ t_forcerec *fr, -+ int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, -+ gmx_membed_t gmx_unused membed, -+ real gmx_unused cpt_period, real gmx_unused max_hours, -+ const char gmx_unused *deviceOptions, -+ int imdport, -+ unsigned long gmx_unused Flags, -+ gmx_walltime_accounting_t walltime_accounting) -+{ -+ static const char *LBFGS = "Low-Memory BFGS Minimizer"; -+ em_state_t ems; -+ gmx_localtop_t *top; -+ gmx_enerdata_t *enerd; -+ rvec *f; -+ gmx_global_stat_t gstat; -+ t_graph *graph; -+ rvec *f_global; -+ int ncorr, nmaxcorr, point, cp, neval, nminstep; -+ double stepsize, gpa, gpb, gpc, tmp, minstep; -+ real *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg; -+ real *xa, *xb, *xc, *fa, *fb, *fc, *xtmp, *ftmp; -+ real a, b, c, maxdelta, delta; -+ real diag, Epot0, Epot, EpotA, EpotB, EpotC; -+ real dgdx, dgdg, sq, yr, beta; -+ t_mdebin *mdebin; -+ gmx_bool converged, first; -+ rvec mu_tot; -+ real fnorm, fmax; -+ gmx_bool do_log, do_ene, do_x, do_f, foundlower, *frozen; -+ tensor vir, pres; -+ int start, end, number_steps; -+ gmx_mdoutf_t outf; -+ int i, k, m, n, nfmax, gf, step; -+ int mdof_flags; -+ /* not used */ -+ real terminate; -+ -+ if (PAR(cr)) -+ { -+ gmx_fatal(FARGS, "Cannot do parallel L-BFGS Minimization - yet.\n"); -+ } -+ -+ if (NULL != constr) -+ { -+ gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent)."); -+ } -+ -+ n = 3*state->natoms; -+ nmaxcorr = inputrec->nbfgscorr; -+ -+ /* Allocate memory */ -+ /* Use pointers to real so we dont have to loop over both atoms and -+ * dimensions all the time... -+ * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real -+ * that point to the same memory. 
-+ */ -+ snew(xa, n); -+ snew(xb, n); -+ snew(xc, n); -+ snew(fa, n); -+ snew(fb, n); -+ snew(fc, n); -+ snew(frozen, n); -+ -+ snew(p, n); -+ snew(lastx, n); -+ snew(lastf, n); -+ snew(rho, nmaxcorr); -+ snew(alpha, nmaxcorr); -+ -+ snew(dx, nmaxcorr); -+ for (i = 0; i < nmaxcorr; i++) -+ { -+ snew(dx[i], n); -+ } -+ -+ snew(dg, nmaxcorr); -+ for (i = 0; i < nmaxcorr; i++) -+ { -+ snew(dg[i], n); -+ } -+ -+ step = 0; -+ neval = 0; -+ -+ /* Init em */ -+ init_em(fplog, LBFGS, cr, inputrec, -+ state, top_global, &ems, &top, &f, &f_global, -+ nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, -+ nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); -+ /* Do_lbfgs is not completely updated like do_steep and do_cg, -+ * so we free some memory again. -+ */ -+ sfree(ems.s.x); -+ sfree(ems.f); -+ -+ xx = (real *)state->x; -+ ff = (real *)f; -+ -+ start = 0; -+ end = mdatoms->homenr; -+ -+ /* Print to log file */ -+ print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS); -+ -+ do_log = do_ene = do_x = do_f = TRUE; -+ -+ /* Max number of steps */ -+ number_steps = inputrec->nsteps; -+ -+ /* Create a 3*natoms index to tell whether each degree of freedom is frozen */ -+ gf = 0; -+ for (i = start; i < end; i++) -+ { -+ if (mdatoms->cFREEZE) -+ { -+ gf = mdatoms->cFREEZE[i]; -+ } -+ for (m = 0; m < DIM; m++) -+ { -+ frozen[3*i+m] = inputrec->opts.nFreeze[gf][m]; -+ } -+ } -+ if (MASTER(cr)) -+ { -+ sp_header(stderr, LBFGS, inputrec->em_tol, number_steps); -+ } -+ if (fplog) -+ { -+ sp_header(fplog, LBFGS, inputrec->em_tol, number_steps); -+ } -+ -+ if (vsite) -+ { -+ construct_vsites(vsite, state->x, 1, NULL, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, cr, state->box); -+ } -+ -+ /* Call the force routine and some auxiliary (neighboursearching etc.) */ -+ /* do_force always puts the charge groups in the box and shifts again -+ * We do not unshift, so molecules are always whole -+ */ -+ neval++; -+ ems.s.x = state->x; -+ ems.f = f; -+ evaluate_energy(fplog, cr, -+ top_global, &ems, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, TRUE); -+ where(); -+ -+ if (MASTER(cr)) -+ { -+ /* Copy stuff to the energy bin for easy printing etc. */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)step, -+ mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box, -+ NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ -+ print_ebin_header(fplog, step, step, state->lambda[efptFEP]); -+ print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ where(); -+ -+ /* This is the starting energy */ -+ Epot = enerd->term[F_EPOT]; -+ -+ fnorm = ems.fnorm; -+ fmax = ems.fmax; -+ nfmax = ems.a_fmax; -+ -+ /* Set the initial step. -+ * since it will be multiplied by the non-normalized search direction -+ * vector (force vector the first time), we scale it by the -+ * norm of the force. -+ */ -+ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr); -+ fprintf(stderr, " F-max = %12.5e on atom %d\n", fmax, nfmax+1); -+ fprintf(stderr, " F-Norm = %12.5e\n", fnorm/sqrt(state->natoms)); -+ fprintf(stderr, "\n"); -+ /* and copy to the log file too... 
*/ -+ fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr); -+ fprintf(fplog, " F-max = %12.5e on atom %d\n", fmax, nfmax+1); -+ fprintf(fplog, " F-Norm = %12.5e\n", fnorm/sqrt(state->natoms)); -+ fprintf(fplog, "\n"); -+ } -+ -+ point = 0; -+ for (i = 0; i < n; i++) -+ { -+ if (!frozen[i]) -+ { -+ dx[point][i] = ff[i]; /* Initial search direction */ -+ } -+ else -+ { -+ dx[point][i] = 0; -+ } -+ } -+ -+ stepsize = 1.0/fnorm; -+ converged = FALSE; -+ -+ /* Start the loop over BFGS steps. -+ * Each successful step is counted, and we continue until -+ * we either converge or reach the max number of steps. -+ */ -+ -+ ncorr = 0; -+ -+ /* Set the gradient from the force */ -+ converged = FALSE; -+ for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++) -+ { -+ -+ /* Write coordinates if necessary */ -+ do_x = do_per_step(step, inputrec->nstxout); -+ do_f = do_per_step(step, inputrec->nstfout); -+ -+ mdof_flags = 0; -+ if (do_x) -+ { -+ mdof_flags |= MDOF_X; -+ } -+ -+ if (do_f) -+ { -+ mdof_flags |= MDOF_F; -+ } -+ -+ if (inputrec->bIMD) -+ { -+ mdof_flags |= MDOF_IMD; -+ } -+ -+ mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, -+ top_global, step, (real)step, state, state, f, f); -+ -+ /* Do the linesearching in the direction dx[point][0..(n-1)] */ -+ -+ /* pointer to current direction - point=0 first time here */ -+ s = dx[point]; -+ -+ /* calculate line gradient */ -+ for (gpa = 0, i = 0; i < n; i++) -+ { -+ gpa -= s[i]*ff[i]; -+ } -+ -+ /* Calculate minimum allowed stepsize, before the average (norm) -+ * relative change in coordinate is smaller than precision -+ */ -+ for (minstep = 0, i = 0; i < n; i++) -+ { -+ tmp = fabs(xx[i]); -+ if (tmp < 1.0) -+ { -+ tmp = 1.0; -+ } -+ tmp = s[i]/tmp; -+ minstep += tmp*tmp; -+ } -+ minstep = GMX_REAL_EPS/sqrt(minstep/n); -+ -+ if (stepsize < minstep) -+ { -+ converged = TRUE; -+ break; -+ } -+ -+ /* Store old forces and coordinates */ -+ for (i = 0; i < n; i++) -+ { -+ lastx[i] = xx[i]; -+ lastf[i] = ff[i]; -+ } -+ Epot0 = Epot; -+ -+ first = TRUE; -+ -+ for (i = 0; i < n; i++) -+ { -+ xa[i] = xx[i]; -+ } -+ -+ /* Take a step downhill. -+ * In theory, we should minimize the function along this direction. -+ * That is quite possible, but it turns out to take 5-10 function evaluations -+ * for each line. However, we dont really need to find the exact minimum - -+ * it is much better to start a new BFGS step in a modified direction as soon -+ * as we are close to it. This will save a lot of energy evaluations. -+ * -+ * In practice, we just try to take a single step. -+ * If it worked (i.e. lowered the energy), we increase the stepsize but -+ * the continue straight to the next BFGS step without trying to find any minimum. -+ * If it didn't work (higher energy), there must be a minimum somewhere between -+ * the old position and the new one. -+ * -+ * Due to the finite numerical accuracy, it turns out that it is a good idea -+ * to even accept a SMALL increase in energy, if the derivative is still downhill. -+ * This leads to lower final energies in the tests I've done. / Erik -+ */ -+ foundlower = FALSE; -+ EpotA = Epot0; -+ a = 0.0; -+ c = a + stepsize; /* reference position along line is zero */ -+ -+ /* Check stepsize first. We do not allow displacements -+ * larger than emstep. 
-+ */ -+ do -+ { -+ c = a + stepsize; -+ maxdelta = 0; -+ for (i = 0; i < n; i++) -+ { -+ delta = c*s[i]; -+ if (delta > maxdelta) -+ { -+ maxdelta = delta; -+ } -+ } -+ if (maxdelta > inputrec->em_stepsize) -+ { -+ stepsize *= 0.1; -+ } -+ } -+ while (maxdelta > inputrec->em_stepsize); -+ -+ /* Take a trial step */ -+ for (i = 0; i < n; i++) -+ { -+ xc[i] = lastx[i] + c*s[i]; -+ } -+ -+ neval++; -+ /* Calculate energy for the trial step */ -+ ems.s.x = (rvec *)xc; -+ ems.f = (rvec *)fc; -+ evaluate_energy(fplog, cr, -+ top_global, &ems, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, step, FALSE); -+ EpotC = ems.epot; -+ -+ /* Calc derivative along line */ -+ for (gpc = 0, i = 0; i < n; i++) -+ { -+ gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */ -+ } -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpc, cr); -+ } -+ -+ /* This is the max amount of increase in energy we tolerate */ -+ tmp = sqrt(GMX_REAL_EPS)*fabs(EpotA); -+ -+ /* Accept the step if the energy is lower, or if it is not significantly higher -+ * and the line derivative is still negative. -+ */ -+ if (EpotC < EpotA || (gpc < 0 && EpotC < (EpotA+tmp))) -+ { -+ foundlower = TRUE; -+ /* Great, we found a better energy. Increase step for next iteration -+ * if we are still going down, decrease it otherwise -+ */ -+ if (gpc < 0) -+ { -+ stepsize *= 1.618034; /* The golden section */ -+ } -+ else -+ { -+ stepsize *= 0.618034; /* 1/golden section */ -+ } -+ } -+ else -+ { -+ /* New energy is the same or higher. We will have to do some work -+ * to find a smaller value in the interval. Take smaller step next time! -+ */ -+ foundlower = FALSE; -+ stepsize *= 0.618034; -+ } -+ -+ /* OK, if we didn't find a lower value we will have to locate one now - there must -+ * be one in the interval [a=0,c]. -+ * The same thing is valid here, though: Don't spend dozens of iterations to find -+ * the line minimum. We try to interpolate based on the derivative at the endpoints, -+ * and only continue until we find a lower value. In most cases this means 1-2 iterations. -+ * -+ * I also have a safeguard for potentially really patological functions so we never -+ * take more than 20 steps before we give up ... -+ * -+ * If we already found a lower value we just skip this step and continue to the update. -+ */ -+ -+ if (!foundlower) -+ { -+ -+ nminstep = 0; -+ do -+ { -+ /* Select a new trial point. -+ * If the derivatives at points a & c have different sign we interpolate to zero, -+ * otherwise just do a bisection. 
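Both line searches in this patch (the conjugate-gradient one above and the L-BFGS one continuing below) pick the next trial point b inside the bracket [a, c] the same way: if the directional derivatives at the endpoints change sign, take a secant step towards the zero of the derivative, otherwise bisect, and clamp back into the interval if round-off pushes b outside. A compilable sketch of just that selection rule, with illustrative names:

/* gpa and gpc are d(Epot)/d(step) at the bracket endpoints a and c. */
double trial_point_sketch(double a, double c, double gpa, double gpc)
{
    double b;

    if (gpa < 0 && gpc > 0)
    {
        /* Derivatives bracket a zero: secant interpolation towards the root. */
        b = a + gpa * (a - c) / (gpc - gpa);
    }
    else
    {
        b = 0.5 * (a + c);  /* no sign change: plain bisection */
    }

    /* Safeguard against machine-precision round-off: never leave [a, c]. */
    if (b <= a || b >= c)
    {
        b = 0.5 * (a + c);
    }
    return b;
}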
-+ */ -+ -+ if (gpa < 0 && gpc > 0) -+ { -+ b = a + gpa*(a-c)/(gpc-gpa); -+ } -+ else -+ { -+ b = 0.5*(a+c); -+ } -+ -+ /* safeguard if interpolation close to machine accuracy causes errors: -+ * never go outside the interval -+ */ -+ if (b <= a || b >= c) -+ { -+ b = 0.5*(a+c); -+ } -+ -+ /* Take a trial step */ -+ for (i = 0; i < n; i++) -+ { -+ xb[i] = lastx[i] + b*s[i]; -+ } -+ -+ neval++; -+ /* Calculate energy for the trial step */ -+ ems.s.x = (rvec *)xb; -+ ems.f = (rvec *)fb; -+ evaluate_energy(fplog, cr, -+ top_global, &ems, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, step, FALSE); -+ EpotB = ems.epot; -+ -+ fnorm = ems.fnorm; -+ -+ for (gpb = 0, i = 0; i < n; i++) -+ { -+ gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */ -+ -+ } -+ /* Sum the gradient along the line across CPUs */ -+ if (PAR(cr)) -+ { -+ gmx_sumd(1, &gpb, cr); -+ } -+ -+ /* Keep one of the intervals based on the value of the derivative at the new point */ -+ if (gpb > 0) -+ { -+ /* Replace c endpoint with b */ -+ EpotC = EpotB; -+ c = b; -+ gpc = gpb; -+ /* swap coord pointers b/c */ -+ xtmp = xb; -+ ftmp = fb; -+ xb = xc; -+ fb = fc; -+ xc = xtmp; -+ fc = ftmp; -+ } -+ else -+ { -+ /* Replace a endpoint with b */ -+ EpotA = EpotB; -+ a = b; -+ gpa = gpb; -+ /* swap coord pointers a/b */ -+ xtmp = xb; -+ ftmp = fb; -+ xb = xa; -+ fb = fa; -+ xa = xtmp; -+ fa = ftmp; -+ } -+ -+ /* -+ * Stop search as soon as we find a value smaller than the endpoints, -+ * or if the tolerance is below machine precision. -+ * Never run more than 20 steps, no matter what. -+ */ -+ nminstep++; -+ } -+ while ((EpotB > EpotA || EpotB > EpotC) && (nminstep < 20)); -+ -+ if (fabs(EpotB-Epot0) < GMX_REAL_EPS || nminstep >= 20) -+ { -+ /* OK. We couldn't find a significantly lower energy. -+ * If ncorr==0 this was steepest descent, and then we give up. -+ * If not, reset memory to restart as steepest descent before quitting. -+ */ -+ if (ncorr == 0) -+ { -+ /* Converged */ -+ converged = TRUE; -+ break; -+ } -+ else -+ { -+ /* Reset memory */ -+ ncorr = 0; -+ /* Search in gradient direction */ -+ for (i = 0; i < n; i++) -+ { -+ dx[point][i] = ff[i]; -+ } -+ /* Reset stepsize */ -+ stepsize = 1.0/fnorm; -+ continue; -+ } -+ } -+ -+ /* Select min energy state of A & C, put the best in xx/ff/Epot -+ */ -+ if (EpotC < EpotA) -+ { -+ Epot = EpotC; -+ /* Use state C */ -+ for (i = 0; i < n; i++) -+ { -+ xx[i] = xc[i]; -+ ff[i] = fc[i]; -+ } -+ stepsize = c; -+ } -+ else -+ { -+ Epot = EpotA; -+ /* Use state A */ -+ for (i = 0; i < n; i++) -+ { -+ xx[i] = xa[i]; -+ ff[i] = fa[i]; -+ } -+ stepsize = a; -+ } -+ -+ } -+ else -+ { -+ /* found lower */ -+ Epot = EpotC; -+ /* Use state C */ -+ for (i = 0; i < n; i++) -+ { -+ xx[i] = xc[i]; -+ ff[i] = fc[i]; -+ } -+ stepsize = c; -+ } -+ -+ /* Update the memory information, and calculate a new -+ * approximation of the inverse hessian -+ */ -+ -+ /* Have new data in Epot, xx, ff */ -+ if (ncorr < nmaxcorr) -+ { -+ ncorr++; -+ } -+ -+ for (i = 0; i < n; i++) -+ { -+ dg[point][i] = lastf[i]-ff[i]; -+ dx[point][i] *= stepsize; -+ } -+ -+ dgdg = 0; -+ dgdx = 0; -+ for (i = 0; i < n; i++) -+ { -+ dgdg += dg[point][i]*dg[point][i]; -+ dgdx += dg[point][i]*dx[point][i]; -+ } -+ -+ diag = dgdx/dgdg; -+ -+ rho[point] = 1.0/dgdx; -+ point++; -+ -+ if (point >= nmaxcorr) -+ { -+ point = 0; -+ } -+ -+ /* Update */ -+ for (i = 0; i < n; i++) -+ { -+ p[i] = ff[i]; -+ } -+ -+ cp = point; -+ -+ /* Recursive update. 
First go back over the memory points */ -+ for (k = 0; k < ncorr; k++) -+ { -+ cp--; -+ if (cp < 0) -+ { -+ cp = ncorr-1; -+ } -+ -+ sq = 0; -+ for (i = 0; i < n; i++) -+ { -+ sq += dx[cp][i]*p[i]; -+ } -+ -+ alpha[cp] = rho[cp]*sq; -+ -+ for (i = 0; i < n; i++) -+ { -+ p[i] -= alpha[cp]*dg[cp][i]; -+ } -+ } -+ -+ for (i = 0; i < n; i++) -+ { -+ p[i] *= diag; -+ } -+ -+ /* And then go forward again */ -+ for (k = 0; k < ncorr; k++) -+ { -+ yr = 0; -+ for (i = 0; i < n; i++) -+ { -+ yr += p[i]*dg[cp][i]; -+ } -+ -+ beta = rho[cp]*yr; -+ beta = alpha[cp]-beta; -+ -+ for (i = 0; i < n; i++) -+ { -+ p[i] += beta*dx[cp][i]; -+ } -+ -+ cp++; -+ if (cp >= ncorr) -+ { -+ cp = 0; -+ } -+ } -+ -+ for (i = 0; i < n; i++) -+ { -+ if (!frozen[i]) -+ { -+ dx[point][i] = p[i]; -+ } -+ else -+ { -+ dx[point][i] = 0; -+ } -+ } -+ -+ stepsize = 1.0; -+ -+ /* Test whether the convergence criterion is met */ -+ get_f_norm_max(cr, &(inputrec->opts), mdatoms, f, &fnorm, &fmax, &nfmax); -+ -+ /* Print it if necessary */ -+ if (MASTER(cr)) -+ { -+ if (bVerbose) -+ { -+ fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", -+ step, Epot, fnorm/sqrt(state->natoms), fmax, nfmax+1); -+ } -+ /* Store the new (lower) energies */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)step, -+ mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box, -+ NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ do_log = do_per_step(step, inputrec->nstlog); -+ do_ene = do_per_step(step, inputrec->nstenergy); -+ if (do_log) -+ { -+ print_ebin_header(fplog, step, step, state->lambda[efptFEP]); -+ } -+ print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, -+ do_log ? fplog : NULL, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ -+ /* Send x and E to IMD client, if bIMD is TRUE. */ -+ if (do_IMD(inputrec->bIMD, step, cr, TRUE, state->box, state->x, inputrec, 0, wcycle) && MASTER(cr)) -+ { -+ IMD_send_positions(inputrec->imd); -+ } -+ -+ /* Stop when the maximum force lies below tolerance. -+ * If we have reached machine precision, converged is already set to true. -+ */ -+ -+ converged = converged || (fmax < inputrec->em_tol); -+ -+ } /* End of the loop */ -+ -+ /* IMD cleanup, if bIMD is TRUE. */ -+ IMD_finalize(inputrec->bIMD, inputrec->imd); -+ -+ if (converged) -+ { -+ step--; /* we never took that last step in this case */ -+ -+ } -+ if (fmax > inputrec->em_tol) -+ { -+ if (MASTER(cr)) -+ { -+ warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE); -+ warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE); -+ } -+ converged = FALSE; -+ } -+ -+ /* If we printed energy and/or logfile last step (which was the last step) -+ * we don't have to do it again, but otherwise print the final values. -+ */ -+ if (!do_log) /* Write final value to log since we didn't do anythin last step */ -+ { -+ print_ebin_header(fplog, step, step, state->lambda[efptFEP]); -+ } -+ if (!do_ene || !do_log) /* Write final energy file entries */ -+ { -+ print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, -+ !do_log ? fplog : NULL, step, step, eprNORMAL, -+ TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ } -+ -+ /* Print some stuff... */ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "\nwriting lowest energy coordinates.\n"); -+ } -+ -+ /* IMPORTANT! -+ * For accurate normal mode calculation it is imperative that we -+ * store the last conformation into the full precision binary trajectory. 
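The backward and forward passes above are the standard L-BFGS two-loop recursion over the ncorr stored displacement/gradient-difference pairs, with dgdx/dgdg playing the role of the initial diagonal inverse-Hessian guess. A compact generic version in flat double arrays follows, for readers who want the algorithm without the ring-buffer bookkeeping; all names here are illustrative rather than taken from the patched source.

#include <stddef.h>

static double dot(const double *x, const double *y, size_t n)
{
    double s = 0.0;
    for (size_t i = 0; i < n; i++)
    {
        s += x[i] * y[i];
    }
    return s;
}

/* g: current gradient; s[k], y[k]: the m stored position and gradient
 * differences, oldest first; alpha: scratch of length m; p: output, an
 * approximation of H^{-1} g (the descent direction is then -p). */
void lbfgs_two_loop_sketch(const double *g, double **s, double **y,
                           size_t m, size_t n, double *alpha, double *p)
{
    for (size_t i = 0; i < n; i++)
    {
        p[i] = g[i];
    }

    for (size_t k = m; k-- > 0; )  /* backward pass, newest pair first */
    {
        double rho = 1.0 / dot(y[k], s[k], n);
        alpha[k] = rho * dot(s[k], p, n);
        for (size_t i = 0; i < n; i++)
        {
            p[i] -= alpha[k] * y[k][i];
        }
    }

    if (m > 0)  /* scale by the initial inverse-Hessian estimate ("diag" above) */
    {
        double gamma = dot(s[m-1], y[m-1], n) / dot(y[m-1], y[m-1], n);
        for (size_t i = 0; i < n; i++)
        {
            p[i] *= gamma;
        }
    }

    for (size_t k = 0; k < m; k++)  /* forward pass, oldest pair first */
    {
        double rho  = 1.0 / dot(y[k], s[k], n);
        double beta = rho * dot(y[k], p, n);
        for (size_t i = 0; i < n; i++)
        {
            p[i] += (alpha[k] - beta) * s[k][i];
        }
    }
}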
-+ * -+ * However, we should only do it if we did NOT already write this step -+ * above (which we did if do_x or do_f was true). -+ */ -+ do_x = !do_per_step(step, inputrec->nstxout); -+ do_f = !do_per_step(step, inputrec->nstfout); -+ write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), -+ top_global, inputrec, step, -+ &ems, state, f); -+ -+ if (MASTER(cr)) -+ { -+ print_converged(stderr, LBFGS, inputrec->em_tol, step, converged, -+ number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms)); -+ print_converged(fplog, LBFGS, inputrec->em_tol, step, converged, -+ number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms)); -+ -+ fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); -+ } -+ -+ finish_em(cr, outf, walltime_accounting, wcycle); -+ -+ /* To print the actual number of steps we needed somewhere */ -+ walltime_accounting_set_nsteps_done(walltime_accounting, step); -+ -+ return 0; -+} /* That's all folks */ -+ -+ -+double do_steep(FILE *fplog, t_commrec *cr, -+ int nfile, const t_filenm fnm[], -+ const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact, -+ int gmx_unused nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int gmx_unused stepout, -+ t_inputrec *inputrec, -+ gmx_mtop_t *top_global, t_fcdata *fcd, -+ t_state *state_global, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t gmx_unused ed, -+ t_forcerec *fr, -+ int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, -+ gmx_membed_t gmx_unused membed, -+ real gmx_unused cpt_period, real gmx_unused max_hours, -+ const char gmx_unused *deviceOptions, -+ int imdport, -+ unsigned long gmx_unused Flags, -+ gmx_walltime_accounting_t walltime_accounting) -+{ -+ const char *SD = "Steepest Descents"; -+ em_state_t *s_min, *s_try; -+ rvec *f_global; -+ gmx_localtop_t *top; -+ gmx_enerdata_t *enerd; -+ rvec *f; -+ gmx_global_stat_t gstat; -+ t_graph *graph; -+ real stepsize, constepsize; -+ real ustep, fnormn; -+ gmx_mdoutf_t outf; -+ t_mdebin *mdebin; -+ gmx_bool bDone, bAbort, do_x, do_f; -+ tensor vir, pres; -+ rvec mu_tot; -+ int nsteps; -+ int count = 0; -+ int steps_accepted = 0; -+ /* not used */ -+ real terminate = 0; -+ -+ s_min = init_em_state(); -+ s_try = init_em_state(); -+ -+ /* Init em and store the local state in s_try */ -+ init_em(fplog, SD, cr, inputrec, -+ state_global, top_global, s_try, &top, &f, &f_global, -+ nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, -+ nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); -+ -+ /* Print to log file */ -+ print_em_start(fplog, cr, walltime_accounting, wcycle, SD); -+ -+ /* Set variables for stepsize (in nm). This is the largest -+ * step that we are going to make in any direction. 
-+ */ -+ ustep = inputrec->em_stepsize; -+ stepsize = 0; -+ -+ /* Max number of steps */ -+ nsteps = inputrec->nsteps; -+ -+ if (MASTER(cr)) -+ { -+ /* Print to the screen */ -+ sp_header(stderr, SD, inputrec->em_tol, nsteps); -+ } -+ if (fplog) -+ { -+ sp_header(fplog, SD, inputrec->em_tol, nsteps); -+ } -+ -+ /**** HERE STARTS THE LOOP **** -+ * count is the counter for the number of steps -+ * bDone will be TRUE when the minimization has converged -+ * bAbort will be TRUE when nsteps steps have been performed or when -+ * the stepsize becomes smaller than is reasonable for machine precision -+ */ -+ count = 0; -+ bDone = FALSE; -+ bAbort = FALSE; -+ while (!bDone && !bAbort) -+ { -+ bAbort = (nsteps >= 0) && (count == nsteps); -+ -+ /* set new coordinates, except for first step */ -+ if (count > 0) -+ { -+ do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, -+ s_min, stepsize, s_min->f, s_try, -+ constr, top, nrnb, wcycle, count); -+ } -+ -+ evaluate_energy(fplog, cr, -+ top_global, s_try, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, count, count == 0); -+ -+ if (MASTER(cr)) -+ { -+ print_ebin_header(fplog, count, count, s_try->s.lambda[efptFEP]); -+ } -+ -+ if (count == 0) -+ { -+ s_min->epot = s_try->epot + 1; -+ } -+ -+ /* Print it if necessary */ -+ if (MASTER(cr)) -+ { -+ if (bVerbose) -+ { -+ fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c", -+ count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1, -+ (s_try->epot < s_min->epot) ? '\n' : '\r'); -+ } -+ -+ if (s_try->epot < s_min->epot) -+ { -+ /* Store the new (lower) energies */ -+ upd_mdebin(mdebin, FALSE, FALSE, (double)count, -+ mdatoms->tmass, enerd, &s_try->s, inputrec->fepvals, inputrec->expandedvals, -+ s_try->s.box, NULL, NULL, vir, pres, NULL, mu_tot, constr); -+ -+ /* Prepare IMD energy record, if bIMD is TRUE. */ -+ IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, count, TRUE); -+ -+ print_ebin(mdoutf_get_fp_ene(outf), TRUE, -+ do_per_step(steps_accepted, inputrec->nstdisreout), -+ do_per_step(steps_accepted, inputrec->nstorireout), -+ fplog, count, count, eprNORMAL, TRUE, -+ mdebin, fcd, &(top_global->groups), &(inputrec->opts)); -+ fflush(fplog); -+ } -+ } -+ -+ /* Now if the new energy is smaller than the previous... -+ * or if this is the first step! -+ * or if we did random steps! -+ */ -+ -+ if ( (count == 0) || (s_try->epot < s_min->epot) ) -+ { -+ steps_accepted++; -+ -+ /* Test whether the convergence criterion is met... */ -+ bDone = (s_try->fmax < inputrec->em_tol); -+ -+ /* Copy the arrays for force, positions and energy */ -+ /* The 'Min' array always holds the coords and forces of the minimal -+ sampled energy */ -+ swap_em_state(s_min, s_try); -+ if (count > 0) -+ { -+ ustep *= 1.2; -+ } -+ -+ /* Write to trn, if necessary */ -+ do_x = do_per_step(steps_accepted, inputrec->nstxout); -+ do_f = do_per_step(steps_accepted, inputrec->nstfout); -+ write_em_traj(fplog, cr, outf, do_x, do_f, NULL, -+ top_global, inputrec, count, -+ s_min, state_global, f_global); -+ } -+ else -+ { -+ /* If energy is not smaller make the step smaller... 
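The steepest-descent driver above boils down to one control rule on the maximum displacement ustep: after an accepted step (lower energy) enlarge it by a factor 1.2, after a rejected step halve it (the branch continuing just below), and give up once ustep falls under a precision-dependent floor (1e-6 nm in single, 1e-12 nm in double) or the force tolerance is met. A minimal sketch of that accept/reject loop against a caller-supplied trial-energy callback; the callback type and all names are invented for illustration.

/* Returns the energy of a trial configuration displaced by at most ustep (nm). */
typedef double (*trial_energy_fn)(double ustep, void *ctx);

int steep_stepsize_sketch(trial_energy_fn ene, void *ctx,
                          double ustep, int nsteps, double e_start)
{
    double e_min    = e_start;
    int    accepted = 0;

    for (int count = 0; count < nsteps && ustep > 1e-6; count++)
    {
        double e_try = ene(ustep, ctx);
        if (e_try < e_min)
        {
            e_min  = e_try;
            ustep *= 1.2;  /* accepted: take slightly bolder steps */
            accepted++;
        }
        else
        {
            ustep *= 0.5;  /* rejected: retreat and shorten the step */
        }
    }
    return accepted;       /* number of energy-lowering steps taken */
}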
*/ -+ ustep *= 0.5; -+ -+ if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) -+ { -+ /* Reload the old state */ -+ em_dd_partition_system(fplog, count, cr, top_global, inputrec, -+ s_min, top, mdatoms, fr, vsite, constr, -+ nrnb, wcycle); -+ } -+ } -+ -+ /* Determine new step */ -+ stepsize = ustep/s_min->fmax; -+ -+ /* Check if stepsize is too small, with 1 nm as a characteristic length */ -+#ifdef GMX_DOUBLE -+ if (count == nsteps || ustep < 1e-12) -+#else -+ if (count == nsteps || ustep < 1e-6) -+#endif -+ { -+ if (MASTER(cr)) -+ { -+ warn_step(stderr, inputrec->em_tol, count == nsteps, constr != NULL); -+ warn_step(fplog, inputrec->em_tol, count == nsteps, constr != NULL); -+ } -+ bAbort = TRUE; -+ } -+ -+ /* Send IMD energies and positions, if bIMD is TRUE. */ -+ if (do_IMD(inputrec->bIMD, count, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr)) -+ { -+ IMD_send_positions(inputrec->imd); -+ } -+ -+ count++; -+ } /* End of the loop */ -+ -+ /* IMD cleanup, if bIMD is TRUE. */ -+ IMD_finalize(inputrec->bIMD, inputrec->imd); -+ -+ /* Print some data... */ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "\nwriting lowest energy coordinates.\n"); -+ } -+ write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm), -+ top_global, inputrec, count, -+ s_min, state_global, f_global); -+ -+ fnormn = s_min->fnorm/sqrt(state_global->natoms); -+ -+ if (MASTER(cr)) -+ { -+ print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps, -+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); -+ print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps, -+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); -+ } -+ -+ finish_em(cr, outf, walltime_accounting, wcycle); -+ -+ /* To print the actual number of steps we needed somewhere */ -+ inputrec->nsteps = count; -+ -+ walltime_accounting_set_nsteps_done(walltime_accounting, count); -+ -+ return 0; -+} /* That's all folks */ -+ -+ -+double do_nm(FILE *fplog, t_commrec *cr, -+ int nfile, const t_filenm fnm[], -+ const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact, -+ int gmx_unused nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int gmx_unused stepout, -+ t_inputrec *inputrec, -+ gmx_mtop_t *top_global, t_fcdata *fcd, -+ t_state *state_global, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t gmx_unused ed, -+ t_forcerec *fr, -+ int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, -+ gmx_membed_t gmx_unused membed, -+ real gmx_unused cpt_period, real gmx_unused max_hours, -+ const char gmx_unused *deviceOptions, -+ int imdport, -+ unsigned long gmx_unused Flags, -+ gmx_walltime_accounting_t walltime_accounting) -+{ -+ const char *NM = "Normal Mode Analysis"; -+ gmx_mdoutf_t outf; -+ int natoms, atom, d; -+ int nnodes, node; -+ rvec *f_global; -+ gmx_localtop_t *top; -+ gmx_enerdata_t *enerd; -+ rvec *f; -+ gmx_global_stat_t gstat; -+ t_graph *graph; -+ real t, t0, lambda, lam0; -+ gmx_bool bNS; -+ tensor vir, pres; -+ rvec mu_tot; -+ rvec *fneg, *dfdx; -+ gmx_bool bSparse; /* use sparse matrix storage format */ -+ size_t sz = 0; -+ gmx_sparsematrix_t * sparse_matrix = NULL; -+ real * full_matrix = NULL; -+ em_state_t * state_work; -+ -+ /* added with respect to mdrun */ -+ int i, j, k, row, col; -+ real der_range = 10.0*sqrt(GMX_REAL_EPS); -+ real x_min; -+ real fnorm, fmax; -+ -+ if (constr != NULL) -+ { -+ gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this 
combination is not supported"); -+ } -+ -+ state_work = init_em_state(); -+ -+ /* Init em and store the local state in state_minimum */ -+ init_em(fplog, NM, cr, inputrec, -+ state_global, top_global, state_work, &top, -+ &f, &f_global, -+ nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, -+ nfile, fnm, &outf, NULL, imdport, Flags, wcycle); -+ -+ natoms = top_global->natoms; -+ snew(fneg, natoms); -+ snew(dfdx, natoms); -+ -+#ifndef GMX_DOUBLE -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, -+ "NOTE: This version of Gromacs has been compiled in single precision,\n" -+ " which MIGHT not be accurate enough for normal mode analysis.\n" -+ " Gromacs now uses sparse matrix storage, so the memory requirements\n" -+ " are fairly modest even if you recompile in double precision.\n\n"); -+ } -+#endif -+ -+ /* Check if we can/should use sparse storage format. -+ * -+ * Sparse format is only useful when the Hessian itself is sparse, which it -+ * will be when we use a cutoff. -+ * For small systems (n<1000) it is easier to always use full matrix format, though. -+ */ -+ if (EEL_FULL(fr->eeltype) || fr->rlist == 0.0) -+ { -+ md_print_info(cr, fplog, "Non-cutoff electrostatics used, forcing full Hessian format.\n"); -+ bSparse = FALSE; -+ } -+ else if (top_global->natoms < 1000) -+ { -+ md_print_info(cr, fplog, "Small system size (N=%d), using full Hessian format.\n", top_global->natoms); -+ bSparse = FALSE; -+ } -+ else -+ { -+ md_print_info(cr, fplog, "Using compressed symmetric sparse Hessian format.\n"); -+ bSparse = TRUE; -+ } -+ -+ if (MASTER(cr)) -+ { -+ sz = DIM*top_global->natoms; -+ -+ fprintf(stderr, "Allocating Hessian memory...\n\n"); -+ -+ if (bSparse) -+ { -+ sparse_matrix = gmx_sparsematrix_init(sz); -+ sparse_matrix->compressed_symmetric = TRUE; -+ } -+ else -+ { -+ snew(full_matrix, sz*sz); -+ } -+ } -+ -+ /* Initial values */ -+ t0 = inputrec->init_t; -+ lam0 = inputrec->fepvals->init_lambda; -+ t = t0; -+ lambda = lam0; -+ -+ init_nrnb(nrnb); -+ -+ where(); -+ -+ /* Write start time and temperature */ -+ print_em_start(fplog, cr, walltime_accounting, wcycle, NM); -+ -+ /* fudge nr of steps to nr of atoms */ -+ inputrec->nsteps = natoms*2; -+ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "starting normal mode calculation '%s'\n%d steps.\n\n", -+ *(top_global->name), (int)inputrec->nsteps); -+ } -+ -+ nnodes = cr->nnodes; -+ -+ /* Make evaluate_energy do a single node force calculation */ -+ cr->nnodes = 1; -+ evaluate_energy(fplog, cr, -+ top_global, state_work, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, -1, TRUE); -+ cr->nnodes = nnodes; -+ -+ /* if forces are not small, warn user */ -+ get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, state_work); -+ -+ md_print_info(cr, fplog, "Maximum force:%12.5e\n", state_work->fmax); -+ if (state_work->fmax > 1.0e-3) -+ { -+ md_print_info(cr, fplog, -+ "The force is probably not small enough to " -+ "ensure that you are at a minimum.\n" -+ "Be aware that negative eigenvalues may occur\n" -+ "when the resulting matrix is diagonalized.\n\n"); -+ } -+ -+ /*********************************************************** -+ * -+ * Loop over all pairs in matrix -+ * -+ * do_force called twice. 
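The normal-mode loop that follows builds one Hessian row per displaced degree of freedom from a central finite difference of the forces: shift that coordinate by -der_range and +der_range, call do_force both times, and divide the force difference by 2*der_range, with a sign flip because forces are negative gradients. A small sketch of how one row of a dense 3N x 3N Hessian is filled from the two force arrays; names and the row-major layout are illustrative.

#include <stddef.h>

/* fneg/fpos: full force arrays (length n3 = 3*natoms) evaluated with the
 * displaced coordinate at x - h and x + h; H is a dense row-major matrix. */
void hessian_row_sketch(double *H, size_t n3, size_t row,
                        const double *fneg, const double *fpos, double h)
{
    for (size_t col = 0; col < n3; col++)
    {
        /* f = -dE/dx, so -(f(+h) - f(-h)) / (2h) approximates d2E/dx_row dx_col */
        H[row * n3 + col] = -(fpos[col] - fneg[col]) / (2.0 * h);
    }
}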
Once with positive and -+ * once with negative displacement -+ * -+ ************************************************************/ -+ -+ /* Steps are divided one by one over the nodes */ -+ for (atom = cr->nodeid; atom < natoms; atom += nnodes) -+ { -+ -+ for (d = 0; d < DIM; d++) -+ { -+ x_min = state_work->s.x[atom][d]; -+ -+ state_work->s.x[atom][d] = x_min - der_range; -+ -+ /* Make evaluate_energy do a single node force calculation */ -+ cr->nnodes = 1; -+ evaluate_energy(fplog, cr, -+ top_global, state_work, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, atom*2, FALSE); -+ -+ for (i = 0; i < natoms; i++) -+ { -+ copy_rvec(state_work->f[i], fneg[i]); -+ } -+ -+ state_work->s.x[atom][d] = x_min + der_range; -+ -+ evaluate_energy(fplog, cr, -+ top_global, state_work, top, -+ inputrec, nrnb, wcycle, gstat, -+ vsite, constr, fcd, graph, mdatoms, fr, -+ mu_tot, enerd, vir, pres, atom*2+1, FALSE); -+ cr->nnodes = nnodes; -+ -+ /* x is restored to original */ -+ state_work->s.x[atom][d] = x_min; -+ -+ for (j = 0; j < natoms; j++) -+ { -+ for (k = 0; (k < DIM); k++) -+ { -+ dfdx[j][k] = -+ -(state_work->f[j][k] - fneg[j][k])/(2*der_range); -+ } -+ } -+ -+ if (!MASTER(cr)) -+ { -+#ifdef GMX_MPI -+#ifdef GMX_DOUBLE -+#define mpi_type MPI_DOUBLE -+#else -+#define mpi_type MPI_FLOAT -+#endif -+ MPI_Send(dfdx[0], natoms*DIM, mpi_type, MASTERNODE(cr), cr->nodeid, -+ cr->mpi_comm_mygroup); -+#endif -+ } -+ else -+ { -+ for (node = 0; (node < nnodes && atom+node < natoms); node++) -+ { -+ if (node > 0) -+ { -+#ifdef GMX_MPI -+ MPI_Status stat; -+ MPI_Recv(dfdx[0], natoms*DIM, mpi_type, node, node, -+ cr->mpi_comm_mygroup, &stat); -+#undef mpi_type -+#endif -+ } -+ -+ row = (atom + node)*DIM + d; -+ -+ for (j = 0; j < natoms; j++) -+ { -+ for (k = 0; k < DIM; k++) -+ { -+ col = j*DIM + k; -+ -+ if (bSparse) -+ { -+ if (col >= row && dfdx[j][k] != 0.0) -+ { -+ gmx_sparsematrix_increment_value(sparse_matrix, -+ row, col, dfdx[j][k]); -+ } -+ } -+ else -+ { -+ full_matrix[row*sz+col] = dfdx[j][k]; -+ } -+ } -+ } -+ } -+ } -+ -+ if (bVerbose && fplog) -+ { -+ fflush(fplog); -+ } -+ } -+ /* write progress */ -+ if (MASTER(cr) && bVerbose) -+ { -+ fprintf(stderr, "\rFinished step %d out of %d", -+ min(atom+nnodes, natoms), natoms); -+ fflush(stderr); -+ } -+ } -+ -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, "\n\nWriting Hessian...\n"); -+ gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix); -+ } -+ -+ finish_em(cr, outf, walltime_accounting, wcycle); -+ -+ walltime_accounting_set_nsteps_done(walltime_accounting, natoms*2); -+ -+ return 0; -+} -diff --git a/src/programs/mdrun/md.c b/src/programs/mdrun/md.c -index 3d98d59..b34d23c 100644 ---- a/src/programs/mdrun/md.c -+++ b/src/programs/mdrun/md.c -@@ -96,6 +96,12 @@ - #include "gromacs/swap/swapcoords.h" - #include "gromacs/imd/imd.h" - -+/* PLUMED */ -+#include "../../../Plumed.h" -+extern int plumedswitch; -+extern plumed plumedmain; -+/* END PLUMED */ -+ - #ifdef GMX_FAHCORE - #include "corewrap.h" - #endif -@@ -224,6 +230,12 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], - /* Interactive MD */ - gmx_bool bIMDstep = FALSE; - -+ /* PLUMED */ -+ int plumedNeedsEnergy=0; -+ int plumedWantsToStop=0; -+ matrix plumed_vir; -+ /* END PLUMED */ -+ - #ifdef GMX_FAHCORE - /* Temporary addition for FAHCORE checkpointing */ - int chkpt_ret; -@@ -651,6 +663,48 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], - 
fprintf(fplog, "\n"); - } - -+ /* PLUMED */ -+ if(plumedswitch){ -+ /* detect plumed API version */ -+ int pversion=0; -+ plumed_cmd(plumedmain,"getApiVersion",&pversion); -+ /* setting kbT is only implemented with api>1) */ -+ real kbT=ir->opts.ref_t[0]*BOLTZ; -+ if(pversion>1) plumed_cmd(plumedmain,"setKbT",&kbT); -+ -+ if(cr->ms && cr->ms->nsim>1) { -+ if(MASTER(cr)) plumed_cmd(plumedmain,"GREX setMPIIntercomm",&cr->ms->mpi_comm_masters); -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all); -+ }else{ -+ plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim); -+ } -+ } -+ plumed_cmd(plumedmain,"GREX init",NULL); -+ } -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ plumed_cmd(plumedmain,"setMPIComm",&cr->dd->mpi_comm_all); -+ } -+ } -+ plumed_cmd(plumedmain,"setNatoms",&top_global->natoms); -+ plumed_cmd(plumedmain,"setMDEngine","gromacs"); -+ plumed_cmd(plumedmain,"setLog",fplog); -+ real real_delta_t; -+ real_delta_t=ir->delta_t; -+ plumed_cmd(plumedmain,"setTimestep",&real_delta_t); -+ plumed_cmd(plumedmain,"init",NULL); -+ -+ if(PAR(cr)){ -+ if(DOMAINDECOMP(cr)) { -+ plumed_cmd(plumedmain,"setAtomsNlocal",&cr->dd->nat_home); -+ plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->gatindex); -+ } -+ } -+ } -+ /* END PLUMED */ -+ - walltime_accounting_start(walltime_accounting); - wallcycle_start(wcycle, ewcRUN); - print_start(fplog, cr, walltime_accounting, "mdrun"); -@@ -955,6 +1009,13 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], - do_verbose && !bPMETuneRunning); - wallcycle_stop(wcycle, ewcDOMDEC); - /* If using an iterative integrator, reallocate space to match the decomposition */ -+ -+ /* PLUMED */ -+ if(plumedswitch){ -+ plumed_cmd(plumedmain,"setAtomsNlocal",&cr->dd->nat_home); -+ plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->gatindex); -+ } -+ /* END PLUMED */ - } - } - -@@ -1078,12 +1139,45 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], - * This is parallellized as well, and does communication too. - * Check comments in sim_util.c - */ -+ -+ /* PLUMED */ -+ plumedNeedsEnergy=0; -+ if(plumedswitch){ -+ long int lstep=step; plumed_cmd(plumedmain,"setStepLong",&lstep); -+ plumed_cmd(plumedmain,"setPositions",&state->x[0][0]); -+ plumed_cmd(plumedmain,"setMasses",&mdatoms->massT[0]); -+ plumed_cmd(plumedmain,"setCharges",&mdatoms->chargeA[0]); -+ plumed_cmd(plumedmain,"setBox",&state->box[0][0]); -+ plumed_cmd(plumedmain,"prepareCalc",NULL); -+ plumed_cmd(plumedmain,"setStopFlag",&plumedWantsToStop); -+ plumed_cmd(plumedmain,"setForces",&f[0][0]); -+ plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); -+ clear_mat(plumed_vir); -+ plumed_cmd(plumedmain,"setVirial",&plumed_vir[0][0]); -+ } -+ /* END PLUMED */ - do_force(fplog, cr, ir, step, nrnb, wcycle, top, groups, - state->box, state->x, &state->hist, - f, force_vir, mdatoms, enerd, fcd, - state->lambda, graph, - fr, vsite, mu_tot, t, mdoutf_get_fp_field(outf), ed, bBornRadii, - (bNS ? 
GMX_FORCE_NS : 0) | force_flags); -+ /* PLUMED */ -+ if(plumedswitch){ -+ if(plumedNeedsEnergy){ -+ msmul(force_vir,2.0,plumed_vir); -+ plumed_cmd(plumedmain,"setEnergy",&enerd->term[F_EPOT]); -+ plumed_cmd(plumedmain,"performCalc",NULL); -+ msmul(plumed_vir,0.5,force_vir); -+ } else { -+ msmul(plumed_vir,0.5,plumed_vir); -+ m_add(force_vir,plumed_vir,force_vir); -+ } -+ if ((repl_ex_nst > 0) && (step > 0) && !bLastStep && -+ do_per_step(step,repl_ex_nst)) plumed_cmd(plumedmain,"GREX savePositions",NULL); -+ if(plumedWantsToStop) ir->nsteps=step_rel+1; -+ } -+ /* END PLUMED */ - } - - if (bVV && !bStartingFromCpt && !bRerunMD) -diff --git a/src/programs/mdrun/md.c.preplumed b/src/programs/mdrun/md.c.preplumed -new file mode 100644 -index 0000000..3d98d59 ---- /dev/null -+++ b/src/programs/mdrun/md.c.preplumed -@@ -0,0 +1,2058 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team. -+ * Copyright (c) 2011,2012,2013,2014, by the GROMACS development team, led by -+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+ * and including many others, as listed in the AUTHORS file in the -+ * top-level source directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. -+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. 
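For readers unfamiliar with the hooks spliced into do_md above: PLUMED is driven entirely through plumed_cmd(handle, key, pointer) calls, first at setup time (atom count, engine name, timestep, init) and then once per step to hand over positions, masses, charges, box, force and virial buffers before requesting the calculation. Below is a stripped-down, serial sketch of that call pattern against the public Plumed.h C interface; the buffer names are illustrative, and the real patch splits the per-step part into prepareCalc/performCalc and adds the MPI and replica-exchange (GREX) plumbing shown above.

#include <stddef.h>
#include "Plumed.h"  /* public PLUMED C interface: plumed_create/plumed_cmd/plumed_finalize */

void plumed_pattern_sketch(int natoms, double timestep, int nsteps,
                           double *x, double *masses, double *charges,
                           double *box, double *forces, double *virial)
{
    plumed p = plumed_create();

    /* One-time setup, mirroring the initialisation hunk above */
    plumed_cmd(p, "setNatoms", &natoms);
    plumed_cmd(p, "setMDEngine", "sketch");
    plumed_cmd(p, "setTimestep", &timestep);
    plumed_cmd(p, "init", NULL);

    for (int step = 0; step < nsteps; step++)
    {
        /* Per-step handoff: PLUMED adds its bias forces into forces
         * and its contribution into virial. */
        plumed_cmd(p, "setStep", &step);
        plumed_cmd(p, "setPositions", x);
        plumed_cmd(p, "setMasses", masses);
        plumed_cmd(p, "setCharges", charges);
        plumed_cmd(p, "setBox", box);
        plumed_cmd(p, "setForces", forces);
        plumed_cmd(p, "setVirial", virial);
        plumed_cmd(p, "calc", NULL);
    }

    plumed_finalize(p);
}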
-+ */ -+#ifdef HAVE_CONFIG_H -+#include -+#endif -+ -+#include "typedefs.h" -+#include "gromacs/utility/smalloc.h" -+#include "sysstuff.h" -+#include "vec.h" -+#include "vcm.h" -+#include "mdebin.h" -+#include "nrnb.h" -+#include "calcmu.h" -+#include "index.h" -+#include "vsite.h" -+#include "update.h" -+#include "ns.h" -+#include "mdrun.h" -+#include "md_support.h" -+#include "md_logging.h" -+#include "network.h" -+#include "xvgr.h" -+#include "physics.h" -+#include "names.h" -+#include "force.h" -+#include "disre.h" -+#include "orires.h" -+#include "pme.h" -+#include "mdatoms.h" -+#include "repl_ex.h" -+#include "deform.h" -+#include "qmmm.h" -+#include "domdec.h" -+#include "domdec_network.h" -+#include "gromacs/gmxlib/topsort.h" -+#include "coulomb.h" -+#include "constr.h" -+#include "shellfc.h" -+#include "gromacs/gmxpreprocess/compute_io.h" -+#include "checkpoint.h" -+#include "mtop_util.h" -+#include "sighandler.h" -+#include "txtdump.h" -+#include "gromacs/utility/cstringutil.h" -+#include "pme_loadbal.h" -+#include "bondf.h" -+#include "membed.h" -+#include "types/nlistheuristics.h" -+#include "types/iteratedconstraints.h" -+#include "nbnxn_cuda_data_mgmt.h" -+ -+#include "gromacs/utility/gmxmpi.h" -+#include "gromacs/fileio/confio.h" -+#include "gromacs/fileio/trajectory_writing.h" -+#include "gromacs/fileio/trnio.h" -+#include "gromacs/fileio/trxio.h" -+#include "gromacs/fileio/xtcio.h" -+#include "gromacs/timing/wallcycle.h" -+#include "gromacs/timing/walltime_accounting.h" -+#include "gromacs/pulling/pull.h" -+#include "gromacs/swap/swapcoords.h" -+#include "gromacs/imd/imd.h" -+ -+#ifdef GMX_FAHCORE -+#include "corewrap.h" -+#endif -+ -+static void reset_all_counters(FILE *fplog, t_commrec *cr, -+ gmx_int64_t step, -+ gmx_int64_t *step_rel, t_inputrec *ir, -+ gmx_wallcycle_t wcycle, t_nrnb *nrnb, -+ gmx_walltime_accounting_t walltime_accounting, -+ nbnxn_cuda_ptr_t cu_nbv) -+{ -+ char sbuf[STEPSTRSIZE]; -+ -+ /* Reset all the counters related to performance over the run */ -+ md_print_warn(cr, fplog, "step %s: resetting all time and cycle counters\n", -+ gmx_step_str(step, sbuf)); -+ -+ if (cu_nbv) -+ { -+ nbnxn_cuda_reset_timings(cu_nbv); -+ } -+ -+ wallcycle_stop(wcycle, ewcRUN); -+ wallcycle_reset_all(wcycle); -+ if (DOMAINDECOMP(cr)) -+ { -+ reset_dd_statistics_counters(cr->dd); -+ } -+ init_nrnb(nrnb); -+ ir->init_step += *step_rel; -+ ir->nsteps -= *step_rel; -+ *step_rel = 0; -+ wallcycle_start(wcycle, ewcRUN); -+ walltime_accounting_start(walltime_accounting); -+ print_date_and_time(fplog, cr->nodeid, "Restarted time", gmx_gettime()); -+} -+ -+double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], -+ const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact, -+ int nstglobalcomm, -+ gmx_vsite_t *vsite, gmx_constr_t constr, -+ int stepout, t_inputrec *ir, -+ gmx_mtop_t *top_global, -+ t_fcdata *fcd, -+ t_state *state_global, -+ t_mdatoms *mdatoms, -+ t_nrnb *nrnb, gmx_wallcycle_t wcycle, -+ gmx_edsam_t ed, t_forcerec *fr, -+ int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, gmx_membed_t membed, -+ real cpt_period, real max_hours, -+ const char gmx_unused *deviceOptions, -+ int imdport, -+ unsigned long Flags, -+ gmx_walltime_accounting_t walltime_accounting) -+{ -+ gmx_mdoutf_t outf = NULL; -+ gmx_int64_t step, step_rel; -+ double elapsed_time; -+ double t, t0, lam0[efptNR]; -+ gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEner; -+ gmx_bool bNS, bNStList, bSimAnn, bStopCM, bRerunMD, bNotLastFrame = FALSE, -+ bFirstStep, bStateFromCP, 
bStateFromTPX, bInitStep, bLastStep, -+ bBornRadii, bStartingFromCpt; -+ gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE; -+ gmx_bool do_ene, do_log, do_verbose, bRerunWarnNoV = TRUE, -+ bForceUpdate = FALSE, bCPT; -+ gmx_bool bMasterState; -+ int force_flags, cglo_flags; -+ tensor force_vir, shake_vir, total_vir, tmp_vir, pres; -+ int i, m; -+ t_trxstatus *status; -+ rvec mu_tot; -+ t_vcm *vcm; -+ t_state *bufstate = NULL; -+ matrix *scale_tot, pcoupl_mu, M, ebox; -+ gmx_nlheur_t nlh; -+ t_trxframe rerun_fr; -+ gmx_repl_ex_t repl_ex = NULL; -+ int nchkpt = 1; -+ gmx_localtop_t *top; -+ t_mdebin *mdebin = NULL; -+ t_state *state = NULL; -+ rvec *f_global = NULL; -+ gmx_enerdata_t *enerd; -+ rvec *f = NULL; -+ gmx_global_stat_t gstat; -+ gmx_update_t upd = NULL; -+ t_graph *graph = NULL; -+ globsig_t gs; -+ gmx_groups_t *groups; -+ gmx_ekindata_t *ekind, *ekind_save; -+ gmx_shellfc_t shellfc; -+ int count, nconverged = 0; -+ real timestep = 0; -+ double tcount = 0; -+ gmx_bool bConverged = TRUE, bOK, bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition; -+ gmx_bool bAppend; -+ gmx_bool bResetCountersHalfMaxH = FALSE; -+ gmx_bool bVV, bIterativeCase, bFirstIterate, bTemp, bPres, bTrotter; -+ gmx_bool bUpdateDoLR; -+ real dvdl_constr; -+ rvec *cbuf = NULL; -+ matrix lastbox; -+ real veta_save, scalevir, tracevir; -+ real vetanew = 0; -+ int lamnew = 0; -+ /* for FEP */ -+ int nstfep; -+ double cycles; -+ real saved_conserved_quantity = 0; -+ real last_ekin = 0; -+ int iter_i; -+ t_extmass MassQ; -+ int **trotter_seq; -+ char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE]; -+ int handled_stop_condition = gmx_stop_cond_none; /* compare to get_stop_condition*/ -+ gmx_iterate_t iterate; -+ gmx_int64_t multisim_nsteps = -1; /* number of steps to do before first multisim -+ simulation stops. If equal to zero, don't -+ communicate any more between multisims.*/ -+ /* PME load balancing data for GPU kernels */ -+ pme_load_balancing_t pme_loadbal = NULL; -+ double cycles_pmes; -+ gmx_bool bPMETuneTry = FALSE, bPMETuneRunning = FALSE; -+ -+ /* Interactive MD */ -+ gmx_bool bIMDstep = FALSE; -+ -+#ifdef GMX_FAHCORE -+ /* Temporary addition for FAHCORE checkpointing */ -+ int chkpt_ret; -+#endif -+ -+ /* Check for special mdrun options */ -+ bRerunMD = (Flags & MD_RERUN); -+ bAppend = (Flags & MD_APPENDFILES); -+ if (Flags & MD_RESETCOUNTERSHALFWAY) -+ { -+ if (ir->nsteps > 0) -+ { -+ /* Signal to reset the counters half the simulation steps. */ -+ wcycle_set_reset_counters(wcycle, ir->nsteps/2); -+ } -+ /* Signal to reset the counters halfway the simulation time. */ -+ bResetCountersHalfMaxH = (max_hours > 0); -+ } -+ -+ /* md-vv uses averaged full step velocities for T-control -+ md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control) -+ md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */ -+ bVV = EI_VV(ir->eI); -+ if (bVV) /* to store the initial velocities while computing virial */ -+ { -+ snew(cbuf, top_global->natoms); -+ } -+ /* all the iteratative cases - only if there are constraints */ -+ bIterativeCase = ((IR_NPH_TROTTER(ir) || IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD)); -+ gmx_iterate_init(&iterate, FALSE); /* The default value of iterate->bIterationActive is set to -+ false in this step. 
The correct value, true or false, -+ is set at each step, as it depends on the frequency of temperature -+ and pressure control.*/ -+ bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir))); -+ -+ if (bRerunMD) -+ { -+ /* Since we don't know if the frames read are related in any way, -+ * rebuild the neighborlist at every step. -+ */ -+ ir->nstlist = 1; -+ ir->nstcalcenergy = 1; -+ nstglobalcomm = 1; -+ } -+ -+ check_ir_old_tpx_versions(cr, fplog, ir, top_global); -+ -+ nstglobalcomm = check_nstglobalcomm(fplog, cr, nstglobalcomm, ir); -+ bGStatEveryStep = (nstglobalcomm == 1); -+ -+ if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL) -+ { -+ fprintf(fplog, -+ "To reduce the energy communication with nstlist = -1\n" -+ "the neighbor list validity should not be checked at every step,\n" -+ "this means that exact integration is not guaranteed.\n" -+ "The neighbor list validity is checked after:\n" -+ " - 2*std.dev.(n.list life time) steps.\n" -+ "In most cases this will result in exact integration.\n" -+ "This reduces the energy communication by a factor of 2 to 3.\n" -+ "If you want less energy communication, set nstlist > 3.\n\n"); -+ } -+ -+ if (bRerunMD) -+ { -+ ir->nstxout_compressed = 0; -+ } -+ groups = &top_global->groups; -+ -+ /* Initial values */ -+ init_md(fplog, cr, ir, oenv, &t, &t0, state_global->lambda, -+ &(state_global->fep_state), lam0, -+ nrnb, top_global, &upd, -+ nfile, fnm, &outf, &mdebin, -+ force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, Flags, wcycle); -+ -+ clear_mat(total_vir); -+ clear_mat(pres); -+ /* Energy terms and groups */ -+ snew(enerd, 1); -+ init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, -+ enerd); -+ if (DOMAINDECOMP(cr)) -+ { -+ f = NULL; -+ } -+ else -+ { -+ snew(f, top_global->natoms); -+ } -+ -+ /* Kinetic energy data */ -+ snew(ekind, 1); -+ init_ekindata(fplog, top_global, &(ir->opts), ekind); -+ /* needed for iteration of constraints */ -+ snew(ekind_save, 1); -+ init_ekindata(fplog, top_global, &(ir->opts), ekind_save); -+ /* Copy the cos acceleration to the groups struct */ -+ ekind->cosacc.cos_accel = ir->cos_accel; -+ -+ gstat = global_stat_init(ir); -+ debug_gmx(); -+ -+ /* Check for polarizable models and flexible constraints */ -+ shellfc = init_shell_flexcon(fplog, -+ top_global, n_flexible_constraints(constr), -+ (ir->bContinuation || -+ (DOMAINDECOMP(cr) && !MASTER(cr))) ? 
-+ NULL : state_global->x); -+ if (shellfc && ir->nstcalcenergy != 1) -+ { -+ gmx_fatal(FARGS, "You have nstcalcenergy set to a value (%d) that is different from 1.\nThis is not supported in combinations with shell particles.\nPlease make a new tpr file.", ir->nstcalcenergy); -+ } -+ if (shellfc && DOMAINDECOMP(cr)) -+ { -+ gmx_fatal(FARGS, "Shell particles are not implemented with domain decomposition, use a single rank"); -+ } -+ if (shellfc && ir->eI == eiNM) -+ { -+ /* Currently shells don't work with Normal Modes */ -+ gmx_fatal(FARGS, "Normal Mode analysis is not supported with shells.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n"); -+ } -+ -+ if (vsite && ir->eI == eiNM) -+ { -+ /* Currently virtual sites don't work with Normal Modes */ -+ gmx_fatal(FARGS, "Normal Mode analysis is not supported with virtual sites.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n"); -+ } -+ -+ if (DEFORM(*ir)) -+ { -+ tMPI_Thread_mutex_lock(&deform_init_box_mutex); -+ set_deform_reference_box(upd, -+ deform_init_init_step_tpx, -+ deform_init_box_tpx); -+ tMPI_Thread_mutex_unlock(&deform_init_box_mutex); -+ } -+ -+ { -+ double io = compute_io(ir, top_global->natoms, groups, mdebin->ebin->nener, 1); -+ if ((io > 2000) && MASTER(cr)) -+ { -+ fprintf(stderr, -+ "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", -+ io); -+ } -+ } -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ top = dd_init_local_top(top_global); -+ -+ snew(state, 1); -+ dd_init_local_state(cr->dd, state_global, state); -+ -+ if (DDMASTER(cr->dd) && ir->nstfout) -+ { -+ snew(f_global, state_global->natoms); -+ } -+ } -+ else -+ { -+ top = gmx_mtop_generate_local_top(top_global, ir); -+ -+ forcerec_set_excl_load(fr, top); -+ -+ state = serial_init_local_state(state_global); -+ f_global = f; -+ -+ atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms); -+ -+ if (vsite) -+ { -+ set_vsite_top(vsite, top, mdatoms, cr); -+ } -+ -+ if (ir->ePBC != epbcNONE && !fr->bMolPBC) -+ { -+ graph = mk_graph(fplog, &(top->idef), 0, top_global->natoms, FALSE, FALSE); -+ } -+ -+ if (shellfc) -+ { -+ make_local_shells(cr, mdatoms, shellfc); -+ } -+ -+ setup_bonded_threading(fr, &top->idef); -+ } -+ -+ /* Set up interactive MD (IMD) */ -+ init_IMD(ir, cr, top_global, fplog, ir->nstcalcenergy, state_global->x, -+ nfile, fnm, oenv, imdport, Flags); -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ /* Distribute the charge groups over the nodes from the master node */ -+ dd_partition_system(fplog, ir->init_step, cr, TRUE, 1, -+ state_global, top_global, ir, -+ state, &f, mdatoms, top, fr, -+ vsite, shellfc, constr, -+ nrnb, wcycle, FALSE); -+ -+ } -+ -+ update_mdatoms(mdatoms, state->lambda[efptMASS]); -+ -+ if (opt2bSet("-cpi", nfile, fnm)) -+ { -+ bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr); -+ } -+ else -+ { -+ bStateFromCP = FALSE; -+ } -+ -+ if (ir->bExpanded) -+ { -+ init_expanded_ensemble(bStateFromCP, ir, &state->dfhist); -+ } -+ -+ if (MASTER(cr)) -+ { -+ if (bStateFromCP) -+ { -+ /* Update mdebin with energy history if appending to output files */ -+ if (Flags & MD_APPENDFILES) -+ { -+ restore_energyhistory_from_state(mdebin, &state_global->enerhist); -+ } -+ else -+ { -+ /* We might have read an energy history from checkpoint, -+ * free the allocated memory and reset the counts. 
-+ */ -+ done_energyhistory(&state_global->enerhist); -+ init_energyhistory(&state_global->enerhist); -+ } -+ } -+ /* Set the initial energy history in state by updating once */ -+ update_energyhistory(&state_global->enerhist, mdebin); -+ } -+ -+ /* Initialize constraints */ -+ if (constr && !DOMAINDECOMP(cr)) -+ { -+ set_constraints(constr, top, ir, mdatoms, cr); -+ } -+ -+ if (repl_ex_nst > 0 && MASTER(cr)) -+ { -+ repl_ex = init_replica_exchange(fplog, cr->ms, state_global, ir, -+ repl_ex_nst, repl_ex_nex, repl_ex_seed); -+ } -+ -+ /* PME tuning is only supported with GPUs or PME nodes and not with rerun. -+ * PME tuning is not supported with PME only for LJ and not for Coulomb. -+ */ -+ if ((Flags & MD_TUNEPME) && -+ EEL_PME(fr->eeltype) && -+ ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) && -+ !bRerunMD) -+ { -+ pme_loadbal_init(&pme_loadbal, ir, state->box, fr->ic, fr->pmedata); -+ cycles_pmes = 0; -+ if (cr->duty & DUTY_PME) -+ { -+ /* Start tuning right away, as we can't measure the load */ -+ bPMETuneRunning = TRUE; -+ } -+ else -+ { -+ /* Separate PME nodes, we can measure the PP/PME load balance */ -+ bPMETuneTry = TRUE; -+ } -+ } -+ -+ if (!ir->bContinuation && !bRerunMD) -+ { -+ if (mdatoms->cFREEZE && (state->flags & (1<homenr; i++) -+ { -+ for (m = 0; m < DIM; m++) -+ { -+ if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m]) -+ { -+ state->v[i][m] = 0; -+ } -+ } -+ } -+ } -+ -+ if (constr) -+ { -+ /* Constrain the initial coordinates and velocities */ -+ do_constrain_first(fplog, constr, ir, mdatoms, state, -+ cr, nrnb, fr, top); -+ } -+ if (vsite) -+ { -+ /* Construct the virtual sites for the initial configuration */ -+ construct_vsites(vsite, state->x, ir->delta_t, NULL, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, cr, state->box); -+ } -+ } -+ -+ debug_gmx(); -+ -+ /* set free energy calculation frequency as the minimum -+ greatest common denominator of nstdhdl, nstexpanded, and repl_ex_nst*/ -+ nstfep = ir->fepvals->nstdhdl; -+ if (ir->bExpanded) -+ { -+ nstfep = gmx_greatest_common_divisor(ir->fepvals->nstdhdl, nstfep); -+ } -+ if (repl_ex_nst > 0) -+ { -+ nstfep = gmx_greatest_common_divisor(repl_ex_nst, nstfep); -+ } -+ -+ /* I'm assuming we need global communication the first time! MRS */ -+ cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT -+ | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM : 0) -+ | (bVV ? CGLO_PRESSURE : 0) -+ | (bVV ? CGLO_CONSTRAINT : 0) -+ | (bRerunMD ? CGLO_RERUNMD : 0) -+ | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN : 0)); -+ -+ bSumEkinhOld = FALSE; -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &bSumEkinhOld, cglo_flags); -+ if (ir->eI == eiVVAK) -+ { -+ /* a second call to get the half step temperature initialized as well */ -+ /* we do the same call as above, but turn the pressure off -- internally to -+ compute_globals, this is recognized as a velocity verlet half-step -+ kinetic energy calculation. 
This minimized excess variables, but -+ perhaps loses some logic?*/ -+ -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &bSumEkinhOld, -+ cglo_flags &~(CGLO_STOPCM | CGLO_PRESSURE)); -+ } -+ -+ /* Calculate the initial half step temperature, and save the ekinh_old */ -+ if (!(Flags & MD_STARTFROMCPT)) -+ { -+ for (i = 0; (i < ir->opts.ngtc); i++) -+ { -+ copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old); -+ } -+ } -+ if (ir->eI != eiVV) -+ { -+ enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step, -+ and there is no previous step */ -+ } -+ -+ /* if using an iterative algorithm, we need to create a working directory for the state. */ -+ if (bIterativeCase) -+ { -+ bufstate = init_bufstate(state); -+ } -+ -+ /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter -+ temperature control */ -+ trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter); -+ -+ if (MASTER(cr)) -+ { -+ if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS) -+ { -+ fprintf(fplog, -+ "RMS relative constraint deviation after constraining: %.2e\n", -+ constr_rmsd(constr, FALSE)); -+ } -+ if (EI_STATE_VELOCITY(ir->eI)) -+ { -+ fprintf(fplog, "Initial temperature: %g K\n", enerd->term[F_TEMP]); -+ } -+ if (bRerunMD) -+ { -+ fprintf(stderr, "starting md rerun '%s', reading coordinates from" -+ " input trajectory '%s'\n\n", -+ *(top_global->name), opt2fn("-rerun", nfile, fnm)); -+ if (bVerbose) -+ { -+ fprintf(stderr, "Calculated time to finish depends on nsteps from " -+ "run input file,\nwhich may not correspond to the time " -+ "needed to process input trajectory.\n\n"); -+ } -+ } -+ else -+ { -+ char tbuf[20]; -+ fprintf(stderr, "starting mdrun '%s'\n", -+ *(top_global->name)); -+ if (ir->nsteps >= 0) -+ { -+ sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t); -+ } -+ else -+ { -+ sprintf(tbuf, "%s", "infinite"); -+ } -+ if (ir->init_step > 0) -+ { -+ fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n", -+ gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf, -+ gmx_step_str(ir->init_step, sbuf2), -+ ir->init_step*ir->delta_t); -+ } -+ else -+ { -+ fprintf(stderr, "%s steps, %s ps.\n", -+ gmx_step_str(ir->nsteps, sbuf), tbuf); -+ } -+ } -+ fprintf(fplog, "\n"); -+ } -+ -+ walltime_accounting_start(walltime_accounting); -+ wallcycle_start(wcycle, ewcRUN); -+ print_start(fplog, cr, walltime_accounting, "mdrun"); -+ -+ /* safest point to do file checkpointing is here. 
More general point would be immediately before integrator call */ -+#ifdef GMX_FAHCORE -+ chkpt_ret = fcCheckPointParallel( cr->nodeid, -+ NULL, 0); -+ if (chkpt_ret == 0) -+ { -+ gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 ); -+ } -+#endif -+ -+ debug_gmx(); -+ /*********************************************************** -+ * -+ * Loop over MD steps -+ * -+ ************************************************************/ -+ -+ /* if rerunMD then read coordinates and velocities from input trajectory */ -+ if (bRerunMD) -+ { -+ if (getenv("GMX_FORCE_UPDATE")) -+ { -+ bForceUpdate = TRUE; -+ } -+ -+ rerun_fr.natoms = 0; -+ if (MASTER(cr)) -+ { -+ bNotLastFrame = read_first_frame(oenv, &status, -+ opt2fn("-rerun", nfile, fnm), -+ &rerun_fr, TRX_NEED_X | TRX_READ_V); -+ if (rerun_fr.natoms != top_global->natoms) -+ { -+ gmx_fatal(FARGS, -+ "Number of atoms in trajectory (%d) does not match the " -+ "run input file (%d)\n", -+ rerun_fr.natoms, top_global->natoms); -+ } -+ if (ir->ePBC != epbcNONE) -+ { -+ if (!rerun_fr.bBox) -+ { -+ gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f does not contain a box, while pbc is used", rerun_fr.step, rerun_fr.time); -+ } -+ if (max_cutoff2(ir->ePBC, rerun_fr.box) < sqr(fr->rlistlong)) -+ { -+ gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f has too small box dimensions", rerun_fr.step, rerun_fr.time); -+ } -+ } -+ } -+ -+ if (PAR(cr)) -+ { -+ rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame); -+ } -+ -+ if (ir->ePBC != epbcNONE) -+ { -+ /* Set the shift vectors. -+ * Necessary here when have a static box different from the tpr box. -+ */ -+ calc_shifts(rerun_fr.box, fr->shift_vec); -+ } -+ } -+ -+ /* loop over MD steps or if rerunMD to end of input trajectory */ -+ bFirstStep = TRUE; -+ /* Skip the first Nose-Hoover integration when we get the state from tpx */ -+ bStateFromTPX = !bStateFromCP; -+ bInitStep = bFirstStep && (bStateFromTPX || bVV); -+ bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep; -+ bLastStep = FALSE; -+ bSumEkinhOld = FALSE; -+ bDoReplEx = FALSE; -+ bExchanged = FALSE; -+ bNeedRepartition = FALSE; -+ -+ init_global_signals(&gs, cr, ir, repl_ex_nst); -+ -+ step = ir->init_step; -+ step_rel = 0; -+ -+ if (ir->nstlist == -1) -+ { -+ init_nlistheuristics(&nlh, bGStatEveryStep, step); -+ } -+ -+ if (MULTISIM(cr) && (repl_ex_nst <= 0 )) -+ { -+ /* check how many steps are left in other sims */ -+ multisim_nsteps = get_multisim_nsteps(cr, ir->nsteps); -+ } -+ -+ -+ /* and stop now if we should */ -+ bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) || -+ ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps ))); -+ while (!bLastStep || (bRerunMD && bNotLastFrame)) -+ { -+ -+ wallcycle_start(wcycle, ewcSTEP); -+ -+ if (bRerunMD) -+ { -+ if (rerun_fr.bStep) -+ { -+ step = rerun_fr.step; -+ step_rel = step - ir->init_step; -+ } -+ if (rerun_fr.bTime) -+ { -+ t = rerun_fr.time; -+ } -+ else -+ { -+ t = step; -+ } -+ } -+ else -+ { -+ bLastStep = (step_rel == ir->nsteps); -+ t = t0 + step*ir->delta_t; -+ } -+ -+ if (ir->efep != efepNO || ir->bSimTemp) -+ { -+ /* find and set the current lambdas. If rerunning, we either read in a state, or a lambda value, -+ requiring different logic. 
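-+ In a normal run the lambda value simply follows the fepvals schedule for this step;
-+ with -rerun it can instead be taken from the trajectory frame when the frame provides one.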
*/ -+ -+ set_current_lambdas(step, ir->fepvals, bRerunMD, &rerun_fr, state_global, state, lam0); -+ bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl); -+ bDoFEP = (do_per_step(step, nstfep) && (ir->efep != efepNO)); -+ bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) -+ && (ir->bExpanded) && (step > 0) && (!bStartingFromCpt)); -+ } -+ -+ bDoReplEx = ((repl_ex_nst > 0) && (step > 0) && !bLastStep && -+ do_per_step(step, repl_ex_nst)); -+ -+ if (bSimAnn) -+ { -+ update_annealing_target_temp(&(ir->opts), t); -+ } -+ -+ if (bRerunMD) -+ { -+ if (!DOMAINDECOMP(cr) || MASTER(cr)) -+ { -+ for (i = 0; i < state_global->natoms; i++) -+ { -+ copy_rvec(rerun_fr.x[i], state_global->x[i]); -+ } -+ if (rerun_fr.bV) -+ { -+ for (i = 0; i < state_global->natoms; i++) -+ { -+ copy_rvec(rerun_fr.v[i], state_global->v[i]); -+ } -+ } -+ else -+ { -+ for (i = 0; i < state_global->natoms; i++) -+ { -+ clear_rvec(state_global->v[i]); -+ } -+ if (bRerunWarnNoV) -+ { -+ fprintf(stderr, "\nWARNING: Some frames do not contain velocities.\n" -+ " Ekin, temperature and pressure are incorrect,\n" -+ " the virial will be incorrect when constraints are present.\n" -+ "\n"); -+ bRerunWarnNoV = FALSE; -+ } -+ } -+ } -+ copy_mat(rerun_fr.box, state_global->box); -+ copy_mat(state_global->box, state->box); -+ -+ if (vsite && (Flags & MD_RERUN_VSITE)) -+ { -+ if (DOMAINDECOMP(cr)) -+ { -+ gmx_fatal(FARGS, "Vsite recalculation with -rerun is not implemented with domain decomposition, use a single rank"); -+ } -+ if (graph) -+ { -+ /* Following is necessary because the graph may get out of sync -+ * with the coordinates if we only have every N'th coordinate set -+ */ -+ mk_mshift(fplog, graph, fr->ePBC, state->box, state->x); -+ shift_self(graph, state->box, state->x); -+ } -+ construct_vsites(vsite, state->x, ir->delta_t, state->v, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, cr, state->box); -+ if (graph) -+ { -+ unshift_self(graph, state->box, state->x); -+ } -+ } -+ } -+ -+ /* Stop Center of Mass motion */ -+ bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm)); -+ -+ if (bRerunMD) -+ { -+ /* for rerun MD always do Neighbour Searching */ -+ bNS = (bFirstStep || ir->nstlist != 0); -+ bNStList = bNS; -+ } -+ else -+ { -+ /* Determine whether or not to do Neighbour Searching and LR */ -+ bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0); -+ -+ bNS = (bFirstStep || bExchanged || bNeedRepartition || bNStList || bDoFEP || -+ (ir->nstlist == -1 && nlh.nabnsb > 0)); -+ -+ if (bNS && ir->nstlist == -1) -+ { -+ set_nlistheuristics(&nlh, bFirstStep || bExchanged || bNeedRepartition || bDoFEP, step); -+ } -+ } -+ -+ /* check whether we should stop because another simulation has -+ stopped. 
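-+ (multisim_nsteps is only set above for multi-simulations run without replica exchange,
-+ so in practice this check only triggers in that case.)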
*/ -+ if (MULTISIM(cr)) -+ { -+ if ( (multisim_nsteps >= 0) && (step_rel >= multisim_nsteps) && -+ (multisim_nsteps != ir->nsteps) ) -+ { -+ if (bNS) -+ { -+ if (MASTER(cr)) -+ { -+ fprintf(stderr, -+ "Stopping simulation %d because another one has finished\n", -+ cr->ms->sim); -+ } -+ bLastStep = TRUE; -+ gs.sig[eglsCHKPT] = 1; -+ } -+ } -+ } -+ -+ /* < 0 means stop at next step, > 0 means stop at next NS step */ -+ if ( (gs.set[eglsSTOPCOND] < 0) || -+ ( (gs.set[eglsSTOPCOND] > 0) && (bNStList || ir->nstlist == 0) ) ) -+ { -+ bLastStep = TRUE; -+ } -+ -+ /* Determine whether or not to update the Born radii if doing GB */ -+ bBornRadii = bFirstStep; -+ if (ir->implicit_solvent && (step % ir->nstgbradii == 0)) -+ { -+ bBornRadii = TRUE; -+ } -+ -+ do_log = do_per_step(step, ir->nstlog) || bFirstStep || bLastStep; -+ do_verbose = bVerbose && -+ (step % stepout == 0 || bFirstStep || bLastStep); -+ -+ if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD)) -+ { -+ if (bRerunMD) -+ { -+ bMasterState = TRUE; -+ } -+ else -+ { -+ bMasterState = FALSE; -+ /* Correct the new box if it is too skewed */ -+ if (DYNAMIC_BOX(*ir)) -+ { -+ if (correct_box(fplog, step, state->box, graph)) -+ { -+ bMasterState = TRUE; -+ } -+ } -+ if (DOMAINDECOMP(cr) && bMasterState) -+ { -+ dd_collect_state(cr->dd, state, state_global); -+ } -+ } -+ -+ if (DOMAINDECOMP(cr)) -+ { -+ /* Repartition the domain decomposition */ -+ wallcycle_start(wcycle, ewcDOMDEC); -+ dd_partition_system(fplog, step, cr, -+ bMasterState, nstglobalcomm, -+ state_global, top_global, ir, -+ state, &f, mdatoms, top, fr, -+ vsite, shellfc, constr, -+ nrnb, wcycle, -+ do_verbose && !bPMETuneRunning); -+ wallcycle_stop(wcycle, ewcDOMDEC); -+ /* If using an iterative integrator, reallocate space to match the decomposition */ -+ } -+ } -+ -+ if (MASTER(cr) && do_log) -+ { -+ print_ebin_header(fplog, step, t, state->lambda[efptFEP]); /* can we improve the information printed here? */ -+ } -+ -+ if (ir->efep != efepNO) -+ { -+ update_mdatoms(mdatoms, state->lambda[efptMASS]); -+ } -+ -+ if ((bRerunMD && rerun_fr.bV) || bExchanged) -+ { -+ -+ /* We need the kinetic energy at minus the half step for determining -+ * the full step kinetic energy and possibly for T-coupling.*/ -+ /* This may not be quite working correctly yet . . . . */ -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &bSumEkinhOld, -+ CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE); -+ } -+ clear_mat(force_vir); -+ -+ /* We write a checkpoint at this MD step when: -+ * either at an NS step when we signalled through gs, -+ * or at the last step (but not when we do not want confout), -+ * but never at the first step or with rerun. -+ */ -+ bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) || -+ (bLastStep && (Flags & MD_CONFOUT))) && -+ step > ir->init_step && !bRerunMD); -+ if (bCPT) -+ { -+ gs.set[eglsCHKPT] = 0; -+ } -+ -+ /* Determine the energy and pressure: -+ * at nstcalcenergy steps and at energy output steps (set below). -+ */ -+ if (EI_VV(ir->eI) && (!bInitStep)) -+ { -+ /* for vv, the first half of the integration actually corresponds -+ to the previous step. bCalcEner is only required to be evaluated on the 'next' step, -+ but the virial needs to be calculated on both the current step and the 'next' step. Future -+ reorganization may be able to get rid of one of the bCalcVir=TRUE steps. 
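-+ For example, with nstcalcenergy=100 the energies are evaluated when step-1 is a
-+ multiple of 100, while the virial is also needed whenever pressure coupling acts
-+ on either step or step-1.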
*/ -+ -+ bCalcEner = do_per_step(step-1, ir->nstcalcenergy); -+ bCalcVir = bCalcEner || -+ (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple))); -+ } -+ else -+ { -+ bCalcEner = do_per_step(step, ir->nstcalcenergy); -+ bCalcVir = bCalcEner || -+ (ir->epc != epcNO && do_per_step(step, ir->nstpcouple)); -+ } -+ -+ /* Do we need global communication ? */ -+ bGStat = (bCalcVir || bCalcEner || bStopCM || -+ do_per_step(step, nstglobalcomm) || (bVV && IR_NVT_TROTTER(ir) && do_per_step(step-1, nstglobalcomm)) || -+ (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck)); -+ -+ do_ene = (do_per_step(step, ir->nstenergy) || bLastStep); -+ -+ if (do_ene || do_log || bDoReplEx) -+ { -+ bCalcVir = TRUE; -+ bCalcEner = TRUE; -+ bGStat = TRUE; -+ } -+ -+ /* these CGLO_ options remain the same throughout the iteration */ -+ cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) | -+ (bGStat ? CGLO_GSTAT : 0) -+ ); -+ -+ force_flags = (GMX_FORCE_STATECHANGED | -+ ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) | -+ GMX_FORCE_ALLFORCES | -+ GMX_FORCE_SEPLRF | -+ (bCalcVir ? GMX_FORCE_VIRIAL : 0) | -+ (bCalcEner ? GMX_FORCE_ENERGY : 0) | -+ (bDoFEP ? GMX_FORCE_DHDL : 0) -+ ); -+ -+ if (fr->bTwinRange) -+ { -+ if (do_per_step(step, ir->nstcalclr)) -+ { -+ force_flags |= GMX_FORCE_DO_LR; -+ } -+ } -+ -+ if (shellfc) -+ { -+ /* Now is the time to relax the shells */ -+ count = relax_shell_flexcon(fplog, cr, bVerbose, step, -+ ir, bNS, force_flags, -+ top, -+ constr, enerd, fcd, -+ state, f, force_vir, mdatoms, -+ nrnb, wcycle, graph, groups, -+ shellfc, fr, bBornRadii, t, mu_tot, -+ &bConverged, vsite, -+ mdoutf_get_fp_field(outf)); -+ tcount += count; -+ -+ if (bConverged) -+ { -+ nconverged++; -+ } -+ } -+ else -+ { -+ /* The coordinates (x) are shifted (to get whole molecules) -+ * in do_force. -+ * This is parallellized as well, and does communication too. -+ * Check comments in sim_util.c -+ */ -+ do_force(fplog, cr, ir, step, nrnb, wcycle, top, groups, -+ state->box, state->x, &state->hist, -+ f, force_vir, mdatoms, enerd, fcd, -+ state->lambda, graph, -+ fr, vsite, mu_tot, t, mdoutf_get_fp_field(outf), ed, bBornRadii, -+ (bNS ? GMX_FORCE_NS : 0) | force_flags); -+ } -+ -+ if (bVV && !bStartingFromCpt && !bRerunMD) -+ /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */ -+ { -+ wallcycle_start(wcycle, ewcUPDATE); -+ if (ir->eI == eiVV && bInitStep) -+ { -+ /* if using velocity verlet with full time step Ekin, -+ * take the first half step only to compute the -+ * virial for the first step. From there, -+ * revert back to the initial coordinates -+ * so that the input is actually the initial step. -+ */ -+ copy_rvecn(state->v, cbuf, 0, state->natoms); /* should make this better for parallelizing? */ -+ } -+ else -+ { -+ /* this is for NHC in the Ekin(t+dt/2) version of vv */ -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1); -+ } -+ -+ /* If we are using twin-range interactions where the long-range component -+ * is only evaluated every nstcalclr>1 steps, we should do a special update -+ * step to combine the long-range forces on these steps. -+ * For nstcalclr=1 this is not done, since the forces would have been added -+ * directly to the short-range forces already. -+ * -+ * TODO Remove various aspects of VV+twin-range in master -+ * branch, because VV integrators did not ever support -+ * twin-range multiple time stepping with constraints. 
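-+ * Until then, bUpdateDoLR below only folds the long-range forces in on the
-+ * steps where they were actually evaluated.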
-+ */ -+ bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); -+ -+ update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, -+ f, bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd, -+ ekind, M, upd, bInitStep, etrtVELOCITY1, -+ cr, nrnb, constr, &top->idef); -+ -+ if (bIterativeCase && do_per_step(step-1, ir->nstpcouple) && !bInitStep) -+ { -+ gmx_iterate_init(&iterate, TRUE); -+ } -+ /* for iterations, we save these vectors, as we will be self-consistently iterating -+ the calculations */ -+ -+ /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */ -+ -+ /* save the state */ -+ if (iterate.bIterationActive) -+ { -+ copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts)); -+ } -+ -+ bFirstIterate = TRUE; -+ while (bFirstIterate || iterate.bIterationActive) -+ { -+ if (iterate.bIterationActive) -+ { -+ copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts)); -+ if (bFirstIterate && bTrotter) -+ { -+ /* The first time through, we need a decent first estimate -+ of veta(t+dt) to compute the constraints. Do -+ this by computing the box volume part of the -+ trotter integration at this time. Nothing else -+ should be changed by this routine here. If -+ !(first time), we start with the previous value -+ of veta. */ -+ -+ veta_save = state->veta; -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ0); -+ vetanew = state->veta; -+ state->veta = veta_save; -+ } -+ } -+ -+ bOK = TRUE; -+ if (!bRerunMD || rerun_fr.bV || bForceUpdate) /* Why is rerun_fr.bV here? Unclear. */ -+ { -+ wallcycle_stop(wcycle, ewcUPDATE); -+ update_constraints(fplog, step, NULL, ir, ekind, mdatoms, -+ state, fr->bMolPBC, graph, f, -+ &top->idef, shake_vir, -+ cr, nrnb, wcycle, upd, constr, -+ TRUE, bCalcVir, vetanew); -+ wallcycle_start(wcycle, ewcUPDATE); -+ -+ if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1) -+ { -+ /* Correct the virial for multiple time stepping */ -+ m_sub(shake_vir, fr->vir_twin_constr, shake_vir); -+ } -+ -+ if (!bOK) -+ { -+ gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains"); -+ } -+ -+ } -+ else if (graph) -+ { -+ /* Need to unshift here if a do_force has been -+ called in the previous step */ -+ unshift_self(graph, state->box, state->x); -+ } -+ -+ /* if VV, compute the pressure and constraints */ -+ /* For VV2, we strictly only need this if using pressure -+ * control, but we really would like to have accurate pressures -+ * printed out. -+ * Think about ways around this in the future? -+ * For now, keep this choice in comments. -+ */ -+ /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */ -+ /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/ -+ bPres = TRUE; -+ bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK)); -+ if (bCalcEner && ir->eI == eiVVAK) /*MRS: 7/9/2010 -- this still doesn't fix it?*/ -+ { -+ bSumEkinhOld = TRUE; -+ } -+ /* for vv, the first half of the integration actually corresponds to the previous step. -+ So we need information from the last step in the first half of the integration */ -+ if (bGStat || do_per_step(step-1, nstglobalcomm)) -+ { -+ wallcycle_stop(wcycle, ewcUPDATE); -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &bSumEkinhOld, -+ cglo_flags -+ | CGLO_ENERGY -+ | (bTemp ? CGLO_TEMPERATURE : 0) -+ | (bPres ? 
CGLO_PRESSURE : 0) -+ | (bPres ? CGLO_CONSTRAINT : 0) -+ | ((iterate.bIterationActive) ? CGLO_ITERATE : 0) -+ | (bFirstIterate ? CGLO_FIRSTITERATE : 0) -+ | CGLO_SCALEEKIN -+ ); -+ /* explanation of above: -+ a) We compute Ekin at the full time step -+ if 1) we are using the AveVel Ekin, and it's not the -+ initial step, or 2) if we are using AveEkin, but need the full -+ time step kinetic energy for the pressure (always true now, since we want accurate statistics). -+ b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in -+ EkinAveVel because it's needed for the pressure */ -+ wallcycle_start(wcycle, ewcUPDATE); -+ } -+ /* temperature scaling and pressure scaling to produce the extended variables at t+dt */ -+ if (!bInitStep) -+ { -+ if (bTrotter) -+ { -+ m_add(force_vir, shake_vir, total_vir); /* we need the un-dispersion corrected total vir here */ -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2); -+ } -+ else -+ { -+ if (bExchanged) -+ { -+ wallcycle_stop(wcycle, ewcUPDATE); -+ /* We need the kinetic energy at minus the half step for determining -+ * the full step kinetic energy and possibly for T-coupling.*/ -+ /* This may not be quite working correctly yet . . . . */ -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot, -+ constr, NULL, FALSE, state->box, -+ top_global, &bSumEkinhOld, -+ CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE); -+ wallcycle_start(wcycle, ewcUPDATE); -+ } -+ } -+ } -+ -+ if (iterate.bIterationActive && -+ done_iterating(cr, fplog, step, &iterate, bFirstIterate, -+ state->veta, &vetanew)) -+ { -+ break; -+ } -+ bFirstIterate = FALSE; -+ } -+ -+ if (bTrotter && !bInitStep) -+ { -+ copy_mat(shake_vir, state->svir_prev); -+ copy_mat(force_vir, state->fvir_prev); -+ if (IR_NVT_TROTTER(ir) && ir->eI == eiVV) -+ { -+ /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */ -+ enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, NULL, (ir->eI == eiVV), FALSE); -+ enerd->term[F_EKIN] = trace(ekind->ekin); -+ } -+ } -+ /* if it's the initial step, we performed this first step just to get the constraint virial */ -+ if (bInitStep && ir->eI == eiVV) -+ { -+ copy_rvecn(cbuf, state->v, 0, state->natoms); -+ } -+ wallcycle_stop(wcycle, ewcUPDATE); -+ } -+ -+ /* MRS -- now done iterating -- compute the conserved quantity */ -+ if (bVV) -+ { -+ saved_conserved_quantity = compute_conserved_from_auxiliary(ir, state, &MassQ); -+ if (ir->eI == eiVV) -+ { -+ last_ekin = enerd->term[F_EKIN]; -+ } -+ if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) -+ { -+ saved_conserved_quantity -= enerd->term[F_DISPCORR]; -+ } -+ /* sum up the foreign energy and dhdl terms for vv. currently done every step so that dhdl is correct in the .edr */ -+ if (!bRerunMD) -+ { -+ sum_dhdl(enerd, state->lambda, ir->fepvals); -+ } -+ } -+ -+ /* ######## END FIRST UPDATE STEP ############## */ -+ /* ######## If doing VV, we now have v(dt) ###### */ -+ if (bDoExpanded) -+ { -+ /* perform extended ensemble sampling in lambda - we don't -+ actually move to the new state before outputting -+ statistics, but if performing simulated tempering, we -+ do update the velocities and the tau_t. 
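-+ The new state index (lamnew) is therefore only copied into state->fep_state
-+ after the log and energy output further down.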
*/ -+ -+ lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, &state->dfhist, step, state->v, mdatoms); -+ /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */ -+ copy_df_history(&state_global->dfhist, &state->dfhist); -+ } -+ -+ /* Now we have the energies and forces corresponding to the -+ * coordinates at time t. We must output all of this before -+ * the update. -+ */ -+ do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, -+ ir, state, state_global, top_global, fr, -+ outf, mdebin, ekind, f, f_global, -+ &nchkpt, -+ bCPT, bRerunMD, bLastStep, (Flags & MD_CONFOUT), -+ bSumEkinhOld); -+ /* Check if IMD step and do IMD communication, if bIMD is TRUE. */ -+ bIMDstep = do_IMD(ir->bIMD, step, cr, bNS, state->box, state->x, ir, t, wcycle); -+ -+ /* kludge -- virial is lost with restart for NPT control. Must restart */ -+ if (bStartingFromCpt && bVV) -+ { -+ copy_mat(state->svir_prev, shake_vir); -+ copy_mat(state->fvir_prev, force_vir); -+ } -+ -+ elapsed_time = walltime_accounting_get_current_elapsed_time(walltime_accounting); -+ -+ /* Check whether everything is still allright */ -+ if (((int)gmx_get_stop_condition() > handled_stop_condition) -+#ifdef GMX_THREAD_MPI -+ && MASTER(cr) -+#endif -+ ) -+ { -+ /* this is just make gs.sig compatible with the hack -+ of sending signals around by MPI_Reduce with together with -+ other floats */ -+ if (gmx_get_stop_condition() == gmx_stop_cond_next_ns) -+ { -+ gs.sig[eglsSTOPCOND] = 1; -+ } -+ if (gmx_get_stop_condition() == gmx_stop_cond_next) -+ { -+ gs.sig[eglsSTOPCOND] = -1; -+ } -+ /* < 0 means stop at next step, > 0 means stop at next NS step */ -+ if (fplog) -+ { -+ fprintf(fplog, -+ "\n\nReceived the %s signal, stopping at the next %sstep\n\n", -+ gmx_get_signal_name(), -+ gs.sig[eglsSTOPCOND] == 1 ? "NS " : ""); -+ fflush(fplog); -+ } -+ fprintf(stderr, -+ "\n\nReceived the %s signal, stopping at the next %sstep\n\n", -+ gmx_get_signal_name(), -+ gs.sig[eglsSTOPCOND] == 1 ? "NS " : ""); -+ fflush(stderr); -+ handled_stop_condition = (int)gmx_get_stop_condition(); -+ } -+ else if (MASTER(cr) && (bNS || ir->nstlist <= 0) && -+ (max_hours > 0 && elapsed_time > max_hours*60.0*60.0*0.99) && -+ gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0) -+ { -+ /* Signal to terminate the run */ -+ gs.sig[eglsSTOPCOND] = 1; -+ if (fplog) -+ { -+ fprintf(fplog, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99); -+ } -+ fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99); -+ } -+ -+ if (bResetCountersHalfMaxH && MASTER(cr) && -+ elapsed_time > max_hours*60.0*60.0*0.495) -+ { -+ gs.sig[eglsRESETCOUNTERS] = 1; -+ } -+ -+ if (ir->nstlist == -1 && !bRerunMD) -+ { -+ /* When bGStatEveryStep=FALSE, global_stat is only called -+ * when we check the atom displacements, not at NS steps. -+ * This means that also the bonded interaction count check is not -+ * performed immediately after NS. Therefore a few MD steps could -+ * be performed with missing interactions. -+ * But wrong energies are never written to file, -+ * since energies are only written after global_stat -+ * has been called. -+ */ -+ if (step >= nlh.step_nscheck) -+ { -+ nlh.nabnsb = natoms_beyond_ns_buffer(ir, fr, &top->cgs, -+ nlh.scale_tot, state->x); -+ } -+ else -+ { -+ /* This is not necessarily true, -+ * but step_nscheck is determined quite conservatively. 
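-+ * Until step_nscheck is reached, the number of atoms beyond the buffer is
-+ * simply taken to be zero.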
-+ */ -+ nlh.nabnsb = 0; -+ } -+ } -+ -+ /* In parallel we only have to check for checkpointing in steps -+ * where we do global communication, -+ * otherwise the other nodes don't know. -+ */ -+ if (MASTER(cr) && ((bGStat || !PAR(cr)) && -+ cpt_period >= 0 && -+ (cpt_period == 0 || -+ elapsed_time >= nchkpt*cpt_period*60.0)) && -+ gs.set[eglsCHKPT] == 0) -+ { -+ gs.sig[eglsCHKPT] = 1; -+ } -+ -+ /* at the start of step, randomize or scale the velocities (trotter done elsewhere) */ -+ if (EI_VV(ir->eI)) -+ { -+ if (!bInitStep) -+ { -+ update_tcouple(step, ir, state, ekind, &MassQ, mdatoms); -+ } -+ if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */ -+ { -+ gmx_bool bIfRandomize; -+ bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state, upd, constr); -+ /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */ -+ if (constr && bIfRandomize) -+ { -+ update_constraints(fplog, step, NULL, ir, ekind, mdatoms, -+ state, fr->bMolPBC, graph, f, -+ &top->idef, tmp_vir, -+ cr, nrnb, wcycle, upd, constr, -+ TRUE, bCalcVir, vetanew); -+ } -+ } -+ } -+ -+ if (bIterativeCase && do_per_step(step, ir->nstpcouple)) -+ { -+ gmx_iterate_init(&iterate, TRUE); -+ /* for iterations, we save these vectors, as we will be redoing the calculations */ -+ copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts)); -+ } -+ -+ bFirstIterate = TRUE; -+ while (bFirstIterate || iterate.bIterationActive) -+ { -+ /* We now restore these vectors to redo the calculation with improved extended variables */ -+ if (iterate.bIterationActive) -+ { -+ copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts)); -+ } -+ -+ /* We make the decision to break or not -after- the calculation of Ekin and Pressure, -+ so scroll down for that logic */ -+ -+ /* ######### START SECOND UPDATE STEP ################# */ -+ /* Box is changed in update() when we do pressure coupling, -+ * but we should still use the old box for energy corrections and when -+ * writing it to the energy file, so it matches the trajectory files for -+ * the same timestep above. Make a copy in a separate array. -+ */ -+ copy_mat(state->box, lastbox); -+ -+ bOK = TRUE; -+ dvdl_constr = 0; -+ -+ if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate)) -+ { -+ wallcycle_start(wcycle, ewcUPDATE); -+ /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */ -+ if (bTrotter) -+ { -+ if (iterate.bIterationActive) -+ { -+ if (bFirstIterate) -+ { -+ scalevir = 1; -+ } -+ else -+ { -+ /* we use a new value of scalevir to converge the iterations faster */ -+ scalevir = tracevir/trace(shake_vir); -+ } -+ msmul(shake_vir, scalevir, shake_vir); -+ m_add(force_vir, shake_vir, total_vir); -+ clear_mat(shake_vir); -+ } -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3); -+ /* We can only do Berendsen coupling after we have summed -+ * the kinetic energy or virial. Since the happens -+ * in global_state after update, we should only do it at -+ * step % nstlist = 1 with bGStatEveryStep=FALSE. -+ */ -+ } -+ else -+ { -+ update_tcouple(step, ir, state, ekind, &MassQ, mdatoms); -+ update_pcouple(fplog, step, ir, state, pcoupl_mu, M, bInitStep); -+ } -+ -+ if (bVV) -+ { -+ bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); -+ -+ /* velocity half-step update */ -+ update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, -+ bUpdateDoLR, fr->f_twin, bCalcVir ? 
&fr->vir_twin_constr : NULL, fcd, -+ ekind, M, upd, FALSE, etrtVELOCITY2, -+ cr, nrnb, constr, &top->idef); -+ } -+ -+ /* Above, initialize just copies ekinh into ekin, -+ * it doesn't copy position (for VV), -+ * and entire integrator for MD. -+ */ -+ -+ if (ir->eI == eiVVAK) -+ { -+ copy_rvecn(state->x, cbuf, 0, state->natoms); -+ } -+ bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); -+ -+ update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, -+ bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd, -+ ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef); -+ wallcycle_stop(wcycle, ewcUPDATE); -+ -+ update_constraints(fplog, step, &dvdl_constr, ir, ekind, mdatoms, state, -+ fr->bMolPBC, graph, f, -+ &top->idef, shake_vir, -+ cr, nrnb, wcycle, upd, constr, -+ FALSE, bCalcVir, state->veta); -+ -+ if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1) -+ { -+ /* Correct the virial for multiple time stepping */ -+ m_sub(shake_vir, fr->vir_twin_constr, shake_vir); -+ } -+ -+ if (ir->eI == eiVVAK) -+ { -+ /* erase F_EKIN and F_TEMP here? */ -+ /* just compute the kinetic energy at the half step to perform a trotter step */ -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, NULL, FALSE, lastbox, -+ top_global, &bSumEkinhOld, -+ cglo_flags | CGLO_TEMPERATURE -+ ); -+ wallcycle_start(wcycle, ewcUPDATE); -+ trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4); -+ /* now we know the scaling, we can compute the positions again again */ -+ copy_rvecn(cbuf, state->x, 0, state->natoms); -+ -+ bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); -+ -+ update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, -+ bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd, -+ ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef); -+ wallcycle_stop(wcycle, ewcUPDATE); -+ -+ /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */ -+ /* are the small terms in the shake_vir here due -+ * to numerical errors, or are they important -+ * physically? I'm thinking they are just errors, but not completely sure. -+ * For now, will call without actually constraining, constr=NULL*/ -+ update_constraints(fplog, step, NULL, ir, ekind, mdatoms, -+ state, fr->bMolPBC, graph, f, -+ &top->idef, tmp_vir, -+ cr, nrnb, wcycle, upd, NULL, -+ FALSE, bCalcVir, -+ state->veta); -+ } -+ if (!bOK) -+ { -+ gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains"); -+ } -+ -+ if (fr->bSepDVDL && fplog && do_log) -+ { -+ gmx_print_sepdvdl(fplog, "Constraint dV/dl", 0.0, dvdl_constr); -+ } -+ if (bVV) -+ { -+ /* this factor or 2 correction is necessary -+ because half of the constraint force is removed -+ in the vv step, so we have to double it. See -+ the Redmine issue #1255. It is not yet clear -+ if the factor of 2 is exact, or just a very -+ good approximation, and this will be -+ investigated. The next step is to see if this -+ can be done adding a dhdl contribution from the -+ rattle step, but this is somewhat more -+ complicated with the current code. Will be -+ investigated, hopefully for 4.6.3. However, -+ this current solution is much better than -+ having it completely wrong. 
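-+ In short: the constraint contribution to dH/dl is counted as 2*dvdl_constr
-+ under VV and as dvdl_constr for the other integrators.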
-+ */ -+ enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr; -+ } -+ else -+ { -+ enerd->term[F_DVDL_CONSTR] += dvdl_constr; -+ } -+ } -+ else if (graph) -+ { -+ /* Need to unshift here */ -+ unshift_self(graph, state->box, state->x); -+ } -+ -+ if (vsite != NULL) -+ { -+ wallcycle_start(wcycle, ewcVSITECONSTR); -+ if (graph != NULL) -+ { -+ shift_self(graph, state->box, state->x); -+ } -+ construct_vsites(vsite, state->x, ir->delta_t, state->v, -+ top->idef.iparams, top->idef.il, -+ fr->ePBC, fr->bMolPBC, cr, state->box); -+ -+ if (graph != NULL) -+ { -+ unshift_self(graph, state->box, state->x); -+ } -+ wallcycle_stop(wcycle, ewcVSITECONSTR); -+ } -+ -+ /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints ############ */ -+ /* With Leap-Frog we can skip compute_globals at -+ * non-communication steps, but we need to calculate -+ * the kinetic energy one step before communication. -+ */ -+ if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm))) -+ { -+ if (ir->nstlist == -1 && bFirstIterate) -+ { -+ gs.sig[eglsNABNSB] = nlh.nabnsb; -+ } -+ compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, -+ wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, -+ constr, -+ bFirstIterate ? &gs : NULL, -+ (step_rel % gs.nstms == 0) && -+ (multisim_nsteps < 0 || (step_rel < multisim_nsteps)), -+ lastbox, -+ top_global, &bSumEkinhOld, -+ cglo_flags -+ | (!EI_VV(ir->eI) || bRerunMD ? CGLO_ENERGY : 0) -+ | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0) -+ | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) -+ | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0) -+ | (iterate.bIterationActive ? CGLO_ITERATE : 0) -+ | (bFirstIterate ? CGLO_FIRSTITERATE : 0) -+ | CGLO_CONSTRAINT -+ ); -+ if (ir->nstlist == -1 && bFirstIterate) -+ { -+ nlh.nabnsb = gs.set[eglsNABNSB]; -+ gs.set[eglsNABNSB] = 0; -+ } -+ } -+ /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */ -+ /* ############# END CALC EKIN AND PRESSURE ################# */ -+ -+ /* Note: this is OK, but there are some numerical precision issues with using the convergence of -+ the virial that should probably be addressed eventually. state->veta has better properies, -+ but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could -+ generate the new shake_vir, but test the veta value for convergence. This will take some thought. */ -+ -+ if (iterate.bIterationActive && -+ done_iterating(cr, fplog, step, &iterate, bFirstIterate, -+ trace(shake_vir), &tracevir)) -+ { -+ break; -+ } -+ bFirstIterate = FALSE; -+ } -+ -+ if (!bVV || bRerunMD) -+ { -+ /* sum up the foreign energy and dhdl terms for md and sd. currently done every step so that dhdl is correct in the .edr */ -+ sum_dhdl(enerd, state->lambda, ir->fepvals); -+ } -+ update_box(fplog, step, ir, mdatoms, state, f, -+ ir->nstlist == -1 ? &nlh.scale_tot : NULL, pcoupl_mu, nrnb, upd); -+ -+ /* ################# END UPDATE STEP 2 ################# */ -+ /* #### We now have r(t+dt) and v(t+dt/2) ############# */ -+ -+ /* The coordinates (x) were unshifted in update */ -+ if (!bGStat) -+ { -+ /* We will not sum ekinh_old, -+ * so signal that we still have to do it. 
-+ */ -+ bSumEkinhOld = TRUE; -+ } -+ -+ /* ######### BEGIN PREPARING EDR OUTPUT ########### */ -+ -+ /* use the directly determined last velocity, not actually the averaged half steps */ -+ if (bTrotter && ir->eI == eiVV) -+ { -+ enerd->term[F_EKIN] = last_ekin; -+ } -+ enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN]; -+ -+ if (bVV) -+ { -+ enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity; -+ } -+ else -+ { -+ enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir, state, &MassQ); -+ } -+ /* ######### END PREPARING EDR OUTPUT ########### */ -+ -+ /* Output stuff */ -+ if (MASTER(cr)) -+ { -+ gmx_bool do_dr, do_or; -+ -+ if (fplog && do_log && bDoExpanded) -+ { -+ /* only needed if doing expanded ensemble */ -+ PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : NULL, -+ &state_global->dfhist, state->fep_state, ir->nstlog, step); -+ } -+ if (!(bStartingFromCpt && (EI_VV(ir->eI)))) -+ { -+ if (bCalcEner) -+ { -+ upd_mdebin(mdebin, bDoDHDL, TRUE, -+ t, mdatoms->tmass, enerd, state, -+ ir->fepvals, ir->expandedvals, lastbox, -+ shake_vir, force_vir, total_vir, pres, -+ ekind, mu_tot, constr); -+ } -+ else -+ { -+ upd_mdebin_step(mdebin); -+ } -+ -+ do_dr = do_per_step(step, ir->nstdisreout); -+ do_or = do_per_step(step, ir->nstorireout); -+ -+ print_ebin(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or, do_log ? fplog : NULL, -+ step, t, -+ eprNORMAL, bCompact, mdebin, fcd, groups, &(ir->opts)); -+ } -+ if (ir->ePull != epullNO) -+ { -+ pull_print_output(ir->pull, step, t); -+ } -+ -+ if (do_per_step(step, ir->nstlog)) -+ { -+ if (fflush(fplog) != 0) -+ { -+ gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?"); -+ } -+ } -+ } -+ if (bDoExpanded) -+ { -+ /* Have to do this part _after_ outputting the logfile and the edr file */ -+ /* Gets written into the state at the beginning of next loop*/ -+ state->fep_state = lamnew; -+ } -+ /* Print the remaining wall clock time for the run */ -+ if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal()) && !bPMETuneRunning) -+ { -+ if (shellfc) -+ { -+ fprintf(stderr, "\n"); -+ } -+ print_time(stderr, walltime_accounting, step, ir, cr); -+ } -+ -+ /* Ion/water position swapping. -+ * Not done in last step since trajectory writing happens before this call -+ * in the MD loop and exchanges would be lost anyway. */ -+ bNeedRepartition = FALSE; -+ if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && -+ do_per_step(step, ir->swap->nstswap)) -+ { -+ bNeedRepartition = do_swapcoords(cr, step, t, ir, wcycle, -+ bRerunMD ? rerun_fr.x : state->x, -+ bRerunMD ? 
rerun_fr.box : state->box, -+ top_global, MASTER(cr) && bVerbose, bRerunMD); -+ -+ if (bNeedRepartition && DOMAINDECOMP(cr)) -+ { -+ dd_collect_state(cr->dd, state, state_global); -+ } -+ } -+ -+ /* Replica exchange */ -+ bExchanged = FALSE; -+ if (bDoReplEx) -+ { -+ bExchanged = replica_exchange(fplog, cr, repl_ex, -+ state_global, enerd, -+ state, step, t); -+ } -+ -+ if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) ) -+ { -+ dd_partition_system(fplog, step, cr, TRUE, 1, -+ state_global, top_global, ir, -+ state, &f, mdatoms, top, fr, -+ vsite, shellfc, constr, -+ nrnb, wcycle, FALSE); -+ } -+ -+ bFirstStep = FALSE; -+ bInitStep = FALSE; -+ bStartingFromCpt = FALSE; -+ -+ /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */ -+ /* With all integrators, except VV, we need to retain the pressure -+ * at the current step for coupling at the next step. -+ */ -+ if ((state->flags & (1<nstpcouple > 0 && step % ir->nstpcouple == 0))) -+ { -+ /* Store the pressure in t_state for pressure coupling -+ * at the next MD step. -+ */ -+ copy_mat(pres, state->pres_prev); -+ } -+ -+ /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */ -+ -+ if ( (membed != NULL) && (!bLastStep) ) -+ { -+ rescale_membed(step_rel, membed, state_global->x); -+ } -+ -+ if (bRerunMD) -+ { -+ if (MASTER(cr)) -+ { -+ /* read next frame from input trajectory */ -+ bNotLastFrame = read_next_frame(oenv, status, &rerun_fr); -+ } -+ -+ if (PAR(cr)) -+ { -+ rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame); -+ } -+ } -+ -+ if (!bRerunMD || !rerun_fr.bStep) -+ { -+ /* increase the MD step number */ -+ step++; -+ step_rel++; -+ } -+ -+ cycles = wallcycle_stop(wcycle, ewcSTEP); -+ if (DOMAINDECOMP(cr) && wcycle) -+ { -+ dd_cycles_add(cr->dd, cycles, ddCyclStep); -+ } -+ -+ if (bPMETuneRunning || bPMETuneTry) -+ { -+ /* PME grid + cut-off optimization with GPUs or PME nodes */ -+ -+ /* Count the total cycles over the last steps */ -+ cycles_pmes += cycles; -+ -+ /* We can only switch cut-off at NS steps */ -+ if (step % ir->nstlist == 0) -+ { -+ /* PME grid + cut-off optimization with GPUs or PME nodes */ -+ if (bPMETuneTry) -+ { -+ if (DDMASTER(cr->dd)) -+ { -+ /* PME node load is too high, start tuning */ -+ bPMETuneRunning = (dd_pme_f_ratio(cr->dd) >= 1.05); -+ } -+ dd_bcast(cr->dd, sizeof(gmx_bool), &bPMETuneRunning); -+ -+ if (bPMETuneRunning && -+ fr->nbv->bUseGPU && DOMAINDECOMP(cr) && -+ !(cr->duty & DUTY_PME)) -+ { -+ /* Lock DLB=auto to off (does nothing when DLB=yes/no). -+ * With GPUs + separate PME ranks, we don't want DLB. -+ * This could happen when we scan coarse grids and -+ * it would then never be turned off again. -+ * This would hurt performance at the final, optimal -+ * grid spacing, where DLB almost never helps. -+ * Also, DLB can limit the cut-off for PME tuning. -+ */ -+ dd_dlb_set_lock(cr->dd, TRUE); -+ } -+ -+ if (bPMETuneRunning || step_rel > ir->nstlist*50) -+ { -+ bPMETuneTry = FALSE; -+ } -+ } -+ if (bPMETuneRunning) -+ { -+ /* init_step might not be a multiple of nstlist, -+ * but the first cycle is always skipped anyhow. -+ */ -+ bPMETuneRunning = -+ pme_load_balance(pme_loadbal, cr, -+ (bVerbose && MASTER(cr)) ? 
stderr : NULL, -+ fplog, -+ ir, state, cycles_pmes, -+ fr->ic, fr->nbv, &fr->pmedata, -+ step); -+ -+ /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */ -+ fr->ewaldcoeff_q = fr->ic->ewaldcoeff_q; -+ fr->ewaldcoeff_lj = fr->ic->ewaldcoeff_lj; -+ fr->rlist = fr->ic->rlist; -+ fr->rlistlong = fr->ic->rlistlong; -+ fr->rcoulomb = fr->ic->rcoulomb; -+ fr->rvdw = fr->ic->rvdw; -+ -+ if (ir->eDispCorr != edispcNO) -+ { -+ calc_enervirdiff(NULL, ir->eDispCorr, fr); -+ } -+ -+ if (!bPMETuneRunning && -+ DOMAINDECOMP(cr) && -+ dd_dlb_is_locked(cr->dd)) -+ { -+ /* Unlock the DLB=auto, DLB is allowed to activate -+ * (but we don't expect it to activate in most cases). -+ */ -+ dd_dlb_set_lock(cr->dd, FALSE); -+ } -+ } -+ cycles_pmes = 0; -+ } -+ } -+ -+ if (step_rel == wcycle_get_reset_counters(wcycle) || -+ gs.set[eglsRESETCOUNTERS] != 0) -+ { -+ /* Reset all the counters related to performance over the run */ -+ reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, walltime_accounting, -+ fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL); -+ wcycle_set_reset_counters(wcycle, -1); -+ if (!(cr->duty & DUTY_PME)) -+ { -+ /* Tell our PME node to reset its counters */ -+ gmx_pme_send_resetcounters(cr, step); -+ } -+ /* Correct max_hours for the elapsed time */ -+ max_hours -= elapsed_time/(60.0*60.0); -+ bResetCountersHalfMaxH = FALSE; -+ gs.set[eglsRESETCOUNTERS] = 0; -+ } -+ -+ /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */ -+ IMD_prep_energies_send_positions(ir->bIMD && MASTER(cr), bIMDstep, ir->imd, enerd, step, bCalcEner, wcycle); -+ -+ } -+ /* End of main MD loop */ -+ debug_gmx(); -+ -+ /* Closing TNG files can include compressing data. Therefore it is good to do that -+ * before stopping the time measurements. */ -+ mdoutf_tng_close(outf); -+ -+ /* Stop measuring walltime */ -+ walltime_accounting_end(walltime_accounting); -+ -+ if (bRerunMD && MASTER(cr)) -+ { -+ close_trj(status); -+ } -+ -+ if (!(cr->duty & DUTY_PME)) -+ { -+ /* Tell the PME only node to finish */ -+ gmx_pme_send_finish(cr); -+ } -+ -+ if (MASTER(cr)) -+ { -+ if (ir->nstcalcenergy > 0 && !bRerunMD) -+ { -+ print_ebin(mdoutf_get_fp_ene(outf), FALSE, FALSE, FALSE, fplog, step, t, -+ eprAVER, FALSE, mdebin, fcd, groups, &(ir->opts)); -+ } -+ } -+ -+ done_mdoutf(outf); -+ debug_gmx(); -+ -+ if (ir->nstlist == -1 && nlh.nns > 0 && fplog) -+ { -+ fprintf(fplog, "Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n", nlh.s1/nlh.nns, sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns))); -+ fprintf(fplog, "Average number of atoms that crossed the half buffer length: %.1f\n\n", nlh.ab/nlh.nns); -+ } -+ -+ if (pme_loadbal != NULL) -+ { -+ pme_loadbal_done(pme_loadbal, cr, fplog, -+ fr->nbv != NULL && fr->nbv->bUseGPU); -+ } -+ -+ if (shellfc && fplog) -+ { -+ fprintf(fplog, "Fraction of iterations that converged: %.2f %%\n", -+ (nconverged*100.0)/step_rel); -+ fprintf(fplog, "Average number of force evaluations per MD step: %.2f\n\n", -+ tcount/step_rel); -+ } -+ -+ if (repl_ex_nst > 0 && MASTER(cr)) -+ { -+ print_replica_exchange_statistics(fplog, repl_ex); -+ } -+ -+ /* IMD cleanup, if bIMD is TRUE. 
*/ -+ IMD_finalize(ir->bIMD, ir->imd); -+ -+ walltime_accounting_set_nsteps_done(walltime_accounting, step_rel); -+ -+ return 0; -+} -diff --git a/src/programs/mdrun/mdrun.cpp b/src/programs/mdrun/mdrun.cpp -index 6bac3f0..e9fbf48 100644 ---- a/src/programs/mdrun/mdrun.cpp -+++ b/src/programs/mdrun/mdrun.cpp -@@ -55,6 +55,12 @@ - - #include "gromacs/commandline/pargs.h" - #include "gromacs/fileio/filenm.h" -+/* PLUMED */ -+#include "../../../Plumed.h" -+extern int plumedswitch; -+extern plumed plumedmain; -+extern void(*plumedcmd)(plumed,const char*,const void*); -+/* END PLUMED */ - - int gmx_mdrun(int argc, char *argv[]) - { -@@ -428,6 +434,7 @@ int gmx_mdrun(int argc, char *argv[]) - { efMTX, "-mtx", "nm", ffOPTWR }, - { efNDX, "-dn", "dipole", ffOPTWR }, - { efRND, "-multidir", NULL, ffOPTRDMULT}, -+ { efDAT, "-plumed", "plumed", ffOPTRD }, /* PLUMED */ - { efDAT, "-membed", "membed", ffOPTRD }, - { efTOP, "-mp", "membed", ffOPTRD }, - { efNDX, "-mn", "membed", ffOPTRD }, -@@ -780,6 +787,32 @@ int gmx_mdrun(int argc, char *argv[]) - ddxyz[YY] = (int)(realddxyz[YY] + 0.5); - ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5); - -+ /* PLUMED */ -+ plumedswitch=0; -+ if (opt2bSet("-plumed",NFILE,fnm)) plumedswitch=1; -+ if(plumedswitch){ -+ plumedcmd=plumed_cmd; -+ int plumed_is_there=0; -+ int real_precision=sizeof(real); -+ real energyUnits=1.0; -+ real lengthUnits=1.0; -+ real timeUnits=1.0; -+ -+ if(!plumed_installed()){ -+ gmx_fatal(FARGS,"Plumed is not available. Check your PLUMED_KERNEL variable."); -+ } -+ plumedmain=plumed_create(); -+ plumed_cmd(plumedmain,"setRealPrecision",&real_precision); -+ // this is not necessary for gromacs units: -+ plumed_cmd(plumedmain,"setMDEnergyUnits",&energyUnits); -+ plumed_cmd(plumedmain,"setMDLengthUnits",&lengthUnits); -+ plumed_cmd(plumedmain,"setMDTimeUnits",&timeUnits); -+ // -+ plumed_cmd(plumedmain,"setPlumedDat",ftp2fn(efDAT,NFILE,fnm)); -+ plumedswitch=1; -+ } -+ /* END PLUMED */ -+ - rc = mdrunner(&hw_opt, fplog, cr, NFILE, fnm, oenv, bVerbose, bCompact, - nstglobalcomm, ddxyz, dd_node_order, rdd, rconstr, - dddlb_opt[0], dlb_scale, ddcsx, ddcsy, ddcsz, -@@ -788,6 +821,12 @@ int gmx_mdrun(int argc, char *argv[]) - nmultisim, repl_ex_nst, repl_ex_nex, repl_ex_seed, - pforce, cpt_period, max_hours, deviceOptions, imdport, Flags); - -+ /* PLUMED */ -+ if(plumedswitch){ -+ plumed_finalize(plumedmain); -+ } -+ /* END PLUMED */ -+ - /* Log file has to be closed in mdrunner if we are appending to it - (fplog not set here) */ - if (MASTER(cr) && !bAppendFiles) -diff --git a/src/programs/mdrun/mdrun.cpp.preplumed b/src/programs/mdrun/mdrun.cpp.preplumed -new file mode 100644 -index 0000000..6bac3f0 ---- /dev/null -+++ b/src/programs/mdrun/mdrun.cpp.preplumed -@@ -0,0 +1,799 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team. -+ * Copyright (c) 2011,2012,2013,2014, by the GROMACS development team, led by -+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+ * and including many others, as listed in the AUTHORS file in the -+ * top-level source directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. 
-+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. -+ */ -+#include "mdrun_main.h" -+ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include -+ -+#include "gromacs/legacyheaders/checkpoint.h" -+#include "gromacs/legacyheaders/copyrite.h" -+#include "gromacs/legacyheaders/gmx_fatal.h" -+#include "gromacs/legacyheaders/macros.h" -+#include "gromacs/legacyheaders/main.h" -+#include "gromacs/legacyheaders/mdrun.h" -+#include "gromacs/legacyheaders/network.h" -+#include "gromacs/legacyheaders/readinp.h" -+#include "gromacs/legacyheaders/typedefs.h" -+#include "gromacs/legacyheaders/types/commrec.h" -+ -+#include "gromacs/commandline/pargs.h" -+#include "gromacs/fileio/filenm.h" -+ -+int gmx_mdrun(int argc, char *argv[]) -+{ -+ const char *desc[] = { -+ "[THISMODULE] is the main computational chemistry engine", -+ "within GROMACS. Obviously, it performs Molecular Dynamics simulations,", -+ "but it can also perform Stochastic Dynamics, Energy Minimization,", -+ "test particle insertion or (re)calculation of energies.", -+ "Normal mode analysis is another option. 
In this case [TT]mdrun[tt]", -+ "builds a Hessian matrix from single conformation.", -+ "For usual Normal Modes-like calculations, make sure that", -+ "the structure provided is properly energy-minimized.", -+ "The generated matrix can be diagonalized by [gmx-nmeig].[PAR]", -+ "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])", -+ "and distributes the topology over ranks if needed.", -+ "[TT]mdrun[tt] produces at least four output files.", -+ "A single log file ([TT]-g[tt]) is written, unless the option", -+ "[TT]-seppot[tt] is used, in which case each rank writes a log file.", -+ "The trajectory file ([TT]-o[tt]), contains coordinates, velocities and", -+ "optionally forces.", -+ "The structure file ([TT]-c[tt]) contains the coordinates and", -+ "velocities of the last step.", -+ "The energy file ([TT]-e[tt]) contains energies, the temperature,", -+ "pressure, etc, a lot of these things are also printed in the log file.", -+ "Optionally coordinates can be written to a compressed trajectory file", -+ "([TT]-x[tt]).[PAR]", -+ "The option [TT]-dhdl[tt] is only used when free energy calculation is", -+ "turned on.[PAR]", -+ "A simulation can be run in parallel using two different parallelization", -+ "schemes: MPI parallelization and/or OpenMP thread parallelization.", -+ "The MPI parallelization uses multiple processes when [TT]mdrun[tt] is", -+ "compiled with a normal MPI library or threads when [TT]mdrun[tt] is", -+ "compiled with the GROMACS built-in thread-MPI library. OpenMP threads", -+ "are supported when [TT]mdrun[tt] is compiled with OpenMP. Full OpenMP support", -+ "is only available with the Verlet cut-off scheme, with the (older)", -+ "group scheme only PME-only ranks can use OpenMP parallelization.", -+ "In all cases [TT]mdrun[tt] will by default try to use all the available", -+ "hardware resources. With a normal MPI library only the options", -+ "[TT]-ntomp[tt] (with the Verlet cut-off scheme) and [TT]-ntomp_pme[tt],", -+ "for PME-only ranks, can be used to control the number of threads.", -+ "With thread-MPI there are additional options [TT]-nt[tt], which sets", -+ "the total number of threads, and [TT]-ntmpi[tt], which sets the number", -+ "of thread-MPI threads.", -+ "The number of OpenMP threads used by [TT]mdrun[tt] can also be set with", -+ "the standard environment variable, [TT]OMP_NUM_THREADS[tt].", -+ "The [TT]GMX_PME_NUM_THREADS[tt] environment variable can be used to specify", -+ "the number of threads used by the PME-only ranks.[PAR]", -+ "Note that combined MPI+OpenMP parallelization is in many cases", -+ "slower than either on its own. However, at high parallelization, using the", -+ "combination is often beneficial as it reduces the number of domains and/or", -+ "the number of MPI ranks. (Less and larger domains can improve scaling,", -+ "with separate PME ranks, using fewer MPI ranks reduces communication costs.)", -+ "OpenMP-only parallelization is typically faster than MPI-only parallelization", -+ "on a single CPU(-die). Since we currently don't have proper hardware", -+ "topology detection, [TT]mdrun[tt] compiled with thread-MPI will only", -+ "automatically use OpenMP-only parallelization when you use up to 4", -+ "threads, up to 12 threads with Intel Nehalem/Westmere, or up to 16", -+ "threads with Intel Sandy Bridge or newer CPUs. 
Otherwise MPI-only", -+ "parallelization is used (except with GPUs, see below).", -+ "[PAR]", -+ "To quickly test the performance of the new Verlet cut-off scheme", -+ "with old [TT].tpr[tt] files, either on CPUs or CPUs+GPUs, you can use", -+ "the [TT]-testverlet[tt] option. This should not be used for production,", -+ "since it can slightly modify potentials and it will remove charge groups", -+ "making analysis difficult, as the [TT].tpr[tt] file will still contain", -+ "charge groups. For production simulations it is highly recommended", -+ "to specify [TT]cutoff-scheme = Verlet[tt] in the [TT].mdp[tt] file.", -+ "[PAR]", -+ "With GPUs (only supported with the Verlet cut-off scheme), the number", -+ "of GPUs should match the number of particle-particle ranks, i.e.", -+ "excluding PME-only ranks. With thread-MPI, unless set on the command line, the number", -+ "of MPI threads will automatically be set to the number of GPUs detected.", -+ "To use a subset of the available GPUs, or to manually provide a mapping of", -+ "GPUs to PP ranks, you can use the [TT]-gpu_id[tt] option. The argument of [TT]-gpu_id[tt] is", -+ "a string of digits (without delimiter) representing device id-s of the GPUs to be used.", -+ "For example, \"[TT]02[tt]\" specifies using GPUs 0 and 2 in the first and second PP ranks per compute node", -+ "respectively. To select different sets of GPU-s", -+ "on different nodes of a compute cluster, use the [TT]GMX_GPU_ID[tt] environment", -+ "variable instead. The format for [TT]GMX_GPU_ID[tt] is identical to ", -+ "[TT]-gpu_id[tt], with the difference that an environment variable can have", -+ "different values on different compute nodes. Multiple MPI ranks on each node", -+ "can share GPUs. This is accomplished by specifying the id(s) of the GPU(s)", -+ "multiple times, e.g. \"[TT]0011[tt]\" for four ranks sharing two GPUs in this node.", -+ "This works within a single simulation, or a multi-simulation, with any form of MPI.", -+ "[PAR]", -+ "With the Verlet cut-off scheme and verlet-buffer-tolerance set,", -+ "the pair-list update interval nstlist can be chosen freely with", -+ "the option [TT]-nstlist[tt]. [TT]mdrun[tt] will then adjust", -+ "the pair-list cut-off to maintain accuracy, and not adjust nstlist.", -+ "Otherwise, by default, [TT]mdrun[tt] will try to increase the", -+ "value of nstlist set in the [TT].mdp[tt] file to improve the", -+ "performance. For CPU-only runs, nstlist might increase to 20, for", -+ "GPU runs up to 40. For medium to high parallelization or with", -+ "fast GPUs, a (user-supplied) larger nstlist value can give much", -+ "better performance.", -+ "[PAR]", -+ "When using PME with separate PME ranks or with a GPU, the two major", -+ "compute tasks, the non-bonded force calculation and the PME calculation", -+ "run on different compute resources. If this load is not balanced,", -+ "some of the resources will be idle part of time. With the Verlet", -+ "cut-off scheme this load is automatically balanced when the PME load", -+ "is too high (but not when it is too low). This is done by scaling", -+ "the Coulomb cut-off and PME grid spacing by the same amount. In the first", -+ "few hundred steps different settings are tried and the fastest is chosen", -+ "for the rest of the simulation. This does not affect the accuracy of", -+ "the results, but it does affect the decomposition of the Coulomb energy", -+ "into particle and mesh contributions. 
The auto-tuning can be turned off", -+ "with the option [TT]-notunepme[tt].", -+ "[PAR]", -+ "[TT]mdrun[tt] pins (sets affinity of) threads to specific cores,", -+ "when all (logical) cores on a compute node are used by [TT]mdrun[tt],", -+ "even when no multi-threading is used,", -+ "as this usually results in significantly better performance.", -+ "If the queuing systems or the OpenMP library pinned threads, we honor", -+ "this and don't pin again, even though the layout may be sub-optimal.", -+ "If you want to have [TT]mdrun[tt] override an already set thread affinity", -+ "or pin threads when using less cores, use [TT]-pin on[tt].", -+ "With SMT (simultaneous multithreading), e.g. Intel Hyper-Threading,", -+ "there are multiple logical cores per physical core.", -+ "The option [TT]-pinstride[tt] sets the stride in logical cores for", -+ "pinning consecutive threads. Without SMT, 1 is usually the best choice.", -+ "With Intel Hyper-Threading 2 is best when using half or less of the", -+ "logical cores, 1 otherwise. The default value of 0 do exactly that:", -+ "it minimizes the threads per logical core, to optimize performance.", -+ "If you want to run multiple [TT]mdrun[tt] jobs on the same physical node," -+ "you should set [TT]-pinstride[tt] to 1 when using all logical cores.", -+ "When running multiple [TT]mdrun[tt] (or other) simulations on the same physical", -+ "node, some simulations need to start pinning from a non-zero core", -+ "to avoid overloading cores; with [TT]-pinoffset[tt] you can specify", -+ "the offset in logical cores for pinning.", -+ "[PAR]", -+ "When [TT]mdrun[tt] is started with more than 1 rank,", -+ "parallelization with domain decomposition is used.", -+ "[PAR]", -+ "With domain decomposition, the spatial decomposition can be set", -+ "with option [TT]-dd[tt]. By default [TT]mdrun[tt] selects a good decomposition.", -+ "The user only needs to change this when the system is very inhomogeneous.", -+ "Dynamic load balancing is set with the option [TT]-dlb[tt],", -+ "which can give a significant performance improvement,", -+ "especially for inhomogeneous systems. The only disadvantage of", -+ "dynamic load balancing is that runs are no longer binary reproducible,", -+ "but in most cases this is not important.", -+ "By default the dynamic load balancing is automatically turned on", -+ "when the measured performance loss due to load imbalance is 5% or more.", -+ "At low parallelization these are the only important options", -+ "for domain decomposition.", -+ "At high parallelization the options in the next two sections", -+ "could be important for increasing the performace.", -+ "[PAR]", -+ "When PME is used with domain decomposition, separate ranks can", -+ "be assigned to do only the PME mesh calculation;", -+ "this is computationally more efficient starting at about 12 ranks,", -+ "or even fewer when OpenMP parallelization is used.", -+ "The number of PME ranks is set with option [TT]-npme[tt],", -+ "but this cannot be more than half of the ranks.", -+ "By default [TT]mdrun[tt] makes a guess for the number of PME", -+ "ranks when the number of ranks is larger than 16. With GPUs,", -+ "using separate PME ranks is not selected automatically,", -+ "since the optimal setup depends very much on the details", -+ "of the hardware. In all cases, you might gain performance", -+ "by optimizing [TT]-npme[tt]. 
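/*
 * A minimal sketch (not from the GROMACS sources) of the core selection
 * implied by -pinoffset and -pinstride: thread t is pinned to logical core
 * offset + t*stride, and a stride of 0 is read as "spread the threads as
 * widely as possible over the logical cores", which minimizes threads per
 * physical core.  pin_core_for_thread() is a hypothetical helper name.
 */
#include <stdio.h>

static int pin_core_for_thread(int thread, int offset, int stride,
                               int nlogical, int nthreads)
{
    if (stride == 0)
    {
        stride = nlogical / nthreads;   /* default: spread over all logical cores */
        if (stride < 1)
        {
            stride = 1;
        }
    }
    return (offset + thread*stride) % nlogical;
}

int main(void)
{
    int t;

    /* e.g. 8 threads on a node with 16 logical cores, default stride */
    for (t = 0; t < 8; t++)
    {
        printf("thread %d -> logical core %d\n",
               t, pin_core_for_thread(t, 0, 0, 16, 8));
    }
    return 0;
}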
Performance statistics on this issue", -+ "are written at the end of the log file.", -+ "For good load balancing at high parallelization, the PME grid x and y", -+ "dimensions should be divisible by the number of PME ranks", -+ "(the simulation will run correctly also when this is not the case).", -+ "[PAR]", -+ "This section lists all options that affect the domain decomposition.", -+ "[PAR]", -+ "Option [TT]-rdd[tt] can be used to set the required maximum distance", -+ "for inter charge-group bonded interactions.", -+ "Communication for two-body bonded interactions below the non-bonded", -+ "cut-off distance always comes for free with the non-bonded communication.", -+ "Atoms beyond the non-bonded cut-off are only communicated when they have", -+ "missing bonded interactions; this means that the extra cost is minor", -+ "and nearly indepedent of the value of [TT]-rdd[tt].", -+ "With dynamic load balancing option [TT]-rdd[tt] also sets", -+ "the lower limit for the domain decomposition cell sizes.", -+ "By default [TT]-rdd[tt] is determined by [TT]mdrun[tt] based on", -+ "the initial coordinates. The chosen value will be a balance", -+ "between interaction range and communication cost.", -+ "[PAR]", -+ "When inter charge-group bonded interactions are beyond", -+ "the bonded cut-off distance, [TT]mdrun[tt] terminates with an error message.", -+ "For pair interactions and tabulated bonds", -+ "that do not generate exclusions, this check can be turned off", -+ "with the option [TT]-noddcheck[tt].", -+ "[PAR]", -+ "When constraints are present, option [TT]-rcon[tt] influences", -+ "the cell size limit as well.", -+ "Atoms connected by NC constraints, where NC is the LINCS order plus 1,", -+ "should not be beyond the smallest cell size. A error message is", -+ "generated when this happens and the user should change the decomposition", -+ "or decrease the LINCS order and increase the number of LINCS iterations.", -+ "By default [TT]mdrun[tt] estimates the minimum cell size required for P-LINCS", -+ "in a conservative fashion. For high parallelization it can be useful", -+ "to set the distance required for P-LINCS with the option [TT]-rcon[tt].", -+ "[PAR]", -+ "The [TT]-dds[tt] option sets the minimum allowed x, y and/or z scaling", -+ "of the cells with dynamic load balancing. [TT]mdrun[tt] will ensure that", -+ "the cells can scale down by at least this factor. This option is used", -+ "for the automated spatial decomposition (when not using [TT]-dd[tt])", -+ "as well as for determining the number of grid pulses, which in turn", -+ "sets the minimum allowed cell size. Under certain circumstances", -+ "the value of [TT]-dds[tt] might need to be adjusted to account for", -+ "high or low spatial inhomogeneity of the system.", -+ "[PAR]", -+ "The option [TT]-gcom[tt] can be used to only do global communication", -+ "every n steps.", -+ "This can improve performance for highly parallel simulations", -+ "where this global communication step becomes the bottleneck.", -+ "For a global thermostat and/or barostat the temperature", -+ "and/or pressure will also only be updated every [TT]-gcom[tt] steps.", -+ "By default it is set to the minimum of nstcalcenergy and nstlist.[PAR]", -+ "With [TT]-rerun[tt] an input trajectory can be given for which ", -+ "forces and energies will be (re)calculated. 
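/*
 * A minimal sketch (not from the GROMACS sources) of the default choice
 * for the -gcom interval described above: when the user does not set it,
 * it falls back to the smaller of nstcalcenergy and nstlist.
 * default_nstglobalcomm() is a hypothetical helper name and omits the
 * extra consistency checks the real code performs.
 */
#include <stdio.h>

static int default_nstglobalcomm(int nstglobalcomm, int nstcalcenergy, int nstlist)
{
    if (nstglobalcomm > 0)
    {
        return nstglobalcomm;      /* user-supplied value wins */
    }
    return (nstcalcenergy < nstlist) ? nstcalcenergy : nstlist;
}

int main(void)
{
    /* e.g. nstcalcenergy=100, nstlist=10: global communication every 10 steps */
    printf("nstglobalcomm = %d\n", default_nstglobalcomm(-1, 100, 10));
    return 0;
}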
Neighbor searching will be", -+ "performed for every frame, unless [TT]nstlist[tt] is zero", -+ "(see the [TT].mdp[tt] file).[PAR]", -+ "ED (essential dynamics) sampling and/or additional flooding potentials", -+ "are switched on by using the [TT]-ei[tt] flag followed by an [TT].edi[tt]", -+ "file. The [TT].edi[tt] file can be produced with the [TT]make_edi[tt] tool", -+ "or by using options in the essdyn menu of the WHAT IF program.", -+ "[TT]mdrun[tt] produces a [TT].xvg[tt] output file that", -+ "contains projections of positions, velocities and forces onto selected", -+ "eigenvectors.[PAR]", -+ "When user-defined potential functions have been selected in the", -+ "[TT].mdp[tt] file the [TT]-table[tt] option is used to pass [TT]mdrun[tt]", -+ "a formatted table with potential functions. The file is read from", -+ "either the current directory or from the [TT]GMXLIB[tt] directory.", -+ "A number of pre-formatted tables are presented in the [TT]GMXLIB[tt] dir,", -+ "for 6-8, 6-9, 6-10, 6-11, 6-12 Lennard-Jones potentials with", -+ "normal Coulomb.", -+ "When pair interactions are present, a separate table for pair interaction", -+ "functions is read using the [TT]-tablep[tt] option.[PAR]", -+ "When tabulated bonded functions are present in the topology,", -+ "interaction functions are read using the [TT]-tableb[tt] option.", -+ "For each different tabulated interaction type the table file name is", -+ "modified in a different way: before the file extension an underscore is", -+ "appended, then a 'b' for bonds, an 'a' for angles or a 'd' for dihedrals", -+ "and finally the table number of the interaction type.[PAR]", -+ "The options [TT]-px[tt] and [TT]-pf[tt] are used for writing pull COM", -+ "coordinates and forces when pulling is selected", -+ "in the [TT].mdp[tt] file.[PAR]", -+ "With [TT]-multi[tt] or [TT]-multidir[tt], multiple systems can be ", -+ "simulated in parallel.", -+ "As many input files/directories are required as the number of systems. ", -+ "The [TT]-multidir[tt] option takes a list of directories (one for each ", -+ "system) and runs in each of them, using the input/output file names, ", -+ "such as specified by e.g. the [TT]-s[tt] option, relative to these ", -+ "directories.", -+ "With [TT]-multi[tt], the system number is appended to the run input ", -+ "and each output filename, for instance [TT]topol.tpr[tt] becomes", -+ "[TT]topol0.tpr[tt], [TT]topol1.tpr[tt] etc.", -+ "The number of ranks per system is the total number of ranks", -+ "divided by the number of systems.", -+ "One use of this option is for NMR refinement: when distance", -+ "or orientation restraints are present these can be ensemble averaged", -+ "over all the systems.[PAR]", -+ "With [TT]-replex[tt] replica exchange is attempted every given number", -+ "of steps. The number of replicas is set with the [TT]-multi[tt] or ", -+ "[TT]-multidir[tt] option, described above.", -+ "All run input files should use a different coupling temperature,", -+ "the order of the files is not important. The random seed is set with", -+ "[TT]-reseed[tt]. The velocities are scaled and neighbor searching", -+ "is performed after every exchange.[PAR]", -+ "Finally some experimental algorithms can be tested when the", -+ "appropriate options have been given. Currently under", -+ "investigation are: polarizability.", -+ "[PAR]", -+ "The option [TT]-membed[tt] does what used to be g_membed, i.e. embed", -+ "a protein into a membrane. The data file should contain the options", -+ "that where passed to g_membed before. 
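/*
 * A minimal sketch (not from the GROMACS sources) of the file-name
 * convention described above for tabulated bonded interactions: an
 * underscore, a type letter ('b' for bonds, 'a' for angles, 'd' for
 * dihedrals) and the table number are inserted before the extension.
 * make_table_name() is a hypothetical helper name.
 */
#include <stdio.h>
#include <string.h>

static void make_table_name(const char *base, char type, int number,
                            char *out, size_t outlen)
{
    const char *dot  = strrchr(base, '.');
    size_t      stem = (dot != NULL) ? (size_t)(dot - base) : strlen(base);

    /* copy the stem, append _<type><number>, then re-attach the extension */
    snprintf(out, outlen, "%.*s_%c%d%s",
             (int)stem, base, type, number, (dot != NULL) ? dot : "");
}

int main(void)
{
    char name[128];

    make_table_name("table.xvg", 'd', 3, name, sizeof(name));
    printf("%s\n", name);   /* -> table_d3.xvg */
    return 0;
}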
The [TT]-mn[tt] and [TT]-mp[tt]", -+ "both apply to this as well.", -+ "[PAR]", -+ "The option [TT]-pforce[tt] is useful when you suspect a simulation", -+ "crashes due to too large forces. With this option coordinates and", -+ "forces of atoms with a force larger than a certain value will", -+ "be printed to stderr.", -+ "[PAR]", -+ "Checkpoints containing the complete state of the system are written", -+ "at regular intervals (option [TT]-cpt[tt]) to the file [TT]-cpo[tt],", -+ "unless option [TT]-cpt[tt] is set to -1.", -+ "The previous checkpoint is backed up to [TT]state_prev.cpt[tt] to", -+ "make sure that a recent state of the system is always available,", -+ "even when the simulation is terminated while writing a checkpoint.", -+ "With [TT]-cpnum[tt] all checkpoint files are kept and appended", -+ "with the step number.", -+ "A simulation can be continued by reading the full state from file", -+ "with option [TT]-cpi[tt]. This option is intelligent in the way that", -+ "if no checkpoint file is found, Gromacs just assumes a normal run and", -+ "starts from the first step of the [TT].tpr[tt] file. By default the output", -+ "will be appending to the existing output files. The checkpoint file", -+ "contains checksums of all output files, such that you will never", -+ "loose data when some output files are modified, corrupt or removed.", -+ "There are three scenarios with [TT]-cpi[tt]:[PAR]", -+ "[TT]*[tt] no files with matching names are present: new output files are written[PAR]", -+ "[TT]*[tt] all files are present with names and checksums matching those stored", -+ "in the checkpoint file: files are appended[PAR]", -+ "[TT]*[tt] otherwise no files are modified and a fatal error is generated[PAR]", -+ "With [TT]-noappend[tt] new output files are opened and the simulation", -+ "part number is added to all output file names.", -+ "Note that in all cases the checkpoint file itself is not renamed", -+ "and will be overwritten, unless its name does not match", -+ "the [TT]-cpo[tt] option.", -+ "[PAR]", -+ "With checkpointing the output is appended to previously written", -+ "output files, unless [TT]-noappend[tt] is used or none of the previous", -+ "output files are present (except for the checkpoint file).", -+ "The integrity of the files to be appended is verified using checksums", -+ "which are stored in the checkpoint file. This ensures that output can", -+ "not be mixed up or corrupted due to file appending. When only some", -+ "of the previous output files are present, a fatal error is generated", -+ "and no old output files are modified and no new output files are opened.", -+ "The result with appending will be the same as from a single run.", -+ "The contents will be binary identical, unless you use a different number", -+ "of ranks or dynamic load balancing or the FFT library uses optimizations", -+ "through timing.", -+ "[PAR]", -+ "With option [TT]-maxh[tt] a simulation is terminated and a checkpoint", -+ "file is written at the first neighbor search step where the run time", -+ "exceeds [TT]-maxh[tt]*0.99 hours.", -+ "[PAR]", -+ "When [TT]mdrun[tt] receives a TERM signal, it will set nsteps to the current", -+ "step plus one. When [TT]mdrun[tt] receives an INT signal (e.g. 
when ctrl+C is", -+ "pressed), it will stop after the next neighbor search step ", -+ "(with nstlist=0 at the next step).", -+ "In both cases all the usual output will be written to file.", -+ "When running with MPI, a signal to one of the [TT]mdrun[tt] ranks", -+ "is sufficient, this signal should not be sent to mpirun or", -+ "the [TT]mdrun[tt] process that is the parent of the others.", -+ "[PAR]", -+ "Interactive molecular dynamics (IMD) can be activated by using at least one", -+ "of the three IMD switches: The [TT]-imdterm[tt] switch allows to terminate the", -+ "simulation from the molecular viewer (e.g. VMD). With [TT]-imdwait[tt],", -+ "[TT]mdrun[tt] pauses whenever no IMD client is connected. Pulling from the", -+ "IMD remote can be turned on by [TT]-imdpull[tt].", -+ "The port [TT]mdrun[tt] listens to can be altered by [TT]-imdport[tt].The", -+ "file pointed to by [TT]-if[tt] contains atom indices and forces if IMD", -+ "pulling is used." -+ "[PAR]", -+ "When [TT]mdrun[tt] is started with MPI, it does not run niced by default." -+ }; -+ t_commrec *cr; -+ t_filenm fnm[] = { -+ { efTPX, NULL, NULL, ffREAD }, -+ { efTRN, "-o", NULL, ffWRITE }, -+ { efCOMPRESSED, "-x", NULL, ffOPTWR }, -+ { efCPT, "-cpi", NULL, ffOPTRD }, -+ { efCPT, "-cpo", NULL, ffOPTWR }, -+ { efSTO, "-c", "confout", ffWRITE }, -+ { efEDR, "-e", "ener", ffWRITE }, -+ { efLOG, "-g", "md", ffWRITE }, -+ { efXVG, "-dhdl", "dhdl", ffOPTWR }, -+ { efXVG, "-field", "field", ffOPTWR }, -+ { efXVG, "-table", "table", ffOPTRD }, -+ { efXVG, "-tabletf", "tabletf", ffOPTRD }, -+ { efXVG, "-tablep", "tablep", ffOPTRD }, -+ { efXVG, "-tableb", "table", ffOPTRD }, -+ { efTRX, "-rerun", "rerun", ffOPTRD }, -+ { efXVG, "-tpi", "tpi", ffOPTWR }, -+ { efXVG, "-tpid", "tpidist", ffOPTWR }, -+ { efEDI, "-ei", "sam", ffOPTRD }, -+ { efXVG, "-eo", "edsam", ffOPTWR }, -+ { efXVG, "-devout", "deviatie", ffOPTWR }, -+ { efXVG, "-runav", "runaver", ffOPTWR }, -+ { efXVG, "-px", "pullx", ffOPTWR }, -+ { efXVG, "-pf", "pullf", ffOPTWR }, -+ { efXVG, "-ro", "rotation", ffOPTWR }, -+ { efLOG, "-ra", "rotangles", ffOPTWR }, -+ { efLOG, "-rs", "rotslabs", ffOPTWR }, -+ { efLOG, "-rt", "rottorque", ffOPTWR }, -+ { efMTX, "-mtx", "nm", ffOPTWR }, -+ { efNDX, "-dn", "dipole", ffOPTWR }, -+ { efRND, "-multidir", NULL, ffOPTRDMULT}, -+ { efDAT, "-membed", "membed", ffOPTRD }, -+ { efTOP, "-mp", "membed", ffOPTRD }, -+ { efNDX, "-mn", "membed", ffOPTRD }, -+ { efXVG, "-if", "imdforces", ffOPTWR }, -+ { efXVG, "-swap", "swapions", ffOPTWR } -+ }; -+#define NFILE asize(fnm) -+ -+ /* Command line options ! 
*/ -+ gmx_bool bDDBondCheck = TRUE; -+ gmx_bool bDDBondComm = TRUE; -+ gmx_bool bTunePME = TRUE; -+ gmx_bool bTestVerlet = FALSE; -+ gmx_bool bVerbose = FALSE; -+ gmx_bool bCompact = TRUE; -+ gmx_bool bSepPot = FALSE; -+ gmx_bool bRerunVSite = FALSE; -+ gmx_bool bConfout = TRUE; -+ gmx_bool bReproducible = FALSE; -+ gmx_bool bIMDwait = FALSE; -+ gmx_bool bIMDterm = FALSE; -+ gmx_bool bIMDpull = FALSE; -+ -+ int npme = -1; -+ int nstlist = 0; -+ int nmultisim = 0; -+ int nstglobalcomm = -1; -+ int repl_ex_nst = 0; -+ int repl_ex_seed = -1; -+ int repl_ex_nex = 0; -+ int nstepout = 100; -+ int resetstep = -1; -+ gmx_int64_t nsteps = -2; /* the value -2 means that the mdp option will be used */ -+ int imdport = 8888; /* can be almost anything, 8888 is easy to remember */ -+ -+ rvec realddxyz = {0, 0, 0}; -+ const char *ddno_opt[ddnoNR+1] = -+ { NULL, "interleave", "pp_pme", "cartesian", NULL }; -+ const char *dddlb_opt[] = -+ { NULL, "auto", "no", "yes", NULL }; -+ const char *thread_aff_opt[threadaffNR+1] = -+ { NULL, "auto", "on", "off", NULL }; -+ const char *nbpu_opt[] = -+ { NULL, "auto", "cpu", "gpu", "gpu_cpu", NULL }; -+ real rdd = 0.0, rconstr = 0.0, dlb_scale = 0.8, pforce = -1; -+ char *ddcsx = NULL, *ddcsy = NULL, *ddcsz = NULL; -+ real cpt_period = 15.0, max_hours = -1; -+ gmx_bool bAppendFiles = TRUE; -+ gmx_bool bKeepAndNumCPT = FALSE; -+ gmx_bool bResetCountersHalfWay = FALSE; -+ output_env_t oenv = NULL; -+ const char *deviceOptions = ""; -+ -+ /* Non transparent initialization of a complex gmx_hw_opt_t struct. -+ * But unfortunately we are not allowed to call a function here, -+ * since declarations follow below. -+ */ -+ gmx_hw_opt_t hw_opt = { -+ 0, 0, 0, 0, threadaffSEL, 0, 0, -+ { NULL, FALSE, 0, NULL } -+ }; -+ -+ t_pargs pa[] = { -+ -+ { "-dd", FALSE, etRVEC, {&realddxyz}, -+ "Domain decomposition grid, 0 is optimize" }, -+ { "-ddorder", FALSE, etENUM, {ddno_opt}, -+ "DD rank order" }, -+ { "-npme", FALSE, etINT, {&npme}, -+ "Number of separate ranks to be used for PME, -1 is guess" }, -+ { "-nt", FALSE, etINT, {&hw_opt.nthreads_tot}, -+ "Total number of threads to start (0 is guess)" }, -+ { "-ntmpi", FALSE, etINT, {&hw_opt.nthreads_tmpi}, -+ "Number of thread-MPI threads to start (0 is guess)" }, -+ { "-ntomp", FALSE, etINT, {&hw_opt.nthreads_omp}, -+ "Number of OpenMP threads per MPI rank to start (0 is guess)" }, -+ { "-ntomp_pme", FALSE, etINT, {&hw_opt.nthreads_omp_pme}, -+ "Number of OpenMP threads per MPI rank to start (0 is -ntomp)" }, -+ { "-pin", FALSE, etENUM, {thread_aff_opt}, -+ "Set thread affinities" }, -+ { "-pinoffset", FALSE, etINT, {&hw_opt.core_pinning_offset}, -+ "The starting logical core number for pinning to cores; used to avoid pinning threads from different mdrun instances to the same core" }, -+ { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride}, -+ "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" }, -+ { "-gpu_id", FALSE, etSTR, {&hw_opt.gpu_opt.gpu_id}, -+ "List of GPU device id-s to use, specifies the per-node PP rank to GPU mapping" }, -+ { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck}, -+ "Check for all bonded interactions with DD" }, -+ { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm}, -+ "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" }, -+ { "-rdd", FALSE, etREAL, {&rdd}, -+ "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" }, -+ { "-rcon", FALSE, etREAL, {&rconstr}, -+ "Maximum distance for 
P-LINCS (nm), 0 is estimate" }, -+ { "-dlb", FALSE, etENUM, {dddlb_opt}, -+ "Dynamic load balancing (with DD)" }, -+ { "-dds", FALSE, etREAL, {&dlb_scale}, -+ "Fraction in (0,1) by whose reciprocal the initial DD cell size will be increased in order to " -+ "provide a margin in which dynamic load balancing can act while preserving the minimum cell size." }, -+ { "-ddcsx", FALSE, etSTR, {&ddcsx}, -+ "HIDDENA string containing a vector of the relative sizes in the x " -+ "direction of the corresponding DD cells. Only effective with static " -+ "load balancing." }, -+ { "-ddcsy", FALSE, etSTR, {&ddcsy}, -+ "HIDDENA string containing a vector of the relative sizes in the y " -+ "direction of the corresponding DD cells. Only effective with static " -+ "load balancing." }, -+ { "-ddcsz", FALSE, etSTR, {&ddcsz}, -+ "HIDDENA string containing a vector of the relative sizes in the z " -+ "direction of the corresponding DD cells. Only effective with static " -+ "load balancing." }, -+ { "-gcom", FALSE, etINT, {&nstglobalcomm}, -+ "Global communication frequency" }, -+ { "-nb", FALSE, etENUM, {&nbpu_opt}, -+ "Calculate non-bonded interactions on" }, -+ { "-nstlist", FALSE, etINT, {&nstlist}, -+ "Set nstlist when using a Verlet buffer tolerance (0 is guess)" }, -+ { "-tunepme", FALSE, etBOOL, {&bTunePME}, -+ "Optimize PME load between PP/PME ranks or GPU/CPU" }, -+ { "-testverlet", FALSE, etBOOL, {&bTestVerlet}, -+ "Test the Verlet non-bonded scheme" }, -+ { "-v", FALSE, etBOOL, {&bVerbose}, -+ "Be loud and noisy" }, -+ { "-compact", FALSE, etBOOL, {&bCompact}, -+ "Write a compact log file" }, -+ { "-seppot", FALSE, etBOOL, {&bSepPot}, -+ "Write separate V and dVdl terms for each interaction type and rank to the log file(s)" }, -+ { "-pforce", FALSE, etREAL, {&pforce}, -+ "Print all forces larger than this (kJ/mol nm)" }, -+ { "-reprod", FALSE, etBOOL, {&bReproducible}, -+ "Try to avoid optimizations that affect binary reproducibility" }, -+ { "-cpt", FALSE, etREAL, {&cpt_period}, -+ "Checkpoint interval (minutes)" }, -+ { "-cpnum", FALSE, etBOOL, {&bKeepAndNumCPT}, -+ "Keep and number checkpoint files" }, -+ { "-append", FALSE, etBOOL, {&bAppendFiles}, -+ "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" }, -+ { "-nsteps", FALSE, etINT64, {&nsteps}, -+ "Run this number of steps, overrides .mdp file option" }, -+ { "-maxh", FALSE, etREAL, {&max_hours}, -+ "Terminate after 0.99 times this time (hours)" }, -+ { "-multi", FALSE, etINT, {&nmultisim}, -+ "Do multiple simulations in parallel" }, -+ { "-replex", FALSE, etINT, {&repl_ex_nst}, -+ "Attempt replica exchange periodically with this period (steps)" }, -+ { "-nex", FALSE, etINT, {&repl_ex_nex}, -+ "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion). -nex zero or not specified gives neighbor replica exchange." 
}, -+ { "-reseed", FALSE, etINT, {&repl_ex_seed}, -+ "Seed for replica exchange, -1 is generate a seed" }, -+ { "-imdport", FALSE, etINT, {&imdport}, -+ "HIDDENIMD listening port" }, -+ { "-imdwait", FALSE, etBOOL, {&bIMDwait}, -+ "HIDDENPause the simulation while no IMD client is connected" }, -+ { "-imdterm", FALSE, etBOOL, {&bIMDterm}, -+ "HIDDENAllow termination of the simulation from IMD client" }, -+ { "-imdpull", FALSE, etBOOL, {&bIMDpull}, -+ "HIDDENAllow pulling in the simulation from IMD client" }, -+ { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite}, -+ "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" }, -+ { "-confout", FALSE, etBOOL, {&bConfout}, -+ "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" }, -+ { "-stepout", FALSE, etINT, {&nstepout}, -+ "HIDDENFrequency of writing the remaining wall clock time for the run" }, -+ { "-resetstep", FALSE, etINT, {&resetstep}, -+ "HIDDENReset cycle counters after these many time steps" }, -+ { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay}, -+ "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" } -+ }; -+ unsigned long Flags, PCA_Flags; -+ ivec ddxyz; -+ int dd_node_order; -+ gmx_bool bAddPart; -+ FILE *fplog, *fpmulti; -+ int sim_part, sim_part_fn; -+ const char *part_suffix = ".part"; -+ char suffix[STRLEN]; -+ int rc; -+ char **multidir = NULL; -+ -+ -+ cr = init_commrec(); -+ -+ PCA_Flags = (PCA_CAN_SET_DEFFNM | (MASTER(cr) ? 0 : PCA_QUIET)); -+ -+ /* Comment this in to do fexist calls only on master -+ * works not with rerun or tables at the moment -+ * also comment out the version of init_forcerec in md.c -+ * with NULL instead of opt2fn -+ */ -+ /* -+ if (!MASTER(cr)) -+ { -+ PCA_Flags |= PCA_NOT_READ_NODE; -+ } -+ */ -+ -+ if (!parse_common_args(&argc, argv, PCA_Flags, NFILE, fnm, asize(pa), pa, -+ asize(desc), desc, 0, NULL, &oenv)) -+ { -+ return 0; -+ } -+ -+ -+ /* we set these early because they might be used in init_multisystem() -+ Note that there is the potential for npme>nnodes until the number of -+ threads is set later on, if there's thread parallelization. That shouldn't -+ lead to problems. */ -+ dd_node_order = nenum(ddno_opt); -+ cr->npmenodes = npme; -+ -+ hw_opt.thread_affinity = nenum(thread_aff_opt); -+ -+ /* now check the -multi and -multidir option */ -+ if (opt2bSet("-multidir", NFILE, fnm)) -+ { -+ if (nmultisim > 0) -+ { -+ gmx_fatal(FARGS, "mdrun -multi and -multidir options are mutually exclusive."); -+ } -+ nmultisim = opt2fns(&multidir, "-multidir", NFILE, fnm); -+ } -+ -+ -+ if (repl_ex_nst != 0 && nmultisim < 2) -+ { -+ gmx_fatal(FARGS, "Need at least two replicas for replica exchange (option -multi)"); -+ } -+ -+ if (repl_ex_nex < 0) -+ { -+ gmx_fatal(FARGS, "Replica exchange number of exchanges needs to be positive"); -+ } -+ -+ if (nmultisim > 1) -+ { -+#ifndef GMX_THREAD_MPI -+ gmx_bool bParFn = (multidir == NULL); -+ init_multisystem(cr, nmultisim, multidir, NFILE, fnm, bParFn); -+#else -+ gmx_fatal(FARGS, "mdrun -multi is not supported with the thread library. 
" -+ "Please compile GROMACS with MPI support"); -+#endif -+ } -+ -+ bAddPart = !bAppendFiles; -+ -+ /* Check if there is ANY checkpoint file available */ -+ sim_part = 1; -+ sim_part_fn = sim_part; -+ if (opt2bSet("-cpi", NFILE, fnm)) -+ { -+ if (bSepPot && bAppendFiles) -+ { -+ gmx_fatal(FARGS, "Output file appending is not supported with -seppot"); -+ } -+ -+ bAppendFiles = -+ read_checkpoint_simulation_part(opt2fn_master("-cpi", NFILE, -+ fnm, cr), -+ &sim_part_fn, NULL, cr, -+ bAppendFiles, NFILE, fnm, -+ part_suffix, &bAddPart); -+ if (sim_part_fn == 0 && MULTIMASTER(cr)) -+ { -+ fprintf(stdout, "No previous checkpoint file present, assuming this is a new run.\n"); -+ } -+ else -+ { -+ sim_part = sim_part_fn + 1; -+ } -+ -+ if (MULTISIM(cr) && MASTER(cr)) -+ { -+ if (MULTIMASTER(cr)) -+ { -+ /* Log file is not yet available, so if there's a -+ * problem we can only write to stderr. */ -+ fpmulti = stderr; -+ } -+ else -+ { -+ fpmulti = NULL; -+ } -+ check_multi_int(fpmulti, cr->ms, sim_part, "simulation part", TRUE); -+ } -+ } -+ else -+ { -+ bAppendFiles = FALSE; -+ } -+ -+ if (!bAppendFiles) -+ { -+ sim_part_fn = sim_part; -+ } -+ -+ if (bAddPart) -+ { -+ /* Rename all output files (except checkpoint files) */ -+ /* create new part name first (zero-filled) */ -+ sprintf(suffix, "%s%04d", part_suffix, sim_part_fn); -+ -+ add_suffix_to_output_names(fnm, NFILE, suffix); -+ if (MULTIMASTER(cr)) -+ { -+ fprintf(stdout, "Checkpoint file is from part %d, new output files will be suffixed '%s'.\n", sim_part-1, suffix); -+ } -+ } -+ -+ Flags = opt2bSet("-rerun", NFILE, fnm) ? MD_RERUN : 0; -+ Flags = Flags | (bSepPot ? MD_SEPPOT : 0); -+ Flags = Flags | (bDDBondCheck ? MD_DDBONDCHECK : 0); -+ Flags = Flags | (bDDBondComm ? MD_DDBONDCOMM : 0); -+ Flags = Flags | (bTunePME ? MD_TUNEPME : 0); -+ Flags = Flags | (bTestVerlet ? MD_TESTVERLET : 0); -+ Flags = Flags | (bConfout ? MD_CONFOUT : 0); -+ Flags = Flags | (bRerunVSite ? MD_RERUN_VSITE : 0); -+ Flags = Flags | (bReproducible ? MD_REPRODUCIBLE : 0); -+ Flags = Flags | (bAppendFiles ? MD_APPENDFILES : 0); -+ Flags = Flags | (opt2parg_bSet("-append", asize(pa), pa) ? MD_APPENDFILESSET : 0); -+ Flags = Flags | (bKeepAndNumCPT ? MD_KEEPANDNUMCPT : 0); -+ Flags = Flags | (sim_part > 1 ? MD_STARTFROMCPT : 0); -+ Flags = Flags | (bResetCountersHalfWay ? MD_RESETCOUNTERSHALFWAY : 0); -+ Flags = Flags | (bIMDwait ? MD_IMDWAIT : 0); -+ Flags = Flags | (bIMDterm ? MD_IMDTERM : 0); -+ Flags = Flags | (bIMDpull ? MD_IMDPULL : 0); -+ -+ /* We postpone opening the log file if we are appending, so we can -+ first truncate the old log file and append to the correct position -+ there instead. 
*/ -+ if ((MASTER(cr) || bSepPot) && !bAppendFiles) -+ { -+ gmx_log_open(ftp2fn(efLOG, NFILE, fnm), cr, -+ !bSepPot, Flags & MD_APPENDFILES, &fplog); -+ please_cite(fplog, "Hess2008b"); -+ please_cite(fplog, "Spoel2005a"); -+ please_cite(fplog, "Lindahl2001a"); -+ please_cite(fplog, "Berendsen95a"); -+ } -+ else if (!MASTER(cr) && bSepPot) -+ { -+ gmx_log_open(ftp2fn(efLOG, NFILE, fnm), cr, !bSepPot, Flags, &fplog); -+ } -+ else -+ { -+ fplog = NULL; -+ } -+ -+ ddxyz[XX] = (int)(realddxyz[XX] + 0.5); -+ ddxyz[YY] = (int)(realddxyz[YY] + 0.5); -+ ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5); -+ -+ rc = mdrunner(&hw_opt, fplog, cr, NFILE, fnm, oenv, bVerbose, bCompact, -+ nstglobalcomm, ddxyz, dd_node_order, rdd, rconstr, -+ dddlb_opt[0], dlb_scale, ddcsx, ddcsy, ddcsz, -+ nbpu_opt[0], nstlist, -+ nsteps, nstepout, resetstep, -+ nmultisim, repl_ex_nst, repl_ex_nex, repl_ex_seed, -+ pforce, cpt_period, max_hours, deviceOptions, imdport, Flags); -+ -+ /* Log file has to be closed in mdrunner if we are appending to it -+ (fplog not set here) */ -+ if (MASTER(cr) && !bAppendFiles) -+ { -+ gmx_log_close(fplog); -+ } -+ -+ return rc; -+} -diff --git a/src/programs/mdrun/repl_ex.c b/src/programs/mdrun/repl_ex.c -index 46a9bc0..cfb0b7f 100644 ---- a/src/programs/mdrun/repl_ex.c -+++ b/src/programs/mdrun/repl_ex.c -@@ -51,6 +51,12 @@ - #include "domdec.h" - #include "gromacs/random/random.h" - -+/* PLUMED */ -+#include "../../../Plumed.h" -+extern int plumedswitch; -+extern plumed plumedmain; -+/* END PLUMED */ -+ - #define PROBABILITYCUTOFF 100 - /* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */ - -@@ -112,14 +118,16 @@ static gmx_bool repl_quantity(const gmx_multisim_t *ms, - qall[re->repl] = q; - gmx_sum_sim(ms->nsim, qall, ms); - -- bDiff = FALSE; -- for (s = 1; s < ms->nsim; s++) -- { -- if (qall[s] != qall[0]) -- { -+ /* PLUMED */ -+ //bDiff = FALSE; -+ //for (s = 1; s < ms->nsim; s++) -+ //{ -+ // if (qall[s] != qall[0]) -+ // { - bDiff = TRUE; -- } -- } -+ // } -+ //} -+ /* END PLUMED */ - - if (bDiff) - { -@@ -269,6 +277,10 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog, - re->ind[i] = i; - } - -+ /* PLUMED */ -+ // plumed2: check if we want alternative patterns (i.e. for bias-exchange metaD) -+ // in those cases replicas can share the same temperature. -+ /* - if (re->type < ereENDSINGLE) - { - -@@ -277,11 +289,12 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog, - for (j = i+1; j < re->nrepl; j++) - { - if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]]) -- { -+ {*/ - /* Unordered replicas are supposed to work, but there - * is still an issues somewhere. - * Note that at this point still re->ind[i]=i. - */ -+ /* - gmx_fatal(FARGS, "Replicas with indices %d < %d have %ss %g > %g, please order your replicas on increasing %s", - i, j, - erename[re->type], -@@ -299,6 +312,8 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog, - } - } - } -+ */ -+ /* END PLUMED */ - - /* keep track of all the swaps, starting with the initial placement. 
*/ - snew(re->allswaps, re->nrepl); -@@ -982,6 +997,10 @@ test_for_replica_exchange(FILE *fplog, - pind[i] = re->ind[i]; - } - -+ /* PLUMED */ -+ int plumed_test_exchange_pattern=0; -+ /* END PLUMED */ -+ - if (bMultiEx) - { - /* multiple random switch exchange */ -@@ -1057,6 +1076,31 @@ test_for_replica_exchange(FILE *fplog, - /* standard nearest neighbor replica exchange */ - - m = (step / re->nst) % 2; -+ /* PLUMED */ -+ if(plumedswitch){ -+ int partner=re->repl; -+ plumed_cmd(plumedmain,"getExchangesFlag",&plumed_test_exchange_pattern); -+ if(plumed_test_exchange_pattern>0){ -+ int *list; -+ snew(list,re->nrepl); -+ plumed_cmd(plumedmain,"setNumberOfReplicas",&(re->nrepl)); -+ plumed_cmd(plumedmain,"getExchangesList",list); -+ for(i=0; inrepl; i++) re->ind[i]=list[i]; -+ sfree(list); -+ } -+ -+ for(i=1; inrepl; i++) { -+ if (i % 2 != m) continue; -+ a = re->ind[i-1]; -+ b = re->ind[i]; -+ if(re->repl==a) partner=b; -+ if(re->repl==b) partner=a; -+ } -+ plumed_cmd(plumedmain,"GREX setPartner",&partner); -+ plumed_cmd(plumedmain,"GREX calculate",NULL); -+ plumed_cmd(plumedmain,"GREX shareAllDeltaBias",NULL); -+ } -+ /* END PLUMED */ - for (i = 1; i < re->nrepl; i++) - { - a = re->ind[i-1]; -@@ -1066,6 +1110,18 @@ test_for_replica_exchange(FILE *fplog, - if (i % 2 == m) - { - delta = calc_delta(fplog, bPrint, re, a, b, a, b); -+ /* PLUMED */ -+ if(plumedswitch){ -+ real adb,bdb,dplumed; -+ char buf[300]; -+ sprintf(buf,"GREX getDeltaBias %d",a); plumed_cmd(plumedmain,buf,&adb); -+ sprintf(buf,"GREX getDeltaBias %d",b); plumed_cmd(plumedmain,buf,&bdb); -+ dplumed=adb*re->beta[a]+bdb*re->beta[b]; -+ delta+=dplumed; -+ if (bPrint) -+ fprintf(fplog,"dplumed = %10.3e dE_Term = %10.3e (kT)\n",dplumed,delta); -+ } -+ /* END PLUMED */ - if (delta <= 0) - { - /* accepted */ -@@ -1092,11 +1148,22 @@ test_for_replica_exchange(FILE *fplog, - - if (bEx[i]) - { -+ /* PLUMED */ -+ if(!plumed_test_exchange_pattern) { -+ /* standard neighbour swapping */ - /* swap these two */ - tmp = pind[i-1]; - pind[i-1] = pind[i]; - pind[i] = tmp; - re->nexchange[i]++; /* statistics for back compatibility */ -+ } else { -+ /* alternative swapping patterns */ -+ tmp = pind[a]; -+ pind[a] = pind[b]; -+ pind[b] = tmp; -+ re->nexchange[i]++; /* statistics for back compatibility */ -+ } -+ /* END PLUMED */ - } - } - else -@@ -1112,6 +1179,15 @@ test_for_replica_exchange(FILE *fplog, - re->nattempt[m]++; - } - -+ /* PLUMED */ -+ if(plumed_test_exchange_pattern>0) { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->ind[i] = i; -+ } -+ } -+ /* END PLUMED */ -+ - /* record which moves were made and accepted */ - for (i = 0; i < re->nrepl; i++) - { -@@ -1316,6 +1392,10 @@ gmx_bool replica_exchange(FILE *fplog, const t_commrec *cr, struct gmx_repl_ex * - /* The order in which multiple exchanges will occur. */ - gmx_bool bThisReplicaExchanged = FALSE; - -+ /* PLUMED */ -+ if(plumedswitch)plumed_cmd(plumedmain,"GREX prepare",NULL); -+ /* END PLUMED */ -+ - if (MASTER(cr)) - { - replica_id = re->repl; -diff --git a/src/programs/mdrun/repl_ex.c.preplumed b/src/programs/mdrun/repl_ex.c.preplumed -new file mode 100644 -index 0000000..46a9bc0 ---- /dev/null -+++ b/src/programs/mdrun/repl_ex.c.preplumed -@@ -0,0 +1,1439 @@ -+/* -+ * This file is part of the GROMACS molecular simulation package. -+ * -+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. -+ * Copyright (c) 2001-2004, The GROMACS development team. 
-+ * Copyright (c) 2011,2012,2013,2014, by the GROMACS development team, led by -+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -+ * and including many others, as listed in the AUTHORS file in the -+ * top-level source directory and at http://www.gromacs.org. -+ * -+ * GROMACS is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * as published by the Free Software Foundation; either version 2.1 -+ * of the License, or (at your option) any later version. -+ * -+ * GROMACS is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with GROMACS; if not, see -+ * http://www.gnu.org/licenses, or write to the Free Software Foundation, -+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * If you want to redistribute modifications to GROMACS, please -+ * consider that scientific software is very special. Version -+ * control is crucial - bugs must be traceable. We will be happy to -+ * consider code for inclusion in the official distribution, but -+ * derived work must not be called official GROMACS. Details are found -+ * in the README & COPYING files - if they are missing, get the -+ * official version at http://www.gromacs.org. -+ * -+ * To help us fund GROMACS development, we humbly ask that you cite -+ * the research papers on the package. Check out http://www.gromacs.org. -+ */ -+#ifdef HAVE_CONFIG_H -+#include -+#endif -+ -+#include -+#include "repl_ex.h" -+#include "network.h" -+#include "gromacs/random/random.h" -+#include "gromacs/utility/smalloc.h" -+#include "physics.h" -+#include "copyrite.h" -+#include "macros.h" -+#include "vec.h" -+#include "names.h" -+#include "domdec.h" -+#include "gromacs/random/random.h" -+ -+#define PROBABILITYCUTOFF 100 -+/* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */ -+ -+enum { -+ ereTEMP, ereLAMBDA, ereENDSINGLE, ereTL, ereNR -+}; -+const char *erename[ereNR] = { "temperature", "lambda", "end_single_marker", "temperature and lambda"}; -+/* end_single_marker merely notes the end of single variable replica exchange. All types higher than -+ it are multiple replica exchange methods */ -+/* Eventually, should add 'pressure', 'temperature and pressure', 'lambda_and_pressure', 'temperature_lambda_pressure'?; -+ Let's wait until we feel better about the pressure control methods giving exact ensembles. 
Right now, we assume constant pressure */ -+ -+typedef struct gmx_repl_ex -+{ -+ int repl; -+ int nrepl; -+ real temp; -+ int type; -+ real **q; -+ gmx_bool bNPT; -+ real *pres; -+ int *ind; -+ int *allswaps; -+ int nst; -+ int nex; -+ int seed; -+ int nattempt[2]; -+ real *prob_sum; -+ int **nmoves; -+ int *nexchange; -+ gmx_rng_t rng; -+ -+ /* these are helper arrays for replica exchange; allocated here so they -+ don't have to be allocated each time */ -+ int *destinations; -+ int **cyclic; -+ int **order; -+ int *tmpswap; -+ gmx_bool *incycle; -+ gmx_bool *bEx; -+ -+ /* helper arrays to hold the quantities that are exchanged */ -+ real *prob; -+ real *Epot; -+ real *beta; -+ real *Vol; -+ real **de; -+ -+} t_gmx_repl_ex; -+ -+static gmx_bool repl_quantity(const gmx_multisim_t *ms, -+ struct gmx_repl_ex *re, int ere, real q) -+{ -+ real *qall; -+ gmx_bool bDiff; -+ int i, s; -+ -+ snew(qall, ms->nsim); -+ qall[re->repl] = q; -+ gmx_sum_sim(ms->nsim, qall, ms); -+ -+ bDiff = FALSE; -+ for (s = 1; s < ms->nsim; s++) -+ { -+ if (qall[s] != qall[0]) -+ { -+ bDiff = TRUE; -+ } -+ } -+ -+ if (bDiff) -+ { -+ /* Set the replica exchange type and quantities */ -+ re->type = ere; -+ -+ snew(re->q[ere], re->nrepl); -+ for (s = 0; s < ms->nsim; s++) -+ { -+ re->q[ere][s] = qall[s]; -+ } -+ } -+ sfree(qall); -+ return bDiff; -+} -+ -+gmx_repl_ex_t init_replica_exchange(FILE *fplog, -+ const gmx_multisim_t *ms, -+ const t_state *state, -+ const t_inputrec *ir, -+ int nst, int nex, int init_seed) -+{ -+ real temp, pres; -+ int i, j, k; -+ struct gmx_repl_ex *re; -+ gmx_bool bTemp; -+ gmx_bool bLambda = FALSE; -+ -+ fprintf(fplog, "\nInitializing Replica Exchange\n"); -+ -+ if (ms == NULL || ms->nsim == 1) -+ { -+ gmx_fatal(FARGS, "Nothing to exchange with only one replica, maybe you forgot to set the -multi option of mdrun?"); -+ } -+ if (!EI_DYNAMICS(ir->eI)) -+ { -+ gmx_fatal(FARGS, "Replica exchange is only supported by dynamical simulations"); -+ /* Note that PAR(cr) is defined by cr->nnodes > 1, which is -+ * distinct from MULTISIM(cr). A multi-simulation only runs -+ * with real MPI parallelism, but this does not imply PAR(cr) -+ * is true! -+ * -+ * Since we are using a dynamical integrator, the only -+ * decomposition is DD, so PAR(cr) and DOMAINDECOMP(cr) are -+ * synonymous. The only way for cr->nnodes > 1 to be true is -+ * if we are using DD. 
*/ -+ } -+ -+ snew(re, 1); -+ -+ re->repl = ms->sim; -+ re->nrepl = ms->nsim; -+ snew(re->q, ereENDSINGLE); -+ -+ fprintf(fplog, "Repl There are %d replicas:\n", re->nrepl); -+ -+ check_multi_int(fplog, ms, state->natoms, "the number of atoms", FALSE); -+ check_multi_int(fplog, ms, ir->eI, "the integrator", FALSE); -+ check_multi_int64(fplog, ms, ir->init_step+ir->nsteps, "init_step+nsteps", FALSE); -+ check_multi_int64(fplog, ms, (ir->init_step+nst-1)/nst, -+ "first exchange step: init_step/-replex", FALSE); -+ check_multi_int(fplog, ms, ir->etc, "the temperature coupling", FALSE); -+ check_multi_int(fplog, ms, ir->opts.ngtc, -+ "the number of temperature coupling groups", FALSE); -+ check_multi_int(fplog, ms, ir->epc, "the pressure coupling", FALSE); -+ check_multi_int(fplog, ms, ir->efep, "free energy", FALSE); -+ check_multi_int(fplog, ms, ir->fepvals->n_lambda, "number of lambda states", FALSE); -+ -+ re->temp = ir->opts.ref_t[0]; -+ for (i = 1; (i < ir->opts.ngtc); i++) -+ { -+ if (ir->opts.ref_t[i] != re->temp) -+ { -+ fprintf(fplog, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); -+ fprintf(stderr, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); -+ } -+ } -+ -+ re->type = -1; -+ bTemp = repl_quantity(ms, re, ereTEMP, re->temp); -+ if (ir->efep != efepNO) -+ { -+ bLambda = repl_quantity(ms, re, ereLAMBDA, (real)ir->fepvals->init_fep_state); -+ } -+ if (re->type == -1) /* nothing was assigned */ -+ { -+ gmx_fatal(FARGS, "The properties of the %d systems are all the same, there is nothing to exchange", re->nrepl); -+ } -+ if (bLambda && bTemp) -+ { -+ re->type = ereTL; -+ } -+ -+ if (bTemp) -+ { -+ please_cite(fplog, "Sugita1999a"); -+ if (ir->epc != epcNO) -+ { -+ re->bNPT = TRUE; -+ fprintf(fplog, "Repl Using Constant Pressure REMD.\n"); -+ please_cite(fplog, "Okabe2001a"); -+ } -+ if (ir->etc == etcBERENDSEN) -+ { -+ gmx_fatal(FARGS, "REMD with the %s thermostat does not produce correct potential energy distributions, consider using the %s thermostat instead", -+ ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE)); -+ } -+ } -+ if (bLambda) -+ { -+ if (ir->fepvals->delta_lambda != 0) /* check this? */ -+ { -+ gmx_fatal(FARGS, "delta_lambda is not zero"); -+ } -+ } -+ if (re->bNPT) -+ { -+ snew(re->pres, re->nrepl); -+ if (ir->epct == epctSURFACETENSION) -+ { -+ pres = ir->ref_p[ZZ][ZZ]; -+ } -+ else -+ { -+ pres = 0; -+ j = 0; -+ for (i = 0; i < DIM; i++) -+ { -+ if (ir->compress[i][i] != 0) -+ { -+ pres += ir->ref_p[i][i]; -+ j++; -+ } -+ } -+ pres /= j; -+ } -+ re->pres[re->repl] = pres; -+ gmx_sum_sim(re->nrepl, re->pres, ms); -+ } -+ -+ /* Make an index for increasing replica order */ -+ /* only makes sense if one or the other is varying, not both! -+ if both are varying, we trust the order the person gave. */ -+ snew(re->ind, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->ind[i] = i; -+ } -+ -+ if (re->type < ereENDSINGLE) -+ { -+ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ for (j = i+1; j < re->nrepl; j++) -+ { -+ if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]]) -+ { -+ /* Unordered replicas are supposed to work, but there -+ * is still an issues somewhere. -+ * Note that at this point still re->ind[i]=i. 
-+ */ -+ gmx_fatal(FARGS, "Replicas with indices %d < %d have %ss %g > %g, please order your replicas on increasing %s", -+ i, j, -+ erename[re->type], -+ re->q[re->type][i], re->q[re->type][j], -+ erename[re->type]); -+ -+ k = re->ind[i]; -+ re->ind[i] = re->ind[j]; -+ re->ind[j] = k; -+ } -+ else if (re->q[re->type][re->ind[j]] == re->q[re->type][re->ind[i]]) -+ { -+ gmx_fatal(FARGS, "Two replicas have identical %ss", erename[re->type]); -+ } -+ } -+ } -+ } -+ -+ /* keep track of all the swaps, starting with the initial placement. */ -+ snew(re->allswaps, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->allswaps[i] = re->ind[i]; -+ } -+ -+ switch (re->type) -+ { -+ case ereTEMP: -+ fprintf(fplog, "\nReplica exchange in temperature\n"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %5.1f", re->q[re->type][re->ind[i]]); -+ } -+ fprintf(fplog, "\n"); -+ break; -+ case ereLAMBDA: -+ fprintf(fplog, "\nReplica exchange in lambda\n"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %3d", (int)re->q[re->type][re->ind[i]]); -+ } -+ fprintf(fplog, "\n"); -+ break; -+ case ereTL: -+ fprintf(fplog, "\nReplica exchange in temperature and lambda state\n"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %5.1f", re->q[ereTEMP][re->ind[i]]); -+ } -+ fprintf(fplog, "\n"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %5d", (int)re->q[ereLAMBDA][re->ind[i]]); -+ } -+ fprintf(fplog, "\n"); -+ break; -+ default: -+ gmx_incons("Unknown replica exchange quantity"); -+ } -+ if (re->bNPT) -+ { -+ fprintf(fplog, "\nRepl p"); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ fprintf(fplog, " %5.2f", re->pres[re->ind[i]]); -+ } -+ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ if ((i > 0) && (re->pres[re->ind[i]] < re->pres[re->ind[i-1]])) -+ { -+ fprintf(fplog, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n"); -+ fprintf(stderr, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n"); -+ } -+ } -+ } -+ re->nst = nst; -+ if (init_seed == -1) -+ { -+ if (MASTERSIM(ms)) -+ { -+ re->seed = (int)gmx_rng_make_seed(); -+ } -+ else -+ { -+ re->seed = 0; -+ } -+ gmx_sumi_sim(1, &(re->seed), ms); -+ } -+ else -+ { -+ re->seed = init_seed; -+ } -+ fprintf(fplog, "\nReplica exchange interval: %d\n", re->nst); -+ fprintf(fplog, "\nReplica random seed: %d\n", re->seed); -+ re->rng = gmx_rng_init(re->seed); -+ -+ re->nattempt[0] = 0; -+ re->nattempt[1] = 0; -+ -+ snew(re->prob_sum, re->nrepl); -+ snew(re->nexchange, re->nrepl); -+ snew(re->nmoves, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ snew(re->nmoves[i], re->nrepl); -+ } -+ fprintf(fplog, "Replica exchange information below: x=exchange, pr=probability\n"); -+ -+ /* generate space for the helper functions so we don't have to snew each time */ -+ -+ snew(re->destinations, re->nrepl); -+ snew(re->incycle, re->nrepl); -+ snew(re->tmpswap, re->nrepl); -+ snew(re->cyclic, re->nrepl); -+ snew(re->order, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ snew(re->cyclic[i], re->nrepl); -+ snew(re->order[i], re->nrepl); -+ } -+ /* allocate space for the functions storing the data for the replicas */ -+ /* not all of these arrays needed in all cases, but they don't take -+ up much space, since the max size is nrepl**2 */ -+ snew(re->prob, re->nrepl); -+ snew(re->bEx, re->nrepl); -+ snew(re->beta, re->nrepl); -+ snew(re->Vol, re->nrepl); -+ snew(re->Epot, re->nrepl); -+ snew(re->de, re->nrepl); -+ for (i = 0; i < re->nrepl; i++) -+ { -+ snew(re->de[i], re->nrepl); 
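/* de[i][j] will hold the energy of the j-th replica's configuration
 * evaluated in the i-th Hamiltonian minus its energy in its own (j-th)
 * Hamiltonian; only the lambda and temperature+lambda exchange types
 * fill it, but the full nrepl x nrepl matrix costs little to keep. */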
-+ } -+ re->nex = nex; -+ return re; -+} -+ -+static void exchange_reals(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, real *v, int n) -+{ -+ real *buf; -+ int i; -+ -+ if (v) -+ { -+ snew(buf, n); -+#ifdef GMX_MPI -+ /* -+ MPI_Sendrecv(v, n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, -+ buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, -+ ms->mpi_comm_masters,MPI_STATUS_IGNORE); -+ */ -+ { -+ MPI_Request mpi_req; -+ -+ MPI_Isend(v, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, &mpi_req); -+ MPI_Recv(buf, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, MPI_STATUS_IGNORE); -+ MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); -+ } -+#endif -+ for (i = 0; i < n; i++) -+ { -+ v[i] = buf[i]; -+ } -+ sfree(buf); -+ } -+} -+ -+ -+static void exchange_ints(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, int *v, int n) -+{ -+ int *buf; -+ int i; -+ -+ if (v) -+ { -+ snew(buf, n); -+#ifdef GMX_MPI -+ /* -+ MPI_Sendrecv(v, n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0, -+ buf,n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0, -+ ms->mpi_comm_masters,MPI_STATUS_IGNORE); -+ */ -+ { -+ MPI_Request mpi_req; -+ -+ MPI_Isend(v, n*sizeof(int), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, &mpi_req); -+ MPI_Recv(buf, n*sizeof(int), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, MPI_STATUS_IGNORE); -+ MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); -+ } -+#endif -+ for (i = 0; i < n; i++) -+ { -+ v[i] = buf[i]; -+ } -+ sfree(buf); -+ } -+} -+ -+static void exchange_doubles(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, double *v, int n) -+{ -+ double *buf; -+ int i; -+ -+ if (v) -+ { -+ snew(buf, n); -+#ifdef GMX_MPI -+ /* -+ MPI_Sendrecv(v, n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, -+ buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, -+ ms->mpi_comm_masters,MPI_STATUS_IGNORE); -+ */ -+ { -+ MPI_Request mpi_req; -+ -+ MPI_Isend(v, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, &mpi_req); -+ MPI_Recv(buf, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, MPI_STATUS_IGNORE); -+ MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); -+ } -+#endif -+ for (i = 0; i < n; i++) -+ { -+ v[i] = buf[i]; -+ } -+ sfree(buf); -+ } -+} -+ -+static void exchange_rvecs(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, rvec *v, int n) -+{ -+ rvec *buf; -+ int i; -+ -+ if (v) -+ { -+ snew(buf, n); -+#ifdef GMX_MPI -+ /* -+ MPI_Sendrecv(v[0], n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, -+ buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, -+ ms->mpi_comm_masters,MPI_STATUS_IGNORE); -+ */ -+ { -+ MPI_Request mpi_req; -+ -+ MPI_Isend(v[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, &mpi_req); -+ MPI_Recv(buf[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, -+ ms->mpi_comm_masters, MPI_STATUS_IGNORE); -+ MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); -+ } -+#endif -+ for (i = 0; i < n; i++) -+ { -+ copy_rvec(buf[i], v[i]); -+ } -+ sfree(buf); -+ } -+} -+ -+static void exchange_state(const gmx_multisim_t *ms, int b, t_state *state) -+{ -+ /* When t_state changes, this code should be updated. 
*/ -+ int ngtc, nnhpres; -+ ngtc = state->ngtc * state->nhchainlength; -+ nnhpres = state->nnhpres* state->nhchainlength; -+ exchange_rvecs(ms, b, state->box, DIM); -+ exchange_rvecs(ms, b, state->box_rel, DIM); -+ exchange_rvecs(ms, b, state->boxv, DIM); -+ exchange_reals(ms, b, &(state->veta), 1); -+ exchange_reals(ms, b, &(state->vol0), 1); -+ exchange_rvecs(ms, b, state->svir_prev, DIM); -+ exchange_rvecs(ms, b, state->fvir_prev, DIM); -+ exchange_rvecs(ms, b, state->pres_prev, DIM); -+ exchange_doubles(ms, b, state->nosehoover_xi, ngtc); -+ exchange_doubles(ms, b, state->nosehoover_vxi, ngtc); -+ exchange_doubles(ms, b, state->nhpres_xi, nnhpres); -+ exchange_doubles(ms, b, state->nhpres_vxi, nnhpres); -+ exchange_doubles(ms, b, state->therm_integral, state->ngtc); -+ exchange_rvecs(ms, b, state->x, state->natoms); -+ exchange_rvecs(ms, b, state->v, state->natoms); -+ exchange_rvecs(ms, b, state->sd_X, state->natoms); -+} -+ -+static void copy_rvecs(rvec *s, rvec *d, int n) -+{ -+ int i; -+ -+ if (d != NULL) -+ { -+ for (i = 0; i < n; i++) -+ { -+ copy_rvec(s[i], d[i]); -+ } -+ } -+} -+ -+static void copy_doubles(const double *s, double *d, int n) -+{ -+ int i; -+ -+ if (d != NULL) -+ { -+ for (i = 0; i < n; i++) -+ { -+ d[i] = s[i]; -+ } -+ } -+} -+ -+static void copy_reals(const real *s, real *d, int n) -+{ -+ int i; -+ -+ if (d != NULL) -+ { -+ for (i = 0; i < n; i++) -+ { -+ d[i] = s[i]; -+ } -+ } -+} -+ -+static void copy_ints(const int *s, int *d, int n) -+{ -+ int i; -+ -+ if (d != NULL) -+ { -+ for (i = 0; i < n; i++) -+ { -+ d[i] = s[i]; -+ } -+ } -+} -+ -+#define scopy_rvecs(v, n) copy_rvecs(state->v, state_local->v, n); -+#define scopy_doubles(v, n) copy_doubles(state->v, state_local->v, n); -+#define scopy_reals(v, n) copy_reals(state->v, state_local->v, n); -+#define scopy_ints(v, n) copy_ints(state->v, state_local->v, n); -+ -+static void copy_state_nonatomdata(t_state *state, t_state *state_local) -+{ -+ /* When t_state changes, this code should be updated. 
*/ -+ int ngtc, nnhpres; -+ ngtc = state->ngtc * state->nhchainlength; -+ nnhpres = state->nnhpres* state->nhchainlength; -+ scopy_rvecs(box, DIM); -+ scopy_rvecs(box_rel, DIM); -+ scopy_rvecs(boxv, DIM); -+ state_local->veta = state->veta; -+ state_local->vol0 = state->vol0; -+ scopy_rvecs(svir_prev, DIM); -+ scopy_rvecs(fvir_prev, DIM); -+ scopy_rvecs(pres_prev, DIM); -+ scopy_doubles(nosehoover_xi, ngtc); -+ scopy_doubles(nosehoover_vxi, ngtc); -+ scopy_doubles(nhpres_xi, nnhpres); -+ scopy_doubles(nhpres_vxi, nnhpres); -+ scopy_doubles(therm_integral, state->ngtc); -+ scopy_rvecs(x, state->natoms); -+ scopy_rvecs(v, state->natoms); -+ scopy_rvecs(sd_X, state->natoms); -+ copy_ints(&(state->fep_state), &(state_local->fep_state), 1); -+ scopy_reals(lambda, efptNR); -+} -+ -+static void scale_velocities(t_state *state, real fac) -+{ -+ int i; -+ -+ if (state->v) -+ { -+ for (i = 0; i < state->natoms; i++) -+ { -+ svmul(fac, state->v[i], state->v[i]); -+ } -+ } -+} -+ -+static void print_transition_matrix(FILE *fplog, int n, int **nmoves, int *nattempt) -+{ -+ int i, j, ntot; -+ float Tprint; -+ -+ ntot = nattempt[0] + nattempt[1]; -+ fprintf(fplog, "\n"); -+ fprintf(fplog, "Repl"); -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, " "); /* put the title closer to the center */ -+ } -+ fprintf(fplog, "Empirical Transition Matrix\n"); -+ -+ fprintf(fplog, "Repl"); -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, "%8d", (i+1)); -+ } -+ fprintf(fplog, "\n"); -+ -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, "Repl"); -+ for (j = 0; j < n; j++) -+ { -+ Tprint = 0.0; -+ if (nmoves[i][j] > 0) -+ { -+ Tprint = nmoves[i][j]/(2.0*ntot); -+ } -+ fprintf(fplog, "%8.4f", Tprint); -+ } -+ fprintf(fplog, "%3d\n", i); -+ } -+} -+ -+static void print_ind(FILE *fplog, const char *leg, int n, int *ind, gmx_bool *bEx) -+{ -+ int i; -+ -+ fprintf(fplog, "Repl %2s %2d", leg, ind[0]); -+ for (i = 1; i < n; i++) -+ { -+ fprintf(fplog, " %c %2d", (bEx != 0 && bEx[i]) ? 'x' : ' ', ind[i]); -+ } -+ fprintf(fplog, "\n"); -+} -+ -+static void print_allswitchind(FILE *fplog, int n, int *pind, int *allswaps, int *tmpswap) -+{ -+ int i; -+ -+ for (i = 0; i < n; i++) -+ { -+ tmpswap[i] = allswaps[i]; -+ } -+ for (i = 0; i < n; i++) -+ { -+ allswaps[i] = tmpswap[pind[i]]; -+ } -+ -+ fprintf(fplog, "\nAccepted Exchanges: "); -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, "%d ", pind[i]); -+ } -+ fprintf(fplog, "\n"); -+ -+ /* the "Order After Exchange" is the state label corresponding to the configuration that -+ started in state listed in order, i.e. -+ -+ 3 0 1 2 -+ -+ means that the: -+ configuration starting in simulation 3 is now in simulation 0, -+ configuration starting in simulation 0 is now in simulation 1, -+ configuration starting in simulation 1 is now in simulation 2, -+ configuration starting in simulation 2 is now in simulation 3 -+ */ -+ fprintf(fplog, "Order After Exchange: "); -+ for (i = 0; i < n; i++) -+ { -+ fprintf(fplog, "%d ", allswaps[i]); -+ } -+ fprintf(fplog, "\n\n"); -+} -+ -+static void print_prob(FILE *fplog, const char *leg, int n, real *prob) -+{ -+ int i; -+ char buf[8]; -+ -+ fprintf(fplog, "Repl %2s ", leg); -+ for (i = 1; i < n; i++) -+ { -+ if (prob[i] >= 0) -+ { -+ sprintf(buf, "%4.2f", prob[i]); -+ fprintf(fplog, " %3s", buf[0] == '1' ? 
"1.0" : buf+1); -+ } -+ else -+ { -+ fprintf(fplog, " "); -+ } -+ } -+ fprintf(fplog, "\n"); -+} -+ -+static void print_count(FILE *fplog, const char *leg, int n, int *count) -+{ -+ int i; -+ -+ fprintf(fplog, "Repl %2s ", leg); -+ for (i = 1; i < n; i++) -+ { -+ fprintf(fplog, " %4d", count[i]); -+ } -+ fprintf(fplog, "\n"); -+} -+ -+static real calc_delta(FILE *fplog, gmx_bool bPrint, struct gmx_repl_ex *re, int a, int b, int ap, int bp) -+{ -+ -+ real ediff, dpV, delta = 0; -+ real *Epot = re->Epot; -+ real *Vol = re->Vol; -+ real **de = re->de; -+ real *beta = re->beta; -+ -+ /* Two cases; we are permuted and not. In all cases, setting ap = a and bp = b will reduce -+ to the non permuted case */ -+ -+ switch (re->type) -+ { -+ case ereTEMP: -+ /* -+ * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439 -+ */ -+ ediff = Epot[b] - Epot[a]; -+ delta = -(beta[bp] - beta[ap])*ediff; -+ break; -+ case ereLAMBDA: -+ /* two cases: when we are permuted, and not. */ -+ /* non-permuted: -+ ediff = E_new - E_old -+ = [H_b(x_a) + H_a(x_b)] - [H_b(x_b) + H_a(x_a)] -+ = [H_b(x_a) - H_a(x_a)] + [H_a(x_b) - H_b(x_b)] -+ = de[b][a] + de[a][b] */ -+ -+ /* permuted: -+ ediff = E_new - E_old -+ = [H_bp(x_a) + H_ap(x_b)] - [H_bp(x_b) + H_ap(x_a)] -+ = [H_bp(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_bp(x_b)] -+ = [H_bp(x_a) - H_a(x_a) + H_a(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_b(x_b) + H_b(x_b) - H_bp(x_b)] -+ = [H_bp(x_a) - H_a(x_a)] - [H_ap(x_a) - H_a(x_a)] + [H_ap(x_b) - H_b(x_b)] - H_bp(x_b) - H_b(x_b)] -+ = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]) */ -+ /* but, in the current code implementation, we flip configurations, not indices . . . -+ So let's examine that. -+ = [H_b(x_ap) - H_a(x_a)] - [H_a(x_ap) - H_a(x_a)] + [H_a(x_bp) - H_b(x_b)] - H_b(x_bp) - H_b(x_b)] -+ = [H_b(x_ap) - H_a(x_ap)] + [H_a(x_bp) - H_b(x_pb)] -+ = (de[b][ap] - de[a][ap]) + (de[a][bp] - de[b][bp] -+ So, if we exchange b<=> bp and a<=> ap, we return to the same result. -+ So the simple solution is to flip the -+ position of perturbed and original indices in the tests. -+ */ -+ -+ ediff = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]); -+ delta = ediff*beta[a]; /* assume all same temperature in this case */ -+ break; -+ case ereTL: -+ /* not permuted: */ -+ /* delta = reduced E_new - reduced E_old -+ = [beta_b H_b(x_a) + beta_a H_a(x_b)] - [beta_b H_b(x_b) + beta_a H_a(x_a)] -+ = [beta_b H_b(x_a) - beta_a H_a(x_a)] + [beta_a H_a(x_b) - beta_b H_b(x_b)] -+ = [beta_b dH_b(x_a) + beta_b H_a(x_a) - beta_a H_a(x_a)] + -+ [beta_a dH_a(x_b) + beta_a H_b(x_b) - beta_b H_b(x_b)] -+ = [beta_b dH_b(x_a) + [beta_a dH_a(x_b) + -+ beta_b (H_a(x_a) - H_b(x_b)]) - beta_a (H_a(x_a) - H_b(x_b)) -+ = beta_b dH_b(x_a) + beta_a dH_a(x_b) - (beta_b - beta_a)(H_b(x_b) - H_a(x_a) */ -+ /* delta = beta[b]*de[b][a] + beta[a]*de[a][b] - (beta[b] - beta[a])*(Epot[b] - Epot[a]; */ -+ /* permuted (big breath!) 
*/ -+ /* delta = reduced E_new - reduced E_old -+ = [beta_bp H_bp(x_a) + beta_ap H_ap(x_b)] - [beta_bp H_bp(x_b) + beta_ap H_ap(x_a)] -+ = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] -+ = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] -+ - beta_pb H_a(x_a) + beta_ap H_a(x_a) + beta_pb H_a(x_a) - beta_ap H_a(x_a) -+ - beta_ap H_b(x_b) + beta_bp H_b(x_b) + beta_ap H_b(x_b) - beta_bp H_b(x_b) -+ = [(beta_bp H_bp(x_a) - beta_bp H_a(x_a)) - (beta_ap H_ap(x_a) - beta_ap H_a(x_a))] + -+ [(beta_ap H_ap(x_b) - beta_ap H_b(x_b)) - (beta_bp H_bp(x_b) - beta_bp H_b(x_b))] -+ + beta_pb H_a(x_a) - beta_ap H_a(x_a) + beta_ap H_b(x_b) - beta_bp H_b(x_b) -+ = [beta_bp (H_bp(x_a) - H_a(x_a)) - beta_ap (H_ap(x_a) - H_a(x_a))] + -+ [beta_ap (H_ap(x_b) - H_b(x_b)) - beta_bp (H_bp(x_b) - H_b(x_b))] -+ + beta_pb (H_a(x_a) - H_b(x_b)) - beta_ap (H_a(x_a) - H_b(x_b)) -+ = ([beta_bp de[bp][a] - beta_ap de[ap][a]) + beta_ap de[ap][b] - beta_bp de[bp][b]) -+ + (beta_pb-beta_ap)(H_a(x_a) - H_b(x_b)) */ -+ delta = beta[bp]*(de[bp][a] - de[bp][b]) + beta[ap]*(de[ap][b] - de[ap][a]) - (beta[bp]-beta[ap])*(Epot[b]-Epot[a]); -+ break; -+ default: -+ gmx_incons("Unknown replica exchange quantity"); -+ } -+ if (bPrint) -+ { -+ fprintf(fplog, "Repl %d <-> %d dE_term = %10.3e (kT)\n", a, b, delta); -+ } -+ if (re->bNPT) -+ { -+ /* revist the calculation for 5.0. Might be some improvements. */ -+ dpV = (beta[ap]*re->pres[ap]-beta[bp]*re->pres[bp])*(Vol[b]-Vol[a])/PRESFAC; -+ if (bPrint) -+ { -+ fprintf(fplog, " dpV = %10.3e d = %10.3e\n", dpV, delta + dpV); -+ } -+ delta += dpV; -+ } -+ return delta; -+} -+ -+static void -+test_for_replica_exchange(FILE *fplog, -+ const gmx_multisim_t *ms, -+ struct gmx_repl_ex *re, -+ gmx_enerdata_t *enerd, -+ real vol, -+ gmx_int64_t step, -+ real time) -+{ -+ int m, i, j, a, b, ap, bp, i0, i1, tmp; -+ real ediff = 0, delta = 0, dpV = 0; -+ gmx_bool bPrint, bMultiEx; -+ gmx_bool *bEx = re->bEx; -+ real *prob = re->prob; -+ int *pind = re->destinations; /* permuted index */ -+ gmx_bool bEpot = FALSE; -+ gmx_bool bDLambda = FALSE; -+ gmx_bool bVol = FALSE; -+ gmx_rng_t rng; -+ -+ bMultiEx = (re->nex > 1); /* multiple exchanges at each state */ -+ fprintf(fplog, "Replica exchange at step " "%"GMX_PRId64 " time %.5f\n", step, time); -+ -+ if (re->bNPT) -+ { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->Vol[i] = 0; -+ } -+ bVol = TRUE; -+ re->Vol[re->repl] = vol; -+ } -+ if ((re->type == ereTEMP || re->type == ereTL)) -+ { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->Epot[i] = 0; -+ } -+ bEpot = TRUE; -+ re->Epot[re->repl] = enerd->term[F_EPOT]; -+ /* temperatures of different states*/ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->beta[i] = 1.0/(re->q[ereTEMP][i]*BOLTZ); -+ } -+ } -+ else -+ { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->beta[i] = 1.0/(re->temp*BOLTZ); /* we have a single temperature */ -+ } -+ } -+ if (re->type == ereLAMBDA || re->type == ereTL) -+ { -+ bDLambda = TRUE; -+ /* lambda differences. 
*/ -+ /* de[i][j] is the energy of the jth simulation in the ith Hamiltonian -+ minus the energy of the jth simulation in the jth Hamiltonian */ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ for (j = 0; j < re->nrepl; j++) -+ { -+ re->de[i][j] = 0; -+ } -+ } -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->de[i][re->repl] = (enerd->enerpart_lambda[(int)re->q[ereLAMBDA][i]+1]-enerd->enerpart_lambda[0]); -+ } -+ } -+ -+ /* now actually do the communication */ -+ if (bVol) -+ { -+ gmx_sum_sim(re->nrepl, re->Vol, ms); -+ } -+ if (bEpot) -+ { -+ gmx_sum_sim(re->nrepl, re->Epot, ms); -+ } -+ if (bDLambda) -+ { -+ for (i = 0; i < re->nrepl; i++) -+ { -+ gmx_sum_sim(re->nrepl, re->de[i], ms); -+ } -+ } -+ -+ /* make a duplicate set of indices for shuffling */ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ pind[i] = re->ind[i]; -+ } -+ -+ if (bMultiEx) -+ { -+ /* multiple random switch exchange */ -+ int nself = 0; -+ for (i = 0; i < re->nex + nself; i++) -+ { -+ double rnd[2]; -+ -+ gmx_rng_cycle_2uniform(step, i*2, re->seed, RND_SEED_REPLEX, rnd); -+ /* randomly select a pair */ -+ /* in theory, could reduce this by identifying only which switches had a nonneglibible -+ probability of occurring (log p > -100) and only operate on those switches */ -+ /* find out which state it is from, and what label that state currently has. Likely -+ more work that useful. */ -+ i0 = (int)(re->nrepl*rnd[0]); -+ i1 = (int)(re->nrepl*rnd[1]); -+ if (i0 == i1) -+ { -+ nself++; -+ continue; /* self-exchange, back up and do it again */ -+ } -+ -+ a = re->ind[i0]; /* what are the indices of these states? */ -+ b = re->ind[i1]; -+ ap = pind[i0]; -+ bp = pind[i1]; -+ -+ bPrint = FALSE; /* too noisy */ -+ /* calculate the energy difference */ -+ /* if the code changes to flip the STATES, rather than the configurations, -+ use the commented version of the code */ -+ /* delta = calc_delta(fplog,bPrint,re,a,b,ap,bp); */ -+ delta = calc_delta(fplog, bPrint, re, ap, bp, a, b); -+ -+ /* we actually only use the first space in the prob and bEx array, -+ since there are actually many switches between pairs. 
*/ -+ -+ if (delta <= 0) -+ { -+ /* accepted */ -+ prob[0] = 1; -+ bEx[0] = TRUE; -+ } -+ else -+ { -+ if (delta > PROBABILITYCUTOFF) -+ { -+ prob[0] = 0; -+ } -+ else -+ { -+ prob[0] = exp(-delta); -+ } -+ /* roll a number to determine if accepted */ -+ gmx_rng_cycle_2uniform(step, i*2+1, re->seed, RND_SEED_REPLEX, rnd); -+ bEx[0] = rnd[0] < prob[0]; -+ } -+ re->prob_sum[0] += prob[0]; -+ -+ if (bEx[0]) -+ { -+ /* swap the states */ -+ tmp = pind[i0]; -+ pind[i0] = pind[i1]; -+ pind[i1] = tmp; -+ } -+ } -+ re->nattempt[0]++; /* keep track of total permutation trials here */ -+ print_allswitchind(fplog, re->nrepl, pind, re->allswaps, re->tmpswap); -+ } -+ else -+ { -+ /* standard nearest neighbor replica exchange */ -+ -+ m = (step / re->nst) % 2; -+ for (i = 1; i < re->nrepl; i++) -+ { -+ a = re->ind[i-1]; -+ b = re->ind[i]; -+ -+ bPrint = (re->repl == a || re->repl == b); -+ if (i % 2 == m) -+ { -+ delta = calc_delta(fplog, bPrint, re, a, b, a, b); -+ if (delta <= 0) -+ { -+ /* accepted */ -+ prob[i] = 1; -+ bEx[i] = TRUE; -+ } -+ else -+ { -+ double rnd[2]; -+ -+ if (delta > PROBABILITYCUTOFF) -+ { -+ prob[i] = 0; -+ } -+ else -+ { -+ prob[i] = exp(-delta); -+ } -+ /* roll a number to determine if accepted */ -+ gmx_rng_cycle_2uniform(step, i, re->seed, RND_SEED_REPLEX, rnd); -+ bEx[i] = rnd[0] < prob[i]; -+ } -+ re->prob_sum[i] += prob[i]; -+ -+ if (bEx[i]) -+ { -+ /* swap these two */ -+ tmp = pind[i-1]; -+ pind[i-1] = pind[i]; -+ pind[i] = tmp; -+ re->nexchange[i]++; /* statistics for back compatibility */ -+ } -+ } -+ else -+ { -+ prob[i] = -1; -+ bEx[i] = FALSE; -+ } -+ } -+ /* print some statistics */ -+ print_ind(fplog, "ex", re->nrepl, re->ind, bEx); -+ print_prob(fplog, "pr", re->nrepl, prob); -+ fprintf(fplog, "\n"); -+ re->nattempt[m]++; -+ } -+ -+ /* record which moves were made and accepted */ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ re->nmoves[re->ind[i]][pind[i]] += 1; -+ re->nmoves[pind[i]][re->ind[i]] += 1; -+ } -+ fflush(fplog); /* make sure we can see what the last exchange was */ -+} -+ -+static void write_debug_x(t_state *state) -+{ -+ int i; -+ -+ if (debug) -+ { -+ for (i = 0; i < state->natoms; i += 10) -+ { -+ fprintf(debug, "dx %5d %10.5f %10.5f %10.5f\n", i, state->x[i][XX], state->x[i][YY], state->x[i][ZZ]); -+ } -+ } -+} -+ -+static void -+cyclic_decomposition(const int *destinations, -+ int **cyclic, -+ gmx_bool *incycle, -+ const int nrepl, -+ int *nswap) -+{ -+ -+ int i, j, c, p; -+ int maxlen = 1; -+ for (i = 0; i < nrepl; i++) -+ { -+ incycle[i] = FALSE; -+ } -+ for (i = 0; i < nrepl; i++) /* one cycle for each replica */ -+ { -+ if (incycle[i]) -+ { -+ cyclic[i][0] = -1; -+ continue; -+ } -+ cyclic[i][0] = i; -+ incycle[i] = TRUE; -+ c = 1; -+ p = i; -+ for (j = 0; j < nrepl; j++) /* potentially all cycles are part, but we will break first */ -+ { -+ p = destinations[p]; /* start permuting */ -+ if (p == i) -+ { -+ cyclic[i][c] = -1; -+ if (c > maxlen) -+ { -+ maxlen = c; -+ } -+ break; /* we've reached the original element, the cycle is complete, and we marked the end. 
*/ -+ } -+ else -+ { -+ cyclic[i][c] = p; /* each permutation gives a new member of the cycle */ -+ incycle[p] = TRUE; -+ c++; -+ } -+ } -+ } -+ *nswap = maxlen - 1; -+ -+ if (debug) -+ { -+ for (i = 0; i < nrepl; i++) -+ { -+ fprintf(debug, "Cycle %d:", i); -+ for (j = 0; j < nrepl; j++) -+ { -+ if (cyclic[i][j] < 0) -+ { -+ break; -+ } -+ fprintf(debug, "%2d", cyclic[i][j]); -+ } -+ fprintf(debug, "\n"); -+ } -+ fflush(debug); -+ } -+} -+ -+static void -+compute_exchange_order(FILE *fplog, -+ int **cyclic, -+ int **order, -+ const int nrepl, -+ const int maxswap) -+{ -+ int i, j; -+ -+ for (j = 0; j < maxswap; j++) -+ { -+ for (i = 0; i < nrepl; i++) -+ { -+ if (cyclic[i][j+1] >= 0) -+ { -+ order[cyclic[i][j+1]][j] = cyclic[i][j]; -+ order[cyclic[i][j]][j] = cyclic[i][j+1]; -+ } -+ } -+ for (i = 0; i < nrepl; i++) -+ { -+ if (order[i][j] < 0) -+ { -+ order[i][j] = i; /* if it's not exchanging, it should stay this round*/ -+ } -+ } -+ } -+ -+ if (debug) -+ { -+ fprintf(fplog, "Replica Exchange Order\n"); -+ for (i = 0; i < nrepl; i++) -+ { -+ fprintf(fplog, "Replica %d:", i); -+ for (j = 0; j < maxswap; j++) -+ { -+ if (order[i][j] < 0) -+ { -+ break; -+ } -+ fprintf(debug, "%2d", order[i][j]); -+ } -+ fprintf(fplog, "\n"); -+ } -+ fflush(fplog); -+ } -+} -+ -+static void -+prepare_to_do_exchange(FILE *fplog, -+ struct gmx_repl_ex *re, -+ const int replica_id, -+ int *maxswap, -+ gmx_bool *bThisReplicaExchanged) -+{ -+ int i, j; -+ /* Hold the cyclic decomposition of the (multiple) replica -+ * exchange. */ -+ gmx_bool bAnyReplicaExchanged = FALSE; -+ *bThisReplicaExchanged = FALSE; -+ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ if (re->destinations[i] != re->ind[i]) -+ { -+ /* only mark as exchanged if the index has been shuffled */ -+ bAnyReplicaExchanged = TRUE; -+ break; -+ } -+ } -+ if (bAnyReplicaExchanged) -+ { -+ /* reinitialize the placeholder arrays */ -+ for (i = 0; i < re->nrepl; i++) -+ { -+ for (j = 0; j < re->nrepl; j++) -+ { -+ re->cyclic[i][j] = -1; -+ re->order[i][j] = -1; -+ } -+ } -+ -+ /* Identify the cyclic decomposition of the permutation (very -+ * fast if neighbor replica exchange). */ -+ cyclic_decomposition(re->destinations, re->cyclic, re->incycle, re->nrepl, maxswap); -+ -+ /* Now translate the decomposition into a replica exchange -+ * order at each step. */ -+ compute_exchange_order(fplog, re->cyclic, re->order, re->nrepl, *maxswap); -+ -+ /* Did this replica do any exchange at any point? */ -+ for (j = 0; j < *maxswap; j++) -+ { -+ if (replica_id != re->order[replica_id][j]) -+ { -+ *bThisReplicaExchanged = TRUE; -+ break; -+ } -+ } -+ } -+} -+ -+gmx_bool replica_exchange(FILE *fplog, const t_commrec *cr, struct gmx_repl_ex *re, -+ t_state *state, gmx_enerdata_t *enerd, -+ t_state *state_local, gmx_int64_t step, real time) -+{ -+ int i, j; -+ int replica_id = 0; -+ int exchange_partner; -+ int maxswap = 0; -+ /* Number of rounds of exchanges needed to deal with any multiple -+ * exchanges. */ -+ /* Where each replica ends up after the exchange attempt(s). */ -+ /* The order in which multiple exchanges will occur. */ -+ gmx_bool bThisReplicaExchanged = FALSE; -+ -+ if (MASTER(cr)) -+ { -+ replica_id = re->repl; -+ test_for_replica_exchange(fplog, cr->ms, re, enerd, det(state_local->box), step, time); -+ prepare_to_do_exchange(fplog, re, replica_id, &maxswap, &bThisReplicaExchanged); -+ } -+ /* Do intra-simulation broadcast so all processors belonging to -+ * each simulation know whether they need to participate in -+ * collecting the state. 
Otherwise, they might as well get on with -+ * the next thing to do. */ -+ if (DOMAINDECOMP(cr)) -+ { -+#ifdef GMX_MPI -+ MPI_Bcast(&bThisReplicaExchanged, sizeof(gmx_bool), MPI_BYTE, MASTERRANK(cr), -+ cr->mpi_comm_mygroup); -+#endif -+ } -+ -+ if (bThisReplicaExchanged) -+ { -+ /* Exchange the states */ -+ /* Collect the global state on the master node */ -+ if (DOMAINDECOMP(cr)) -+ { -+ dd_collect_state(cr->dd, state_local, state); -+ } -+ else -+ { -+ copy_state_nonatomdata(state_local, state); -+ } -+ -+ if (MASTER(cr)) -+ { -+ /* There will be only one swap cycle with standard replica -+ * exchange, but there may be multiple swap cycles if we -+ * allow multiple swaps. */ -+ -+ for (j = 0; j < maxswap; j++) -+ { -+ exchange_partner = re->order[replica_id][j]; -+ -+ if (exchange_partner != replica_id) -+ { -+ /* Exchange the global states between the master nodes */ -+ if (debug) -+ { -+ fprintf(debug, "Exchanging %d with %d\n", replica_id, exchange_partner); -+ } -+ exchange_state(cr->ms, exchange_partner, state); -+ } -+ } -+ /* For temperature-type replica exchange, we need to scale -+ * the velocities. */ -+ if (re->type == ereTEMP || re->type == ereTL) -+ { -+ scale_velocities(state, sqrt(re->q[ereTEMP][replica_id]/re->q[ereTEMP][re->destinations[replica_id]])); -+ } -+ -+ } -+ -+ /* With domain decomposition the global state is distributed later */ -+ if (!DOMAINDECOMP(cr)) -+ { -+ /* Copy the global state to the local state data structure */ -+ copy_state_nonatomdata(state, state_local); -+ } -+ } -+ -+ return bThisReplicaExchanged; -+} -+ -+void print_replica_exchange_statistics(FILE *fplog, struct gmx_repl_ex *re) -+{ -+ int i; -+ -+ fprintf(fplog, "\nReplica exchange statistics\n"); -+ -+ if (re->nex == 0) -+ { -+ fprintf(fplog, "Repl %d attempts, %d odd, %d even\n", -+ re->nattempt[0]+re->nattempt[1], re->nattempt[1], re->nattempt[0]); -+ -+ fprintf(fplog, "Repl average probabilities:\n"); -+ for (i = 1; i < re->nrepl; i++) -+ { -+ if (re->nattempt[i%2] == 0) -+ { -+ re->prob[i] = 0; -+ } -+ else -+ { -+ re->prob[i] = re->prob_sum[i]/re->nattempt[i%2]; -+ } -+ } -+ print_ind(fplog, "", re->nrepl, re->ind, NULL); -+ print_prob(fplog, "", re->nrepl, re->prob); -+ -+ fprintf(fplog, "Repl number of exchanges:\n"); -+ print_ind(fplog, "", re->nrepl, re->ind, NULL); -+ print_count(fplog, "", re->nrepl, re->nexchange); -+ -+ fprintf(fplog, "Repl average number of exchanges:\n"); -+ for (i = 1; i < re->nrepl; i++) -+ { -+ if (re->nattempt[i%2] == 0) -+ { -+ re->prob[i] = 0; -+ } -+ else -+ { -+ re->prob[i] = ((real)re->nexchange[i])/re->nattempt[i%2]; -+ } -+ } -+ print_ind(fplog, "", re->nrepl, re->ind, NULL); -+ print_prob(fplog, "", re->nrepl, re->prob); -+ -+ fprintf(fplog, "\n"); -+ } -+ /* print the transition matrix */ -+ print_transition_matrix(fplog, re->nrepl, re->nmoves, re->nattempt); -+}
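
Editor's note on the deleted functionality: the calc_delta()/test_for_replica_exchange() hunks above implement the usual temperature replica-exchange acceptance test (Okabe et al., Chem. Phys. Lett. 335 (2001) 435): delta = -(beta_b - beta_a)*(E_b - E_a), accepted outright when delta <= 0 and with probability exp(-delta) otherwise. Below is a minimal standalone C sketch of just that test; it is not GROMACS code, the BOLTZ and PROBABILITYCUTOFF values are assumptions taken to match GROMACS conventions, and rand() stands in for the gmx_rng_cycle_2uniform() calls used in the real code.

    #include <math.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define BOLTZ 0.0083144621        /* kJ mol^-1 K^-1, GROMACS-style units (assumed) */
    #define PROBABILITYCUTOFF 100.0   /* beyond this, exp(-delta) is numerically zero (assumed) */

    /* Return 1 if swapping replicas at (temp_a, epot_a) and (temp_b, epot_b) is accepted. */
    static int accept_exchange(double temp_a, double temp_b,
                               double epot_a, double epot_b)
    {
        double beta_a = 1.0 / (BOLTZ * temp_a);
        double beta_b = 1.0 / (BOLTZ * temp_b);
        double delta  = -(beta_b - beta_a) * (epot_b - epot_a);

        if (delta <= 0)
        {
            return 1;                              /* downhill move: always accept */
        }
        if (delta > PROBABILITYCUTOFF)
        {
            return 0;                              /* acceptance probability underflows */
        }
        /* rand()/RAND_MAX is an illustrative stand-in for the cycle-based RNG in the patch */
        return ((double)rand() / RAND_MAX) < exp(-delta);
    }

    int main(void)
    {
        /* toy numbers: neighbouring replicas at 300 K and 310 K */
        printf("accepted: %d\n", accept_exchange(300.0, 310.0, -1000.0, -995.0));
        return 0;
    }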
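The cyclic_decomposition()/compute_exchange_order() pair in the deleted hunk turns the destination permutation produced by (possibly multiple) exchanges into rounds of pairwise swaps: each cycle of the permutation is walked once, and the number of swap rounds needed is one less than the longest cycle. The following is an illustrative sketch of that decomposition with a hypothetical destinations[] array, not the GROMACS implementation:

    #include <stdio.h>

    #define NREPL 6

    int main(void)
    {
        /* hypothetical destinations: replica i's configuration should end up at destinations[i] */
        int destinations[NREPL] = { 2, 1, 4, 3, 0, 5 };   /* cycles: (0 2 4)(1)(3)(5) */
        int incycle[NREPL] = { 0 };
        int maxlen = 1;

        for (int i = 0; i < NREPL; i++)
        {
            if (incycle[i])
            {
                continue;               /* already recorded as part of an earlier cycle */
            }
            int len = 1;
            int p   = i;
            incycle[i] = 1;
            printf("cycle: %d", i);
            while (destinations[p] != i)
            {
                p = destinations[p];    /* follow the permutation */
                incycle[p] = 1;
                printf(" %d", p);
                len++;
            }
            printf("\n");
            if (len > maxlen)
            {
                maxlen = len;
            }
        }
        printf("swap rounds needed (maxswap): %d\n", maxlen - 1);
        return 0;
    }

With the sample permutation this prints the cycle (0 2 4) plus three fixed points and reports two swap rounds, which is what the deleted code stores in *nswap before building the per-round exchange order.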
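Finally, scale_velocities() in the hunk rescales every velocity after a temperature exchange so that the received configuration's kinetic energy matches this replica's temperature; the deleted code passes sqrt(re->q[ereTEMP][replica_id]/re->q[ereTEMP][re->destinations[replica_id]]) as the factor. A tiny sketch of that rescaling, using double in place of GROMACS's real and made-up temperatures:

    #include <math.h>
    #include <stdio.h>

    typedef double rvec[3];             /* GROMACS uses real[3]; double is used here for the sketch */

    static void scale_velocities(rvec *v, int natoms, double fac)
    {
        for (int i = 0; i < natoms; i++)
        {
            for (int d = 0; d < 3; d++)
            {
                v[i][d] *= fac;         /* same effect as svmul(fac, v[i], v[i]) in the patch */
            }
        }
    }

    int main(void)
    {
        rvec v[2] = { { 1.0, 0.0, -0.5 }, { 0.2, 0.3, 0.4 } };
        double t_new = 310.0, t_old = 300.0;    /* hypothetical replica temperatures */

        scale_velocities(v, 2, sqrt(t_new / t_old));
        printf("v[0] = %f %f %f\n", v[0][0], v[0][1], v[0][2]);
        return 0;
    }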