Commit 39120086 authored by John Chodera (MSKCC)'s avatar John Chodera (MSKCC)
Browse files

Merge remote-tracking branch 'upstream/master' into forcefield

parents 87b95319 2acba7ad
...@@ -18,8 +18,6 @@ Need Help? Check out the [documentation](https://simtk.org/docman/?group_id=161) ...@@ -18,8 +18,6 @@ Need Help? Check out the [documentation](https://simtk.org/docman/?group_id=161)
Badges Badges
------ ------
* Travis CI `linux` integration tests: [![Build Status](https://travis-ci.org/pandegroup/openmm.png?branch=master)](https://travis-ci.org/pandegroup/openmm) * Travis CI `linux` integration tests: [![Build Status](https://travis-ci.org/pandegroup/openmm.png?branch=master)](https://travis-ci.org/pandegroup/openmm)
* Jenkins `openmm-dev` conda `osx` build: [![Jenkins `openmm-dev` conda `osx` build](https://jenkins.choderalab.org/job/conda-openmm-dev-osx-2/badge/icon)](https://jenkins.choderalab.org/job/conda-openmm-dev-osx-2/) [[console log]](https://jenkins.choderalab.org/job/conda-openmm-dev-osx-2/lastBuild/consoleFull) * Anaconda Cloud `openmm` conda release: ![Binstar `openmm` conda release](https://binstar.org/omnia/openmm/badges/version.svg)
* Jenkins `openmm-dev` conda `linux` build: [![Jenkins `openmm-dev` conda `linux` build](https://jenkins.choderalab.org/job/conda-openmm-dev-linux-vagrant-2/badge/icon)](https://jenkins.choderalab.org/job/conda-openmm-dev-linux-vagrant-2/) [[console log]](https://jenkins.choderalab.org/job/conda-openmm-dev-linux-vagrant-2/lastBuild/consoleFull) * Anaconda Cloud `openmm-dev` conda package: ![Binstar `openmm-dev` conda package](https://binstar.org/omnia/openmm-dev/badges/version.svg)
* Binstar `openmm` conda release: ![Binstar `openmm` conda release](https://binstar.org/omnia/openmm/badges/version.svg)
* Binstar `openmm-dev` conda package: ![Binstar `openmm-dev` conda package](https://binstar.org/omnia/openmm-dev/badges/version.svg)
...@@ -922,29 +922,33 @@ If :code:`vdwCutoff` is not specified, then the value of ...@@ -922,29 +922,33 @@ If :code:`vdwCutoff` is not specified, then the value of
Specifying the Polarization Method Specifying the Polarization Method
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
OpenMM allows the setting of several other parameters particular to the AMOEBA When using the AMOEBA force field, OpenMM allows the induced dipoles to be
force field. The :code:`mutualInducedTargetEpsilon` option allows you to calculated in any of three different ways. The slowest but potentially most
specify the accuracy to which the induced dipoles are calculated at each time accurate method is to iterate the calculation until the dipoles converge to a
step; the default value is 0.01. The :code:`polarization` setting specified tolerance. To select this, specify :code:`polarization='mutual'`.
determines whether the calculation of the induced dipoles is continued until the Use the :code:`mutualInducedTargetEpsilon` option to select the tolerance; for
dipoles are self-consistent to within the tolerance specified by most situations, a value of 0.00001 works well. Alternatively you can specify
:code:`mutualInducedTargetEpsilon` or whether a quick estimate of the induced :code:`polarization='extrapolated'`. This uses an analytic approximation
dipoles is used instead. The first option corresponds to the :cite:`Simmonett2015` to estimate what the fully converged dipoles will be without
:code:`polarization='mutual'` setting and is the default; the quick estimate actually continuing the calculation to convergence. In many cases this can be
option is given by :code:`polarization='direct'` and in this case, significantly faster with only a small loss in accuracy. Finally, you can
:code:`mutualInducedTargetEpsilon` is ignored, if provided. Simulations using specify :code:`polarization='direct'` to use the direct polarization
:code:`polarization='direct'` will be significantly faster than those with approximation, in which induced dipoles depend only on the fixed multipoles, not
:code:`polarization='mutual'`\ , but less accurate. Examples using the two on other induced dipoles. This is even faster, but it produces very different
options are given below: forces from mutual polarization, so it should only be used with force fields
:: that have been specifically parameterized for use with this approximation.
system = forcefield.createSystem(nonbondedMethod=PME, Here are examples of using each method:
nonbondedCutoff=1*nanometer,ewaldErrorTolerance=0.00001, ::
vdwCutoff=1.2*nanometer, mutualInducedTargetEpsilon=0.01)
system = forcefield.createSystem(nonbondedMethod=PME, nonbondedCutoff=1*nanometer,
system = forcefield.createSystem(nonbondedMethod=PME, vdwCutoff=1.2*nanometer, polarization='mutual', mutualInducedTargetEpsilon=0.00001)
nonbondedCutoff=1*nanometer,ewaldErrorTolerance=0.00001,
vdwCutoff=1.2*nanometer, polarization ='direct') system = forcefield.createSystem(nonbondedMethod=PME, nonbondedCutoff=1*nanometer,
vdwCutoff=1.2*nanometer, polarization='extrapolated')
system = forcefield.createSystem(nonbondedMethod=PME, nonbondedCutoff=1*nanometer,
vdwCutoff=1.2*nanometer, polarization='direct')
Implicit Solvent and Solute Dielectrics Implicit Solvent and Solute Dielectrics
......
...@@ -389,6 +389,17 @@ ...@@ -389,6 +389,17 @@
type = {Journal Article} type = {Journal Article}
} }
@article{Simmonett2015
author = {Simmonett, Andrew C. and Pickard, Frank C. and Shao, Yihan and Cheatham, Thomas E. and Brooks, Bernard R.},
title = {Efficient treatment of induced dipoles},
journal = {Journal of Chemical Physics},
year = {2015},
volume = {143},
number = {7},
pages = {074115},
type = {Journal Article}
}
@article{Sindhikara2009, @article{Sindhikara2009,
author = {Sindhikara, Daniel J. and Kim, Seonah and Voter, author = {Sindhikara, Daniel J. and Kim, Seonah and Voter,
Arthur F. and Roitberg, Adrian E.}, Arthur F. and Roitberg, Adrian E.},
......
...@@ -36,22 +36,18 @@ def runOneTest(testName, options): ...@@ -36,22 +36,18 @@ def runOneTest(testName, options):
if amoeba: if amoeba:
constraints = None constraints = None
epsilon = float(options.epsilon) epsilon = float(options.epsilon)
if epsilon == 0:
polarization = 'direct'
else:
polarization = 'mutual'
if explicit: if explicit:
ff = app.ForceField('amoeba2009.xml') ff = app.ForceField('amoeba2009.xml')
pdb = app.PDBFile('5dfr_solv-cube_equil.pdb') pdb = app.PDBFile('5dfr_solv-cube_equil.pdb')
cutoff = 0.7*unit.nanometers cutoff = 0.7*unit.nanometers
vdwCutoff = 0.9*unit.nanometers vdwCutoff = 0.9*unit.nanometers
system = ff.createSystem(pdb.topology, nonbondedMethod=app.PME, nonbondedCutoff=cutoff, vdwCutoff=vdwCutoff, constraints=constraints, ewaldErrorTolerance=0.00075, mutualInducedTargetEpsilon=epsilon, polarization=polarization) system = ff.createSystem(pdb.topology, nonbondedMethod=app.PME, nonbondedCutoff=cutoff, vdwCutoff=vdwCutoff, constraints=constraints, ewaldErrorTolerance=0.00075, mutualInducedTargetEpsilon=epsilon, polarization=options.polarization)
else: else:
ff = app.ForceField('amoeba2009.xml', 'amoeba2009_gk.xml') ff = app.ForceField('amoeba2009.xml', 'amoeba2009_gk.xml')
pdb = app.PDBFile('5dfr_minimized.pdb') pdb = app.PDBFile('5dfr_minimized.pdb')
cutoff = 2.0*unit.nanometers cutoff = 2.0*unit.nanometers
vdwCutoff = 1.2*unit.nanometers vdwCutoff = 1.2*unit.nanometers
system = ff.createSystem(pdb.topology, nonbondedMethod=app.NoCutoff, constraints=constraints, mutualInducedTargetEpsilon=epsilon, polarization=polarization) system = ff.createSystem(pdb.topology, nonbondedMethod=app.NoCutoff, constraints=constraints, mutualInducedTargetEpsilon=epsilon, polarization=options.polarization)
for f in system.getForces(): for f in system.getForces():
if isinstance(f, mm.AmoebaMultipoleForce) or isinstance(f, mm.AmoebaVdwForce) or isinstance(f, mm.AmoebaGeneralizedKirkwoodForce) or isinstance(f, mm.AmoebaWcaDispersionForce): if isinstance(f, mm.AmoebaMultipoleForce) or isinstance(f, mm.AmoebaVdwForce) or isinstance(f, mm.AmoebaGeneralizedKirkwoodForce) or isinstance(f, mm.AmoebaWcaDispersionForce):
f.setForceGroup(1) f.setForceGroup(1)
...@@ -127,7 +123,8 @@ parser.add_option('--platform', dest='platform', choices=platformNames, help='na ...@@ -127,7 +123,8 @@ parser.add_option('--platform', dest='platform', choices=platformNames, help='na
parser.add_option('--test', dest='test', choices=('gbsa', 'rf', 'pme', 'amoebagk', 'amoebapme'), help='the test to perform: gbsa, rf, pme, amoebagk, or amoebapme [default: all]') parser.add_option('--test', dest='test', choices=('gbsa', 'rf', 'pme', 'amoebagk', 'amoebapme'), help='the test to perform: gbsa, rf, pme, amoebagk, or amoebapme [default: all]')
parser.add_option('--pme-cutoff', default='0.9', dest='cutoff', type='float', help='direct space cutoff for PME in nm [default: 0.9]') parser.add_option('--pme-cutoff', default='0.9', dest='cutoff', type='float', help='direct space cutoff for PME in nm [default: 0.9]')
parser.add_option('--seconds', default='60', dest='seconds', type='float', help='target simulation length in seconds [default: 60]') parser.add_option('--seconds', default='60', dest='seconds', type='float', help='target simulation length in seconds [default: 60]')
parser.add_option('--mutual-epsilon', default='1e-4', dest='epsilon', type='float', help='mutual induced epsilon for AMOEBA [default: 1e-4]') parser.add_option('--polarization', default='mutual', dest='polarization', choices=('direct', 'extrapolated', 'mutual'), help='the polarization method for AMOEBA: direct, extrapolated, or mutual [default: mutual]')
parser.add_option('--mutual-epsilon', default='1e-5', dest='epsilon', type='float', help='mutual induced epsilon for AMOEBA [default: 1e-5]')
parser.add_option('--heavy-hydrogens', action='store_true', default=False, dest='heavy', help='repartition mass to allow a larger time step') parser.add_option('--heavy-hydrogens', action='store_true', default=False, dest='heavy', help='repartition mass to allow a larger time step')
parser.add_option('--device', default=None, dest='device', help='device index for CUDA or OpenCL') parser.add_option('--device', default=None, dest='device', help='device index for CUDA or OpenCL')
parser.add_option('--precision', default='single', dest='precision', choices=('single', 'mixed', 'double'), help='precision mode for CUDA or OpenCL: single, mixed, or double [default: single]') parser.add_option('--precision', default='single', dest='precision', choices=('single', 'mixed', 'double'), help='precision mode for CUDA or OpenCL: single, mixed, or double [default: single]')
......
...@@ -449,4 +449,5 @@ void ContextImpl::loadCheckpoint(istream& stream) { ...@@ -449,4 +449,5 @@ void ContextImpl::loadCheckpoint(istream& stream) {
parameters[name] = value; parameters[name] = value;
} }
updateStateDataKernel.getAs<UpdateStateDataKernel>().loadCheckpoint(*this, stream); updateStateDataKernel.getAs<UpdateStateDataKernel>().loadCheckpoint(*this, stream);
hasSetPositions = true;
} }
...@@ -92,6 +92,15 @@ void testCheckpoint() { ...@@ -92,6 +92,15 @@ void testCheckpoint() {
integrator.step(10); integrator.step(10);
State s4 = context.getState(State::Positions | State::Velocities | State::Parameters); State s4 = context.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s2, s4); compareStates(s2, s4);
// See if a checkpoint created from one Context can be loaded into a different one.
VerletIntegrator integrator2(0.001);
Context context2(system, integrator2, platform);
stream1.seekg(0, stream1.beg);
context2.loadCheckpoint(stream1);
State s5 = context2.getState(State::Positions | State::Velocities | State::Parameters | State::Energy);
compareStates(s1, s5);
} }
void runPlatformTests() { void runPlatformTests() {
......
...@@ -120,6 +120,15 @@ void testCheckpoint() { ...@@ -120,6 +120,15 @@ void testCheckpoint() {
integrator2.step(10); integrator2.step(10);
State s8 = context2.getState(State::Positions | State::Velocities | State::Parameters); State s8 = context2.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s6, s8); compareStates(s6, s8);
// See if a checkpoint created from one Context can be loaded into a different one.
VerletIntegrator integrator3(0.001);
Context context3(system, integrator3, platform);
stream1.seekg(0, stream1.beg);
context3.loadCheckpoint(stream1);
State s9 = context3.getState(State::Positions | State::Velocities | State::Parameters | State::Energy);
compareStates(s1, s9);
} }
void runPlatformTests() { void runPlatformTests() {
......
...@@ -120,6 +120,15 @@ void testCheckpoint() { ...@@ -120,6 +120,15 @@ void testCheckpoint() {
integrator2.step(10); integrator2.step(10);
State s8 = context2.getState(State::Positions | State::Velocities | State::Parameters); State s8 = context2.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s6, s8); compareStates(s6, s8);
// See if a checkpoint created from one Context can be loaded into a different one.
VerletIntegrator integrator3(0.001);
Context context3(system, integrator3, platform);
stream1.seekg(0, stream1.beg);
context3.loadCheckpoint(stream1);
State s9 = context3.getState(State::Positions | State::Velocities | State::Parameters | State::Energy);
compareStates(s1, s9);
} }
void runPlatformTests() { void runPlatformTests() {
......
...@@ -92,6 +92,15 @@ void testCheckpoint() { ...@@ -92,6 +92,15 @@ void testCheckpoint() {
integrator.step(10); integrator.step(10);
State s4 = context.getState(State::Positions | State::Velocities | State::Parameters); State s4 = context.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s2, s4); compareStates(s2, s4);
// See if a checkpoint created from one Context can be loaded into a different one.
VerletIntegrator integrator2(0.001);
Context context2(system, integrator2, platform);
stream1.seekg(0, stream1.beg);
context2.loadCheckpoint(stream1);
State s5 = context2.getState(State::Positions | State::Velocities | State::Parameters | State::Energy);
compareStates(s1, s5);
} }
void runPlatformTests() { void runPlatformTests() {
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. * * Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Peter Eastman * * Authors: Mark Friedrichs, Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -72,14 +72,25 @@ public: ...@@ -72,14 +72,25 @@ public:
enum PolarizationType { enum PolarizationType {
/** /**
* Mutual polarization * Full mutually induced polarization. The dipoles are iterated until the converge to the accuracy specified
* by getMutualInducedTargetEpsilon().
*/ */
Mutual = 0, Mutual = 0,
/** /**
* Direct polarization * Direct polarization approximation. The induced dipoles depend only on the fixed multipoles, not on other
* induced dipoles.
*/ */
Direct = 1 Direct = 1,
/**
* Extrapolated perturbation theory approximation. The dipoles are iterated a few times, and then an analytic
* approximation is used to extrapolate to the fully converged values. Call setExtrapolationCoefficients()
* to set the coefficients used for the extrapolation. The default coefficients used in this release are
* [0, -0.3, 0, 1.3], but be aware that those may change in a future release.
*/
Extrapolated = 2
}; };
enum MultipoleAxisTypes { ZThenX = 0, Bisector = 1, ZBisect = 2, ThreeFold = 3, ZOnly = 4, NoAxisType = 5, LastAxisTypeIndex = 6 }; enum MultipoleAxisTypes { ZThenX = 0, Bisector = 1, ZBisect = 2, ThreeFold = 3, ZOnly = 4, NoAxisType = 5, LastAxisTypeIndex = 6 };
...@@ -298,6 +309,23 @@ public: ...@@ -298,6 +309,23 @@ public:
*/ */
void setMutualInducedTargetEpsilon(double inputMutualInducedTargetEpsilon); void setMutualInducedTargetEpsilon(double inputMutualInducedTargetEpsilon);
/**
* Set the coefficients for the mu_0, mu_1, mu_2, ..., mu_n terms in the extrapolation
* algorithm for induced dipoles.
*
* @param coefficients a vector whose mth entry specifies the coefficient for mu_m. The length of this
* vector determines how many iterations are performed.
*
*/
void setExtrapolationCoefficients(const std::vector<double> &coefficients);
/**
* Get the coefficients for the mu_0, mu_1, mu_2, ..., mu_n terms in the extrapolation
* algorithm for induced dipoles. In this release, the default values for the coefficients are
* [0, -0.3, 0, 1.3], but be aware that those may change in a future release.
*/
const std::vector<double>& getExtrapolationCoefficients() const;
/** /**
* Get the error tolerance for Ewald summation. This corresponds to the fractional error in the forces * Get the error tolerance for Ewald summation. This corresponds to the fractional error in the forces
* which is acceptable. This value is used to select the grid dimensions and separation (alpha) * which is acceptable. This value is used to select the grid dimensions and separation (alpha)
...@@ -384,6 +412,8 @@ private: ...@@ -384,6 +412,8 @@ private:
int pmeBSplineOrder; int pmeBSplineOrder;
std::vector<int> pmeGridDimension; std::vector<int> pmeGridDimension;
int mutualInducedMaxIterations; int mutualInducedMaxIterations;
std::vector<double> extrapolationCoefficients;
double mutualInducedTargetEpsilon; double mutualInducedTargetEpsilon;
double scalingDistanceCutoff; double scalingDistanceCutoff;
double electricConstant; double electricConstant;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. * * Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: * * Authors: *
* Contributors: * * Contributors: *
* * * *
...@@ -43,6 +43,10 @@ AmoebaMultipoleForce::AmoebaMultipoleForce() : nonbondedMethod(NoCutoff), polari ...@@ -43,6 +43,10 @@ AmoebaMultipoleForce::AmoebaMultipoleForce() : nonbondedMethod(NoCutoff), polari
mutualInducedTargetEpsilon(1.0e-02), scalingDistanceCutoff(100.0), electricConstant(138.9354558456), aewald(0.0) { mutualInducedTargetEpsilon(1.0e-02), scalingDistanceCutoff(100.0), electricConstant(138.9354558456), aewald(0.0) {
pmeGridDimension.resize(3); pmeGridDimension.resize(3);
pmeGridDimension[0] = pmeGridDimension[1] = pmeGridDimension[2]; pmeGridDimension[0] = pmeGridDimension[1] = pmeGridDimension[2];
extrapolationCoefficients.push_back(0.0);
extrapolationCoefficients.push_back(-0.3);
extrapolationCoefficients.push_back(0.0);
extrapolationCoefficients.push_back(1.3);
} }
AmoebaMultipoleForce::NonbondedMethod AmoebaMultipoleForce::getNonbondedMethod() const { AmoebaMultipoleForce::NonbondedMethod AmoebaMultipoleForce::getNonbondedMethod() const {
...@@ -61,6 +65,14 @@ void AmoebaMultipoleForce::setPolarizationType(AmoebaMultipoleForce::Polarizatio ...@@ -61,6 +65,14 @@ void AmoebaMultipoleForce::setPolarizationType(AmoebaMultipoleForce::Polarizatio
polarizationType = type; polarizationType = type;
} }
void AmoebaMultipoleForce::setExtrapolationCoefficients(const std::vector<double> &coefficients) {
extrapolationCoefficients = coefficients;
}
const std::vector<double> & AmoebaMultipoleForce::getExtrapolationCoefficients() const {
return extrapolationCoefficients;
}
double AmoebaMultipoleForce::getCutoffDistance() const { double AmoebaMultipoleForce::getCutoffDistance() const {
return cutoffDistance; return cutoffDistance;
} }
......
...@@ -810,7 +810,10 @@ CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::stri ...@@ -810,7 +810,10 @@ CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::stri
multipoleParticles(NULL), molecularDipoles(NULL), molecularQuadrupoles(NULL), labFrameDipoles(NULL), labFrameQuadrupoles(NULL), sphericalDipoles(NULL), sphericalQuadrupoles(NULL), multipoleParticles(NULL), molecularDipoles(NULL), molecularQuadrupoles(NULL), labFrameDipoles(NULL), labFrameQuadrupoles(NULL), sphericalDipoles(NULL), sphericalQuadrupoles(NULL),
fracDipoles(NULL), fracQuadrupoles(NULL), field(NULL), fieldPolar(NULL), inducedField(NULL), inducedFieldPolar(NULL), torque(NULL), dampingAndThole(NULL), inducedDipole(NULL), fracDipoles(NULL), fracQuadrupoles(NULL), field(NULL), fieldPolar(NULL), inducedField(NULL), inducedFieldPolar(NULL), torque(NULL), dampingAndThole(NULL), inducedDipole(NULL),
diisCoefficients(NULL), inducedDipolePolar(NULL), inducedDipoleErrors(NULL), prevDipoles(NULL), prevDipolesPolar(NULL), prevDipolesGk(NULL), diisCoefficients(NULL), inducedDipolePolar(NULL), inducedDipoleErrors(NULL), prevDipoles(NULL), prevDipolesPolar(NULL), prevDipolesGk(NULL),
prevDipolesGkPolar(NULL), prevErrors(NULL), diisMatrix(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL), prevDipolesGkPolar(NULL), prevErrors(NULL), diisMatrix(NULL), polarizability(NULL), extrapolatedDipole(NULL), extrapolatedDipolePolar(NULL),
extrapolatedDipoleGk(NULL), extrapolatedDipoleGkPolar(NULL), inducedDipoleFieldGradient(NULL), inducedDipoleFieldGradientPolar(NULL),
inducedDipoleFieldGradientGk(NULL), inducedDipoleFieldGradientGkPolar(NULL), extrapolatedDipoleFieldGradient(NULL), extrapolatedDipoleFieldGradientPolar(NULL),
extrapolatedDipoleFieldGradientGk(NULL), extrapolatedDipoleFieldGradientGkPolar(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL),
pmeGrid(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeIgrid(NULL), pmePhi(NULL), pmeGrid(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeIgrid(NULL), pmePhi(NULL),
pmePhid(NULL), pmePhip(NULL), pmePhidp(NULL), pmeCphi(NULL), pmeAtomGridIndex(NULL), lastPositions(NULL), sort(NULL), gkKernel(NULL) { pmePhid(NULL), pmePhip(NULL), pmePhidp(NULL), pmeCphi(NULL), pmeAtomGridIndex(NULL), lastPositions(NULL), sort(NULL), gkKernel(NULL) {
} }
...@@ -867,6 +870,30 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() { ...@@ -867,6 +870,30 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete diisMatrix; delete diisMatrix;
if (diisCoefficients != NULL) if (diisCoefficients != NULL)
delete diisCoefficients; delete diisCoefficients;
if (extrapolatedDipole != NULL)
delete extrapolatedDipole;
if (extrapolatedDipolePolar != NULL)
delete extrapolatedDipolePolar;
if (extrapolatedDipoleGk != NULL)
delete extrapolatedDipoleGk;
if (extrapolatedDipoleGkPolar != NULL)
delete extrapolatedDipoleGkPolar;
if (inducedDipoleFieldGradient != NULL)
delete inducedDipoleFieldGradient;
if (inducedDipoleFieldGradientPolar != NULL)
delete inducedDipoleFieldGradientPolar;
if (inducedDipoleFieldGradientGk != NULL)
delete inducedDipoleFieldGradientGk;
if (inducedDipoleFieldGradientGkPolar != NULL)
delete inducedDipoleFieldGradientGkPolar;
if (extrapolatedDipoleFieldGradient != NULL)
delete extrapolatedDipoleFieldGradient;
if (extrapolatedDipoleFieldGradientPolar != NULL)
delete extrapolatedDipoleFieldGradientPolar;
if (extrapolatedDipoleFieldGradientGk != NULL)
delete extrapolatedDipoleFieldGradientGk;
if (extrapolatedDipoleFieldGradientGkPolar != NULL)
delete extrapolatedDipoleFieldGradientGkPolar;
if (polarizability != NULL) if (polarizability != NULL)
delete polarizability; delete polarizability;
if (covalentFlags != NULL) if (covalentFlags != NULL)
...@@ -967,6 +994,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -967,6 +994,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
// Create workspace arrays. // Create workspace arrays.
polarizationType = force.getPolarizationType();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
labFrameDipoles = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "labFrameDipoles"); labFrameDipoles = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "labFrameDipoles");
labFrameQuadrupoles = new CudaArray(cu, 5*paddedNumAtoms, elementSize, "labFrameQuadrupoles"); labFrameQuadrupoles = new CudaArray(cu, 5*paddedNumAtoms, elementSize, "labFrameQuadrupoles");
...@@ -979,12 +1007,23 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -979,12 +1007,23 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
torque = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "torque"); torque = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "torque");
inducedDipole = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipole"); inducedDipole = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipole");
inducedDipolePolar = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolar"); inducedDipolePolar = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolar");
inducedDipoleErrors = new CudaArray(cu, cu.getNumThreadBlocks(), sizeof(float2), "inducedDipoleErrors"); if (polarizationType == AmoebaMultipoleForce::Mutual) {
prevDipoles = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipoles"); inducedDipoleErrors = new CudaArray(cu, cu.getNumThreadBlocks(), sizeof(float2), "inducedDipoleErrors");
prevDipolesPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesPolar"); prevDipoles = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipoles");
prevErrors = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevErrors"); prevDipolesPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesPolar");
diisMatrix = new CudaArray(cu, MaxPrevDIISDipoles*MaxPrevDIISDipoles, elementSize, "diisMatrix"); prevErrors = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevErrors");
diisCoefficients = new CudaArray(cu, MaxPrevDIISDipoles+1, sizeof(float), "diisMatrix"); diisMatrix = new CudaArray(cu, MaxPrevDIISDipoles*MaxPrevDIISDipoles, elementSize, "diisMatrix");
diisCoefficients = new CudaArray(cu, MaxPrevDIISDipoles+1, sizeof(float), "diisMatrix");
}
else if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
int numOrders = force.getExtrapolationCoefficients().size();
extrapolatedDipole = new CudaArray(cu, 3*numMultipoles*numOrders, elementSize, "extrapolatedDipole");
extrapolatedDipolePolar = new CudaArray(cu, 3*numMultipoles*numOrders, elementSize, "extrapolatedDipolePolar");
inducedDipoleFieldGradient = new CudaArray(cu, 6*paddedNumAtoms, sizeof(long long), "inducedDipoleFieldGradient");
inducedDipoleFieldGradientPolar = new CudaArray(cu, 6*paddedNumAtoms, sizeof(long long), "inducedDipoleFieldGradientPolar");
extrapolatedDipoleFieldGradient = new CudaArray(cu, 6*numMultipoles*(numOrders-1), elementSize, "extrapolatedDipoleFieldGradient");
extrapolatedDipoleFieldGradientPolar = new CudaArray(cu, 6*numMultipoles*(numOrders-1), elementSize, "extrapolatedDipoleFieldGradientPolar");
}
cu.addAutoclearBuffer(*field); cu.addAutoclearBuffer(*field);
cu.addAutoclearBuffer(*fieldPolar); cu.addAutoclearBuffer(*fieldPolar);
cu.addAutoclearBuffer(*torque); cu.addAutoclearBuffer(*torque);
...@@ -1036,14 +1075,16 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1036,14 +1075,16 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
// Record other options. // Record other options.
if (force.getPolarizationType() == AmoebaMultipoleForce::Mutual) { if (polarizationType == AmoebaMultipoleForce::Mutual) {
maxInducedIterations = force.getMutualInducedMaxIterations(); maxInducedIterations = force.getMutualInducedMaxIterations();
inducedEpsilon = force.getMutualInducedTargetEpsilon(); inducedEpsilon = force.getMutualInducedTargetEpsilon();
inducedField = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedField");
inducedFieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedFieldPolar");
} }
else else
maxInducedIterations = 0; maxInducedIterations = 0;
if (polarizationType != AmoebaMultipoleForce::Direct) {
inducedField = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedField");
inducedFieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedFieldPolar");
}
usePME = (force.getNonbondedMethod() == AmoebaMultipoleForce::PME); usePME = (force.getNonbondedMethod() == AmoebaMultipoleForce::PME);
// See whether there's an AmoebaGeneralizedKirkwoodForce in the System. // See whether there's an AmoebaGeneralizedKirkwoodForce in the System.
...@@ -1058,6 +1099,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1058,6 +1099,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
bool useShuffle = (cu.getComputeCapability() >= 3.0 && !cu.getUseDoublePrecision()); bool useShuffle = (cu.getComputeCapability() >= 3.0 && !cu.getUseDoublePrecision());
double fixedThreadMemory = 19*elementSize+2*sizeof(float)+3*sizeof(int)/(double) cu.TileSize; double fixedThreadMemory = 19*elementSize+2*sizeof(float)+3*sizeof(int)/(double) cu.TileSize;
double inducedThreadMemory = 15*elementSize+2*sizeof(float); double inducedThreadMemory = 15*elementSize+2*sizeof(float);
if (polarizationType == AmoebaMultipoleForce::Extrapolated)
inducedThreadMemory += 12*elementSize;
double electrostaticsThreadMemory = 0; double electrostaticsThreadMemory = 0;
if (!useShuffle) if (!useShuffle)
fixedThreadMemory += 3*elementSize; fixedThreadMemory += 3*elementSize;
...@@ -1066,8 +1109,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1066,8 +1109,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks()); defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
defines["ENERGY_SCALE_FACTOR"] = cu.doubleToString(138.9354558456/innerDielectric); defines["ENERGY_SCALE_FACTOR"] = cu.doubleToString(138.9354558456/innerDielectric);
if (force.getPolarizationType() == AmoebaMultipoleForce::Direct) if (polarizationType == AmoebaMultipoleForce::Direct)
defines["DIRECT_POLARIZATION"] = ""; defines["DIRECT_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Mutual)
defines["MUTUAL_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Extrapolated)
defines["EXTRAPOLATED_POLARIZATION"] = "";
if (useShuffle) if (useShuffle)
defines["USE_SHUFFLE"] = ""; defines["USE_SHUFFLE"] = "";
if (hasQuadrupoles) if (hasQuadrupoles)
...@@ -1080,6 +1127,18 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1080,6 +1127,18 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
int endExclusionIndex = (cu.getContextIndex()+1)*numExclusionTiles/numContexts; int endExclusionIndex = (cu.getContextIndex()+1)*numExclusionTiles/numContexts;
defines["FIRST_EXCLUSION_TILE"] = cu.intToString(startExclusionIndex); defines["FIRST_EXCLUSION_TILE"] = cu.intToString(startExclusionIndex);
defines["LAST_EXCLUSION_TILE"] = cu.intToString(endExclusionIndex); defines["LAST_EXCLUSION_TILE"] = cu.intToString(endExclusionIndex);
maxExtrapolationOrder = force.getExtrapolationCoefficients().size();
defines["MAX_EXTRAPOLATION_ORDER"] = cu.intToString(maxExtrapolationOrder);
stringstream coefficients;
for (int i = 0; i < maxExtrapolationOrder; i++) {
if (i > 0)
coefficients << ",";
double sum = 0;
for (int j = i; j < maxExtrapolationOrder; j++)
sum += force.getExtrapolationCoefficients()[j];
coefficients << cu.doubleToString(sum);
}
defines["EXTRAPOLATION_COEFFICIENTS_SUM"] = coefficients.str();
alpha = force.getAEwald(); alpha = force.getAEwald();
if (usePME) { if (usePME) {
vector<int> pmeGridDimension; vector<int> pmeGridDimension;
...@@ -1113,8 +1172,20 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1113,8 +1172,20 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["GK_FQ"] = cu.doubleToString(3*(1-solventDielectric)/(2+3*solventDielectric)); defines["GK_FQ"] = cu.doubleToString(3*(1-solventDielectric)/(2+3*solventDielectric));
fixedThreadMemory += 4*elementSize; fixedThreadMemory += 4*elementSize;
inducedThreadMemory += 13*elementSize; inducedThreadMemory += 13*elementSize;
prevDipolesGk = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGk"); if (polarizationType == AmoebaMultipoleForce::Mutual) {
prevDipolesGkPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGkPolar"); prevDipolesGk = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGk");
prevDipolesGkPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGkPolar");
}
else if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
inducedThreadMemory += 12*elementSize;
int numOrders = force.getExtrapolationCoefficients().size();
extrapolatedDipoleGk = new CudaArray(cu, 3*numMultipoles*numOrders, elementSize, "extrapolatedDipoleGk");
extrapolatedDipoleGkPolar = new CudaArray(cu, 3*numMultipoles*numOrders, elementSize, "extrapolatedDipoleGkPolar");
inducedDipoleFieldGradientGk = new CudaArray(cu, 6*numMultipoles, elementSize, "inducedDipoleFieldGradientGk");
inducedDipoleFieldGradientGkPolar = new CudaArray(cu, 6*numMultipoles, elementSize, "inducedDipoleFieldGradientGkPolar");
extrapolatedDipoleFieldGradientGk = new CudaArray(cu, 6*numMultipoles*(numOrders-1), elementSize, "extrapolatedDipoleFieldGradientGk");
extrapolatedDipoleFieldGradientGkPolar = new CudaArray(cu, 6*numMultipoles*(numOrders-1), elementSize, "extrapolatedDipoleFieldGradientGkPolar");
}
} }
int maxThreads = cu.getNonbondedUtilities().getForceThreadBlockSize(); int maxThreads = cu.getNonbondedUtilities().getForceThreadBlockSize();
fixedFieldThreads = min(maxThreads, cu.computeThreadBlockSize(fixedThreadMemory)); fixedFieldThreads = min(maxThreads, cu.computeThreadBlockSize(fixedThreadMemory));
...@@ -1127,15 +1198,18 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1127,15 +1198,18 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["THREAD_BLOCK_SIZE"] = cu.intToString(fixedFieldThreads); defines["THREAD_BLOCK_SIZE"] = cu.intToString(fixedFieldThreads);
module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleFixedField, defines); module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleFixedField, defines);
computeFixedFieldKernel = cu.getKernel(module, "computeFixedField"); computeFixedFieldKernel = cu.getKernel(module, "computeFixedField");
if (maxInducedIterations > 0) { if (polarizationType != AmoebaMultipoleForce::Direct) {
defines["THREAD_BLOCK_SIZE"] = cu.intToString(inducedFieldThreads); defines["THREAD_BLOCK_SIZE"] = cu.intToString(inducedFieldThreads);
defines["MAX_PREV_DIIS_DIPOLES"] = cu.intToString(MaxPrevDIISDipoles); defines["MAX_PREV_DIIS_DIPOLES"] = cu.intToString(MaxPrevDIISDipoles);
defines["USE_MUTUAL_POLARIZATION"] = "1";
module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleInducedField, defines); module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleInducedField, defines);
computeInducedFieldKernel = cu.getKernel(module, "computeInducedField"); computeInducedFieldKernel = cu.getKernel(module, "computeInducedField");
updateInducedFieldKernel = cu.getKernel(module, "updateInducedFieldByDIIS"); updateInducedFieldKernel = cu.getKernel(module, "updateInducedFieldByDIIS");
recordDIISDipolesKernel = cu.getKernel(module, "recordInducedDipolesForDIIS"); recordDIISDipolesKernel = cu.getKernel(module, "recordInducedDipolesForDIIS");
buildMatrixKernel = cu.getKernel(module, "computeDIISMatrix"); buildMatrixKernel = cu.getKernel(module, "computeDIISMatrix");
initExtrapolatedKernel = cu.getKernel(module, "initExtrapolatedDipoles");
iterateExtrapolatedKernel = cu.getKernel(module, "iterateExtrapolatedDipoles");
computeExtrapolatedKernel = cu.getKernel(module, "computeExtrapolatedDipoles");
addExtrapolatedGradientKernel = cu.getKernel(module, "addExtrapolatedFieldGradientToForce");
} }
stringstream electrostaticsSource; stringstream electrostaticsSource;
electrostaticsSource << CudaKernelSources::vectorOps; electrostaticsSource << CudaKernelSources::vectorOps;
...@@ -1166,8 +1240,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1166,8 +1240,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ); pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ);
pmeDefines["M_PI"] = cu.doubleToString(M_PI); pmeDefines["M_PI"] = cu.doubleToString(M_PI);
pmeDefines["SQRT_PI"] = cu.doubleToString(sqrt(M_PI)); pmeDefines["SQRT_PI"] = cu.doubleToString(sqrt(M_PI));
if (force.getPolarizationType() == AmoebaMultipoleForce::Direct) if (polarizationType == AmoebaMultipoleForce::Direct)
pmeDefines["DIRECT_POLARIZATION"] = ""; pmeDefines["DIRECT_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Mutual)
pmeDefines["MUTUAL_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Extrapolated)
pmeDefines["EXTRAPOLATED_POLARIZATION"] = "";
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipolePme, pmeDefines); CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipolePme, pmeDefines);
pmeGridIndexKernel = cu.getKernel(module, "findAtomGridIndex"); pmeGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
pmeTransformMultipolesKernel = cu.getKernel(module, "transformMultipolesToFractionalCoordinates"); pmeTransformMultipolesKernel = cu.getKernel(module, "transformMultipolesToFractionalCoordinates");
...@@ -1407,7 +1485,6 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1407,7 +1485,6 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
int numTileIndices = nb.getNumTiles(); int numTileIndices = nb.getNumTiles();
int numForceThreadBlocks = nb.getNumForceThreadBlocks(); int numForceThreadBlocks = nb.getNumForceThreadBlocks();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
void* npt = NULL;
if (pmeGrid == NULL) { if (pmeGrid == NULL) {
// Compute induced dipoles. // Compute induced dipoles.
...@@ -1436,25 +1513,10 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1436,25 +1513,10 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Iterate until the dipoles converge. // Iterate until the dipoles converge.
if (polarizationType == AmoebaMultipoleForce::Extrapolated)
computeExtrapolatedDipoles(NULL);
for (int i = 0; i < maxInducedIterations; i++) { for (int i = 0; i < maxInducedIterations; i++) {
cu.clearBuffer(*inducedField); computeInducedField(NULL);
cu.clearBuffer(*inducedFieldPolar);
if (gkKernel == NULL) {
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
}
else {
cu.clearBuffer(*gkKernel->getInducedField());
cu.clearBuffer(*gkKernel->getInducedFieldPolar());
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&gkKernel->getInducedField()->getDevicePointer(), &gkKernel->getInducedFieldPolar()->getDevicePointer(),
&gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(),
&gkKernel->getBornRadii()->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
}
bool converged = iterateDipolesByDIIS(i); bool converged = iterateDipolesByDIIS(i);
if (converged) if (converged)
break; break;
...@@ -1579,34 +1641,10 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1579,34 +1641,10 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Iterate until the dipoles converge. // Iterate until the dipoles converge.
vector<float2> errors; if (polarizationType == AmoebaMultipoleForce::Extrapolated)
computeExtrapolatedDipoles(recipBoxVectorPointer);
for (int i = 0; i < maxInducedIterations; i++) { for (int i = 0; i < maxInducedIterations; i++) {
cu.clearBuffer(*inducedField); computeInducedField(recipBoxVectorPointer);
cu.clearBuffer(*inducedFieldPolar);
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&nb.getInteractingTiles().getDevicePointer(), &nb.getInteractionCount().getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
&maxTiles, &nb.getBlockCenters().getDevicePointer(), &nb.getInteractingAtoms().getDevicePointer(),
&dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
cu.clearBuffer(*pmeGrid);
cu.executeKernel(pmeSpreadInducedDipolesKernel, pmeSpreadInducedDipolesArgs, cu.getNumAtoms());
if (cu.getUseDoublePrecision())
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, pmeGrid->getSize());
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
cu.executeKernel(pmeConvolutionKernel, pmeConvolutionArgs, cu.getNumAtoms());
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
cu.executeKernel(pmeInducedPotentialKernel, pmeInducedPotentialArgs, cu.getNumAtoms());
void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
bool converged = iterateDipolesByDIIS(i); bool converged = iterateDipolesByDIIS(i);
if (converged) if (converged)
break; break;
...@@ -1632,6 +1670,23 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1632,6 +1670,23 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&pmePhip->getDevicePointer(), &pmePhidp->getDevicePointer(), &pmeCphi->getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]}; &pmePhip->getDevicePointer(), &pmePhidp->getDevicePointer(), &pmeCphi->getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeInducedForceKernel, pmeInducedForceArgs, cu.getNumAtoms()); cu.executeKernel(pmeInducedForceKernel, pmeInducedForceArgs, cu.getNumAtoms());
} }
// If using extrapolated polarization, add in force contributions from µ(m) T µ(n).
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
if (gkKernel == NULL) {
void* extrapolatedArgs[] = {&cu.getForce().getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &extrapolatedDipoleFieldGradient->getDevicePointer(), &extrapolatedDipoleFieldGradientPolar->getDevicePointer()};
cu.executeKernel(addExtrapolatedGradientKernel, extrapolatedArgs, numMultipoles);
}
else {
void* extrapolatedArgs[] = {&cu.getForce().getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &extrapolatedDipoleFieldGradient->getDevicePointer(), &extrapolatedDipoleFieldGradientPolar->getDevicePointer(),
&extrapolatedDipoleGk->getDevicePointer(), &extrapolatedDipoleGkPolar->getDevicePointer(),
&extrapolatedDipoleFieldGradientGk->getDevicePointer(), &extrapolatedDipoleFieldGradientGkPolar->getDevicePointer()};
cu.executeKernel(addExtrapolatedGradientKernel, extrapolatedArgs, numMultipoles);
}
}
// Map torques to force. // Map torques to force.
...@@ -1646,6 +1701,109 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1646,6 +1701,109 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
return 0.0; return 0.0;
} }
void CudaCalcAmoebaMultipoleForceKernel::computeInducedField(void** recipBoxVectorPointer) {
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
int startTileIndex = nb.getStartTileIndex();
int numTileIndices = nb.getNumTiles();
int numForceThreadBlocks = nb.getNumForceThreadBlocks();
unsigned int maxTiles = 0;
vector<void*> computeInducedFieldArgs;
computeInducedFieldArgs.push_back(&inducedField->getDevicePointer());
computeInducedFieldArgs.push_back(&inducedFieldPolar->getDevicePointer());
computeInducedFieldArgs.push_back(&cu.getPosq().getDevicePointer());
computeInducedFieldArgs.push_back(&nb.getExclusionTiles().getDevicePointer());
computeInducedFieldArgs.push_back(&inducedDipole->getDevicePointer());
computeInducedFieldArgs.push_back(&inducedDipolePolar->getDevicePointer());
computeInducedFieldArgs.push_back(&startTileIndex);
computeInducedFieldArgs.push_back(&numTileIndices);
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
computeInducedFieldArgs.push_back(&inducedDipoleFieldGradient->getDevicePointer());
computeInducedFieldArgs.push_back(&inducedDipoleFieldGradientPolar->getDevicePointer());
}
if (pmeGrid != NULL) {
computeInducedFieldArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
computeInducedFieldArgs.push_back(&nb.getInteractionCount().getDevicePointer());
computeInducedFieldArgs.push_back(cu.getPeriodicBoxSizePointer());
computeInducedFieldArgs.push_back(cu.getInvPeriodicBoxSizePointer());
computeInducedFieldArgs.push_back(cu.getPeriodicBoxVecXPointer());
computeInducedFieldArgs.push_back(cu.getPeriodicBoxVecYPointer());
computeInducedFieldArgs.push_back(cu.getPeriodicBoxVecZPointer());
computeInducedFieldArgs.push_back(&maxTiles);
computeInducedFieldArgs.push_back(&nb.getBlockCenters().getDevicePointer());
computeInducedFieldArgs.push_back(&nb.getInteractingAtoms().getDevicePointer());
}
if (gkKernel != NULL) {
computeInducedFieldArgs.push_back(&gkKernel->getInducedField()->getDevicePointer());
computeInducedFieldArgs.push_back(&gkKernel->getInducedFieldPolar()->getDevicePointer());
computeInducedFieldArgs.push_back(&gkKernel->getInducedDipoles()->getDevicePointer());
computeInducedFieldArgs.push_back(&gkKernel->getInducedDipolesPolar()->getDevicePointer());
computeInducedFieldArgs.push_back(&gkKernel->getBornRadii()->getDevicePointer());
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
computeInducedFieldArgs.push_back(&inducedDipoleFieldGradientGk->getDevicePointer());
computeInducedFieldArgs.push_back(&inducedDipoleFieldGradientGkPolar->getDevicePointer());
}
}
computeInducedFieldArgs.push_back(&dampingAndThole->getDevicePointer());
cu.clearBuffer(*inducedField);
cu.clearBuffer(*inducedFieldPolar);
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
cu.clearBuffer(*inducedDipoleFieldGradient);
cu.clearBuffer(*inducedDipoleFieldGradientPolar);
}
if (gkKernel != NULL) {
cu.clearBuffer(*gkKernel->getInducedField());
cu.clearBuffer(*gkKernel->getInducedFieldPolar());
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
cu.clearBuffer(*inducedDipoleFieldGradientGk);
cu.clearBuffer(*inducedDipoleFieldGradientGkPolar);
}
}
if (pmeGrid == NULL)
cu.executeKernel(computeInducedFieldKernel, &computeInducedFieldArgs[0], numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
else {
maxTiles = nb.getInteractingTiles().getSize();
cu.executeKernel(computeInducedFieldKernel, &computeInducedFieldArgs[0], numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
cu.clearBuffer(*pmeGrid);
void* pmeSpreadInducedDipolesArgs[] = {&cu.getPosq().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
&pmeGrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeSpreadInducedDipolesKernel, pmeSpreadInducedDipolesArgs, cu.getNumAtoms());
if (cu.getUseDoublePrecision()) {
void* finishSpreadArgs[] = {&pmeGrid->getDevicePointer()};
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, pmeGrid->getSize());
}
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
void* pmeConvolutionArgs[] = {&pmeGrid->getDevicePointer(), &pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(),
&pmeBsplineModuliZ->getDevicePointer(), cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeConvolutionKernel, pmeConvolutionArgs, cu.getNumAtoms());
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
void* pmeInducedPotentialArgs[] = {&pmeGrid->getDevicePointer(), &pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&pmePhidp->getDevicePointer(), &cu.getPosq().getDevicePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(),
cu.getPeriodicBoxVecZPointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2],
&pmeAtomGridIndex->getDevicePointer()};
cu.executeKernel(pmeInducedPotentialKernel, pmeInducedPotentialArgs, cu.getNumAtoms());
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(),
&inducedDipolePolar->getDevicePointer(), &inducedDipoleFieldGradient->getDevicePointer(), &inducedDipoleFieldGradientPolar->getDevicePointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
}
else {
void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
}
}
}
bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) { bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
void* npt = NULL; void* npt = NULL;
bool trueValue = true, falseValue = false; bool trueValue = true, falseValue = false;
...@@ -1744,6 +1902,62 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) { ...@@ -1744,6 +1902,62 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
return false; return false;
} }
void CudaCalcAmoebaMultipoleForceKernel::computeExtrapolatedDipoles(void** recipBoxVectorPointer) {
// Start by storing the direct dipoles as PT0
if (gkKernel == NULL) {
void* initArgs[] = {&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &inducedDipoleFieldGradient->getDevicePointer(), &inducedDipoleFieldGradientPolar->getDevicePointer()};
cu.executeKernel(initExtrapolatedKernel, initArgs, extrapolatedDipole->getSize());
}
else {
void* initArgs[] = {&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &inducedDipoleFieldGradient->getDevicePointer(), &inducedDipoleFieldGradientPolar->getDevicePointer(),
&gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(), &extrapolatedDipoleGk->getDevicePointer(),
&extrapolatedDipoleGkPolar->getDevicePointer(), &inducedDipoleFieldGradientGk->getDevicePointer(), &inducedDipoleFieldGradientGkPolar->getDevicePointer()};
cu.executeKernel(initExtrapolatedKernel, initArgs, extrapolatedDipole->getSize());
}
// Recursively apply alpha.Tau to the µ_(n) components to generate µ_(n+1), and store the result
for (int order = 1; order < maxExtrapolationOrder; ++order) {
computeInducedField(recipBoxVectorPointer);
if (gkKernel == NULL) {
void* iterateArgs[] = {&order, &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &inducedDipoleFieldGradient->getDevicePointer(), &inducedDipoleFieldGradientPolar->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &extrapolatedDipoleFieldGradient->getDevicePointer(), &extrapolatedDipoleFieldGradientPolar->getDevicePointer(),
&polarizability->getDevicePointer()};
cu.executeKernel(iterateExtrapolatedKernel, iterateArgs, extrapolatedDipole->getSize());
}
else {
void* iterateArgs[] = {&order, &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &inducedDipoleFieldGradient->getDevicePointer(), &inducedDipoleFieldGradientPolar->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &extrapolatedDipoleFieldGradient->getDevicePointer(), &extrapolatedDipoleFieldGradientPolar->getDevicePointer(),
&gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(), &extrapolatedDipoleGk->getDevicePointer(),
&extrapolatedDipoleGkPolar->getDevicePointer(), &inducedDipoleFieldGradientGk->getDevicePointer(), &inducedDipoleFieldGradientGkPolar->getDevicePointer(),
&gkKernel->getInducedField()->getDevicePointer(), &gkKernel->getInducedFieldPolar()->getDevicePointer(),
&extrapolatedDipoleFieldGradientGk->getDevicePointer(), &extrapolatedDipoleFieldGradientGkPolar->getDevicePointer(),
&polarizability->getDevicePointer()};
cu.executeKernel(iterateExtrapolatedKernel, iterateArgs, extrapolatedDipole->getSize());
}
}
// Take a linear combination of the µ_(n) components to form the total dipole
if (gkKernel == NULL) {
void* computeArgs[] = {&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer()};
cu.executeKernel(computeExtrapolatedKernel, computeArgs, extrapolatedDipole->getSize());
}
else {
void* computeArgs[] = {&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(),
&extrapolatedDipoleGk->getDevicePointer(), &extrapolatedDipoleGkPolar->getDevicePointer()};
cu.executeKernel(computeExtrapolatedKernel, computeArgs, extrapolatedDipole->getSize());
}
computeInducedField(recipBoxVectorPointer);
}
void CudaCalcAmoebaMultipoleForceKernel::ensureMultipolesValid(ContextImpl& context) { void CudaCalcAmoebaMultipoleForceKernel::ensureMultipolesValid(ContextImpl& context) {
if (multipolesAreValid) { if (multipolesAreValid) {
int numParticles = cu.getNumAtoms(); int numParticles = cu.getNumAtoms();
...@@ -2088,7 +2302,8 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::initialize(const System& syst ...@@ -2088,7 +2302,8 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::initialize(const System& syst
bornForce = CudaArray::create<long long>(cu, paddedNumAtoms, "bornForce"); bornForce = CudaArray::create<long long>(cu, paddedNumAtoms, "bornForce");
inducedDipoleS = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipoleS"); inducedDipoleS = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipoleS");
inducedDipolePolarS = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolarS"); inducedDipolePolarS = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolarS");
if (multipoles->getPolarizationType() == AmoebaMultipoleForce::Mutual) { polarizationType = multipoles->getPolarizationType();
if (polarizationType != AmoebaMultipoleForce::Direct) {
inducedField = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "gkInducedField"); inducedField = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "gkInducedField");
inducedFieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "gkInducedFieldPolar"); inducedFieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "gkInducedFieldPolar");
} }
...@@ -2141,8 +2356,12 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::initialize(const System& syst ...@@ -2141,8 +2356,12 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::initialize(const System& syst
defines["EPSILON_FACTOR"] = cu.doubleToString(138.9354558456); defines["EPSILON_FACTOR"] = cu.doubleToString(138.9354558456);
defines["M_PI"] = cu.doubleToString(M_PI); defines["M_PI"] = cu.doubleToString(M_PI);
defines["ENERGY_SCALE_FACTOR"] = cu.doubleToString(138.9354558456/force.getSoluteDielectric()); defines["ENERGY_SCALE_FACTOR"] = cu.doubleToString(138.9354558456/force.getSoluteDielectric());
if (multipoles->getPolarizationType() == AmoebaMultipoleForce::Direct) if (polarizationType == AmoebaMultipoleForce::Direct)
defines["DIRECT_POLARIZATION"] = ""; defines["DIRECT_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Mutual)
defines["MUTUAL_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Extrapolated)
defines["EXTRAPOLATED_POLARIZATION"] = "";
includeSurfaceArea = force.getIncludeCavityTerm(); includeSurfaceArea = force.getIncludeCavityTerm();
if (includeSurfaceArea) { if (includeSurfaceArea) {
defines["SURFACE_AREA_FACTOR"] = cu.doubleToString(force.getSurfaceAreaFactor()); defines["SURFACE_AREA_FACTOR"] = cu.doubleToString(force.getSurfaceAreaFactor());
......
...@@ -385,14 +385,17 @@ private: ...@@ -385,14 +385,17 @@ private:
const char* getSortKey() const {return "value.y";} const char* getSortKey() const {return "value.y";}
}; };
void initializeScaleFactors(); void initializeScaleFactors();
void computeInducedField(void** recipBoxVectorPointer);
bool iterateDipolesByDIIS(int iteration); bool iterateDipolesByDIIS(int iteration);
void computeExtrapolatedDipoles(void** recipBoxVectorPointer);
void ensureMultipolesValid(ContextImpl& context); void ensureMultipolesValid(ContextImpl& context);
template <class T, class T4, class M4> void computeSystemMultipoleMoments(ContextImpl& context, std::vector<double>& outputMultipoleMoments); template <class T, class T4, class M4> void computeSystemMultipoleMoments(ContextImpl& context, std::vector<double>& outputMultipoleMoments);
int numMultipoles, maxInducedIterations; int numMultipoles, maxInducedIterations, maxExtrapolationOrder;
int fixedFieldThreads, inducedFieldThreads, electrostaticsThreads; int fixedFieldThreads, inducedFieldThreads, electrostaticsThreads;
int gridSizeX, gridSizeY, gridSizeZ; int gridSizeX, gridSizeY, gridSizeZ;
double alpha, inducedEpsilon; double alpha, inducedEpsilon;
bool usePME, hasQuadrupoles, hasInitializedScaleFactors, hasInitializedFFT, multipolesAreValid; bool usePME, hasQuadrupoles, hasInitializedScaleFactors, hasInitializedFFT, multipolesAreValid;
AmoebaMultipoleForce::PolarizationType polarizationType;
CudaContext& cu; CudaContext& cu;
const System& system; const System& system;
std::vector<int3> covalentFlagValues; std::vector<int3> covalentFlagValues;
...@@ -422,6 +425,18 @@ private: ...@@ -422,6 +425,18 @@ private:
CudaArray* prevErrors; CudaArray* prevErrors;
CudaArray* diisMatrix; CudaArray* diisMatrix;
CudaArray* diisCoefficients; CudaArray* diisCoefficients;
CudaArray* extrapolatedDipole;
CudaArray* extrapolatedDipolePolar;
CudaArray* extrapolatedDipoleGk;
CudaArray* extrapolatedDipoleGkPolar;
CudaArray* inducedDipoleFieldGradient;
CudaArray* inducedDipoleFieldGradientPolar;
CudaArray* inducedDipoleFieldGradientGk;
CudaArray* inducedDipoleFieldGradientGkPolar;
CudaArray* extrapolatedDipoleFieldGradient;
CudaArray* extrapolatedDipoleFieldGradientPolar;
CudaArray* extrapolatedDipoleFieldGradientGk;
CudaArray* extrapolatedDipoleFieldGradientGkPolar;
CudaArray* polarizability; CudaArray* polarizability;
CudaArray* covalentFlags; CudaArray* covalentFlags;
CudaArray* polarizationGroupFlags; CudaArray* polarizationGroupFlags;
...@@ -444,6 +459,7 @@ private: ...@@ -444,6 +459,7 @@ private:
CUfunction pmeGridIndexKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeFinishSpreadChargeKernel, pmeConvolutionKernel; CUfunction pmeGridIndexKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeFinishSpreadChargeKernel, pmeConvolutionKernel;
CUfunction pmeFixedPotentialKernel, pmeInducedPotentialKernel, pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel, computePotentialKernel; CUfunction pmeFixedPotentialKernel, pmeInducedPotentialKernel, pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel, computePotentialKernel;
CUfunction recordDIISDipolesKernel, buildMatrixKernel; CUfunction recordDIISDipolesKernel, buildMatrixKernel;
CUfunction initExtrapolatedKernel, iterateExtrapolatedKernel, computeExtrapolatedKernel, addExtrapolatedGradientKernel;
CUfunction pmeTransformMultipolesKernel, pmeTransformPotentialKernel; CUfunction pmeTransformMultipolesKernel, pmeTransformPotentialKernel;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel* gkKernel; CudaCalcAmoebaGeneralizedKirkwoodForceKernel* gkKernel;
static const int PmeOrder = 5; static const int PmeOrder = 5;
...@@ -512,6 +528,7 @@ private: ...@@ -512,6 +528,7 @@ private:
const System& system; const System& system;
bool includeSurfaceArea, hasInitializedKernels; bool includeSurfaceArea, hasInitializedKernels;
int computeBornSumThreads, gkForceThreads, chainRuleThreads, ediffThreads; int computeBornSumThreads, gkForceThreads, chainRuleThreads, ediffThreads;
AmoebaMultipoleForce::PolarizationType polarizationType;
std::map<std::string, std::string> defines; std::map<std::string, std::string> defines;
CudaArray* params; CudaArray* params;
CudaArray* bornSum; CudaArray* bornSum;
......
...@@ -39,15 +39,23 @@ real ym = zcp*xap - xcp*zap; ...@@ -39,15 +39,23 @@ real ym = zcp*xap - xcp*zap;
real zm = xcp*yap - ycp*xap; real zm = xcp*yap - ycp*xap;
real rm = max(SQRT(xm*xm + ym*ym + zm*zm), (real) 1e-6f); real rm = max(SQRT(xm*xm + ym*ym + zm*zm), (real) 1e-6f);
real dot = xap*xcp + yap*ycp + zap*zcp; real dotp = xap*xcp + yap*ycp + zap*zcp;
real product = SQRT(rap2*rcp2); real product = SQRT(rap2*rcp2);
real cosine = (product > 0 ? (dot/product) : 0); real cosine = (product > 0 ? (dotp/product) : 0);
cosine = max(min(cosine, (real) 1), (real) -1); cosine = max(min(cosine, (real) 1), (real) -1);
real angle = ACOS(cosine); real angle;
if (cosine > 0.99f || cosine < -0.99f) {
real3 cross_prod = cross(make_real3(xap, yap, zap), make_real3(xcp, ycp, zcp));
angle = ASIN(SQRT(dot(cross_prod, cross_prod)/(rap2*rcp2)))*RAD_TO_DEG;
if (cosine < 0.0f)
angle = 180-angle;
}
else
angle = ACOS(cosine)*RAD_TO_DEG;
// if product == 0, set force/energy to 0 // if product == 0, set force/energy to 0
real deltaIdeal = (product > 0 ? (angle*RAD_TO_DEG - angleParams.x) : 0); real deltaIdeal = (product > 0 ? (angle - angleParams.x) : 0);
real deltaIdeal2 = deltaIdeal*deltaIdeal; real deltaIdeal2 = deltaIdeal*deltaIdeal;
real deltaIdeal3 = deltaIdeal*deltaIdeal2; real deltaIdeal3 = deltaIdeal*deltaIdeal2;
real deltaIdeal4 = deltaIdeal2*deltaIdeal2; real deltaIdeal4 = deltaIdeal2*deltaIdeal2;
...@@ -111,4 +119,4 @@ real dedzid = -dedzia - dedzib - dedzic; ...@@ -111,4 +119,4 @@ real dedzid = -dedzia - dedzib - dedzic;
real3 force1 = make_real3(-dedxia, -dedyia, -dedzia); real3 force1 = make_real3(-dedxia, -dedyia, -dedzia);
real3 force2 = make_real3(-dedxib, -dedyib, -dedzib); real3 force2 = make_real3(-dedxib, -dedyib, -dedzib);
real3 force3 = make_real3(-dedxic, -dedyic, -dedzic); real3 force3 = make_real3(-dedxic, -dedyic, -dedzic);
real3 force4 = make_real3(-dedxid, -dedyid, -dedzid); real3 force4 = make_real3(-dedxid, -dedyid, -dedzid);
\ No newline at end of file
...@@ -17,18 +17,27 @@ real zp = xcb*yab - ycb*xab; ...@@ -17,18 +17,27 @@ real zp = xcb*yab - ycb*xab;
real rp = SQRT(xp*xp + yp*yp + zp*zp); real rp = SQRT(xp*xp + yp*yp + zp*zp);
real dot = xab*xcb + yab*ycb + zab*zcb; real dotp = xab*xcb + yab*ycb + zab*zcb;
real cosine = rab*rcb > 0 ? (dot / (rab*rcb)) : (real) 1; real cosine = rab*rcb > 0 ? (dotp / (rab*rcb)) : (real) 1;
cosine = (cosine > 1 ? (real) 1 : cosine); cosine = (cosine > 1 ? (real) 1 : cosine);
cosine = (cosine < -1 ? -(real) 1 : cosine); cosine = (cosine < -1 ? -(real) 1 : cosine);
real angle = ACOS(cosine); real angle;
if (cosine > 0.99f || cosine < -0.99f) {
// Highly unlikely a stretch-bend angle will be near 0 or 180, but just in case...
real3 cross_prod = cross(make_real3(xab, yab, zab), make_real3(xcb, ycb, zcb));
angle = ASIN(SQRT(dot(cross_prod, cross_prod))/(rab*rcb))*RAD_TO_DEG;
if (cosine < 0.0f)
angle = 180-angle;
}
else
angle = ACOS(cosine)*RAD_TO_DEG;
// find chain rule terms for the bond angle deviation // find chain rule terms for the bond angle deviation
float3 parameters = PARAMS[index]; float3 parameters = PARAMS[index];
float2 force_constants = FORCE_CONSTANTS[index]; float2 force_constants = FORCE_CONSTANTS[index];
real dt = RAD_TO_DEG*(angle - parameters.z); real dt = angle - RAD_TO_DEG*parameters.z;
real terma = rab*rp != 0 ? (-RAD_TO_DEG/(rab*rab*rp)) : (real) 0; real terma = rab*rp != 0 ? (-RAD_TO_DEG/(rab*rab*rp)) : (real) 0;
real termc = rcb*rp != 0 ? (RAD_TO_DEG/(rcb*rcb*rp)) : (real) 0; real termc = rcb*rp != 0 ? (RAD_TO_DEG/(rcb*rcb*rp)) : (real) 0;
......
...@@ -365,7 +365,22 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData ...@@ -365,7 +365,22 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData
// correction to convert mutual to direct polarization force // correction to convert mutual to direct polarization force
#ifdef DIRECT_POLARIZATION #ifdef MUTUAL_POLARIZATION
real findmp1 = uscale*(scip2*ddsc3_1 - ddsc5_1*(sci3*scip4+scip3*sci4));
real findmp2 = uscale*(scip2*ddsc3_2 - ddsc5_2*(sci3*scip4+scip3*sci4));
real findmp3 = uscale*(scip2*ddsc3_3 - ddsc5_3*(sci3*scip4+scip3*sci4));
ftm2i1 -= 0.5f*findmp1;
ftm2i2 -= 0.5f*findmp2;
ftm2i3 -= 0.5f*findmp3;
real sci3X = sci3 - sci3Y;
real sci4X = sci4 - sci4Y;
real scip3X = scip3 - scip3Y;
real scip4X = scip4 - scip4Y;
ftm2i1 += 0.5f*uscale*(-ddsc5_1*(sci3X*scip4X+scip3X*sci4X));
ftm2i2 += 0.5f*uscale*(-ddsc5_2*(sci3X*scip4X+scip3X*sci4X));
ftm2i3 += 0.5f*uscale*(-ddsc5_3*(sci3X*scip4X+scip3X*sci4X));
#else
real gfd = (scip2*scale3i - 5*rr2*(scip3*sci4+sci3*scip4)*scale5i); real gfd = (scip2*scale3i - 5*rr2*(scip3*sci4+sci3*scip4)*scale5i);
real fdir1 = gfd*xr + scale5i* (sci4*atom1.inducedDipolePolarS.x+scip4*atom1.inducedDipoleS.x + sci3*atom2.inducedDipolePolarS.x+scip3*atom2.inducedDipoleS.x); real fdir1 = gfd*xr + scale5i* (sci4*atom1.inducedDipolePolarS.x+scip4*atom1.inducedDipoleS.x + sci3*atom2.inducedDipolePolarS.x+scip3*atom2.inducedDipoleS.x);
real fdir2 = gfd*yr + scale5i* (sci4*atom1.inducedDipolePolarS.y+scip4*atom1.inducedDipoleS.y + sci3*atom2.inducedDipolePolarS.y+scip3*atom2.inducedDipoleS.y); real fdir2 = gfd*yr + scale5i* (sci4*atom1.inducedDipolePolarS.y+scip4*atom1.inducedDipoleS.y + sci3*atom2.inducedDipolePolarS.y+scip3*atom2.inducedDipoleS.y);
...@@ -385,21 +400,6 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData ...@@ -385,21 +400,6 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData
ftm2i1 += 0.5f*fdir1; ftm2i1 += 0.5f*fdir1;
ftm2i2 += 0.5f*fdir2; ftm2i2 += 0.5f*fdir2;
ftm2i3 += 0.5f*fdir3; ftm2i3 += 0.5f*fdir3;
#else
real findmp1 = uscale*(scip2*ddsc3_1 - ddsc5_1*(sci3*scip4+scip3*sci4));
real findmp2 = uscale*(scip2*ddsc3_2 - ddsc5_2*(sci3*scip4+scip3*sci4));
real findmp3 = uscale*(scip2*ddsc3_3 - ddsc5_3*(sci3*scip4+scip3*sci4));
ftm2i1 -= 0.5f*findmp1;
ftm2i2 -= 0.5f*findmp2;
ftm2i3 -= 0.5f*findmp3;
real sci3X = sci3 - sci3Y;
real sci4X = sci4 - sci4Y;
real scip3X = scip3 - scip3Y;
real scip4X = scip4 - scip4Y;
ftm2i1 += 0.5f*uscale*(-ddsc5_1*(sci3X*scip4X+scip3X*sci4X));
ftm2i2 += 0.5f*uscale*(-ddsc5_2*(sci3X*scip4X+scip3X*sci4X));
ftm2i3 += 0.5f*uscale*(-ddsc5_3*(sci3X*scip4X+scip3X*sci4X));
#endif #endif
#endif #endif
......
...@@ -327,7 +327,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has ...@@ -327,7 +327,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
real iEIY = qiUinpI.x*Vijp[1] + qiUindI.x*Vijd[1] - qiUinpI.y*Vijp[0] - qiUindI.y*Vijd[0]; real iEIY = qiUinpI.x*Vijp[1] + qiUindI.x*Vijd[1] - qiUinpI.y*Vijp[0] - qiUindI.y*Vijd[0];
real iEJY = qiUinpJ.x*Vjip[1] + qiUindJ.x*Vjid[1] - qiUinpJ.y*Vjip[0] - qiUindJ.y*Vjid[0]; real iEJY = qiUinpJ.x*Vjip[1] + qiUindJ.x*Vjid[1] - qiUinpJ.y*Vjip[0] - qiUindJ.y*Vjid[0];
#ifdef USE_MUTUAL_POLARIZATION #ifdef MUTUAL_POLARIZATION
// Uind-Uind terms (m=0) // Uind-Uind terms (m=0)
real eCoef = -4*rInvVec[3]*thole_d0; real eCoef = -4*rInvVec[3]*thole_d0;
real dCoef = 6*rInvVec[4]*dthole_d0; real dCoef = 6*rInvVec[4]*dthole_d0;
......
...@@ -3,9 +3,15 @@ ...@@ -3,9 +3,15 @@
typedef struct { typedef struct {
real3 pos; real3 pos;
real3 field, fieldPolar, inducedDipole, inducedDipolePolar; real3 field, fieldPolar, inducedDipole, inducedDipolePolar;
#ifdef EXTRAPOLATED_POLARIZATION
real fieldGradient[6], fieldGradientPolar[6];
#endif
#ifdef USE_GK #ifdef USE_GK
real3 fieldS, fieldPolarS, inducedDipoleS, inducedDipolePolarS; real3 fieldS, fieldPolarS, inducedDipoleS, inducedDipolePolarS;
real bornRadius; real bornRadius;
#ifdef EXTRAPOLATED_POLARIZATION
real fieldGradientS[6], fieldGradientPolarS[6];
#endif
#endif #endif
float thole, damp; float thole, damp;
} AtomData; } AtomData;
...@@ -47,17 +53,79 @@ inline __device__ void zeroAtomData(AtomData& data) { ...@@ -47,17 +53,79 @@ inline __device__ void zeroAtomData(AtomData& data) {
data.fieldS = make_real3(0); data.fieldS = make_real3(0);
data.fieldPolarS = make_real3(0); data.fieldPolarS = make_real3(0);
#endif #endif
#ifdef EXTRAPOLATED_POLARIZATION
for (int i = 0; i < 6; i++) {
data.fieldGradient[i] = 0;
data.fieldGradientPolar[i] = 0;
#ifdef USE_GK
data.fieldGradientS[i] = 0;
data.fieldGradientPolarS[i] = 0;
#endif
}
#endif
}
#ifdef EXTRAPOLATED_POLARIZATION
#ifdef USE_GK
#define SAVE_ATOM_DATA(index, data) saveAtomData(index, data, field, fieldPolar, fieldGradient, fieldGradientPolar, fieldS, fieldPolarS, fieldGradientS, fieldGradientPolarS);
#else
#define SAVE_ATOM_DATA(index, data) saveAtomData(index, data, field, fieldPolar, fieldGradient, fieldGradientPolar);
#endif
#else
#ifdef USE_GK
#define SAVE_ATOM_DATA(index, data) saveAtomData(index, data, field, fieldPolar, fieldS, fieldPolarS);
#else
#define SAVE_ATOM_DATA(index, data) saveAtomData(index, data, field, fieldPolar);
#endif
#endif
inline __device__ void saveAtomData(int index, AtomData& data, unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar
#ifdef EXTRAPOLATED_POLARIZATION
, unsigned long long* __restrict__ fieldGradient, unsigned long long* __restrict__ fieldGradientPolar
#endif
#ifdef USE_GK
, unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS
#ifdef EXTRAPOLATED_POLARIZATION
, unsigned long long* __restrict__ fieldGradientS, unsigned long long* __restrict__ fieldGradientPolarS
#endif
#endif
) {
atomicAdd(&field[index], static_cast<unsigned long long>((long long) (data.field.x*0x100000000)));
atomicAdd(&field[index+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.y*0x100000000)));
atomicAdd(&field[index+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.z*0x100000000)));
atomicAdd(&fieldPolar[index], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolar[index+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolar[index+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0x100000000)));
#ifdef USE_GK
atomicAdd(&fieldS[index], static_cast<unsigned long long>((long long) (data.fieldS.x*0x100000000)));
atomicAdd(&fieldS[index+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.y*0x100000000)));
atomicAdd(&fieldS[index+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.z*0x100000000)));
atomicAdd(&fieldPolarS[index], static_cast<unsigned long long>((long long) (data.fieldPolarS.x*0x100000000)));
atomicAdd(&fieldPolarS[index+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.y*0x100000000)));
atomicAdd(&fieldPolarS[index+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.z*0x100000000)));
#endif
#ifdef EXTRAPOLATED_POLARIZATION
for (int i = 0; i < 6; i++) {
atomicAdd(&fieldGradient[6*index+i], static_cast<unsigned long long>((long long) (data.fieldGradient[i]*0x100000000)));
atomicAdd(&fieldGradientPolar[6*index+i], static_cast<unsigned long long>((long long) (data.fieldGradientPolar[i]*0x100000000)));
#ifdef USE_GK
atomicAdd(&fieldGradientS[6*index+i], static_cast<unsigned long long>((long long) (data.fieldGradientS[i]*0x100000000)));
atomicAdd(&fieldGradientPolarS[6*index+i], static_cast<unsigned long long>((long long) (data.fieldGradientPolarS[i]*0x100000000)));
#endif
}
#endif
} }
#ifdef USE_EWALD #ifdef USE_EWALD
__device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 deltaR, bool isSelfInteraction) { __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 deltaR, bool isSelfInteraction) {
if (isSelfInteraction) if (isSelfInteraction)
return; return;
real scale1, scale2; real scale1, scale2, scale3;
real r2 = dot(deltaR, deltaR); real r2 = dot(deltaR, deltaR);
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
real rI = RSQRT(r2); real rI = RSQRT(r2);
real r = RECIP(rI); real r = RECIP(rI);
real rI2 = rI*rI;
// calculate the error function damping terms // calculate the error function damping terms
...@@ -77,36 +145,40 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -77,36 +145,40 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
real alsq2 = 2*EWALD_ALPHA*EWALD_ALPHA; real alsq2 = 2*EWALD_ALPHA*EWALD_ALPHA;
real alsq2n = RECIP(SQRT_PI*EWALD_ALPHA); real alsq2n = RECIP(SQRT_PI*EWALD_ALPHA);
alsq2n *= alsq2; alsq2n *= alsq2;
real bn1 = (bn0+alsq2n*exp2a)*rI*rI; real bn1 = (bn0+alsq2n*exp2a)*rI2;
alsq2n *= alsq2; alsq2n *= alsq2;
real bn2 = (3*bn1+alsq2n*exp2a)*rI*rI; real bn2 = (3*bn1+alsq2n*exp2a)*rI2;
alsq2n *= alsq2;
real bn3 = (5*bn2+alsq2n*exp2a)*rI2;
// compute the error function scaled and unscaled terms // compute the error function scaled and unscaled terms
real scale3 = 1;
real scale5 = 1;
real damp = atom1.damp*atom2.damp; real damp = atom1.damp*atom2.damp;
real ratio = (r/damp); real ratio = (r/damp);
ratio = ratio*ratio*ratio; ratio = ratio*ratio*ratio;
float pgamma = atom1.thole < atom2.thole ? atom1.thole : atom2.thole; float pgamma = atom1.thole < atom2.thole ? atom1.thole : atom2.thole;
damp = damp == 0 ? 0 : -pgamma*ratio; damp = damp == 0 ? 0 : -pgamma*ratio;
real expdamp = EXP(damp); real expdamp = EXP(damp);
scale3 = 1 - expdamp; real dsc3 = 1 - expdamp;
scale5 = 1 - expdamp*(1-damp); real dsc5 = 1 - expdamp*(1-damp);
real dsc3 = scale3; real dsc7 = 1 - (1-damp+(0.6f*damp*damp))*expdamp;
real dsc5 = scale5;
real r3 = (r*r2); real r3 = (r*r2);
real r5 = (r3*r2); real r5 = (r3*r2);
real r7 = (r5*r2);
real rr3 = (1-dsc3)/r3; real rr3 = (1-dsc3)/r3;
real rr5 = 3*(1-dsc5)/r5; real rr5 = 3*(1-dsc5)/r5;
real rr7 = 15*(1-dsc7)/r7;
scale1 = rr3 - bn1; scale1 = rr3 - bn1;
scale2 = bn2 - rr5; scale2 = bn2 - rr5;
scale3 = bn3 - rr7;
} }
else { else {
scale1 = 0; scale1 = 0;
scale2 = 0; scale2 = 0;
scale3 = 0;
} }
real dDotDelta = scale2*dot(deltaR, atom2.inducedDipole); real dDotDelta = scale2*dot(deltaR, atom2.inducedDipole);
atom1.field += scale1*atom2.inducedDipole + dDotDelta*deltaR; atom1.field += scale1*atom2.inducedDipole + dDotDelta*deltaR;
...@@ -116,6 +188,45 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -116,6 +188,45 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
atom2.field += scale1*atom1.inducedDipole + dDotDelta*deltaR; atom2.field += scale1*atom1.inducedDipole + dDotDelta*deltaR;
dDotDelta = scale2*dot(deltaR, atom1.inducedDipolePolar); dDotDelta = scale2*dot(deltaR, atom1.inducedDipolePolar);
atom2.fieldPolar += scale1*atom1.inducedDipolePolar + dDotDelta*deltaR; atom2.fieldPolar += scale1*atom1.inducedDipolePolar + dDotDelta*deltaR;
#ifdef EXTRAPOLATED_POLARIZATION
// Compute and store the field gradients for later use.
real3 dipole = atom1.inducedDipole;
real muDotR = dipole.x*deltaR.x + dipole.y*deltaR.y + dipole.z*deltaR.z;
atom2.fieldGradient[0] -= muDotR*deltaR.x*deltaR.x*scale3 - (2*dipole.x*deltaR.x + muDotR)*scale2;
atom2.fieldGradient[1] -= muDotR*deltaR.y*deltaR.y*scale3 - (2*dipole.y*deltaR.y + muDotR)*scale2;
atom2.fieldGradient[2] -= muDotR*deltaR.z*deltaR.z*scale3 - (2*dipole.z*deltaR.z + muDotR)*scale2;
atom2.fieldGradient[3] -= muDotR*deltaR.x*deltaR.y*scale3 - (dipole.x*deltaR.y + dipole.y*deltaR.x)*scale2;
atom2.fieldGradient[4] -= muDotR*deltaR.x*deltaR.z*scale3 - (dipole.x*deltaR.z + dipole.z*deltaR.x)*scale2;
atom2.fieldGradient[5] -= muDotR*deltaR.y*deltaR.z*scale3 - (dipole.y*deltaR.z + dipole.z*deltaR.y)*scale2;
dipole = atom1.inducedDipolePolar;
muDotR = dipole.x*deltaR.x + dipole.y*deltaR.y + dipole.z*deltaR.z;
atom2.fieldGradientPolar[0] -= muDotR*deltaR.x*deltaR.x*scale3 - (2*dipole.x*deltaR.x + muDotR)*scale2;
atom2.fieldGradientPolar[1] -= muDotR*deltaR.y*deltaR.y*scale3 - (2*dipole.y*deltaR.y + muDotR)*scale2;
atom2.fieldGradientPolar[2] -= muDotR*deltaR.z*deltaR.z*scale3 - (2*dipole.z*deltaR.z + muDotR)*scale2;
atom2.fieldGradientPolar[3] -= muDotR*deltaR.x*deltaR.y*scale3 - (dipole.x*deltaR.y + dipole.y*deltaR.x)*scale2;
atom2.fieldGradientPolar[4] -= muDotR*deltaR.x*deltaR.z*scale3 - (dipole.x*deltaR.z + dipole.z*deltaR.x)*scale2;
atom2.fieldGradientPolar[5] -= muDotR*deltaR.y*deltaR.z*scale3 - (dipole.y*deltaR.z + dipole.z*deltaR.y)*scale2;
dipole = atom2.inducedDipole;
muDotR = dipole.x*deltaR.x + dipole.y*deltaR.y + dipole.z*deltaR.z;
atom1.fieldGradient[0] += muDotR*deltaR.x*deltaR.x*scale3 - (2*dipole.x*deltaR.x + muDotR)*scale2;
atom1.fieldGradient[1] += muDotR*deltaR.y*deltaR.y*scale3 - (2*dipole.y*deltaR.y + muDotR)*scale2;
atom1.fieldGradient[2] += muDotR*deltaR.z*deltaR.z*scale3 - (2*dipole.z*deltaR.z + muDotR)*scale2;
atom1.fieldGradient[3] += muDotR*deltaR.x*deltaR.y*scale3 - (dipole.x*deltaR.y + dipole.y*deltaR.x)*scale2;
atom1.fieldGradient[4] += muDotR*deltaR.x*deltaR.z*scale3 - (dipole.x*deltaR.z + dipole.z*deltaR.x)*scale2;
atom1.fieldGradient[5] += muDotR*deltaR.y*deltaR.z*scale3 - (dipole.y*deltaR.z + dipole.z*deltaR.y)*scale2;
dipole = atom2.inducedDipolePolar;
muDotR = dipole.x*deltaR.x + dipole.y*deltaR.y + dipole.z*deltaR.z;
atom1.fieldGradientPolar[0] += muDotR*deltaR.x*deltaR.x*scale3 - (2*dipole.x*deltaR.x + muDotR)*scale2;
atom1.fieldGradientPolar[1] += muDotR*deltaR.y*deltaR.y*scale3 - (2*dipole.y*deltaR.y + muDotR)*scale2;
atom1.fieldGradientPolar[2] += muDotR*deltaR.z*deltaR.z*scale3 - (2*dipole.z*deltaR.z + muDotR)*scale2;
atom1.fieldGradientPolar[3] += muDotR*deltaR.x*deltaR.y*scale3 - (dipole.x*deltaR.y + dipole.y*deltaR.x)*scale2;
atom1.fieldGradientPolar[4] += muDotR*deltaR.x*deltaR.z*scale3 - (dipole.x*deltaR.z + dipole.z*deltaR.x)*scale2;
atom1.fieldGradientPolar[5] += muDotR*deltaR.y*deltaR.z*scale3 - (dipole.y*deltaR.z + dipole.z*deltaR.y)*scale2;
#endif
} }
#elif defined USE_GK #elif defined USE_GK
__device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 deltaR, bool isSelfInteraction) { __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 deltaR, bool isSelfInteraction) {
...@@ -182,6 +293,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -182,6 +293,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
real r2I = rI*rI; real r2I = rI*rI;
real rr3 = -rI*r2I; real rr3 = -rI*r2I;
real rr5 = -3*rr3*r2I; real rr5 = -3*rr3*r2I;
real rr7 = 5*rr5*r2I;
real dampProd = atom1.damp*atom2.damp; real dampProd = atom1.damp*atom2.damp;
real ratio = (dampProd != 0 ? r/dampProd : 1); real ratio = (dampProd != 0 ? r/dampProd : 1);
float pGamma = (atom2.thole > atom1.thole ? atom1.thole: atom2.thole); float pGamma = (atom2.thole > atom1.thole ? atom1.thole: atom2.thole);
...@@ -189,6 +301,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -189,6 +301,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
real dampExp = (dampProd != 0 ? EXP(-damp) : 0); real dampExp = (dampProd != 0 ? EXP(-damp) : 0);
rr3 *= 1 - dampExp; rr3 *= 1 - dampExp;
rr5 *= 1 - (1+damp)*dampExp; rr5 *= 1 - (1+damp)*dampExp;
rr7 *= 1 - (1+damp+(0.6f*damp*damp))*dampExp;
real dDotDelta = rr5*dot(deltaR, atom2.inducedDipole); real dDotDelta = rr5*dot(deltaR, atom2.inducedDipole);
atom1.field += rr3*atom2.inducedDipole + dDotDelta*deltaR; atom1.field += rr3*atom2.inducedDipole + dDotDelta*deltaR;
dDotDelta = rr5*dot(deltaR, atom2.inducedDipolePolar); dDotDelta = rr5*dot(deltaR, atom2.inducedDipolePolar);
...@@ -197,6 +310,45 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -197,6 +310,45 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
atom2.field += rr3*atom1.inducedDipole + dDotDelta*deltaR; atom2.field += rr3*atom1.inducedDipole + dDotDelta*deltaR;
dDotDelta = rr5*dot(deltaR, atom1.inducedDipolePolar); dDotDelta = rr5*dot(deltaR, atom1.inducedDipolePolar);
atom2.fieldPolar += rr3*atom1.inducedDipolePolar + dDotDelta*deltaR; atom2.fieldPolar += rr3*atom1.inducedDipolePolar + dDotDelta*deltaR;
#ifdef EXTRAPOLATED_POLARIZATION
// Compute and store the field gradients for later use.
real3 dipole = atom1.inducedDipole;
real muDotR = dipole.x*deltaR.x + dipole.y*deltaR.y + dipole.z*deltaR.z;
atom2.fieldGradient[0] -= muDotR*deltaR.x*deltaR.x*rr7 - (2*dipole.x*deltaR.x + muDotR)*rr5;
atom2.fieldGradient[1] -= muDotR*deltaR.y*deltaR.y*rr7 - (2*dipole.y*deltaR.y + muDotR)*rr5;
atom2.fieldGradient[2] -= muDotR*deltaR.z*deltaR.z*rr7 - (2*dipole.z*deltaR.z + muDotR)*rr5;
atom2.fieldGradient[3] -= muDotR*deltaR.x*deltaR.y*rr7 - (dipole.x*deltaR.y + dipole.y*deltaR.x)*rr5;
atom2.fieldGradient[4] -= muDotR*deltaR.x*deltaR.z*rr7 - (dipole.x*deltaR.z + dipole.z*deltaR.x)*rr5;
atom2.fieldGradient[5] -= muDotR*deltaR.y*deltaR.z*rr7 - (dipole.y*deltaR.z + dipole.z*deltaR.y)*rr5;
dipole = atom1.inducedDipolePolar;
muDotR = dipole.x*deltaR.x + dipole.y*deltaR.y + dipole.z*deltaR.z;
atom2.fieldGradientPolar[0] -= muDotR*deltaR.x*deltaR.x*rr7 - (2*dipole.x*deltaR.x + muDotR)*rr5;
atom2.fieldGradientPolar[1] -= muDotR*deltaR.y*deltaR.y*rr7 - (2*dipole.y*deltaR.y + muDotR)*rr5;
atom2.fieldGradientPolar[2] -= muDotR*deltaR.z*deltaR.z*rr7 - (2*dipole.z*deltaR.z + muDotR)*rr5;
atom2.fieldGradientPolar[3] -= muDotR*deltaR.x*deltaR.y*rr7 - (dipole.x*deltaR.y + dipole.y*deltaR.x)*rr5;
atom2.fieldGradientPolar[4] -= muDotR*deltaR.x*deltaR.z*rr7 - (dipole.x*deltaR.z + dipole.z*deltaR.x)*rr5;
atom2.fieldGradientPolar[5] -= muDotR*deltaR.y*deltaR.z*rr7 - (dipole.y*deltaR.z + dipole.z*deltaR.y)*rr5;
dipole = atom2.inducedDipole;
muDotR = dipole.x*deltaR.x + dipole.y*deltaR.y + dipole.z*deltaR.z;
atom1.fieldGradient[0] += muDotR*deltaR.x*deltaR.x*rr7 - (2*dipole.x*deltaR.x + muDotR)*rr5;
atom1.fieldGradient[1] += muDotR*deltaR.y*deltaR.y*rr7 - (2*dipole.y*deltaR.y + muDotR)*rr5;
atom1.fieldGradient[2] += muDotR*deltaR.z*deltaR.z*rr7 - (2*dipole.z*deltaR.z + muDotR)*rr5;
atom1.fieldGradient[3] += muDotR*deltaR.x*deltaR.y*rr7 - (dipole.x*deltaR.y + dipole.y*deltaR.x)*rr5;
atom1.fieldGradient[4] += muDotR*deltaR.x*deltaR.z*rr7 - (dipole.x*deltaR.z + dipole.z*deltaR.x)*rr5;
atom1.fieldGradient[5] += muDotR*deltaR.y*deltaR.z*rr7 - (dipole.y*deltaR.z + dipole.z*deltaR.y)*rr5;
dipole = atom2.inducedDipolePolar;
muDotR = dipole.x*deltaR.x + dipole.y*deltaR.y + dipole.z*deltaR.z;
atom1.fieldGradientPolar[0] += muDotR*deltaR.x*deltaR.x*rr7 - (2*dipole.x*deltaR.x + muDotR)*rr5;
atom1.fieldGradientPolar[1] += muDotR*deltaR.y*deltaR.y*rr7 - (2*dipole.y*deltaR.y + muDotR)*rr5;
atom1.fieldGradientPolar[2] += muDotR*deltaR.z*deltaR.z*rr7 - (2*dipole.z*deltaR.z + muDotR)*rr5;
atom1.fieldGradientPolar[3] += muDotR*deltaR.x*deltaR.y*rr7 - (dipole.x*deltaR.y + dipole.y*deltaR.x)*rr5;
atom1.fieldGradientPolar[4] += muDotR*deltaR.x*deltaR.z*rr7 - (dipole.x*deltaR.z + dipole.z*deltaR.x)*rr5;
atom1.fieldGradientPolar[5] += muDotR*deltaR.y*deltaR.z*rr7 - (dipole.y*deltaR.z + dipole.z*deltaR.y)*rr5;
#endif
} }
#endif #endif
...@@ -206,12 +358,18 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -206,12 +358,18 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
extern "C" __global__ void computeInducedField( extern "C" __global__ void computeInducedField(
unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar, const real4* __restrict__ posq, const ushort2* __restrict__ exclusionTiles, unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar, const real4* __restrict__ posq, const ushort2* __restrict__ exclusionTiles,
const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef EXTRAPOLATED_POLARIZATION
unsigned long long* __restrict__ fieldGradient, unsigned long long* __restrict__ fieldGradientPolar,
#endif
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
#elif defined USE_GK #elif defined USE_GK
unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS, const real* __restrict__ inducedDipoleS, unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS, const real* __restrict__ inducedDipoleS,
const real* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii, const real* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii,
#ifdef EXTRAPOLATED_POLARIZATION
unsigned long long* __restrict__ fieldGradientS, unsigned long long* __restrict__ fieldGradientPolarS,
#endif
#endif #endif
const float2* __restrict__ dampingAndThole) { const float2* __restrict__ dampingAndThole) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE; const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
...@@ -284,36 +442,10 @@ extern "C" __global__ void computeInducedField( ...@@ -284,36 +442,10 @@ extern "C" __global__ void computeInducedField(
// Write results. // Write results.
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (data.field.x*0x100000000))); SAVE_ATOM_DATA(offset, data)
atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.y*0x100000000)));
atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.z*0x100000000)));
atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0x100000000)));
#ifdef USE_GK
atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (data.fieldS.x*0x100000000)));
atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.y*0x100000000)));
atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.z*0x100000000)));
atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (data.fieldPolarS.x*0x100000000)));
atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.y*0x100000000)));
atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.z*0x100000000)));
#endif
if (x != y) { if (x != y) {
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.x*0x100000000))); SAVE_ATOM_DATA(offset, localData[threadIdx.x])
atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.y*0x100000000)));
atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.z*0x100000000)));
atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0x100000000)));
#ifdef USE_GK
atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.x*0x100000000)));
atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.y*0x100000000)));
atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.z*0x100000000)));
atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.x*0x100000000)));
atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.y*0x100000000)));
atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.z*0x100000000)));
#endif
} }
} }
...@@ -412,39 +544,13 @@ extern "C" __global__ void computeInducedField( ...@@ -412,39 +544,13 @@ extern "C" __global__ void computeInducedField(
// Write results. // Write results.
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (data.field.x*0x100000000))); SAVE_ATOM_DATA(offset, data)
atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.y*0x100000000)));
atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.z*0x100000000)));
atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0x100000000)));
#ifdef USE_GK
atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (data.fieldS.x*0x100000000)));
atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.y*0x100000000)));
atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.z*0x100000000)));
atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (data.fieldPolarS.x*0x100000000)));
atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.y*0x100000000)));
atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.z*0x100000000)));
#endif
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
offset = atomIndices[threadIdx.x]; offset = atomIndices[threadIdx.x];
#else #else
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
#endif #endif
atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.x*0x100000000))); SAVE_ATOM_DATA(offset, localData[threadIdx.x])
atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.y*0x100000000)));
atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.z*0x100000000)));
atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0x100000000)));
#ifdef USE_GK
atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.x*0x100000000)));
atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.y*0x100000000)));
atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.z*0x100000000)));
atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.x*0x100000000)));
atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.y*0x100000000)));
atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.z*0x100000000)));
#endif
} }
pos++; pos++;
} }
...@@ -533,8 +639,8 @@ extern "C" __global__ void recordInducedDipolesForDIIS(const long long* __restri ...@@ -533,8 +639,8 @@ extern "C" __global__ void recordInducedDipolesForDIIS(const long long* __restri
real oldDipole = inducedDipole[dipoleIndex]; real oldDipole = inducedDipole[dipoleIndex];
real oldDipolePolar = inducedDipolePolar[dipoleIndex]; real oldDipolePolar = inducedDipolePolar[dipoleIndex];
long long fixedS = (fixedFieldS == NULL ? (long long) 0 : fixedFieldS[fieldIndex]); long long fixedS = (fixedFieldS == NULL ? (long long) 0 : fixedFieldS[fieldIndex]);
real newDipole = scale*((fixedField[fieldIndex]+fixedS+inducedField[fieldIndex])*fieldScale+ewaldScale*oldDipole); real newDipole = scale*((fixedField[fieldIndex]+fixedS+inducedField[fieldIndex])*fieldScale);
real newDipolePolar = scale*((fixedFieldPolar[fieldIndex]+fixedS+inducedFieldPolar[fieldIndex])*fieldScale+ewaldScale*oldDipolePolar); real newDipolePolar = scale*((fixedFieldPolar[fieldIndex]+fixedS+inducedFieldPolar[fieldIndex])*fieldScale);
int storePrevIndex = dipoleIndex+min(iteration, MAX_PREV_DIIS_DIPOLES-1)*NUM_ATOMS*3; int storePrevIndex = dipoleIndex+min(iteration, MAX_PREV_DIIS_DIPOLES-1)*NUM_ATOMS*3;
prevDipoles[storePrevIndex] = newDipole; prevDipoles[storePrevIndex] = newDipole;
prevDipolesPolar[storePrevIndex] = newDipolePolar; prevDipolesPolar[storePrevIndex] = newDipolePolar;
...@@ -607,3 +713,146 @@ extern "C" __global__ void updateInducedFieldByDIIS(real* __restrict__ inducedDi ...@@ -607,3 +713,146 @@ extern "C" __global__ void updateInducedFieldByDIIS(real* __restrict__ inducedDi
inducedDipolePolar[index] = sumPolar; inducedDipolePolar[index] = sumPolar;
} }
} }
extern "C" __global__ void initExtrapolatedDipoles(real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, real* __restrict__ extrapolatedDipole,
real* __restrict__ extrapolatedDipolePolar, long long* __restrict__ inducedDipoleFieldGradient, long long* __restrict__ inducedDipoleFieldGradientPolar
#ifdef USE_GK
, real* __restrict__ inducedDipoleGk, real* __restrict__ inducedDipoleGkPolar, real* __restrict__ extrapolatedDipoleGk, real* __restrict__ extrapolatedDipoleGkPolar,
real* __restrict__ inducedDipoleFieldGradientGk, real* __restrict__ inducedDipoleFieldGradientGkPolar
#endif
) {
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 3*NUM_ATOMS; index += blockDim.x*gridDim.x) {
extrapolatedDipole[index] = inducedDipole[index];
extrapolatedDipolePolar[index] = inducedDipolePolar[index];
#ifdef USE_GK
extrapolatedDipoleGk[index] = inducedDipoleGk[index];
extrapolatedDipoleGkPolar[index] = inducedDipoleGkPolar[index];
#endif
}
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 6*NUM_ATOMS; index += blockDim.x*gridDim.x) {
inducedDipoleFieldGradient[index] = 0;
inducedDipoleFieldGradientPolar[index] = 0;
#ifdef USE_GK
inducedDipoleFieldGradientGk[index] = 0;
inducedDipoleFieldGradientGkPolar[index] = 0;
#endif
}
}
extern "C" __global__ void iterateExtrapolatedDipoles(int order, real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, real* __restrict__ extrapolatedDipole,
real* __restrict__ extrapolatedDipolePolar, long long* __restrict__ inducedDipoleFieldGradient, long long* __restrict__ inducedDipoleFieldGradientPolar,
long long* __restrict__ inducedDipoleField, long long* __restrict__ inducedDipoleFieldPolar, real* __restrict__ extrapolatedDipoleFieldGradient, real* __restrict__ extrapolatedDipoleFieldGradientPolar,
#ifdef USE_GK
real* __restrict__ inducedDipoleGk, real* __restrict__ inducedDipoleGkPolar, real* __restrict__ extrapolatedDipoleGk, real* __restrict__ extrapolatedDipoleGkPolar,
real* __restrict__ inducedDipoleFieldGradientGk, real* __restrict__ inducedDipoleFieldGradientGkPolar, long long* __restrict__ inducedDipoleFieldGk,
long long* __restrict__ inducedDipoleFieldGkPolar, real* __restrict__ extrapolatedDipoleFieldGradientGk, real* __restrict__ extrapolatedDipoleFieldGradientGkPolar,
#endif
const float* __restrict__ polarizability) {
const real fieldScale = 1/(real) 0x100000000;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 3*NUM_ATOMS; index += blockDim.x*gridDim.x) {
int atom = index/3;
int component = index-3*atom;
int fieldIndex = atom+component*PADDED_NUM_ATOMS;
float polar = polarizability[atom];
real value = inducedDipoleField[fieldIndex]*fieldScale*polar;
inducedDipole[index] = value;
extrapolatedDipole[order*3*NUM_ATOMS+index] = value;
value = inducedDipoleFieldPolar[fieldIndex]*fieldScale*polar;
inducedDipolePolar[index] = value;
extrapolatedDipolePolar[order*3*NUM_ATOMS+index] = value;
#ifdef USE_GK
value = inducedDipoleFieldGk[fieldIndex]*fieldScale*polar;
inducedDipoleGk[index] = value;
extrapolatedDipoleGk[order*3*NUM_ATOMS+index] = value;
value = inducedDipoleFieldGkPolar[fieldIndex]*fieldScale*polar;
inducedDipoleGkPolar[index] = value;
extrapolatedDipoleGkPolar[order*3*NUM_ATOMS+index] = value;
#endif
}
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 6*NUM_ATOMS; index += blockDim.x*gridDim.x) {
int index2 = (order-1)*6*NUM_ATOMS+index;
extrapolatedDipoleFieldGradient[index2] = fieldScale*inducedDipoleFieldGradient[index];
extrapolatedDipoleFieldGradientPolar[index2] = fieldScale*inducedDipoleFieldGradientPolar[index];
#ifdef USE_GK
extrapolatedDipoleFieldGradientGk[index2] = fieldScale*inducedDipoleFieldGradientGk[index];
extrapolatedDipoleFieldGradientGkPolar[index2] = fieldScale*inducedDipoleFieldGradientGkPolar[index];
#endif
}
}
extern "C" __global__ void computeExtrapolatedDipoles(real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, real* __restrict__ extrapolatedDipole,
real* __restrict__ extrapolatedDipolePolar
#ifdef USE_GK
, real* __restrict__ inducedDipoleGk, real* __restrict__ inducedDipoleGkPolar, real* __restrict__ extrapolatedDipoleGk, real* __restrict__ extrapolatedDipoleGkPolar
#endif
) {
real coeff[] = {EXTRAPOLATION_COEFFICIENTS_SUM};
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 3*NUM_ATOMS; index += blockDim.x*gridDim.x) {
real sum = 0, sumPolar = 0, sumGk = 0, sumGkPolar = 0;
for (int order = 0; order < MAX_EXTRAPOLATION_ORDER; order++) {
sum += extrapolatedDipole[order*3*NUM_ATOMS+index]*coeff[order];
sumPolar += extrapolatedDipolePolar[order*3*NUM_ATOMS+index]*coeff[order];
#ifdef USE_GK
sumGk += extrapolatedDipoleGk[order*3*NUM_ATOMS+index]*coeff[order];
sumGkPolar += extrapolatedDipoleGkPolar[order*3*NUM_ATOMS+index]*coeff[order];
#endif
}
inducedDipole[index] = sum;
inducedDipolePolar[index] = sumPolar;
#ifdef USE_GK
inducedDipoleGk[index] = sumGk;
inducedDipoleGkPolar[index] = sumGkPolar;
#endif
}
}
extern "C" __global__ void addExtrapolatedFieldGradientToForce(long long* __restrict__ forceBuffers, real* __restrict__ extrapolatedDipole,
real* __restrict__ extrapolatedDipolePolar, real* __restrict__ extrapolatedDipoleFieldGradient, real* __restrict__ extrapolatedDipoleFieldGradientPolar
#ifdef USE_GK
, real* __restrict__ extrapolatedDipoleGk, real* __restrict__ extrapolatedDipoleGkPolar,
real* __restrict__ extrapolatedDipoleFieldGradientGk, real* __restrict__ extrapolatedDipoleFieldGradientGkPolar
#endif
) {
real coeff[] = {EXTRAPOLATION_COEFFICIENTS_SUM};
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x) {
real fx = 0, fy = 0, fz = 0;
for (int l = 0; l < MAX_EXTRAPOLATION_ORDER-1; l++) {
int index1 = 3*(l*NUM_ATOMS+atom);
real dipole[] = {extrapolatedDipole[index1], extrapolatedDipole[index1+1], extrapolatedDipole[index1+2]};
real dipolePolar[] = {extrapolatedDipolePolar[index1], extrapolatedDipolePolar[index1+1], extrapolatedDipolePolar[index1+2]};
#ifdef USE_GK
real dipoleGk[] = {extrapolatedDipoleGk[index1], extrapolatedDipoleGk[index1+1], extrapolatedDipoleGk[index1+2]};
real dipoleGkPolar[] = {extrapolatedDipoleGkPolar[index1], extrapolatedDipoleGkPolar[index1+1], extrapolatedDipoleGkPolar[index1+2]};
#endif
for (int m = 0; m < MAX_EXTRAPOLATION_ORDER-1-l; m++) {
int index2 = 6*(m*NUM_ATOMS+atom);
real scale = 0.5f*coeff[l+m+1]*ENERGY_SCALE_FACTOR;
real gradient[] = {extrapolatedDipoleFieldGradient[index2], extrapolatedDipoleFieldGradient[index2+1], extrapolatedDipoleFieldGradient[index2+2],
extrapolatedDipoleFieldGradient[index2+3], extrapolatedDipoleFieldGradient[index2+4], extrapolatedDipoleFieldGradient[index2+5]};
real gradientPolar[] = {extrapolatedDipoleFieldGradientPolar[index2], extrapolatedDipoleFieldGradientPolar[index2+1], extrapolatedDipoleFieldGradientPolar[index2+2],
extrapolatedDipoleFieldGradientPolar[index2+3], extrapolatedDipoleFieldGradientPolar[index2+4], extrapolatedDipoleFieldGradientPolar[index2+5]};
fx += scale*(dipole[0]*gradientPolar[0] + dipole[1]*gradientPolar[3] + dipole[2]*gradientPolar[4]);
fy += scale*(dipole[0]*gradientPolar[3] + dipole[1]*gradientPolar[1] + dipole[2]*gradientPolar[5]);
fz += scale*(dipole[0]*gradientPolar[4] + dipole[1]*gradientPolar[5] + dipole[2]*gradientPolar[2]);
fx += scale*(dipolePolar[0]*gradient[0] + dipolePolar[1]*gradient[3] + dipolePolar[2]*gradient[4]);
fy += scale*(dipolePolar[0]*gradient[3] + dipolePolar[1]*gradient[1] + dipolePolar[2]*gradient[5]);
fz += scale*(dipolePolar[0]*gradient[4] + dipolePolar[1]*gradient[5] + dipolePolar[2]*gradient[2]);
#ifdef USE_GK
real gradientGk[] = {extrapolatedDipoleFieldGradient[index2], extrapolatedDipoleFieldGradient[index2+1], extrapolatedDipoleFieldGradient[index2+2],
extrapolatedDipoleFieldGradient[index2+3], extrapolatedDipoleFieldGradient[index2+4], extrapolatedDipoleFieldGradient[index2+5]};
real gradientGkPolar[] = {extrapolatedDipoleFieldGradientPolar[index2], extrapolatedDipoleFieldGradientPolar[index2+1], extrapolatedDipoleFieldGradientPolar[index2+2],
extrapolatedDipoleFieldGradientPolar[index2+3], extrapolatedDipoleFieldGradientPolar[index2+4], extrapolatedDipoleFieldGradientPolar[index2+5]};
fx += scale*(dipoleGk[0]*gradientGkPolar[0] + dipoleGk[1]*gradientGkPolar[3] + dipoleGk[2]*gradientGkPolar[4]);
fy += scale*(dipoleGk[0]*gradientGkPolar[3] + dipoleGk[1]*gradientGkPolar[1] + dipoleGk[2]*gradientGkPolar[5]);
fz += scale*(dipoleGk[0]*gradientGkPolar[4] + dipoleGk[1]*gradientGkPolar[5] + dipoleGk[2]*gradientGkPolar[2]);
fx += scale*(dipoleGkPolar[0]*gradientGk[0] + dipoleGkPolar[1]*gradientGk[3] + dipoleGkPolar[2]*gradientGk[4]);
fy += scale*(dipoleGkPolar[0]*gradientGk[3] + dipoleGkPolar[1]*gradientGk[1] + dipoleGkPolar[2]*gradientGk[5]);
fz += scale*(dipoleGkPolar[0]*gradientGk[4] + dipoleGkPolar[1]*gradientGk[5] + dipoleGkPolar[2]*gradientGk[2]);
#endif
}
}
forceBuffers[atom] += (long long) (fx*0x100000000);
forceBuffers[atom+PADDED_NUM_ATOMS] += (long long) (fy*0x100000000);
forceBuffers[atom+PADDED_NUM_ATOMS*2] += (long long) (fz*0x100000000);
}
}
...@@ -643,8 +643,8 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri ...@@ -643,8 +643,8 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
// Process the atoms in spatially sorted order. This improves cache performance when loading // Process the atoms in spatially sorted order. This improves cache performance when loading
// the grid values. // the grid values.
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) { for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x) {
int m = pmeAtomGridIndex[i].x; int m = pmeAtomGridIndex[atom].x;
real4 pos = posq[m]; real4 pos = posq[m];
pos -= periodicBoxVecZ*floor(pos.z*recipBoxVecZ.z+0.5f); pos -= periodicBoxVecZ*floor(pos.z*recipBoxVecZ.z+0.5f);
pos -= periodicBoxVecY*floor(pos.y*recipBoxVecY.z+0.5f); pos -= periodicBoxVecY*floor(pos.y*recipBoxVecY.z+0.5f);
...@@ -1051,7 +1051,7 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_ ...@@ -1051,7 +1051,7 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
f.x += (inducedDipole[k]+inducedDipolePolar[k])*phi[i+NUM_ATOMS*j1]; f.x += (inducedDipole[k]+inducedDipolePolar[k])*phi[i+NUM_ATOMS*j1];
f.y += (inducedDipole[k]+inducedDipolePolar[k])*phi[i+NUM_ATOMS*j2]; f.y += (inducedDipole[k]+inducedDipolePolar[k])*phi[i+NUM_ATOMS*j2];
f.z += (inducedDipole[k]+inducedDipolePolar[k])*phi[i+NUM_ATOMS*j3]; f.z += (inducedDipole[k]+inducedDipolePolar[k])*phi[i+NUM_ATOMS*j3];
#ifndef DIRECT_POLARIZATION #ifdef MUTUAL_POLARIZATION
f.x += (inducedDipole[k]*phip[i+NUM_ATOMS*j1] + inducedDipolePolar[k]*phid[i+NUM_ATOMS*j1]); f.x += (inducedDipole[k]*phip[i+NUM_ATOMS*j1] + inducedDipolePolar[k]*phid[i+NUM_ATOMS*j1]);
f.y += (inducedDipole[k]*phip[i+NUM_ATOMS*j2] + inducedDipolePolar[k]*phid[i+NUM_ATOMS*j2]); f.y += (inducedDipole[k]*phip[i+NUM_ATOMS*j2] + inducedDipolePolar[k]*phid[i+NUM_ATOMS*j2]);
f.z += (inducedDipole[k]*phip[i+NUM_ATOMS*j3] + inducedDipolePolar[k]*phid[i+NUM_ATOMS*j3]); f.z += (inducedDipole[k]*phip[i+NUM_ATOMS*j3] + inducedDipolePolar[k]*phid[i+NUM_ATOMS*j3]);
...@@ -1073,8 +1073,12 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_ ...@@ -1073,8 +1073,12 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.25f*EPSILON_FACTOR*energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.25f*EPSILON_FACTOR*energy;
} }
extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phid, real* const __restrict__ phip, extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phid, real* const __restrict__ phip, long long* __restrict__ inducedField,
long long* __restrict__ inducedField, long long* __restrict__ inducedFieldPolar, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) { long long* __restrict__ inducedFieldPolar, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar,
#ifdef EXTRAPOLATED_POLARIZATION
unsigned long long* __restrict__ fieldGradient, unsigned long long* __restrict__ fieldGradientPolar,
#endif
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
__shared__ real fracToCart[3][3]; __shared__ real fracToCart[3][3];
if (threadIdx.x == 0) { if (threadIdx.x == 0) {
fracToCart[0][0] = GRID_SIZE_X*recipBoxVecX.x; fracToCart[0][0] = GRID_SIZE_X*recipBoxVecX.x;
...@@ -1088,12 +1092,62 @@ extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ ph ...@@ -1088,12 +1092,62 @@ extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ ph
fracToCart[2][2] = GRID_SIZE_Z*recipBoxVecZ.z; fracToCart[2][2] = GRID_SIZE_Z*recipBoxVecZ.z;
} }
__syncthreads(); __syncthreads();
real selfDipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
inducedField[i] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[0][0] + phid[i+NUM_ATOMS*2]*fracToCart[0][1] + phid[i+NUM_ATOMS*3]*fracToCart[0][2])); inducedField[i] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[0][0] + phid[i+NUM_ATOMS*2]*fracToCart[0][1] + phid[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[3*i]));
inducedField[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[1][0] + phid[i+NUM_ATOMS*2]*fracToCart[1][1] + phid[i+NUM_ATOMS*3]*fracToCart[1][2])); inducedField[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[1][0] + phid[i+NUM_ATOMS*2]*fracToCart[1][1] + phid[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[3*i+1]));
inducedField[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[2][0] + phid[i+NUM_ATOMS*2]*fracToCart[2][1] + phid[i+NUM_ATOMS*3]*fracToCart[2][2])); inducedField[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[2][0] + phid[i+NUM_ATOMS*2]*fracToCart[2][1] + phid[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[3*i+2]));
inducedFieldPolar[i] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[0][0] + phip[i+NUM_ATOMS*2]*fracToCart[0][1] + phip[i+NUM_ATOMS*3]*fracToCart[0][2])); inducedFieldPolar[i] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[0][0] + phip[i+NUM_ATOMS*2]*fracToCart[0][1] + phip[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipolePolar[3*i]));
inducedFieldPolar[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[1][0] + phip[i+NUM_ATOMS*2]*fracToCart[1][1] + phip[i+NUM_ATOMS*3]*fracToCart[1][2])); inducedFieldPolar[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[1][0] + phip[i+NUM_ATOMS*2]*fracToCart[1][1] + phip[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipolePolar[3*i+1]));
inducedFieldPolar[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[2][0] + phip[i+NUM_ATOMS*2]*fracToCart[2][1] + phip[i+NUM_ATOMS*3]*fracToCart[2][2])); inducedFieldPolar[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[2][0] + phip[i+NUM_ATOMS*2]*fracToCart[2][1] + phip[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipolePolar[3*i+2]));
#ifdef EXTRAPOLATED_POLARIZATION
// Compute and store the field gradients for later use.
real EmatD[3][3] = {
{phid[i+NUM_ATOMS*4], phid[i+NUM_ATOMS*7], phid[i+NUM_ATOMS*8]},
{phid[i+NUM_ATOMS*7], phid[i+NUM_ATOMS*5], phid[i+NUM_ATOMS*9]},
{phid[i+NUM_ATOMS*8], phid[i+NUM_ATOMS*9], phid[i+NUM_ATOMS*6]}
};
real Exx = 0, Eyy = 0, Ezz = 0, Exy = 0, Exz = 0, Eyz = 0;
for (int k = 0; k < 3; ++k) {
for (int l = 0; l < 3; ++l) {
Exx += fracToCart[0][k] * EmatD[k][l] * fracToCart[0][l];
Eyy += fracToCart[1][k] * EmatD[k][l] * fracToCart[1][l];
Ezz += fracToCart[2][k] * EmatD[k][l] * fracToCart[2][l];
Exy += fracToCart[0][k] * EmatD[k][l] * fracToCart[1][l];
Exz += fracToCart[0][k] * EmatD[k][l] * fracToCart[2][l];
Eyz += fracToCart[1][k] * EmatD[k][l] * fracToCart[2][l];
}
}
atomicAdd(&fieldGradient[6*i+0], static_cast<unsigned long long>((long long) (-Exx*0x100000000)));
atomicAdd(&fieldGradient[6*i+1], static_cast<unsigned long long>((long long) (-Eyy*0x100000000)));
atomicAdd(&fieldGradient[6*i+2], static_cast<unsigned long long>((long long) (-Ezz*0x100000000)));
atomicAdd(&fieldGradient[6*i+3], static_cast<unsigned long long>((long long) (-Exy*0x100000000)));
atomicAdd(&fieldGradient[6*i+4], static_cast<unsigned long long>((long long) (-Exz*0x100000000)));
atomicAdd(&fieldGradient[6*i+5], static_cast<unsigned long long>((long long) (-Eyz*0x100000000)));
real EmatP[3][3] = {
{phip[i+NUM_ATOMS*4], phip[i+NUM_ATOMS*7], phip[i+NUM_ATOMS*8]},
{phip[i+NUM_ATOMS*7], phip[i+NUM_ATOMS*5], phip[i+NUM_ATOMS*9]},
{phip[i+NUM_ATOMS*8], phip[i+NUM_ATOMS*9], phip[i+NUM_ATOMS*6]}
};
Exx = 0; Eyy = 0; Ezz = 0; Exy = 0; Exz = 0; Eyz = 0;
for (int k = 0; k < 3; ++k) {
for (int l = 0; l < 3; ++l) {
Exx += fracToCart[0][k] * EmatP[k][l] * fracToCart[0][l];
Eyy += fracToCart[1][k] * EmatP[k][l] * fracToCart[1][l];
Ezz += fracToCart[2][k] * EmatP[k][l] * fracToCart[2][l];
Exy += fracToCart[0][k] * EmatP[k][l] * fracToCart[1][l];
Exz += fracToCart[0][k] * EmatP[k][l] * fracToCart[2][l];
Eyz += fracToCart[1][k] * EmatP[k][l] * fracToCart[2][l];
}
}
atomicAdd(&fieldGradientPolar[6*i+0], static_cast<unsigned long long>((long long) (-Exx*0x100000000)));
atomicAdd(&fieldGradientPolar[6*i+1], static_cast<unsigned long long>((long long) (-Eyy*0x100000000)));
atomicAdd(&fieldGradientPolar[6*i+2], static_cast<unsigned long long>((long long) (-Ezz*0x100000000)));
atomicAdd(&fieldGradientPolar[6*i+3], static_cast<unsigned long long>((long long) (-Exy*0x100000000)));
atomicAdd(&fieldGradientPolar[6*i+4], static_cast<unsigned long long>((long long) (-Exz*0x100000000)));
atomicAdd(&fieldGradientPolar[6*i+5], static_cast<unsigned long long>((long long) (-Eyz*0x100000000)));
#endif
} }
} }
...@@ -365,7 +365,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has ...@@ -365,7 +365,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
real iEIY = qiUinpI.x*Vijp[1] + qiUindI.x*Vijd[1] - qiUinpI.y*Vijp[0] - qiUindI.y*Vijd[0]; real iEIY = qiUinpI.x*Vijp[1] + qiUindI.x*Vijd[1] - qiUinpI.y*Vijp[0] - qiUindI.y*Vijd[0];
real iEJY = qiUinpJ.x*Vjip[1] + qiUindJ.x*Vjid[1] - qiUinpJ.y*Vjip[0] - qiUindJ.y*Vjid[0]; real iEJY = qiUinpJ.x*Vjip[1] + qiUindJ.x*Vjid[1] - qiUinpJ.y*Vjip[0] - qiUindJ.y*Vjid[0];
#ifdef USE_MUTUAL_POLARIZATION #ifdef MUTUAL_POLARIZATION
// Uind-Uind terms (m=0) // Uind-Uind terms (m=0)
real eCoef = -fourThirds*rInvVec[3]*(3*(thole_d0 + bVec[3]) + alphaRVec[3]*X); real eCoef = -fourThirds*rInvVec[3]*(3*(thole_d0 + bVec[3]) + alphaRVec[3]*X);
real dCoef = rInvVec[4]*(6*(dthole_d0 + bVec[3]) + 4*alphaRVec[5]*X); real dCoef = rInvVec[4]*(6*(dthole_d0 + bVec[3]) + 4*alphaRVec[5]*X);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment