"platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl" did not exist on "ed1fb1923aeff2fe99e090bab87aeddd63115043"
Commit 39120086 authored by John Chodera (MSKCC)'s avatar John Chodera (MSKCC)
Browse files

Merge remote-tracking branch 'upstream/master' into forcefield

parents 87b95319 2acba7ad
......@@ -18,8 +18,6 @@ Need Help? Check out the [documentation](https://simtk.org/docman/?group_id=161)
Badges
------
* Travis CI `linux` integration tests: [![Build Status](https://travis-ci.org/pandegroup/openmm.png?branch=master)](https://travis-ci.org/pandegroup/openmm)
* Jenkins `openmm-dev` conda `osx` build: [![Jenkins `openmm-dev` conda `osx` build](https://jenkins.choderalab.org/job/conda-openmm-dev-osx-2/badge/icon)](https://jenkins.choderalab.org/job/conda-openmm-dev-osx-2/) [[console log]](https://jenkins.choderalab.org/job/conda-openmm-dev-osx-2/lastBuild/consoleFull)
* Jenkins `openmm-dev` conda `linux` build: [![Jenkins `openmm-dev` conda `linux` build](https://jenkins.choderalab.org/job/conda-openmm-dev-linux-vagrant-2/badge/icon)](https://jenkins.choderalab.org/job/conda-openmm-dev-linux-vagrant-2/) [[console log]](https://jenkins.choderalab.org/job/conda-openmm-dev-linux-vagrant-2/lastBuild/consoleFull)
* Binstar `openmm` conda release: ![Binstar `openmm` conda release](https://binstar.org/omnia/openmm/badges/version.svg)
* Binstar `openmm-dev` conda package: ![Binstar `openmm-dev` conda package](https://binstar.org/omnia/openmm-dev/badges/version.svg)
* Anaconda Cloud `openmm` conda release: ![Binstar `openmm` conda release](https://binstar.org/omnia/openmm/badges/version.svg)
* Anaconda Cloud `openmm-dev` conda package: ![Binstar `openmm-dev` conda package](https://binstar.org/omnia/openmm-dev/badges/version.svg)
......@@ -922,29 +922,33 @@ If :code:`vdwCutoff` is not specified, then the value of
Specifying the Polarization Method
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
OpenMM allows the setting of several other parameters particular to the AMOEBA
force field. The :code:`mutualInducedTargetEpsilon` option allows you to
specify the accuracy to which the induced dipoles are calculated at each time
step; the default value is 0.01. The :code:`polarization` setting
determines whether the calculation of the induced dipoles is continued until the
dipoles are self-consistent to within the tolerance specified by
:code:`mutualInducedTargetEpsilon` or whether a quick estimate of the induced
dipoles is used instead. The first option corresponds to the
:code:`polarization='mutual'` setting and is the default; the quick estimate
option is given by :code:`polarization='direct'` and in this case,
:code:`mutualInducedTargetEpsilon` is ignored, if provided. Simulations using
:code:`polarization='direct'` will be significantly faster than those with
:code:`polarization='mutual'`\ , but less accurate. Examples using the two
options are given below:
::
system = forcefield.createSystem(nonbondedMethod=PME,
nonbondedCutoff=1*nanometer,ewaldErrorTolerance=0.00001,
vdwCutoff=1.2*nanometer, mutualInducedTargetEpsilon=0.01)
system = forcefield.createSystem(nonbondedMethod=PME,
nonbondedCutoff=1*nanometer,ewaldErrorTolerance=0.00001,
vdwCutoff=1.2*nanometer, polarization ='direct')
When using the AMOEBA force field, OpenMM allows the induced dipoles to be
calculated in any of three different ways. The slowest but potentially most
accurate method is to iterate the calculation until the dipoles converge to a
specified tolerance. To select this, specify :code:`polarization='mutual'`.
Use the :code:`mutualInducedTargetEpsilon` option to select the tolerance; for
most situations, a value of 0.00001 works well. Alternatively you can specify
:code:`polarization='extrapolated'`. This uses an analytic approximation
:cite:`Simmonett2015` to estimate what the fully converged dipoles will be without
actually continuing the calculation to convergence. In many cases this can be
significantly faster with only a small loss in accuracy. Finally, you can
specify :code:`polarization='direct'` to use the direct polarization
approximation, in which induced dipoles depend only on the fixed multipoles, not
on other induced dipoles. This is even faster, but it produces very different
forces from mutual polarization, so it should only be used with force fields
that have been specifically parameterized for use with this approximation.
Here are examples of using each method:
::
system = forcefield.createSystem(nonbondedMethod=PME, nonbondedCutoff=1*nanometer,
vdwCutoff=1.2*nanometer, polarization='mutual', mutualInducedTargetEpsilon=0.00001)
system = forcefield.createSystem(nonbondedMethod=PME, nonbondedCutoff=1*nanometer,
vdwCutoff=1.2*nanometer, polarization='extrapolated')
system = forcefield.createSystem(nonbondedMethod=PME, nonbondedCutoff=1*nanometer,
vdwCutoff=1.2*nanometer, polarization='direct')
Implicit Solvent and Solute Dielectrics
......
......@@ -389,6 +389,17 @@
type = {Journal Article}
}
@article{Simmonett2015
author = {Simmonett, Andrew C. and Pickard, Frank C. and Shao, Yihan and Cheatham, Thomas E. and Brooks, Bernard R.},
title = {Efficient treatment of induced dipoles},
journal = {Journal of Chemical Physics},
year = {2015},
volume = {143},
number = {7},
pages = {074115},
type = {Journal Article}
}
@article{Sindhikara2009,
author = {Sindhikara, Daniel J. and Kim, Seonah and Voter,
Arthur F. and Roitberg, Adrian E.},
......
......@@ -36,22 +36,18 @@ def runOneTest(testName, options):
if amoeba:
constraints = None
epsilon = float(options.epsilon)
if epsilon == 0:
polarization = 'direct'
else:
polarization = 'mutual'
if explicit:
ff = app.ForceField('amoeba2009.xml')
pdb = app.PDBFile('5dfr_solv-cube_equil.pdb')
cutoff = 0.7*unit.nanometers
vdwCutoff = 0.9*unit.nanometers
system = ff.createSystem(pdb.topology, nonbondedMethod=app.PME, nonbondedCutoff=cutoff, vdwCutoff=vdwCutoff, constraints=constraints, ewaldErrorTolerance=0.00075, mutualInducedTargetEpsilon=epsilon, polarization=polarization)
system = ff.createSystem(pdb.topology, nonbondedMethod=app.PME, nonbondedCutoff=cutoff, vdwCutoff=vdwCutoff, constraints=constraints, ewaldErrorTolerance=0.00075, mutualInducedTargetEpsilon=epsilon, polarization=options.polarization)
else:
ff = app.ForceField('amoeba2009.xml', 'amoeba2009_gk.xml')
pdb = app.PDBFile('5dfr_minimized.pdb')
cutoff = 2.0*unit.nanometers
vdwCutoff = 1.2*unit.nanometers
system = ff.createSystem(pdb.topology, nonbondedMethod=app.NoCutoff, constraints=constraints, mutualInducedTargetEpsilon=epsilon, polarization=polarization)
system = ff.createSystem(pdb.topology, nonbondedMethod=app.NoCutoff, constraints=constraints, mutualInducedTargetEpsilon=epsilon, polarization=options.polarization)
for f in system.getForces():
if isinstance(f, mm.AmoebaMultipoleForce) or isinstance(f, mm.AmoebaVdwForce) or isinstance(f, mm.AmoebaGeneralizedKirkwoodForce) or isinstance(f, mm.AmoebaWcaDispersionForce):
f.setForceGroup(1)
......@@ -127,7 +123,8 @@ parser.add_option('--platform', dest='platform', choices=platformNames, help='na
parser.add_option('--test', dest='test', choices=('gbsa', 'rf', 'pme', 'amoebagk', 'amoebapme'), help='the test to perform: gbsa, rf, pme, amoebagk, or amoebapme [default: all]')
parser.add_option('--pme-cutoff', default='0.9', dest='cutoff', type='float', help='direct space cutoff for PME in nm [default: 0.9]')
parser.add_option('--seconds', default='60', dest='seconds', type='float', help='target simulation length in seconds [default: 60]')
parser.add_option('--mutual-epsilon', default='1e-4', dest='epsilon', type='float', help='mutual induced epsilon for AMOEBA [default: 1e-4]')
parser.add_option('--polarization', default='mutual', dest='polarization', choices=('direct', 'extrapolated', 'mutual'), help='the polarization method for AMOEBA: direct, extrapolated, or mutual [default: mutual]')
parser.add_option('--mutual-epsilon', default='1e-5', dest='epsilon', type='float', help='mutual induced epsilon for AMOEBA [default: 1e-5]')
parser.add_option('--heavy-hydrogens', action='store_true', default=False, dest='heavy', help='repartition mass to allow a larger time step')
parser.add_option('--device', default=None, dest='device', help='device index for CUDA or OpenCL')
parser.add_option('--precision', default='single', dest='precision', choices=('single', 'mixed', 'double'), help='precision mode for CUDA or OpenCL: single, mixed, or double [default: single]')
......
......@@ -449,4 +449,5 @@ void ContextImpl::loadCheckpoint(istream& stream) {
parameters[name] = value;
}
updateStateDataKernel.getAs<UpdateStateDataKernel>().loadCheckpoint(*this, stream);
hasSetPositions = true;
}
......@@ -92,6 +92,15 @@ void testCheckpoint() {
integrator.step(10);
State s4 = context.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s2, s4);
// See if a checkpoint created from one Context can be loaded into a different one.
VerletIntegrator integrator2(0.001);
Context context2(system, integrator2, platform);
stream1.seekg(0, stream1.beg);
context2.loadCheckpoint(stream1);
State s5 = context2.getState(State::Positions | State::Velocities | State::Parameters | State::Energy);
compareStates(s1, s5);
}
void runPlatformTests() {
......
......@@ -120,6 +120,15 @@ void testCheckpoint() {
integrator2.step(10);
State s8 = context2.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s6, s8);
// See if a checkpoint created from one Context can be loaded into a different one.
VerletIntegrator integrator3(0.001);
Context context3(system, integrator3, platform);
stream1.seekg(0, stream1.beg);
context3.loadCheckpoint(stream1);
State s9 = context3.getState(State::Positions | State::Velocities | State::Parameters | State::Energy);
compareStates(s1, s9);
}
void runPlatformTests() {
......
......@@ -120,6 +120,15 @@ void testCheckpoint() {
integrator2.step(10);
State s8 = context2.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s6, s8);
// See if a checkpoint created from one Context can be loaded into a different one.
VerletIntegrator integrator3(0.001);
Context context3(system, integrator3, platform);
stream1.seekg(0, stream1.beg);
context3.loadCheckpoint(stream1);
State s9 = context3.getState(State::Positions | State::Velocities | State::Parameters | State::Energy);
compareStates(s1, s9);
}
void runPlatformTests() {
......
......@@ -92,6 +92,15 @@ void testCheckpoint() {
integrator.step(10);
State s4 = context.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s2, s4);
// See if a checkpoint created from one Context can be loaded into a different one.
VerletIntegrator integrator2(0.001);
Context context2(system, integrator2, platform);
stream1.seekg(0, stream1.beg);
context2.loadCheckpoint(stream1);
State s5 = context2.getState(State::Positions | State::Velocities | State::Parameters | State::Energy);
compareStates(s1, s5);
}
void runPlatformTests() {
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Peter Eastman *
* Contributors: *
* *
......@@ -72,14 +72,25 @@ public:
enum PolarizationType {
/**
* Mutual polarization
* Full mutually induced polarization. The dipoles are iterated until the converge to the accuracy specified
* by getMutualInducedTargetEpsilon().
*/
Mutual = 0,
/**
* Direct polarization
* Direct polarization approximation. The induced dipoles depend only on the fixed multipoles, not on other
* induced dipoles.
*/
Direct = 1
Direct = 1,
/**
* Extrapolated perturbation theory approximation. The dipoles are iterated a few times, and then an analytic
* approximation is used to extrapolate to the fully converged values. Call setExtrapolationCoefficients()
* to set the coefficients used for the extrapolation. The default coefficients used in this release are
* [0, -0.3, 0, 1.3], but be aware that those may change in a future release.
*/
Extrapolated = 2
};
enum MultipoleAxisTypes { ZThenX = 0, Bisector = 1, ZBisect = 2, ThreeFold = 3, ZOnly = 4, NoAxisType = 5, LastAxisTypeIndex = 6 };
......@@ -298,6 +309,23 @@ public:
*/
void setMutualInducedTargetEpsilon(double inputMutualInducedTargetEpsilon);
/**
* Set the coefficients for the mu_0, mu_1, mu_2, ..., mu_n terms in the extrapolation
* algorithm for induced dipoles.
*
* @param coefficients a vector whose mth entry specifies the coefficient for mu_m. The length of this
* vector determines how many iterations are performed.
*
*/
void setExtrapolationCoefficients(const std::vector<double> &coefficients);
/**
* Get the coefficients for the mu_0, mu_1, mu_2, ..., mu_n terms in the extrapolation
* algorithm for induced dipoles. In this release, the default values for the coefficients are
* [0, -0.3, 0, 1.3], but be aware that those may change in a future release.
*/
const std::vector<double>& getExtrapolationCoefficients() const;
/**
* Get the error tolerance for Ewald summation. This corresponds to the fractional error in the forces
* which is acceptable. This value is used to select the grid dimensions and separation (alpha)
......@@ -384,6 +412,8 @@ private:
int pmeBSplineOrder;
std::vector<int> pmeGridDimension;
int mutualInducedMaxIterations;
std::vector<double> extrapolationCoefficients;
double mutualInducedTargetEpsilon;
double scalingDistanceCutoff;
double electricConstant;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: *
* Contributors: *
* *
......@@ -43,6 +43,10 @@ AmoebaMultipoleForce::AmoebaMultipoleForce() : nonbondedMethod(NoCutoff), polari
mutualInducedTargetEpsilon(1.0e-02), scalingDistanceCutoff(100.0), electricConstant(138.9354558456), aewald(0.0) {
pmeGridDimension.resize(3);
pmeGridDimension[0] = pmeGridDimension[1] = pmeGridDimension[2];
extrapolationCoefficients.push_back(0.0);
extrapolationCoefficients.push_back(-0.3);
extrapolationCoefficients.push_back(0.0);
extrapolationCoefficients.push_back(1.3);
}
AmoebaMultipoleForce::NonbondedMethod AmoebaMultipoleForce::getNonbondedMethod() const {
......@@ -61,6 +65,14 @@ void AmoebaMultipoleForce::setPolarizationType(AmoebaMultipoleForce::Polarizatio
polarizationType = type;
}
void AmoebaMultipoleForce::setExtrapolationCoefficients(const std::vector<double> &coefficients) {
extrapolationCoefficients = coefficients;
}
const std::vector<double> & AmoebaMultipoleForce::getExtrapolationCoefficients() const {
return extrapolationCoefficients;
}
double AmoebaMultipoleForce::getCutoffDistance() const {
return cutoffDistance;
}
......
......@@ -810,7 +810,10 @@ CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::stri
multipoleParticles(NULL), molecularDipoles(NULL), molecularQuadrupoles(NULL), labFrameDipoles(NULL), labFrameQuadrupoles(NULL), sphericalDipoles(NULL), sphericalQuadrupoles(NULL),
fracDipoles(NULL), fracQuadrupoles(NULL), field(NULL), fieldPolar(NULL), inducedField(NULL), inducedFieldPolar(NULL), torque(NULL), dampingAndThole(NULL), inducedDipole(NULL),
diisCoefficients(NULL), inducedDipolePolar(NULL), inducedDipoleErrors(NULL), prevDipoles(NULL), prevDipolesPolar(NULL), prevDipolesGk(NULL),
prevDipolesGkPolar(NULL), prevErrors(NULL), diisMatrix(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL),
prevDipolesGkPolar(NULL), prevErrors(NULL), diisMatrix(NULL), polarizability(NULL), extrapolatedDipole(NULL), extrapolatedDipolePolar(NULL),
extrapolatedDipoleGk(NULL), extrapolatedDipoleGkPolar(NULL), inducedDipoleFieldGradient(NULL), inducedDipoleFieldGradientPolar(NULL),
inducedDipoleFieldGradientGk(NULL), inducedDipoleFieldGradientGkPolar(NULL), extrapolatedDipoleFieldGradient(NULL), extrapolatedDipoleFieldGradientPolar(NULL),
extrapolatedDipoleFieldGradientGk(NULL), extrapolatedDipoleFieldGradientGkPolar(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL),
pmeGrid(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeIgrid(NULL), pmePhi(NULL),
pmePhid(NULL), pmePhip(NULL), pmePhidp(NULL), pmeCphi(NULL), pmeAtomGridIndex(NULL), lastPositions(NULL), sort(NULL), gkKernel(NULL) {
}
......@@ -867,6 +870,30 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete diisMatrix;
if (diisCoefficients != NULL)
delete diisCoefficients;
if (extrapolatedDipole != NULL)
delete extrapolatedDipole;
if (extrapolatedDipolePolar != NULL)
delete extrapolatedDipolePolar;
if (extrapolatedDipoleGk != NULL)
delete extrapolatedDipoleGk;
if (extrapolatedDipoleGkPolar != NULL)
delete extrapolatedDipoleGkPolar;
if (inducedDipoleFieldGradient != NULL)
delete inducedDipoleFieldGradient;
if (inducedDipoleFieldGradientPolar != NULL)
delete inducedDipoleFieldGradientPolar;
if (inducedDipoleFieldGradientGk != NULL)
delete inducedDipoleFieldGradientGk;
if (inducedDipoleFieldGradientGkPolar != NULL)
delete inducedDipoleFieldGradientGkPolar;
if (extrapolatedDipoleFieldGradient != NULL)
delete extrapolatedDipoleFieldGradient;
if (extrapolatedDipoleFieldGradientPolar != NULL)
delete extrapolatedDipoleFieldGradientPolar;
if (extrapolatedDipoleFieldGradientGk != NULL)
delete extrapolatedDipoleFieldGradientGk;
if (extrapolatedDipoleFieldGradientGkPolar != NULL)
delete extrapolatedDipoleFieldGradientGkPolar;
if (polarizability != NULL)
delete polarizability;
if (covalentFlags != NULL)
......@@ -967,6 +994,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
// Create workspace arrays.
polarizationType = force.getPolarizationType();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
labFrameDipoles = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "labFrameDipoles");
labFrameQuadrupoles = new CudaArray(cu, 5*paddedNumAtoms, elementSize, "labFrameQuadrupoles");
......@@ -979,12 +1007,23 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
torque = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "torque");
inducedDipole = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipole");
inducedDipolePolar = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolar");
inducedDipoleErrors = new CudaArray(cu, cu.getNumThreadBlocks(), sizeof(float2), "inducedDipoleErrors");
prevDipoles = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipoles");
prevDipolesPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesPolar");
prevErrors = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevErrors");
diisMatrix = new CudaArray(cu, MaxPrevDIISDipoles*MaxPrevDIISDipoles, elementSize, "diisMatrix");
diisCoefficients = new CudaArray(cu, MaxPrevDIISDipoles+1, sizeof(float), "diisMatrix");
if (polarizationType == AmoebaMultipoleForce::Mutual) {
inducedDipoleErrors = new CudaArray(cu, cu.getNumThreadBlocks(), sizeof(float2), "inducedDipoleErrors");
prevDipoles = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipoles");
prevDipolesPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesPolar");
prevErrors = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevErrors");
diisMatrix = new CudaArray(cu, MaxPrevDIISDipoles*MaxPrevDIISDipoles, elementSize, "diisMatrix");
diisCoefficients = new CudaArray(cu, MaxPrevDIISDipoles+1, sizeof(float), "diisMatrix");
}
else if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
int numOrders = force.getExtrapolationCoefficients().size();
extrapolatedDipole = new CudaArray(cu, 3*numMultipoles*numOrders, elementSize, "extrapolatedDipole");
extrapolatedDipolePolar = new CudaArray(cu, 3*numMultipoles*numOrders, elementSize, "extrapolatedDipolePolar");
inducedDipoleFieldGradient = new CudaArray(cu, 6*paddedNumAtoms, sizeof(long long), "inducedDipoleFieldGradient");
inducedDipoleFieldGradientPolar = new CudaArray(cu, 6*paddedNumAtoms, sizeof(long long), "inducedDipoleFieldGradientPolar");
extrapolatedDipoleFieldGradient = new CudaArray(cu, 6*numMultipoles*(numOrders-1), elementSize, "extrapolatedDipoleFieldGradient");
extrapolatedDipoleFieldGradientPolar = new CudaArray(cu, 6*numMultipoles*(numOrders-1), elementSize, "extrapolatedDipoleFieldGradientPolar");
}
cu.addAutoclearBuffer(*field);
cu.addAutoclearBuffer(*fieldPolar);
cu.addAutoclearBuffer(*torque);
......@@ -1036,14 +1075,16 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
// Record other options.
if (force.getPolarizationType() == AmoebaMultipoleForce::Mutual) {
if (polarizationType == AmoebaMultipoleForce::Mutual) {
maxInducedIterations = force.getMutualInducedMaxIterations();
inducedEpsilon = force.getMutualInducedTargetEpsilon();
inducedField = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedField");
inducedFieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedFieldPolar");
}
else
maxInducedIterations = 0;
if (polarizationType != AmoebaMultipoleForce::Direct) {
inducedField = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedField");
inducedFieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedFieldPolar");
}
usePME = (force.getNonbondedMethod() == AmoebaMultipoleForce::PME);
// See whether there's an AmoebaGeneralizedKirkwoodForce in the System.
......@@ -1058,6 +1099,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
bool useShuffle = (cu.getComputeCapability() >= 3.0 && !cu.getUseDoublePrecision());
double fixedThreadMemory = 19*elementSize+2*sizeof(float)+3*sizeof(int)/(double) cu.TileSize;
double inducedThreadMemory = 15*elementSize+2*sizeof(float);
if (polarizationType == AmoebaMultipoleForce::Extrapolated)
inducedThreadMemory += 12*elementSize;
double electrostaticsThreadMemory = 0;
if (!useShuffle)
fixedThreadMemory += 3*elementSize;
......@@ -1066,8 +1109,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
defines["ENERGY_SCALE_FACTOR"] = cu.doubleToString(138.9354558456/innerDielectric);
if (force.getPolarizationType() == AmoebaMultipoleForce::Direct)
if (polarizationType == AmoebaMultipoleForce::Direct)
defines["DIRECT_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Mutual)
defines["MUTUAL_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Extrapolated)
defines["EXTRAPOLATED_POLARIZATION"] = "";
if (useShuffle)
defines["USE_SHUFFLE"] = "";
if (hasQuadrupoles)
......@@ -1080,6 +1127,18 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
int endExclusionIndex = (cu.getContextIndex()+1)*numExclusionTiles/numContexts;
defines["FIRST_EXCLUSION_TILE"] = cu.intToString(startExclusionIndex);
defines["LAST_EXCLUSION_TILE"] = cu.intToString(endExclusionIndex);
maxExtrapolationOrder = force.getExtrapolationCoefficients().size();
defines["MAX_EXTRAPOLATION_ORDER"] = cu.intToString(maxExtrapolationOrder);
stringstream coefficients;
for (int i = 0; i < maxExtrapolationOrder; i++) {
if (i > 0)
coefficients << ",";
double sum = 0;
for (int j = i; j < maxExtrapolationOrder; j++)
sum += force.getExtrapolationCoefficients()[j];
coefficients << cu.doubleToString(sum);
}
defines["EXTRAPOLATION_COEFFICIENTS_SUM"] = coefficients.str();
alpha = force.getAEwald();
if (usePME) {
vector<int> pmeGridDimension;
......@@ -1113,8 +1172,20 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["GK_FQ"] = cu.doubleToString(3*(1-solventDielectric)/(2+3*solventDielectric));
fixedThreadMemory += 4*elementSize;
inducedThreadMemory += 13*elementSize;
prevDipolesGk = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGk");
prevDipolesGkPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGkPolar");
if (polarizationType == AmoebaMultipoleForce::Mutual) {
prevDipolesGk = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGk");
prevDipolesGkPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGkPolar");
}
else if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
inducedThreadMemory += 12*elementSize;
int numOrders = force.getExtrapolationCoefficients().size();
extrapolatedDipoleGk = new CudaArray(cu, 3*numMultipoles*numOrders, elementSize, "extrapolatedDipoleGk");
extrapolatedDipoleGkPolar = new CudaArray(cu, 3*numMultipoles*numOrders, elementSize, "extrapolatedDipoleGkPolar");
inducedDipoleFieldGradientGk = new CudaArray(cu, 6*numMultipoles, elementSize, "inducedDipoleFieldGradientGk");
inducedDipoleFieldGradientGkPolar = new CudaArray(cu, 6*numMultipoles, elementSize, "inducedDipoleFieldGradientGkPolar");
extrapolatedDipoleFieldGradientGk = new CudaArray(cu, 6*numMultipoles*(numOrders-1), elementSize, "extrapolatedDipoleFieldGradientGk");
extrapolatedDipoleFieldGradientGkPolar = new CudaArray(cu, 6*numMultipoles*(numOrders-1), elementSize, "extrapolatedDipoleFieldGradientGkPolar");
}
}
int maxThreads = cu.getNonbondedUtilities().getForceThreadBlockSize();
fixedFieldThreads = min(maxThreads, cu.computeThreadBlockSize(fixedThreadMemory));
......@@ -1127,15 +1198,18 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["THREAD_BLOCK_SIZE"] = cu.intToString(fixedFieldThreads);
module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleFixedField, defines);
computeFixedFieldKernel = cu.getKernel(module, "computeFixedField");
if (maxInducedIterations > 0) {
if (polarizationType != AmoebaMultipoleForce::Direct) {
defines["THREAD_BLOCK_SIZE"] = cu.intToString(inducedFieldThreads);
defines["MAX_PREV_DIIS_DIPOLES"] = cu.intToString(MaxPrevDIISDipoles);
defines["USE_MUTUAL_POLARIZATION"] = "1";
module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleInducedField, defines);
computeInducedFieldKernel = cu.getKernel(module, "computeInducedField");
updateInducedFieldKernel = cu.getKernel(module, "updateInducedFieldByDIIS");
recordDIISDipolesKernel = cu.getKernel(module, "recordInducedDipolesForDIIS");
buildMatrixKernel = cu.getKernel(module, "computeDIISMatrix");
initExtrapolatedKernel = cu.getKernel(module, "initExtrapolatedDipoles");
iterateExtrapolatedKernel = cu.getKernel(module, "iterateExtrapolatedDipoles");
computeExtrapolatedKernel = cu.getKernel(module, "computeExtrapolatedDipoles");
addExtrapolatedGradientKernel = cu.getKernel(module, "addExtrapolatedFieldGradientToForce");
}
stringstream electrostaticsSource;
electrostaticsSource << CudaKernelSources::vectorOps;
......@@ -1166,8 +1240,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ);
pmeDefines["M_PI"] = cu.doubleToString(M_PI);
pmeDefines["SQRT_PI"] = cu.doubleToString(sqrt(M_PI));
if (force.getPolarizationType() == AmoebaMultipoleForce::Direct)
if (polarizationType == AmoebaMultipoleForce::Direct)
pmeDefines["DIRECT_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Mutual)
pmeDefines["MUTUAL_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Extrapolated)
pmeDefines["EXTRAPOLATED_POLARIZATION"] = "";
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipolePme, pmeDefines);
pmeGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
pmeTransformMultipolesKernel = cu.getKernel(module, "transformMultipolesToFractionalCoordinates");
......@@ -1407,7 +1485,6 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
int numTileIndices = nb.getNumTiles();
int numForceThreadBlocks = nb.getNumForceThreadBlocks();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
void* npt = NULL;
if (pmeGrid == NULL) {
// Compute induced dipoles.
......@@ -1436,25 +1513,10 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Iterate until the dipoles converge.
if (polarizationType == AmoebaMultipoleForce::Extrapolated)
computeExtrapolatedDipoles(NULL);
for (int i = 0; i < maxInducedIterations; i++) {
cu.clearBuffer(*inducedField);
cu.clearBuffer(*inducedFieldPolar);
if (gkKernel == NULL) {
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
}
else {
cu.clearBuffer(*gkKernel->getInducedField());
cu.clearBuffer(*gkKernel->getInducedFieldPolar());
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&gkKernel->getInducedField()->getDevicePointer(), &gkKernel->getInducedFieldPolar()->getDevicePointer(),
&gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(),
&gkKernel->getBornRadii()->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
}
computeInducedField(NULL);
bool converged = iterateDipolesByDIIS(i);
if (converged)
break;
......@@ -1579,34 +1641,10 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Iterate until the dipoles converge.
vector<float2> errors;
if (polarizationType == AmoebaMultipoleForce::Extrapolated)
computeExtrapolatedDipoles(recipBoxVectorPointer);
for (int i = 0; i < maxInducedIterations; i++) {
cu.clearBuffer(*inducedField);
cu.clearBuffer(*inducedFieldPolar);
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&nb.getInteractingTiles().getDevicePointer(), &nb.getInteractionCount().getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
&maxTiles, &nb.getBlockCenters().getDevicePointer(), &nb.getInteractingAtoms().getDevicePointer(),
&dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
cu.clearBuffer(*pmeGrid);
cu.executeKernel(pmeSpreadInducedDipolesKernel, pmeSpreadInducedDipolesArgs, cu.getNumAtoms());
if (cu.getUseDoublePrecision())
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, pmeGrid->getSize());
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
cu.executeKernel(pmeConvolutionKernel, pmeConvolutionArgs, cu.getNumAtoms());
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
cu.executeKernel(pmeInducedPotentialKernel, pmeInducedPotentialArgs, cu.getNumAtoms());
void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
computeInducedField(recipBoxVectorPointer);
bool converged = iterateDipolesByDIIS(i);
if (converged)
break;
......@@ -1632,6 +1670,23 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&pmePhip->getDevicePointer(), &pmePhidp->getDevicePointer(), &pmeCphi->getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeInducedForceKernel, pmeInducedForceArgs, cu.getNumAtoms());
}
// If using extrapolated polarization, add in force contributions from µ(m) T µ(n).
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
if (gkKernel == NULL) {
void* extrapolatedArgs[] = {&cu.getForce().getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &extrapolatedDipoleFieldGradient->getDevicePointer(), &extrapolatedDipoleFieldGradientPolar->getDevicePointer()};
cu.executeKernel(addExtrapolatedGradientKernel, extrapolatedArgs, numMultipoles);
}
else {
void* extrapolatedArgs[] = {&cu.getForce().getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &extrapolatedDipoleFieldGradient->getDevicePointer(), &extrapolatedDipoleFieldGradientPolar->getDevicePointer(),
&extrapolatedDipoleGk->getDevicePointer(), &extrapolatedDipoleGkPolar->getDevicePointer(),
&extrapolatedDipoleFieldGradientGk->getDevicePointer(), &extrapolatedDipoleFieldGradientGkPolar->getDevicePointer()};
cu.executeKernel(addExtrapolatedGradientKernel, extrapolatedArgs, numMultipoles);
}
}
// Map torques to force.
......@@ -1646,6 +1701,109 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
return 0.0;
}
void CudaCalcAmoebaMultipoleForceKernel::computeInducedField(void** recipBoxVectorPointer) {
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
int startTileIndex = nb.getStartTileIndex();
int numTileIndices = nb.getNumTiles();
int numForceThreadBlocks = nb.getNumForceThreadBlocks();
unsigned int maxTiles = 0;
vector<void*> computeInducedFieldArgs;
computeInducedFieldArgs.push_back(&inducedField->getDevicePointer());
computeInducedFieldArgs.push_back(&inducedFieldPolar->getDevicePointer());
computeInducedFieldArgs.push_back(&cu.getPosq().getDevicePointer());
computeInducedFieldArgs.push_back(&nb.getExclusionTiles().getDevicePointer());
computeInducedFieldArgs.push_back(&inducedDipole->getDevicePointer());
computeInducedFieldArgs.push_back(&inducedDipolePolar->getDevicePointer());
computeInducedFieldArgs.push_back(&startTileIndex);
computeInducedFieldArgs.push_back(&numTileIndices);
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
computeInducedFieldArgs.push_back(&inducedDipoleFieldGradient->getDevicePointer());
computeInducedFieldArgs.push_back(&inducedDipoleFieldGradientPolar->getDevicePointer());
}
if (pmeGrid != NULL) {
computeInducedFieldArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
computeInducedFieldArgs.push_back(&nb.getInteractionCount().getDevicePointer());
computeInducedFieldArgs.push_back(cu.getPeriodicBoxSizePointer());
computeInducedFieldArgs.push_back(cu.getInvPeriodicBoxSizePointer());
computeInducedFieldArgs.push_back(cu.getPeriodicBoxVecXPointer());
computeInducedFieldArgs.push_back(cu.getPeriodicBoxVecYPointer());
computeInducedFieldArgs.push_back(cu.getPeriodicBoxVecZPointer());
computeInducedFieldArgs.push_back(&maxTiles);
computeInducedFieldArgs.push_back(&nb.getBlockCenters().getDevicePointer());
computeInducedFieldArgs.push_back(&nb.getInteractingAtoms().getDevicePointer());
}
if (gkKernel != NULL) {
computeInducedFieldArgs.push_back(&gkKernel->getInducedField()->getDevicePointer());
computeInducedFieldArgs.push_back(&gkKernel->getInducedFieldPolar()->getDevicePointer());
computeInducedFieldArgs.push_back(&gkKernel->getInducedDipoles()->getDevicePointer());
computeInducedFieldArgs.push_back(&gkKernel->getInducedDipolesPolar()->getDevicePointer());
computeInducedFieldArgs.push_back(&gkKernel->getBornRadii()->getDevicePointer());
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
computeInducedFieldArgs.push_back(&inducedDipoleFieldGradientGk->getDevicePointer());
computeInducedFieldArgs.push_back(&inducedDipoleFieldGradientGkPolar->getDevicePointer());
}
}
computeInducedFieldArgs.push_back(&dampingAndThole->getDevicePointer());
cu.clearBuffer(*inducedField);
cu.clearBuffer(*inducedFieldPolar);
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
cu.clearBuffer(*inducedDipoleFieldGradient);
cu.clearBuffer(*inducedDipoleFieldGradientPolar);
}
if (gkKernel != NULL) {
cu.clearBuffer(*gkKernel->getInducedField());
cu.clearBuffer(*gkKernel->getInducedFieldPolar());
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
cu.clearBuffer(*inducedDipoleFieldGradientGk);
cu.clearBuffer(*inducedDipoleFieldGradientGkPolar);
}
}
if (pmeGrid == NULL)
cu.executeKernel(computeInducedFieldKernel, &computeInducedFieldArgs[0], numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
else {
maxTiles = nb.getInteractingTiles().getSize();
cu.executeKernel(computeInducedFieldKernel, &computeInducedFieldArgs[0], numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
cu.clearBuffer(*pmeGrid);
void* pmeSpreadInducedDipolesArgs[] = {&cu.getPosq().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
&pmeGrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeSpreadInducedDipolesKernel, pmeSpreadInducedDipolesArgs, cu.getNumAtoms());
if (cu.getUseDoublePrecision()) {
void* finishSpreadArgs[] = {&pmeGrid->getDevicePointer()};
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, pmeGrid->getSize());
}
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
void* pmeConvolutionArgs[] = {&pmeGrid->getDevicePointer(), &pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(),
&pmeBsplineModuliZ->getDevicePointer(), cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeConvolutionKernel, pmeConvolutionArgs, cu.getNumAtoms());
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
void* pmeInducedPotentialArgs[] = {&pmeGrid->getDevicePointer(), &pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&pmePhidp->getDevicePointer(), &cu.getPosq().getDevicePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(),
cu.getPeriodicBoxVecZPointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2],
&pmeAtomGridIndex->getDevicePointer()};
cu.executeKernel(pmeInducedPotentialKernel, pmeInducedPotentialArgs, cu.getNumAtoms());
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(),
&inducedDipolePolar->getDevicePointer(), &inducedDipoleFieldGradient->getDevicePointer(), &inducedDipoleFieldGradientPolar->getDevicePointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
}
else {
void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
}
}
}
bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
void* npt = NULL;
bool trueValue = true, falseValue = false;
......@@ -1744,6 +1902,62 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
return false;
}
void CudaCalcAmoebaMultipoleForceKernel::computeExtrapolatedDipoles(void** recipBoxVectorPointer) {
// Start by storing the direct dipoles as PT0
if (gkKernel == NULL) {
void* initArgs[] = {&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &inducedDipoleFieldGradient->getDevicePointer(), &inducedDipoleFieldGradientPolar->getDevicePointer()};
cu.executeKernel(initExtrapolatedKernel, initArgs, extrapolatedDipole->getSize());
}
else {
void* initArgs[] = {&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &inducedDipoleFieldGradient->getDevicePointer(), &inducedDipoleFieldGradientPolar->getDevicePointer(),
&gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(), &extrapolatedDipoleGk->getDevicePointer(),
&extrapolatedDipoleGkPolar->getDevicePointer(), &inducedDipoleFieldGradientGk->getDevicePointer(), &inducedDipoleFieldGradientGkPolar->getDevicePointer()};
cu.executeKernel(initExtrapolatedKernel, initArgs, extrapolatedDipole->getSize());
}
// Recursively apply alpha.Tau to the µ_(n) components to generate µ_(n+1), and store the result
for (int order = 1; order < maxExtrapolationOrder; ++order) {
computeInducedField(recipBoxVectorPointer);
if (gkKernel == NULL) {
void* iterateArgs[] = {&order, &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &inducedDipoleFieldGradient->getDevicePointer(), &inducedDipoleFieldGradientPolar->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &extrapolatedDipoleFieldGradient->getDevicePointer(), &extrapolatedDipoleFieldGradientPolar->getDevicePointer(),
&polarizability->getDevicePointer()};
cu.executeKernel(iterateExtrapolatedKernel, iterateArgs, extrapolatedDipole->getSize());
}
else {
void* iterateArgs[] = {&order, &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &inducedDipoleFieldGradient->getDevicePointer(), &inducedDipoleFieldGradientPolar->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &extrapolatedDipoleFieldGradient->getDevicePointer(), &extrapolatedDipoleFieldGradientPolar->getDevicePointer(),
&gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(), &extrapolatedDipoleGk->getDevicePointer(),
&extrapolatedDipoleGkPolar->getDevicePointer(), &inducedDipoleFieldGradientGk->getDevicePointer(), &inducedDipoleFieldGradientGkPolar->getDevicePointer(),
&gkKernel->getInducedField()->getDevicePointer(), &gkKernel->getInducedFieldPolar()->getDevicePointer(),
&extrapolatedDipoleFieldGradientGk->getDevicePointer(), &extrapolatedDipoleFieldGradientGkPolar->getDevicePointer(),
&polarizability->getDevicePointer()};
cu.executeKernel(iterateExtrapolatedKernel, iterateArgs, extrapolatedDipole->getSize());
}
}
// Take a linear combination of the µ_(n) components to form the total dipole
if (gkKernel == NULL) {
void* computeArgs[] = {&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer()};
cu.executeKernel(computeExtrapolatedKernel, computeArgs, extrapolatedDipole->getSize());
}
else {
void* computeArgs[] = {&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(),
&extrapolatedDipoleGk->getDevicePointer(), &extrapolatedDipoleGkPolar->getDevicePointer()};
cu.executeKernel(computeExtrapolatedKernel, computeArgs, extrapolatedDipole->getSize());
}
computeInducedField(recipBoxVectorPointer);
}
void CudaCalcAmoebaMultipoleForceKernel::ensureMultipolesValid(ContextImpl& context) {
if (multipolesAreValid) {
int numParticles = cu.getNumAtoms();
......@@ -2088,7 +2302,8 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::initialize(const System& syst
bornForce = CudaArray::create<long long>(cu, paddedNumAtoms, "bornForce");
inducedDipoleS = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipoleS");
inducedDipolePolarS = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolarS");
if (multipoles->getPolarizationType() == AmoebaMultipoleForce::Mutual) {
polarizationType = multipoles->getPolarizationType();
if (polarizationType != AmoebaMultipoleForce::Direct) {
inducedField = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "gkInducedField");
inducedFieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "gkInducedFieldPolar");
}
......@@ -2141,8 +2356,12 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::initialize(const System& syst
defines["EPSILON_FACTOR"] = cu.doubleToString(138.9354558456);
defines["M_PI"] = cu.doubleToString(M_PI);
defines["ENERGY_SCALE_FACTOR"] = cu.doubleToString(138.9354558456/force.getSoluteDielectric());
if (multipoles->getPolarizationType() == AmoebaMultipoleForce::Direct)
if (polarizationType == AmoebaMultipoleForce::Direct)
defines["DIRECT_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Mutual)
defines["MUTUAL_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Extrapolated)
defines["EXTRAPOLATED_POLARIZATION"] = "";
includeSurfaceArea = force.getIncludeCavityTerm();
if (includeSurfaceArea) {
defines["SURFACE_AREA_FACTOR"] = cu.doubleToString(force.getSurfaceAreaFactor());
......
......@@ -385,14 +385,17 @@ private:
const char* getSortKey() const {return "value.y";}
};
void initializeScaleFactors();
void computeInducedField(void** recipBoxVectorPointer);
bool iterateDipolesByDIIS(int iteration);
void computeExtrapolatedDipoles(void** recipBoxVectorPointer);
void ensureMultipolesValid(ContextImpl& context);
template <class T, class T4, class M4> void computeSystemMultipoleMoments(ContextImpl& context, std::vector<double>& outputMultipoleMoments);
int numMultipoles, maxInducedIterations;
int numMultipoles, maxInducedIterations, maxExtrapolationOrder;
int fixedFieldThreads, inducedFieldThreads, electrostaticsThreads;
int gridSizeX, gridSizeY, gridSizeZ;
double alpha, inducedEpsilon;
bool usePME, hasQuadrupoles, hasInitializedScaleFactors, hasInitializedFFT, multipolesAreValid;
AmoebaMultipoleForce::PolarizationType polarizationType;
CudaContext& cu;
const System& system;
std::vector<int3> covalentFlagValues;
......@@ -422,6 +425,18 @@ private:
CudaArray* prevErrors;
CudaArray* diisMatrix;
CudaArray* diisCoefficients;
CudaArray* extrapolatedDipole;
CudaArray* extrapolatedDipolePolar;
CudaArray* extrapolatedDipoleGk;
CudaArray* extrapolatedDipoleGkPolar;
CudaArray* inducedDipoleFieldGradient;
CudaArray* inducedDipoleFieldGradientPolar;
CudaArray* inducedDipoleFieldGradientGk;
CudaArray* inducedDipoleFieldGradientGkPolar;
CudaArray* extrapolatedDipoleFieldGradient;
CudaArray* extrapolatedDipoleFieldGradientPolar;
CudaArray* extrapolatedDipoleFieldGradientGk;
CudaArray* extrapolatedDipoleFieldGradientGkPolar;
CudaArray* polarizability;
CudaArray* covalentFlags;
CudaArray* polarizationGroupFlags;
......@@ -444,6 +459,7 @@ private:
CUfunction pmeGridIndexKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeFinishSpreadChargeKernel, pmeConvolutionKernel;
CUfunction pmeFixedPotentialKernel, pmeInducedPotentialKernel, pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel, computePotentialKernel;
CUfunction recordDIISDipolesKernel, buildMatrixKernel;
CUfunction initExtrapolatedKernel, iterateExtrapolatedKernel, computeExtrapolatedKernel, addExtrapolatedGradientKernel;
CUfunction pmeTransformMultipolesKernel, pmeTransformPotentialKernel;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel* gkKernel;
static const int PmeOrder = 5;
......@@ -512,6 +528,7 @@ private:
const System& system;
bool includeSurfaceArea, hasInitializedKernels;
int computeBornSumThreads, gkForceThreads, chainRuleThreads, ediffThreads;
AmoebaMultipoleForce::PolarizationType polarizationType;
std::map<std::string, std::string> defines;
CudaArray* params;
CudaArray* bornSum;
......
......@@ -39,15 +39,23 @@ real ym = zcp*xap - xcp*zap;
real zm = xcp*yap - ycp*xap;
real rm = max(SQRT(xm*xm + ym*ym + zm*zm), (real) 1e-6f);
real dot = xap*xcp + yap*ycp + zap*zcp;
real dotp = xap*xcp + yap*ycp + zap*zcp;
real product = SQRT(rap2*rcp2);
real cosine = (product > 0 ? (dot/product) : 0);
real cosine = (product > 0 ? (dotp/product) : 0);
cosine = max(min(cosine, (real) 1), (real) -1);
real angle = ACOS(cosine);
real angle;
if (cosine > 0.99f || cosine < -0.99f) {
real3 cross_prod = cross(make_real3(xap, yap, zap), make_real3(xcp, ycp, zcp));
angle = ASIN(SQRT(dot(cross_prod, cross_prod)/(rap2*rcp2)))*RAD_TO_DEG;
if (cosine < 0.0f)
angle = 180-angle;
}
else
angle = ACOS(cosine)*RAD_TO_DEG;
// if product == 0, set force/energy to 0
real deltaIdeal = (product > 0 ? (angle*RAD_TO_DEG - angleParams.x) : 0);
real deltaIdeal = (product > 0 ? (angle - angleParams.x) : 0);
real deltaIdeal2 = deltaIdeal*deltaIdeal;
real deltaIdeal3 = deltaIdeal*deltaIdeal2;
real deltaIdeal4 = deltaIdeal2*deltaIdeal2;
......@@ -111,4 +119,4 @@ real dedzid = -dedzia - dedzib - dedzic;
real3 force1 = make_real3(-dedxia, -dedyia, -dedzia);
real3 force2 = make_real3(-dedxib, -dedyib, -dedzib);
real3 force3 = make_real3(-dedxic, -dedyic, -dedzic);
real3 force4 = make_real3(-dedxid, -dedyid, -dedzid);
\ No newline at end of file
real3 force4 = make_real3(-dedxid, -dedyid, -dedzid);
......@@ -17,18 +17,27 @@ real zp = xcb*yab - ycb*xab;
real rp = SQRT(xp*xp + yp*yp + zp*zp);
real dot = xab*xcb + yab*ycb + zab*zcb;
real cosine = rab*rcb > 0 ? (dot / (rab*rcb)) : (real) 1;
real dotp = xab*xcb + yab*ycb + zab*zcb;
real cosine = rab*rcb > 0 ? (dotp / (rab*rcb)) : (real) 1;
cosine = (cosine > 1 ? (real) 1 : cosine);
cosine = (cosine < -1 ? -(real) 1 : cosine);
real angle = ACOS(cosine);
real angle;
if (cosine > 0.99f || cosine < -0.99f) {
// Highly unlikely a stretch-bend angle will be near 0 or 180, but just in case...
real3 cross_prod = cross(make_real3(xab, yab, zab), make_real3(xcb, ycb, zcb));
angle = ASIN(SQRT(dot(cross_prod, cross_prod))/(rab*rcb))*RAD_TO_DEG;
if (cosine < 0.0f)
angle = 180-angle;
}
else
angle = ACOS(cosine)*RAD_TO_DEG;
// find chain rule terms for the bond angle deviation
float3 parameters = PARAMS[index];
float2 force_constants = FORCE_CONSTANTS[index];
real dt = RAD_TO_DEG*(angle - parameters.z);
real dt = angle - RAD_TO_DEG*parameters.z;
real terma = rab*rp != 0 ? (-RAD_TO_DEG/(rab*rab*rp)) : (real) 0;
real termc = rcb*rp != 0 ? (RAD_TO_DEG/(rcb*rcb*rp)) : (real) 0;
......
......@@ -365,7 +365,22 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData
// correction to convert mutual to direct polarization force
#ifdef DIRECT_POLARIZATION
#ifdef MUTUAL_POLARIZATION
real findmp1 = uscale*(scip2*ddsc3_1 - ddsc5_1*(sci3*scip4+scip3*sci4));
real findmp2 = uscale*(scip2*ddsc3_2 - ddsc5_2*(sci3*scip4+scip3*sci4));
real findmp3 = uscale*(scip2*ddsc3_3 - ddsc5_3*(sci3*scip4+scip3*sci4));
ftm2i1 -= 0.5f*findmp1;
ftm2i2 -= 0.5f*findmp2;
ftm2i3 -= 0.5f*findmp3;
real sci3X = sci3 - sci3Y;
real sci4X = sci4 - sci4Y;
real scip3X = scip3 - scip3Y;
real scip4X = scip4 - scip4Y;
ftm2i1 += 0.5f*uscale*(-ddsc5_1*(sci3X*scip4X+scip3X*sci4X));
ftm2i2 += 0.5f*uscale*(-ddsc5_2*(sci3X*scip4X+scip3X*sci4X));
ftm2i3 += 0.5f*uscale*(-ddsc5_3*(sci3X*scip4X+scip3X*sci4X));
#else
real gfd = (scip2*scale3i - 5*rr2*(scip3*sci4+sci3*scip4)*scale5i);
real fdir1 = gfd*xr + scale5i* (sci4*atom1.inducedDipolePolarS.x+scip4*atom1.inducedDipoleS.x + sci3*atom2.inducedDipolePolarS.x+scip3*atom2.inducedDipoleS.x);
real fdir2 = gfd*yr + scale5i* (sci4*atom1.inducedDipolePolarS.y+scip4*atom1.inducedDipoleS.y + sci3*atom2.inducedDipolePolarS.y+scip3*atom2.inducedDipoleS.y);
......@@ -385,21 +400,6 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData
ftm2i1 += 0.5f*fdir1;
ftm2i2 += 0.5f*fdir2;
ftm2i3 += 0.5f*fdir3;
#else
real findmp1 = uscale*(scip2*ddsc3_1 - ddsc5_1*(sci3*scip4+scip3*sci4));
real findmp2 = uscale*(scip2*ddsc3_2 - ddsc5_2*(sci3*scip4+scip3*sci4));
real findmp3 = uscale*(scip2*ddsc3_3 - ddsc5_3*(sci3*scip4+scip3*sci4));
ftm2i1 -= 0.5f*findmp1;
ftm2i2 -= 0.5f*findmp2;
ftm2i3 -= 0.5f*findmp3;
real sci3X = sci3 - sci3Y;
real sci4X = sci4 - sci4Y;
real scip3X = scip3 - scip3Y;
real scip4X = scip4 - scip4Y;
ftm2i1 += 0.5f*uscale*(-ddsc5_1*(sci3X*scip4X+scip3X*sci4X));
ftm2i2 += 0.5f*uscale*(-ddsc5_2*(sci3X*scip4X+scip3X*sci4X));
ftm2i3 += 0.5f*uscale*(-ddsc5_3*(sci3X*scip4X+scip3X*sci4X));
#endif
#endif
......
......@@ -327,7 +327,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
real iEIY = qiUinpI.x*Vijp[1] + qiUindI.x*Vijd[1] - qiUinpI.y*Vijp[0] - qiUindI.y*Vijd[0];
real iEJY = qiUinpJ.x*Vjip[1] + qiUindJ.x*Vjid[1] - qiUinpJ.y*Vjip[0] - qiUindJ.y*Vjid[0];
#ifdef USE_MUTUAL_POLARIZATION
#ifdef MUTUAL_POLARIZATION
// Uind-Uind terms (m=0)
real eCoef = -4*rInvVec[3]*thole_d0;
real dCoef = 6*rInvVec[4]*dthole_d0;
......
......@@ -3,9 +3,15 @@
typedef struct {
real3 pos;
real3 field, fieldPolar, inducedDipole, inducedDipolePolar;
#ifdef EXTRAPOLATED_POLARIZATION
real fieldGradient[6], fieldGradientPolar[6];
#endif
#ifdef USE_GK
real3 fieldS, fieldPolarS, inducedDipoleS, inducedDipolePolarS;
real bornRadius;
#ifdef EXTRAPOLATED_POLARIZATION
real fieldGradientS[6], fieldGradientPolarS[6];
#endif
#endif
float thole, damp;
} AtomData;
......@@ -47,17 +53,79 @@ inline __device__ void zeroAtomData(AtomData& data) {
data.fieldS = make_real3(0);
data.fieldPolarS = make_real3(0);
#endif
#ifdef EXTRAPOLATED_POLARIZATION
for (int i = 0; i < 6; i++) {
data.fieldGradient[i] = 0;
data.fieldGradientPolar[i] = 0;
#ifdef USE_GK
data.fieldGradientS[i] = 0;
data.fieldGradientPolarS[i] = 0;
#endif
}
#endif
}
#ifdef EXTRAPOLATED_POLARIZATION
#ifdef USE_GK
#define SAVE_ATOM_DATA(index, data) saveAtomData(index, data, field, fieldPolar, fieldGradient, fieldGradientPolar, fieldS, fieldPolarS, fieldGradientS, fieldGradientPolarS);
#else
#define SAVE_ATOM_DATA(index, data) saveAtomData(index, data, field, fieldPolar, fieldGradient, fieldGradientPolar);
#endif
#else
#ifdef USE_GK
#define SAVE_ATOM_DATA(index, data) saveAtomData(index, data, field, fieldPolar, fieldS, fieldPolarS);
#else
#define SAVE_ATOM_DATA(index, data) saveAtomData(index, data, field, fieldPolar);
#endif
#endif
inline __device__ void saveAtomData(int index, AtomData& data, unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar
#ifdef EXTRAPOLATED_POLARIZATION
, unsigned long long* __restrict__ fieldGradient, unsigned long long* __restrict__ fieldGradientPolar
#endif
#ifdef USE_GK
, unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS
#ifdef EXTRAPOLATED_POLARIZATION
, unsigned long long* __restrict__ fieldGradientS, unsigned long long* __restrict__ fieldGradientPolarS
#endif
#endif
) {
atomicAdd(&field[index], static_cast<unsigned long long>((long long) (data.field.x*0x100000000)));
atomicAdd(&field[index+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.y*0x100000000)));
atomicAdd(&field[index+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.z*0x100000000)));
atomicAdd(&fieldPolar[index], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolar[index+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolar[index+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0x100000000)));
#ifdef USE_GK
atomicAdd(&fieldS[index], static_cast<unsigned long long>((long long) (data.fieldS.x*0x100000000)));
atomicAdd(&fieldS[index+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.y*0x100000000)));
atomicAdd(&fieldS[index+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.z*0x100000000)));
atomicAdd(&fieldPolarS[index], static_cast<unsigned long long>((long long) (data.fieldPolarS.x*0x100000000)));
atomicAdd(&fieldPolarS[index+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.y*0x100000000)));
atomicAdd(&fieldPolarS[index+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.z*0x100000000)));
#endif
#ifdef EXTRAPOLATED_POLARIZATION
for (int i = 0; i < 6; i++) {
atomicAdd(&fieldGradient[6*index+i], static_cast<unsigned long long>((long long) (data.fieldGradient[i]*0x100000000)));
atomicAdd(&fieldGradientPolar[6*index+i], static_cast<unsigned long long>((long long) (data.fieldGradientPolar[i]*0x100000000)));
#ifdef USE_GK
atomicAdd(&fieldGradientS[6*index+i], static_cast<unsigned long long>((long long) (data.fieldGradientS[i]*0x100000000)));
atomicAdd(&fieldGradientPolarS[6*index+i], static_cast<unsigned long long>((long long) (data.fieldGradientPolarS[i]*0x100000000)));
#endif
}
#endif
}
#ifdef USE_EWALD
__device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 deltaR, bool isSelfInteraction) {
if (isSelfInteraction)
return;
real scale1, scale2;
real scale1, scale2, scale3;
real r2 = dot(deltaR, deltaR);
if (r2 < CUTOFF_SQUARED) {
real rI = RSQRT(r2);
real r = RECIP(rI);
real rI2 = rI*rI;
// calculate the error function damping terms
......@@ -77,36 +145,40 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
real alsq2 = 2*EWALD_ALPHA*EWALD_ALPHA;
real alsq2n = RECIP(SQRT_PI*EWALD_ALPHA);
alsq2n *= alsq2;
real bn1 = (bn0+alsq2n*exp2a)*rI*rI;
real bn1 = (bn0+alsq2n*exp2a)*rI2;
alsq2n *= alsq2;
real bn2 = (3*bn1+alsq2n*exp2a)*rI*rI;
real bn2 = (3*bn1+alsq2n*exp2a)*rI2;
alsq2n *= alsq2;
real bn3 = (5*bn2+alsq2n*exp2a)*rI2;
// compute the error function scaled and unscaled terms
real scale3 = 1;
real scale5 = 1;
real damp = atom1.damp*atom2.damp;
real ratio = (r/damp);
ratio = ratio*ratio*ratio;
float pgamma = atom1.thole < atom2.thole ? atom1.thole : atom2.thole;
damp = damp == 0 ? 0 : -pgamma*ratio;
real expdamp = EXP(damp);
scale3 = 1 - expdamp;
scale5 = 1 - expdamp*(1-damp);
real dsc3 = scale3;
real dsc5 = scale5;
real dsc3 = 1 - expdamp;
real dsc5 = 1 - expdamp*(1-damp);
real dsc7 = 1 - (1-damp+(0.6f*damp*damp))*expdamp;
real r3 = (r*r2);
real r5 = (r3*r2);
real r7 = (r5*r2);
real rr3 = (1-dsc3)/r3;
real rr5 = 3*(1-dsc5)/r5;
real rr7 = 15*(1-dsc7)/r7;
scale1 = rr3 - bn1;
scale2 = bn2 - rr5;
scale3 = bn3 - rr7;
}
else {
scale1 = 0;
scale2 = 0;
scale3 = 0;
}
real dDotDelta = scale2*dot(deltaR, atom2.inducedDipole);
atom1.field += scale1*atom2.inducedDipole + dDotDelta*deltaR;
......@@ -116,6 +188,45 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
atom2.field += scale1*atom1.inducedDipole + dDotDelta*deltaR;
dDotDelta = scale2*dot(deltaR, atom1.inducedDipolePolar);
atom2.fieldPolar += scale1*atom1.inducedDipolePolar + dDotDelta*deltaR;
#ifdef EXTRAPOLATED_POLARIZATION
// Compute and store the field gradients for later use.
real3 dipole = atom1.inducedDipole;
real muDotR = dipole.x*deltaR.x + dipole.y*deltaR.y + dipole.z*deltaR.z;
atom2.fieldGradient[0] -= muDotR*deltaR.x*deltaR.x*scale3 - (2*dipole.x*deltaR.x + muDotR)*scale2;
atom2.fieldGradient[1] -= muDotR*deltaR.y*deltaR.y*scale3 - (2*dipole.y*deltaR.y + muDotR)*scale2;
atom2.fieldGradient[2] -= muDotR*deltaR.z*deltaR.z*scale3 - (2*dipole.z*deltaR.z + muDotR)*scale2;
atom2.fieldGradient[3] -= muDotR*deltaR.x*deltaR.y*scale3 - (dipole.x*deltaR.y + dipole.y*deltaR.x)*scale2;
atom2.fieldGradient[4] -= muDotR*deltaR.x*deltaR.z*scale3 - (dipole.x*deltaR.z + dipole.z*deltaR.x)*scale2;
atom2.fieldGradient[5] -= muDotR*deltaR.y*deltaR.z*scale3 - (dipole.y*deltaR.z + dipole.z*deltaR.y)*scale2;
dipole = atom1.inducedDipolePolar;
muDotR = dipole.x*deltaR.x + dipole.y*deltaR.y + dipole.z*deltaR.z;
atom2.fieldGradientPolar[0] -= muDotR*deltaR.x*deltaR.x*scale3 - (2*dipole.x*deltaR.x + muDotR)*scale2;
atom2.fieldGradientPolar[1] -= muDotR*deltaR.y*deltaR.y*scale3 - (2*dipole.y*deltaR.y + muDotR)*scale2;
atom2.fieldGradientPolar[2] -= muDotR*deltaR.z*deltaR.z*scale3 - (2*dipole.z*deltaR.z + muDotR)*scale2;
atom2.fieldGradientPolar[3] -= muDotR*deltaR.x*deltaR.y*scale3 - (dipole.x*deltaR.y + dipole.y*deltaR.x)*scale2;
atom2.fieldGradientPolar[4] -= muDotR*deltaR.x*deltaR.z*scale3 - (dipole.x*deltaR.z + dipole.z*deltaR.x)*scale2;
atom2.fieldGradientPolar[5] -= muDotR*deltaR.y*deltaR.z*scale3 - (dipole.y*deltaR.z + dipole.z*deltaR.y)*scale2;
dipole = atom2.inducedDipole;
muDotR = dipole.x*deltaR.x + dipole.y*deltaR.y + dipole.z*deltaR.z;
atom1.fieldGradient[0] += muDotR*deltaR.x*deltaR.x*scale3 - (2*dipole.x*deltaR.x + muDotR)*scale2;
atom1.fieldGradient[1] += muDotR*deltaR.y*deltaR.y*scale3 - (2*dipole.y*deltaR.y + muDotR)*scale2;
atom1.fieldGradient[2] += muDotR*deltaR.z*deltaR.z*scale3 - (2*dipole.z*deltaR.z + muDotR)*scale2;
atom1.fieldGradient[3] += muDotR*deltaR.x*deltaR.y*scale3 - (dipole.x*deltaR.y + dipole.y*deltaR.x)*scale2;
atom1.fieldGradient[4] += muDotR*deltaR.x*deltaR.z*scale3 - (dipole.x*deltaR.z + dipole.z*deltaR.x)*scale2;
atom1.fieldGradient[5] += muDotR*deltaR.y*deltaR.z*scale3 - (dipole.y*deltaR.z + dipole.z*deltaR.y)*scale2;
dipole = atom2.inducedDipolePolar;
muDotR = dipole.x*deltaR.x + dipole.y*deltaR.y + dipole.z*deltaR.z;
atom1.fieldGradientPolar[0] += muDotR*deltaR.x*deltaR.x*scale3 - (2*dipole.x*deltaR.x + muDotR)*scale2;
atom1.fieldGradientPolar[1] += muDotR*deltaR.y*deltaR.y*scale3 - (2*dipole.y*deltaR.y + muDotR)*scale2;
atom1.fieldGradientPolar[2] += muDotR*deltaR.z*deltaR.z*scale3 - (2*dipole.z*deltaR.z + muDotR)*scale2;
atom1.fieldGradientPolar[3] += muDotR*deltaR.x*deltaR.y*scale3 - (dipole.x*deltaR.y + dipole.y*deltaR.x)*scale2;
atom1.fieldGradientPolar[4] += muDotR*deltaR.x*deltaR.z*scale3 - (dipole.x*deltaR.z + dipole.z*deltaR.x)*scale2;
atom1.fieldGradientPolar[5] += muDotR*deltaR.y*deltaR.z*scale3 - (dipole.y*deltaR.z + dipole.z*deltaR.y)*scale2;
#endif
}
#elif defined USE_GK
__device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 deltaR, bool isSelfInteraction) {
......@@ -182,6 +293,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
real r2I = rI*rI;
real rr3 = -rI*r2I;
real rr5 = -3*rr3*r2I;
real rr7 = 5*rr5*r2I;
real dampProd = atom1.damp*atom2.damp;
real ratio = (dampProd != 0 ? r/dampProd : 1);
float pGamma = (atom2.thole > atom1.thole ? atom1.thole: atom2.thole);
......@@ -189,6 +301,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
real dampExp = (dampProd != 0 ? EXP(-damp) : 0);
rr3 *= 1 - dampExp;
rr5 *= 1 - (1+damp)*dampExp;
rr7 *= 1 - (1+damp+(0.6f*damp*damp))*dampExp;
real dDotDelta = rr5*dot(deltaR, atom2.inducedDipole);
atom1.field += rr3*atom2.inducedDipole + dDotDelta*deltaR;
dDotDelta = rr5*dot(deltaR, atom2.inducedDipolePolar);
......@@ -197,6 +310,45 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
atom2.field += rr3*atom1.inducedDipole + dDotDelta*deltaR;
dDotDelta = rr5*dot(deltaR, atom1.inducedDipolePolar);
atom2.fieldPolar += rr3*atom1.inducedDipolePolar + dDotDelta*deltaR;
#ifdef EXTRAPOLATED_POLARIZATION
// Compute and store the field gradients for later use.
real3 dipole = atom1.inducedDipole;
real muDotR = dipole.x*deltaR.x + dipole.y*deltaR.y + dipole.z*deltaR.z;
atom2.fieldGradient[0] -= muDotR*deltaR.x*deltaR.x*rr7 - (2*dipole.x*deltaR.x + muDotR)*rr5;
atom2.fieldGradient[1] -= muDotR*deltaR.y*deltaR.y*rr7 - (2*dipole.y*deltaR.y + muDotR)*rr5;
atom2.fieldGradient[2] -= muDotR*deltaR.z*deltaR.z*rr7 - (2*dipole.z*deltaR.z + muDotR)*rr5;
atom2.fieldGradient[3] -= muDotR*deltaR.x*deltaR.y*rr7 - (dipole.x*deltaR.y + dipole.y*deltaR.x)*rr5;
atom2.fieldGradient[4] -= muDotR*deltaR.x*deltaR.z*rr7 - (dipole.x*deltaR.z + dipole.z*deltaR.x)*rr5;
atom2.fieldGradient[5] -= muDotR*deltaR.y*deltaR.z*rr7 - (dipole.y*deltaR.z + dipole.z*deltaR.y)*rr5;
dipole = atom1.inducedDipolePolar;
muDotR = dipole.x*deltaR.x + dipole.y*deltaR.y + dipole.z*deltaR.z;
atom2.fieldGradientPolar[0] -= muDotR*deltaR.x*deltaR.x*rr7 - (2*dipole.x*deltaR.x + muDotR)*rr5;
atom2.fieldGradientPolar[1] -= muDotR*deltaR.y*deltaR.y*rr7 - (2*dipole.y*deltaR.y + muDotR)*rr5;
atom2.fieldGradientPolar[2] -= muDotR*deltaR.z*deltaR.z*rr7 - (2*dipole.z*deltaR.z + muDotR)*rr5;
atom2.fieldGradientPolar[3] -= muDotR*deltaR.x*deltaR.y*rr7 - (dipole.x*deltaR.y + dipole.y*deltaR.x)*rr5;
atom2.fieldGradientPolar[4] -= muDotR*deltaR.x*deltaR.z*rr7 - (dipole.x*deltaR.z + dipole.z*deltaR.x)*rr5;
atom2.fieldGradientPolar[5] -= muDotR*deltaR.y*deltaR.z*rr7 - (dipole.y*deltaR.z + dipole.z*deltaR.y)*rr5;
dipole = atom2.inducedDipole;
muDotR = dipole.x*deltaR.x + dipole.y*deltaR.y + dipole.z*deltaR.z;
atom1.fieldGradient[0] += muDotR*deltaR.x*deltaR.x*rr7 - (2*dipole.x*deltaR.x + muDotR)*rr5;
atom1.fieldGradient[1] += muDotR*deltaR.y*deltaR.y*rr7 - (2*dipole.y*deltaR.y + muDotR)*rr5;
atom1.fieldGradient[2] += muDotR*deltaR.z*deltaR.z*rr7 - (2*dipole.z*deltaR.z + muDotR)*rr5;
atom1.fieldGradient[3] += muDotR*deltaR.x*deltaR.y*rr7 - (dipole.x*deltaR.y + dipole.y*deltaR.x)*rr5;
atom1.fieldGradient[4] += muDotR*deltaR.x*deltaR.z*rr7 - (dipole.x*deltaR.z + dipole.z*deltaR.x)*rr5;
atom1.fieldGradient[5] += muDotR*deltaR.y*deltaR.z*rr7 - (dipole.y*deltaR.z + dipole.z*deltaR.y)*rr5;
dipole = atom2.inducedDipolePolar;
muDotR = dipole.x*deltaR.x + dipole.y*deltaR.y + dipole.z*deltaR.z;
atom1.fieldGradientPolar[0] += muDotR*deltaR.x*deltaR.x*rr7 - (2*dipole.x*deltaR.x + muDotR)*rr5;
atom1.fieldGradientPolar[1] += muDotR*deltaR.y*deltaR.y*rr7 - (2*dipole.y*deltaR.y + muDotR)*rr5;
atom1.fieldGradientPolar[2] += muDotR*deltaR.z*deltaR.z*rr7 - (2*dipole.z*deltaR.z + muDotR)*rr5;
atom1.fieldGradientPolar[3] += muDotR*deltaR.x*deltaR.y*rr7 - (dipole.x*deltaR.y + dipole.y*deltaR.x)*rr5;
atom1.fieldGradientPolar[4] += muDotR*deltaR.x*deltaR.z*rr7 - (dipole.x*deltaR.z + dipole.z*deltaR.x)*rr5;
atom1.fieldGradientPolar[5] += muDotR*deltaR.y*deltaR.z*rr7 - (dipole.y*deltaR.z + dipole.z*deltaR.y)*rr5;
#endif
}
#endif
......@@ -206,12 +358,18 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
extern "C" __global__ void computeInducedField(
unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar, const real4* __restrict__ posq, const ushort2* __restrict__ exclusionTiles,
const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef EXTRAPOLATED_POLARIZATION
unsigned long long* __restrict__ fieldGradient, unsigned long long* __restrict__ fieldGradientPolar,
#endif
#ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
#elif defined USE_GK
unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS, const real* __restrict__ inducedDipoleS,
const real* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii,
#ifdef EXTRAPOLATED_POLARIZATION
unsigned long long* __restrict__ fieldGradientS, unsigned long long* __restrict__ fieldGradientPolarS,
#endif
#endif
const float2* __restrict__ dampingAndThole) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
......@@ -284,36 +442,10 @@ extern "C" __global__ void computeInducedField(
// Write results.
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (data.field.x*0x100000000)));
atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.y*0x100000000)));
atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.z*0x100000000)));
atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0x100000000)));
#ifdef USE_GK
atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (data.fieldS.x*0x100000000)));
atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.y*0x100000000)));
atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.z*0x100000000)));
atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (data.fieldPolarS.x*0x100000000)));
atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.y*0x100000000)));
atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.z*0x100000000)));
#endif
SAVE_ATOM_DATA(offset, data)
if (x != y) {
offset = y*TILE_SIZE + tgx;
atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.x*0x100000000)));
atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.y*0x100000000)));
atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.z*0x100000000)));
atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0x100000000)));
#ifdef USE_GK
atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.x*0x100000000)));
atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.y*0x100000000)));
atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.z*0x100000000)));
atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.x*0x100000000)));
atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.y*0x100000000)));
atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.z*0x100000000)));
#endif
SAVE_ATOM_DATA(offset, localData[threadIdx.x])
}
}
......@@ -412,39 +544,13 @@ extern "C" __global__ void computeInducedField(
// Write results.
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (data.field.x*0x100000000)));
atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.y*0x100000000)));
atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.z*0x100000000)));
atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0x100000000)));
#ifdef USE_GK
atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (data.fieldS.x*0x100000000)));
atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.y*0x100000000)));
atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.z*0x100000000)));
atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (data.fieldPolarS.x*0x100000000)));
atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.y*0x100000000)));
atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.z*0x100000000)));
#endif
SAVE_ATOM_DATA(offset, data)
#ifdef USE_CUTOFF
offset = atomIndices[threadIdx.x];
#else
offset = y*TILE_SIZE + tgx;
#endif
atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.x*0x100000000)));
atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.y*0x100000000)));
atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.z*0x100000000)));
atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0x100000000)));
#ifdef USE_GK
atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.x*0x100000000)));
atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.y*0x100000000)));
atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.z*0x100000000)));
atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.x*0x100000000)));
atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.y*0x100000000)));
atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.z*0x100000000)));
#endif
SAVE_ATOM_DATA(offset, localData[threadIdx.x])
}
pos++;
}
......@@ -533,8 +639,8 @@ extern "C" __global__ void recordInducedDipolesForDIIS(const long long* __restri
real oldDipole = inducedDipole[dipoleIndex];
real oldDipolePolar = inducedDipolePolar[dipoleIndex];
long long fixedS = (fixedFieldS == NULL ? (long long) 0 : fixedFieldS[fieldIndex]);
real newDipole = scale*((fixedField[fieldIndex]+fixedS+inducedField[fieldIndex])*fieldScale+ewaldScale*oldDipole);
real newDipolePolar = scale*((fixedFieldPolar[fieldIndex]+fixedS+inducedFieldPolar[fieldIndex])*fieldScale+ewaldScale*oldDipolePolar);
real newDipole = scale*((fixedField[fieldIndex]+fixedS+inducedField[fieldIndex])*fieldScale);
real newDipolePolar = scale*((fixedFieldPolar[fieldIndex]+fixedS+inducedFieldPolar[fieldIndex])*fieldScale);
int storePrevIndex = dipoleIndex+min(iteration, MAX_PREV_DIIS_DIPOLES-1)*NUM_ATOMS*3;
prevDipoles[storePrevIndex] = newDipole;
prevDipolesPolar[storePrevIndex] = newDipolePolar;
......@@ -607,3 +713,146 @@ extern "C" __global__ void updateInducedFieldByDIIS(real* __restrict__ inducedDi
inducedDipolePolar[index] = sumPolar;
}
}
extern "C" __global__ void initExtrapolatedDipoles(real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, real* __restrict__ extrapolatedDipole,
real* __restrict__ extrapolatedDipolePolar, long long* __restrict__ inducedDipoleFieldGradient, long long* __restrict__ inducedDipoleFieldGradientPolar
#ifdef USE_GK
, real* __restrict__ inducedDipoleGk, real* __restrict__ inducedDipoleGkPolar, real* __restrict__ extrapolatedDipoleGk, real* __restrict__ extrapolatedDipoleGkPolar,
real* __restrict__ inducedDipoleFieldGradientGk, real* __restrict__ inducedDipoleFieldGradientGkPolar
#endif
) {
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 3*NUM_ATOMS; index += blockDim.x*gridDim.x) {
extrapolatedDipole[index] = inducedDipole[index];
extrapolatedDipolePolar[index] = inducedDipolePolar[index];
#ifdef USE_GK
extrapolatedDipoleGk[index] = inducedDipoleGk[index];
extrapolatedDipoleGkPolar[index] = inducedDipoleGkPolar[index];
#endif
}
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 6*NUM_ATOMS; index += blockDim.x*gridDim.x) {
inducedDipoleFieldGradient[index] = 0;
inducedDipoleFieldGradientPolar[index] = 0;
#ifdef USE_GK
inducedDipoleFieldGradientGk[index] = 0;
inducedDipoleFieldGradientGkPolar[index] = 0;
#endif
}
}
extern "C" __global__ void iterateExtrapolatedDipoles(int order, real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, real* __restrict__ extrapolatedDipole,
real* __restrict__ extrapolatedDipolePolar, long long* __restrict__ inducedDipoleFieldGradient, long long* __restrict__ inducedDipoleFieldGradientPolar,
long long* __restrict__ inducedDipoleField, long long* __restrict__ inducedDipoleFieldPolar, real* __restrict__ extrapolatedDipoleFieldGradient, real* __restrict__ extrapolatedDipoleFieldGradientPolar,
#ifdef USE_GK
real* __restrict__ inducedDipoleGk, real* __restrict__ inducedDipoleGkPolar, real* __restrict__ extrapolatedDipoleGk, real* __restrict__ extrapolatedDipoleGkPolar,
real* __restrict__ inducedDipoleFieldGradientGk, real* __restrict__ inducedDipoleFieldGradientGkPolar, long long* __restrict__ inducedDipoleFieldGk,
long long* __restrict__ inducedDipoleFieldGkPolar, real* __restrict__ extrapolatedDipoleFieldGradientGk, real* __restrict__ extrapolatedDipoleFieldGradientGkPolar,
#endif
const float* __restrict__ polarizability) {
const real fieldScale = 1/(real) 0x100000000;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 3*NUM_ATOMS; index += blockDim.x*gridDim.x) {
int atom = index/3;
int component = index-3*atom;
int fieldIndex = atom+component*PADDED_NUM_ATOMS;
float polar = polarizability[atom];
real value = inducedDipoleField[fieldIndex]*fieldScale*polar;
inducedDipole[index] = value;
extrapolatedDipole[order*3*NUM_ATOMS+index] = value;
value = inducedDipoleFieldPolar[fieldIndex]*fieldScale*polar;
inducedDipolePolar[index] = value;
extrapolatedDipolePolar[order*3*NUM_ATOMS+index] = value;
#ifdef USE_GK
value = inducedDipoleFieldGk[fieldIndex]*fieldScale*polar;
inducedDipoleGk[index] = value;
extrapolatedDipoleGk[order*3*NUM_ATOMS+index] = value;
value = inducedDipoleFieldGkPolar[fieldIndex]*fieldScale*polar;
inducedDipoleGkPolar[index] = value;
extrapolatedDipoleGkPolar[order*3*NUM_ATOMS+index] = value;
#endif
}
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 6*NUM_ATOMS; index += blockDim.x*gridDim.x) {
int index2 = (order-1)*6*NUM_ATOMS+index;
extrapolatedDipoleFieldGradient[index2] = fieldScale*inducedDipoleFieldGradient[index];
extrapolatedDipoleFieldGradientPolar[index2] = fieldScale*inducedDipoleFieldGradientPolar[index];
#ifdef USE_GK
extrapolatedDipoleFieldGradientGk[index2] = fieldScale*inducedDipoleFieldGradientGk[index];
extrapolatedDipoleFieldGradientGkPolar[index2] = fieldScale*inducedDipoleFieldGradientGkPolar[index];
#endif
}
}
extern "C" __global__ void computeExtrapolatedDipoles(real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, real* __restrict__ extrapolatedDipole,
real* __restrict__ extrapolatedDipolePolar
#ifdef USE_GK
, real* __restrict__ inducedDipoleGk, real* __restrict__ inducedDipoleGkPolar, real* __restrict__ extrapolatedDipoleGk, real* __restrict__ extrapolatedDipoleGkPolar
#endif
) {
real coeff[] = {EXTRAPOLATION_COEFFICIENTS_SUM};
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 3*NUM_ATOMS; index += blockDim.x*gridDim.x) {
real sum = 0, sumPolar = 0, sumGk = 0, sumGkPolar = 0;
for (int order = 0; order < MAX_EXTRAPOLATION_ORDER; order++) {
sum += extrapolatedDipole[order*3*NUM_ATOMS+index]*coeff[order];
sumPolar += extrapolatedDipolePolar[order*3*NUM_ATOMS+index]*coeff[order];
#ifdef USE_GK
sumGk += extrapolatedDipoleGk[order*3*NUM_ATOMS+index]*coeff[order];
sumGkPolar += extrapolatedDipoleGkPolar[order*3*NUM_ATOMS+index]*coeff[order];
#endif
}
inducedDipole[index] = sum;
inducedDipolePolar[index] = sumPolar;
#ifdef USE_GK
inducedDipoleGk[index] = sumGk;
inducedDipoleGkPolar[index] = sumGkPolar;
#endif
}
}
extern "C" __global__ void addExtrapolatedFieldGradientToForce(long long* __restrict__ forceBuffers, real* __restrict__ extrapolatedDipole,
real* __restrict__ extrapolatedDipolePolar, real* __restrict__ extrapolatedDipoleFieldGradient, real* __restrict__ extrapolatedDipoleFieldGradientPolar
#ifdef USE_GK
, real* __restrict__ extrapolatedDipoleGk, real* __restrict__ extrapolatedDipoleGkPolar,
real* __restrict__ extrapolatedDipoleFieldGradientGk, real* __restrict__ extrapolatedDipoleFieldGradientGkPolar
#endif
) {
real coeff[] = {EXTRAPOLATION_COEFFICIENTS_SUM};
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x) {
real fx = 0, fy = 0, fz = 0;
for (int l = 0; l < MAX_EXTRAPOLATION_ORDER-1; l++) {
int index1 = 3*(l*NUM_ATOMS+atom);
real dipole[] = {extrapolatedDipole[index1], extrapolatedDipole[index1+1], extrapolatedDipole[index1+2]};
real dipolePolar[] = {extrapolatedDipolePolar[index1], extrapolatedDipolePolar[index1+1], extrapolatedDipolePolar[index1+2]};
#ifdef USE_GK
real dipoleGk[] = {extrapolatedDipoleGk[index1], extrapolatedDipoleGk[index1+1], extrapolatedDipoleGk[index1+2]};
real dipoleGkPolar[] = {extrapolatedDipoleGkPolar[index1], extrapolatedDipoleGkPolar[index1+1], extrapolatedDipoleGkPolar[index1+2]};
#endif
for (int m = 0; m < MAX_EXTRAPOLATION_ORDER-1-l; m++) {
int index2 = 6*(m*NUM_ATOMS+atom);
real scale = 0.5f*coeff[l+m+1]*ENERGY_SCALE_FACTOR;
real gradient[] = {extrapolatedDipoleFieldGradient[index2], extrapolatedDipoleFieldGradient[index2+1], extrapolatedDipoleFieldGradient[index2+2],
extrapolatedDipoleFieldGradient[index2+3], extrapolatedDipoleFieldGradient[index2+4], extrapolatedDipoleFieldGradient[index2+5]};
real gradientPolar[] = {extrapolatedDipoleFieldGradientPolar[index2], extrapolatedDipoleFieldGradientPolar[index2+1], extrapolatedDipoleFieldGradientPolar[index2+2],
extrapolatedDipoleFieldGradientPolar[index2+3], extrapolatedDipoleFieldGradientPolar[index2+4], extrapolatedDipoleFieldGradientPolar[index2+5]};
fx += scale*(dipole[0]*gradientPolar[0] + dipole[1]*gradientPolar[3] + dipole[2]*gradientPolar[4]);
fy += scale*(dipole[0]*gradientPolar[3] + dipole[1]*gradientPolar[1] + dipole[2]*gradientPolar[5]);
fz += scale*(dipole[0]*gradientPolar[4] + dipole[1]*gradientPolar[5] + dipole[2]*gradientPolar[2]);
fx += scale*(dipolePolar[0]*gradient[0] + dipolePolar[1]*gradient[3] + dipolePolar[2]*gradient[4]);
fy += scale*(dipolePolar[0]*gradient[3] + dipolePolar[1]*gradient[1] + dipolePolar[2]*gradient[5]);
fz += scale*(dipolePolar[0]*gradient[4] + dipolePolar[1]*gradient[5] + dipolePolar[2]*gradient[2]);
#ifdef USE_GK
real gradientGk[] = {extrapolatedDipoleFieldGradient[index2], extrapolatedDipoleFieldGradient[index2+1], extrapolatedDipoleFieldGradient[index2+2],
extrapolatedDipoleFieldGradient[index2+3], extrapolatedDipoleFieldGradient[index2+4], extrapolatedDipoleFieldGradient[index2+5]};
real gradientGkPolar[] = {extrapolatedDipoleFieldGradientPolar[index2], extrapolatedDipoleFieldGradientPolar[index2+1], extrapolatedDipoleFieldGradientPolar[index2+2],
extrapolatedDipoleFieldGradientPolar[index2+3], extrapolatedDipoleFieldGradientPolar[index2+4], extrapolatedDipoleFieldGradientPolar[index2+5]};
fx += scale*(dipoleGk[0]*gradientGkPolar[0] + dipoleGk[1]*gradientGkPolar[3] + dipoleGk[2]*gradientGkPolar[4]);
fy += scale*(dipoleGk[0]*gradientGkPolar[3] + dipoleGk[1]*gradientGkPolar[1] + dipoleGk[2]*gradientGkPolar[5]);
fz += scale*(dipoleGk[0]*gradientGkPolar[4] + dipoleGk[1]*gradientGkPolar[5] + dipoleGk[2]*gradientGkPolar[2]);
fx += scale*(dipoleGkPolar[0]*gradientGk[0] + dipoleGkPolar[1]*gradientGk[3] + dipoleGkPolar[2]*gradientGk[4]);
fy += scale*(dipoleGkPolar[0]*gradientGk[3] + dipoleGkPolar[1]*gradientGk[1] + dipoleGkPolar[2]*gradientGk[5]);
fz += scale*(dipoleGkPolar[0]*gradientGk[4] + dipoleGkPolar[1]*gradientGk[5] + dipoleGkPolar[2]*gradientGk[2]);
#endif
}
}
forceBuffers[atom] += (long long) (fx*0x100000000);
forceBuffers[atom+PADDED_NUM_ATOMS] += (long long) (fy*0x100000000);
forceBuffers[atom+PADDED_NUM_ATOMS*2] += (long long) (fz*0x100000000);
}
}
......@@ -643,8 +643,8 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
// Process the atoms in spatially sorted order. This improves cache performance when loading
// the grid values.
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
int m = pmeAtomGridIndex[i].x;
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x) {
int m = pmeAtomGridIndex[atom].x;
real4 pos = posq[m];
pos -= periodicBoxVecZ*floor(pos.z*recipBoxVecZ.z+0.5f);
pos -= periodicBoxVecY*floor(pos.y*recipBoxVecY.z+0.5f);
......@@ -1051,7 +1051,7 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
f.x += (inducedDipole[k]+inducedDipolePolar[k])*phi[i+NUM_ATOMS*j1];
f.y += (inducedDipole[k]+inducedDipolePolar[k])*phi[i+NUM_ATOMS*j2];
f.z += (inducedDipole[k]+inducedDipolePolar[k])*phi[i+NUM_ATOMS*j3];
#ifndef DIRECT_POLARIZATION
#ifdef MUTUAL_POLARIZATION
f.x += (inducedDipole[k]*phip[i+NUM_ATOMS*j1] + inducedDipolePolar[k]*phid[i+NUM_ATOMS*j1]);
f.y += (inducedDipole[k]*phip[i+NUM_ATOMS*j2] + inducedDipolePolar[k]*phid[i+NUM_ATOMS*j2]);
f.z += (inducedDipole[k]*phip[i+NUM_ATOMS*j3] + inducedDipolePolar[k]*phid[i+NUM_ATOMS*j3]);
......@@ -1073,8 +1073,12 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.25f*EPSILON_FACTOR*energy;
}
extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phid, real* const __restrict__ phip,
long long* __restrict__ inducedField, long long* __restrict__ inducedFieldPolar, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phid, real* const __restrict__ phip, long long* __restrict__ inducedField,
long long* __restrict__ inducedFieldPolar, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar,
#ifdef EXTRAPOLATED_POLARIZATION
unsigned long long* __restrict__ fieldGradient, unsigned long long* __restrict__ fieldGradientPolar,
#endif
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
__shared__ real fracToCart[3][3];
if (threadIdx.x == 0) {
fracToCart[0][0] = GRID_SIZE_X*recipBoxVecX.x;
......@@ -1088,12 +1092,62 @@ extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ ph
fracToCart[2][2] = GRID_SIZE_Z*recipBoxVecZ.z;
}
__syncthreads();
real selfDipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
inducedField[i] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[0][0] + phid[i+NUM_ATOMS*2]*fracToCart[0][1] + phid[i+NUM_ATOMS*3]*fracToCart[0][2]));
inducedField[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[1][0] + phid[i+NUM_ATOMS*2]*fracToCart[1][1] + phid[i+NUM_ATOMS*3]*fracToCart[1][2]));
inducedField[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[2][0] + phid[i+NUM_ATOMS*2]*fracToCart[2][1] + phid[i+NUM_ATOMS*3]*fracToCart[2][2]));
inducedFieldPolar[i] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[0][0] + phip[i+NUM_ATOMS*2]*fracToCart[0][1] + phip[i+NUM_ATOMS*3]*fracToCart[0][2]));
inducedFieldPolar[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[1][0] + phip[i+NUM_ATOMS*2]*fracToCart[1][1] + phip[i+NUM_ATOMS*3]*fracToCart[1][2]));
inducedFieldPolar[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[2][0] + phip[i+NUM_ATOMS*2]*fracToCart[2][1] + phip[i+NUM_ATOMS*3]*fracToCart[2][2]));
inducedField[i] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[0][0] + phid[i+NUM_ATOMS*2]*fracToCart[0][1] + phid[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[3*i]));
inducedField[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[1][0] + phid[i+NUM_ATOMS*2]*fracToCart[1][1] + phid[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[3*i+1]));
inducedField[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[2][0] + phid[i+NUM_ATOMS*2]*fracToCart[2][1] + phid[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[3*i+2]));
inducedFieldPolar[i] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[0][0] + phip[i+NUM_ATOMS*2]*fracToCart[0][1] + phip[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipolePolar[3*i]));
inducedFieldPolar[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[1][0] + phip[i+NUM_ATOMS*2]*fracToCart[1][1] + phip[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipolePolar[3*i+1]));
inducedFieldPolar[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[2][0] + phip[i+NUM_ATOMS*2]*fracToCart[2][1] + phip[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipolePolar[3*i+2]));
#ifdef EXTRAPOLATED_POLARIZATION
// Compute and store the field gradients for later use.
real EmatD[3][3] = {
{phid[i+NUM_ATOMS*4], phid[i+NUM_ATOMS*7], phid[i+NUM_ATOMS*8]},
{phid[i+NUM_ATOMS*7], phid[i+NUM_ATOMS*5], phid[i+NUM_ATOMS*9]},
{phid[i+NUM_ATOMS*8], phid[i+NUM_ATOMS*9], phid[i+NUM_ATOMS*6]}
};
real Exx = 0, Eyy = 0, Ezz = 0, Exy = 0, Exz = 0, Eyz = 0;
for (int k = 0; k < 3; ++k) {
for (int l = 0; l < 3; ++l) {
Exx += fracToCart[0][k] * EmatD[k][l] * fracToCart[0][l];
Eyy += fracToCart[1][k] * EmatD[k][l] * fracToCart[1][l];
Ezz += fracToCart[2][k] * EmatD[k][l] * fracToCart[2][l];
Exy += fracToCart[0][k] * EmatD[k][l] * fracToCart[1][l];
Exz += fracToCart[0][k] * EmatD[k][l] * fracToCart[2][l];
Eyz += fracToCart[1][k] * EmatD[k][l] * fracToCart[2][l];
}
}
atomicAdd(&fieldGradient[6*i+0], static_cast<unsigned long long>((long long) (-Exx*0x100000000)));
atomicAdd(&fieldGradient[6*i+1], static_cast<unsigned long long>((long long) (-Eyy*0x100000000)));
atomicAdd(&fieldGradient[6*i+2], static_cast<unsigned long long>((long long) (-Ezz*0x100000000)));
atomicAdd(&fieldGradient[6*i+3], static_cast<unsigned long long>((long long) (-Exy*0x100000000)));
atomicAdd(&fieldGradient[6*i+4], static_cast<unsigned long long>((long long) (-Exz*0x100000000)));
atomicAdd(&fieldGradient[6*i+5], static_cast<unsigned long long>((long long) (-Eyz*0x100000000)));
real EmatP[3][3] = {
{phip[i+NUM_ATOMS*4], phip[i+NUM_ATOMS*7], phip[i+NUM_ATOMS*8]},
{phip[i+NUM_ATOMS*7], phip[i+NUM_ATOMS*5], phip[i+NUM_ATOMS*9]},
{phip[i+NUM_ATOMS*8], phip[i+NUM_ATOMS*9], phip[i+NUM_ATOMS*6]}
};
Exx = 0; Eyy = 0; Ezz = 0; Exy = 0; Exz = 0; Eyz = 0;
for (int k = 0; k < 3; ++k) {
for (int l = 0; l < 3; ++l) {
Exx += fracToCart[0][k] * EmatP[k][l] * fracToCart[0][l];
Eyy += fracToCart[1][k] * EmatP[k][l] * fracToCart[1][l];
Ezz += fracToCart[2][k] * EmatP[k][l] * fracToCart[2][l];
Exy += fracToCart[0][k] * EmatP[k][l] * fracToCart[1][l];
Exz += fracToCart[0][k] * EmatP[k][l] * fracToCart[2][l];
Eyz += fracToCart[1][k] * EmatP[k][l] * fracToCart[2][l];
}
}
atomicAdd(&fieldGradientPolar[6*i+0], static_cast<unsigned long long>((long long) (-Exx*0x100000000)));
atomicAdd(&fieldGradientPolar[6*i+1], static_cast<unsigned long long>((long long) (-Eyy*0x100000000)));
atomicAdd(&fieldGradientPolar[6*i+2], static_cast<unsigned long long>((long long) (-Ezz*0x100000000)));
atomicAdd(&fieldGradientPolar[6*i+3], static_cast<unsigned long long>((long long) (-Exy*0x100000000)));
atomicAdd(&fieldGradientPolar[6*i+4], static_cast<unsigned long long>((long long) (-Exz*0x100000000)));
atomicAdd(&fieldGradientPolar[6*i+5], static_cast<unsigned long long>((long long) (-Eyz*0x100000000)));
#endif
}
}
......@@ -365,7 +365,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
real iEIY = qiUinpI.x*Vijp[1] + qiUindI.x*Vijd[1] - qiUinpI.y*Vijp[0] - qiUindI.y*Vijd[0];
real iEJY = qiUinpJ.x*Vjip[1] + qiUindJ.x*Vjid[1] - qiUinpJ.y*Vjip[0] - qiUindJ.y*Vjid[0];
#ifdef USE_MUTUAL_POLARIZATION
#ifdef MUTUAL_POLARIZATION
// Uind-Uind terms (m=0)
real eCoef = -fourThirds*rInvVec[3]*(3*(thole_d0 + bVec[3]) + alphaRVec[3]*X);
real dCoef = rInvVec[4]*(6*(dthole_d0 + bVec[3]) + 4*alphaRVec[5]*X);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment