Commit dca54ec7 authored by Saurabh Belsare's avatar Saurabh Belsare
Browse files

Merged fork with latest original master

parents cace5edf 01f9e415
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Peter Eastman *
* Contributors: *
* *
......@@ -183,13 +183,34 @@ public:
*/
void getParticleExclusions(int particleIndex, std::vector<int>& exclusions) const;
/**
* Get the cutoff distance (in nm) being used for nonbonded interactions. If the NonbondedMethod in use
* is NoCutoff, this value will have no effect.
*
* @return the cutoff distance, measured in nm
*/
double getCutoffDistance() const;
/**
* Set the cutoff distance (in nm) being used for nonbonded interactions. If the NonbondedMethod in use
* is NoCutoff, this value will have no effect.
*
* @param distance the cutoff distance, measured in nm
*/
void setCutoffDistance(double distance);
/**
* Set the cutoff distance.
*
* @deprecated This method exists only for backward compatibility. Use setCutoffDistance() instead.
*/
void setCutoff(double cutoff);
/**
* Get the cutoff distance.
*
* @deprecated This method exists only for backward compatibility. Use getCutoffDistance() instead.
*/
double getCutoff() const;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Authors: *
* Contributors: *
* *
......@@ -36,7 +36,7 @@
using namespace OpenMM;
/**
 * Create an AmoebaAngleForce.
 *
 * The periodic-boundary flag starts out false and the global cubic, quartic,
 * pentic and sextic coefficients are all initialized to zero.
 */
AmoebaAngleForce::AmoebaAngleForce() : usePeriodic(false) {
    _globalCubicK = _globalQuarticK = _globalPenticK = _globalSexticK = 0.0;
}
......@@ -102,3 +102,11 @@ ForceImpl* AmoebaAngleForce::createImpl() const {
/**
 * Forward a parameter update to the implementation object bound to a Context.
 *
 * The object returned by getImplInContext() is cast to AmoebaAngleForceImpl
 * (a failed reference cast throws std::bad_cast) and asked to update its
 * parameters using the ContextImpl obtained from getContextImpl().
 *
 * @param context  the Context whose copy of this force should be updated
 */
void AmoebaAngleForce::updateParametersInContext(Context& context) {
    AmoebaAngleForceImpl& angleImpl = dynamic_cast<AmoebaAngleForceImpl&>(getImplInContext(context));
    angleImpl.updateParametersInContext(getContextImpl(context));
}
void AmoebaAngleForce::setUsesPeriodicBoundaryConditions(bool periodic) {
usePeriodic = periodic;
}
bool AmoebaAngleForce::usesPeriodicBoundaryConditions() const {
return usePeriodic;
}
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Authors: *
* Contributors: *
* *
......@@ -36,7 +36,7 @@
using namespace OpenMM;
/**
 * Create an AmoebaBondForce.
 *
 * The periodic-boundary flag starts out false and the global cubic and
 * quartic coefficients are initialized to zero.
 */
AmoebaBondForce::AmoebaBondForce() : usePeriodic(false) {
    _globalCubicK = _globalQuarticK = 0.0;
}
......@@ -82,3 +82,11 @@ ForceImpl* AmoebaBondForce::createImpl() const {
/**
 * Forward a parameter update to the implementation object bound to a Context.
 *
 * The object returned by getImplInContext() is cast to AmoebaBondForceImpl
 * (a failed reference cast throws std::bad_cast) and asked to update its
 * parameters using the ContextImpl obtained from getContextImpl().
 *
 * @param context  the Context whose copy of this force should be updated
 */
void AmoebaBondForce::updateParametersInContext(Context& context) {
    AmoebaBondForceImpl& bondImpl = dynamic_cast<AmoebaBondForceImpl&>(getImplInContext(context));
    bondImpl.updateParametersInContext(getContextImpl(context));
}
void AmoebaBondForce::setUsesPeriodicBoundaryConditions(bool periodic) {
usePeriodic = periodic;
}
bool AmoebaBondForce::usesPeriodicBoundaryConditions() const {
return usePeriodic;
}
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Authors: *
* Contributors: *
* *
......@@ -36,7 +36,7 @@
using namespace OpenMM;
/**
 * Create an AmoebaInPlaneAngleForce.
 *
 * The periodic-boundary flag starts out false and the global cubic, quartic,
 * pentic and sextic coefficients are all initialized to zero.
 */
AmoebaInPlaneAngleForce::AmoebaInPlaneAngleForce() : usePeriodic(false) {
    _globalCubicK = _globalQuarticK = _globalPenticK = _globalSexticK = 0.0;
}
......@@ -104,3 +104,11 @@ ForceImpl* AmoebaInPlaneAngleForce::createImpl() const {
/**
 * Forward a parameter update to the implementation object bound to a Context.
 *
 * The object returned by getImplInContext() is cast to
 * AmoebaInPlaneAngleForceImpl (a failed reference cast throws std::bad_cast)
 * and asked to update its parameters using the ContextImpl obtained from
 * getContextImpl().
 *
 * @param context  the Context whose copy of this force should be updated
 */
void AmoebaInPlaneAngleForce::updateParametersInContext(Context& context) {
    AmoebaInPlaneAngleForceImpl& inPlaneImpl = dynamic_cast<AmoebaInPlaneAngleForceImpl&>(getImplInContext(context));
    inPlaneImpl.updateParametersInContext(getContextImpl(context));
}
void AmoebaInPlaneAngleForce::setUsesPeriodicBoundaryConditions(bool periodic) {
usePeriodic = periodic;
}
bool AmoebaInPlaneAngleForce::usesPeriodicBoundaryConditions() const {
return usePeriodic;
}
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Authors: *
* Contributors: *
* *
......@@ -40,9 +40,11 @@ using std::string;
using std::vector;
/**
 * Create an AmoebaMultipoleForce with its default settings.
 *
 * Defaults set here: NoCutoff nonbonded method, Mutual polarization, PME
 * B-spline order 5, cutoff distance 1.0, Ewald error tolerance 1e-4, at most
 * 60 mutual-induction iterations with target epsilon 1e-2, scaling distance
 * cutoff 100.0, and electric constant 138.9354558456. The PME alpha and the
 * three grid dimensions start at zero, and four extrapolation coefficients
 * (-0.154, 0.017, 0.658, 0.474) are installed in that order.
 */
AmoebaMultipoleForce::AmoebaMultipoleForce() : nonbondedMethod(NoCutoff), polarizationType(Mutual), pmeBSplineOrder(5), cutoffDistance(1.0), ewaldErrorTol(1e-4), mutualInducedMaxIterations(60),
                                               mutualInducedTargetEpsilon(1.0e-02), scalingDistanceCutoff(100.0), electricConstant(138.9354558456), alpha(0.0), nx(0), ny(0), nz(0) {
    extrapolationCoefficients.push_back(-0.154);
    extrapolationCoefficients.push_back(0.017);
    extrapolationCoefficients.push_back(0.658);
    extrapolationCoefficients.push_back(0.474);
}
AmoebaMultipoleForce::NonbondedMethod AmoebaMultipoleForce::getNonbondedMethod() const {
......@@ -61,6 +63,14 @@ void AmoebaMultipoleForce::setPolarizationType(AmoebaMultipoleForce::Polarizatio
polarizationType = type;
}
void AmoebaMultipoleForce::setExtrapolationCoefficients(const std::vector<double> &coefficients) {
extrapolationCoefficients = coefficients;
}
const std::vector<double> & AmoebaMultipoleForce::getExtrapolationCoefficients() const {
return extrapolationCoefficients;
}
double AmoebaMultipoleForce::getCutoffDistance() const {
return cutoffDistance;
}
......@@ -69,12 +79,26 @@ void AmoebaMultipoleForce::setCutoffDistance(double distance) {
cutoffDistance = distance;
}
/**
 * Copy the stored PME parameters into the caller's variables.
 *
 * The parameter names shadow the members of the same name, so the members are
 * referred to through their class-qualified names.
 *
 * @param alpha  receives the stored alpha value
 * @param nx     receives the stored grid size along x
 * @param ny     receives the stored grid size along y
 * @param nz     receives the stored grid size along z
 */
void AmoebaMultipoleForce::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
    nx = AmoebaMultipoleForce::nx;
    ny = AmoebaMultipoleForce::ny;
    nz = AmoebaMultipoleForce::nz;
    alpha = AmoebaMultipoleForce::alpha;
}
/**
 * Store the PME parameters on this force object.
 *
 * The parameter names shadow the members of the same name, so the members are
 * referred to through their class-qualified names. No validation is done.
 *
 * @param alpha  the alpha value to store
 * @param nx     the grid size along x to store
 * @param ny     the grid size along y to store
 * @param nz     the grid size along z to store
 */
void AmoebaMultipoleForce::setPMEParameters(double alpha, int nx, int ny, int nz) {
    AmoebaMultipoleForce::nx = nx;
    AmoebaMultipoleForce::ny = ny;
    AmoebaMultipoleForce::nz = nz;
    AmoebaMultipoleForce::alpha = alpha;
}
/**
 * Get the Ewald alpha value.
 *
 * This reads the same `alpha` member that getPMEParameters() reports, so the
 * two accessors always agree.
 *
 * @return the stored alpha value
 */
double AmoebaMultipoleForce::getAEwald() const {
    return alpha;
}
/**
 * Set the Ewald alpha value.
 *
 * This writes the same `alpha` member that setPMEParameters() writes; the
 * grid dimensions are left untouched.
 *
 * @param inputAewald  the alpha value to store
 */
void AmoebaMultipoleForce::setAEwald(double inputAewald) {
    alpha = inputAewald;
}
int AmoebaMultipoleForce::getPmeBSplineOrder() const {
......@@ -82,25 +106,18 @@ int AmoebaMultipoleForce::getPmeBSplineOrder() const {
}
/**
 * Copy the stored PME grid dimensions into a vector.
 *
 * The grid sizes live in the nx/ny/nz members (the same ones reported by
 * getPMEParameters()). If the supplied vector has fewer than three elements
 * it is resized to three; a longer vector keeps its extra elements untouched.
 *
 * @param gridDimension  on return, elements 0..2 hold nx, ny and nz
 */
void AmoebaMultipoleForce::getPmeGridDimensions(std::vector<int>& gridDimension) const {
    if (gridDimension.size() < 3)
        gridDimension.resize(3);
    gridDimension[0] = nx;
    gridDimension[1] = ny;
    gridDimension[2] = nz;
}
/**
 * Set the PME grid dimensions from a vector.
 *
 * Elements 0..2 are stored into the nx/ny/nz members (the same ones written
 * by setPMEParameters()); alpha is left untouched. The vector is indexed
 * without a size check, so it must contain at least three elements.
 *
 * @param gridDimension  the grid sizes along x, y and z, in that order
 */
void AmoebaMultipoleForce::setPmeGridDimensions(const std::vector<int>& gridDimension) {
    nx = gridDimension[0];
    ny = gridDimension[1];
    nz = gridDimension[2];
}
void AmoebaMultipoleForce::getPMEParametersInContext(const Context& context, double& alpha, int& nx, int& ny, int& nz) const {
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Authors: *
* Contributors: *
* *
......@@ -36,7 +36,7 @@
using namespace OpenMM;
AmoebaOutOfPlaneBendForce::AmoebaOutOfPlaneBendForce() {
AmoebaOutOfPlaneBendForce::AmoebaOutOfPlaneBendForce() : usePeriodic(false) {
_globalCubicK = -0.1400000E-01;
_globalQuarticK = 0.5600000E-04;
_globalPenticK = -0.7000000E-06;
......@@ -106,3 +106,11 @@ ForceImpl* AmoebaOutOfPlaneBendForce::createImpl() const {
/**
 * Forward a parameter update to the implementation object bound to a Context.
 *
 * The object returned by getImplInContext() is cast to
 * AmoebaOutOfPlaneBendForceImpl (a failed reference cast throws
 * std::bad_cast) and asked to update its parameters using the ContextImpl
 * obtained from getContextImpl().
 *
 * @param context  the Context whose copy of this force should be updated
 */
void AmoebaOutOfPlaneBendForce::updateParametersInContext(Context& context) {
    AmoebaOutOfPlaneBendForceImpl& bendImpl = dynamic_cast<AmoebaOutOfPlaneBendForceImpl&>(getImplInContext(context));
    bendImpl.updateParametersInContext(getContextImpl(context));
}
void AmoebaOutOfPlaneBendForce::setUsesPeriodicBoundaryConditions(bool periodic) {
usePeriodic = periodic;
}
bool AmoebaOutOfPlaneBendForce::usesPeriodicBoundaryConditions() const {
return usePeriodic;
}
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Authors: *
* Contributors: *
* *
......@@ -36,7 +36,7 @@
using namespace OpenMM;
/**
 * Create an AmoebaPiTorsionForce with the periodic-boundary flag set to false.
 */
AmoebaPiTorsionForce::AmoebaPiTorsionForce() : usePeriodic(false) {
}
int AmoebaPiTorsionForce::addPiTorsion(int particle1, int particle2, int particle3, int particle4, int particle5, int particle6, double k) {
......@@ -71,3 +71,11 @@ ForceImpl* AmoebaPiTorsionForce::createImpl() const {
/**
 * Forward a parameter update to the implementation object bound to a Context.
 *
 * The object returned by getImplInContext() is cast to
 * AmoebaPiTorsionForceImpl (a failed reference cast throws std::bad_cast) and
 * asked to update its parameters using the ContextImpl obtained from
 * getContextImpl().
 *
 * @param context  the Context whose copy of this force should be updated
 */
void AmoebaPiTorsionForce::updateParametersInContext(Context& context) {
    AmoebaPiTorsionForceImpl& piTorsionImpl = dynamic_cast<AmoebaPiTorsionForceImpl&>(getImplInContext(context));
    piTorsionImpl.updateParametersInContext(getContextImpl(context));
}
void AmoebaPiTorsionForce::setUsesPeriodicBoundaryConditions(bool periodic) {
usePeriodic = periodic;
}
bool AmoebaPiTorsionForce::usesPeriodicBoundaryConditions() const {
return usePeriodic;
}
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Authors: *
* Contributors: *
* *
......@@ -36,7 +36,7 @@
using namespace OpenMM;
/**
 * Create an AmoebaStretchBendForce with the periodic-boundary flag set to false.
 */
AmoebaStretchBendForce::AmoebaStretchBendForce() : usePeriodic(false) {
}
int AmoebaStretchBendForce::addStretchBend(int particle1, int particle2, int particle3,
......@@ -76,3 +76,11 @@ ForceImpl* AmoebaStretchBendForce::createImpl() const {
/**
 * Forward a parameter update to the implementation object bound to a Context.
 *
 * The object returned by getImplInContext() is cast to
 * AmoebaStretchBendForceImpl (a failed reference cast throws std::bad_cast)
 * and asked to update its parameters using the ContextImpl obtained from
 * getContextImpl().
 *
 * @param context  the Context whose copy of this force should be updated
 */
void AmoebaStretchBendForce::updateParametersInContext(Context& context) {
    AmoebaStretchBendForceImpl& stretchBendImpl = dynamic_cast<AmoebaStretchBendForceImpl&>(getImplInContext(context));
    stretchBendImpl.updateParametersInContext(getContextImpl(context));
}
void AmoebaStretchBendForce::setUsesPeriodicBoundaryConditions(bool periodic) {
usePeriodic = periodic;
}
bool AmoebaStretchBendForce::usesPeriodicBoundaryConditions() const {
return usePeriodic;
}
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2014 Stanford University and the Authors. *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Authors: *
* Contributors: *
* *
......@@ -38,7 +38,7 @@
using namespace OpenMM;
using namespace std;
/**
 * Create an AmoebaTorsionTorsionForce with the periodic-boundary flag set to false.
 */
AmoebaTorsionTorsionForce::AmoebaTorsionTorsionForce() : usePeriodic(false) {
}
int AmoebaTorsionTorsionForce::addTorsionTorsion(int particle1, int particle2, int particle3,
......@@ -183,3 +183,11 @@ AmoebaTorsionTorsionForce::TorsionTorsionGridInfo::TorsionTorsionGridInfo(const
}
}
}
void AmoebaTorsionTorsionForce::setUsesPeriodicBoundaryConditions(bool periodic) {
usePeriodic = periodic;
}
bool AmoebaTorsionTorsionForce::usesPeriodicBoundaryConditions() const {
return usePeriodic;
}
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Authors: *
* Contributors: *
* *
......@@ -102,12 +102,20 @@ void AmoebaVdwForce::getParticleExclusions(int particleIndex, std::vector< int >
}
void AmoebaVdwForce::setCutoff(double inputCutoff) {
double AmoebaVdwForce::getCutoffDistance() const {
return cutoff;
}
void AmoebaVdwForce::setCutoffDistance(double inputCutoff) {
cutoff = inputCutoff;
}
void AmoebaVdwForce::setCutoff(double inputCutoff) {
setCutoffDistance(inputCutoff);
}
/**
 * Get the cutoff distance.
 *
 * Kept as a thin wrapper that simply delegates to getCutoffDistance().
 *
 * @return the stored cutoff value
 */
double AmoebaVdwForce::getCutoff() const {
    return getCutoffDistance();
}
AmoebaVdwForce::NonbondedMethod AmoebaVdwForce::getNonbondedMethod() const {
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008 Stanford University and the Authors. *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Authors: *
* Contributors: *
* *
......@@ -62,7 +62,7 @@ void AmoebaVdwForceImpl::initialize(ContextImpl& context) {
if (owner.getNonbondedMethod() == AmoebaVdwForce::CutoffPeriodic) {
Vec3 boxVectors[3];
system.getDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
double cutoff = owner.getCutoff();
double cutoff = owner.getCutoffDistance();
if (cutoff > 0.5*boxVectors[0][0] || cutoff > 0.5*boxVectors[1][1] || cutoff > 0.5*boxVectors[2][2])
throw OpenMMException("AmoebaVdwForce: The cutoff distance cannot be greater than half the periodic box size.");
}
......@@ -103,7 +103,7 @@ double AmoebaVdwForceImpl::calcDispersionCorrection(const System& system, const
}
// Compute the VdW tapering coefficients. Mostly copied from amoebaCudaGpu.cpp.
double cutoff = force.getCutoff();
double cutoff = force.getCutoffDistance();
double vdwTaper = 0.90; // vdwTaper is a scaling factor, it is not a distance.
double c0 = 0.0;
double c1 = 0.0;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Authors: Peter Eastman, Mark Friedrichs *
* Contributors: *
* *
......@@ -117,6 +117,7 @@ void CudaCalcAmoebaBondForceKernel::initialize(const System& system, const Amoeb
}
params->upload(paramVector);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaAmoebaKernelSources::amoebaBondForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float2");
replacements["CUBIC_K"] = cu.doubleToString(force.getAmoebaGlobalBondCubic());
......@@ -214,6 +215,7 @@ void CudaCalcAmoebaAngleForceKernel::initialize(const System& system, const Amoe
}
params->upload(paramVector);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaAmoebaKernelSources::amoebaAngleForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float2");
replacements["CUBIC_K"] = cu.doubleToString(force.getAmoebaGlobalAngleCubic());
......@@ -315,6 +317,7 @@ void CudaCalcAmoebaInPlaneAngleForceKernel::initialize(const System& system, con
}
params->upload(paramVector);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float2");
replacements["CUBIC_K"] = cu.doubleToString(force.getAmoebaGlobalInPlaneAngleCubic());
replacements["QUARTIC_K"] = cu.doubleToString(force.getAmoebaGlobalInPlaneAngleQuartic());
......@@ -417,6 +420,7 @@ void CudaCalcAmoebaPiTorsionForceKernel::initialize(const System& system, const
}
params->upload(paramVector);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaAmoebaKernelSources::amoebaPiTorsionForce, replacements), force.getForceGroup());
cu.addForce(new ForceInfo(force));
......@@ -517,6 +521,7 @@ void CudaCalcAmoebaStretchBendForceKernel::initialize(const System& system, cons
params1->upload(paramVector);
params2->upload(paramVectorK);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params1->getDevicePointer(), "float3");
replacements["FORCE_CONSTANTS"] = cu.getBondedUtilities().addArgument(params2->getDevicePointer(), "float2");
replacements["RAD_TO_DEG"] = cu.doubleToString(180/M_PI);
......@@ -617,6 +622,7 @@ void CudaCalcAmoebaOutOfPlaneBendForceKernel::initialize(const System& system, c
}
params->upload(paramVector);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float");
replacements["CUBIC_K"] = cu.doubleToString(force.getAmoebaGlobalOutOfPlaneBendCubic());
replacements["QUARTIC_K"] = cu.doubleToString(force.getAmoebaGlobalOutOfPlaneBendQuartic());
......@@ -748,6 +754,7 @@ void CudaCalcAmoebaTorsionTorsionForceKernel::initialize(const System& system, c
gridValues->upload(gridValuesVec);
gridParams->upload(gridParamsVec);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["GRID_VALUES"] = cu.getBondedUtilities().addArgument(gridValues->getDevicePointer(), "float4");
replacements["GRID_PARAMS"] = cu.getBondedUtilities().addArgument(gridParams->getDevicePointer(), "float4");
replacements["TORSION_PARAMS"] = cu.getBondedUtilities().addArgument(torsionParams->getDevicePointer(), "int2");
......@@ -810,7 +817,10 @@ CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::stri
multipoleParticles(NULL), molecularDipoles(NULL), molecularQuadrupoles(NULL), labFrameDipoles(NULL), labFrameQuadrupoles(NULL), sphericalDipoles(NULL), sphericalQuadrupoles(NULL),
fracDipoles(NULL), fracQuadrupoles(NULL), field(NULL), fieldPolar(NULL), inducedField(NULL), inducedFieldPolar(NULL), torque(NULL), dampingAndThole(NULL), inducedDipole(NULL),
diisCoefficients(NULL), inducedDipolePolar(NULL), inducedDipoleErrors(NULL), prevDipoles(NULL), prevDipolesPolar(NULL), prevDipolesGk(NULL),
prevDipolesGkPolar(NULL), prevErrors(NULL), diisMatrix(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL),
prevDipolesGkPolar(NULL), prevErrors(NULL), diisMatrix(NULL), polarizability(NULL), extrapolatedDipole(NULL), extrapolatedDipolePolar(NULL),
extrapolatedDipoleGk(NULL), extrapolatedDipoleGkPolar(NULL), inducedDipoleFieldGradient(NULL), inducedDipoleFieldGradientPolar(NULL),
inducedDipoleFieldGradientGk(NULL), inducedDipoleFieldGradientGkPolar(NULL), extrapolatedDipoleFieldGradient(NULL), extrapolatedDipoleFieldGradientPolar(NULL),
extrapolatedDipoleFieldGradientGk(NULL), extrapolatedDipoleFieldGradientGkPolar(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL),
pmeGrid(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeIgrid(NULL), pmePhi(NULL),
pmePhid(NULL), pmePhip(NULL), pmePhidp(NULL), pmeCphi(NULL), pmeAtomGridIndex(NULL), lastPositions(NULL), sort(NULL), gkKernel(NULL) {
}
......@@ -867,6 +877,30 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete diisMatrix;
if (diisCoefficients != NULL)
delete diisCoefficients;
if (extrapolatedDipole != NULL)
delete extrapolatedDipole;
if (extrapolatedDipolePolar != NULL)
delete extrapolatedDipolePolar;
if (extrapolatedDipoleGk != NULL)
delete extrapolatedDipoleGk;
if (extrapolatedDipoleGkPolar != NULL)
delete extrapolatedDipoleGkPolar;
if (inducedDipoleFieldGradient != NULL)
delete inducedDipoleFieldGradient;
if (inducedDipoleFieldGradientPolar != NULL)
delete inducedDipoleFieldGradientPolar;
if (inducedDipoleFieldGradientGk != NULL)
delete inducedDipoleFieldGradientGk;
if (inducedDipoleFieldGradientGkPolar != NULL)
delete inducedDipoleFieldGradientGkPolar;
if (extrapolatedDipoleFieldGradient != NULL)
delete extrapolatedDipoleFieldGradient;
if (extrapolatedDipoleFieldGradientPolar != NULL)
delete extrapolatedDipoleFieldGradientPolar;
if (extrapolatedDipoleFieldGradientGk != NULL)
delete extrapolatedDipoleFieldGradientGk;
if (extrapolatedDipoleFieldGradientGkPolar != NULL)
delete extrapolatedDipoleFieldGradientGkPolar;
if (polarizability != NULL)
delete polarizability;
if (covalentFlags != NULL)
......@@ -967,6 +1001,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
// Create workspace arrays.
polarizationType = force.getPolarizationType();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
labFrameDipoles = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "labFrameDipoles");
labFrameQuadrupoles = new CudaArray(cu, 5*paddedNumAtoms, elementSize, "labFrameQuadrupoles");
......@@ -979,12 +1014,23 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
torque = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "torque");
inducedDipole = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipole");
inducedDipolePolar = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolar");
if (polarizationType == AmoebaMultipoleForce::Mutual) {
inducedDipoleErrors = new CudaArray(cu, cu.getNumThreadBlocks(), sizeof(float2), "inducedDipoleErrors");
prevDipoles = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipoles");
prevDipolesPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesPolar");
prevErrors = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevErrors");
diisMatrix = new CudaArray(cu, MaxPrevDIISDipoles*MaxPrevDIISDipoles, elementSize, "diisMatrix");
diisCoefficients = new CudaArray(cu, MaxPrevDIISDipoles+1, sizeof(float), "diisMatrix");
}
else if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
int numOrders = force.getExtrapolationCoefficients().size();
extrapolatedDipole = new CudaArray(cu, 3*numMultipoles*numOrders, elementSize, "extrapolatedDipole");
extrapolatedDipolePolar = new CudaArray(cu, 3*numMultipoles*numOrders, elementSize, "extrapolatedDipolePolar");
inducedDipoleFieldGradient = new CudaArray(cu, 6*paddedNumAtoms, sizeof(long long), "inducedDipoleFieldGradient");
inducedDipoleFieldGradientPolar = new CudaArray(cu, 6*paddedNumAtoms, sizeof(long long), "inducedDipoleFieldGradientPolar");
extrapolatedDipoleFieldGradient = new CudaArray(cu, 6*numMultipoles*(numOrders-1), elementSize, "extrapolatedDipoleFieldGradient");
extrapolatedDipoleFieldGradientPolar = new CudaArray(cu, 6*numMultipoles*(numOrders-1), elementSize, "extrapolatedDipoleFieldGradientPolar");
}
cu.addAutoclearBuffer(*field);
cu.addAutoclearBuffer(*fieldPolar);
cu.addAutoclearBuffer(*torque);
......@@ -1036,14 +1082,16 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
// Record other options.
if (force.getPolarizationType() == AmoebaMultipoleForce::Mutual) {
if (polarizationType == AmoebaMultipoleForce::Mutual) {
maxInducedIterations = force.getMutualInducedMaxIterations();
inducedEpsilon = force.getMutualInducedTargetEpsilon();
inducedField = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedField");
inducedFieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedFieldPolar");
}
else
maxInducedIterations = 0;
if (polarizationType != AmoebaMultipoleForce::Direct) {
inducedField = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedField");
inducedFieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedFieldPolar");
}
usePME = (force.getNonbondedMethod() == AmoebaMultipoleForce::PME);
// See whether there's an AmoebaGeneralizedKirkwoodForce in the System.
......@@ -1058,6 +1106,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
bool useShuffle = (cu.getComputeCapability() >= 3.0 && !cu.getUseDoublePrecision());
double fixedThreadMemory = 19*elementSize+2*sizeof(float)+3*sizeof(int)/(double) cu.TileSize;
double inducedThreadMemory = 15*elementSize+2*sizeof(float);
if (polarizationType == AmoebaMultipoleForce::Extrapolated)
inducedThreadMemory += 12*elementSize;
double electrostaticsThreadMemory = 0;
if (!useShuffle)
fixedThreadMemory += 3*elementSize;
......@@ -1066,8 +1116,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
defines["ENERGY_SCALE_FACTOR"] = cu.doubleToString(138.9354558456/innerDielectric);
if (force.getPolarizationType() == AmoebaMultipoleForce::Direct)
if (polarizationType == AmoebaMultipoleForce::Direct)
defines["DIRECT_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Mutual)
defines["MUTUAL_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Extrapolated)
defines["EXTRAPOLATED_POLARIZATION"] = "";
if (useShuffle)
defines["USE_SHUFFLE"] = "";
if (hasQuadrupoles)
......@@ -1080,11 +1134,22 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
int endExclusionIndex = (cu.getContextIndex()+1)*numExclusionTiles/numContexts;
defines["FIRST_EXCLUSION_TILE"] = cu.intToString(startExclusionIndex);
defines["LAST_EXCLUSION_TILE"] = cu.intToString(endExclusionIndex);
alpha = force.getAEwald();
maxExtrapolationOrder = force.getExtrapolationCoefficients().size();
defines["MAX_EXTRAPOLATION_ORDER"] = cu.intToString(maxExtrapolationOrder);
stringstream coefficients;
for (int i = 0; i < maxExtrapolationOrder; i++) {
if (i > 0)
coefficients << ",";
double sum = 0;
for (int j = i; j < maxExtrapolationOrder; j++)
sum += force.getExtrapolationCoefficients()[j];
coefficients << cu.doubleToString(sum);
}
defines["EXTRAPOLATION_COEFFICIENTS_SUM"] = coefficients.str();
if (usePME) {
vector<int> pmeGridDimension;
force.getPmeGridDimensions(pmeGridDimension);
if (pmeGridDimension[0] == 0 || alpha == 0.0) {
int nx, ny, nz;
force.getPMEParameters(alpha, nx, ny, nz);
if (nx == 0 || alpha == 0.0) {
NonbondedForce nb;
nb.setEwaldErrorTolerance(force.getEwaldErrorTolerance());
nb.setCutoffDistance(force.getCutoffDistance());
......@@ -1093,9 +1158,9 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY);
gridSizeZ = CudaFFT3D::findLegalDimension(gridSizeZ);
} else {
gridSizeX = CudaFFT3D::findLegalDimension(pmeGridDimension[0]);
gridSizeY = CudaFFT3D::findLegalDimension(pmeGridDimension[1]);
gridSizeZ = CudaFFT3D::findLegalDimension(pmeGridDimension[2]);
gridSizeX = CudaFFT3D::findLegalDimension(nx);
gridSizeY = CudaFFT3D::findLegalDimension(ny);
gridSizeZ = CudaFFT3D::findLegalDimension(nz);
}
defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
defines["SQRT_PI"] = cu.doubleToString(sqrt(M_PI));
......@@ -1113,9 +1178,21 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["GK_FQ"] = cu.doubleToString(3*(1-solventDielectric)/(2+3*solventDielectric));
fixedThreadMemory += 4*elementSize;
inducedThreadMemory += 13*elementSize;
if (polarizationType == AmoebaMultipoleForce::Mutual) {
prevDipolesGk = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGk");
prevDipolesGkPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGkPolar");
}
else if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
inducedThreadMemory += 12*elementSize;
int numOrders = force.getExtrapolationCoefficients().size();
extrapolatedDipoleGk = new CudaArray(cu, 3*numMultipoles*numOrders, elementSize, "extrapolatedDipoleGk");
extrapolatedDipoleGkPolar = new CudaArray(cu, 3*numMultipoles*numOrders, elementSize, "extrapolatedDipoleGkPolar");
inducedDipoleFieldGradientGk = new CudaArray(cu, 6*numMultipoles, elementSize, "inducedDipoleFieldGradientGk");
inducedDipoleFieldGradientGkPolar = new CudaArray(cu, 6*numMultipoles, elementSize, "inducedDipoleFieldGradientGkPolar");
extrapolatedDipoleFieldGradientGk = new CudaArray(cu, 6*numMultipoles*(numOrders-1), elementSize, "extrapolatedDipoleFieldGradientGk");
extrapolatedDipoleFieldGradientGkPolar = new CudaArray(cu, 6*numMultipoles*(numOrders-1), elementSize, "extrapolatedDipoleFieldGradientGkPolar");
}
}
int maxThreads = cu.getNonbondedUtilities().getForceThreadBlockSize();
fixedFieldThreads = min(maxThreads, cu.computeThreadBlockSize(fixedThreadMemory));
inducedFieldThreads = min(maxThreads, cu.computeThreadBlockSize(inducedThreadMemory));
......@@ -1127,15 +1204,18 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["THREAD_BLOCK_SIZE"] = cu.intToString(fixedFieldThreads);
module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleFixedField, defines);
computeFixedFieldKernel = cu.getKernel(module, "computeFixedField");
if (maxInducedIterations > 0) {
if (polarizationType != AmoebaMultipoleForce::Direct) {
defines["THREAD_BLOCK_SIZE"] = cu.intToString(inducedFieldThreads);
defines["MAX_PREV_DIIS_DIPOLES"] = cu.intToString(MaxPrevDIISDipoles);
defines["USE_MUTUAL_POLARIZATION"] = "1";
module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleInducedField, defines);
computeInducedFieldKernel = cu.getKernel(module, "computeInducedField");
updateInducedFieldKernel = cu.getKernel(module, "updateInducedFieldByDIIS");
recordDIISDipolesKernel = cu.getKernel(module, "recordInducedDipolesForDIIS");
buildMatrixKernel = cu.getKernel(module, "computeDIISMatrix");
initExtrapolatedKernel = cu.getKernel(module, "initExtrapolatedDipoles");
iterateExtrapolatedKernel = cu.getKernel(module, "iterateExtrapolatedDipoles");
computeExtrapolatedKernel = cu.getKernel(module, "computeExtrapolatedDipoles");
addExtrapolatedGradientKernel = cu.getKernel(module, "addExtrapolatedFieldGradientToForce");
}
stringstream electrostaticsSource;
electrostaticsSource << CudaKernelSources::vectorOps;
......@@ -1166,8 +1246,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ);
pmeDefines["M_PI"] = cu.doubleToString(M_PI);
pmeDefines["SQRT_PI"] = cu.doubleToString(sqrt(M_PI));
if (force.getPolarizationType() == AmoebaMultipoleForce::Direct)
if (polarizationType == AmoebaMultipoleForce::Direct)
pmeDefines["DIRECT_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Mutual)
pmeDefines["MUTUAL_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Extrapolated)
pmeDefines["EXTRAPOLATED_POLARIZATION"] = "";
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipolePme, pmeDefines);
pmeGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
pmeTransformMultipolesKernel = cu.getKernel(module, "transformMultipolesToFractionalCoordinates");
......@@ -1407,7 +1491,6 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
int numTileIndices = nb.getNumTiles();
int numForceThreadBlocks = nb.getNumForceThreadBlocks();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
void* npt = NULL;
if (pmeGrid == NULL) {
// Compute induced dipoles.
......@@ -1436,25 +1519,10 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Iterate until the dipoles converge.
if (polarizationType == AmoebaMultipoleForce::Extrapolated)
computeExtrapolatedDipoles(NULL);
for (int i = 0; i < maxInducedIterations; i++) {
cu.clearBuffer(*inducedField);
cu.clearBuffer(*inducedFieldPolar);
if (gkKernel == NULL) {
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
}
else {
cu.clearBuffer(*gkKernel->getInducedField());
cu.clearBuffer(*gkKernel->getInducedFieldPolar());
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&gkKernel->getInducedField()->getDevicePointer(), &gkKernel->getInducedFieldPolar()->getDevicePointer(),
&gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(),
&gkKernel->getBornRadii()->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
}
computeInducedField(NULL);
bool converged = iterateDipolesByDIIS(i);
if (converged)
break;
......@@ -1579,34 +1647,10 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Iterate until the dipoles converge.
vector<float2> errors;
if (polarizationType == AmoebaMultipoleForce::Extrapolated)
computeExtrapolatedDipoles(recipBoxVectorPointer);
for (int i = 0; i < maxInducedIterations; i++) {
cu.clearBuffer(*inducedField);
cu.clearBuffer(*inducedFieldPolar);
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&nb.getInteractingTiles().getDevicePointer(), &nb.getInteractionCount().getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
&maxTiles, &nb.getBlockCenters().getDevicePointer(), &nb.getInteractingAtoms().getDevicePointer(),
&dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
cu.clearBuffer(*pmeGrid);
cu.executeKernel(pmeSpreadInducedDipolesKernel, pmeSpreadInducedDipolesArgs, cu.getNumAtoms());
if (cu.getUseDoublePrecision())
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, pmeGrid->getSize());
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
cu.executeKernel(pmeConvolutionKernel, pmeConvolutionArgs, cu.getNumAtoms());
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
cu.executeKernel(pmeInducedPotentialKernel, pmeInducedPotentialArgs, cu.getNumAtoms());
void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
computeInducedField(recipBoxVectorPointer);
bool converged = iterateDipolesByDIIS(i);
if (converged)
break;
......@@ -1633,6 +1677,23 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
cu.executeKernel(pmeInducedForceKernel, pmeInducedForceArgs, cu.getNumAtoms());
}
// If using extrapolated polarization, add in force contributions from µ(m) T µ(n).
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
if (gkKernel == NULL) {
void* extrapolatedArgs[] = {&cu.getForce().getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &extrapolatedDipoleFieldGradient->getDevicePointer(), &extrapolatedDipoleFieldGradientPolar->getDevicePointer()};
cu.executeKernel(addExtrapolatedGradientKernel, extrapolatedArgs, numMultipoles);
}
else {
void* extrapolatedArgs[] = {&cu.getForce().getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &extrapolatedDipoleFieldGradient->getDevicePointer(), &extrapolatedDipoleFieldGradientPolar->getDevicePointer(),
&extrapolatedDipoleGk->getDevicePointer(), &extrapolatedDipoleGkPolar->getDevicePointer(),
&extrapolatedDipoleFieldGradientGk->getDevicePointer(), &extrapolatedDipoleFieldGradientGkPolar->getDevicePointer()};
cu.executeKernel(addExtrapolatedGradientKernel, extrapolatedArgs, numMultipoles);
}
}
// Map torques to force.
void* mapTorqueArgs[] = {&cu.getForce().getDevicePointer(), &torque->getDevicePointer(),
......@@ -1646,6 +1707,109 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
return 0.0;
}
/**
 * Launch computeInducedFieldKernel and, when a PME grid exists, the follow-up
 * sequence of PME kernels and FFTs that ends with pmeRecordInducedFieldDipolesKernel.
 *
 * The kernel argument list is assembled at run time because its contents depend on
 * three things: whether polarizationType is Extrapolated (field gradient buffers are
 * added), whether pmeGrid is non-NULL (neighbor list and periodic box arguments are
 * added), and whether gkKernel is non-NULL (Generalized Kirkwood buffers are added).
 * The arguments are positional, so the order of the push_back() calls must not be changed.
 * NOTE(review): the matching kernel signature is presumably selected by compile-time
 * defines in the kernel source - confirm there before adding or reordering arguments.
 *
 * Before launching, the induced field buffers (and, where applicable, the field gradient
 * and GK induced field buffers) are cleared.
 *
 * @param recipBoxVectorPointer  array of kernel argument pointers; elements 0, 1 and 2 are
 *                               read.  It is only indexed when pmeGrid is non-NULL, so it is
 *                               not touched on the non-PME path.
 */
void CudaCalcAmoebaMultipoleForceKernel::computeInducedField(void** recipBoxVectorPointer) {
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
int startTileIndex = nb.getStartTileIndex();
int numTileIndices = nb.getNumTiles();
int numForceThreadBlocks = nb.getNumForceThreadBlocks();
// Only its address is stored in the argument list below; the real value is assigned
// just before the launch in the PME branch.
unsigned int maxTiles = 0;
// Arguments common to every variant of the kernel.
vector<void*> computeInducedFieldArgs;
computeInducedFieldArgs.push_back(&inducedField->getDevicePointer());
computeInducedFieldArgs.push_back(&inducedFieldPolar->getDevicePointer());
computeInducedFieldArgs.push_back(&cu.getPosq().getDevicePointer());
computeInducedFieldArgs.push_back(&nb.getExclusionTiles().getDevicePointer());
computeInducedFieldArgs.push_back(&inducedDipole->getDevicePointer());
computeInducedFieldArgs.push_back(&inducedDipolePolar->getDevicePointer());
computeInducedFieldArgs.push_back(&startTileIndex);
computeInducedFieldArgs.push_back(&numTileIndices);
// Extrapolated polarization additionally passes the field gradient buffers.
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
computeInducedFieldArgs.push_back(&inducedDipoleFieldGradient->getDevicePointer());
computeInducedFieldArgs.push_back(&inducedDipoleFieldGradientPolar->getDevicePointer());
}
// With a PME grid, pass the interacting tile data and the periodic box description.
if (pmeGrid != NULL) {
computeInducedFieldArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
computeInducedFieldArgs.push_back(&nb.getInteractionCount().getDevicePointer());
computeInducedFieldArgs.push_back(cu.getPeriodicBoxSizePointer());
computeInducedFieldArgs.push_back(cu.getInvPeriodicBoxSizePointer());
computeInducedFieldArgs.push_back(cu.getPeriodicBoxVecXPointer());
computeInducedFieldArgs.push_back(cu.getPeriodicBoxVecYPointer());
computeInducedFieldArgs.push_back(cu.getPeriodicBoxVecZPointer());
computeInducedFieldArgs.push_back(&maxTiles);
computeInducedFieldArgs.push_back(&nb.getBlockCenters().getDevicePointer());
computeInducedFieldArgs.push_back(&nb.getInteractingAtoms().getDevicePointer());
}
// With Generalized Kirkwood, pass the buffers owned by the GK kernel object.
if (gkKernel != NULL) {
computeInducedFieldArgs.push_back(&gkKernel->getInducedField()->getDevicePointer());
computeInducedFieldArgs.push_back(&gkKernel->getInducedFieldPolar()->getDevicePointer());
computeInducedFieldArgs.push_back(&gkKernel->getInducedDipoles()->getDevicePointer());
computeInducedFieldArgs.push_back(&gkKernel->getInducedDipolesPolar()->getDevicePointer());
computeInducedFieldArgs.push_back(&gkKernel->getBornRadii()->getDevicePointer());
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
computeInducedFieldArgs.push_back(&inducedDipoleFieldGradientGk->getDevicePointer());
computeInducedFieldArgs.push_back(&inducedDipoleFieldGradientGkPolar->getDevicePointer());
}
}
// The damping/Thole buffer is always the last argument.
computeInducedFieldArgs.push_back(&dampingAndThole->getDevicePointer());
// Clear every output buffer that was passed to the kernel above.
cu.clearBuffer(*inducedField);
cu.clearBuffer(*inducedFieldPolar);
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
cu.clearBuffer(*inducedDipoleFieldGradient);
cu.clearBuffer(*inducedDipoleFieldGradientPolar);
}
if (gkKernel != NULL) {
cu.clearBuffer(*gkKernel->getInducedField());
cu.clearBuffer(*gkKernel->getInducedFieldPolar());
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
cu.clearBuffer(*inducedDipoleFieldGradientGk);
cu.clearBuffer(*inducedDipoleFieldGradientGkPolar);
}
}
// Without a PME grid, the single kernel launch is all that is needed.
if (pmeGrid == NULL)
cu.executeKernel(computeInducedFieldKernel, &computeInducedFieldArgs[0], numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
else {
// Fill in the value whose address was stored in the argument list, then launch.
maxTiles = nb.getInteractingTiles().getSize();
cu.executeKernel(computeInducedFieldKernel, &computeInducedFieldArgs[0], numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
// Clear the grid, then spread the induced dipoles onto it.
cu.clearBuffer(*pmeGrid);
void* pmeSpreadInducedDipolesArgs[] = {&cu.getPosq().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
&pmeGrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeSpreadInducedDipolesKernel, pmeSpreadInducedDipolesArgs, cu.getNumAtoms());
// The finish-spread kernel is only launched in double precision mode.
if (cu.getUseDoublePrecision()) {
void* finishSpreadArgs[] = {&pmeGrid->getDevicePointer()};
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, pmeGrid->getSize());
}
// Forward FFT, in place (the same device pointer is input and output).
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
// Convolution on the transformed grid using the B-spline moduli buffers.
void* pmeConvolutionArgs[] = {&pmeGrid->getDevicePointer(), &pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(),
&pmeBsplineModuliZ->getDevicePointer(), cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeConvolutionKernel, pmeConvolutionArgs, cu.getNumAtoms());
// Inverse FFT, again in place.
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
// Launch the induced potential kernel on the grid, passing the pmePhid/pmePhip/pmePhidp buffers.
void* pmeInducedPotentialArgs[] = {&pmeGrid->getDevicePointer(), &pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&pmePhidp->getDevicePointer(), &cu.getPosq().getDevicePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(),
cu.getPeriodicBoxVecZPointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2],
&pmeAtomGridIndex->getDevicePointer()};
cu.executeKernel(pmeInducedPotentialKernel, pmeInducedPotentialArgs, cu.getNumAtoms());
// Launch the record kernel with the induced field buffers.  The Extrapolated variant
// takes two extra arguments (the field gradient buffers); otherwise the lists are identical.
if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(),
&inducedDipolePolar->getDevicePointer(), &inducedDipoleFieldGradient->getDevicePointer(), &inducedDipoleFieldGradientPolar->getDevicePointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
}
else {
void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
}
}
}
bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
void* npt = NULL;
bool trueValue = true, falseValue = false;
......@@ -1744,6 +1908,62 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
return false;
}
/**
 * Run the kernel sequence used for extrapolated polarization.
 *
 * The sequence is: initExtrapolatedKernel once; then, for each order from 1 up to (but not
 * including) maxExtrapolationOrder, computeInducedField() followed by iterateExtrapolatedKernel;
 * then computeExtrapolatedKernel once; and finally one more computeInducedField().
 *
 * Each kernel has two argument layouts.  When gkKernel is non-NULL the argument list also
 * carries the Generalized Kirkwood dipole, field and field gradient buffers.  The arguments
 * are positional, so their order within each list must not be changed.
 *
 * Every kernel here is launched over extrapolatedDipole->getSize() work items.
 *
 * @param recipBoxVectorPointer  forwarded unchanged to computeInducedField()
 */
void CudaCalcAmoebaMultipoleForceKernel::computeExtrapolatedDipoles(void** recipBoxVectorPointer) {
// Start by storing the direct dipoles as PT0
if (gkKernel == NULL) {
void* initArgs[] = {&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &inducedDipoleFieldGradient->getDevicePointer(), &inducedDipoleFieldGradientPolar->getDevicePointer()};
cu.executeKernel(initExtrapolatedKernel, initArgs, extrapolatedDipole->getSize());
}
else {
// Same as above, followed by the corresponding GK buffers.
void* initArgs[] = {&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &inducedDipoleFieldGradient->getDevicePointer(), &inducedDipoleFieldGradientPolar->getDevicePointer(),
&gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(), &extrapolatedDipoleGk->getDevicePointer(),
&extrapolatedDipoleGkPolar->getDevicePointer(), &inducedDipoleFieldGradientGk->getDevicePointer(), &inducedDipoleFieldGradientGkPolar->getDevicePointer()};
cu.executeKernel(initExtrapolatedKernel, initArgs, extrapolatedDipole->getSize());
}
// Recursively apply alpha.Tau to the µ_(n) components to generate µ_(n+1), and store the result
for (int order = 1; order < maxExtrapolationOrder; ++order) {
// Run the induced field pass before each iterate kernel launch; the current order is
// passed to the kernel as its first argument.
computeInducedField(recipBoxVectorPointer);
if (gkKernel == NULL) {
void* iterateArgs[] = {&order, &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &inducedDipoleFieldGradient->getDevicePointer(), &inducedDipoleFieldGradientPolar->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &extrapolatedDipoleFieldGradient->getDevicePointer(), &extrapolatedDipoleFieldGradientPolar->getDevicePointer(),
&polarizability->getDevicePointer()};
cu.executeKernel(iterateExtrapolatedKernel, iterateArgs, extrapolatedDipole->getSize());
}
else {
// GK variant: the GK buffers are inserted before the trailing polarizability argument.
void* iterateArgs[] = {&order, &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &inducedDipoleFieldGradient->getDevicePointer(), &inducedDipoleFieldGradientPolar->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &extrapolatedDipoleFieldGradient->getDevicePointer(), &extrapolatedDipoleFieldGradientPolar->getDevicePointer(),
&gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(), &extrapolatedDipoleGk->getDevicePointer(),
&extrapolatedDipoleGkPolar->getDevicePointer(), &inducedDipoleFieldGradientGk->getDevicePointer(), &inducedDipoleFieldGradientGkPolar->getDevicePointer(),
&gkKernel->getInducedField()->getDevicePointer(), &gkKernel->getInducedFieldPolar()->getDevicePointer(),
&extrapolatedDipoleFieldGradientGk->getDevicePointer(), &extrapolatedDipoleFieldGradientGkPolar->getDevicePointer(),
&polarizability->getDevicePointer()};
cu.executeKernel(iterateExtrapolatedKernel, iterateArgs, extrapolatedDipole->getSize());
}
}
// Take a linear combination of the µ_(n) components to form the total dipole
if (gkKernel == NULL) {
void* computeArgs[] = {&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer()};
cu.executeKernel(computeExtrapolatedKernel, computeArgs, extrapolatedDipole->getSize());
}
else {
void* computeArgs[] = {&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(),
&extrapolatedDipoleGk->getDevicePointer(), &extrapolatedDipoleGkPolar->getDevicePointer()};
cu.executeKernel(computeExtrapolatedKernel, computeArgs, extrapolatedDipole->getSize());
}
// One more induced field pass after the combination step.
// NOTE(review): presumably this makes the field buffers correspond to the final dipoles - confirm.
computeInducedField(recipBoxVectorPointer);
}
void CudaCalcAmoebaMultipoleForceKernel::ensureMultipolesValid(ContextImpl& context) {
if (multipolesAreValid) {
int numParticles = cu.getNumAtoms();
......@@ -2152,7 +2372,8 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::initialize(const System& syst
bornForce = CudaArray::create<long long>(cu, paddedNumAtoms, "bornForce");
inducedDipoleS = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipoleS");
inducedDipolePolarS = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolarS");
if (multipoles->getPolarizationType() == AmoebaMultipoleForce::Mutual) {
polarizationType = multipoles->getPolarizationType();
if (polarizationType != AmoebaMultipoleForce::Direct) {
inducedField = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "gkInducedField");
inducedFieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "gkInducedFieldPolar");
}
......@@ -2205,8 +2426,12 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::initialize(const System& syst
defines["EPSILON_FACTOR"] = cu.doubleToString(138.9354558456);
defines["M_PI"] = cu.doubleToString(M_PI);
defines["ENERGY_SCALE_FACTOR"] = cu.doubleToString(138.9354558456/force.getSoluteDielectric());
if (multipoles->getPolarizationType() == AmoebaMultipoleForce::Direct)
if (polarizationType == AmoebaMultipoleForce::Direct)
defines["DIRECT_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Mutual)
defines["MUTUAL_POLARIZATION"] = "";
else if (polarizationType == AmoebaMultipoleForce::Extrapolated)
defines["EXTRAPOLATED_POLARIZATION"] = "";
includeSurfaceArea = force.getIncludeCavityTerm();
if (includeSurfaceArea) {
defines["SURFACE_AREA_FACTOR"] = cu.doubleToString(force.getSurfaceAreaFactor());
......@@ -2439,15 +2664,15 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba
replacements["EPSILON_COMBINING_RULE"] = "4";
else
throw OpenMMException("Illegal combining rule for sigma: "+sigmaCombiningRule);
double cutoff = force.getCutoff();
double cutoff = force.getCutoffDistance();
double taperCutoff = cutoff*0.9;
replacements["CUTOFF_DISTANCE"] = cu.doubleToString(force.getCutoff());
replacements["CUTOFF_DISTANCE"] = cu.doubleToString(force.getCutoffDistance());
replacements["TAPER_CUTOFF"] = cu.doubleToString(taperCutoff);
replacements["TAPER_C3"] = cu.doubleToString(10/pow(taperCutoff-cutoff, 3.0));
replacements["TAPER_C4"] = cu.doubleToString(15/pow(taperCutoff-cutoff, 4.0));
replacements["TAPER_C5"] = cu.doubleToString(6/pow(taperCutoff-cutoff, 5.0));
bool useCutoff = (force.getNonbondedMethod() != AmoebaVdwForce::NoCutoff);
nonbonded->addInteraction(useCutoff, useCutoff, true, force.getCutoff(), exclusions,
nonbonded->addInteraction(useCutoff, useCutoff, true, force.getCutoffDistance(), exclusions,
cu.replaceStrings(CudaAmoebaKernelSources::amoebaVdwForce2, replacements), 0);
// Create the other kernels.
......
......@@ -399,14 +399,17 @@ private:
const char* getSortKey() const {return "value.y";}
};
void initializeScaleFactors();
void computeInducedField(void** recipBoxVectorPointer);
bool iterateDipolesByDIIS(int iteration);
void computeExtrapolatedDipoles(void** recipBoxVectorPointer);
void ensureMultipolesValid(ContextImpl& context);
template <class T, class T4, class M4> void computeSystemMultipoleMoments(ContextImpl& context, std::vector<double>& outputMultipoleMoments);
int numMultipoles, maxInducedIterations;
int numMultipoles, maxInducedIterations, maxExtrapolationOrder;
int fixedFieldThreads, inducedFieldThreads, electrostaticsThreads;
int gridSizeX, gridSizeY, gridSizeZ;
double alpha, inducedEpsilon;
bool usePME, hasQuadrupoles, hasInitializedScaleFactors, hasInitializedFFT, multipolesAreValid;
AmoebaMultipoleForce::PolarizationType polarizationType;
CudaContext& cu;
const System& system;
std::vector<int3> covalentFlagValues;
......@@ -436,6 +439,18 @@ private:
CudaArray* prevErrors;
CudaArray* diisMatrix;
CudaArray* diisCoefficients;
CudaArray* extrapolatedDipole;
CudaArray* extrapolatedDipolePolar;
CudaArray* extrapolatedDipoleGk;
CudaArray* extrapolatedDipoleGkPolar;
CudaArray* inducedDipoleFieldGradient;
CudaArray* inducedDipoleFieldGradientPolar;
CudaArray* inducedDipoleFieldGradientGk;
CudaArray* inducedDipoleFieldGradientGkPolar;
CudaArray* extrapolatedDipoleFieldGradient;
CudaArray* extrapolatedDipoleFieldGradientPolar;
CudaArray* extrapolatedDipoleFieldGradientGk;
CudaArray* extrapolatedDipoleFieldGradientGkPolar;
CudaArray* polarizability;
CudaArray* covalentFlags;
CudaArray* polarizationGroupFlags;
......@@ -458,6 +473,7 @@ private:
CUfunction pmeGridIndexKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeFinishSpreadChargeKernel, pmeConvolutionKernel;
CUfunction pmeFixedPotentialKernel, pmeInducedPotentialKernel, pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel, computePotentialKernel;
CUfunction recordDIISDipolesKernel, buildMatrixKernel;
CUfunction initExtrapolatedKernel, iterateExtrapolatedKernel, computeExtrapolatedKernel, addExtrapolatedGradientKernel;
CUfunction pmeTransformMultipolesKernel, pmeTransformPotentialKernel;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel* gkKernel;
static const int PmeOrder = 5;
......@@ -526,6 +542,7 @@ private:
const System& system;
bool includeSurfaceArea, hasInitializedKernels;
int computeBornSumThreads, gkForceThreads, chainRuleThreads, ediffThreads;
AmoebaMultipoleForce::PolarizationType polarizationType;
std::map<std::string, std::string> defines;
CudaArray* params;
CudaArray* bornSum;
......
float2 angleParams = PARAMS[index];
real xad = pos1.x - pos4.x;
real yad = pos1.y - pos4.y;
real zad = pos1.z - pos4.z;
real3 ad = make_real3(pos1.x-pos4.x, pos1.y-pos4.y, pos1.z-pos4.z);
real3 bd = make_real3(pos2.x-pos4.x, pos2.y-pos4.y, pos2.z-pos4.z);
real3 cd = make_real3(pos3.x-pos4.x, pos3.y-pos4.y, pos3.z-pos4.z);
real xbd = pos2.x - pos4.x;
real ybd = pos2.y - pos4.y;
real zbd = pos2.z - pos4.z;
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(ad)
APPLY_PERIODIC_TO_DELTA(bd)
APPLY_PERIODIC_TO_DELTA(cd)
#endif
real xcd = pos3.x - pos4.x;
real ycd = pos3.y - pos4.y;
real zcd = pos3.z - pos4.z;
real xt = yad*zcd - zad*ycd;
real yt = zad*xcd - xad*zcd;
real zt = xad*ycd - yad*xcd;
real xt = ad.y*cd.z - ad.z*cd.y;
real yt = ad.z*cd.x - ad.x*cd.z;
real zt = ad.x*cd.y - ad.y*cd.x;
real rt2 = xt*xt + yt*yt + zt*zt;
real delta = -(xt*xbd + yt*ybd + zt*zbd) / rt2;
real delta = -(xt*bd.x + yt*bd.y + zt*bd.z) / rt2;
real xip = pos2.x + xt*delta;
real yip = pos2.y + yt*delta;
real zip = pos2.z + zt*delta;
real xap = pos1.x - xip;
real yap = pos1.y - yip;
real zap = pos1.z - zip;
real3 ap = make_real3(pos1.x-xip, pos1.y-yip, pos1.z-zip);
real3 cp = make_real3(pos3.x-xip, pos3.y-yip, pos3.z-zip);
real xcp = pos3.x - xip;
real ycp = pos3.y - yip;
real zcp = pos3.z - zip;
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(ap)
APPLY_PERIODIC_TO_DELTA(cp)
#endif
real rap2 = xap*xap + yap*yap + zap*zap;
real rcp2 = xcp*xcp + ycp*ycp + zcp*zcp;
real rap2 = ap.x*ap.x + ap.y*ap.y + ap.z*ap.z;
real rcp2 = cp.x*cp.x + cp.y*cp.y + cp.z*cp.z;
real xm = ycp*zap - zcp*yap;
real ym = zcp*xap - xcp*zap;
real zm = xcp*yap - ycp*xap;
real xm = cp.y*ap.z - cp.z*ap.y;
real ym = cp.z*ap.x - cp.x*ap.z;
real zm = cp.x*ap.y - cp.y*ap.x;
real rm = max(SQRT(xm*xm + ym*ym + zm*zm), (real) 1e-6f);
real dot = xap*xcp + yap*ycp + zap*zcp;
real dotp = ap.x*cp.x + ap.y*cp.y + ap.z*cp.z;
real product = SQRT(rap2*rcp2);
real cosine = (product > 0 ? (dot/product) : 0);
real cosine = (product > 0 ? (dotp/product) : 0);
cosine = max(min(cosine, (real) 1), (real) -1);
real angle = ACOS(cosine);
real angle;
if (cosine > 0.99f || cosine < -0.99f) {
real3 cross_prod = cross(ap, cp);
angle = ASIN(SQRT(dot(cross_prod, cross_prod)/(rap2*rcp2)))*RAD_TO_DEG;
if (cosine < 0.0f)
angle = 180-angle;
}
else
angle = ACOS(cosine)*RAD_TO_DEG;
// if product == 0, set force/energy to 0
real deltaIdeal = (product > 0 ? (angle*RAD_TO_DEG - angleParams.x) : 0);
real deltaIdeal = (product > 0 ? (angle - angleParams.x) : 0);
real deltaIdeal2 = deltaIdeal*deltaIdeal;
real deltaIdeal3 = deltaIdeal*deltaIdeal2;
real deltaIdeal4 = deltaIdeal2*deltaIdeal2;
......@@ -59,13 +65,13 @@ dEdAngle *= RAD_TO_DEG;
real terma = -dEdAngle/(rap2*rm);
real termc = dEdAngle/(rcp2*rm);
real dedxia = terma * (yap*zm-zap*ym);
real dedyia = terma * (zap*xm-xap*zm);
real dedzia = terma * (xap*ym-yap*xm);
real dedxia = terma * (ap.y*zm-ap.z*ym);
real dedyia = terma * (ap.z*xm-ap.x*zm);
real dedzia = terma * (ap.x*ym-ap.y*xm);
real dedxic = termc * (ycp*zm-zcp*ym);
real dedyic = termc * (zcp*xm-xcp*zm);
real dedzic = termc * (xcp*ym-ycp*xm);
real dedxic = termc * (cp.y*zm-cp.z*ym);
real dedyic = termc * (cp.z*xm-cp.x*zm);
real dedzic = termc * (cp.x*ym-cp.y*xm);
real dedxip = -dedxia - dedxic;
real dedyip = -dedyia - dedyic;
......@@ -74,23 +80,23 @@ real dedzip = -dedzia - dedzic;
real delta2 = 2.0f*delta;
real ptrt2 = (dedxip*xt + dedyip*yt + dedzip*zt) / rt2;
real term = (zcd*ybd-ycd*zbd) + delta2*(yt*zcd-zt*ycd);
real dpdxia = delta*(ycd*dedzip-zcd*dedyip) + term*ptrt2;
real term = (cd.z*bd.y-cd.y*bd.z) + delta2*(yt*cd.z-zt*cd.y);
real dpdxia = delta*(cd.y*dedzip-cd.z*dedyip) + term*ptrt2;
term = (xcd*zbd-zcd*xbd) + delta2*(zt*xcd-xt*zcd);
real dpdyia = delta*(zcd*dedxip-xcd*dedzip) + term*ptrt2;
term = (cd.x*bd.z-cd.z*bd.x) + delta2*(zt*cd.x-xt*cd.z);
real dpdyia = delta*(cd.z*dedxip-cd.x*dedzip) + term*ptrt2;
term = (ycd*xbd-xcd*ybd) + delta2*(xt*ycd-yt*xcd);
real dpdzia = delta*(xcd*dedyip-ycd*dedxip) + term*ptrt2;
term = (cd.y*bd.x-cd.x*bd.y) + delta2*(xt*cd.y-yt*cd.x);
real dpdzia = delta*(cd.x*dedyip-cd.y*dedxip) + term*ptrt2;
term = (yad*zbd-zad*ybd) + delta2*(zt*yad-yt*zad);
real dpdxic = delta*(zad*dedyip-yad*dedzip) + term*ptrt2;
term = (ad.y*bd.z-ad.z*bd.y) + delta2*(zt*ad.y-yt*ad.z);
real dpdxic = delta*(ad.z*dedyip-ad.y*dedzip) + term*ptrt2;
term = (zad*xbd-xad*zbd) + delta2*(xt*zad-zt*xad);
real dpdyic = delta*(xad*dedzip-zad*dedxip) + term*ptrt2;
term = (ad.z*bd.x-ad.x*bd.z) + delta2*(xt*ad.z-zt*ad.x);
real dpdyic = delta*(ad.x*dedzip-ad.z*dedxip) + term*ptrt2;
term = (xad*ybd-yad*xbd) + delta2*(yt*xad-xt*yad);
real dpdzic = delta*(yad*dedxip-xad*dedyip) + term*ptrt2;
term = (ad.x*bd.y-ad.y*bd.x) + delta2*(yt*ad.x-xt*ad.y);
real dpdzic = delta*(ad.y*dedxip-ad.x*dedyip) + term*ptrt2;
dedxia = dedxia + dpdxia;
dedyia = dedyia + dpdyia;
......
// compute the value of the bond angle
real xab = pos1.x - pos2.x;
real yab = pos1.y - pos2.y;
real zab = pos1.z - pos2.z;
real xcb = pos3.x - pos2.x;
real ycb = pos3.y - pos2.y;
real zcb = pos3.z - pos2.z;
// compute the out-of-plane bending angle
real xdb = pos4.x - pos2.x;
real ydb = pos4.y - pos2.y;
real zdb = pos4.z - pos2.z;
real xad = pos1.x - pos4.x;
real yad = pos1.y - pos4.y;
real zad = pos1.z - pos4.z;
real xcd = pos3.x - pos4.x;
real ycd = pos3.y - pos4.y;
real zcd = pos3.z - pos4.z;
real rdb2 = xdb*xdb + ydb*ydb + zdb*zdb;
real rad2 = xad*xad + yad*yad + zad*zad;
real rcd2 = xcd*xcd + ycd*ycd + zcd*zcd;
real ee = xab*(ycb*zdb-zcb*ydb) + yab*(zcb*xdb-xcb*zdb) + zab*(xcb*ydb-ycb*xdb);
real dot = xad*xcd + yad*ycd + zad*zcd;
real3 ab = make_real3(pos1.x-pos2.x, pos1.y-pos2.y, pos1.z-pos2.z);
real3 cb = make_real3(pos3.x-pos2.x, pos3.y-pos2.y, pos3.z-pos2.z);
real3 db = make_real3(pos4.x-pos2.x, pos4.y-pos2.y, pos4.z-pos2.z);
real3 ad = make_real3(pos1.x-pos4.x, pos1.y-pos4.y, pos1.z-pos4.z);
real3 cd = make_real3(pos3.x-pos4.x, pos3.y-pos4.y, pos3.z-pos4.z);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(ab)
APPLY_PERIODIC_TO_DELTA(cb)
APPLY_PERIODIC_TO_DELTA(db)
APPLY_PERIODIC_TO_DELTA(ad)
APPLY_PERIODIC_TO_DELTA(cd)
#endif
real rdb2 = db.x*db.x + db.y*db.y + db.z*db.z;
real rad2 = ad.x*ad.x + ad.y*ad.y + ad.z*ad.z;
real rcd2 = cd.x*cd.x + cd.y*cd.y + cd.z*cd.z;
real ee = ab.x*(cb.y*db.z-cb.z*db.y) + ab.y*(cb.z*db.x-cb.x*db.z) + ab.z*(cb.x*db.y-cb.y*db.x);
real dot = ad.x*cd.x + ad.y*cd.y + ad.z*cd.z;
real cc = rad2*rcd2 - dot*dot;
real bkk2 = (cc != 0 ? (ee*ee)/(cc) : (real) 0);
bkk2 = rdb2 - bkk2;
real adXcd_0 = yad*zcd - zad*ycd;
real adXcd_1 = zad*xcd - xad*zcd;
real adXcd_2 = xad*ycd - yad*xcd;
real adXcd_0 = ad.y*cd.z - ad.z*cd.y;
real adXcd_1 = ad.z*cd.x - ad.x*cd.z;
real adXcd_2 = ad.x*cd.y - ad.y*cd.x;
real adXcd_nrm2 = adXcd_0*adXcd_0 + adXcd_1*adXcd_1 + adXcd_2*adXcd_2;
real adXcd_dot_db = xdb*adXcd_0 + ydb*adXcd_1 + zdb*adXcd_2;
real adXcd_dot_db = db.x*adXcd_0 + db.y*adXcd_1 + db.z*adXcd_2;
adXcd_dot_db /= SQRT(rdb2*adXcd_nrm2);
real angle = abs(ASIN(adXcd_dot_db));
......@@ -62,13 +54,13 @@ real dedcos = -deddt*eeSign/SQRT(cc*bkk2);
real term = ee / cc;
real dccdxia = (xad*rcd2-xcd*dot) * term;
real dccdyia = (yad*rcd2-ycd*dot) * term;
real dccdzia = (zad*rcd2-zcd*dot) * term;
real dccdxia = (ad.x*rcd2-cd.x*dot) * term;
real dccdyia = (ad.y*rcd2-cd.y*dot) * term;
real dccdzia = (ad.z*rcd2-cd.z*dot) * term;
real dccdxic = (xcd*rad2-xad*dot) * term;
real dccdyic = (ycd*rad2-yad*dot) * term;
real dccdzic = (zcd*rad2-zad*dot) * term;
real dccdxic = (cd.x*rad2-ad.x*dot) * term;
real dccdyic = (cd.y*rad2-ad.y*dot) * term;
real dccdzic = (cd.z*rad2-ad.z*dot) * term;
real dccdxid = -dccdxia - dccdxic;
real dccdyid = -dccdyia - dccdyic;
......@@ -76,17 +68,17 @@ real dccdzid = -dccdzia - dccdzic;
term = ee / rdb2;
real deedxia = ydb*zcb - zdb*ycb;
real deedyia = zdb*xcb - xdb*zcb;
real deedzia = xdb*ycb - ydb*xcb;
real deedxia = db.y*cb.z - db.z*cb.y;
real deedyia = db.z*cb.x - db.x*cb.z;
real deedzia = db.x*cb.y - db.y*cb.x;
real deedxic = yab*zdb - zab*ydb;
real deedyic = zab*xdb - xab*zdb;
real deedzic = xab*ydb - yab*xdb;
real deedxic = ab.y*db.z - ab.z*db.y;
real deedyic = ab.z*db.x - ab.x*db.z;
real deedzic = ab.x*db.y - ab.y*db.x;
real deedxid = ycb*zab - zcb*yab + xdb*term;
real deedyid = zcb*xab - xcb*zab + ydb*term;
real deedzid = xcb*yab - ycb*xab + zdb*term;
real deedxid = cb.y*ab.z - cb.z*ab.y + db.x*term;
real deedyid = cb.z*ab.x - cb.x*ab.z + db.y*term;
real deedzid = cb.x*ab.y - cb.y*ab.x + db.z*term;
// compute first derivative components for this angle
......
// compute the value of the pi-orbital torsion angle
real xad = pos1.x - pos4.x;
real yad = pos1.y - pos4.y;
real zad = pos1.z - pos4.z;
real xbd = pos2.x - pos4.x;
real ybd = pos2.y - pos4.y;
real zbd = pos2.z - pos4.z;
real xec = pos5.x - pos3.x;
real yec = pos5.y - pos3.y;
real zec = pos5.z - pos3.z;
real xgc = pos6.x - pos3.x;
real ygc = pos6.y - pos3.y;
real zgc = pos6.z - pos3.z;
real xip = yad*zbd - ybd*zad + pos3.x;
real yip = zad*xbd - zbd*xad + pos3.y;
real zip = xad*ybd - xbd*yad + pos3.z;
real xiq = yec*zgc - ygc*zec + pos4.x;
real yiq = zec*xgc - zgc*xec + pos4.y;
real ziq = xec*ygc - xgc*yec + pos4.z;
real3 ad = make_real3(pos1.x-pos4.x, pos1.y-pos4.y, pos1.z-pos4.z);
real3 bd = make_real3(pos2.x-pos4.x, pos2.y-pos4.y, pos2.z-pos4.z);
real3 ec = make_real3(pos5.x-pos3.x, pos5.y-pos3.y, pos5.z-pos3.z);
real3 gc = make_real3(pos6.x-pos3.x, pos6.y-pos3.y, pos6.z-pos3.z);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(ad)
APPLY_PERIODIC_TO_DELTA(bd)
APPLY_PERIODIC_TO_DELTA(ec)
APPLY_PERIODIC_TO_DELTA(gc)
#endif
real xip = ad.y*bd.z - bd.y*ad.z + pos3.x;
real yip = ad.z*bd.x - bd.z*ad.x + pos3.y;
real zip = ad.x*bd.y - bd.x*ad.y + pos3.z;
real xiq = ec.y*gc.z - gc.y*ec.z + pos4.x;
real yiq = ec.z*gc.x - gc.z*ec.x + pos4.y;
real ziq = ec.x*gc.y - gc.x*ec.y + pos4.z;
real xcp = pos3.x - xip;
real ycp = pos3.y - yip;
......@@ -112,21 +108,21 @@ real dedziq = ydc*dedxu - xdc*dedyu;
// compute first derivative components for individual atoms
real dedxia = ybd*dedzip - zbd*dedyip;
real dedyia = zbd*dedxip - xbd*dedzip;
real dedzia = xbd*dedyip - ybd*dedxip;
real dedxia = bd.y*dedzip - bd.z*dedyip;
real dedyia = bd.z*dedxip - bd.x*dedzip;
real dedzia = bd.x*dedyip - bd.y*dedxip;
real dedxib = zad*dedyip - yad*dedzip;
real dedyib = xad*dedzip - zad*dedxip;
real dedzib = yad*dedxip - xad*dedyip;
real dedxib = ad.z*dedyip - ad.y*dedzip;
real dedyib = ad.x*dedzip - ad.z*dedxip;
real dedzib = ad.y*dedxip - ad.x*dedyip;
real dedxie = ygc*dedziq - zgc*dedyiq;
real dedyie = zgc*dedxiq - xgc*dedziq;
real dedzie = xgc*dedyiq - ygc*dedxiq;
real dedxie = gc.y*dedziq - gc.z*dedyiq;
real dedyie = gc.z*dedxiq - gc.x*dedziq;
real dedzie = gc.x*dedyiq - gc.y*dedxiq;
real dedxig = zec*dedyiq - yec*dedziq;
real dedyig = xec*dedziq - zec*dedxiq;
real dedzig = yec*dedxiq - xec*dedyiq;
real dedxig = ec.z*dedyiq - ec.y*dedziq;
real dedyig = ec.x*dedziq - ec.z*dedxiq;
real dedzig = ec.y*dedxiq - ec.x*dedyiq;
dedxic = dedxic + dedxip - dedxie - dedxig;
dedyic = dedyic + dedyip - dedyie - dedyig;
......
// compute the value of the bond angle
real xab = pos1.x - pos2.x;
real yab = pos1.y - pos2.y;
real zab = pos1.z - pos2.z;
real3 ab = make_real3(pos1.x-pos2.x, pos1.y-pos2.y, pos1.z-pos2.z);
real3 cb = make_real3(pos3.x-pos2.x, pos3.y-pos2.y, pos3.z-pos2.z);
real xcb = pos3.x - pos2.x;
real ycb = pos3.y - pos2.y;
real zcb = pos3.z - pos2.z;
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(ab)
APPLY_PERIODIC_TO_DELTA(cb)
#endif
real rab = SQRT(xab*xab + yab*yab + zab*zab);
real rcb = SQRT(xcb*xcb + ycb*ycb + zcb*zcb);
real rab = SQRT(ab.x*ab.x + ab.y*ab.y + ab.z*ab.z);
real rcb = SQRT(cb.x*cb.x + cb.y*cb.y + cb.z*cb.z);
real xp = ycb*zab - zcb*yab;
real yp = zcb*xab - xcb*zab;
real zp = xcb*yab - ycb*xab;
real xp = cb.y*ab.z - cb.z*ab.y;
real yp = cb.z*ab.x - cb.x*ab.z;
real zp = cb.x*ab.y - cb.y*ab.x;
real rp = SQRT(xp*xp + yp*yp + zp*zp);
real dot = xab*xcb + yab*ycb + zab*zcb;
real cosine = rab*rcb > 0 ? (dot / (rab*rcb)) : (real) 1;
real dotp = ab.x*cb.x + ab.y*cb.y + ab.z*cb.z;
real cosine = rab*rcb > 0 ? (dotp / (rab*rcb)) : (real) 1;
cosine = (cosine > 1 ? (real) 1 : cosine);
cosine = (cosine < -1 ? -(real) 1 : cosine);
real angle = ACOS(cosine);
real angle;
if (cosine > 0.99f || cosine < -0.99f) {
// Highly unlikely a stretch-bend angle will be near 0 or 180, but just in case...
real3 cross_prod = cross(make_real3(ab.x, ab.y, ab.z), make_real3(cb.x, cb.y, cb.z));
angle = ASIN(SQRT(dot(cross_prod, cross_prod))/(rab*rcb))*RAD_TO_DEG;
if (cosine < 0.0f)
angle = 180-angle;
}
else
angle = ACOS(cosine)*RAD_TO_DEG;
// find chain rule terms for the bond angle deviation
float3 parameters = PARAMS[index];
float2 force_constants = FORCE_CONSTANTS[index];
real dt = RAD_TO_DEG*(angle - parameters.z);
real dt = angle - RAD_TO_DEG*parameters.z;
real terma = rab*rp != 0 ? (-RAD_TO_DEG/(rab*rab*rp)) : (real) 0;
real termc = rcb*rp != 0 ? (RAD_TO_DEG/(rcb*rcb*rp)) : (real) 0;
real ddtdxia = terma * (yab*zp-zab*yp);
real ddtdyia = terma * (zab*xp-xab*zp);
real ddtdzia = terma * (xab*yp-yab*xp);
real ddtdxia = terma * (ab.y*zp-ab.z*yp);
real ddtdyia = terma * (ab.z*xp-ab.x*zp);
real ddtdzia = terma * (ab.x*yp-ab.y*xp);
real ddtdxic = termc * (ycb*zp-zcb*yp);
real ddtdyic = termc * (zcb*xp-xcb*zp);
real ddtdzic = termc * (xcb*yp-ycb*xp);
real ddtdxic = termc * (cb.y*zp-cb.z*yp);
real ddtdyic = termc * (cb.z*xp-cb.x*zp);
real ddtdzic = termc * (cb.x*yp-cb.y*xp);
// find chain rule terms for the bond length deviations
......@@ -52,13 +61,13 @@ real frc2 = ((rp != 0) ? force_constants.y : (real) 0);
real drkk = dr1*frc1 + dr2*frc2;
real ddrdxia = terma * xab;
real ddrdyia = terma * yab;
real ddrdzia = terma * zab;
real ddrdxia = terma * ab.x;
real ddrdyia = terma * ab.y;
real ddrdzia = terma * ab.z;
real ddrdxic = termc * xcb;
real ddrdyic = termc * ycb;
real ddrdzic = termc * zcb;
real ddrdxic = termc * cb.x;
real ddrdyic = termc * cb.y;
real ddrdzic = termc * cb.z;
// get the energy and master chain rule terms for derivatives
......
int2 torsionParams = TORSION_PARAMS[index];
real xba = pos2.x - pos1.x;
real yba = pos2.y - pos1.y;
real zba = pos2.z - pos1.z;
real xcb = pos3.x - pos2.x;
real ycb = pos3.y - pos2.y;
real zcb = pos3.z - pos2.z;
real xdc = pos4.x - pos3.x;
real ydc = pos4.y - pos3.y;
real zdc = pos4.z - pos3.z;
real xed = pos5.x - pos4.x;
real yed = pos5.y - pos4.y;
real zed = pos5.z - pos4.z;
real xt = yba*zcb - ycb*zba;
real yt = zba*xcb - zcb*xba;
real zt = xba*ycb - xcb*yba;
real xu = ycb*zdc - ydc*zcb;
real yu = zcb*xdc - zdc*xcb;
real zu = xcb*ydc - xdc*ycb;
real3 ba = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
real3 cb = make_real3(pos3.x-pos2.x, pos3.y-pos2.y, pos3.z-pos2.z);
real3 dc = make_real3(pos4.x-pos3.x, pos4.y-pos3.y, pos4.z-pos3.z);
real3 ed = make_real3(pos5.x-pos4.x, pos5.y-pos4.y, pos5.z-pos4.z);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(ba)
APPLY_PERIODIC_TO_DELTA(cb)
APPLY_PERIODIC_TO_DELTA(dc)
APPLY_PERIODIC_TO_DELTA(ed)
#endif
real xt = ba.y*cb.z - cb.y*ba.z;
real yt = ba.z*cb.x - cb.z*ba.x;
real zt = ba.x*cb.y - cb.x*ba.y;
real xu = cb.y*dc.z - dc.y*cb.z;
real yu = cb.z*dc.x - dc.z*cb.x;
real zu = cb.x*dc.y - dc.x*cb.y;
real rt2 = xt*xt + yt*yt + zt*zt;
real ru2 = xu*xu + yu*yu + zu*zu;
real rtru = SQRT(rt2 * ru2);
real xv = ydc*zed - yed*zdc;
real yv = zdc*xed - zed*xdc;
real zv = xdc*yed - xed*ydc;
real xv = dc.y*ed.z - ed.y*dc.z;
real yv = dc.z*ed.x - ed.z*dc.x;
real zv = dc.x*ed.y - ed.x*dc.y;
real rv2 = xv*xv + yv*yv + zv*zv;
real rurv = SQRT(ru2 * rv2);
real rcb = SQRT(xcb*xcb + ycb*ycb + zcb*zcb);
real rcb = SQRT(cb.x*cb.x + cb.y*cb.y + cb.z*cb.z);
real cosine1 = (rtru != 0 ? (xt*xu+yt*yu+zt*zu)/rtru : (real) 0);
cosine1 = (cosine1 > 1 ? (real) 1 : cosine1);
cosine1 = (cosine1 < -1 ? (real) -1 : cosine1);
real angle1 = RAD_TO_DEG * ACOS(cosine1);
real sign = xba*xu + yba*yu + zba*zu;
real angle1;
if (cosine1 > 0.99f || cosine1 < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
real3 cross_prod = cross(make_real3(xt, yt, zt), make_real3(xu, yu, zu));
angle1 = RAD_TO_DEG*ASIN(SQRT(dot(cross_prod, cross_prod)/(rt2*ru2)));
if (cosine1 < 0.0f)
angle1 = 180-angle1;
}
else
angle1 = RAD_TO_DEG*ACOS(cosine1);
real sign = ba.x*xu + ba.y*yu + ba.z*zu;
angle1 = (sign < 0 ? -angle1 : angle1);
real value1 = angle1;
real rdc = SQRT(xdc*xdc + ydc*ydc + zdc*zdc);
real rdc = SQRT(dc.x*dc.x + dc.y*dc.y + dc.z*dc.z);
real cosine2 = (xu*xv + yu*yv + zu*zv) / rurv;
cosine2 = (cosine2 > 1 ? (real) 1 : cosine2);
cosine2 = (cosine2 < -1 ? (real) -1 : cosine2);
real angle2 = RAD_TO_DEG * ACOS(cosine2);
sign = xcb*xv + ycb*yv + zcb*zv;
real angle2;
if (cosine2 > 0.99f || cosine2 < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
real3 cross_prod = cross(make_real3(xu, yu, zu), make_real3(xv, yv, zv));
angle2 = RAD_TO_DEG*ASIN(SQRT(dot(cross_prod, cross_prod)/(ru2*rv2)));
if (cosine2 < 0.0f)
angle2 = 180-angle2;
}
else
angle2 = RAD_TO_DEG*ACOS(cosine2);
sign = cb.x*xv + cb.y*yv + cb.z*zv;
angle2 = (sign < 0 ? -angle2 : angle2);
real value2 = angle2;
......@@ -65,24 +79,20 @@ real value2 = angle2;
int chiralAtomIndex = (torsionParams.x > -1 ? torsionParams.x : atom5);
real4 pos6 = posq[chiralAtomIndex];
real xac = pos6.x - pos3.x;
real yac = pos6.y - pos3.y;
real zac = pos6.z - pos3.z;
real xbc = pos2.x - pos3.x;
real ybc = pos2.y - pos3.y;
real zbc = pos2.z - pos3.z;
real3 ac = make_real3(pos6.x-pos3.x, pos6.y-pos3.y, pos6.z-pos3.z);
real3 bc = make_real3(pos2.x-pos3.x, pos2.y-pos3.y, pos2.z-pos3.z);
real3 dc1 = make_real3(pos4.x-pos3.x, pos4.y-pos3.y, pos4.z-pos3.z);
// the pos4 - pos3 delta (xdc, ydc, zdc in scalar form; dc in real3 form) already appears above
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(ac)
APPLY_PERIODIC_TO_DELTA(bc)
APPLY_PERIODIC_TO_DELTA(dc1)
#endif
real xdc1 = pos4.x - pos3.x;
real ydc1 = pos4.y - pos3.y;
real zdc1 = pos4.z - pos3.z;
real c1 = ybc*zdc1 - zbc*ydc1;
real c2 = ydc1*zac - zdc1*yac;
real c3 = yac*zbc - zac*ybc;
real vol = xac*c1 + xbc*c2 + xdc1*c3;
real c1 = bc.y*dc1.z - bc.z*dc1.y;
real c2 = dc1.y*ac.z - dc1.z*ac.y;
real c3 = ac.y*bc.z - ac.z*bc.y;
real vol = ac.x*c1 + bc.x*c2 + dc1.x*c3;
sign = (vol > 0 ? (real) 1 : (real) -1);
sign = (torsionParams.x < 0 ? (real) 1 : sign);
value1 *= sign;
......@@ -152,66 +162,68 @@ dedang2 *= sign * RAD_TO_DEG;
// chain rule terms for first angle derivative components
real xca = pos3.x - pos1.x;
real yca = pos3.y - pos1.y;
real zca = pos3.z - pos1.z;
real3 ca = make_real3(pos3.x-pos1.x, pos3.y-pos1.y, pos3.z-pos1.z);
real3 db = make_real3(pos4.x-pos2.x, pos4.y-pos2.y, pos4.z-pos2.z);
real xdb = pos4.x - pos2.x;
real ydb = pos4.y - pos2.y;
real zdb = pos4.z - pos2.z;
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(ca)
APPLY_PERIODIC_TO_DELTA(db)
#endif
real dedxt = dedang1 * (yt*zcb - ycb*zt) / (rt2*rcb);
real dedyt = dedang1 * (zt*xcb - zcb*xt) / (rt2*rcb);
real dedzt = dedang1 * (xt*ycb - xcb*yt) / (rt2*rcb);
real dedxu = -dedang1 * (yu*zcb - ycb*zu) / (ru2*rcb);
real dedyu = -dedang1 * (zu*xcb - zcb*xu) / (ru2*rcb);
real dedzu = -dedang1 * (xu*ycb - xcb*yu) / (ru2*rcb);
real dedxt = dedang1 * (yt*cb.z - cb.y*zt) / (rt2*rcb);
real dedyt = dedang1 * (zt*cb.x - cb.z*xt) / (rt2*rcb);
real dedzt = dedang1 * (xt*cb.y - cb.x*yt) / (rt2*rcb);
real dedxu = -dedang1 * (yu*cb.z - cb.y*zu) / (ru2*rcb);
real dedyu = -dedang1 * (zu*cb.x - cb.z*xu) / (ru2*rcb);
real dedzu = -dedang1 * (xu*cb.y - cb.x*yu) / (ru2*rcb);
// compute first derivative components for first angle
real dedxia = zcb*dedyt - ycb*dedzt;
real dedyia = xcb*dedzt - zcb*dedxt;
real dedzia = ycb*dedxt - xcb*dedyt;
real dedxia = cb.z*dedyt - cb.y*dedzt;
real dedyia = cb.x*dedzt - cb.z*dedxt;
real dedzia = cb.y*dedxt - cb.x*dedyt;
real dedxib = yca*dedzt - zca*dedyt + zdc*dedyu - ydc*dedzu;
real dedyib = zca*dedxt - xca*dedzt + xdc*dedzu - zdc*dedxu;
real dedzib = xca*dedyt - yca*dedxt + ydc*dedxu - xdc*dedyu;
real dedxib = ca.y*dedzt - ca.z*dedyt + dc.z*dedyu - dc.y*dedzu;
real dedyib = ca.z*dedxt - ca.x*dedzt + dc.x*dedzu - dc.z*dedxu;
real dedzib = ca.x*dedyt - ca.y*dedxt + dc.y*dedxu - dc.x*dedyu;
real dedxic = zba*dedyt - yba*dedzt + ydb*dedzu - zdb*dedyu;
real dedyic = xba*dedzt - zba*dedxt + zdb*dedxu - xdb*dedzu;
real dedzic = yba*dedxt - xba*dedyt + xdb*dedyu - ydb*dedxu;
real dedxic = ba.z*dedyt - ba.y*dedzt + db.y*dedzu - db.z*dedyu;
real dedyic = ba.x*dedzt - ba.z*dedxt + db.z*dedxu - db.x*dedzu;
real dedzic = ba.y*dedxt - ba.x*dedyt + db.x*dedyu - db.y*dedxu;
real dedxid = zcb*dedyu - ycb*dedzu;
real dedyid = xcb*dedzu - zcb*dedxu;
real dedzid = ycb*dedxu - xcb*dedyu;
real dedxid = cb.z*dedyu - cb.y*dedzu;
real dedyid = cb.x*dedzu - cb.z*dedxu;
real dedzid = cb.y*dedxu - cb.x*dedyu;
// chain rule terms for second angle derivative components
real xec = pos5.x - pos3.x;
real yec = pos5.y - pos3.y;
real zec = pos5.z - pos3.z;
real3 ec = make_real3(pos5.x-pos3.x, pos5.y-pos3.y, pos5.z-pos3.z);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(ec)
#endif
real dedxu2 = dedang2 * (yu*zdc - ydc*zu) / (ru2*rdc);
real dedyu2 = dedang2 * (zu*xdc - zdc*xu) / (ru2*rdc);
real dedzu2 = dedang2 * (xu*ydc - xdc*yu) / (ru2*rdc);
real dedxv2 = -dedang2 * (yv*zdc - ydc*zv) / (rv2*rdc);
real dedyv2 = -dedang2 * (zv*xdc - zdc*xv) / (rv2*rdc);
real dedzv2 = -dedang2 * (xv*ydc - xdc*yv) / (rv2*rdc);
real dedxu2 = dedang2 * (yu*dc.z - dc.y*zu) / (ru2*rdc);
real dedyu2 = dedang2 * (zu*dc.x - dc.z*xu) / (ru2*rdc);
real dedzu2 = dedang2 * (xu*dc.y - dc.x*yu) / (ru2*rdc);
real dedxv2 = -dedang2 * (yv*dc.z - dc.y*zv) / (rv2*rdc);
real dedyv2 = -dedang2 * (zv*dc.x - dc.z*xv) / (rv2*rdc);
real dedzv2 = -dedang2 * (xv*dc.y - dc.x*yv) / (rv2*rdc);
// compute first derivative components for second angle
real dedxib2 = zdc*dedyu2 - ydc*dedzu2;
real dedyib2 = xdc*dedzu2 - zdc*dedxu2;
real dedzib2 = ydc*dedxu2 - xdc*dedyu2;
real dedxic2 = ydb*dedzu2 - zdb*dedyu2 + zed*dedyv2 - yed*dedzv2;
real dedyic2 = zdb*dedxu2 - xdb*dedzu2 + xed*dedzv2 - zed*dedxv2;
real dedzic2 = xdb*dedyu2 - ydb*dedxu2 + yed*dedxv2 - xed*dedyv2;
real dedxid2 = zcb*dedyu2 - ycb*dedzu2 + yec*dedzv2 - zec*dedyv2;
real dedyid2 = xcb*dedzu2 - zcb*dedxu2 + zec*dedxv2 - xec*dedzv2;
real dedzid2 = ycb*dedxu2 - xcb*dedyu2 + xec*dedyv2 - yec*dedxv2;
real dedxie2 = zdc*dedyv2 - ydc*dedzv2;
real dedyie2 = xdc*dedzv2 - zdc*dedxv2;
real dedzie2 = ydc*dedxv2 - xdc*dedyv2;
real dedxib2 = dc.z*dedyu2 - dc.y*dedzu2;
real dedyib2 = dc.x*dedzu2 - dc.z*dedxu2;
real dedzib2 = dc.y*dedxu2 - dc.x*dedyu2;
real dedxic2 = db.y*dedzu2 - db.z*dedyu2 + ed.z*dedyv2 - ed.y*dedzv2;
real dedyic2 = db.z*dedxu2 - db.x*dedzu2 + ed.x*dedzv2 - ed.z*dedxv2;
real dedzic2 = db.x*dedyu2 - db.y*dedxu2 + ed.y*dedxv2 - ed.x*dedyv2;
real dedxid2 = cb.z*dedyu2 - cb.y*dedzu2 + ec.y*dedzv2 - ec.z*dedyv2;
real dedyid2 = cb.x*dedzu2 - cb.z*dedxu2 + ec.z*dedxv2 - ec.x*dedzv2;
real dedzid2 = cb.y*dedxu2 - cb.x*dedyu2 + ec.x*dedyv2 - ec.y*dedxv2;
real dedxie2 = dc.z*dedyv2 - dc.y*dedzv2;
real dedyie2 = dc.x*dedzv2 - dc.z*dedxv2;
real dedzie2 = dc.y*dedxv2 - dc.x*dedyv2;
real3 force1 = make_real3(-dedxia, -dedyia, -dedzia);
real3 force2 = make_real3(-dedxib-dedxib2, -dedyib-dedyib2, -dedzib-dedzib2);
......
......@@ -365,7 +365,22 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData
// correction to convert mutual to direct polarization force
#ifdef DIRECT_POLARIZATION
#ifdef MUTUAL_POLARIZATION
real findmp1 = uscale*(scip2*ddsc3_1 - ddsc5_1*(sci3*scip4+scip3*sci4));
real findmp2 = uscale*(scip2*ddsc3_2 - ddsc5_2*(sci3*scip4+scip3*sci4));
real findmp3 = uscale*(scip2*ddsc3_3 - ddsc5_3*(sci3*scip4+scip3*sci4));
ftm2i1 -= 0.5f*findmp1;
ftm2i2 -= 0.5f*findmp2;
ftm2i3 -= 0.5f*findmp3;
real sci3X = sci3 - sci3Y;
real sci4X = sci4 - sci4Y;
real scip3X = scip3 - scip3Y;
real scip4X = scip4 - scip4Y;
ftm2i1 += 0.5f*uscale*(-ddsc5_1*(sci3X*scip4X+scip3X*sci4X));
ftm2i2 += 0.5f*uscale*(-ddsc5_2*(sci3X*scip4X+scip3X*sci4X));
ftm2i3 += 0.5f*uscale*(-ddsc5_3*(sci3X*scip4X+scip3X*sci4X));
#else
real gfd = (scip2*scale3i - 5*rr2*(scip3*sci4+sci3*scip4)*scale5i);
real fdir1 = gfd*xr + scale5i* (sci4*atom1.inducedDipolePolarS.x+scip4*atom1.inducedDipoleS.x + sci3*atom2.inducedDipolePolarS.x+scip3*atom2.inducedDipoleS.x);
real fdir2 = gfd*yr + scale5i* (sci4*atom1.inducedDipolePolarS.y+scip4*atom1.inducedDipoleS.y + sci3*atom2.inducedDipolePolarS.y+scip3*atom2.inducedDipoleS.y);
......@@ -385,21 +400,6 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData
ftm2i1 += 0.5f*fdir1;
ftm2i2 += 0.5f*fdir2;
ftm2i3 += 0.5f*fdir3;
#else
real findmp1 = uscale*(scip2*ddsc3_1 - ddsc5_1*(sci3*scip4+scip3*sci4));
real findmp2 = uscale*(scip2*ddsc3_2 - ddsc5_2*(sci3*scip4+scip3*sci4));
real findmp3 = uscale*(scip2*ddsc3_3 - ddsc5_3*(sci3*scip4+scip3*sci4));
ftm2i1 -= 0.5f*findmp1;
ftm2i2 -= 0.5f*findmp2;
ftm2i3 -= 0.5f*findmp3;
real sci3X = sci3 - sci3Y;
real sci4X = sci4 - sci4Y;
real scip3X = scip3 - scip3Y;
real scip4X = scip4 - scip4Y;
ftm2i1 += 0.5f*uscale*(-ddsc5_1*(sci3X*scip4X+scip3X*sci4X));
ftm2i2 += 0.5f*uscale*(-ddsc5_2*(sci3X*scip4X+scip3X*sci4X));
ftm2i3 += 0.5f*uscale*(-ddsc5_3*(sci3X*scip4X+scip3X*sci4X));
#endif
#endif
......
......@@ -327,7 +327,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
real iEIY = qiUinpI.x*Vijp[1] + qiUindI.x*Vijd[1] - qiUinpI.y*Vijp[0] - qiUindI.y*Vijd[0];
real iEJY = qiUinpJ.x*Vjip[1] + qiUindJ.x*Vjid[1] - qiUinpJ.y*Vjip[0] - qiUindJ.y*Vjid[0];
#ifdef USE_MUTUAL_POLARIZATION
#ifdef MUTUAL_POLARIZATION
// Uind-Uind terms (m=0)
real eCoef = -4*rInvVec[3]*thole_d0;
real dCoef = 6*rInvVec[4]*dthole_d0;
......@@ -488,6 +488,8 @@ extern "C" __global__ void computeElectrostatics(
#ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (numTiles > maxTiles ? startTileIndex+warp*(long long)numTileIndices/totalWarps : warp*(long long)numTiles/totalWarps);
int end = (int) (numTiles > maxTiles ? startTileIndex+(warp+1)*(long long)numTileIndices/totalWarps : (warp+1)*(long long)numTiles/totalWarps);
#else
......@@ -508,11 +510,8 @@ extern "C" __global__ void computeElectrostatics(
int x, y;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles)
x = tiles[pos];
else
#endif
{
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
......@@ -535,7 +534,7 @@ extern "C" __global__ void computeElectrostatics(
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
}
#endif
if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment