Commit 222378c6 authored by Peter Eastman's avatar Peter Eastman
Browse files

Began converting AmoebaGeneralizedKirkwoodForce to new CUDA platform

parent 235a88e5
...@@ -58,6 +58,9 @@ public: ...@@ -58,6 +58,9 @@ public:
return std::map<std::string, double>(); // This force field doesn't define any parameters. return std::map<std::string, double>(); // This force field doesn't define any parameters.
} }
std::vector<std::string> getKernelNames(); std::vector<std::string> getKernelNames();
Kernel& getKernel() {
return kernel;
}
private: private:
AmoebaGeneralizedKirkwoodForce& owner; AmoebaGeneralizedKirkwoodForce& owner;
Kernel kernel; Kernel kernel;
......
...@@ -95,8 +95,8 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl ...@@ -95,8 +95,8 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl
if (name == CalcAmoebaMultipoleForceKernel::Name()) if (name == CalcAmoebaMultipoleForceKernel::Name())
return new CudaCalcAmoebaMultipoleForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcAmoebaMultipoleForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcAmoebaGeneralizedKirkwoodForceKernel::Name()) if (name == CalcAmoebaGeneralizedKirkwoodForceKernel::Name())
// return new CudaCalcAmoebaGeneralizedKirkwoodForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcAmoebaGeneralizedKirkwoodForceKernel(name, platform, cu, context.getSystem());
if (name == CalcAmoebaVdwForceKernel::Name()) if (name == CalcAmoebaVdwForceKernel::Name())
return new CudaCalcAmoebaVdwForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcAmoebaVdwForceKernel(name, platform, cu, context.getSystem());
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "AmoebaCudaKernels.h" #include "AmoebaCudaKernels.h"
#include "CudaAmoebaKernelSources.h" #include "CudaAmoebaKernelSources.h"
#include "openmm/internal/ContextImpl.h" #include "openmm/internal/ContextImpl.h"
#include "openmm/internal/AmoebaGeneralizedKirkwoodForceImpl.h"
#include "openmm/internal/AmoebaMultipoleForceImpl.h" #include "openmm/internal/AmoebaMultipoleForceImpl.h"
#include "openmm/internal/AmoebaWcaDispersionForceImpl.h" #include "openmm/internal/AmoebaWcaDispersionForceImpl.h"
#include "openmm/internal/AmoebaTorsionTorsionForceImpl.h" #include "openmm/internal/AmoebaTorsionTorsionForceImpl.h"
...@@ -794,7 +795,7 @@ CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::stri ...@@ -794,7 +795,7 @@ CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::stri
field(NULL), fieldPolar(NULL), inducedField(NULL), inducedFieldPolar(NULL), torque(NULL), dampingAndThole(NULL), field(NULL), fieldPolar(NULL), inducedField(NULL), inducedFieldPolar(NULL), torque(NULL), dampingAndThole(NULL),
inducedDipole(NULL), inducedDipolePolar(NULL), inducedDipoleErrors(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL), inducedDipole(NULL), inducedDipolePolar(NULL), inducedDipoleErrors(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL),
pmeGrid(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeTheta1(NULL), pmeTheta2(NULL), pmeTheta3(NULL), pmeGrid(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeTheta1(NULL), pmeTheta2(NULL), pmeTheta3(NULL),
pmeIgrid(NULL), pmePhi(NULL), pmePhid(NULL), pmePhip(NULL), pmePhidp(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), sort(NULL) { pmeIgrid(NULL), pmePhi(NULL), pmePhid(NULL), pmePhip(NULL), pmePhidp(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), sort(NULL), gkKernel(NULL) {
} }
CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() { CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
...@@ -999,6 +1000,13 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -999,6 +1000,13 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
maxInducedIterations = 0; maxInducedIterations = 0;
bool usePME = (force.getNonbondedMethod() == AmoebaMultipoleForce::PME); bool usePME = (force.getNonbondedMethod() == AmoebaMultipoleForce::PME);
// See whether there's an AmoebaGeneralizedKirkwoodForce in the System.
const AmoebaGeneralizedKirkwoodForce* gk = NULL;
for (int i = 0; i < system.getNumForces() && gk == NULL; i++)
gk = dynamic_cast<const AmoebaGeneralizedKirkwoodForce*>(&system.getForce(i));
double innerDielectric = (gk == NULL ? 1.0 : gk->getSoluteDielectric());
// Create the kernels. // Create the kernels.
map<string, string> defines; map<string, string> defines;
...@@ -1006,7 +1014,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1006,7 +1014,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["THREAD_BLOCK_SIZE"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize()); defines["THREAD_BLOCK_SIZE"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize());
defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks()); defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
defines["ENERGY_SCALE_FACTOR"] = cu.doubleToString(138.9354558456); // DIVIDE BY INNER DIELECTRIC!!! defines["ENERGY_SCALE_FACTOR"] = cu.doubleToString(138.9354558456/innerDielectric);
if (force.getPolarizationType() == AmoebaMultipoleForce::Direct) if (force.getPolarizationType() == AmoebaMultipoleForce::Direct)
defines["DIRECT_POLARIZATION"] = ""; defines["DIRECT_POLARIZATION"] = "";
double alpha = force.getAEwald(); double alpha = force.getAEwald();
...@@ -1034,6 +1042,14 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1034,6 +1042,14 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["USE_PERIODIC"] = ""; defines["USE_PERIODIC"] = "";
defines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance()); defines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
} }
if (gk != NULL) {
defines["USE_GK"] = "";
defines["GK_C"] = cu.doubleToString(2.455);
double solventDielectric = gk->getSolventDielectric();
defines["GK_FC"] = cu.doubleToString(1*(1-solventDielectric)/(0+1*solventDielectric));
defines["GK_FD"] = cu.doubleToString(2*(1-solventDielectric)/(1+2*solventDielectric));
defines["GK_FQ"] = cu.doubleToString(3*(1-solventDielectric)/(2+3*solventDielectric));
}
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoles, defines); CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoles, defines);
computeMomentsKernel = cu.getKernel(module, "computeLabFrameMoments"); computeMomentsKernel = cu.getKernel(module, "computeLabFrameMoments");
recordInducedDipolesKernel = cu.getKernel(module, "recordInducedDipoles"); recordInducedDipolesKernel = cu.getKernel(module, "recordInducedDipoles");
...@@ -1284,8 +1300,14 @@ void CudaCalcAmoebaMultipoleForceKernel::initializeScaleFactors() { ...@@ -1284,8 +1300,14 @@ void CudaCalcAmoebaMultipoleForceKernel::initializeScaleFactors() {
} }
double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (!hasInitializedScaleFactors) if (!hasInitializedScaleFactors) {
initializeScaleFactors(); initializeScaleFactors();
for (int i = 0; i < (int) context.getForceImpls().size() && gkKernel == NULL; i++) {
AmoebaGeneralizedKirkwoodForceImpl* gkImpl = dynamic_cast<AmoebaGeneralizedKirkwoodForceImpl*>(context.getForceImpls()[i]);
if (gkImpl != NULL)
gkKernel = dynamic_cast<CudaCalcAmoebaGeneralizedKirkwoodForceKernel*>(&gkImpl->getKernel().getImpl());
}
}
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities(); CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// Compute the lab frame moments. // Compute the lab frame moments.
...@@ -1302,14 +1324,41 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1302,14 +1324,41 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
if (pmeGrid == NULL) { if (pmeGrid == NULL) {
// Compute induced dipoles. // Compute induced dipoles.
void* computeFixedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(), if (gkKernel == NULL) {
&nb.getExclusionIndices().getDevicePointer(), &nb.getExclusionRowIndices().getDevicePointer(), void* computeFixedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(), &startTileIndex, &numTileIndices, &nb.getExclusionIndices().getDevicePointer(), &nb.getExclusionRowIndices().getDevicePointer(),
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &dampingAndThole->getDevicePointer()}; &covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(), &startTileIndex, &numTileIndices,
cu.executeKernel(computeFixedFieldKernel, computeFixedFieldArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &dampingAndThole->getDevicePointer()};
void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), cu.executeKernel(computeFixedFieldKernel, computeFixedFieldArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &polarizability->getDevicePointer()}; void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(),
cu.executeKernel(recordInducedDipolesKernel, recordInducedDipolesArgs, cu.getNumAtoms()); &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &polarizability->getDevicePointer()};
cu.executeKernel(recordInducedDipolesKernel, recordInducedDipolesArgs, cu.getNumAtoms());
}
else {
gkKernel->computeBornRadii();
void* computeFixedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionIndices().getDevicePointer(), &nb.getExclusionRowIndices().getDevicePointer(),
&covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(), &startTileIndex, &numTileIndices,
&gkKernel->getBornRadii()->getDevicePointer(), &gkKernel->getField()->getDevicePointer(),
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(computeFixedFieldKernel, computeFixedFieldArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
vector<long long> f;
gkKernel->getField()->download(f);
printf("field\n");
for (int i = 0; i < 3*cu.getNumAtoms(); i++)
printf("%d %g\n", i, f[i]/(double) 0xFFFFFFFF);
void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(),
&gkKernel->getField()->getDevicePointer(), &gkKernel->getInducedDipoles()->getDevicePointer(),
&gkKernel->getInducedDipolesPolar()->getDevicePointer(), &inducedDipole->getDevicePointer(),
&inducedDipolePolar->getDevicePointer(), &polarizability->getDevicePointer()};
cu.executeKernel(recordInducedDipolesKernel, recordInducedDipolesArgs, cu.getNumAtoms());
vector<float> d, dp;
gkKernel->getInducedDipoles()->download(d);
gkKernel->getInducedDipolesPolar()->download(dp);
printf("dipoles\n");
for (int i = 0; i < cu.getNumAtoms(); i++)
printf("%d %g %g %g, %g %g %g\n", i, d[3*i], d[3*i+1], d[3*i+2], dp[3*i], dp[3*i+1], dp[3*i+2]);
}
// Iterate until the dipoles converge. // Iterate until the dipoles converge.
...@@ -1343,6 +1392,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1343,6 +1392,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(),
&inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()}; &inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
if (gkKernel != NULL)
gkKernel->finishComputation(*torque, *labFrameDipoles, *labFrameQuadrupoles, *inducedDipole, *inducedDipolePolar, *dampingAndThole, *covalentFlags, *polarizationGroupFlags);
// Map torques to force. // Map torques to force.
...@@ -1645,85 +1696,180 @@ void CudaCalcAmoebaMultipoleForceKernel::getSystemMultipoleMoments(ContextImpl& ...@@ -1645,85 +1696,180 @@ void CudaCalcAmoebaMultipoleForceKernel::getSystemMultipoleMoments(ContextImpl&
computeSystemMultipoleMoments<float, float4>(context, origin, outputMultipoleMoments); computeSystemMultipoleMoments<float, float4>(context, origin, outputMultipoleMoments);
} }
///* -------------------------------------------------------------------------- * /* -------------------------------------------------------------------------- *
// * AmoebaGeneralizedKirkwood * * AmoebaGeneralizedKirkwood *
// * -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
//
//class CudaCalcAmoebaGeneralizedKirkwoodForceKernel::ForceInfo : public CudaForceInfo { class CudaCalcAmoebaGeneralizedKirkwoodForceKernel::ForceInfo : public CudaForceInfo {
//public: public:
// ForceInfo(const AmoebaGeneralizedKirkwoodForce& force) : force(force) { ForceInfo(const AmoebaGeneralizedKirkwoodForce& force) : force(force) {
// } }
// bool areParticlesIdentical(int particle1, int particle2) { bool areParticlesIdentical(int particle1, int particle2) {
// double charge1, charge2, radius1, radius2, scale1, scale2; double charge1, charge2, radius1, radius2, scale1, scale2;
// force.getParticleParameters(particle1, charge1, radius1, scale1); force.getParticleParameters(particle1, charge1, radius1, scale1);
// force.getParticleParameters(particle2, charge2, radius2, scale2); force.getParticleParameters(particle2, charge2, radius2, scale2);
// return (charge1 == charge2 && radius1 == radius2 && scale1 == scale2); return (charge1 == charge2 && radius1 == radius2 && scale1 == scale2);
// } }
//private: private:
// const AmoebaGeneralizedKirkwoodForce& force; const AmoebaGeneralizedKirkwoodForce& force;
//}; };
//
//CudaCalcAmoebaGeneralizedKirkwoodForceKernel::CudaCalcAmoebaGeneralizedKirkwoodForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CudaCalcAmoebaGeneralizedKirkwoodForceKernel::CudaCalcAmoebaGeneralizedKirkwoodForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) :
// CalcAmoebaGeneralizedKirkwoodForceKernel(name, platform), cu(cu), system(system) { CalcAmoebaGeneralizedKirkwoodForceKernel(name, platform), cu(cu), system(system), params(NULL), bornRadii(NULL), field(NULL), inducedDipoleS(NULL),
// data.incrementKernelCount(); inducedDipolePolarS(NULL), bornSum(NULL), bornForce(NULL) {
//} }
//
//CudaCalcAmoebaGeneralizedKirkwoodForceKernel::~CudaCalcAmoebaGeneralizedKirkwoodForceKernel() { CudaCalcAmoebaGeneralizedKirkwoodForceKernel::~CudaCalcAmoebaGeneralizedKirkwoodForceKernel() {
// data.decrementKernelCount(); cu.setAsCurrent();
//} if (params != NULL)
// delete params;
//void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::initialize(const System& system, const AmoebaGeneralizedKirkwoodForce& force) { if (bornRadii != NULL)
// delete bornRadii;
// data.setHasAmoebaGeneralizedKirkwood( true ); if (field != NULL)
// delete field;
// int numParticles = system.getNumParticles(); if (inducedDipoleS != NULL)
// delete inducedDipoleS;
// std::vector<float> radius(numParticles); if (inducedDipolePolarS != NULL)
// std::vector<float> scale(numParticles); delete inducedDipolePolarS;
// std::vector<float> charge(numParticles); if (bornSum != NULL)
// delete bornSum;
// for( int ii = 0; ii < numParticles; ii++ ){ if (bornForce != NULL)
// double particleCharge, particleRadius, scalingFactor; delete bornForce;
// force.getParticleParameters(ii, particleCharge, particleRadius, scalingFactor); }
// radius[ii] = static_cast<float>( particleRadius );
// scale[ii] = static_cast<float>( scalingFactor ); void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::initialize(const System& system, const AmoebaGeneralizedKirkwoodForce& force) {
// charge[ii] = static_cast<float>( particleCharge ); cu.setAsCurrent();
// } if (cu.getPlatformData().contexts.size() > 1)
// if( data.getUseGrycuk() ){ throw OpenMMException("AmoebaGeneralizedKirkwoodForce does not support using multiple CUDA devices");
// const AmoebaMultipoleForce* multipoles = NULL;
// gpuSetAmoebaGrycukParameters( data.getAmoebaGpu(), static_cast<float>(force.getSoluteDielectric() ), for (int i = 0; i < system.getNumForces() && multipoles == NULL; i++)
// static_cast<float>( force.getSolventDielectric() ), multipoles = dynamic_cast<const AmoebaMultipoleForce*>(&system.getForce(i));
// radius, scale, charge, if (multipoles == NULL)
// force.getIncludeCavityTerm(), throw OpenMMException("AmoebaGeneralizedKirkwoodForce requires the System to also contain an AmoebaMultipoleForce");
// static_cast<float>( force.getProbeRadius() ), CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// static_cast<float>( force.getSurfaceAreaFactor() ) ); int paddedNumAtoms = cu.getPaddedNumAtoms();
// int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
// } else { params = CudaArray::create<float2>(cu, paddedNumAtoms, "amoebaGkParams");
// bornRadii = new CudaArray(cu, paddedNumAtoms, elementSize, "bornRadii");
// gpuSetAmoebaObcParameters( data.getAmoebaGpu(), static_cast<float>(force.getSoluteDielectric() ), field = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "gkField");
// static_cast<float>( force.getSolventDielectric() ), bornSum = CudaArray::create<long long>(cu, paddedNumAtoms, "bornSum");
// radius, scale, charge, bornForce = CudaArray::create<long long>(cu, paddedNumAtoms, "bornForce");
// force.getIncludeCavityTerm(), inducedDipoleS = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipoleS");
// static_cast<float>( force.getProbeRadius() ), inducedDipolePolarS = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolarS");
// static_cast<float>( force.getSurfaceAreaFactor() ) ); cu.addAutoclearBuffer(*field);
// cu.addAutoclearBuffer(*bornSum);
// } cu.addAutoclearBuffer(*bornForce);
// data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force)); vector<float2> paramsVector(paddedNumAtoms);
//} for (int i = 0; i < force.getNumParticles(); i++) {
// double charge, radius, scalingFactor;
//double CudaCalcAmoebaGeneralizedKirkwoodForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { force.getParticleParameters(i, charge, radius, scalingFactor);
// // handled in computeAmoebaMultipoleForce() paramsVector[i] = make_float2((float) radius, (float) (scalingFactor*radius));
// return 0.0;
//} // Make sure the charge matches the one specified by the AmoebaMultipoleForce.
//
//static void computeAmoebaVdwForce( CudaContext& cu ) { double charge2, thole, damping, polarity;
// int axisType, atomX, atomY, atomZ;
// amoebaGpuContext gpu = data.getAmoebaGpu(); vector<double> dipole, quadrupole;
// data.initializeGpu(); multipoles->getMultipoleParameters(i, charge2, dipole, quadrupole, axisType, atomZ, atomX, atomY, thole, damping, polarity);
// if (charge != charge2)
// // Vdw14_7F throw OpenMMException("AmoebaGeneralizedKirkwoodForce and AmoebaMultipoleForce must specify the same charge for every atom");
// kCalculateAmoebaVdw14_7Forces(gpu, data.getUseVdwNeighborList()); }
//} params->upload(paramsVector);
// Create the kernels.
map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(paddedNumAtoms);
defines["THREAD_BLOCK_SIZE"] = cu.intToString(nb.getForceThreadBlockSize());
defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
defines["GK_C"] = cu.doubleToString(2.455);
double solventDielectric = force.getSolventDielectric();
defines["GK_FC"] = cu.doubleToString(1*(1-solventDielectric)/(0+1*solventDielectric));
defines["GK_FD"] = cu.doubleToString(2*(1-solventDielectric)/(1+2*solventDielectric));
defines["GK_FQ"] = cu.doubleToString(3*(1-solventDielectric)/(2+3*solventDielectric));
defines["ENERGY_SCALE_FACTOR"] = cu.doubleToString(138.9354558456/solventDielectric);
stringstream forceSource;
forceSource << CudaKernelSources::vectorOps;
forceSource << CudaAmoebaKernelSources::amoebaGk;
forceSource << "#define F1\n";
forceSource << CudaAmoebaKernelSources::gkPairForce;
forceSource << CudaAmoebaKernelSources::gkEDiffPairForce;
forceSource << "#undef F1\n";
forceSource << "#define F2\n";
forceSource << CudaAmoebaKernelSources::gkPairForce;
forceSource << "#undef F2\n";
forceSource << "#define T1\n";
forceSource << CudaAmoebaKernelSources::gkPairForce;
forceSource << CudaAmoebaKernelSources::gkEDiffPairForce;
forceSource << "#undef T1\n";
forceSource << "#define T2\n";
forceSource << CudaAmoebaKernelSources::gkPairForce;
forceSource << "#undef T2\n";
forceSource << "#define T3\n";
forceSource << CudaAmoebaKernelSources::gkEDiffPairForce;
forceSource << "#undef T3\n";
forceSource << "#define B1\n";
forceSource << "#define B2\n";
forceSource << CudaAmoebaKernelSources::gkPairForce;
CUmodule module = cu.createModule(forceSource.str(), defines);
computeBornSumKernel = cu.getKernel(module, "computeBornSum");
reduceBornSumKernel = cu.getKernel(module, "reduceBornSum");
gkForceKernel = cu.getKernel(module, "computeGKForces");
chainRuleKernel = cu.getKernel(module, "computeChainRuleForce");
ediffKernel = cu.getKernel(module, "computeEDiffForce");
cu.addForce(new ForceInfo(force));
}
double CudaCalcAmoebaGeneralizedKirkwoodForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// Since GK is so tightly entwined with the electrostatics, this method does nothing, and the force calculation
// is driven by AmoebaMultipoleForce.
return 0.0;
}
void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::computeBornRadii() {
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
int numTiles = nb.getNumTiles();
int numForceThreadBlocks = nb.getNumForceThreadBlocks();
int forceThreadBlockSize = nb.getForceThreadBlockSize();
void* computeBornSumArgs[] = {&bornSum->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&params->getDevicePointer(), &numTiles};
cu.executeKernel(computeBornSumKernel, computeBornSumArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
void* reduceBornSumArgs[] = {&bornSum->getDevicePointer(), &params->getDevicePointer(), &bornRadii->getDevicePointer()};
cu.executeKernel(reduceBornSumKernel, reduceBornSumArgs, cu.getNumAtoms());
vector<float> r;
bornRadii->download(r);
printf("radii\n");
for (int i = 0; i < cu.getNumAtoms(); i++)
printf("%d %g\n", i, r[i]);
}
void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::finishComputation(CudaArray& torque, CudaArray& labFrameDipoles, CudaArray& labFrameQuadrupoles,
CudaArray& inducedDipole, CudaArray& inducedDipolePolar, CudaArray& dampingAndThole, CudaArray& covalentFlags, CudaArray& polarizationGroupFlags) {
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
int startTileIndex = nb.getStartTileIndex();
int numTileIndices = nb.getNumTiles();
int numForceThreadBlocks = nb.getNumForceThreadBlocks();
int forceThreadBlockSize = nb.getForceThreadBlockSize();
void* gkForceArgs[] = {&cu.getForce().getDevicePointer(), &torque.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&cu.getPosq().getDevicePointer(), &startTileIndex, &numTileIndices, &labFrameDipoles.getDevicePointer(),
&labFrameQuadrupoles.getDevicePointer(), &inducedDipole.getDevicePointer(), &inducedDipolePolar.getDevicePointer(),
&bornRadii->getDevicePointer(), &bornForce->getDevicePointer()};
cu.executeKernel(gkForceKernel, gkForceArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
// Compute cavity term...
void* chainRuleArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &startTileIndex, &numTileIndices,
&params->getDevicePointer(), &bornRadii->getDevicePointer(), &bornForce->getDevicePointer()};
cu.executeKernel(chainRuleKernel, chainRuleArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
void* ediffArgs[] = {&cu.getForce().getDevicePointer(), &torque.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&cu.getPosq().getDevicePointer(), &nb.getExclusionIndices().getDevicePointer(), &nb.getExclusionRowIndices().getDevicePointer(),
&covalentFlags.getDevicePointer(), &polarizationGroupFlags.getDevicePointer(), &startTileIndex, &numTileIndices,
&labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(), &inducedDipole.getDevicePointer(),
&inducedDipolePolar.getDevicePointer(), &inducedDipoleS->getDevicePointer(), &inducedDipolePolarS->getDevicePointer(),
&dampingAndThole.getDevicePointer()};
cu.executeKernel(ediffKernel, ediffArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
/* -------------------------------------------------------------------------- * /* -------------------------------------------------------------------------- *
* AmoebaVdw * * AmoebaVdw *
......
...@@ -37,6 +37,8 @@ ...@@ -37,6 +37,8 @@
namespace OpenMM { namespace OpenMM {
class CudaCalcAmoebaGeneralizedKirkwoodForceKernel;
/** /**
* This kernel is invoked by AmoebaHarmonicBondForce to calculate the forces acting on the system and the energy of the system. * This kernel is invoked by AmoebaHarmonicBondForce to calculate the forces acting on the system and the energy of the system.
*/ */
...@@ -427,6 +429,7 @@ private: ...@@ -427,6 +429,7 @@ private:
CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel; CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel;
CUfunction pmeUpdateBsplinesKernel, pmeAtomRangeKernel, pmeZIndexKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeConvolutionKernel, pmeFixedPotentialKernel, pmeInducedPotentialKernel; CUfunction pmeUpdateBsplinesKernel, pmeAtomRangeKernel, pmeZIndexKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeConvolutionKernel, pmeFixedPotentialKernel, pmeInducedPotentialKernel;
CUfunction pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel, computePotentialKernel; CUfunction pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel, computePotentialKernel;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel* gkKernel;
static const int PmeOrder = 5; static const int PmeOrder = 5;
}; };
...@@ -453,10 +456,38 @@ public: ...@@ -453,10 +456,38 @@ public:
* @return the potential energy due to the force * @return the potential energy due to the force
*/ */
double execute(ContextImpl& context, bool includeForces, bool includeEnergy); double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Perform the computation of Born radii.
*/
void computeBornRadii();
/**
* Perform the final parts of the force/energy computation.
*/
void finishComputation(CudaArray& torque, CudaArray& labFrameDipoles, CudaArray& labFrameQuadrupoles, CudaArray& inducedDipole, CudaArray& inducedDipolePolar, CudaArray& dampingAndThole, CudaArray& covalentFlags, CudaArray& polarizationGroupFlags);
CudaArray* getBornRadii() {
return bornRadii;
}
CudaArray* getField() {
return field;
}
CudaArray* getInducedDipoles() {
return inducedDipoleS;
}
CudaArray* getInducedDipolesPolar() {
return inducedDipolePolarS;
}
private: private:
class ForceInfo; class ForceInfo;
CudaContext& cu; CudaContext& cu;
System& system; System& system;
CudaArray* params;
CudaArray* bornSum;
CudaArray* bornRadii;
CudaArray* bornForce;
CudaArray* field;
CudaArray* inducedDipoleS;
CudaArray* inducedDipolePolarS;
CUfunction computeBornSumKernel, reduceBornSumKernel, gkForceKernel, chainRuleKernel, ediffKernel;
}; };
/** /**
......
#define TILE_SIZE 32
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
/**
* Reduce the Born sums to compute the Born radii.
*/
extern "C" __global__ void reduceBornSum(const long long* __restrict__ bornSum, const float2* __restrict__ params, real* __restrict__ bornRadii) {
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// Get summed Born data
real sum = RECIP(0xFFFFFFFF)*bornSum[index];
// Now calculate Born radius.
float radius = params[index].x;
radius = RECIP(radius*radius*radius);
sum = radius-sum;
sum = (sum <= 0 ? (real) 1000 : POW(sum, -1/(real) 3));
bornRadii[index] = sum;
}
}
/**
* Data structure used by computeBornSum().
*/
typedef struct {
real3 pos;
real bornSum;
float radius, scaledRadius, padding;
} AtomData1;
__device__ void computeBornSumOneInteraction(AtomData1& atom1, AtomData1& atom2, float& bornSum) {
if (atom1.radius <= 0)
return; // Ignore this interaction
real3 delta = atom2.pos - atom1.pos;
real r2 = dot(delta, delta);
real r = SQRT(r2);
float sk = atom2.scaledRadius;
real sk2 = sk*sk;
if (atom1.radius+r < sk) {
real lik = atom1.radius;
real uik = sk - r;
bornSum -= RECIP(uik*uik*uik) - RECIP(lik*lik*lik);
}
real uik = r+sk;
real lik;
if (atom1.radius+r < sk)
lik = sk-r;
else if (r < atom1.radius+sk)
lik = atom1.radius;
else
lik = r-sk;
real l2 = lik*lik;
real l4 = l2*l2;
real lr = lik*r;
real l4r = l4*r;
real u2 = uik*uik;
real u4 = u2*u2;
real ur = uik*r;
real u4r = u4*r;
real term = (3*(r2-sk2)+6*u2-8*ur)/u4r - (3*(r2-sk2)+6*l2-8*lr)/l4r;
bornSum += term/16;
}
/**
* Compute the Born sum.
*/
extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ bornSum, const real4* __restrict__ posq,
const float2* __restrict__ params, unsigned int numTiles) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
unsigned int pos = warp*numTiles/totalWarps;
unsigned int end = (warp+1)*numTiles/totalWarps;
unsigned int lasty = 0xFFFFFFFF;
__shared__ AtomData1 localData[THREAD_BLOCK_SIZE];
do {
// Extract the coordinates of this tile
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
unsigned int x, y;
AtomData1 data;
data.bornSum = 0;
if (pos < end) {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
unsigned int atom1 = x*TILE_SIZE + tgx;
data.pos = trimTo3(posq[atom1]);
float2 params1 = params[atom1];
data.radius = params1.x;
data.scaledRadius = params1.y;
if (pos >= end)
; // This warp is done.
else if (x == y) {
// This tile is on the diagonal.
localData[threadIdx.x].pos = data.pos;
localData[threadIdx.x].radius = params1.x;
localData[threadIdx.x].scaledRadius = params1.y;
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2)
computeBornSumOneInteraction(data, localData[tbx+j], data.bornSum);
}
}
else {
// This is an off-diagonal tile.
if (lasty != y) {
unsigned int j = y*TILE_SIZE + tgx;
real4 tempPosq = posq[j];
localData[threadIdx.x].pos = trimTo3(tempPosq);
float2 tempParams = params[j];
localData[threadIdx.x].radius = tempParams.x;
localData[threadIdx.x].scaledRadius = tempParams.y;
}
localData[threadIdx.x].bornSum = 0;
// Compute the full set of interactions in this tile.
unsigned int tj = tgx;
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
computeBornSumOneInteraction(data, localData[tbx+j], data.bornSum);
computeBornSumOneInteraction(localData[tbx+j], data, localData[tbx+j].bornSum);
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
}
}
// Write results.
if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&bornSum[offset], static_cast<unsigned long long>((long long) (data.bornSum*0xFFFFFFFF)));
}
if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx;
atomicAdd(&bornSum[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].bornSum*0xFFFFFFFF)));
}
lasty = y;
pos++;
} while (pos < end);
}
/**
* Data structure used by computeGKForces().
*/
typedef struct {
real3 pos, force, dipole, inducedDipole, inducedDipolePolar;
real quadrupoleXX, quadrupoleXY, quadrupoleXZ;
real quadrupoleYY, quadrupoleYZ, quadrupoleZZ;
real q, bornRadius, bornForce;
} AtomData2;
__device__ void computeOneInteractionF1(AtomData2& atom1, volatile AtomData2& atom2, real& outputEnergy, real3& force);
__device__ void computeOneInteractionF2(AtomData2& atom1, volatile AtomData2& atom2, real& outputEnergy, real3& force);
__device__ void computeOneInteractionT1(AtomData2& atom1, volatile AtomData2& atom2, real3& torque);
__device__ void computeOneInteractionT2(AtomData2& atom1, volatile AtomData2& atom2, real3& torque);
__device__ void computeOneInteractionB1B2(AtomData2& atom1, volatile AtomData2& atom2);
inline __device__ void loadAtomData2(AtomData2& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const real* __restrict__ bornRadius) {
float4 atomPosq = posq[atom];
data.pos = trimTo3(atomPosq);
data.q = atomPosq.w;
data.dipole.x = labFrameDipole[atom*3];
data.dipole.y = labFrameDipole[atom*3+1];
data.dipole.z = labFrameDipole[atom*3+2];
data.quadrupoleXX = labFrameQuadrupole[atom*5];
data.quadrupoleXY = labFrameQuadrupole[atom*5+1];
data.quadrupoleXZ = labFrameQuadrupole[atom*5+2];
data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
data.quadrupoleZZ = -(data.quadrupoleXX+data.quadrupoleYY);
data.inducedDipole.x = inducedDipole[atom*3];
data.inducedDipole.y = inducedDipole[atom*3+1];
data.inducedDipole.z = inducedDipole[atom*3+2];
data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
data.bornRadius = bornRadius[atom];
}
inline __device__ void zeroAtomData(AtomData2& data) {
data.force = make_real3(0);
data.bornForce = 0;
}
/**
* Compute electrostatic interactions.
*/
extern "C" __global__ void computeGKForces(
unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer,
const real4* __restrict__ posq, unsigned int startTileIndex, unsigned int numTileIndices, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar,
const real* __restrict__ bornRadii, unsigned long long* __restrict__ bornForce) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int numTiles = numTileIndices;
unsigned int pos = startTileIndex+warp*numTiles/totalWarps;
unsigned int end = startTileIndex+(warp+1)*numTiles/totalWarps;
real energy = 0;
__shared__ AtomData2 localData[THREAD_BLOCK_SIZE];
do {
// Extract the coordinates of this tile
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
unsigned int x, y;
AtomData2 data;
if (pos < end) {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
unsigned int atom1 = x*TILE_SIZE + tgx;
loadAtomData2(data, atom1, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, bornRadii);
zeroAtomData(data);
if (pos >= end)
; // This warp is done.
else if (x == y) {
// This tile is on the diagonal.
localData[threadIdx.x].pos = data.pos;
localData[threadIdx.x].q = data.q;
localData[threadIdx.x].dipole = data.dipole;
localData[threadIdx.x].quadrupoleXX = data.quadrupoleXX;
localData[threadIdx.x].quadrupoleXY = data.quadrupoleXY;
localData[threadIdx.x].quadrupoleXZ = data.quadrupoleXZ;
localData[threadIdx.x].quadrupoleYY = data.quadrupoleYY;
localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ;
localData[threadIdx.x].quadrupoleZZ = data.quadrupoleZZ;
localData[threadIdx.x].inducedDipole = data.inducedDipole;
localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar;
localData[threadIdx.x].bornRadius = data.bornRadius;
// Compute forces.
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
real tempEnergy;
computeOneInteractionF1(data, localData[tbx+j], tempEnergy, tempForce);
computeOneInteractionF2(data, localData[tbx+j], tempEnergy, tempForce);
data.force += tempForce;
energy += 0.5f*tempEnergy;
}
}
data.force *= 0.5f;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
// Compute torques.
zeroAtomData(data);
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempTorque;
computeOneInteractionT1(data, localData[tbx+j], tempTorque);
computeOneInteractionT2(data, localData[tbx+j], tempTorque);
data.force += tempTorque;
}
}
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
// Chain rule terms?
zeroAtomData(data);
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS)
computeOneInteractionB1B2(data, localData[tbx+j]);
}
atomicAdd(&bornForce[atom1], static_cast<unsigned long long>((long long) (data.bornForce*0xFFFFFFFF)));
}
else {
// This is an off-diagonal tile.
unsigned int j = y*TILE_SIZE + tgx;
loadAtomData2(localData[threadIdx.x], j, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, bornRadii);
zeroAtomData(localData[threadIdx.x]);
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
real tempEnergy;
computeOneInteractionF1(data, localData[tbx+tj], tempEnergy, tempForce);
computeOneInteractionF2(data, localData[tbx+tj], tempEnergy, tempForce);
data.force += tempForce;
localData[tbx+tj].force -= tempForce;
energy += tempEnergy;
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= 0.5f;
localData[threadIdx.x].force *= 0.5f;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
}
// Compute torques.
zeroAtomData(data);
zeroAtomData(localData[threadIdx.x]);
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempTorque;
computeOneInteractionT1(data, localData[tbx+tj], tempTorque);
computeOneInteractionT2(data, localData[tbx+tj], tempTorque);
data.force += tempTorque;
computeOneInteractionT1(localData[tbx+tj], data, tempTorque);
computeOneInteractionT2(localData[tbx+tj], data, tempTorque);
localData[tbx+tj].force += tempTorque;
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
}
// Chain rule terms?
zeroAtomData(data);
zeroAtomData(localData[threadIdx.x]);
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS)
computeOneInteractionB1B2(data, localData[tbx+tj]);
tj = (tj + 1) & (TILE_SIZE - 1);
}
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&bornForce[offset], static_cast<unsigned long long>((long long) (data.bornForce*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&bornForce[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].bornForce*0xFFFFFFFF)));
}
}
}
pos++;
} while (pos < end);
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy*0.5f;
}
/**
* Data structure used by computeChainRuleForce().
*/
typedef struct {
real3 pos, force;
real radius, scaledRadius, bornRadius, bornForce;
} AtomData3;
inline __device__ void loadAtomData3(AtomData3& data, int atom, const real4* __restrict__ posq, const float2* __restrict__ params, const real* __restrict__ bornRadius, const long long* __restrict__ bornForce) {
data.pos = trimTo3(posq[atom]);
data.bornRadius = bornRadius[atom];
float2 params1 = params[atom];
data.radius = params1.x;
data.scaledRadius = params1.y;
data.bornForce = bornForce[atom]/(real) 0xFFFFFFFF;
}
__device__ void computeBornChainRuleInteraction(AtomData3& atom1, AtomData3& atom2, real3& force) {
real third = 1/(real) 3;
real pi43 = 4*third*M_PI;
real factor = -POW(M_PI, third)*POW(6, 2/(real) 3)/9;
real term = pi43/(atom1.bornRadius*atom1.bornRadius*atom1.bornRadius);
term = factor/POW(term, 4/(real) 3);
real3 delta = atom2.pos-atom1.pos;
float sk = atom2.scaledRadius;
real sk2 = sk*sk;
real r2 = dot(delta, delta);
real r = SQRT(r2);
real de = 0;
if (atom1.radius+r < sk) {
real uik = sk-r;
real uik4 = uik*uik;
uik4 = uik4*uik4;
de = -4*M_PI/uik4;
real lik = sk - r;
real lik4 = lik*lik;
lik4 = lik4*lik4;
de += 0.25f*M_PI*(sk2-4*sk*r+17*r2)/(r2*lik4);
}
else if (r < atom1.radius+sk) {
real lik = atom1.radius;
real lik4 = lik*lik;
lik4 = lik4*lik4;
de += 0.25f*M_PI*(2*atom1.radius*atom1.radius-sk2-r2)/(r2*lik4);
}
else {
real lik = r-sk;
real lik4 = lik*lik;
lik4 = lik4*lik4;
de += 0.25f*M_PI*(sk2-4*sk*r+r2)/(r2*lik4);
}
real uik = r+sk;
real uik4 = uik*uik;
uik4 = uik4*uik4;
de -= 0.25f*M_PI*(sk2+4*sk*r+r2)/(r2*uik4);
real dbr = term*de/r;
de = dbr*atom1.bornForce;
force = delta*de;
}
/**
* Compute electrostatic interactions.
*/
extern "C" __global__ void computeChainRuleForce(
unsigned long long* __restrict__ forceBuffers, const real4* __restrict__ posq, unsigned int startTileIndex, unsigned int numTileIndices,
const float2* __restrict__ params, const real* __restrict__ bornRadii, const long long* __restrict__ bornForce) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int numTiles = numTileIndices;
unsigned int pos = startTileIndex+warp*numTiles/totalWarps;
unsigned int end = startTileIndex+(warp+1)*numTiles/totalWarps;
__shared__ AtomData3 localData[THREAD_BLOCK_SIZE];
do {
// Extract the coordinates of this tile
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
unsigned int x, y;
AtomData3 data;
if (pos < end) {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
unsigned int atom1 = x*TILE_SIZE + tgx;
loadAtomData3(data, atom1, posq, params, bornRadii, bornForce);
data.force = make_real3(0);
if (pos >= end)
; // This warp is done.
else if (x == y) {
// This tile is on the diagonal.
localData[threadIdx.x].pos = data.pos;
localData[threadIdx.x].radius = data.radius;
localData[threadIdx.x].scaledRadius = data.scaledRadius;
localData[threadIdx.x].bornRadius = data.bornRadius;
localData[threadIdx.x].bornForce = data.bornForce;
// Compute forces.
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
computeBornChainRuleInteraction(data, localData[tbx+j], tempForce);
data.force -= tempForce;
localData[tbx+j].force += tempForce;
}
}
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
}
else {
// This is an off-diagonal tile.
unsigned int j = y*TILE_SIZE + tgx;
loadAtomData3(localData[threadIdx.x], j, posq, params, bornRadii, bornForce);
localData[threadIdx.x].force = make_real3(0);
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
computeBornChainRuleInteraction(data, localData[tbx+tj], tempForce);
data.force -= tempForce;
localData[tbx+tj].force += tempForce;
computeBornChainRuleInteraction(localData[tbx+tj], data, tempForce);
data.force += tempForce;
localData[tbx+tj].force -= tempForce;
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
}
}
}
pos++;
} while (pos < end);
}
typedef struct {
real3 pos, force, dipole, inducedDipole, inducedDipolePolar, inducedDipoleS, inducedDipolePolarS;
real q, quadrupoleXX, quadrupoleXY, quadrupoleXZ;
real quadrupoleYY, quadrupoleYZ, quadrupoleZZ;
float thole, damp, padding;
} AtomData4;
__device__ void computeOneEDiffInteractionF1(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real& outputEnergy, real3& outputForce);
__device__ void computeOneEDiffInteractionT1(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real3& outputForce);
__device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real3& outputForce);
inline __device__ void loadAtomData4(AtomData4& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar,
const real* __restrict__ inducedDipoleS, const real* __restrict__ inducedDipolePolarS, const float2* __restrict__ dampingAndThole) {
real4 atomPosq = posq[atom];
data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
data.q = atomPosq.w;
data.dipole.x = labFrameDipole[atom*3];
data.dipole.y = labFrameDipole[atom*3+1];
data.dipole.z = labFrameDipole[atom*3+2];
data.quadrupoleXX = labFrameQuadrupole[atom*5];
data.quadrupoleXY = labFrameQuadrupole[atom*5+1];
data.quadrupoleXZ = labFrameQuadrupole[atom*5+2];
data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
data.quadrupoleZZ = -(data.quadrupoleXX+data.quadrupoleYY);
data.inducedDipole.x = inducedDipole[atom*3];
data.inducedDipole.y = inducedDipole[atom*3+1];
data.inducedDipole.z = inducedDipole[atom*3+2];
data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
data.inducedDipoleS.x = inducedDipoleS[atom*3];
data.inducedDipoleS.y = inducedDipoleS[atom*3+1];
data.inducedDipoleS.z = inducedDipoleS[atom*3+2];
data.inducedDipolePolarS.x = inducedDipolePolarS[atom*3];
data.inducedDipolePolarS.y = inducedDipolePolarS[atom*3+1];
data.inducedDipolePolarS.z = inducedDipolePolarS[atom*3+2];
float2 temp = dampingAndThole[atom];
data.damp = temp.x;
data.thole = temp.y;
}
__device__ real computeDScaleFactor(unsigned int polarizationGroup) {
return (polarizationGroup & 1 ? 0 : 1);
}
__device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGroup) {
bool x = (covalent.x & 1);
bool y = (covalent.y & 1);
bool p = (polarizationGroup & 1);
return (x && y ? 0.0f : (x && p ? 0.5f : 1.0f));
}
/**
* Compute electrostatic interactions.
*/
extern "C" __global__ void computeEDiffForce(
unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer,
const real4* __restrict__ posq, const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices,
const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, unsigned int startTileIndex, unsigned int numTileIndices,
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const real* __restrict__ inducedDipoleS, const real* __restrict__ inducedDipolePolarS,
const float2* __restrict__ dampingAndThole) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int numTiles = numTileIndices;
unsigned int pos = startTileIndex+warp*numTiles/totalWarps;
unsigned int end = startTileIndex+(warp+1)*numTiles/totalWarps;
real energy = 0;
__shared__ AtomData4 localData[THREAD_BLOCK_SIZE];
__shared__ unsigned int exclusionRange[2*WARPS_PER_GROUP];
__shared__ int exclusionIndex[WARPS_PER_GROUP];
do {
// Extract the coordinates of this tile
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
const unsigned int localGroupIndex = threadIdx.x/TILE_SIZE;
unsigned int x, y;
AtomData4 data;
if (pos < end) {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
unsigned int atom1 = x*TILE_SIZE + tgx;
loadAtomData4(data, atom1, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, inducedDipoleS, inducedDipolePolarS, dampingAndThole);
data.force = make_real3(0);
// Locate the exclusion data for this tile.
if (tgx < 2)
exclusionRange[2*localGroupIndex+tgx] = exclusionRowIndices[x+tgx];
if (tgx == 0)
exclusionIndex[localGroupIndex] = -1;
for (unsigned int i = exclusionRange[2*localGroupIndex]+tgx; i < exclusionRange[2*localGroupIndex+1]; i += TILE_SIZE)
if (exclusionIndices[i] == y)
exclusionIndex[localGroupIndex] = i*TILE_SIZE;
bool hasExclusions = (exclusionIndex[localGroupIndex] > -1);
if (pos >= end)
; // This warp is done.
else if (x == y) {
// This tile is on the diagonal.
localData[threadIdx.x].pos = data.pos;
localData[threadIdx.x].q = data.q;
localData[threadIdx.x].dipole = data.dipole;
localData[threadIdx.x].quadrupoleXX = data.quadrupoleXX;
localData[threadIdx.x].quadrupoleXY = data.quadrupoleXY;
localData[threadIdx.x].quadrupoleXZ = data.quadrupoleXZ;
localData[threadIdx.x].quadrupoleYY = data.quadrupoleYY;
localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ;
localData[threadIdx.x].quadrupoleZZ = data.quadrupoleZZ;
localData[threadIdx.x].inducedDipole = data.inducedDipole;
localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar;
localData[threadIdx.x].inducedDipoleS = data.inducedDipoleS;
localData[threadIdx.x].inducedDipolePolarS = data.inducedDipolePolarS;
localData[threadIdx.x].thole = data.thole;
localData[threadIdx.x].damp = data.damp;
uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
// Compute forces.
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
real tempEnergy;
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
computeOneEDiffInteractionF1(data, localData[tbx+j], d, p, tempEnergy, tempForce);
energy += 0.25f*tempEnergy;
data.force += tempForce;
}
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
}
data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
// Compute torques.
data.force = make_real3(0);
covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempTorque;
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
computeOneEDiffInteractionT1(data, localData[tbx+j], d, p, tempTorque);
data.force += tempTorque;
}
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
}
data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
}
else {
// This is an off-diagonal tile.
unsigned int j = y*TILE_SIZE + tgx;
loadAtomData4(localData[threadIdx.x], j, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, inducedDipoleS, inducedDipolePolarS, dampingAndThole);
localData[threadIdx.x].force = make_real3(0);
uint2 covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0));
unsigned int polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0);
covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx));
covalent.y = (covalent.y >> tgx) | (covalent.y << (TILE_SIZE - tgx));
polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx));
// Compute forces.
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
real tempEnergy;
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
computeOneEDiffInteractionF1(data, localData[tbx+tj], d, p, tempEnergy, tempForce);
energy += 0.5f*tempEnergy;
data.force += tempForce;
localData[tbx+tj].force -= tempForce;
}
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
}
// Compute torques.
data.force = make_real3(0);
localData[threadIdx.x].force = make_real3(0);
covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0));
polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0);
covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx));
covalent.y = (covalent.y >> tgx) | (covalent.y << (TILE_SIZE - tgx));
polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx));
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempTorque;
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
computeOneEDiffInteractionT1(data, localData[tbx+tj], d, p, tempTorque);
data.force += tempTorque;
computeOneEDiffInteractionT3(data, localData[tbx+tj], d, p, tempTorque);
localData[tbx+tj].force += tempTorque;
}
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
}
}
}
pos++;
} while (pos < end);
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy*ENERGY_SCALE_FACTOR;
}
#if defined F1
__device__ void computeOneEDiffInteractionF1(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real& outputEnergy, real3& outputForce) {
#elif defined T1
__device__ void computeOneEDiffInteractionT1(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real3& outputForce) {
#elif defined T3
__device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real3& outputForce) {
#endif
const float uscale = 1;
// deltaR
real xr = atom2.pos.x - atom1.pos.x;
real yr = atom2.pos.y - atom1.pos.y;
real zr = atom2.pos.z - atom1.pos.z;
real r22 = xr*xr + yr*yr + zr*zr;
real r = SQRT(r22);
real rr1 = RECIP(r);
real rr2 = rr1*rr1;
real rr3 = rr1*rr2;
real scale3 = 1;
real scale5 = 1;
real scale7 = 1;
#ifdef F1
real ddsc3_1 = 0;
real ddsc3_2 = 0;
real ddsc3_3 = 0;
real ddsc5_1 = 0;
real ddsc5_2 = 0;
real ddsc5_3 = 0;
real ddsc7_1 = 0;
real ddsc7_2 = 0;
real ddsc7_3 = 0;
real ftm2i1 = 0;
real ftm2i2 = 0;
real ftm2i3 = 0;
#endif
// apply Thole polarization damping to scale factors
real damp = atom1.damp*atom2.damp;
if (damp != 0) {
real pgamma = atom2.thole > atom1.thole ? atom1.thole : atom2.thole;
real ratio = (r/damp);
damp = -pgamma*ratio*ratio*ratio;
if (damp > -50) {
real dampE = EXP(damp);
real damp2 = damp*damp;
scale3 = 1 - dampE;
scale5 = 1 - (1 - damp)*dampE;
scale7 = 1 - (1 - damp + 0.6f*damp2)*dampE;
#ifdef F1
ddsc3_1 = -3*damp*EXP(damp)*xr*rr2*rr3;
ddsc3_2 = -3*damp*EXP(damp)*yr*rr2*rr3;
ddsc3_3 = -3*damp*EXP(damp)*zr*rr2*rr3;
ddsc5_1 = -3*damp*ddsc3_1*rr2;
ddsc5_2 = -3*damp*ddsc3_2*rr2;
ddsc5_3 = -3*damp*ddsc3_3*rr2;
ddsc7_1 = -5*(0.2f+0.6f*damp)*ddsc5_1*rr2;
ddsc7_2 = -5*(0.2f+0.6f*damp)*ddsc5_2*rr2;
ddsc7_3 = -5*(0.2f+0.6f*damp)*ddsc5_3*rr2;
#endif
}
}
real scale3i = 3*scale3*uscale*rr3*rr2;
real scale5i = 3*scale5*uscale*rr3*rr2;
#ifdef APPLY_SCALE
real dsc3 = scale3*dScale*rr3;
real dsc5 = 3*scale5*dScale*rr3*rr2;
real dsc7 = 15*scale7*dScale*rr3*rr3*rr1;
real psc3 = scale3*pScale*rr3;
real psc5 = 3*scale5*pScale*rr3*rr2;
real psc7 = 15*scale7*pScale*rr3*rr3*rr1;
#else
real psc3 = scale3*rr3;
real psc5 = 3*scale5*rr3*rr2;
real psc7 = 15*scale7*rr3*rr3*rr1;
#endif
#ifdef T1
real dixr1 = atom1.dipole.y*zr - atom1.dipole.z*yr;
real dixr2 = atom1.dipole.z*xr - atom1.dipole.x*zr;
real dixr3 = atom1.dipole.x*yr - atom1.dipole.y*xr;
#endif
#ifdef T3
real dkxr1 = atom2.dipole.y*zr - atom2.dipole.z*yr;
real dkxr2 = atom2.dipole.z*xr - atom2.dipole.x*zr;
real dkxr3 = atom2.dipole.x*yr - atom2.dipole.y*xr;
#endif
real qir1 = atom1.quadrupoleXX*xr + atom1.quadrupoleXY*yr + atom1.quadrupoleXZ*zr;
real qir2 = atom1.quadrupoleXY*xr + atom1.quadrupoleYY*yr + atom1.quadrupoleYZ*zr;
real qir3 = atom1.quadrupoleXZ*xr + atom1.quadrupoleYZ*yr + atom1.quadrupoleZZ*zr;
real qkr1 = atom2.quadrupoleXX*xr + atom2.quadrupoleXY*yr + atom2.quadrupoleXZ*zr;
real qkr2 = atom2.quadrupoleXY*xr + atom2.quadrupoleYY*yr + atom2.quadrupoleYZ*zr;
real qkr3 = atom2.quadrupoleXZ*xr + atom2.quadrupoleYZ*yr + atom2.quadrupoleZZ*zr;
#ifdef T1
real rxqir1 = yr*qir3 - zr*qir2;
real rxqir2 = zr*qir1 - xr*qir3;
real rxqir3 = xr*qir2 - yr*qir1;
#endif
#ifdef T3
real rxqkr1 = yr*qkr3 - zr*qkr2;
real rxqkr2 = zr*qkr1 - xr*qkr3;
real rxqkr3 = xr*qkr2 - yr*qkr1;
#endif
// get intermediate variables for permanent energy terms
real sc3 = atom1.dipole.x*xr + atom1.dipole.y*yr + atom1.dipole.z*zr;
real sc4 = atom2.dipole.x*xr + atom2.dipole.y*yr + atom2.dipole.z*zr;
real sc5 = qir1*xr + qir2*yr + qir3*zr;
real sc6 = qkr1*xr + qkr2*yr + qkr3*zr;
#ifdef T1
real dixuk1 = atom1.dipole.y*atom2.inducedDipoleS.z - atom1.dipole.z*atom2.inducedDipoleS.y;
real dixuk2 = atom1.dipole.z*atom2.inducedDipoleS.x - atom1.dipole.x*atom2.inducedDipoleS.z;
real dixuk3 = atom1.dipole.x*atom2.inducedDipoleS.y - atom1.dipole.y*atom2.inducedDipoleS.x;
real dixukp1 = atom1.dipole.y*atom2.inducedDipolePolarS.z - atom1.dipole.z*atom2.inducedDipolePolarS.y;
real dixukp2 = atom1.dipole.z*atom2.inducedDipolePolarS.x - atom1.dipole.x*atom2.inducedDipolePolarS.z;
real dixukp3 = atom1.dipole.x*atom2.inducedDipolePolarS.y - atom1.dipole.y*atom2.inducedDipolePolarS.x;
#endif
#ifdef T3
real dkxui1 = atom2.dipole.y*atom1.inducedDipoleS.z - atom2.dipole.z*atom1.inducedDipoleS.y;
real dkxui2 = atom2.dipole.z*atom1.inducedDipoleS.x - atom2.dipole.x*atom1.inducedDipoleS.z;
real dkxui3 = atom2.dipole.x*atom1.inducedDipoleS.y - atom2.dipole.y*atom1.inducedDipoleS.x;
real dkxuip1 = atom2.dipole.y*atom1.inducedDipolePolarS.z - atom2.dipole.z*atom1.inducedDipolePolarS.y;
real dkxuip2 = atom2.dipole.z*atom1.inducedDipolePolarS.x - atom2.dipole.x*atom1.inducedDipolePolarS.z;
real dkxuip3 = atom2.dipole.x*atom1.inducedDipolePolarS.y - atom2.dipole.y*atom1.inducedDipolePolarS.x;
#endif
#if defined F1 || defined T1
real qiuk1 = atom1.quadrupoleXX*atom2.inducedDipoleS.x + atom1.quadrupoleXY*atom2.inducedDipoleS.y + atom1.quadrupoleXZ*atom2.inducedDipoleS.z;
real qiuk2 = atom1.quadrupoleXY*atom2.inducedDipoleS.x + atom1.quadrupoleYY*atom2.inducedDipoleS.y + atom1.quadrupoleYZ*atom2.inducedDipoleS.z;
real qiuk3 = atom1.quadrupoleXZ*atom2.inducedDipoleS.x + atom1.quadrupoleYZ*atom2.inducedDipoleS.y + atom1.quadrupoleZZ*atom2.inducedDipoleS.z;
real qiukp1 = atom1.quadrupoleXX*atom2.inducedDipolePolarS.x + atom1.quadrupoleXY*atom2.inducedDipolePolarS.y + atom1.quadrupoleXZ*atom2.inducedDipolePolarS.z;
real qiukp2 = atom1.quadrupoleXY*atom2.inducedDipolePolarS.x + atom1.quadrupoleYY*atom2.inducedDipolePolarS.y + atom1.quadrupoleYZ*atom2.inducedDipolePolarS.z;
real qiukp3 = atom1.quadrupoleXZ*atom2.inducedDipolePolarS.x + atom1.quadrupoleYZ*atom2.inducedDipolePolarS.y + atom1.quadrupoleZZ*atom2.inducedDipolePolarS.z;
#if defined F1
qiuk1 -= atom1.quadrupoleXX*atom2.inducedDipole.x + atom1.quadrupoleXY*atom2.inducedDipole.y + atom1.quadrupoleXZ*atom2.inducedDipole.z;
qiuk2 -= atom1.quadrupoleXY*atom2.inducedDipole.x + atom1.quadrupoleYY*atom2.inducedDipole.y + atom1.quadrupoleYZ*atom2.inducedDipole.z;
qiuk3 -= atom1.quadrupoleXZ*atom2.inducedDipole.x + atom1.quadrupoleYZ*atom2.inducedDipole.y + atom1.quadrupoleZZ*atom2.inducedDipole.z;
qiukp1 -= atom1.quadrupoleXX*atom2.inducedDipolePolar.x + atom1.quadrupoleXY*atom2.inducedDipolePolar.y + atom1.quadrupoleXZ*atom2.inducedDipolePolar.z;
qiukp2 -= atom1.quadrupoleXY*atom2.inducedDipolePolar.x + atom1.quadrupoleYY*atom2.inducedDipolePolar.y + atom1.quadrupoleYZ*atom2.inducedDipolePolar.z;
qiukp3 -= atom1.quadrupoleXZ*atom2.inducedDipolePolar.x + atom1.quadrupoleYZ*atom2.inducedDipolePolar.y + atom1.quadrupoleZZ*atom2.inducedDipolePolar.z;
#ifdef APPLY_SCALE
ftm2i1 -= psc5*qiuk1 + dsc5*qiukp1;
ftm2i2 -= psc5*qiuk2 + dsc5*qiukp2;
ftm2i3 -= psc5*qiuk3 + dsc5*qiukp3;
#else
ftm2i1 -= psc5*(qiuk1 + qiukp1);
ftm2i2 -= psc5*(qiuk2 + qiukp2);
ftm2i3 -= psc5*(qiuk3 + qiukp3);
#endif
#endif
#endif
#if defined F1 || defined T3
real qkui1 = atom2.quadrupoleXX*atom1.inducedDipoleS.x + atom2.quadrupoleXY*atom1.inducedDipoleS.y + atom2.quadrupoleXZ*atom1.inducedDipoleS.z;
real qkui2 = atom2.quadrupoleXY*atom1.inducedDipoleS.x + atom2.quadrupoleYY*atom1.inducedDipoleS.y + atom2.quadrupoleYZ*atom1.inducedDipoleS.z;
real qkui3 = atom2.quadrupoleXZ*atom1.inducedDipoleS.x + atom2.quadrupoleYZ*atom1.inducedDipoleS.y + atom2.quadrupoleZZ*atom1.inducedDipoleS.z;
real qkuip1 = atom2.quadrupoleXX*atom1.inducedDipolePolarS.x + atom2.quadrupoleXY*atom1.inducedDipolePolarS.y + atom2.quadrupoleXZ*atom1.inducedDipolePolarS.z;
real qkuip2 = atom2.quadrupoleXY*atom1.inducedDipolePolarS.x + atom2.quadrupoleYY*atom1.inducedDipolePolarS.y + atom2.quadrupoleYZ*atom1.inducedDipolePolarS.z;
real qkuip3 = atom2.quadrupoleXZ*atom1.inducedDipolePolarS.x + atom2.quadrupoleYZ*atom1.inducedDipolePolarS.y + atom2.quadrupoleZZ*atom1.inducedDipolePolarS.z;
#if defined F1
qkui1 -= atom2.quadrupoleXX*atom1.inducedDipole.x + atom2.quadrupoleXY*atom1.inducedDipole.y + atom2.quadrupoleXZ*atom1.inducedDipole.z;
qkui2 -= atom2.quadrupoleXY*atom1.inducedDipole.x + atom2.quadrupoleYY*atom1.inducedDipole.y + atom2.quadrupoleYZ*atom1.inducedDipole.z;
qkui3 -= atom2.quadrupoleXZ*atom1.inducedDipole.x + atom2.quadrupoleYZ*atom1.inducedDipole.y + atom2.quadrupoleZZ*atom1.inducedDipole.z;
qkuip1 -= atom2.quadrupoleXX*atom1.inducedDipolePolar.x + atom2.quadrupoleXY*atom1.inducedDipolePolar.y + atom2.quadrupoleXZ*atom1.inducedDipolePolar.z;
qkuip2 -= atom2.quadrupoleXY*atom1.inducedDipolePolar.x + atom2.quadrupoleYY*atom1.inducedDipolePolar.y + atom2.quadrupoleYZ*atom1.inducedDipolePolar.z;
qkuip3 -= atom2.quadrupoleXZ*atom1.inducedDipolePolar.x + atom2.quadrupoleYZ*atom1.inducedDipolePolar.y + atom2.quadrupoleZZ*atom1.inducedDipolePolar.z;
#ifdef APPLY_SCALE
ftm2i1 += psc5*qkui1 + dsc5*qkuip1;
ftm2i2 += psc5*qkui2 + dsc5*qkuip2;
ftm2i3 += psc5*qkui3 + dsc5*qkuip3;
#else
ftm2i1 += psc5*(qkui1 + qkuip1);
ftm2i2 += psc5*(qkui2 + qkuip2);
ftm2i3 += psc5*(qkui3 + qkuip3);
#endif
#endif
#endif
#ifdef T3
real uixqkr1 = atom1.inducedDipoleS.y*qkr3 - atom1.inducedDipoleS.z*qkr2;
real uixqkr2 = atom1.inducedDipoleS.z*qkr1 - atom1.inducedDipoleS.x*qkr3;
real uixqkr3 = atom1.inducedDipoleS.x*qkr2 - atom1.inducedDipoleS.y*qkr1;
real uixqkrp1 = atom1.inducedDipolePolarS.y*qkr3 - atom1.inducedDipolePolarS.z*qkr2;
real uixqkrp2 = atom1.inducedDipolePolarS.z*qkr1 - atom1.inducedDipolePolarS.x*qkr3;
real uixqkrp3 = atom1.inducedDipolePolarS.x*qkr2 - atom1.inducedDipolePolarS.y*qkr1;
real rxqkuip1 = yr*qkuip3 - zr*qkuip2;
real rxqkuip2 = zr*qkuip1 - xr*qkuip3;
real rxqkuip3 = xr*qkuip2 - yr*qkuip1;
real rxqkui1 = yr*qkui3 - zr*qkui2;
real rxqkui2 = zr*qkui1 - xr*qkui3;
real rxqkui3 = xr*qkui2 - yr*qkui1;
#endif
#ifdef T1
real ukxqir1 = atom2.inducedDipoleS.y*qir3 - atom2.inducedDipoleS.z*qir2;
real ukxqir2 = atom2.inducedDipoleS.z*qir1 - atom2.inducedDipoleS.x*qir3;
real ukxqir3 = atom2.inducedDipoleS.x*qir2 - atom2.inducedDipoleS.y*qir1;
real ukxqirp1 = atom2.inducedDipolePolarS.y*qir3 - atom2.inducedDipolePolarS.z*qir2;
real ukxqirp2 = atom2.inducedDipolePolarS.z*qir1 - atom2.inducedDipolePolarS.x*qir3;
real ukxqirp3 = atom2.inducedDipolePolarS.x*qir2 - atom2.inducedDipolePolarS.y*qir1;
real rxqiuk1 = yr*qiuk3 - zr*qiuk2;
real rxqiuk2 = zr*qiuk1 - xr*qiuk3;
real rxqiuk3 = xr*qiuk2 - yr*qiuk1;
real rxqiukp1 = yr*qiukp3 - zr*qiukp2;
real rxqiukp2 = zr*qiukp1 - xr*qiukp3;
real rxqiukp3 = xr*qiukp2 - yr*qiukp1;
#endif
// get intermediate variables for induction energy terms
real sci3 = atom1.inducedDipoleS.x*xr + atom1.inducedDipoleS.y*yr + atom1.inducedDipoleS.z*zr;
real sci4 = atom2.inducedDipoleS.x*xr + atom2.inducedDipoleS.y*yr + atom2.inducedDipoleS.z*zr;
#ifdef F1
ftm2i1 += 0.5f*scale5i*(sci4*atom1.inducedDipolePolarS.x + sci3*atom2.inducedDipolePolarS.x);
ftm2i2 += 0.5f*scale5i*(sci4*atom1.inducedDipolePolarS.y + sci3*atom2.inducedDipolePolarS.y);
ftm2i3 += 0.5f*scale5i*(sci4*atom1.inducedDipolePolarS.z + sci3*atom2.inducedDipolePolarS.z);
#endif
real sci3Y = atom1.inducedDipole.x*xr + atom1.inducedDipole.y*yr + atom1.inducedDipole.z*zr;
real sci4Y = atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr;
#ifdef F1
ftm2i1 -= 0.5f*scale5i*(sci3Y*atom2.inducedDipolePolar.x + sci4Y*atom1.inducedDipolePolar.x);
ftm2i2 -= 0.5f*scale5i*(sci3Y*atom2.inducedDipolePolar.y + sci4Y*atom1.inducedDipolePolar.y);
ftm2i3 -= 0.5f*scale5i*(sci3Y*atom2.inducedDipolePolar.z + sci4Y*atom1.inducedDipolePolar.z);
#endif
real sci7 = qir1*atom2.inducedDipoleS.x + qir2*atom2.inducedDipoleS.y + qir3*atom2.inducedDipoleS.z;
real sci8 = qkr1*atom1.inducedDipoleS.x + qkr2*atom1.inducedDipoleS.y + qkr3*atom1.inducedDipoleS.z;
real scip1 = atom1.inducedDipolePolarS.x*atom2.dipole.x + atom1.inducedDipolePolarS.y*atom2.dipole.y + atom1.inducedDipolePolarS.z*atom2.dipole.z +
atom1.dipole.x*atom2.inducedDipolePolarS.x + atom1.dipole.y*atom2.inducedDipolePolarS.y + atom1.dipole.z*atom2.inducedDipolePolarS.z;
real scip2 = atom1.inducedDipoleS.x*atom2.inducedDipolePolarS.x + atom1.inducedDipoleS.y*atom2.inducedDipolePolarS.y + atom1.inducedDipoleS.z*atom2.inducedDipolePolarS.z +
atom1.inducedDipolePolarS.x*atom2.inducedDipoleS.x + atom1.inducedDipolePolarS.y*atom2.inducedDipoleS.y + atom1.inducedDipolePolarS.z*atom2.inducedDipoleS.z;
sci7 -= qir1*atom2.inducedDipole.x + qir2*atom2.inducedDipole.y + qir3*atom2.inducedDipole.z;
sci8 -= qkr1*atom1.inducedDipole.x + qkr2*atom1.inducedDipole.y + qkr3*atom1.inducedDipole.z;
scip1 -= atom1.inducedDipolePolar.x*atom2.dipole.x + atom1.inducedDipolePolar.y*atom2.dipole.y + atom1.inducedDipolePolar.z*atom2.dipole.z +
atom1.dipole.x*atom2.inducedDipolePolar.x + atom1.dipole.y*atom2.inducedDipolePolar.y + atom1.dipole.z*atom2.inducedDipolePolar.z;
scip2 -= atom1.inducedDipole.x*atom2.inducedDipolePolar.x + atom1.inducedDipole.y*atom2.inducedDipolePolar.y + atom1.inducedDipole.z*atom2.inducedDipolePolar.z +
atom1.inducedDipolePolar.x*atom2.inducedDipole.x + atom1.inducedDipolePolar.y*atom2.inducedDipole.y + atom1.inducedDipolePolar.z*atom2.inducedDipole.z;
real scip3 = atom1.inducedDipolePolarS.x*xr + atom1.inducedDipolePolarS.y*yr + atom1.inducedDipolePolarS.z*zr;
real scip4 = atom2.inducedDipolePolarS.x*xr + atom2.inducedDipolePolarS.y*yr + atom2.inducedDipolePolarS.z*zr;
real gfi1 = -2.5f*(sci3*scip4+scip3*sci4)*scale5i;
#ifdef F1
ftm2i1 += 0.5f*scale5i*(scip4*atom1.inducedDipoleS.x + scip3*atom2.inducedDipoleS.x);
ftm2i2 += 0.5f*scale5i*(scip4*atom1.inducedDipoleS.y + scip3*atom2.inducedDipoleS.y);
ftm2i3 += 0.5f*scale5i*(scip4*atom1.inducedDipoleS.z + scip3*atom2.inducedDipoleS.z);
#endif
real scip3Y = atom1.inducedDipolePolar.x*xr + atom1.inducedDipolePolar.y*yr + atom1.inducedDipolePolar.z*zr;
real scip4Y = atom2.inducedDipolePolar.x*xr + atom2.inducedDipolePolar.y*yr + atom2.inducedDipolePolar.z*zr;
gfi1 += 2.5f*(sci3Y*scip4Y + scip3Y*sci4Y)*scale5i;
#ifdef F1
ftm2i1 -= 0.5f*scale5i*(scip3Y*atom2.inducedDipole.x + scip4Y*atom1.inducedDipole.x);
ftm2i2 -= 0.5f*scale5i*(scip3Y*atom2.inducedDipole.y + scip4Y*atom1.inducedDipole.y);
ftm2i3 -= 0.5f*scale5i*(scip3Y*atom2.inducedDipole.z + scip4Y*atom1.inducedDipole.z);
#endif
sci3Y = sci3 - sci3Y;
sci4Y = sci4 - sci4Y;
scip3Y = scip3 - scip3Y;
scip4Y = scip4 - scip4Y;
real scip7 = qir1*atom2.inducedDipolePolarS.x + qir2*atom2.inducedDipolePolarS.y + qir3*atom2.inducedDipolePolarS.z;
scip7 -= qir1*atom2.inducedDipolePolar.x + qir2*atom2.inducedDipolePolar.y + qir3*atom2.inducedDipolePolar.z;
real scip8 = qkr1*atom1.inducedDipolePolarS.x + qkr2*atom1.inducedDipolePolarS.y + qkr3*atom1.inducedDipolePolarS.z;
scip8 -= qkr1*atom1.inducedDipolePolar.x + qkr2*atom1.inducedDipolePolar.y + qkr3*atom1.inducedDipolePolar.z;
real sci1 = atom1.inducedDipoleS.x*atom2.dipole.x + atom1.inducedDipoleS.y*atom2.dipole.y +
atom1.inducedDipoleS.z*atom2.dipole.z + atom1.dipole.x*atom2.inducedDipoleS.x +
atom1.dipole.y*atom2.inducedDipoleS.y + atom1.dipole.z*atom2.inducedDipoleS.z;
sci1 -= atom1.inducedDipole.x*atom2.dipole.x + atom1.inducedDipole.y*atom2.dipole.y +
atom1.inducedDipole.z*atom2.dipole.z + atom1.dipole.x*atom2.inducedDipole.x +
atom1.dipole.y*atom2.inducedDipole.y + atom1.dipole.z*atom2.inducedDipole.z;
real gli1 = atom2.q*sci3Y - atom1.q*sci4Y + sci1;
real gli2 = -sc3*sci4Y - sci3Y*sc4 + 2*(sci7-sci8);
real gli3 = sci3Y*sc6 - sci4Y*sc5;
real glip1 = atom2.q*scip3Y - atom1.q*scip4Y + scip1;
real glip2 = -sc3*scip4Y - scip3Y*sc4 + 2*(scip7-scip8);
real glip3 = scip3Y*sc6 - scip4Y*sc5;
#ifdef F1
#ifdef APPLY_SCALE
ftm2i1 -= 0.5f*((gli1*pScale + glip1*dScale)*ddsc3_1 + (gli2*pScale + glip2*dScale)*ddsc5_1 + (gli3*pScale+glip3*dScale)*ddsc7_1);
ftm2i2 -= 0.5f*((gli1*pScale + glip1*dScale)*ddsc3_2 + (gli2*pScale + glip2*dScale)*ddsc5_2 + (gli3*pScale+glip3*dScale)*ddsc7_2);
ftm2i3 -= 0.5f*((gli1*pScale + glip1*dScale)*ddsc3_3 + (gli2*pScale + glip2*dScale)*ddsc5_3 + (gli3*pScale+glip3*dScale)*ddsc7_3);
#else
ftm2i1 -= 0.5f*((gli1 + glip1)*ddsc3_1 + (gli2 + glip2)*ddsc5_1 + (gli3 + glip3)*ddsc7_1);
ftm2i2 -= 0.5f*((gli1 + glip1)*ddsc3_2 + (gli2 + glip2)*ddsc5_2 + (gli3 + glip3)*ddsc7_2);
ftm2i3 -= 0.5f*((gli1 + glip1)*ddsc3_3 + (gli2 + glip2)*ddsc5_3 + (gli3 + glip3)*ddsc7_3);
#endif
outputEnergy = gli1*psc3 + gli2*psc5 + gli3*psc7;
#endif
#ifdef APPLY_SCALE
gfi1 += 1.5f*(gli1*psc3 + glip1*dsc3);
gfi1 += 2.5f*(gli2*psc5 + glip2*dsc5);
gfi1 += 3.5f*(gli3*psc7 + glip3*dsc7);
#else
gfi1 += 1.5f*psc3*(gli1 + glip1);
gfi1 += 2.5f*psc5*(gli2 + glip2);
gfi1 += 3.5f*psc7*(gli3 + glip3);
#endif
gfi1 *= rr2;
gfi1 += 0.5f*scip2*scale3i;
#if defined F1 || defined T1
#ifdef APPLY_SCALE
real gfi5 = (sci4Y*psc7+scip4Y*dsc7);
#else
real gfi5 = psc7*(sci4Y + scip4Y);
#endif
#endif
#if defined F1 || defined T3
#ifdef APPLY_SCALE
real gfi6 = -(sci3Y*psc7+scip3Y*dsc7);
#else
real gfi6 = -psc7*(sci3Y + scip3Y);
#endif
#endif
#ifdef F1
ftm2i1 += gfi1*xr;
real diff0 = atom1.inducedDipoleS.x - atom1.inducedDipole.x;
real diff1 = atom1.inducedDipolePolarS.x - atom1.inducedDipolePolar.x;
#ifdef APPLY_SCALE
ftm2i1 += 0.5f*(-atom2.q*(diff0*psc3 + diff1*dsc3) + sc4*(diff0*psc5 + diff1*dsc5) - sc6*(diff0*psc7 + diff1*dsc7));
#else
ftm2i1 += 0.5f*(-atom2.q*psc3*(diff0 + diff1) + sc4*psc5*(diff0 + diff1) - sc6*psc7*(diff0 + diff1));
#endif
diff0 = atom2.inducedDipoleS.x - atom2.inducedDipole.x;
diff1 = atom2.inducedDipolePolarS.x - atom2.inducedDipolePolar.x;
#ifdef APPLY_SCALE
ftm2i1 += 0.5f*(atom1.q*(diff0*psc3 + diff1*dsc3) + sc3*(diff0*psc5 + diff1*dsc5) + sc5*(diff0*psc7 + diff1*dsc7));
ftm2i1 += 0.5f*(sci4Y*psc5+scip4Y*dsc5)*atom1.dipole.x + 0.5f*(sci3Y*psc5+scip3Y*dsc5)*atom2.dipole.x + gfi5*qir1 + gfi6*qkr1;
#else
ftm2i1 += 0.5f*(atom1.q*psc3*(diff0 + diff1) + sc3*psc5*(diff0 + diff1) + sc5*psc7*(diff0 + diff1));
ftm2i1 += 0.5f*psc5*(sci4Y + scip4Y)*atom1.dipole.x + 0.5f*psc5*(sci3Y + scip3Y)*atom2.dipole.x + gfi5*qir1 + gfi6*qkr1;
#endif
ftm2i2 += gfi1*yr;
diff0 = atom1.inducedDipoleS.y - atom1.inducedDipole.y;
diff1 = atom1.inducedDipolePolarS.y - atom1.inducedDipolePolar.y;
#ifdef APPLY_SCALE
ftm2i2 += 0.5f*(-atom2.q*(diff0*psc3 + diff1*dsc3) + sc4*(diff0*psc5 + diff1*dsc5) - sc6*(diff0*psc7 + diff1*dsc7));
#else
ftm2i2 += 0.5f*(-atom2.q*psc3*(diff0 + diff1) + sc4*psc5*(diff0 + diff1) - sc6*psc7*(diff0 + diff1));
#endif
diff0 = atom2.inducedDipoleS.y - atom2.inducedDipole.y;
diff1 = atom2.inducedDipolePolarS.y - atom2.inducedDipolePolar.y;
#ifdef APPLY_SCALE
ftm2i2 += 0.5f*(atom1.q*(diff0*psc3 + diff1*dsc3) + sc3*(diff0*psc5 + diff1*dsc5) + sc5*(diff0*psc7 + diff1*dsc7));
ftm2i2 += 0.5f*(sci4Y*psc5+scip4Y*dsc5)*atom1.dipole.y + 0.5f*(sci3Y*psc5+scip3Y*dsc5)*atom2.dipole.y + gfi5*qir2 + gfi6*qkr2;
#else
ftm2i2 += 0.5f*(atom1.q*psc3*(diff0 + diff1) + sc3*psc5*(diff0 + diff1) + sc5*psc7*(diff0 + diff1));
ftm2i2 += 0.5f*psc5*(sci4Y +scip4Y)*atom1.dipole.y + 0.5f*psc5*(sci3Y +scip3Y)*atom2.dipole.y + gfi5*qir2 + gfi6*qkr2;
#endif
ftm2i3 += gfi1*zr;
diff0 = atom1.inducedDipoleS.z - atom1.inducedDipole.z;
diff1 = atom1.inducedDipolePolarS.z - atom1.inducedDipolePolar.z;
#ifdef APPLY_SCALE
ftm2i3 += 0.5f*(-atom2.q*(diff0*psc3 + diff1*dsc3) + sc4*(diff0*psc5 + diff1*dsc5) - sc6*(diff0*psc7 + diff1*dsc7));
#else
ftm2i3 += 0.5f*(-atom2.q*psc3*(diff0 + diff1) + sc4*psc5*(diff0 + diff1) - sc6*psc7*(diff0 + diff1));
#endif
diff0 = atom2.inducedDipoleS.z - atom2.inducedDipole.z;
diff1 = atom2.inducedDipolePolarS.z - atom2.inducedDipolePolar.z;
#ifdef APPLY_SCALE
ftm2i3 += 0.5f*(atom1.q*(diff0*psc3 + diff1*dsc3) + sc3*(diff0*psc5 + diff1*dsc5) + sc5*(diff0*psc7 + diff1*dsc7));
ftm2i3 += 0.5f*(sci4Y*psc5+scip4Y*dsc5)*atom1.dipole.z + 0.5f*(sci3Y*psc5+scip3Y*dsc5)*atom2.dipole.z + gfi5*qir3 + gfi6*qkr3;
#else
ftm2i3 += 0.5f*(atom1.q*psc3*(diff0 + diff1) + sc3*psc5*(diff0 + diff1) + sc5*psc7*(diff0 + diff1));
ftm2i3 += 0.5f*psc5*(sci4Y + scip4Y)*atom1.dipole.z + 0.5f*psc5*(sci3Y+scip3Y)*atom2.dipole.z + gfi5*qir3 + gfi6*qkr3;
#endif
// intermediate values needed for partially excluded interactions
// correction to convert mutual to direct polarization force
#ifdef DIRECT_POLARIZATION
real gfd = (scip2*scale3i - 5*rr2*(scip3*sci4+sci3*scip4)*scale5i);
real fdir1 = gfd*xr + scale5i* (sci4*atom1.inducedDipolePolarS.x+scip4*atom1.inducedDipoleS.x + sci3*atom2.inducedDipolePolarS.x+scip3*atom2.inducedDipoleS.x);
real fdir2 = gfd*yr + scale5i* (sci4*atom1.inducedDipolePolarS.y+scip4*atom1.inducedDipoleS.y + sci3*atom2.inducedDipolePolarS.y+scip3*atom2.inducedDipoleS.y);
real fdir3 = gfd*zr + scale5i* (sci4*atom1.inducedDipolePolarS.z+scip4*atom1.inducedDipoleS.z + sci3*atom2.inducedDipolePolarS.z+scip3*atom2.inducedDipoleS.z);
ftm2i1 -= 0.5f*fdir1;
ftm2i2 -= 0.5f*fdir2;
ftm2i3 -= 0.5f*fdir3;
real sci3X = sci3 - sci3Y;
real sci4X = sci4 - sci4Y;
real scip3X = scip3 - scip3Y;
real scip4X = scip4 - scip4Y;
gfd = -5*rr2*(scip3X*sci4X+sci3X*scip4X)*scale5i;
fdir1 = gfd*xr + scale5i*(sci4X*atom1.inducedDipolePolar.x + scip4X*atom1.inducedDipole.x + sci3X*atom2.inducedDipolePolar.x + scip3X*atom2.inducedDipole.x);
fdir2 = gfd*yr + scale5i*(sci4X*atom1.inducedDipolePolar.y + scip4X*atom1.inducedDipole.y + sci3X*atom2.inducedDipolePolar.y + scip3X*atom2.inducedDipole.y);
fdir3 = gfd*zr + scale5i*(sci4X*atom1.inducedDipolePolar.z + scip4X*atom1.inducedDipole.z + sci3X*atom2.inducedDipolePolar.z + scip3X*atom2.inducedDipole.z);
ftm2i1 += 0.5f*fdir1;
ftm2i2 += 0.5f*fdir2;
ftm2i3 += 0.5f*fdir3;
#else
real findmp1 = uscale*(scip2*ddsc3_1 - ddsc5_1*(sci3*scip4+scip3*sci4));
real findmp2 = uscale*(scip2*ddsc3_2 - ddsc5_2*(sci3*scip4+scip3*sci4));
real findmp3 = uscale*(scip2*ddsc3_3 - ddsc5_3*(sci3*scip4+scip3*sci4));
ftm2i1 -= 0.5f*findmp1;
ftm2i2 -= 0.5f*findmp2;
ftm2i3 -= 0.5f*findmp3;
real sci3X = sci3 - sci3Y;
real sci4X = sci4 - sci4Y;
real scip3X = scip3 - scip3Y;
real scip4X = scip4 - scip4Y;
ftm2i1 += 0.5f*uscale*(-ddsc5_1*(sci3X*scip4X+scip3X*sci4X));
ftm2i2 += 0.5f*uscale*(-ddsc5_2*(sci3X*scip4X+scip3X*sci4X));
ftm2i3 += 0.5f*uscale*(-ddsc5_3*(sci3X*scip4X+scip3X*sci4X));
#endif
#endif
#ifdef T1
#ifdef APPLY_SCALE
real gti2 = 0.5f*(sci4Y*psc5 + scip4Y*dsc5);
real ttm2i1 = -(dixuk1*psc3+dixukp1*dsc3)*0.5f + gti2*dixr1 + ((ukxqir1+rxqiuk1)*psc5 +(ukxqirp1+rxqiukp1)*dsc5) - gfi5*rxqir1;
real ttm2i2 = -(dixuk2*psc3+dixukp2*dsc3)*0.5f + gti2*dixr2 + ((ukxqir2+rxqiuk2)*psc5 +(ukxqirp2+rxqiukp2)*dsc5) - gfi5*rxqir2;
real ttm2i3 = -(dixuk3*psc3+dixukp3*dsc3)*0.5f + gti2*dixr3 + ((ukxqir3+rxqiuk3)*psc5 +(ukxqirp3+rxqiukp3)*dsc5) - gfi5*rxqir3;
#else
real gti2 = 0.5f*psc5*(sci4Y + scip4Y);
real ttm2i1 = -psc3*(dixuk1 + dixukp1)*0.5f + gti2*dixr1 + psc5*((ukxqir1+rxqiuk1) + (ukxqirp1+rxqiukp1)) - gfi5*rxqir1;
real ttm2i2 = -psc3*(dixuk2 + dixukp2)*0.5f + gti2*dixr2 + psc5*((ukxqir2+rxqiuk2) + (ukxqirp2+rxqiukp2)) - gfi5*rxqir2;
real ttm2i3 = -psc3*(dixuk3 + dixukp3)*0.5f + gti2*dixr3 + psc5*((ukxqir3+rxqiuk3) + (ukxqirp3+rxqiukp3)) - gfi5*rxqir3;
#endif
#endif
#ifdef T3
#ifdef APPLY_SCALE
real gti3 = 0.5f*(sci3Y*psc5 + scip3Y*dsc5);
real ttm3i1 = -(dkxui1*psc3+dkxuip1*dsc3)*0.5f + gti3*dkxr1 - ((uixqkr1+rxqkui1)*psc5 +(uixqkrp1+rxqkuip1)*dsc5) - gfi6*rxqkr1;
real ttm3i2 = -(dkxui2*psc3+dkxuip2*dsc3)*0.5f + gti3*dkxr2 - ((uixqkr2+rxqkui2)*psc5 +(uixqkrp2+rxqkuip2)*dsc5) - gfi6*rxqkr2;
real ttm3i3 = -(dkxui3*psc3+dkxuip3*dsc3)*0.5f + gti3*dkxr3 - ((uixqkr3+rxqkui3)*psc5 +(uixqkrp3+rxqkuip3)*dsc5) - gfi6*rxqkr3;
#else
real gti3 = 0.5f*psc5*(sci3Y + scip3Y);
real ttm3i1 = -psc3*(dkxui1 + dkxuip1)*0.5f + gti3*dkxr1 - psc5*((uixqkr1+rxqkui1) + (uixqkrp1+rxqkuip1)) - gfi6*rxqkr1;
real ttm3i2 = -psc3*(dkxui2 + dkxuip2)*0.5f + gti3*dkxr2 - psc5*((uixqkr2+rxqkui2) + (uixqkrp2+rxqkuip2)) - gfi6*rxqkr2;
real ttm3i3 = -psc3*(dkxui3 + dkxuip3)*0.5f + gti3*dkxr3 - psc5*((uixqkr3+rxqkui3) + (uixqkrp3+rxqkuip3)) - gfi6*rxqkr3;
#endif
#endif
// update force and torque on site k
#ifdef F1
outputForce.x = -ftm2i1;
outputForce.y = -ftm2i2;
outputForce.z = -ftm2i3;
#endif
#ifdef T1
outputForce.x = ttm2i1;
outputForce.y = ttm2i2;
outputForce.z = ttm2i3;
#endif
#ifdef T3
outputForce.x = ttm3i1;
outputForce.y = ttm3i2;
outputForce.z = ttm3i3;
#endif
// construct auxiliary vectors for induced terms
#ifdef T1
dixuk1 = atom1.dipole.y*atom2.inducedDipole.z - atom1.dipole.z*atom2.inducedDipole.y;
dixuk2 = atom1.dipole.z*atom2.inducedDipole.x - atom1.dipole.x*atom2.inducedDipole.z;
dixuk3 = atom1.dipole.x*atom2.inducedDipole.y - atom1.dipole.y*atom2.inducedDipole.x;
dixukp1 = atom1.dipole.y*atom2.inducedDipolePolar.z - atom1.dipole.z*atom2.inducedDipolePolar.y;
dixukp2 = atom1.dipole.z*atom2.inducedDipolePolar.x - atom1.dipole.x*atom2.inducedDipolePolar.z;
dixukp3 = atom1.dipole.x*atom2.inducedDipolePolar.y - atom1.dipole.y*atom2.inducedDipolePolar.x;
#endif
#ifdef T3
dkxui1 = atom2.dipole.y*atom1.inducedDipole.z - atom2.dipole.z*atom1.inducedDipole.y;
dkxui2 = atom2.dipole.z*atom1.inducedDipole.x - atom2.dipole.x*atom1.inducedDipole.z;
dkxui3 = atom2.dipole.x*atom1.inducedDipole.y - atom2.dipole.y*atom1.inducedDipole.x;
dkxuip1 = atom2.dipole.y*atom1.inducedDipolePolar.z - atom2.dipole.z*atom1.inducedDipolePolar.y;
dkxuip2 = atom2.dipole.z*atom1.inducedDipolePolar.x - atom2.dipole.x*atom1.inducedDipolePolar.z;
dkxuip3 = atom2.dipole.x*atom1.inducedDipolePolar.y - atom2.dipole.y*atom1.inducedDipolePolar.x;
#endif
#if defined T1
qiuk1 = atom1.quadrupoleXX*atom2.inducedDipole.x + atom1.quadrupoleXY*atom2.inducedDipole.y + atom1.quadrupoleXZ*atom2.inducedDipole.z;
qiuk2 = atom1.quadrupoleXY*atom2.inducedDipole.x + atom1.quadrupoleYY*atom2.inducedDipole.y + atom1.quadrupoleYZ*atom2.inducedDipole.z;
qiuk3 = atom1.quadrupoleXZ*atom2.inducedDipole.x + atom1.quadrupoleYZ*atom2.inducedDipole.y + atom1.quadrupoleZZ*atom2.inducedDipole.z;
qiukp1 = atom1.quadrupoleXX*atom2.inducedDipolePolar.x + atom1.quadrupoleXY*atom2.inducedDipolePolar.y + atom1.quadrupoleXZ*atom2.inducedDipolePolar.z;
qiukp2 = atom1.quadrupoleXY*atom2.inducedDipolePolar.x + atom1.quadrupoleYY*atom2.inducedDipolePolar.y + atom1.quadrupoleYZ*atom2.inducedDipolePolar.z;
qiukp3 = atom1.quadrupoleXZ*atom2.inducedDipolePolar.x + atom1.quadrupoleYZ*atom2.inducedDipolePolar.y + atom1.quadrupoleZZ*atom2.inducedDipolePolar.z;
#endif
#if defined T3
qkui1 = atom2.quadrupoleXX*atom1.inducedDipole.x + atom2.quadrupoleXY*atom1.inducedDipole.y + atom2.quadrupoleXZ*atom1.inducedDipole.z;
qkui2 = atom2.quadrupoleXY*atom1.inducedDipole.x + atom2.quadrupoleYY*atom1.inducedDipole.y + atom2.quadrupoleYZ*atom1.inducedDipole.z;
qkui3 = atom2.quadrupoleXZ*atom1.inducedDipole.x + atom2.quadrupoleYZ*atom1.inducedDipole.y + atom2.quadrupoleZZ*atom1.inducedDipole.z;
qkuip1 = atom2.quadrupoleXX*atom1.inducedDipolePolar.x + atom2.quadrupoleXY*atom1.inducedDipolePolar.y + atom2.quadrupoleXZ*atom1.inducedDipolePolar.z;
qkuip2 = atom2.quadrupoleXY*atom1.inducedDipolePolar.x + atom2.quadrupoleYY*atom1.inducedDipolePolar.y + atom2.quadrupoleYZ*atom1.inducedDipolePolar.z;
qkuip3 = atom2.quadrupoleXZ*atom1.inducedDipolePolar.x + atom2.quadrupoleYZ*atom1.inducedDipolePolar.y + atom2.quadrupoleZZ*atom1.inducedDipolePolar.z;
#endif
#ifdef T3
uixqkr1 = atom1.inducedDipole.y*qkr3 - atom1.inducedDipole.z*qkr2;
uixqkr2 = atom1.inducedDipole.z*qkr1 - atom1.inducedDipole.x*qkr3;
uixqkr3 = atom1.inducedDipole.x*qkr2 - atom1.inducedDipole.y*qkr1;
uixqkrp1 = atom1.inducedDipolePolar.y*qkr3 - atom1.inducedDipolePolar.z*qkr2;
uixqkrp2 = atom1.inducedDipolePolar.z*qkr1 - atom1.inducedDipolePolar.x*qkr3;
uixqkrp3 = atom1.inducedDipolePolar.x*qkr2 - atom1.inducedDipolePolar.y*qkr1;
#endif
#ifdef T1
ukxqir1 = atom2.inducedDipole.y*qir3 - atom2.inducedDipole.z*qir2;
ukxqir2 = atom2.inducedDipole.z*qir1 - atom2.inducedDipole.x*qir3;
ukxqir3 = atom2.inducedDipole.x*qir2 - atom2.inducedDipole.y*qir1;
ukxqirp1 = atom2.inducedDipolePolar.y*qir3 - atom2.inducedDipolePolar.z*qir2;
ukxqirp2 = atom2.inducedDipolePolar.z*qir1 - atom2.inducedDipolePolar.x*qir3;
ukxqirp3 = atom2.inducedDipolePolar.x*qir2 - atom2.inducedDipolePolar.y*qir1;
rxqiuk1 = yr*qiuk3 - zr*qiuk2;
rxqiuk2 = zr*qiuk1 - xr*qiuk3;
rxqiuk3 = xr*qiuk2 - yr*qiuk1;
rxqiukp1 = yr*qiukp3 - zr*qiukp2;
rxqiukp2 = zr*qiukp1 - xr*qiukp3;
rxqiukp3 = xr*qiukp2 - yr*qiukp1;
#endif
#ifdef T3
rxqkui1 = yr*qkui3 - zr*qkui2;
rxqkui2 = zr*qkui1 - xr*qkui3;
rxqkui3 = xr*qkui2 - yr*qkui1;
rxqkuip1 = yr*qkuip3 - zr*qkuip2;
rxqkuip2 = zr*qkuip1 - xr*qkuip3;
rxqkuip3 = xr*qkuip2 - yr*qkuip1;
#endif
#ifdef T1
#ifdef APPLY_SCALE
ttm2i1 = -(dixuk1*psc3+dixukp1*dsc3)*0.5f + ((ukxqir1+rxqiuk1)*psc5 +(ukxqirp1+rxqiukp1)*dsc5);
ttm2i2 = -(dixuk2*psc3+dixukp2*dsc3)*0.5f + ((ukxqir2+rxqiuk2)*psc5 +(ukxqirp2+rxqiukp2)*dsc5);
ttm2i3 = -(dixuk3*psc3+dixukp3*dsc3)*0.5f + ((ukxqir3+rxqiuk3)*psc5 +(ukxqirp3+rxqiukp3)*dsc5);
#else
ttm2i1 = -psc3*(dixuk1+dixukp1)*0.5f + psc5*((ukxqir1+rxqiuk1) + (ukxqirp1+rxqiukp1));
ttm2i2 = -psc3*(dixuk2+dixukp2)*0.5f + psc5*((ukxqir2+rxqiuk2) + (ukxqirp2+rxqiukp2));
ttm2i3 = -psc3*(dixuk3+dixukp3)*0.5f + psc5*((ukxqir3+rxqiuk3) + (ukxqirp3+rxqiukp3));
#endif
#endif
#ifdef T3
#ifdef APPLY_SCALE
ttm3i1 = -(dkxui1*psc3+dkxuip1*dsc3)*0.5f - ((uixqkr1+rxqkui1)*psc5 +(uixqkrp1+rxqkuip1)*dsc5);
ttm3i2 = -(dkxui2*psc3+dkxuip2*dsc3)*0.5f - ((uixqkr2+rxqkui2)*psc5 +(uixqkrp2+rxqkuip2)*dsc5);
ttm3i3 = -(dkxui3*psc3+dkxuip3*dsc3)*0.5f - ((uixqkr3+rxqkui3)*psc5 +(uixqkrp3+rxqkuip3)*dsc5);
#else
ttm3i1 = -psc3*(dkxui1 + dkxuip1)*0.5f - psc5*((uixqkr1+rxqkui1) + (uixqkrp1+rxqkuip1));
ttm3i2 = -psc3*(dkxui2 + dkxuip2)*0.5f - psc5*((uixqkr2+rxqkui2) + (uixqkrp2+rxqkuip2));
ttm3i3 = -psc3*(dkxui3 + dkxuip3)*0.5f - psc5*((uixqkr3+rxqkui3) + (uixqkrp3+rxqkuip3));
#endif
#endif
// update force and torque on site k;
#ifdef T1
outputForce.x -= ttm2i1;
outputForce.y -= ttm2i2;
outputForce.z -= ttm2i3;
#endif
#ifdef T3
outputForce.x -= ttm3i1;
outputForce.y -= ttm3i2;
outputForce.z -= ttm3i3;
#endif
}
/**
* This defines three different closely related functions, depending on which constant (F1, T1, or T3) is defined.
*/
#if defined F1
__device__ void computeOneInteractionF1(AtomData2& atom1, volatile AtomData2& atom2, real& outputEnergy, real3& force) {
#elif defined F2
__device__ void computeOneInteractionF2(AtomData2& atom1, volatile AtomData2& atom2, real& outputEnergy, real3& force) {
#elif defined T1
__device__ void computeOneInteractionT1(AtomData2& atom1, volatile AtomData2& atom2, real3& torque) {
#elif defined T2
__device__ void computeOneInteractionT2(AtomData2& atom1, volatile AtomData2& atom2, real3& torque) {
#elif defined B1 && defined B2
__device__ void computeOneInteractionB1B2(AtomData2& atom1, volatile AtomData2& atom2) {
#endif
#if defined F2 || defined B2
real sxi = atom1.inducedDipole.x + atom1.inducedDipolePolar.x;
real syi = atom1.inducedDipole.y + atom1.inducedDipolePolar.y;
real szi = atom1.inducedDipole.z + atom1.inducedDipolePolar.z;
#endif
#if defined F2 || defined T2 || defined B2
real sxk = atom2.inducedDipole.x + atom2.inducedDipolePolar.x;
real syk = atom2.inducedDipole.y + atom2.inducedDipolePolar.y;
real szk = atom2.inducedDipole.z + atom2.inducedDipolePolar.z;
#endif
// decide whether to compute the current interaction;
real xr = atom2.pos.x - atom1.pos.x;
real yr = atom2.pos.y - atom1.pos.y;
real zr = atom2.pos.z - atom1.pos.z;
real xr2 = xr*xr;
real yr2 = yr*yr;
real zr2 = zr*zr;
real r2 = xr2 + yr2 + zr2;
real rb2 = atom1.bornRadius*atom2.bornRadius;
real expterm = EXP(-r2/(GK_C*rb2));
real expc = expterm/GK_C;
real expcr = r2*expterm/(GK_C*GK_C*rb2*rb2);
real dexpc = -2 / (GK_C*rb2);
real dexpcr = 2 / (GK_C*rb2*rb2);
real dgfdr = 0.5f*expterm*(1 + r2/(rb2*GK_C));
real gf2 = 1 / (r2 + rb2*expterm);
real gf = SQRT(gf2);
real gf3 = gf2*gf;
real gf5 = gf3*gf2;
real gf7 = gf5*gf2;
real gf9 = gf7*gf2;
real gf11 = gf9*gf2;
// reaction potential auxiliary terms;
real a00 = gf;
real a10 = -gf3;
real a20 = 3*gf5;
real a30 = -15*gf7;
real a40 = 105*gf9;
real a50 = -945*gf11;
// Born radii derivatives of reaction potential auxiliary terms;
real b00 = dgfdr*a10;
real b10 = dgfdr*a20;
real b20 = dgfdr*a30;
real b30 = dgfdr*a40;
real b40 = dgfdr*a50;
// reaction potential gradient auxiliary terms;
real expc1 = 1 - expc;
real a01 = expc1*a10;
real a11 = expc1*a20;
real a21 = expc1*a30;
real a31 = expc1*a40;
real a41 = expc1*a50;
// Born radii derivs of reaction potential gradient auxiliary terms;
real b01 = b10 - expcr*a10 - expc*b10;
real b11 = b20 - expcr*a20 - expc*b20;
real b21 = b30 - expcr*a30 - expc*b30;
real b31 = b40 - expcr*a40 - expc*b40;
// 2nd reaction potential gradient auxiliary terms;
real expcdexpc = -expc*dexpc;
real a02 = expc1*a11 + expcdexpc*a10;
real a12 = expc1*a21 + expcdexpc*a20;
real a22 = expc1*a31 + expcdexpc*a30;
real a32 = expc1*a41 + expcdexpc*a40;
// Born radii derivatives of the 2nd reaction potential
// gradient auxiliary terms
real b02 = b11 - (expcr*(a11 + dexpc*a10) + expc*(b11 + dexpcr*a10 + dexpc*b10));
real b12 = b21 - (expcr*(a21 + dexpc*a20) + expc*(b21 + dexpcr*a20 + dexpc*b20));
real b22 = b31 - (expcr*(a31 + dexpc*a30) + expc*(b31 + dexpcr*a30 + dexpc*b30));
// 3rd reaction potential gradient auxiliary terms
expcdexpc = 2*expcdexpc;
real a03 = expc1*a12 + expcdexpc*a11;
real a13 = expc1*a22 + expcdexpc*a21;
real a23 = expc1*a32 + expcdexpc*a31;
expcdexpc = -expc*dexpc*dexpc;
a03 = a03 + expcdexpc*a10;
a13 = a13 + expcdexpc*a20;
a23 = a23 + expcdexpc*a30;
// multiply the auxillary terms by their dieletric functions;
a00 *= GK_FC;
a01 *= GK_FC;
a02 *= GK_FC;
a03 *= GK_FC;
b00 *= GK_FC;
b01 *= GK_FC;
b02 *= GK_FC;
a10 *= GK_FD;
a11 *= GK_FD;
a12 *= GK_FD;
a13 *= GK_FD;
b10 *= GK_FD;
b11 *= GK_FD;
b12 *= GK_FD;
a20 *= GK_FQ;
a21 *= GK_FQ;
a22 *= GK_FQ;
a23 *= GK_FQ;
b20 *= GK_FQ;
b21 *= GK_FQ;
b22 *= GK_FQ;
// unweighted reaction potential tensor
#if defined F2
real energy = -a10*atom2.q*(atom1.inducedDipole.x*xr + atom1.inducedDipole.y*yr + atom1.inducedDipole.z*zr);
energy += a10*atom1.q*(atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr);
#endif
#if defined F1
real energy = 2*atom1.q*atom2.q*a00;
energy += -a10*atom2.q*(atom1.dipole.x*xr + atom1.dipole.y*yr + atom1.dipole.z*zr);
energy += a10*atom1.q*(atom2.dipole.x*xr + atom2.dipole.y*yr + atom2.dipole.z*zr);
energy += a20*atom2.q*(atom1.quadrupoleXX*xr2 + atom1.quadrupoleYY*yr2 + atom1.quadrupoleZZ*zr2 + 2*(atom1.quadrupoleXY*xr*yr + atom1.quadrupoleXZ*xr*zr + atom1.quadrupoleYZ*yr*zr));
energy += a20*atom1.q*(atom2.quadrupoleXX*xr2 + atom2.quadrupoleYY*yr2 + atom2.quadrupoleZZ*zr2 + 2*(atom2.quadrupoleXY*xr*yr + atom2.quadrupoleXZ*xr*zr + atom2.quadrupoleYZ*yr*zr));
#endif
// Born radii derivs of unweighted reaction potential tensor
#if defined B1
real dsumdrB1 = 2*(atom1.q*atom2.q*b00);
dsumdrB1 = b10*atom2.q*(atom1.dipole.x*xr + atom1.dipole.y*yr + atom1.dipole.z*zr);
dsumdrB1 += b10*atom1.q*(atom2.dipole.x*xr + atom2.dipole.y*yr + atom2.dipole.z*zr);
#endif
#if defined B2
real dsumdrB2 = -b10*atom2.q*(sxi*xr + syi*yr + szi*zr);
dsumdrB2 += b10*atom1.q*(sxk*xr + syk*yr + szk*zr);
#endif
#if defined B1
real gqxx21 = xr*xr;
real gqyy21 = yr*yr;
real gqzz21 = zr*zr;
real gqxy21 = xr*yr;
real gqxz21 = xr*zr;
real gqyz21 = yr*zr;
dsumdrB1 += b20*atom2.q*(atom1.quadrupoleXX*gqxx21 + atom1.quadrupoleYY*gqyy21 + atom1.quadrupoleZZ*gqzz21 + 2*(atom1.quadrupoleXY*gqxy21 + atom1.quadrupoleXZ*gqxz21 + atom1.quadrupoleYZ*gqyz21));
dsumdrB1 += b20*atom1.q*(atom2.quadrupoleXX*gqxx21 + atom2.quadrupoleYY*gqyy21 + atom2.quadrupoleZZ*gqzz21 + 2*(atom2.quadrupoleXY*gqxy21 + atom2.quadrupoleXZ*gqxz21 + atom2.quadrupoleYZ*gqyz21));
#endif
#if defined F1
energy += a01*atom1.q*(atom2.dipole.x*xr + atom2.dipole.y*yr + atom2.dipole.z*zr);
energy = a01*atom2.q*(atom1.dipole.x*xr + atom1.dipole.y*yr + atom1.dipole.z*zr);
real factor = a01*2*atom1.q*atom2.q;
real dedx = factor*xr;
real dedy = factor*yr;
real dedz = factor*zr;
#endif
#if defined F2
energy += a01*atom1.q*(atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr);
energy = a01*atom2.q*(atom1.inducedDipole.x*xr + atom1.inducedDipole.y*yr + atom1.inducedDipole.z*zr);
#endif
#if defined F1 || defined F2 || defined T1 || defined T2
real gux2 = a10 + xr*xr*a11;
real gux3 = xr*yr*a11;
real gux4 = xr*zr*a11;
real guy3 = a10 + yr*yr*a11;
real guy4 = yr*zr*a11;
real guz4 = a10 + zr*zr*a11;
#if defined T1
real guy2 = gux3;
real guz2 = gux4;
real guz3 = guy4;
#endif
#if defined T2
real fid1 = sxk*gux2 + syk*gux3 + szk*gux4;
real fid2 = sxk*gux3 + syk*guy3 + szk*guy4;
real fid3 = sxk*gux4 + syk*guy4 + szk*guz4;
real trqi1 = atom1.dipole.y*fid3 - atom1.dipole.z*fid2;
real trqi2 = atom1.dipole.z*fid1 - atom1.dipole.x*fid3;
real trqi3 = atom1.dipole.x*fid2 - atom1.dipole.y*fid1;
#endif
#if defined F1
energy = 2*(atom1.dipole.x*(atom2.dipole.x*gux2 + atom2.dipole.y*gux3 + atom2.dipole.z*gux4) +
atom1.dipole.y*(atom2.dipole.x*gux3 + atom2.dipole.y*guy3 + atom2.dipole.z*guy4) +
atom1.dipole.z*(atom2.dipole.x*gux4 + atom2.dipole.y*guy4 + atom2.dipole.z*guz4));
dedx = atom2.q*(atom1.dipole.x*gux2 + atom1.dipole.y*gux3 + atom1.dipole.z*gux4);
dedx += atom1.q*(atom2.dipole.x*gux2 + atom2.dipole.y*gux3 + atom2.dipole.z*gux4);
dedy = atom2.q*(atom1.dipole.x*gux3 + atom1.dipole.y*guy3 + atom1.dipole.z*guy4);
dedy += atom1.q*(atom2.dipole.x*gux3 + atom2.dipole.y*guy3 + atom2.dipole.z*guy4);
dedz = atom2.q*(atom1.dipole.x*gux4 + atom1.dipole.y*guy4 + atom1.dipole.z*guz4);
dedz += atom1.q*(atom2.dipole.x*gux4 + atom2.dipole.y*guy4 + atom2.dipole.z*guz4);
#endif
#if defined F2
energy = 2*(
atom1.dipole.x*(atom2.inducedDipole.x*gux2 + atom2.inducedDipole.y*gux3 + atom2.inducedDipole.z*gux4) +
atom1.dipole.y*(atom2.inducedDipole.x*gux3 + atom2.inducedDipole.y*guy3 + atom2.inducedDipole.z*guy4) +
atom1.dipole.z*(atom2.inducedDipole.x*gux4 + atom2.inducedDipole.y*guy4 + atom2.inducedDipole.z*guz4) +
atom2.dipole.x*(atom1.inducedDipole.x*gux2 + atom1.inducedDipole.y*gux3 + atom1.inducedDipole.z*gux4) +
atom2.dipole.y*(atom1.inducedDipole.x*gux3 + atom1.inducedDipole.y*guy3 + atom1.inducedDipole.z*guy4) +
atom2.dipole.z*(atom1.inducedDipole.x*gux4 + atom1.inducedDipole.y*guy4 + atom1.inducedDipole.z*guz4));
real dpdx = atom1.q*(sxk*gux2 + syk*gux3 + szk*gux4);
dpdx = atom2.q*(sxi*gux2 + syi*gux3 + szi*gux4);
real dpdy = atom1.q*(sxk*gux3 + syk*guy3 + szk*guy4);
dpdy = atom2.q*(sxi*gux3 + syi*guy3 + szi*guy4);
real dpdz = atom1.q*(sxk*gux4 + syk*guy4 + szk*guz4);
dpdz = atom2.q*(sxi*gux4 + syi*guy4 + szi*guz4);
#endif
real gqxx2 = xr*(2*a20 + xr*xr*a21);
real gqxx3 = yr*xr*xr*a21;
real gqxx4 = zr*xr*xr*a21;
real gqyy2 = xr*yr*yr*a21;
real gqyy3 = yr*(2*a20 + yr*yr*a21);
real gqyy4 = zr*yr*yr*a21;
real gqzz2 = xr*zr*zr*a21;
real gqzz3 = yr*zr*zr*a21;
real gqzz4 = zr*(2*a20 + zr*zr*a21);
real gqxy2 = yr*(a20 + xr*xr*a21);
real gqxy3 = xr*(a20 + yr*yr*a21);
real gqxy4 = zr*xr*yr*a21;
real gqxz2 = zr*(a20 + xr*xr*a21);
real gqxz4 = xr*(a20 + zr*zr*a21);
real gqyz3 = zr*(a20 + yr*yr*a21);
real gqyz4 = yr*(a20 + zr*zr*a21);
#if defined T1 || defined T2
real gqxz3 = gqxy4;
real gqyz2 = gqxy4;
#endif
#if defined F1
energy += atom2.dipole.x*(atom1.quadrupoleXX*gqxx2 + atom1.quadrupoleYY*gqyy2 + atom1.quadrupoleZZ*gqzz2 + 2*(atom1.quadrupoleXY*gqxy2 + atom1.quadrupoleXZ*gqxz2 + atom1.quadrupoleYZ*gqxy4)) +
atom2.dipole.y*(atom1.quadrupoleXX*gqxx3 + atom1.quadrupoleYY*gqyy3 + atom1.quadrupoleZZ*gqzz3 + 2*(atom1.quadrupoleXY*gqxy3 + atom1.quadrupoleXZ*gqxy4 + atom1.quadrupoleYZ*gqyz3)) +
atom2.dipole.z*(atom1.quadrupoleXX*gqxx4 + atom1.quadrupoleYY*gqyy4 + atom1.quadrupoleZZ*gqzz4 + 2*(atom1.quadrupoleXY*gqxy4 + atom1.quadrupoleXZ*gqxz4 + atom1.quadrupoleYZ*gqyz4));
energy = atom1.dipole.x*(atom2.quadrupoleXX*gqxx2 + atom2.quadrupoleYY*gqyy2 + atom2.quadrupoleZZ*gqzz2 + 2*(atom2.quadrupoleXY*gqxy2 + atom2.quadrupoleXZ*gqxz2 + atom2.quadrupoleYZ*gqxy4)) +
atom1.dipole.y*(atom2.quadrupoleXX*gqxx3 + atom2.quadrupoleYY*gqyy3 + atom2.quadrupoleZZ*gqzz3 + 2*(atom2.quadrupoleXY*gqxy3 + atom2.quadrupoleXZ*gqxy4 + atom2.quadrupoleYZ*gqyz3)) +
atom1.dipole.z*(atom2.quadrupoleXX*gqxx4 + atom2.quadrupoleYY*gqyy4 + atom2.quadrupoleZZ*gqzz4 + 2*(atom2.quadrupoleXY*gqxy4 + atom2.quadrupoleXZ*gqxz4 + atom2.quadrupoleYZ*gqyz4));
dedx += atom2.q*(atom1.quadrupoleXX*gqxx2 + atom1.quadrupoleYY*gqyy2 + atom1.quadrupoleZZ*gqzz2 + 2*(atom1.quadrupoleXY*gqxy2 + atom1.quadrupoleXZ*gqxz2 + atom1.quadrupoleYZ*gqxy4));
dedx += atom1.q*(atom2.quadrupoleXX*gqxx2 + atom2.quadrupoleYY*gqyy2 + atom2.quadrupoleZZ*gqzz2 + 2*(atom2.quadrupoleXY*gqxy2 + atom2.quadrupoleXZ*gqxz2 + atom2.quadrupoleYZ*gqxy4));
dedy += atom2.q*(atom1.quadrupoleXX*gqxx3 + atom1.quadrupoleYY*gqyy3 + atom1.quadrupoleZZ*gqzz3 + 2*(atom1.quadrupoleXY*gqxy3 + atom1.quadrupoleXZ*gqxy4 + atom1.quadrupoleYZ*gqyz3));
dedy += atom1.q*(atom2.quadrupoleXX*gqxx3 + atom2.quadrupoleYY*gqyy3 + atom2.quadrupoleZZ*gqzz3 + 2*(atom2.quadrupoleXY*gqxy3 + atom2.quadrupoleXZ*gqxy4 + atom2.quadrupoleYZ*gqyz3));
dedz += atom2.q*(atom1.quadrupoleXX*gqxx4 + atom1.quadrupoleYY*gqyy4 + atom1.quadrupoleZZ*gqzz4 + 2*(atom1.quadrupoleXY*gqxy4 + atom1.quadrupoleXZ*gqxz4 + atom1.quadrupoleYZ*gqyz4));
dedz += atom1.q*(atom2.quadrupoleXX*gqxx4 + atom2.quadrupoleYY*gqyy4 + atom2.quadrupoleZZ*gqzz4 + 2*(atom2.quadrupoleXY*gqxy4 + atom2.quadrupoleXZ*gqxz4 + atom2.quadrupoleYZ*gqyz4));
#endif
#if defined F2
energy += atom2.inducedDipole.x*(atom1.quadrupoleXX*gqxx2 + atom1.quadrupoleYY*gqyy2 + atom1.quadrupoleZZ*gqzz2 + 2*(atom1.quadrupoleXY*gqxy2 + atom1.quadrupoleXZ*gqxz2 + atom1.quadrupoleYZ*gqxy4)) +
atom2.inducedDipole.y*(atom1.quadrupoleXX*gqxx3 + atom1.quadrupoleYY*gqyy3 + atom1.quadrupoleZZ*gqzz3 + 2*(atom1.quadrupoleXY*gqxy3 + atom1.quadrupoleXZ*gqxy4 + atom1.quadrupoleYZ*gqyz3)) +
atom2.inducedDipole.z*(atom1.quadrupoleXX*gqxx4 + atom1.quadrupoleYY*gqyy4 + atom1.quadrupoleZZ*gqzz4 + 2*(atom1.quadrupoleXY*gqxy4 + atom1.quadrupoleXZ*gqxz4 + atom1.quadrupoleYZ*gqyz4));
energy = atom1.inducedDipole.x*(atom2.quadrupoleXX*gqxx2 + atom2.quadrupoleYY*gqyy2 + atom2.quadrupoleZZ*gqzz2 + 2*(atom2.quadrupoleXY*gqxy2 + atom2.quadrupoleXZ*gqxz2 + atom2.quadrupoleYZ*gqxy4)) +
atom1.inducedDipole.y*(atom2.quadrupoleXX*gqxx3 + atom2.quadrupoleYY*gqyy3 + atom2.quadrupoleZZ*gqzz3 + 2*(atom2.quadrupoleXY*gqxy3 + atom2.quadrupoleXZ*gqxy4 + atom2.quadrupoleYZ*gqyz3)) +
atom1.inducedDipole.z*(atom2.quadrupoleXX*gqxx4 + atom2.quadrupoleYY*gqyy4 + atom2.quadrupoleZZ*gqzz4 + 2*(atom2.quadrupoleXY*gqxy4 + atom2.quadrupoleXZ*gqxz4 + atom2.quadrupoleYZ*gqyz4));
#endif
#endif
// Born derivs of the unweighted reaction potential gradient tensor
#if defined B1
dsumdrB1 += b01*atom1.q*(atom2.dipole.x*xr + atom2.dipole.y*yr + atom2.dipole.z*zr);
dsumdrB1 = b01*atom2.q*(atom1.dipole.x*xr + atom1.dipole.y*yr + atom1.dipole.z*zr);
#endif
#if defined B2
dsumdrB2 += b01*atom1.q*(sxk*xr+ syk*yr + szk*zr);
dsumdrB2 = b01*atom2.q*(sxi*xr+ syi*yr + szi*zr);
#endif
#if defined B1 || defined B2
real gux22 = b10 + xr2*b11;
real gux23 = xr*yr*b11;
real gux24 = xr*zr*b11;
real guy22 = gux23;
real guy23 = b10 + yr2*b11;
real guy24 = yr*zr*b11;
real guz22 = gux24;
real guz23 = guy24;
real guz24 = b10 + zr2*b11;
#if defined B1
dsumdrB1 = 2*(atom1.dipole.x*(atom2.dipole.x*gux22 + atom2.dipole.y*guy22 + atom2.dipole.z*guz22) +
atom1.dipole.y*(atom2.dipole.x*gux23 + atom2.dipole.y*guy23 + atom2.dipole.z*guz23) +
atom1.dipole.z*(atom2.dipole.x*gux24 + atom2.dipole.y*guy24 + atom2.dipole.z*guz24));
#endif
#if defined B2
dsumdrB2 = 2*(atom1.dipole.x*(sxk*gux22 + syk*guy22 + szk*guz22) +
atom1.dipole.y*(sxk*gux23 + syk*guy23 + szk*guz23) +
atom1.dipole.z*(sxk*gux24 + syk*guy24 + szk*guz24) +
atom2.dipole.x*(sxi*gux22 + syi*guy22 + szi*guz22) +
atom2.dipole.y*(sxi*gux23 + syi*guy23 + szi*guz23) +
atom2.dipole.z*(sxi*gux24 + syi*guy24 + szi*guz24));
#ifndef DIRECT_POLARIZATION
dsumdrB2 = 2*(atom1.inducedDipole.x*(atom2.inducedDipolePolar.x*gux22 + atom2.inducedDipolePolar.y*gux23 + atom2.inducedDipolePolar.z*gux24)
+ atom1.inducedDipole.y*(atom2.inducedDipolePolar.x*guy22 + atom2.inducedDipolePolar.y*guy23 + atom2.inducedDipolePolar.z*guy24)
+ atom1.inducedDipole.z*(atom2.inducedDipolePolar.x*guz22 + atom2.inducedDipolePolar.y*guz23 + atom2.inducedDipolePolar.z*guz24)
+ atom2.inducedDipole.x*(atom1.inducedDipolePolar.x*gux22 + atom1.inducedDipolePolar.y*gux23 + atom1.inducedDipolePolar.z*gux24)
+ atom2.inducedDipole.y*(atom1.inducedDipolePolar.x*guy22 + atom1.inducedDipolePolar.y*guy23 + atom1.inducedDipolePolar.z*guy24)
+ atom2.inducedDipole.z*(atom1.inducedDipolePolar.x*guz22 + atom1.inducedDipolePolar.y*guz23 + atom1.inducedDipolePolar.z*guz24));
#endif
#endif
real gqxx22 = xr*(2*b20 + xr2*b21);
real gqxx23 = yr*xr2*b21;
real gqxx24 = zr*xr2*b21;
real gqyy22 = xr*yr2*b21;
real gqyy23 = yr*(2*b20 + yr2*b21);
real gqyy24 = zr*yr2*b21;
real gqzz22 = xr*zr2*b21;
real gqzz23 = yr*zr2*b21;
real gqzz24 = zr*(2*b20 + zr2*b21);
real gqxy22 = yr*(b20 + xr2*b21);
real gqxy23 = xr*(b20 + yr2*b21);
real gqxy24 = zr*xr*yr*b21;
real gqxz22 = zr*(b20 + xr2*b21);
real gqxz23 = gqxy24;
real gqxz24 = xr*(b20 + zr2*b21);
real gqyz22 = gqxy24;
real gqyz23 = zr*(b20 + yr2*b21);
real gqyz24 = yr*(b20 + zr2*b21);
#if defined B1
dsumdrB1 += atom2.dipole.x*(atom1.quadrupoleXX*gqxx22 + atom1.quadrupoleYY*gqyy22 + atom1.quadrupoleZZ*gqzz22 + 2*(atom1.quadrupoleXY*gqxy22 + atom1.quadrupoleXZ*gqxz22 + atom1.quadrupoleYZ*gqyz22)) +
atom2.dipole.y*(atom1.quadrupoleXX*gqxx23 + atom1.quadrupoleYY*gqyy23 + atom1.quadrupoleZZ*gqzz23 + 2*(atom1.quadrupoleXY*gqxy23 + atom1.quadrupoleXZ*gqxz23 + atom1.quadrupoleYZ*gqyz23)) +
atom2.dipole.z*(atom1.quadrupoleXX*gqxx24 + atom1.quadrupoleYY*gqyy24 + atom1.quadrupoleZZ*gqzz24 + 2*(atom1.quadrupoleXY*gqxy24 + atom1.quadrupoleXZ*gqxz24 + atom1.quadrupoleYZ*gqyz24));
dsumdrB1 = atom1.dipole.x*(atom2.quadrupoleXX*gqxx22 + atom2.quadrupoleYY*gqyy22 + atom2.quadrupoleZZ*gqzz22 + 2*(atom2.quadrupoleXY*gqxy22 + atom2.quadrupoleXZ*gqxz22 + atom2.quadrupoleYZ*gqyz22)) +
atom1.dipole.y*(atom2.quadrupoleXX*gqxx23 + atom2.quadrupoleYY*gqyy23 + atom2.quadrupoleZZ*gqzz23 + 2*(atom2.quadrupoleXY*gqxy23 + atom2.quadrupoleXZ*gqxz23 + atom2.quadrupoleYZ*gqyz23)) +
atom1.dipole.z*(atom2.quadrupoleXX*gqxx24 + atom2.quadrupoleYY*gqyy24 + atom2.quadrupoleZZ*gqzz24 + 2*(atom2.quadrupoleXY*gqxy24 + atom2.quadrupoleXZ*gqxz24 + atom2.quadrupoleYZ*gqyz24));
#endif
#if defined B2
dsumdrB2 += sxk*(atom1.quadrupoleXX*gqxx22 + atom1.quadrupoleYY*gqyy22 + atom1.quadrupoleZZ*gqzz22 + 2*(atom1.quadrupoleXY*gqxy22 + atom1.quadrupoleXZ*gqxz22 + atom1.quadrupoleYZ*gqyz22)) +
syk*(atom1.quadrupoleXX*gqxx23 + atom1.quadrupoleYY*gqyy23 + atom1.quadrupoleZZ*gqzz23 + 2*(atom1.quadrupoleXY*gqxy23 + atom1.quadrupoleXZ*gqxz23 + atom1.quadrupoleYZ*gqyz23)) +
szk*(atom1.quadrupoleXX*gqxx24 + atom1.quadrupoleYY*gqyy24 + atom1.quadrupoleZZ*gqzz24 + 2*(atom1.quadrupoleXY*gqxy24 + atom1.quadrupoleXZ*gqxz24 + atom1.quadrupoleYZ*gqyz24));
dsumdrB2 = sxi*(atom2.quadrupoleXX*gqxx22 + atom2.quadrupoleYY*gqyy22 + atom2.quadrupoleZZ*gqzz22 + 2*(atom2.quadrupoleXY*gqxy22 + atom2.quadrupoleXZ*gqxz22 + atom2.quadrupoleYZ*gqyz22)) +
syi*(atom2.quadrupoleXX*gqxx23 + atom2.quadrupoleYY*gqyy23 + atom2.quadrupoleZZ*gqzz23 + 2*(atom2.quadrupoleXY*gqxy23 + atom2.quadrupoleXZ*gqxz23 + atom2.quadrupoleYZ*gqyz23)) +
szi*(atom2.quadrupoleXX*gqxx24 + atom2.quadrupoleYY*gqyy24 + atom2.quadrupoleZZ*gqzz24 + 2*(atom2.quadrupoleXY*gqxy24 + atom2.quadrupoleXZ*gqxz24 + atom2.quadrupoleYZ*gqyz24));
#endif
#endif
// unweighted 2nd reaction potential gradient tensor;
#if defined F1 || defined F2 || defined T1
real gc5 = a01 + xr2*a02;
real gc6 = xr*yr*a02;
real gc7 = xr*zr*a02;
real gc8 = a01 + yr2*a02;
real gc9 = yr*zr*a02;
real gc10 = a01 + zr2*a02;
#if defined F1
energy += atom1.q*(atom2.quadrupoleXX*gc5 + atom2.quadrupoleYY*gc8 + atom2.quadrupoleZZ*gc10 + 2*(atom2.quadrupoleXY*gc6 + atom2.quadrupoleXZ*gc7 + atom2.quadrupoleYZ*gc9));
energy += atom2.q*(atom1.quadrupoleXX*gc5 + atom1.quadrupoleYY*gc8 + atom1.quadrupoleZZ*gc10 + 2*(atom1.quadrupoleXY*gc6 + atom1.quadrupoleXZ*gc7 + atom1.quadrupoleYZ*gc9));
dedx += atom1.q*(atom2.dipole.x*gc5 + atom2.dipole.y*gc6 + atom2.dipole.z*gc7);
dedx = atom2.q*(atom1.dipole.x*gc5 + atom1.dipole.y*gc6 + atom1.dipole.z*gc7);
dedy += atom1.q*(atom2.dipole.x*gc6 + atom2.dipole.y*gc8 + atom2.dipole.z*gc9);
dedy = atom2.q*(atom1.dipole.x*gc6 + atom1.dipole.y*gc8 + atom1.dipole.z*gc9);
dedz += atom1.q*(atom2.dipole.x*gc7 + atom2.dipole.y*gc9 + atom2.dipole.z*gc10);
dedz = atom2.q*(atom1.dipole.x*gc7 + atom1.dipole.y*gc9 + atom1.dipole.z*gc10);
#endif
#if defined F2
dpdx += atom1.q*(sxk*gc5 + syk*gc6 + szk*gc7);
dpdx = atom2.q*(sxi*gc5 + syi*gc6 + szi*gc7);
dpdy += atom1.q*(sxk*gc6 + syk*gc8 + szk*gc9);
dpdy = atom2.q*(sxi*gc6 + syi*gc8 + szi*gc9);
dpdz += atom1.q*(sxk*gc7 + syk*gc9 + szk*gc10);
dpdz = atom2.q*(sxi*gc7 + syi*gc9 + szi*gc10);
#endif
#endif
#if defined F1 || defined F2 || defined T1 || defined T2
real gux5 = xr*(3*a11 + xr2*a12);
real gux6 = yr*(a11 + xr2*a12);
real gux7 = zr*(a11 + xr2*a12);
real gux8 = xr*(a11 + yr2*a12);
real gux9 = zr*xr*yr*a12;
real gux10 = xr*(a11 + zr2*a12);
real guy5 = yr*(a11 + xr2*a12);
real guy6 = xr*(a11 + yr2*a12);
real guy7 = gux9;
real guy8 = yr*(3*a11 + yr2*a12);
real guy9 = zr*(a11 + yr2*a12);
real guy10 = yr*(a11 + zr2*a12);
real guz5 = zr*(a11 + xr2*a12);
real guz6 = gux9;
real guz7 = xr*(a11 + zr2*a12);
real guz8 = zr*(a11 + yr2*a12);
real guz9 = yr*(a11 + zr2*a12);
real guz10 = zr*(3*a11 + zr2*a12);
#if defined F1
energy = atom1.dipole.x*(atom2.quadrupoleXX*gux5 + atom2.quadrupoleYY*gux8 + atom2.quadrupoleZZ*gux10 + 2*(atom2.quadrupoleXY*gux6 + atom2.quadrupoleXZ*gux7 + atom2.quadrupoleYZ*gux9)) +
atom1.dipole.y*(atom2.quadrupoleXX*guy5 + atom2.quadrupoleYY*guy8 + atom2.quadrupoleZZ*guy10 + 2*(atom2.quadrupoleXY*guy6 + atom2.quadrupoleXZ*guy7 + atom2.quadrupoleYZ*guy9)) +
atom1.dipole.z*(atom2.quadrupoleXX*guz5 + atom2.quadrupoleYY*guz8 + atom2.quadrupoleZZ*guz10 + 2*(atom2.quadrupoleXY*guz6 + atom2.quadrupoleXZ*guz7 + atom2.quadrupoleYZ*guz9));
energy += atom2.dipole.x*(atom1.quadrupoleXX*gux5 + atom1.quadrupoleYY*gux8 + atom1.quadrupoleZZ*gux10 + 2*(atom1.quadrupoleXY*gux6 + atom1.quadrupoleXZ*gux7 + atom1.quadrupoleYZ*gux9)) +
atom2.dipole.y*(atom1.quadrupoleXX*guy5 + atom1.quadrupoleYY*guy8 + atom1.quadrupoleZZ*guy10 + 2*(atom1.quadrupoleXY*guy6 + atom1.quadrupoleXZ*guy7 + atom1.quadrupoleYZ*guy9)) +
atom2.dipole.z*(atom1.quadrupoleXX*guz5 + atom1.quadrupoleYY*guz8 + atom1.quadrupoleZZ*guz10 + 2*(atom1.quadrupoleXY*guz6 + atom1.quadrupoleXZ*guz7 + atom1.quadrupoleYZ*guz9));
dedx = 2*(atom1.dipole.x*(atom2.dipole.x*gux5 + atom2.dipole.y*guy5 + atom2.dipole.z*guz5) +
atom1.dipole.y*(atom2.dipole.x*gux6 + atom2.dipole.y*guy6 + atom2.dipole.z*guz6) +
atom1.dipole.z*(atom2.dipole.x*gux7 + atom2.dipole.y*guy7 + atom2.dipole.z*guz7));
dedy = 2*(atom1.dipole.x*(atom2.dipole.x*gux6 + atom2.dipole.y*guy6 + atom2.dipole.z*guz6) +
atom1.dipole.y*(atom2.dipole.x*gux8 + atom2.dipole.y*guy8 + atom2.dipole.z*guz8) +
atom1.dipole.z*(atom2.dipole.x*gux9 + atom2.dipole.y*guy9 + atom2.dipole.z*guz9));
dedz = 2*(atom1.dipole.x*(atom2.dipole.x*gux7 + atom2.dipole.y*guy7 + atom2.dipole.z*guz7) +
atom1.dipole.y*(atom2.dipole.x*gux9 + atom2.dipole.y*guy9 + atom2.dipole.z*guz9) +
atom1.dipole.z*(atom2.dipole.x*gux10 + atom2.dipole.y*guy10 + atom2.dipole.z*guz10));
#endif
#if defined F2
energy = atom1.inducedDipole.x*(atom2.quadrupoleXX*gux5 + atom2.quadrupoleYY*gux8 + atom2.quadrupoleZZ*gux10 + 2*(atom2.quadrupoleXY*gux6 + atom2.quadrupoleXZ*gux7 + atom2.quadrupoleYZ*gux9)) +
atom1.inducedDipole.y*(atom2.quadrupoleXX*guy5 + atom2.quadrupoleYY*guy8 + atom2.quadrupoleZZ*guy10 + 2*(atom2.quadrupoleXY*guy6 + atom2.quadrupoleXZ*guy7 + atom2.quadrupoleYZ*guy9)) +
atom1.inducedDipole.z*(atom2.quadrupoleXX*guz5 + atom2.quadrupoleYY*guz8 + atom2.quadrupoleZZ*guz10 + 2*(atom2.quadrupoleXY*guz6 + atom2.quadrupoleXZ*guz7 + atom2.quadrupoleYZ*guz9));
energy += atom2.inducedDipole.x*(atom1.quadrupoleXX*gux5 + atom1.quadrupoleYY*gux8 + atom1.quadrupoleZZ*gux10 + 2*(atom1.quadrupoleXY*gux6 + atom1.quadrupoleXZ*gux7 + atom1.quadrupoleYZ*gux9)) +
atom2.inducedDipole.y*(atom1.quadrupoleXX*guy5 + atom1.quadrupoleYY*guy8 + atom1.quadrupoleZZ*guy10 + 2*(atom1.quadrupoleXY*guy6 + atom1.quadrupoleXZ*guy7 + atom1.quadrupoleYZ*guy9)) +
atom2.inducedDipole.z*(atom1.quadrupoleXX*guz5 + atom1.quadrupoleYY*guz8 + atom1.quadrupoleZZ*guz10 + 2*(atom1.quadrupoleXY*guz6 + atom1.quadrupoleXZ*guz7 + atom1.quadrupoleYZ*guz9));
dpdx = 2*(atom1.dipole.x*(sxk*gux5 + syk*guy5 + szk*guz5) + atom1.dipole.y*(sxk*gux6 + syk*guy6 + szk*guz6) + atom1.dipole.z*(sxk*gux7 + syk*guy7 + szk*guz7) +
atom2.dipole.x*(sxi*gux5 + syi*guy5 + szi*guz5) + atom2.dipole.y*(sxi*gux6 + syi*guy6 + szi*guz6) + atom2.dipole.z*(sxi*gux7 + syi*guy7 + szi*guz7));
dpdy = 2*(atom1.dipole.x*(sxk*gux6 + syk*guy6 + szk*guz6) + atom1.dipole.y*(sxk*gux8 + syk*guy8 + szk*guz8) + atom1.dipole.z*(sxk*gux9 + syk*guy9 + szk*guz9) +
atom2.dipole.x*(sxi*gux6 + syi*guy6 + szi*guz6) + atom2.dipole.y*(sxi*gux8 + syi*guy8 + szi*guz8) + atom2.dipole.z*(sxi*gux9 + syi*guy9 + szi*guz9));
dpdz = 2*(atom1.dipole.x*(sxk*gux7 + syk*guy7 + szk*guz7) + atom1.dipole.y*(sxk*gux9 + syk*guy9 + szk*guz9) + atom1.dipole.z*(sxk*gux10 + syk*guy10 + szk*guz10) +
atom2.dipole.x*(sxi*gux7 + syi*guy7 + szi*guz7) + atom2.dipole.y*(sxi*gux9 + syi*guy9 + szi*guz9) + atom2.dipole.z*(sxi*gux10 + syi*guy10 + szi*guz10));
#ifndef DIRECT_POLARIZATION
dpdx = 2*(atom1.inducedDipole.x*(atom2.inducedDipolePolar.x*gux5 + atom2.inducedDipolePolar.y*gux6 + atom2.inducedDipolePolar.z*gux7)
+ atom1.inducedDipole.y*(atom2.inducedDipolePolar.x*guy5 + atom2.inducedDipolePolar.y*guy6 + atom2.inducedDipolePolar.z*guy7)
+ atom1.inducedDipole.z*(atom2.inducedDipolePolar.x*guz5 + atom2.inducedDipolePolar.y*guz6 + atom2.inducedDipolePolar.z*guz7)
+ atom2.inducedDipole.x*(atom1.inducedDipolePolar.x*gux5 + atom1.inducedDipolePolar.y*gux6 + atom1.inducedDipolePolar.z*gux7)
+ atom2.inducedDipole.y*(atom1.inducedDipolePolar.x*guy5 + atom1.inducedDipolePolar.y*guy6 + atom1.inducedDipolePolar.z*guy7)
+ atom2.inducedDipole.z*(atom1.inducedDipolePolar.x*guz5 + atom1.inducedDipolePolar.y*guz6 + atom1.inducedDipolePolar.z*guz7));
dpdy = 2*(atom1.inducedDipole.x*(atom2.inducedDipolePolar.x*gux6 + atom2.inducedDipolePolar.y*gux8 + atom2.inducedDipolePolar.z*gux9)
+ atom1.inducedDipole.y*(atom2.inducedDipolePolar.x*guy6 + atom2.inducedDipolePolar.y*guy8 + atom2.inducedDipolePolar.z*guy9)
+ atom1.inducedDipole.z*(atom2.inducedDipolePolar.x*guz6 + atom2.inducedDipolePolar.y*guz8 + atom2.inducedDipolePolar.z*guz9)
+ atom2.inducedDipole.x*(atom1.inducedDipolePolar.x*gux6 + atom1.inducedDipolePolar.y*gux8 + atom1.inducedDipolePolar.z*gux9)
+ atom2.inducedDipole.y*(atom1.inducedDipolePolar.x*guy6 + atom1.inducedDipolePolar.y*guy8 + atom1.inducedDipolePolar.z*guy9)
+ atom2.inducedDipole.z*(atom1.inducedDipolePolar.x*guz6 + atom1.inducedDipolePolar.y*guz8 + atom1.inducedDipolePolar.z*guz9));
dpdz = 2*(atom1.inducedDipole.x*(atom2.inducedDipolePolar.x*gux7 + atom2.inducedDipolePolar.y*gux9 + atom2.inducedDipolePolar.z*gux10)
+ atom1.inducedDipole.y*(atom2.inducedDipolePolar.x*guy7 + atom2.inducedDipolePolar.y*guy9 + atom2.inducedDipolePolar.z*guy10)
+ atom1.inducedDipole.z*(atom2.inducedDipolePolar.x*guz7 + atom2.inducedDipolePolar.y*guz9 + atom2.inducedDipolePolar.z*guz10)
+ atom2.inducedDipole.x*(atom1.inducedDipolePolar.x*gux7 + atom1.inducedDipolePolar.y*gux9 + atom1.inducedDipolePolar.z*gux10)
+ atom2.inducedDipole.y*(atom1.inducedDipolePolar.x*guy7 + atom1.inducedDipolePolar.y*guy9 + atom1.inducedDipolePolar.z*guy10)
+ atom2.inducedDipole.z*(atom1.inducedDipolePolar.x*guz7 + atom1.inducedDipolePolar.y*guz9 + atom1.inducedDipolePolar.z*guz10));
#endif
#endif
#endif
#if defined F1 || defined F2 || defined T1
real gqxx5 = 2*a20 + xr2*(5*a21 + xr2*a22);
real gqxx6 = yr*xr*(2*a21 + xr2*a22);
real gqxx7 = zr*xr*(2*a21 + xr2*a22);
real gqxx8 = xr2*(a21 + yr2*a22);
real gqxx9 = zr*yr*xr2*a22;
real gqxx10 = xr2*(a21 + zr2*a22);
real gqyy5 = yr2*(a21 + xr2*a22);
real gqyy6 = xr*yr*(2*a21 + yr2*a22);
real gqyy7 = xr*zr*yr2*a22;
real gqyy8 = 2*a20 + yr2*(5*a21 + yr2*a22);
real gqyy9 = yr*zr*(2*a21 + yr2*a22);
real gqyy10 = yr2*(a21 + zr2*a22);
real gqzz5 = zr2*(a21 + xr2*a22);
real gqzz6 = xr*yr*zr2*a22;
real gqzz7 = xr*zr*(2*a21 + zr2*a22);
real gqzz8 = zr2*(a21 + yr2*a22);
real gqzz9 = yr*zr*(2*a21 + zr2*a22);
real gqzz10 = 2*a20 + zr2*(5*a21 + zr2*a22);
real gqxy5 = xr*yr*(3*a21 + xr2*a22);
real gqxy6 = a20 + (xr2 + yr2)*a21 + xr2*yr2*a22;
real gqxy7 = zr*yr*(a21 + xr2*a22);
real gqxy8 = xr*yr*(3*a21 + yr2*a22);
real gqxy9 = zr*xr*(a21 + yr2*a22);
real gqxy10 = xr*yr*(a21 + zr2*a22);
real gqxz5 = xr*zr*(3*a21 + xr2*a22);
real gqxz6 = yr*zr*(a21 + xr2*a22);
real gqxz7 = a20 + (xr2 + zr2)*a21 + xr2*zr2*a22;
real gqxz8 = xr*zr*(a21 + yr2*a22);
real gqxz9 = xr*yr*(a21 + zr2*a22);
real gqxz10 = xr*zr*(3*a21 + zr2*a22);
real gqyz5 = zr*yr*(a21 + xr2*a22);
real gqyz6 = xr*zr*(a21 + yr2*a22);
real gqyz7 = xr*yr*(a21 + zr2*a22);
real gqyz8 = yr*zr*(3*a21 + yr2*a22);
real gqyz9 = a20 + (yr2 + zr2)*a21 + yr2*zr2*a22;
real gqyz10 = yr*zr*(3*a21 + zr2*a22);
#if defined F1
energy += atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx5 + atom2.quadrupoleYY*gqxx8 + atom2.quadrupoleZZ*gqxx10 + 2*(atom2.quadrupoleXY*gqxx6 + atom2.quadrupoleXZ*gqxx7 + atom2.quadrupoleYZ*gqxx9))
+ atom1.quadrupoleYY*(atom2.quadrupoleXX*gqyy5 + atom2.quadrupoleYY*gqyy8 + atom2.quadrupoleZZ*gqyy10 + 2*(atom2.quadrupoleXY*gqyy6 + atom2.quadrupoleXZ*gqyy7 + atom2.quadrupoleYZ*gqyy9))
+ atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqzz5 + atom2.quadrupoleYY*gqzz8 + atom2.quadrupoleZZ*gqzz10 + 2*(atom2.quadrupoleXY*gqzz6 + atom2.quadrupoleXZ*gqzz7 + atom2.quadrupoleYZ*gqzz9))
+ 2*(atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxy5 + atom2.quadrupoleYY*gqxy8 + atom2.quadrupoleZZ*gqxy10
+ 2*(atom2.quadrupoleXY*gqxy6 + atom2.quadrupoleXZ*gqxy7 + atom2.quadrupoleYZ*gqxy9))
+ atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxz5 + atom2.quadrupoleYY*gqxz8 + atom2.quadrupoleZZ*gqxz10
+ 2*(atom2.quadrupoleXY*gqxz6 + atom2.quadrupoleXZ*gqxz7 + atom2.quadrupoleYZ*gqxz9))
+ atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqyz5 + atom2.quadrupoleYY*gqyz8 + atom2.quadrupoleZZ*gqyz10
+ 2*(atom2.quadrupoleXY*gqyz6 + atom2.quadrupoleXZ*gqyz7 + atom2.quadrupoleYZ*gqyz9)));
energy += atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx5 + atom2.quadrupoleYY*gqyy5 + atom2.quadrupoleZZ*gqzz5 + 2*(atom2.quadrupoleXY*gqxy5 + atom2.quadrupoleXZ*gqxz5 + atom2.quadrupoleYZ*gqyz5))
+ atom1.quadrupoleYY*(atom2.quadrupoleXX*gqxx8 + atom2.quadrupoleYY*gqyy8 + atom2.quadrupoleZZ*gqzz8
+ 2*(atom2.quadrupoleXY*gqxy8 + atom2.quadrupoleXZ*gqxz8 + atom2.quadrupoleYZ*gqyz8))
+ atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqxx10 + atom2.quadrupoleYY*gqyy10 + atom2.quadrupoleZZ*gqzz10
+ 2*(atom2.quadrupoleXY*gqxy10 + atom2.quadrupoleXZ*gqxz10 + atom2.quadrupoleYZ*gqyz10))
+ 2*(atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxx6 + atom2.quadrupoleYY*gqyy6 + atom2.quadrupoleZZ*gqzz6
+ 2*(atom2.quadrupoleXY*gqxy6 + atom2.quadrupoleXZ*gqxz6 + atom2.quadrupoleYZ*gqyz6))
+ atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxx7 + atom2.quadrupoleYY*gqyy7 + atom2.quadrupoleZZ*gqzz7
+ 2*(atom2.quadrupoleXY*gqxy7 + atom2.quadrupoleXZ*gqxz7 + atom2.quadrupoleYZ*gqyz7))
+ atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqxx9 + atom2.quadrupoleYY*gqyy9 + atom2.quadrupoleZZ*gqzz9
+ 2*(atom2.quadrupoleXY*gqxy9 + atom2.quadrupoleXZ*gqxz9 + atom2.quadrupoleYZ*gqyz9)));
dedx += atom2.dipole.x*(atom1.quadrupoleXX*gqxx5 + atom1.quadrupoleYY*gqyy5 + atom1.quadrupoleZZ*gqzz5 + 2*(atom1.quadrupoleXY*gqxy5 + atom1.quadrupoleXZ*gqxz5 + atom1.quadrupoleYZ*gqyz5)) +
atom2.dipole.y*(atom1.quadrupoleXX*gqxx6 + atom1.quadrupoleYY*gqyy6 + atom1.quadrupoleZZ*gqzz6 + 2*(atom1.quadrupoleXY*gqxy6 + atom1.quadrupoleXZ*gqxz6 + atom1.quadrupoleYZ*gqyz6)) +
atom2.dipole.z*(atom1.quadrupoleXX*gqxx7 + atom1.quadrupoleYY*gqyy7 + atom1.quadrupoleZZ*gqzz7 + 2*(atom1.quadrupoleXY*gqxy7 + atom1.quadrupoleXZ*gqxz7 + atom1.quadrupoleYZ*gqyz7));
dedx = atom1.dipole.x*(atom2.quadrupoleXX*gqxx5 + atom2.quadrupoleYY*gqyy5 + atom2.quadrupoleZZ*gqzz5 + 2*(atom2.quadrupoleXY*gqxy5 + atom2.quadrupoleXZ*gqxz5 + atom2.quadrupoleYZ*gqyz5)) +
atom1.dipole.y*(atom2.quadrupoleXX*gqxx6 + atom2.quadrupoleYY*gqyy6 + atom2.quadrupoleZZ*gqzz6 + 2*(atom2.quadrupoleXY*gqxy6 + atom2.quadrupoleXZ*gqxz6 + atom2.quadrupoleYZ*gqyz6)) +
atom1.dipole.z*(atom2.quadrupoleXX*gqxx7 + atom2.quadrupoleYY*gqyy7 + atom2.quadrupoleZZ*gqzz7 + 2*(atom2.quadrupoleXY*gqxy7 + atom2.quadrupoleXZ*gqxz7 + atom2.quadrupoleYZ*gqyz7));
dedy += atom2.dipole.x*(atom1.quadrupoleXX*gqxx6 + atom1.quadrupoleYY*gqyy6 + atom1.quadrupoleZZ*gqzz6 + 2*(atom1.quadrupoleXY*gqxy6 + atom1.quadrupoleXZ*gqxz6 + atom1.quadrupoleYZ*gqyz6)) +
atom2.dipole.y*(atom1.quadrupoleXX*gqxx8 + atom1.quadrupoleYY*gqyy8 + atom1.quadrupoleZZ*gqzz8 + 2*(atom1.quadrupoleXY*gqxy8 + atom1.quadrupoleXZ*gqxz8 + atom1.quadrupoleYZ*gqyz8)) +
atom2.dipole.z*(atom1.quadrupoleXX*gqxx9 + atom1.quadrupoleYY*gqyy9 + atom1.quadrupoleZZ*gqzz9 + 2*(atom1.quadrupoleXY*gqxy9 + atom1.quadrupoleXZ*gqxz9 + atom1.quadrupoleYZ*gqyz9));
dedy = atom1.dipole.x*(atom2.quadrupoleXX*gqxx6 + atom2.quadrupoleYY*gqyy6 + atom2.quadrupoleZZ*gqzz6 + 2*(atom2.quadrupoleXY*gqxy6 + atom2.quadrupoleXZ*gqxz6 + atom2.quadrupoleYZ*gqyz6)) +
atom1.dipole.y*(atom2.quadrupoleXX*gqxx8 + atom2.quadrupoleYY*gqyy8 + atom2.quadrupoleZZ*gqzz8 + 2*(atom2.quadrupoleXY*gqxy8 + atom2.quadrupoleXZ*gqxz8 + atom2.quadrupoleYZ*gqyz8)) +
atom1.dipole.z*(atom2.quadrupoleXX*gqxx9 + atom2.quadrupoleYY*gqyy9 + atom2.quadrupoleZZ*gqzz9 + 2*(atom2.quadrupoleXY*gqxy9 + atom2.quadrupoleXZ*gqxz9 + atom2.quadrupoleYZ*gqyz9));
dedz += atom2.dipole.x*(atom1.quadrupoleXX*gqxx7 + atom1.quadrupoleYY*gqyy7 + atom1.quadrupoleZZ*gqzz7 + 2*(atom1.quadrupoleXY*gqxy7 + atom1.quadrupoleXZ*gqxz7 + atom1.quadrupoleYZ*gqyz7)) +
atom2.dipole.y*(atom1.quadrupoleXX*gqxx9 + atom1.quadrupoleYY*gqyy9 + atom1.quadrupoleZZ*gqzz9 + 2*(atom1.quadrupoleXY*gqxy9 + atom1.quadrupoleXZ*gqxz9 + atom1.quadrupoleYZ*gqyz9)) +
atom2.dipole.z*(atom1.quadrupoleXX*gqxx10 + atom1.quadrupoleYY*gqyy10 + atom1.quadrupoleZZ*gqzz10 + 2*(atom1.quadrupoleXY*gqxy10 + atom1.quadrupoleXZ*gqxz10 + atom1.quadrupoleYZ*gqyz10));
dedz = atom1.dipole.x*(atom2.quadrupoleXX*gqxx7 + atom2.quadrupoleYY*gqyy7 + atom2.quadrupoleZZ*gqzz7 + 2*(atom2.quadrupoleXY*gqxy7 + atom2.quadrupoleXZ*gqxz7 + atom2.quadrupoleYZ*gqyz7)) +
atom1.dipole.y*(atom2.quadrupoleXX*gqxx9 + atom2.quadrupoleYY*gqyy9 + atom2.quadrupoleZZ*gqzz9 + 2*(atom2.quadrupoleXY*gqxy9 + atom2.quadrupoleXZ*gqxz9 + atom2.quadrupoleYZ*gqyz9)) +
atom1.dipole.z*(atom2.quadrupoleXX*gqxx10 + atom2.quadrupoleYY*gqyy10 + atom2.quadrupoleZZ*gqzz10 + 2*(atom2.quadrupoleXY*gqxy10 + atom2.quadrupoleXZ*gqxz10 + atom2.quadrupoleYZ*gqyz10));
#endif
#if defined F2
dpdx += sxk*(atom1.quadrupoleXX*gqxx5 + atom1.quadrupoleYY*gqyy5 + atom1.quadrupoleZZ*gqzz5 + 2*(atom1.quadrupoleXY*gqxy5 + atom1.quadrupoleXZ*gqxz5 + atom1.quadrupoleYZ*gqyz5)) +
syk*(atom1.quadrupoleXX*gqxx6 + atom1.quadrupoleYY*gqyy6 + atom1.quadrupoleZZ*gqzz6 + 2*(atom1.quadrupoleXY*gqxy6 + atom1.quadrupoleXZ*gqxz6 + atom1.quadrupoleYZ*gqyz6)) +
szk*(atom1.quadrupoleXX*gqxx7 + atom1.quadrupoleYY*gqyy7 + atom1.quadrupoleZZ*gqzz7 + 2*(atom1.quadrupoleXY*gqxy7 + atom1.quadrupoleXZ*gqxz7 + atom1.quadrupoleYZ*gqyz7));
dpdx = sxi*(atom2.quadrupoleXX*gqxx5 + atom2.quadrupoleYY*gqyy5 + atom2.quadrupoleZZ*gqzz5 + 2*(atom2.quadrupoleXY*gqxy5 + atom2.quadrupoleXZ*gqxz5 + atom2.quadrupoleYZ*gqyz5)) +
syi*(atom2.quadrupoleXX*gqxx6 + atom2.quadrupoleYY*gqyy6 + atom2.quadrupoleZZ*gqzz6 + 2*(atom2.quadrupoleXY*gqxy6 + atom2.quadrupoleXZ*gqxz6 + atom2.quadrupoleYZ*gqyz6)) +
szi*(atom2.quadrupoleXX*gqxx7 + atom2.quadrupoleYY*gqyy7 + atom2.quadrupoleZZ*gqzz7 + 2*(atom2.quadrupoleXY*gqxy7 + atom2.quadrupoleXZ*gqxz7 + atom2.quadrupoleYZ*gqyz7));
dpdy += sxk*(atom1.quadrupoleXX*gqxx6 + atom1.quadrupoleYY*gqyy6 + atom1.quadrupoleZZ*gqzz6 + 2*(atom1.quadrupoleXY*gqxy6 + atom1.quadrupoleXZ*gqxz6 + atom1.quadrupoleYZ*gqyz6)) +
syk*(atom1.quadrupoleXX*gqxx8 + atom1.quadrupoleYY*gqyy8 + atom1.quadrupoleZZ*gqzz8 + 2*(atom1.quadrupoleXY*gqxy8 + atom1.quadrupoleXZ*gqxz8 + atom1.quadrupoleYZ*gqyz8)) +
szk*(atom1.quadrupoleXX*gqxx9 + atom1.quadrupoleYY*gqyy9 + atom1.quadrupoleZZ*gqzz9 + 2*(atom1.quadrupoleXY*gqxy9 + atom1.quadrupoleXZ*gqxz9 + atom1.quadrupoleYZ*gqyz9));
dpdy = sxi*(atom2.quadrupoleXX*gqxx6 + atom2.quadrupoleYY*gqyy6 + atom2.quadrupoleZZ*gqzz6 + 2*(atom2.quadrupoleXY*gqxy6 + atom2.quadrupoleXZ*gqxz6 + atom2.quadrupoleYZ*gqyz6)) +
syi*(atom2.quadrupoleXX*gqxx8 + atom2.quadrupoleYY*gqyy8 + atom2.quadrupoleZZ*gqzz8 + 2*(atom2.quadrupoleXY*gqxy8 + atom2.quadrupoleXZ*gqxz8 + atom2.quadrupoleYZ*gqyz8)) +
szi*(atom2.quadrupoleXX*gqxx9 + atom2.quadrupoleYY*gqyy9 + atom2.quadrupoleZZ*gqzz9 + 2*(atom2.quadrupoleXY*gqxy9 + atom2.quadrupoleXZ*gqxz9 + atom2.quadrupoleYZ*gqyz9));
dpdz = sxi*(atom2.quadrupoleXX*gqxx7 + atom2.quadrupoleYY*gqyy7 + atom2.quadrupoleZZ*gqzz7 + 2*(atom2.quadrupoleXY*gqxy7 + atom2.quadrupoleXZ*gqxz7 + atom2.quadrupoleYZ*gqyz7)) +
syi*(atom2.quadrupoleXX*gqxx9 + atom2.quadrupoleYY*gqyy9 + atom2.quadrupoleZZ*gqzz9 + 2*(atom2.quadrupoleXY*gqxy9 + atom2.quadrupoleXZ*gqxz9 + atom2.quadrupoleYZ*gqyz9)) +
szi*(atom2.quadrupoleXX*gqxx10 + atom2.quadrupoleYY*gqyy10 + atom2.quadrupoleZZ*gqzz10 + 2*(atom2.quadrupoleXY*gqxy10 + atom2.quadrupoleXZ*gqxz10 + atom2.quadrupoleYZ*gqyz10));
dpdz += sxk*(atom1.quadrupoleXX*gqxx7 + atom1.quadrupoleYY*gqyy7 + atom1.quadrupoleZZ*gqzz7 + 2*(atom1.quadrupoleXY*gqxy7 + atom1.quadrupoleXZ*gqxz7 + atom1.quadrupoleYZ*gqyz7)) +
syk*(atom1.quadrupoleXX*gqxx9 + atom1.quadrupoleYY*gqyy9 + atom1.quadrupoleZZ*gqzz9 + 2*(atom1.quadrupoleXY*gqxy9 + atom1.quadrupoleXZ*gqxz9 + atom1.quadrupoleYZ*gqyz9)) +
szk*(atom1.quadrupoleXX*gqxx10 + atom1.quadrupoleYY*gqyy10 + atom1.quadrupoleZZ*gqzz10 + 2*(atom1.quadrupoleXY*gqxy10 + atom1.quadrupoleXZ*gqxz10 + atom1.quadrupoleYZ*gqyz10));
#endif
#endif
// Born radii derivatives of the unweighted 2nd reaction;
// potential gradient tensor;
#if defined B1
real gc25 = b01 + xr2*b02;
real gc26 = xr*yr*b02;
real gc27 = xr*zr*b02;
real gc28 = b01 + yr2*b02;
real gc29 = yr*zr*b02;
real gc30 = b01 + zr2*b02;
dsumdrB1 += atom1.q*(atom2.quadrupoleXX*gc25 + atom2.quadrupoleYY*gc28 + atom2.quadrupoleZZ*gc30 + 2*(atom2.quadrupoleXY*gc26 + atom2.quadrupoleXZ*gc27 + atom2.quadrupoleYZ*gc29));
dsumdrB1 += atom2.q*(atom1.quadrupoleXX*gc25 + atom1.quadrupoleYY*gc28 + atom1.quadrupoleZZ*gc30 + 2*(atom1.quadrupoleXY*gc26 + atom1.quadrupoleXZ*gc27 + atom1.quadrupoleYZ*gc29));
#endif
#if defined B1 || defined B2
real gux25 = xr*(3*b11 + xr2*b12);
real gux26 = yr*(b11 + xr2*b12);
real gux27 = zr*(b11 + xr2*b12);
real gux28 = xr*(b11 + yr2*b12);
real gux29 = zr*xr*yr*b12;
real gux30 = xr*(b11 + zr2*b12);
real guy25 = yr*(b11 + xr2*b12);
real guy26 = xr*(b11 + yr2*b12);
real guy27 = gux29;
real guy28 = yr*(3*b11 + yr2*b12);
real guy29 = zr*(b11 + yr2*b12);
real guy30 = yr*(b11 + zr2*b12);
real guz25 = zr*(b11 + xr2*b12);
real guz26 = gux29;
real guz27 = xr*(b11 + zr2*b12);
real guz28 = zr*(b11 + yr2*b12);
real guz29 = yr*(b11 + zr2*b12);
real guz30 = zr*(3*b11 + zr2*b12);
#endif
#if defined B2
dsumdrB2 = sxi*(atom2.quadrupoleXX*gux25 + atom2.quadrupoleYY*gux28 + atom2.quadrupoleZZ*gux30 + 2*(atom2.quadrupoleXY*gux26 + atom2.quadrupoleXZ*gux27 + atom2.quadrupoleYZ*gux29)) +
syi*(atom2.quadrupoleXX*guy25 + atom2.quadrupoleYY*guy28 + atom2.quadrupoleZZ*guy30 + 2*(atom2.quadrupoleXY*guy26 + atom2.quadrupoleXZ*guy27 + atom2.quadrupoleYZ*guy29)) +
szi*(atom2.quadrupoleXX*guz25 + atom2.quadrupoleYY*guz28 + atom2.quadrupoleZZ*guz30 + 2*(atom2.quadrupoleXY*guz26 + atom2.quadrupoleXZ*guz27 + atom2.quadrupoleYZ*guz29));
dsumdrB2 += sxk*(atom1.quadrupoleXX*gux25 + atom1.quadrupoleYY*gux28 + atom1.quadrupoleZZ*gux30 + 2*(atom1.quadrupoleXY*gux26 + atom1.quadrupoleXZ*gux27 + atom1.quadrupoleYZ*gux29)) +
syk*(atom1.quadrupoleXX*guy25 + atom1.quadrupoleYY*guy28 + atom1.quadrupoleZZ*guy30 + 2*(atom1.quadrupoleXY*guy26 + atom1.quadrupoleXZ*guy27 + atom1.quadrupoleYZ*guy29)) +
szk*(atom1.quadrupoleXX*guz25 + atom1.quadrupoleYY*guz28 + atom1.quadrupoleZZ*guz30 + 2*(atom1.quadrupoleXY*guz26 + atom1.quadrupoleXZ*guz27 + atom1.quadrupoleYZ*guz29));
#endif
#if defined B1
dsumdrB1 = atom1.dipole.x*(atom2.quadrupoleXX*gux25 + atom2.quadrupoleYY*gux28 + atom2.quadrupoleZZ*gux30 + 2*(atom2.quadrupoleXY*gux26 + atom2.quadrupoleXZ*gux27 + atom2.quadrupoleYZ*gux29)) +
atom1.dipole.y*(atom2.quadrupoleXX*guy25 + atom2.quadrupoleYY*guy28 + atom2.quadrupoleZZ*guy30 + 2*(atom2.quadrupoleXY*guy26 + atom2.quadrupoleXZ*guy27 + atom2.quadrupoleYZ*guy29)) +
atom1.dipole.z*(atom2.quadrupoleXX*guz25 + atom2.quadrupoleYY*guz28 + atom2.quadrupoleZZ*guz30 + 2*(atom2.quadrupoleXY*guz26 + atom2.quadrupoleXZ*guz27 + atom2.quadrupoleYZ*guz29));
dsumdrB1 += atom2.dipole.x*(atom1.quadrupoleXX*gux25 + atom1.quadrupoleYY*gux28 + atom1.quadrupoleZZ*gux30 + 2*(atom1.quadrupoleXY*gux26 + atom1.quadrupoleXZ*gux27 + atom1.quadrupoleYZ*gux29)) +
atom2.dipole.y*(atom1.quadrupoleXX*guy25 + atom1.quadrupoleYY*guy28 + atom1.quadrupoleZZ*guy30 + 2*(atom1.quadrupoleXY*guy26 + atom1.quadrupoleXZ*guy27 + atom1.quadrupoleYZ*guy29)) +
atom2.dipole.z*(atom1.quadrupoleXX*guz25 + atom1.quadrupoleYY*guz28 + atom1.quadrupoleZZ*guz30 + 2*(atom1.quadrupoleXY*guz26 + atom1.quadrupoleXZ*guz27 + atom1.quadrupoleYZ*guz29));
real gqxx25 = 2*b20 + xr2*(5*b21 + xr2*b22);
real gqxx26 = yr*xr*(2*b21 + xr2*b22);
real gqxx27 = zr*xr*(2*b21 + xr2*b22);
real gqxx28 = xr2*(b21 + yr2*b22);
real gqxx29 = zr*yr*xr2*b22;
real gqxx30 = xr2*(b21 + zr2*b22);
real gqyy25 = yr2*(b21 + xr2*b22);
real gqyy26 = xr*yr*(2*b21 + yr2*b22);
real gqyy27 = xr*zr*yr2*b22;
real gqyy28 = 2*b20 + yr2*(5*b21 + yr2*b22);
real gqyy29 = yr*zr*(2*b21 + yr2*b22);
real gqyy30 = yr2*(b21 + zr2*b22);
real gqzz25 = zr2*(b21 + xr2*b22);
real gqzz26 = xr*yr*zr2*b22;
real gqzz27 = xr*zr*(2*b21 + zr2*b22);
real gqzz28 = zr2*(b21 + yr2*b22);
real gqzz29 = yr*zr*(2*b21 + zr2*b22);
real gqzz30 = 2*b20 + zr2*(5*b21 + zr2*b22);
real gqxy25 = xr*yr*(3*b21 + xr2*b22);
real gqxy26 = b20 + (xr2 + yr2)*b21 + xr2*yr2*b22;
real gqxy27 = zr*yr*(b21 + xr2*b22);
real gqxy28 = xr*yr*(3*b21 + yr2*b22);
real gqxy29 = zr*xr*(b21 + yr2*b22);
real gqxy30 = xr*yr*(b21 + zr2*b22);
real gqxz25 = xr*zr*(3*b21 + xr2*b22);
real gqxz26 = yr*zr*(b21 + xr2*b22);
real gqxz27 = b20 + (xr2 + zr2)*b21 + xr2*zr2*b22;
real gqxz28 = xr*zr*(b21 + yr2*b22);
real gqxz29 = xr*yr*(b21 + zr2*b22);
real gqxz30 = xr*zr*(3*b21 + zr2*b22);
real gqyz25 = zr*yr*(b21 + xr2*b22);
real gqyz26 = xr*zr*(b21 + yr2*b22);
real gqyz27 = xr*yr*(b21 + zr2*b22);
real gqyz28 = yr*zr*(3*b21 + yr2*b22);
real gqyz29 = b20 + (yr2 + zr2)*b21 + yr2*zr2*b22;
real gqyz30 = yr*zr*(3*b21 + zr2*b22);
dsumdrB1 +=
atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx25 + atom2.quadrupoleYY*gqxx28 + atom2.quadrupoleZZ*gqxx30 + 2*(atom2.quadrupoleXY*gqxx26 + atom2.quadrupoleXZ*gqxx27 + atom2.quadrupoleYZ*gqxx29)) +
atom1.quadrupoleYY*(atom2.quadrupoleXX*gqyy25 + atom2.quadrupoleYY*gqyy28 + atom2.quadrupoleZZ*gqyy30 + 2*(atom2.quadrupoleXY*gqyy26 + atom2.quadrupoleXZ*gqyy27 + atom2.quadrupoleYZ*gqyy29)) +
atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqzz25 + atom2.quadrupoleYY*gqzz28 + atom2.quadrupoleZZ*gqzz30 + 2*(atom2.quadrupoleXY*gqzz26 + atom2.quadrupoleXZ*gqzz27 + atom2.quadrupoleYZ*gqzz29));
dsumdrB1 += 2*(
atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxy25 + atom2.quadrupoleYY*gqxy28 + atom2.quadrupoleZZ*gqxy30 + 2*(atom2.quadrupoleXY*gqxy26 + atom2.quadrupoleXZ*gqxy27 + atom2.quadrupoleYZ*gqxy29)) +
atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxz25 + atom2.quadrupoleYY*gqxz28 + atom2.quadrupoleZZ*gqxz30 + 2*(atom2.quadrupoleXY*gqxz26 + atom2.quadrupoleXZ*gqxz27 + atom2.quadrupoleYZ*gqxz29)) +
atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqyz25 + atom2.quadrupoleYY*gqyz28 + atom2.quadrupoleZZ*gqyz30 + 2*(atom2.quadrupoleXY*gqyz26 + atom2.quadrupoleXZ*gqyz27 + atom2.quadrupoleYZ*gqyz29)));
dsumdrB1 +=
atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx25 + atom2.quadrupoleYY*gqyy25 + atom2.quadrupoleZZ*gqzz25 + 2*(atom2.quadrupoleXY*gqxy25 + atom2.quadrupoleXZ*gqxz25 + atom2.quadrupoleYZ*gqyz25)) +
atom1.quadrupoleYY*(atom2.quadrupoleXX*gqxx28 + atom2.quadrupoleYY*gqyy28 + atom2.quadrupoleZZ*gqzz28 + 2*(atom2.quadrupoleXY*gqxy28 + atom2.quadrupoleXZ*gqxz28 + atom2.quadrupoleYZ*gqyz28)) +
atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqxx30 + atom2.quadrupoleYY*gqyy30 + atom2.quadrupoleZZ*gqzz30 + 2*(atom2.quadrupoleXY*gqxy30 + atom2.quadrupoleXZ*gqxz30 + atom2.quadrupoleYZ*gqyz30));
dsumdrB1 += 2*(
atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxx26 + atom2.quadrupoleYY*gqyy26 + atom2.quadrupoleZZ*gqzz26 + 2*(atom2.quadrupoleXY*gqxy26 + atom2.quadrupoleXZ*gqxz26 + atom2.quadrupoleYZ*gqyz26)) +
atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxx27 + atom2.quadrupoleYY*gqyy27 + atom2.quadrupoleZZ*gqzz27 + 2*(atom2.quadrupoleXY*gqxy27 + atom2.quadrupoleXZ*gqxz27 + atom2.quadrupoleYZ*gqyz27)) +
atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqxx29 + atom2.quadrupoleYY*gqyy29 + atom2.quadrupoleZZ*gqzz29 + 2*(atom2.quadrupoleXY*gqxy29 + atom2.quadrupoleXZ*gqxz29 + atom2.quadrupoleYZ*gqyz29)));
dsumdrB1 *= 0.5f;
atom1.bornForce += atom2.bornRadius*dsumdrB1;
atom2.bornForce += atom1.bornRadius*dsumdrB1;
#endif
// unweighted 3rd reaction potential gradient tensor;
#if defined F1
real gc11 = xr*(3*a02 + xr2*a03);
real gc12 = yr*(a02 + xr2*a03);
real gc13 = zr*(a02 + xr2*a03);
real gc14 = xr*(a02 + yr2*a03);
real gc15 = xr*yr*zr*a03;
real gc16 = xr*(a02 + zr2*a03);
real gc17 = yr*(3*a02 + yr2*a03);
real gc18 = zr*(a02 + yr2*a03);
real gc19 = yr*(a02 + zr2*a03);
real gc20 = zr*(3*a02 + zr2*a03);
dedx += atom1.q*(atom2.quadrupoleXX*gc11 + atom2.quadrupoleYY*gc14 + atom2.quadrupoleZZ*gc16 + 2*(atom2.quadrupoleXY*gc12 + atom2.quadrupoleXZ*gc13 + atom2.quadrupoleYZ*gc15));
dedx += atom2.q*(atom1.quadrupoleXX*gc11 + atom1.quadrupoleYY*gc14 + atom1.quadrupoleZZ*gc16 + 2*(atom1.quadrupoleXY*gc12 + atom1.quadrupoleXZ*gc13 + atom1.quadrupoleYZ*gc15));
dedy += atom1.q*(atom2.quadrupoleXX*gc12 + atom2.quadrupoleYY*gc17 + atom2.quadrupoleZZ*gc19 + 2*(atom2.quadrupoleXY*gc14 + atom2.quadrupoleXZ*gc15 + atom2.quadrupoleYZ*gc18));
dedy += atom2.q*(atom1.quadrupoleXX*gc12 + atom1.quadrupoleYY*gc17 + atom1.quadrupoleZZ*gc19 + 2*(atom1.quadrupoleXY*gc14 + atom1.quadrupoleXZ*gc15 + atom1.quadrupoleYZ*gc18));
dedz += atom1.q*(atom2.quadrupoleXX*gc13 + atom2.quadrupoleYY*gc18 + atom2.quadrupoleZZ*gc20 + 2*(atom2.quadrupoleXY*gc15 + atom2.quadrupoleXZ*gc16 + atom2.quadrupoleYZ*gc19));
dedz += atom2.q*(atom1.quadrupoleXX*gc13 + atom1.quadrupoleYY*gc18 + atom1.quadrupoleZZ*gc20 + 2*(atom1.quadrupoleXY*gc15 + atom1.quadrupoleXZ*gc16 + atom1.quadrupoleYZ*gc19));
#endif
#if defined F1 || defined F2
real gux11 = 3*a11 + xr2*(6*a12 + xr2*a13);
real gux12 = xr*yr*(3*a12 + xr2*a13);
real gux13 = xr*zr*(3*a12 + xr2*a13);
real gux14 = a11 + (xr2 + yr2)*a12 + xr2*yr2*a13;
real gux15 = yr*zr*(a12 + xr2*a13);
real gux16 = a11 + (xr2 + zr2)*a12 + xr2*zr2*a13;
real gux17 = xr*yr*(3*a12 + yr2*a13);
real gux18 = xr*zr*(a12 + yr2*a13);
real gux19 = xr*yr*(a12 + zr2*a13);
real gux20 = xr*zr*(3*a12 + zr2*a13);
real guy11 = gux12;
real guy12 = gux14;
real guy13 = gux15;
real guy14 = gux17;
real guy15 = gux18;
real guy16 = gux19;
real guy17 = 3*a11 + yr2*(6*a12 + yr2*a13);
real guy18 = yr*zr*(3*a12 + yr2*a13);
real guy19 = a11 + (yr2 + zr2)*a12 + yr2*zr2*a13;
real guy20 = yr*zr*(3*a12 + zr2*a13);
real guz11 = gux13;
real guz12 = gux15;
real guz13 = gux16;
real guz14 = gux18;
real guz15 = gux19;
real guz16 = gux20;
real guz17 = guy18;
real guz18 = guy19;
real guz19 = guy20;
real guz20 = 3*a11 + zr2*(6*a12 + zr2*a13);
#if defined F1
dedx = atom1.dipole.x*(atom2.quadrupoleXX*gux11 + atom2.quadrupoleYY*gux14 + atom2.quadrupoleZZ*gux16 + 2*(atom2.quadrupoleXY*gux12 + atom2.quadrupoleXZ*gux13 + atom2.quadrupoleYZ*gux15)) +
atom1.dipole.y*(atom2.quadrupoleXX*guy11 + atom2.quadrupoleYY*guy14 + atom2.quadrupoleZZ*guy16 + 2*(atom2.quadrupoleXY*guy12 + atom2.quadrupoleXZ*guy13 + atom2.quadrupoleYZ*guy15)) +
atom1.dipole.z*(atom2.quadrupoleXX*guz11 + atom2.quadrupoleYY*guz14 + atom2.quadrupoleZZ*guz16 + 2*(atom2.quadrupoleXY*guz12 + atom2.quadrupoleXZ*guz13 + atom2.quadrupoleYZ*guz15));
dedx += atom2.dipole.x*(atom1.quadrupoleXX*gux11 + atom1.quadrupoleYY*gux14 + atom1.quadrupoleZZ*gux16 + 2*(atom1.quadrupoleXY*gux12 + atom1.quadrupoleXZ*gux13 + atom1.quadrupoleYZ*gux15)) +
atom2.dipole.y*(atom1.quadrupoleXX*guy11 + atom1.quadrupoleYY*guy14 + atom1.quadrupoleZZ*guy16 + 2*(atom1.quadrupoleXY*guy12 + atom1.quadrupoleXZ*guy13 + atom1.quadrupoleYZ*guy15)) +
atom2.dipole.z*(atom1.quadrupoleXX*guz11 + atom1.quadrupoleYY*guz14 + atom1.quadrupoleZZ*guz16 + 2*(atom1.quadrupoleXY*guz12 + atom1.quadrupoleXZ*guz13 + atom1.quadrupoleYZ*guz15));
dedy = atom1.dipole.x*(atom2.quadrupoleXX*gux12 + atom2.quadrupoleYY*gux17 + atom2.quadrupoleZZ*gux19 + 2*(atom2.quadrupoleXY*gux14 + atom2.quadrupoleXZ*gux15 + atom2.quadrupoleYZ*gux18)) +
atom1.dipole.y*(atom2.quadrupoleXX*guy12 + atom2.quadrupoleYY*guy17 + atom2.quadrupoleZZ*guy19 + 2*(atom2.quadrupoleXY*guy14 + atom2.quadrupoleXZ*guy15 + atom2.quadrupoleYZ*guy18)) +
atom1.dipole.z*(atom2.quadrupoleXX*guz12 + atom2.quadrupoleYY*guz17 + atom2.quadrupoleZZ*guz19 + 2*(atom2.quadrupoleXY*guz14 + atom2.quadrupoleXZ*guz15 + atom2.quadrupoleYZ*guz18));
dedy += atom2.dipole.x*(atom1.quadrupoleXX*gux12 + atom1.quadrupoleYY*gux17 + atom1.quadrupoleZZ*gux19 + 2*(atom1.quadrupoleXY*gux14 + atom1.quadrupoleXZ*gux15 + atom1.quadrupoleYZ*gux18)) +
atom2.dipole.y*(atom1.quadrupoleXX*guy12 + atom1.quadrupoleYY*guy17 + atom1.quadrupoleZZ*guy19 + 2*(atom1.quadrupoleXY*guy14 + atom1.quadrupoleXZ*guy15 + atom1.quadrupoleYZ*guy18)) +
atom2.dipole.z*(atom1.quadrupoleXX*guz12 + atom1.quadrupoleYY*guz17 + atom1.quadrupoleZZ*guz19 + 2*(atom1.quadrupoleXY*guz14 + atom1.quadrupoleXZ*guz15 + atom1.quadrupoleYZ*guz18));
dedz = atom1.dipole.x*(atom2.quadrupoleXX*gux13 + atom2.quadrupoleYY*gux18 + atom2.quadrupoleZZ*gux20 + 2*(atom2.quadrupoleXY*gux15 + atom2.quadrupoleXZ*gux16 + atom2.quadrupoleYZ*gux19)) +
atom1.dipole.y*(atom2.quadrupoleXX*guy13 + atom2.quadrupoleYY*guy18 + atom2.quadrupoleZZ*guy20 + 2*(atom2.quadrupoleXY*guy15 + atom2.quadrupoleXZ*guy16 + atom2.quadrupoleYZ*guy19)) +
atom1.dipole.z*(atom2.quadrupoleXX*guz13 + atom2.quadrupoleYY*guz18 + atom2.quadrupoleZZ*guz20 + 2*(atom2.quadrupoleXY*guz15 + atom2.quadrupoleXZ*guz16 + atom2.quadrupoleYZ*guz19));
dedz += atom2.dipole.x*(atom1.quadrupoleXX*gux13 + atom1.quadrupoleYY*gux18 + atom1.quadrupoleZZ*gux20 + 2*(atom1.quadrupoleXY*gux15 + atom1.quadrupoleXZ*gux16 + atom1.quadrupoleYZ*gux19)) +
atom2.dipole.y*(atom1.quadrupoleXX*guy13 + atom1.quadrupoleYY*guy18 + atom1.quadrupoleZZ*guy20 + 2*(atom1.quadrupoleXY*guy15 + atom1.quadrupoleXZ*guy16 + atom1.quadrupoleYZ*guy19)) +
atom2.dipole.z*(atom1.quadrupoleXX*guz13 + atom1.quadrupoleYY*guz18 + atom1.quadrupoleZZ*guz20 + 2*(atom1.quadrupoleXY*guz15 + atom1.quadrupoleXZ*guz16 + atom1.quadrupoleYZ*guz19));
#endif
#if defined F2
dpdx = sxi*(atom2.quadrupoleXX*gux11 + atom2.quadrupoleYY*gux14 + atom2.quadrupoleZZ*gux16 + 2*(atom2.quadrupoleXY*gux12 + atom2.quadrupoleXZ*gux13 + atom2.quadrupoleYZ*gux15)) +
syi*(atom2.quadrupoleXX*guy11 + atom2.quadrupoleYY*guy14 + atom2.quadrupoleZZ*guy16 + 2*(atom2.quadrupoleXY*guy12 + atom2.quadrupoleXZ*guy13 + atom2.quadrupoleYZ*guy15)) +
szi*(atom2.quadrupoleXX*guz11 + atom2.quadrupoleYY*guz14 + atom2.quadrupoleZZ*guz16 + 2*(atom2.quadrupoleXY*guz12 + atom2.quadrupoleXZ*guz13 + atom2.quadrupoleYZ*guz15));
dpdx += sxk*(atom1.quadrupoleXX*gux11 + atom1.quadrupoleYY*gux14 + atom1.quadrupoleZZ*gux16 + 2*(atom1.quadrupoleXY*gux12 + atom1.quadrupoleXZ*gux13 + atom1.quadrupoleYZ*gux15)) +
syk*(atom1.quadrupoleXX*guy11 + atom1.quadrupoleYY*guy14 + atom1.quadrupoleZZ*guy16 + 2*(atom1.quadrupoleXY*guy12 + atom1.quadrupoleXZ*guy13 + atom1.quadrupoleYZ*guy15)) +
szk*(atom1.quadrupoleXX*guz11 + atom1.quadrupoleYY*guz14 + atom1.quadrupoleZZ*guz16 + 2*(atom1.quadrupoleXY*guz12 + atom1.quadrupoleXZ*guz13 + atom1.quadrupoleYZ*guz15));
dpdy = sxi*(atom2.quadrupoleXX*gux12 + atom2.quadrupoleYY*gux17 + atom2.quadrupoleZZ*gux19 + 2*(atom2.quadrupoleXY*gux14 + atom2.quadrupoleXZ*gux15 + atom2.quadrupoleYZ*gux18)) +
syi*(atom2.quadrupoleXX*guy12 + atom2.quadrupoleYY*guy17 + atom2.quadrupoleZZ*guy19 + 2*(atom2.quadrupoleXY*guy14 + atom2.quadrupoleXZ*guy15 + atom2.quadrupoleYZ*guy18)) +
szi*(atom2.quadrupoleXX*guz12 + atom2.quadrupoleYY*guz17 + atom2.quadrupoleZZ*guz19 + 2*(atom2.quadrupoleXY*guz14 + atom2.quadrupoleXZ*guz15 + atom2.quadrupoleYZ*guz18));
dpdy += sxk*(atom1.quadrupoleXX*gux12 + atom1.quadrupoleYY*gux17 + atom1.quadrupoleZZ*gux19 + 2*(atom1.quadrupoleXY*gux14 + atom1.quadrupoleXZ*gux15 + atom1.quadrupoleYZ*gux18)) +
syk*(atom1.quadrupoleXX*guy12 + atom1.quadrupoleYY*guy17 + atom1.quadrupoleZZ*guy19 + 2*(atom1.quadrupoleXY*guy14 + atom1.quadrupoleXZ*guy15 + atom1.quadrupoleYZ*guy18)) +
szk*(atom1.quadrupoleXX*guz12 + atom1.quadrupoleYY*guz17 + atom1.quadrupoleZZ*guz19 + 2*(atom1.quadrupoleXY*guz14 + atom1.quadrupoleXZ*guz15 + atom1.quadrupoleYZ*guz18));
dpdz = sxi*(atom2.quadrupoleXX*gux13 + atom2.quadrupoleYY*gux18 + atom2.quadrupoleZZ*gux20 + 2*(atom2.quadrupoleXY*gux15 + atom2.quadrupoleXZ*gux16 + atom2.quadrupoleYZ*gux19)) +
syi*(atom2.quadrupoleXX*guy13 + atom2.quadrupoleYY*guy18 + atom2.quadrupoleZZ*guy20 + 2*(atom2.quadrupoleXY*guy15 + atom2.quadrupoleXZ*guy16 + atom2.quadrupoleYZ*guy19)) +
szi*(atom2.quadrupoleXX*guz13 + atom2.quadrupoleYY*guz18 + atom2.quadrupoleZZ*guz20 + 2*(atom2.quadrupoleXY*guz15 + atom2.quadrupoleXZ*guz16 + atom2.quadrupoleYZ*guz19));
dpdz += sxk*(atom1.quadrupoleXX*gux13 + atom1.quadrupoleYY*gux18 + atom1.quadrupoleZZ*gux20 + 2*(atom1.quadrupoleXY*gux15 + atom1.quadrupoleXZ*gux16 + atom1.quadrupoleYZ*gux19)) +
syk*(atom1.quadrupoleXX*guy13 + atom1.quadrupoleYY*guy18 + atom1.quadrupoleZZ*guy20 + 2*(atom1.quadrupoleXY*guy15 + atom1.quadrupoleXZ*guy16 + atom1.quadrupoleYZ*guy19)) +
szk*(atom1.quadrupoleXX*guz13 + atom1.quadrupoleYY*guz18 + atom1.quadrupoleZZ*guz20 + 2*(atom1.quadrupoleXY*guz15 + atom1.quadrupoleXZ*guz16 + atom1.quadrupoleYZ*guz19));
#endif
#endif
#if defined F1
real gqxx11 = xr*(12*a21 + xr2*(9*a22 + xr2*a23));
real gqxx12 = yr*(2*a21 + xr2*(5*a22 + xr2*a23));
real gqxx13 = zr*(2*a21 + xr2*(5*a22 + xr2*a23));
real gqxx14 = xr*(2*a21 + yr2*2*a22 + xr2*(a22 + yr2*a23));
real gqxx15 = xr*yr*zr*(2*a22 + xr2*a23);
real gqxx16 = xr*(2*a21 + zr2*2*a22 + xr2*(a22 + zr2*a23));
real gqxx17 = yr*xr2*(3*a22 + yr2*a23);
real gqxx18 = zr*xr2*(a22 + yr2*a23);
real gqxx19 = yr*xr2*(a22 + zr2*a23);
real gqxx20 = zr*xr2*(3*a22 + zr2*a23);
real gqxy11 = yr*(3*a21 + xr2*(6*a22 + xr2*a23));
real gqxy12 = xr*(3*(a21 + yr2*a22) + xr2*(a22 + yr2*a23));
real gqxy13 = xr*yr*zr*(3*a22 + xr2*a23);
real gqxy14 = yr*(3*(a21 + xr2*a22) + yr2*(a22 + xr2*a23));
real gqxy15 = zr*(a21 + (yr2 + xr2)*a22 + yr2*xr2*a23);
real gqxy16 = yr*(a21 + (xr2 + zr2)*a22 + xr2*zr2*a23);
real gqxy17 = xr*(3*(a21 + yr2*a22) + yr2*(3*a22 + yr2*a23));
real gqxy18 = xr*yr*zr*(3*a22 + yr2*a23);
real gqxy19 = xr*(a21 + (yr2 + zr2)*a22 + yr2*zr2*a23);
real gqxy20 = xr*yr*zr*(3*a22 + zr2*a23);
real gqxz11 = zr*(3*a21 + xr2*(6*a22 + xr2*a23));
real gqxz12 = xr*yr*zr*(3*a22 + xr2*a23);
real gqxz13 = xr*(3*(a21 + zr2*a22) + xr2*(a22 + zr2*a23));
real gqxz14 = zr*(a21 + (xr2 + yr2)*a22 + xr2*yr2*a23);
real gqxz15 = yr*(a21 + (xr2 + zr2)*a22 + zr2*xr2*a23);
real gqxz16 = zr*(3*(a21 + xr2*a22) + zr2*(a22 + xr2*a23));
real gqxz17 = xr*yr*zr*(3*a22 + yr2*a23);
real gqxz18 = xr*(a21 + (zr2 + yr2)*a22 + zr2*yr2*a23);
real gqxz19 = xr*yr*zr*(3*a22 + zr2*a23);
real gqxz20 = xr*(3*a21 + zr2*(6*a22 + zr2*a23));
real gqyy11 = xr*yr2*(3*a22 + xr2*a23);
real gqyy12 = yr*(2*a21 + xr2*2*a22 + yr2*(a22 + xr2*a23));
real gqyy13 = zr*yr2*(a22 + xr2*a23);
real gqyy14 = xr*(2*a21 + yr2*(5*a22 + yr2*a23));
real gqyy15 = xr*yr*zr*(2*a22 + yr2*a23);
real gqyy16 = xr*yr2*(a22 + zr2*a23);
real gqyy17 = yr*(12*a21 + yr2*(9*a22 + yr2*a23));
real gqyy18 = zr*(2*a21 + yr2*(5*a22 + yr2*a23));
real gqyy19 = yr*(2*a21 + zr2*2*a22 + yr2*(a22 + zr2*a23));
real gqyy20 = zr*yr2*(3*a22 + zr2*a23);
real gqyz11 = xr*yr*zr*(3*a22 + xr2*a23);
real gqyz12 = zr*(a21 + (xr2 + yr2)*a22 + xr2*yr2*a23);
real gqyz13 = yr*(a21 + (xr2 + zr2)*a22 + xr2*zr2*a23);
real gqyz14 = xr*yr*zr*(3*a22 + yr2*a23);
real gqyz15 = xr*(a21 + (yr2 + zr2)*a22 + yr2*zr2*a23);
real gqyz16 = xr*yr*zr*(3*a22 + zr2*a23);
real gqyz17 = zr*(3*a21 + yr2*(6*a22 + yr2*a23));
real gqyz18 = yr*(3*(a21 + zr2*a22) + yr2*(a22 + zr2*a23));
real gqyz19 = zr*(3*(a21 + yr2*a22) + zr2*(a22 + yr2*a23));
real gqyz20 = yr*(3*a21 + zr2*(6*a22 + zr2*a23));
real gqzz11 = xr*zr2*(3*a22 + xr2*a23);
real gqzz12 = yr*(zr2*a22 + xr2*(zr2*a23));
real gqzz13 = zr*(2*a21 + xr2*2*a22 + zr2*(a22 + xr2*a23));
real gqzz14 = xr*zr2*(a22 + yr2*a23);
real gqzz15 = xr*yr*zr*(2*a22 + zr2*a23);
real gqzz16 = xr*(2*a21 + zr2*(5*a22 + zr2*a23));
real gqzz17 = yr*zr2*(3*a22 + yr2*a23);
real gqzz18 = zr*(2*a21 + yr2*2*a22 + zr2*(a22 + yr2*a23));
real gqzz19 = yr*(2*a21 + zr2*(5*a22 + zr2*a23));
real gqzz20 = zr*(12*a21 + zr2*(9*a22 + zr2*a23));
dedx += atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx11 + atom2.quadrupoleYY*gqxx14 + atom2.quadrupoleZZ*gqxx16 + 2*(atom2.quadrupoleXY*gqxx12 + atom2.quadrupoleXZ*gqxx13 + atom2.quadrupoleYZ*gqxx15)) +
atom1.quadrupoleYY*(atom2.quadrupoleXX*gqyy11 + atom2.quadrupoleYY*gqyy14 + atom2.quadrupoleZZ*gqyy16 + 2*(atom2.quadrupoleXY*gqyy12 + atom2.quadrupoleXZ*gqyy13 + atom2.quadrupoleYZ*gqyy15)) +
atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqzz11 + atom2.quadrupoleYY*gqzz14 + atom2.quadrupoleZZ*gqzz16 + 2*(atom2.quadrupoleXY*gqzz12 + atom2.quadrupoleXZ*gqzz13 + atom2.quadrupoleYZ*gqzz15)) +
2*(
atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxy11 + atom2.quadrupoleYY*gqxy14 + atom2.quadrupoleZZ*gqxy16 + 2*(atom2.quadrupoleXY*gqxy12 + atom2.quadrupoleXZ*gqxy13 + atom2.quadrupoleYZ*gqxy15)) +
atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxz11 + atom2.quadrupoleYY*gqxz14 + atom2.quadrupoleZZ*gqxz16 + 2*(atom2.quadrupoleXY*gqxz12 + atom2.quadrupoleXZ*gqxz13 + atom2.quadrupoleYZ*gqxz15)) +
atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqyz11 + atom2.quadrupoleYY*gqyz14 + atom2.quadrupoleZZ*gqyz16 + 2*(atom2.quadrupoleXY*gqyz12 + atom2.quadrupoleXZ*gqyz13 + atom2.quadrupoleYZ*gqyz15))) +
atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx11 + atom2.quadrupoleYY*gqyy11 + atom2.quadrupoleZZ*gqzz11 + 2*(atom2.quadrupoleXY*gqxy11 + atom2.quadrupoleXZ*gqxz11 + atom2.quadrupoleYZ*gqyz11)) +
atom1.quadrupoleYY*(atom2.quadrupoleXX*gqxx14 + atom2.quadrupoleYY*gqyy14 + atom2.quadrupoleZZ*gqzz14 + 2*(atom2.quadrupoleXY*gqxy14 + atom2.quadrupoleXZ*gqxz14 + atom2.quadrupoleYZ*gqyz14)) +
atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqxx16 + atom2.quadrupoleYY*gqyy16 + atom2.quadrupoleZZ*gqzz16 + 2*(atom2.quadrupoleXY*gqxy16 + atom2.quadrupoleXZ*gqxz16 + atom2.quadrupoleYZ*gqyz16)) +
2*(
atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxx12 + atom2.quadrupoleYY*gqyy12 + atom2.quadrupoleZZ*gqzz12 + 2*(atom2.quadrupoleXY*gqxy12 + atom2.quadrupoleXZ*gqxz12 + atom2.quadrupoleYZ*gqyz12)) +
atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxx13 + atom2.quadrupoleYY*gqyy13 + atom2.quadrupoleZZ*gqzz13 + 2*(atom2.quadrupoleXY*gqxy13 + atom2.quadrupoleXZ*gqxz13 + atom2.quadrupoleYZ*gqyz13)) +
atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqxx15 + atom2.quadrupoleYY*gqyy15 + atom2.quadrupoleZZ*gqzz15 + 2*(atom2.quadrupoleXY*gqxy15 + atom2.quadrupoleXZ*gqxz15 + atom2.quadrupoleYZ*gqyz15)));
dedy += atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx12 + atom2.quadrupoleYY*gqxx17 + atom2.quadrupoleZZ*gqxx19 + 2*(atom2.quadrupoleXY*gqxx14 + atom2.quadrupoleXZ*gqxx15 + atom2.quadrupoleYZ*gqxx18)) +
atom1.quadrupoleYY*(atom2.quadrupoleXX*gqyy12 + atom2.quadrupoleYY*gqyy17 + atom2.quadrupoleZZ*gqyy19 + 2*(atom2.quadrupoleXY*gqyy14 + atom2.quadrupoleXZ*gqyy15 + atom2.quadrupoleYZ*gqyy18)) +
atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqzz12 + atom2.quadrupoleYY*gqzz17 + atom2.quadrupoleZZ*gqzz19 + 2*(atom2.quadrupoleXY*gqzz14 + atom2.quadrupoleXZ*gqzz15 + atom2.quadrupoleYZ*gqzz18)) +
2*(
atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxy12 + atom2.quadrupoleYY*gqxy17 + atom2.quadrupoleZZ*gqxy19 + 2*(atom2.quadrupoleXY*gqxy14 + atom2.quadrupoleXZ*gqxy15 + atom2.quadrupoleYZ*gqxy18)) +
atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxz12 + atom2.quadrupoleYY*gqxz17 + atom2.quadrupoleZZ*gqxz19 + 2*(atom2.quadrupoleXY*gqxz14 + atom2.quadrupoleXZ*gqxz15 + atom2.quadrupoleYZ*gqxz18)) +
atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqyz12 + atom2.quadrupoleYY*gqyz17 + atom2.quadrupoleZZ*gqyz19 + 2*(atom2.quadrupoleXY*gqyz14 + atom2.quadrupoleXZ*gqyz15 + atom2.quadrupoleYZ*gqyz18))) +
atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx12 + atom2.quadrupoleYY*gqyy12 + atom2.quadrupoleZZ*gqzz12 + 2*(atom2.quadrupoleXY*gqxy12 + atom2.quadrupoleXZ*gqxz12 + atom2.quadrupoleYZ*gqyz12)) +
atom1.quadrupoleYY*(atom2.quadrupoleXX*gqxx17 + atom2.quadrupoleYY*gqyy17 + atom2.quadrupoleZZ*gqzz17 + 2*(atom2.quadrupoleXY*gqxy17 + atom2.quadrupoleXZ*gqxz17 + atom2.quadrupoleYZ*gqyz17)) +
atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqxx19 + atom2.quadrupoleYY*gqyy19 + atom2.quadrupoleZZ*gqzz19 + 2*(atom2.quadrupoleXY*gqxy19 + atom2.quadrupoleXZ*gqxz19 + atom2.quadrupoleYZ*gqyz19)) +
2*(
atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxx14 + atom2.quadrupoleYY*gqyy14 + atom2.quadrupoleZZ*gqzz14 + 2*(atom2.quadrupoleXY*gqxy14 + atom2.quadrupoleXZ*gqxz14 + atom2.quadrupoleYZ*gqyz14)) +
atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxx15 + atom2.quadrupoleYY*gqyy15 + atom2.quadrupoleZZ*gqzz15 + 2*(atom2.quadrupoleXY*gqxy15 + atom2.quadrupoleXZ*gqxz15 + atom2.quadrupoleYZ*gqyz15)) +
atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqxx18 + atom2.quadrupoleYY*gqyy18 + atom2.quadrupoleZZ*gqzz18 + 2*(atom2.quadrupoleXY*gqxy18 + atom2.quadrupoleXZ*gqxz18 + atom2.quadrupoleYZ*gqyz18)));
dedz += atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx13 + atom2.quadrupoleYY*gqxx18 + atom2.quadrupoleZZ*gqxx20 + 2*(atom2.quadrupoleXY*gqxx15 + atom2.quadrupoleXZ*gqxx16 + atom2.quadrupoleYZ*gqxx19)) +
atom1.quadrupoleYY*(atom2.quadrupoleXX*gqyy13 + atom2.quadrupoleYY*gqyy18 + atom2.quadrupoleZZ*gqyy20 + 2*(atom2.quadrupoleXY*gqyy15 + atom2.quadrupoleXZ*gqyy16 + atom2.quadrupoleYZ*gqyy19)) +
atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqzz13 + atom2.quadrupoleYY*gqzz18 + atom2.quadrupoleZZ*gqzz20 + 2*(atom2.quadrupoleXY*gqzz15 + atom2.quadrupoleXZ*gqzz16 + atom2.quadrupoleYZ*gqzz19)) +
2*(
atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxy13 + atom2.quadrupoleYY*gqxy18 + atom2.quadrupoleZZ*gqxy20 + 2*(atom2.quadrupoleXY*gqxy15 + atom2.quadrupoleXZ*gqxy16 + atom2.quadrupoleYZ*gqxy19)) +
atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxz13 + atom2.quadrupoleYY*gqxz18 + atom2.quadrupoleZZ*gqxz20 + 2*(atom2.quadrupoleXY*gqxz15 + atom2.quadrupoleXZ*gqxz16 + atom2.quadrupoleYZ*gqxz19)) +
atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqyz13 + atom2.quadrupoleYY*gqyz18 + atom2.quadrupoleZZ*gqyz20 + 2*(atom2.quadrupoleXY*gqyz15 + atom2.quadrupoleXZ*gqyz16 + atom2.quadrupoleYZ*gqyz19))) +
atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx13 + atom2.quadrupoleYY*gqyy13 + atom2.quadrupoleZZ*gqzz13 + 2*(atom2.quadrupoleXY*gqxy13 + atom2.quadrupoleXZ*gqxz13 + atom2.quadrupoleYZ*gqyz13)) +
atom1.quadrupoleYY*(atom2.quadrupoleXX*gqxx18 + atom2.quadrupoleYY*gqyy18 + atom2.quadrupoleZZ*gqzz18 + 2*(atom2.quadrupoleXY*gqxy18 + atom2.quadrupoleXZ*gqxz18 + atom2.quadrupoleYZ*gqyz18)) +
atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqxx20 + atom2.quadrupoleYY*gqyy20 + atom2.quadrupoleZZ*gqzz20 + 2*(atom2.quadrupoleXY*gqxy20 + atom2.quadrupoleXZ*gqxz20 + atom2.quadrupoleYZ*gqyz20)) +
2*(
atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxx15 + atom2.quadrupoleYY*gqyy15 + atom2.quadrupoleZZ*gqzz15 + 2*(atom2.quadrupoleXY*gqxy15 + atom2.quadrupoleXZ*gqxz15 + atom2.quadrupoleYZ*gqyz15)) +
atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxx16 + atom2.quadrupoleYY*gqyy16 + atom2.quadrupoleZZ*gqzz16 + 2*(atom2.quadrupoleXY*gqxy16 + atom2.quadrupoleXZ*gqxz16 + atom2.quadrupoleYZ*gqyz16)) +
atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqxx19 + atom2.quadrupoleYY*gqyy19 + atom2.quadrupoleZZ*gqzz19 + 2*(atom2.quadrupoleXY*gqxy19 + atom2.quadrupoleXZ*gqxz19 + atom2.quadrupoleYZ*gqyz19)));
#endif
#if defined T1
if (xr != 0 || yr != 0 || zr != 0) {
real gux1 = xr*a10;
real guy1 = yr*a10;
real guz1 = zr*a10;
real gc2 = xr*a01;
real gc3 = yr*a01;
real gc4 = zr*a01;
real fid1 = atom2.dipole.x*gux2 + atom2.dipole.y*gux3 + atom2.dipole.z*gux4 + 0.5f*(atom2.q*gux1 + atom2.quadrupoleXX*gux5 + atom2.quadrupoleYY*gux8 + atom2.quadrupoleZZ*gux10 +
2*(atom2.quadrupoleXY*gux6 + atom2.quadrupoleXZ*gux7 + atom2.quadrupoleYZ*gux9) +
atom2.q*gc2 + atom2.quadrupoleXX*gqxx2 + atom2.quadrupoleYY*gqyy2 + atom2.quadrupoleZZ*gqzz2 +
2*(atom2.quadrupoleXY*gqxy2 + atom2.quadrupoleXZ*gqxz2 + atom2.quadrupoleYZ*gqyz2));
real fid2 = atom2.dipole.x*guy2 + atom2.dipole.y*guy3 + atom2.dipole.z*guy4 + 0.5f*(atom2.q*guy1 + atom2.quadrupoleXX*guy5 + atom2.quadrupoleYY*guy8 + atom2.quadrupoleZZ*guy10 +
2*(atom2.quadrupoleXY*guy6 + atom2.quadrupoleXZ*guy7 + atom2.quadrupoleYZ*guy9) +
atom2.q*gc3 + atom2.quadrupoleXX*gqxx3 + atom2.quadrupoleYY*gqyy3 + atom2.quadrupoleZZ*gqzz3 +
2*(atom2.quadrupoleXY*gqxy3 + atom2.quadrupoleXZ*gqxz3 + atom2.quadrupoleYZ*gqyz3));
real fid3 = atom2.dipole.x*guz2 + atom2.dipole.y*guz3 + atom2.dipole.z*guz4 + 0.5f*(atom2.q*guz1 + atom2.quadrupoleXX*guz5 + atom2.quadrupoleYY*guz8 + atom2.quadrupoleZZ*guz10 +
2*(atom2.quadrupoleXY*guz6 + atom2.quadrupoleXZ*guz7 + atom2.quadrupoleYZ*guz9) +
atom2.q*gc4 + atom2.quadrupoleXX*gqxx4 + atom2.quadrupoleYY*gqyy4 + atom2.quadrupoleZZ*gqzz4 +
2*(atom2.quadrupoleXY*gqxy4 + atom2.quadrupoleXZ*gqxz4 + atom2.quadrupoleYZ*gqyz4));
real trq1 = atom1.dipole.y*fid3 - atom1.dipole.z*fid2;
real trq2 = atom1.dipole.z*fid1 - atom1.dipole.x*fid3;
real trq3 = atom1.dipole.x*fid2 - atom1.dipole.y*fid1;
// torque on quadrupoles due to permanent reaction field gradient
real fidg11 =
(atom2.q*xr2*a20 + atom2.dipole.x*gqxx2 + atom2.dipole.y*gqxx3 + atom2.dipole.z*gqxx4
+ atom2.quadrupoleXX*gqxx5 + atom2.quadrupoleYY*gqxx8 + atom2.quadrupoleZZ*gqxx10
+ 2*(atom2.quadrupoleXY*gqxx6 + atom2.quadrupoleXZ*gqxx7 + atom2.quadrupoleYZ*gqxx9)
+ atom2.q*gc5 + atom2.dipole.x*gux5 + atom2.dipole.y*guy5 + atom2.dipole.z*guz5
+ atom2.quadrupoleXX*gqxx5 + atom2.quadrupoleYY*gqyy5 + atom2.quadrupoleZZ*gqzz5
+ 2*(atom2.quadrupoleXY*gqxy5 + atom2.quadrupoleXZ*gqxz5 + atom2.quadrupoleYZ*gqyz5));
real fidg12 =
(atom2.q*xr*yr*a20 + atom2.dipole.x*gqxy2 + atom2.dipole.y*gqxy3 + atom2.dipole.z*gqxy4
+ atom2.quadrupoleXX*gqxy5 + atom2.quadrupoleYY*gqxy8 + atom2.quadrupoleZZ*gqxy10
+ 2*(atom2.quadrupoleXY*gqxy6 + atom2.quadrupoleXZ*gqxy7 + atom2.quadrupoleYZ*gqxy9)
+ atom2.q*gc6 + atom2.dipole.x*gux6 + atom2.dipole.y*guy6 + atom2.dipole.z*guz6
+ atom2.quadrupoleXX*gqxx6 + atom2.quadrupoleYY*gqyy6 + atom2.quadrupoleZZ*gqzz6
+ 2*(atom2.quadrupoleXY*gqxy6 + atom2.quadrupoleXZ*gqxz6 + atom2.quadrupoleYZ*gqyz6));
real fidg13 =
(atom2.q*xr*zr*a20 + atom2.dipole.x*gqxz2 + atom2.dipole.y*gqxz3 + atom2.dipole.z*gqxz4
+ atom2.quadrupoleXX*gqxz5 + atom2.quadrupoleYY*gqxz8 + atom2.quadrupoleZZ*gqxz10
+ 2*(atom2.quadrupoleXY*gqxz6 + atom2.quadrupoleXZ*gqxz7 + atom2.quadrupoleYZ*gqxz9)
+ atom2.q*gc7 + atom2.dipole.x*gux7 + atom2.dipole.y*guy7 + atom2.dipole.z*guz7
+ atom2.quadrupoleXX*gqxx7 + atom2.quadrupoleYY*gqyy7 + atom2.quadrupoleZZ*gqzz7
+ 2*(atom2.quadrupoleXY*gqxy7 + atom2.quadrupoleXZ*gqxz7 + atom2.quadrupoleYZ*gqyz7));
real fidg22 =
(atom2.q*yr2*a20 + atom2.dipole.x*gqyy2 + atom2.dipole.y*gqyy3 + atom2.dipole.z*gqyy4
+ atom2.quadrupoleXX*gqyy5 + atom2.quadrupoleYY*gqyy8 + atom2.quadrupoleZZ*gqyy10
+ 2*(atom2.quadrupoleXY*gqyy6 + atom2.quadrupoleXZ*gqyy7 + atom2.quadrupoleYZ*gqyy9)
+ atom2.q*gc8 + atom2.dipole.x*gux8 + atom2.dipole.y*guy8 + atom2.dipole.z*guz8
+ atom2.quadrupoleXX*gqxx8 + atom2.quadrupoleYY*gqyy8 + atom2.quadrupoleZZ*gqzz8
+ 2*(atom2.quadrupoleXY*gqxy8 + atom2.quadrupoleXZ*gqxz8 + atom2.quadrupoleYZ*gqyz8));
real fidg23 =
(atom2.q*yr*zr*a20 + atom2.dipole.x*gqyz2 + atom2.dipole.y*gqyz3 + atom2.dipole.z*gqyz4
+ atom2.quadrupoleXX*gqyz5 + atom2.quadrupoleYY*gqyz8 + atom2.quadrupoleZZ*gqyz10
+ 2*(atom2.quadrupoleXY*gqyz6 + atom2.quadrupoleXZ*gqyz7 + atom2.quadrupoleYZ*gqyz9)
+ atom2.q*gc9 + atom2.dipole.x*gux9 + atom2.dipole.y*guy9 + atom2.dipole.z*guz9
+ atom2.quadrupoleXX*gqxx9 + atom2.quadrupoleYY*gqyy9 + atom2.quadrupoleZZ*gqzz9
+ 2*(atom2.quadrupoleXY*gqxy9 + atom2.quadrupoleXZ*gqxz9 + atom2.quadrupoleYZ*gqyz9));
real fidg33 =
(atom2.q*zr2*a20 + atom2.dipole.x*gqzz2 + atom2.dipole.y*gqzz3 + atom2.dipole.z*gqzz4
+ atom2.quadrupoleXX*gqzz5 + atom2.quadrupoleYY*gqzz8 + atom2.quadrupoleZZ*gqzz10
+ 2*(atom2.quadrupoleXY*gqzz6 + atom2.quadrupoleXZ*gqzz7 + atom2.quadrupoleYZ*gqzz9)
+ atom2.q*gc10 + atom2.dipole.x*gux10 + atom2.dipole.y*guy10 + atom2.dipole.z*guz10
+ atom2.quadrupoleXX*gqxx10 + atom2.quadrupoleYY*gqyy10 + atom2.quadrupoleZZ*gqzz10
+ 2*(atom2.quadrupoleXY*gqxy10 + atom2.quadrupoleXZ*gqxz10 + atom2.quadrupoleYZ*gqyz10));
trq1 = (atom1.quadrupoleXY*fidg13 + atom1.quadrupoleYY*fidg23 + atom1.quadrupoleYZ*fidg33 -atom1.quadrupoleXZ*fidg12-atom1.quadrupoleYZ*fidg22-atom1.quadrupoleZZ*fidg23);
trq2 = (atom1.quadrupoleXZ*fidg11 + atom1.quadrupoleYZ*fidg12 + atom1.quadrupoleZZ*fidg13 -atom1.quadrupoleXX*fidg13-atom1.quadrupoleXY*fidg23-atom1.quadrupoleXZ*fidg33);
trq3 = (atom1.quadrupoleXX*fidg12 + atom1.quadrupoleXY*fidg22 + atom1.quadrupoleXZ*fidg23 -atom1.quadrupoleXY*fidg11-atom1.quadrupoleYY*fidg12-atom1.quadrupoleYZ*fidg13);
torque.x = trq1;
torque.y = trq2;
torque.z = trq3;
} else {
torque.x = 0;
torque.y = 0;
torque.z = 0;
}
#endif
#if defined B2
dsumdrB2 *= 0.5f;
atom1.bornForce += 0.5f*atom2.bornRadius*dsumdrB2;
atom2.bornForce += 0.5f*atom1.bornRadius*dsumdrB2;
#endif
#if defined T2
// torque due to induced reaction field gradient on quadrupoles;
real fidg11 = sxk*gqxx2 + syk*gqxx3 + szk*gqxx4 + sxk*gux5 + syk*guy5 + szk*guz5;
real fidg12 = sxk*gqxy2 + syk*gqxy3 + szk*gqxy4 + sxk*gux6 + syk*guy6 + szk*guz6;
real fidg13 = sxk*gqxz2 + syk*gqxz3 + szk*gqxz4 + sxk*gux7 + syk*guy7 + szk*guz7;
real fidg22 = sxk*gqyy2 + syk*gqyy3 + szk*gqyy4 + sxk*gux8 + syk*guy8 + szk*guz8;
real fidg23 = sxk*gqyz2 + syk*gqyz3 + szk*gqyz4 + sxk*gux9 + syk*guy9 + szk*guz9;
real fidg33 = sxk*gqzz2 + syk*gqzz3 + szk*gqzz4 + sxk*gux10 + syk*guy10 + szk*guz10;
trqi1 = atom1.quadrupoleXY*fidg13 + atom1.quadrupoleYY*fidg23 + atom1.quadrupoleYZ*fidg33
-atom1.quadrupoleXZ*fidg12 - atom1.quadrupoleYZ*fidg22 - atom1.quadrupoleZZ*fidg23;
trqi2 = atom1.quadrupoleXZ*fidg11 + atom1.quadrupoleYZ*fidg12 + atom1.quadrupoleZZ*fidg13
-atom1.quadrupoleXX*fidg13 - atom1.quadrupoleXY*fidg23 - atom1.quadrupoleXZ*fidg33;
trqi3 = atom1.quadrupoleXX*fidg12 + atom1.quadrupoleXY*fidg22 + atom1.quadrupoleXZ*fidg23
-atom1.quadrupoleXY*fidg11 - atom1.quadrupoleYY*fidg12 - atom1.quadrupoleYZ*fidg13;
torque.x += 0.5f*trqi1;
torque.y += 0.5f*trqi2;
torque.z += 0.5f*trqi3;
#endif
#if defined F1
outputEnergy += energy;
if ((xr != 0 || yr != 0 || zr != 0)) {
force.x = dedx;
force.y = dedy;
force.z = dedz;
} else {
force.x = force.y = force.z = 0;
}
#endif
#if defined F2
outputEnergy += 0.5f*energy;
dpdx *= 0.5f;
dpdy *= 0.5f;
dpdz *= 0.5f;
if ((xr != 0 || yr != 0 || zr != 0)) {
force.x += dpdx;
force.y += dpdy;
force.z += dpdz;
}
#endif
}
...@@ -135,8 +135,8 @@ extern "C" __global__ void computeElectrostatics( ...@@ -135,8 +135,8 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ; localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ;
localData[threadIdx.x].inducedDipole = data.inducedDipole; localData[threadIdx.x].inducedDipole = data.inducedDipole;
localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar; localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar;
localData[threadIdx.x].thole = data.thole; // IS THIS CORRECT? localData[threadIdx.x].thole = data.thole;
localData[threadIdx.x].damp = data.damp; // IS THIS CORRECT? localData[threadIdx.x].damp = data.damp;
uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx]; uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx]; unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
...@@ -260,6 +260,8 @@ extern "C" __global__ void computeElectrostatics( ...@@ -260,6 +260,8 @@ extern "C" __global__ void computeElectrostatics(
// Compute torques. // Compute torques.
data.force = make_real3(0);
localData[threadIdx.x].force = make_real3(0);
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
if ((flags&(1<<j)) != 0) { if ((flags&(1<<j)) != 0) {
int atom2 = tbx+j; int atom2 = tbx+j;
...@@ -362,6 +364,8 @@ extern "C" __global__ void computeElectrostatics( ...@@ -362,6 +364,8 @@ extern "C" __global__ void computeElectrostatics(
// Compute torques. // Compute torques.
data.force = make_real3(0);
localData[threadIdx.x].force = make_real3(0);
covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0)); covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0));
polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0); polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0);
covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx)); covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx));
......
...@@ -7,6 +7,10 @@ typedef struct { ...@@ -7,6 +7,10 @@ typedef struct {
real quadrupoleXX, quadrupoleXY, quadrupoleXZ; real quadrupoleXX, quadrupoleXY, quadrupoleXZ;
real quadrupoleYY, quadrupoleYZ, quadrupoleZZ; real quadrupoleYY, quadrupoleYZ, quadrupoleZZ;
float thole, damp; float thole, damp;
#ifdef USE_GK
real3 gkField;
real bornRadius;
#endif
} AtomData; } AtomData;
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const float2* __restrict__ dampingAndThole) { inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const float2* __restrict__ dampingAndThole) {
...@@ -178,6 +182,205 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -178,6 +182,205 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
} }
#endif #endif
#ifdef USE_GK
__device__ void computeOneGkInteraction(AtomData& atom1, AtomData& atom2, real3 delta, real3* fields) {
real a[4][4];
real gc[5];
real gux[11],guy[11],guz[11];
real gqxx[5],gqxy[5];
real gqxz[5],gqyy[5];
real gqyz[5],gqzz[5];
real ci = atom1.posq.w;
real ck = atom2.posq.w;
real uxi = atom1.dipole.x;
real uyi = atom1.dipole.y;
real uzi = atom1.dipole.z;
real uxk = atom2.dipole.x;
real uyk = atom2.dipole.y;
real uzk = atom2.dipole.z;
real qxxi = atom1.quadrupoleXX;
real qxyi = atom1.quadrupoleXY;
real qxzi = atom1.quadrupoleXZ;
real qyyi = atom1.quadrupoleYY;
real qyzi = atom1.quadrupoleYZ;
real qzzi = atom1.quadrupoleZZ;
real qxxk = atom2.quadrupoleXX;
real qxyk = atom2.quadrupoleXY;
real qxzk = atom2.quadrupoleXZ;
real qyyk = atom2.quadrupoleYY;
real qyzk = atom2.quadrupoleYZ;
real qzzk = atom2.quadrupoleZZ;
real xr2 = delta.x*delta.x;
real yr2 = delta.y*delta.y;
real zr2 = delta.z*delta.z;
real r2 = xr2 + yr2 + zr2;
real rb2 = atom1.bornRadius*atom2.bornRadius;
real expterm = EXP(-r2/(GK_C*rb2));
real expc = expterm / GK_C;
real dexpc = -2/(GK_C*rb2);
real gf2 = RECIP(r2+rb2*expterm);
real gf = SQRT(gf2);
real gf3 = gf2*gf;
real gf5 = gf3*gf2;
real gf7 = gf5*gf2;
// reaction potential auxiliary terms
a[0][0] = gf;
a[1][0] = -gf3;
a[2][0] = 3*gf5;
a[3][0] = -15*gf7;
// reaction potential gradient auxiliary terms
real expc1 = 1 - expc;
a[0][1] = expc1*a[1][0];
a[1][1] = expc1*a[2][0];
a[2][1] = expc1*a[3][0];
// dipole second reaction potential gradient auxiliary term
real expcdexpc = -expc*dexpc;
a[1][2] = expc1*a[2][1] + expcdexpc*a[2][0];
// multiply the auxillary terms by dielectric functions;
a[0][1] = GK_FC*a[0][1];
a[1][0] = GK_FD*a[1][0];
a[1][1] = GK_FD*a[1][1];
a[1][2] = GK_FD*a[1][2];
a[2][0] = GK_FQ*a[2][0];
a[2][1] = GK_FQ*a[2][1];
// unweighted dipole reaction potential tensor
gux[1] = delta.x*a[1][0];
guy[1] = delta.y*a[1][0];
guz[1] = delta.z*a[1][0];
// unweighted reaction potential gradient tensor
gc[2] = delta.x*a[0][1];
gc[3] = delta.y*a[0][1];
gc[4] = delta.z*a[0][1];
gux[2] = a[1][0] + xr2*a[1][1];
gux[3] = delta.x*delta.y*a[1][1];
gux[4] = delta.x*delta.z*a[1][1];
guy[2] = gux[3];
guy[3] = a[1][0] + yr2*a[1][1];
guy[4] = delta.y*delta.z*a[1][1];
guz[2] = gux[4];
guz[3] = guy[4];
guz[4] = a[1][0] + zr2*a[1][1];
gqxx[2] = delta.x*(2*a[2][0]+xr2*a[2][1]);
gqxx[3] = delta.y*xr2*a[2][1];
gqxx[4] = delta.z*xr2*a[2][1];
gqyy[2] = delta.x*yr2*a[2][1];
gqyy[3] = delta.y*(2*a[2][0]+yr2*a[2][1]);
gqyy[4] = delta.z*yr2*a[2][1];
gqzz[2] = delta.x*zr2*a[2][1];
gqzz[3] = delta.y*zr2*a[2][1];
gqzz[4] = delta.z*(2*a[2][0]+zr2*a[2][1]);
gqxy[2] = delta.y*(a[2][0]+xr2*a[2][1]);
gqxy[3] = delta.x*(a[2][0]+yr2*a[2][1]);
gqxy[4] = delta.z*delta.x*delta.y*a[2][1];
gqxz[2] = delta.z*(a[2][0]+xr2*a[2][1]);
gqxz[3] = gqxy[4];
gqxz[4] = delta.x*(a[2][0]+zr2*a[2][1]);
gqyz[2] = gqxy[4];
gqyz[3] = delta.z*(a[2][0]+yr2*a[2][1]);
gqyz[4] = delta.y*(a[2][0]+zr2*a[2][1]);
// unweighted dipole second reaction potential gradient tensor
gux[5] = delta.x*(3*a[1][1]+xr2*a[1][2]);
gux[6] = delta.y*(a[1][1]+xr2*a[1][2]);
gux[7] = delta.z*(a[1][1]+xr2*a[1][2]);
gux[8] = delta.x*(a[1][1]+yr2*a[1][2]);
gux[9] = delta.z*delta.x*delta.y*a[1][2];
gux[10] = delta.x*(a[1][1]+zr2*a[1][2]);
guy[5] = delta.y*(a[1][1]+xr2*a[1][2]);
guy[6] = delta.x*(a[1][1]+yr2*a[1][2]);
guy[7] = gux[9];
guy[8] = delta.y*(3*a[1][1]+yr2*a[1][2]);
guy[9] = delta.z*(a[1][1]+yr2*a[1][2]);
guy[10] = delta.y*(a[1][1]+zr2*a[1][2]);
guz[5] = delta.z*(a[1][1]+xr2*a[1][2]);
guz[6] = gux[9];
guz[7] = delta.x*(a[1][1]+zr2*a[1][2]);
guz[8] = delta.z*(a[1][1]+yr2*a[1][2]);
guz[9] = delta.y*(a[1][1]+zr2*a[1][2]);
guz[10] = delta.z*(3*a[1][1]+zr2*a[1][2]);
// generalized Kirkwood permanent reaction field
fields[0].x = uxk*gux[2] + uyk*gux[3] + uzk*gux[4]
+ 0.5f*(ck*gux[1] + qxxk*gux[5]
+ qyyk*gux[8] + qzzk*gux[10]
+ 2*(qxyk*gux[6]+qxzk*gux[7]
+ qyzk*gux[9]))
+ 0.5f*(ck*gc[2] + qxxk*gqxx[2]
+ qyyk*gqyy[2] + qzzk*gqzz[2]
+ 2*(qxyk*gqxy[2]+qxzk*gqxz[2]
+ qyzk*gqyz[2]));
fields[0].y = uxk*guy[2] + uyk*guy[3] + uzk*guy[4]
+ 0.5f*(ck*guy[1] + qxxk*guy[5]
+ qyyk*guy[8] + qzzk*guy[10]
+ 2*(qxyk*guy[6]+qxzk*guy[7]
+ qyzk*guy[9]))
+ 0.5f*(ck*gc[3] + qxxk*gqxx[3]
+ qyyk*gqyy[3] + qzzk*gqzz[3]
+ 2*(qxyk*gqxy[3]+qxzk*gqxz[3]
+ qyzk*gqyz[3]));
fields[0].z = uxk*guz[2] + uyk*guz[3] + uzk*guz[4]
+ 0.5f*(ck*guz[1] + qxxk*guz[5]
+ qyyk*guz[8] + qzzk*guz[10]
+ 2*(qxyk*guz[6]+qxzk*guz[7]
+ qyzk*guz[9]))
+ 0.5f*(ck*gc[4] + qxxk*gqxx[4]
+ qyyk*gqyy[4] + qzzk*gqzz[4]
+ 2*(qxyk*gqxy[4]+qxzk*gqxz[4]
+ qyzk*gqyz[4]));
fields[1].x = uxi*gux[2] + uyi*gux[3] + uzi*gux[4]
- 0.5f*(ci*gux[1] + qxxi*gux[5]
+ qyyi*gux[8] + qzzi*gux[10]
+ 2*(qxyi*gux[6]+qxzi*gux[7]
+ qyzi*gux[9]))
- 0.5f*(ci*gc[2] + qxxi*gqxx[2]
+ qyyi*gqyy[2] + qzzi*gqzz[2]
+ 2*(qxyi*gqxy[2]+qxzi*gqxz[2]
+ qyzi*gqyz[2]));
fields[1].y = uxi*guy[2] + uyi*guy[3] + uzi*guy[4]
- 0.5f*(ci*guy[1] + qxxi*guy[5]
+ qyyi*guy[8] + qzzi*guy[10]
+ 2*(qxyi*guy[6]+qxzi*guy[7]
+ qyzi*guy[9]))
- 0.5f*(ci*gc[3] + qxxi*gqxx[3]
+ qyyi*gqyy[3] + qzzi*gqzz[3]
+ 2*(qxyi*gqxy[3]+qxzi*gqxz[3]
+ qyzi*gqyz[3]));
fields[1].z = uxi*guz[2] + uyi*guz[3] + uzi*guz[4]
- 0.5f*(ci*guz[1] + qxxi*guz[5]
+ qyyi*guz[8] + qzzi*guz[10]
+ 2*(qxyi*guz[6]+qxzi*guz[7]
+ qyzi*guz[9]))
- 0.5f*(ci*gc[4] + qxxi*gqxx[4]
+ qyyi*gqyy[4] + qzzi*gqzz[4]
+ 2*(qxyi*gqxy[4]+qxzi*gqxz[4]
+ qyzi*gqyz[4]));
}
#endif
__device__ real computeDScaleFactor(unsigned int polarizationGroup) { __device__ real computeDScaleFactor(unsigned int polarizationGroup) {
return (polarizationGroup & 1 ? 0 : 1); return (polarizationGroup & 1 ? 0 : 1);
} }
...@@ -198,6 +401,8 @@ extern "C" __global__ void computeFixedField( ...@@ -198,6 +401,8 @@ extern "C" __global__ void computeFixedField(
const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, unsigned int startTileIndex, unsigned int numTileIndices, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const unsigned int* __restrict__ interactionFlags, const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const unsigned int* __restrict__ interactionFlags,
#elif defined USE_GK
const real* __restrict__ bornRadii, unsigned long long* __restrict__ gkFieldBuffers,
#endif #endif
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const float2* __restrict__ dampingAndThole) { const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const float2* __restrict__ dampingAndThole) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE; unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
...@@ -246,6 +451,9 @@ extern "C" __global__ void computeFixedField( ...@@ -246,6 +451,9 @@ extern "C" __global__ void computeFixedField(
} }
unsigned int atom1 = x*TILE_SIZE + tgx; unsigned int atom1 = x*TILE_SIZE + tgx;
loadAtomData(data, atom1, posq, labFrameDipole, labFrameQuadrupole, dampingAndThole); loadAtomData(data, atom1, posq, labFrameDipole, labFrameQuadrupole, dampingAndThole);
#ifdef USE_GK
data.bornRadius = bornRadii[atom1];
#endif
// Locate the exclusion data for this tile. // Locate the exclusion data for this tile.
...@@ -271,26 +479,32 @@ extern "C" __global__ void computeFixedField( ...@@ -271,26 +479,32 @@ extern "C" __global__ void computeFixedField(
localData[localAtomIndex].quadrupoleYY = data.quadrupoleYY; localData[localAtomIndex].quadrupoleYY = data.quadrupoleYY;
localData[localAtomIndex].quadrupoleYZ = data.quadrupoleYZ; localData[localAtomIndex].quadrupoleYZ = data.quadrupoleYZ;
localData[localAtomIndex].quadrupoleZZ = data.quadrupoleZZ; localData[localAtomIndex].quadrupoleZZ = data.quadrupoleZZ;
localData[localAtomIndex].thole = data.thole; // IS THIS CORRECT? localData[localAtomIndex].thole = data.thole;
localData[localAtomIndex].damp = data.damp; // IS THIS CORRECT? localData[localAtomIndex].damp = data.damp;
#ifdef USE_GK
localData[localAtomIndex].bornRadius = data.bornRadius;
#endif
uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx]; uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx]; unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+j; real3 delta = trimTo3(localData[tbx+j].posq-data.posq);
real3 delta = make_real3(localData[atom2].posq.x-data.posq.x, localData[atom2].posq.y-data.posq.y, localData[atom2].posq.z-data.posq.z);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y; delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z; delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
real3 fields[4]; int atom2 = y*TILE_SIZE+j;
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
computeOneInteraction(data, localData[atom2], delta, d, p, fields);
atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 fields[4];
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
computeOneInteraction(data, localData[tbx+j], delta, d, p, fields);
data.field += fields[0]; data.field += fields[0];
data.fieldPolar += fields[1]; data.fieldPolar += fields[1];
#ifdef USE_GK
computeOneGkInteraction(data, localData[tbx+j], delta, fields);
data.gkField += fields[0];
#endif
} }
covalent.x >>= 1; covalent.x >>= 1;
covalent.y >>= 1; covalent.y >>= 1;
...@@ -305,6 +519,10 @@ extern "C" __global__ void computeFixedField( ...@@ -305,6 +519,10 @@ extern "C" __global__ void computeFixedField(
loadAtomData(localData[localAtomIndex], j, posq, labFrameDipole, labFrameQuadrupole, dampingAndThole); loadAtomData(localData[localAtomIndex], j, posq, labFrameDipole, labFrameQuadrupole, dampingAndThole);
localData[localAtomIndex].field = make_real3(0); localData[localAtomIndex].field = make_real3(0);
localData[localAtomIndex].fieldPolar = make_real3(0); localData[localAtomIndex].fieldPolar = make_real3(0);
#ifdef USE_GK
localData[localAtomIndex].bornRadius = bornRadii[j];
localData[localAtomIndex].gkField = make_real3(0);
#endif
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF); unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF);
if (!hasExclusions && flags != 0xFFFFFFFF) { if (!hasExclusions && flags != 0xFFFFFFFF) {
...@@ -385,22 +603,27 @@ extern "C" __global__ void computeFixedField( ...@@ -385,22 +603,27 @@ extern "C" __global__ void computeFixedField(
polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx)); polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx));
unsigned int tj = tgx; unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+tj; real3 delta = trimTo3(localData[tbx+tj].posq-data.posq);
real3 delta = make_real3(localData[atom2].posq.x-data.posq.x, localData[atom2].posq.y-data.posq.y, localData[atom2].posq.z-data.posq.z);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y; delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z; delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
real3 fields[4]; int atom2 = y*TILE_SIZE+tj;
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
computeOneInteraction(data, localData[atom2], delta, d, p, fields);
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 fields[4];
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
computeOneInteraction(data, localData[tbx+tj], delta, d, p, fields);
data.field += fields[0]; data.field += fields[0];
data.fieldPolar += fields[1]; data.fieldPolar += fields[1];
localData[atom2].field += fields[2]; localData[tbx+tj].field += fields[2];
localData[atom2].fieldPolar += fields[3]; localData[tbx+tj].fieldPolar += fields[3];
#ifdef USE_GK
computeOneGkInteraction(data, localData[tbx+tj], delta, fields);
data.gkField += fields[0];
localData[tbx+tj].gkField += fields[1];
#endif
} }
covalent.x >>= 1; covalent.x >>= 1;
covalent.y >>= 1; covalent.y >>= 1;
...@@ -421,6 +644,11 @@ extern "C" __global__ void computeFixedField( ...@@ -421,6 +644,11 @@ extern "C" __global__ void computeFixedField(
atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0xFFFFFFFF)));
#ifdef USE_GK
atomicAdd(&gkFieldBuffers[offset], static_cast<unsigned long long>((long long) (data.gkField.x*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.gkField.y*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.gkField.z*0xFFFFFFFF)));
#endif
} }
if (pos < end && x != y) { if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx; const unsigned int offset = y*TILE_SIZE + tgx;
...@@ -430,6 +658,11 @@ extern "C" __global__ void computeFixedField( ...@@ -430,6 +658,11 @@ extern "C" __global__ void computeFixedField(
atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0xFFFFFFFF)));
#ifdef USE_GK
atomicAdd(&gkFieldBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.x*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.y*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.z*0xFFFFFFFF)));
#endif
} }
pos++; pos++;
} while (pos < end); } while (pos < end);
......
...@@ -207,6 +207,9 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4 ...@@ -207,6 +207,9 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
} }
extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fieldBuffers, const long long* __restrict__ fieldPolarBuffers, extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fieldBuffers, const long long* __restrict__ fieldPolarBuffers,
#ifdef USE_GK
const long long* __restrict__ gkFieldBuffers, real* __restrict__ inducedDipoleS, real* __restrict__ inducedDipolePolarS,
#endif
real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability) { real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability) {
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) { for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) {
real scale = polarizability[atom]/(real) 0xFFFFFFFF; real scale = polarizability[atom]/(real) 0xFFFFFFFF;
...@@ -216,16 +219,17 @@ extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fi ...@@ -216,16 +219,17 @@ extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fi
inducedDipolePolar[3*atom] = scale*fieldPolarBuffers[atom]; inducedDipolePolar[3*atom] = scale*fieldPolarBuffers[atom];
inducedDipolePolar[3*atom+1] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS]; inducedDipolePolar[3*atom+1] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS];
inducedDipolePolar[3*atom+2] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2]; inducedDipolePolar[3*atom+2] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2];
#ifdef USE_GK
inducedDipoleS[3*atom] = scale*(fieldBuffers[atom]+gkFieldBuffers[atom]);
inducedDipoleS[3*atom+1] = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]);
inducedDipoleS[3*atom+2] = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]);
inducedDipolePolarS[3*atom] = scale*(fieldPolarBuffers[atom]+gkFieldBuffers[atom]);
inducedDipolePolarS[3*atom+1] = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]);
inducedDipolePolarS[3*atom+2] = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]);
#endif
} }
} }
/**
* Convert a real4 to a real3 by removing its last element.
*/
inline __device__ real3 trim(real4 v) {
return make_real3(v.x, v.y, v.z);
}
/** /**
* Normalize a vector and return what its magnitude was. * Normalize a vector and return what its magnitude was.
*/ */
...@@ -274,11 +278,11 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for ...@@ -274,11 +278,11 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for
// NoAxisType // NoAxisType
if (axisType < 5 && particles.z >= 0) { if (axisType < 5 && particles.z >= 0) {
real3 atomPos = trim(posq[atom]); real3 atomPos = trimTo3(posq[atom]);
vector[U] = atomPos - trim(posq[axisAtom]); vector[U] = atomPos - trimTo3(posq[axisAtom]);
norms[U] = normVector(vector[U]); norms[U] = normVector(vector[U]);
if (axisType != 4 && particles.x >= 0) if (axisType != 4 && particles.x >= 0)
vector[V] = atomPos - trim(posq[particles.x]); vector[V] = atomPos - trimTo3(posq[particles.x]);
else else
vector[V] = make_real3(0.1f); vector[V] = make_real3(0.1f);
norms[V] = normVector(vector[V]); norms[V] = normVector(vector[V]);
...@@ -288,7 +292,7 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for ...@@ -288,7 +292,7 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for
if (axisType < 2 || axisType > 3) if (axisType < 2 || axisType > 3)
vector[W] = cross(vector[U], vector[V]); vector[W] = cross(vector[U], vector[V]);
else else
vector[W] = atomPos - trim(posq[particles.y]); vector[W] = atomPos - trimTo3(posq[particles.y]);
norms[W] = normVector(vector[W]); norms[W] = normVector(vector[W]);
vector[UV] = cross(vector[V], vector[U]); vector[UV] = cross(vector[V], vector[U]);
......
...@@ -260,8 +260,8 @@ extern "C" __global__ void computeElectrostatics( ...@@ -260,8 +260,8 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ; localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ;
localData[threadIdx.x].inducedDipole = data.inducedDipole; localData[threadIdx.x].inducedDipole = data.inducedDipole;
localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar; localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar;
localData[threadIdx.x].thole = data.thole; // IS THIS CORRECT? localData[threadIdx.x].thole = data.thole;
localData[threadIdx.x].damp = data.damp; // IS THIS CORRECT? localData[threadIdx.x].damp = data.damp;
uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx]; uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx]; unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment