Commit 222378c6 authored by Peter Eastman's avatar Peter Eastman
Browse files

Began converting AmoebaGeneralizedKirkwoodForce to new CUDA platform

parent 235a88e5
......@@ -58,6 +58,9 @@ public:
return std::map<std::string, double>(); // This force field doesn't define any parameters.
}
std::vector<std::string> getKernelNames();
Kernel& getKernel() {
return kernel;
}
private:
AmoebaGeneralizedKirkwoodForce& owner;
Kernel kernel;
......
......@@ -95,8 +95,8 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl
if (name == CalcAmoebaMultipoleForceKernel::Name())
return new CudaCalcAmoebaMultipoleForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcAmoebaGeneralizedKirkwoodForceKernel::Name())
// return new CudaCalcAmoebaGeneralizedKirkwoodForceKernel(name, platform, cu, context.getSystem());
if (name == CalcAmoebaGeneralizedKirkwoodForceKernel::Name())
return new CudaCalcAmoebaGeneralizedKirkwoodForceKernel(name, platform, cu, context.getSystem());
if (name == CalcAmoebaVdwForceKernel::Name())
return new CudaCalcAmoebaVdwForceKernel(name, platform, cu, context.getSystem());
......
......@@ -27,6 +27,7 @@
#include "AmoebaCudaKernels.h"
#include "CudaAmoebaKernelSources.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/AmoebaGeneralizedKirkwoodForceImpl.h"
#include "openmm/internal/AmoebaMultipoleForceImpl.h"
#include "openmm/internal/AmoebaWcaDispersionForceImpl.h"
#include "openmm/internal/AmoebaTorsionTorsionForceImpl.h"
......@@ -794,7 +795,7 @@ CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::stri
field(NULL), fieldPolar(NULL), inducedField(NULL), inducedFieldPolar(NULL), torque(NULL), dampingAndThole(NULL),
inducedDipole(NULL), inducedDipolePolar(NULL), inducedDipoleErrors(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL),
pmeGrid(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeTheta1(NULL), pmeTheta2(NULL), pmeTheta3(NULL),
pmeIgrid(NULL), pmePhi(NULL), pmePhid(NULL), pmePhip(NULL), pmePhidp(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), sort(NULL) {
pmeIgrid(NULL), pmePhi(NULL), pmePhid(NULL), pmePhip(NULL), pmePhidp(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), sort(NULL), gkKernel(NULL) {
}
CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
......@@ -999,6 +1000,13 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
maxInducedIterations = 0;
bool usePME = (force.getNonbondedMethod() == AmoebaMultipoleForce::PME);
// See whether there's an AmoebaGeneralizedKirkwoodForce in the System.
const AmoebaGeneralizedKirkwoodForce* gk = NULL;
for (int i = 0; i < system.getNumForces() && gk == NULL; i++)
gk = dynamic_cast<const AmoebaGeneralizedKirkwoodForce*>(&system.getForce(i));
double innerDielectric = (gk == NULL ? 1.0 : gk->getSoluteDielectric());
// Create the kernels.
map<string, string> defines;
......@@ -1006,7 +1014,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["THREAD_BLOCK_SIZE"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize());
defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
defines["ENERGY_SCALE_FACTOR"] = cu.doubleToString(138.9354558456); // DIVIDE BY INNER DIELECTRIC!!!
defines["ENERGY_SCALE_FACTOR"] = cu.doubleToString(138.9354558456/innerDielectric);
if (force.getPolarizationType() == AmoebaMultipoleForce::Direct)
defines["DIRECT_POLARIZATION"] = "";
double alpha = force.getAEwald();
......@@ -1034,6 +1042,14 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["USE_PERIODIC"] = "";
defines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
}
if (gk != NULL) {
defines["USE_GK"] = "";
defines["GK_C"] = cu.doubleToString(2.455);
double solventDielectric = gk->getSolventDielectric();
defines["GK_FC"] = cu.doubleToString(1*(1-solventDielectric)/(0+1*solventDielectric));
defines["GK_FD"] = cu.doubleToString(2*(1-solventDielectric)/(1+2*solventDielectric));
defines["GK_FQ"] = cu.doubleToString(3*(1-solventDielectric)/(2+3*solventDielectric));
}
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoles, defines);
computeMomentsKernel = cu.getKernel(module, "computeLabFrameMoments");
recordInducedDipolesKernel = cu.getKernel(module, "recordInducedDipoles");
......@@ -1284,8 +1300,14 @@ void CudaCalcAmoebaMultipoleForceKernel::initializeScaleFactors() {
}
double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (!hasInitializedScaleFactors)
if (!hasInitializedScaleFactors) {
initializeScaleFactors();
for (int i = 0; i < (int) context.getForceImpls().size() && gkKernel == NULL; i++) {
AmoebaGeneralizedKirkwoodForceImpl* gkImpl = dynamic_cast<AmoebaGeneralizedKirkwoodForceImpl*>(context.getForceImpls()[i]);
if (gkImpl != NULL)
gkKernel = dynamic_cast<CudaCalcAmoebaGeneralizedKirkwoodForceKernel*>(&gkImpl->getKernel().getImpl());
}
}
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// Compute the lab frame moments.
......@@ -1302,14 +1324,41 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
if (pmeGrid == NULL) {
// Compute induced dipoles.
void* computeFixedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionIndices().getDevicePointer(), &nb.getExclusionRowIndices().getDevicePointer(),
&covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(), &startTileIndex, &numTileIndices,
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(computeFixedFieldKernel, computeFixedFieldArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(),
&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &polarizability->getDevicePointer()};
cu.executeKernel(recordInducedDipolesKernel, recordInducedDipolesArgs, cu.getNumAtoms());
if (gkKernel == NULL) {
void* computeFixedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionIndices().getDevicePointer(), &nb.getExclusionRowIndices().getDevicePointer(),
&covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(), &startTileIndex, &numTileIndices,
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(computeFixedFieldKernel, computeFixedFieldArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(),
&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &polarizability->getDevicePointer()};
cu.executeKernel(recordInducedDipolesKernel, recordInducedDipolesArgs, cu.getNumAtoms());
}
else {
gkKernel->computeBornRadii();
void* computeFixedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionIndices().getDevicePointer(), &nb.getExclusionRowIndices().getDevicePointer(),
&covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(), &startTileIndex, &numTileIndices,
&gkKernel->getBornRadii()->getDevicePointer(), &gkKernel->getField()->getDevicePointer(),
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(computeFixedFieldKernel, computeFixedFieldArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
vector<long long> f;
gkKernel->getField()->download(f);
printf("field\n");
for (int i = 0; i < 3*cu.getNumAtoms(); i++)
printf("%d %g\n", i, f[i]/(double) 0xFFFFFFFF);
void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(),
&gkKernel->getField()->getDevicePointer(), &gkKernel->getInducedDipoles()->getDevicePointer(),
&gkKernel->getInducedDipolesPolar()->getDevicePointer(), &inducedDipole->getDevicePointer(),
&inducedDipolePolar->getDevicePointer(), &polarizability->getDevicePointer()};
cu.executeKernel(recordInducedDipolesKernel, recordInducedDipolesArgs, cu.getNumAtoms());
vector<float> d, dp;
gkKernel->getInducedDipoles()->download(d);
gkKernel->getInducedDipolesPolar()->download(dp);
printf("dipoles\n");
for (int i = 0; i < cu.getNumAtoms(); i++)
printf("%d %g %g %g, %g %g %g\n", i, d[3*i], d[3*i+1], d[3*i+2], dp[3*i], dp[3*i+1], dp[3*i+2]);
}
// Iterate until the dipoles converge.
......@@ -1343,6 +1392,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(),
&inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
if (gkKernel != NULL)
gkKernel->finishComputation(*torque, *labFrameDipoles, *labFrameQuadrupoles, *inducedDipole, *inducedDipolePolar, *dampingAndThole, *covalentFlags, *polarizationGroupFlags);
// Map torques to force.
......@@ -1645,85 +1696,180 @@ void CudaCalcAmoebaMultipoleForceKernel::getSystemMultipoleMoments(ContextImpl&
computeSystemMultipoleMoments<float, float4>(context, origin, outputMultipoleMoments);
}
///* -------------------------------------------------------------------------- *
// * AmoebaGeneralizedKirkwood *
// * -------------------------------------------------------------------------- */
//
//class CudaCalcAmoebaGeneralizedKirkwoodForceKernel::ForceInfo : public CudaForceInfo {
//public:
// ForceInfo(const AmoebaGeneralizedKirkwoodForce& force) : force(force) {
// }
// bool areParticlesIdentical(int particle1, int particle2) {
// double charge1, charge2, radius1, radius2, scale1, scale2;
// force.getParticleParameters(particle1, charge1, radius1, scale1);
// force.getParticleParameters(particle2, charge2, radius2, scale2);
// return (charge1 == charge2 && radius1 == radius2 && scale1 == scale2);
// }
//private:
// const AmoebaGeneralizedKirkwoodForce& force;
//};
//
//CudaCalcAmoebaGeneralizedKirkwoodForceKernel::CudaCalcAmoebaGeneralizedKirkwoodForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) :
// CalcAmoebaGeneralizedKirkwoodForceKernel(name, platform), cu(cu), system(system) {
// data.incrementKernelCount();
//}
//
//CudaCalcAmoebaGeneralizedKirkwoodForceKernel::~CudaCalcAmoebaGeneralizedKirkwoodForceKernel() {
// data.decrementKernelCount();
//}
//
//void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::initialize(const System& system, const AmoebaGeneralizedKirkwoodForce& force) {
//
// data.setHasAmoebaGeneralizedKirkwood( true );
//
// int numParticles = system.getNumParticles();
//
// std::vector<float> radius(numParticles);
// std::vector<float> scale(numParticles);
// std::vector<float> charge(numParticles);
//
// for( int ii = 0; ii < numParticles; ii++ ){
// double particleCharge, particleRadius, scalingFactor;
// force.getParticleParameters(ii, particleCharge, particleRadius, scalingFactor);
// radius[ii] = static_cast<float>( particleRadius );
// scale[ii] = static_cast<float>( scalingFactor );
// charge[ii] = static_cast<float>( particleCharge );
// }
// if( data.getUseGrycuk() ){
//
// gpuSetAmoebaGrycukParameters( data.getAmoebaGpu(), static_cast<float>(force.getSoluteDielectric() ),
// static_cast<float>( force.getSolventDielectric() ),
// radius, scale, charge,
// force.getIncludeCavityTerm(),
// static_cast<float>( force.getProbeRadius() ),
// static_cast<float>( force.getSurfaceAreaFactor() ) );
//
// } else {
//
// gpuSetAmoebaObcParameters( data.getAmoebaGpu(), static_cast<float>(force.getSoluteDielectric() ),
// static_cast<float>( force.getSolventDielectric() ),
// radius, scale, charge,
// force.getIncludeCavityTerm(),
// static_cast<float>( force.getProbeRadius() ),
// static_cast<float>( force.getSurfaceAreaFactor() ) );
//
// }
// data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force));
//}
//
//double CudaCalcAmoebaGeneralizedKirkwoodForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// // handled in computeAmoebaMultipoleForce()
// return 0.0;
//}
//
//static void computeAmoebaVdwForce( CudaContext& cu ) {
//
// amoebaGpuContext gpu = data.getAmoebaGpu();
// data.initializeGpu();
//
// // Vdw14_7F
// kCalculateAmoebaVdw14_7Forces(gpu, data.getUseVdwNeighborList());
//}
/* -------------------------------------------------------------------------- *
* AmoebaGeneralizedKirkwood *
* -------------------------------------------------------------------------- */
class CudaCalcAmoebaGeneralizedKirkwoodForceKernel::ForceInfo : public CudaForceInfo {
public:
ForceInfo(const AmoebaGeneralizedKirkwoodForce& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
double charge1, charge2, radius1, radius2, scale1, scale2;
force.getParticleParameters(particle1, charge1, radius1, scale1);
force.getParticleParameters(particle2, charge2, radius2, scale2);
return (charge1 == charge2 && radius1 == radius2 && scale1 == scale2);
}
private:
const AmoebaGeneralizedKirkwoodForce& force;
};
CudaCalcAmoebaGeneralizedKirkwoodForceKernel::CudaCalcAmoebaGeneralizedKirkwoodForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) :
CalcAmoebaGeneralizedKirkwoodForceKernel(name, platform), cu(cu), system(system), params(NULL), bornRadii(NULL), field(NULL), inducedDipoleS(NULL),
inducedDipolePolarS(NULL), bornSum(NULL), bornForce(NULL) {
}
CudaCalcAmoebaGeneralizedKirkwoodForceKernel::~CudaCalcAmoebaGeneralizedKirkwoodForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (bornRadii != NULL)
delete bornRadii;
if (field != NULL)
delete field;
if (inducedDipoleS != NULL)
delete inducedDipoleS;
if (inducedDipolePolarS != NULL)
delete inducedDipolePolarS;
if (bornSum != NULL)
delete bornSum;
if (bornForce != NULL)
delete bornForce;
}
void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::initialize(const System& system, const AmoebaGeneralizedKirkwoodForce& force) {
cu.setAsCurrent();
if (cu.getPlatformData().contexts.size() > 1)
throw OpenMMException("AmoebaGeneralizedKirkwoodForce does not support using multiple CUDA devices");
const AmoebaMultipoleForce* multipoles = NULL;
for (int i = 0; i < system.getNumForces() && multipoles == NULL; i++)
multipoles = dynamic_cast<const AmoebaMultipoleForce*>(&system.getForce(i));
if (multipoles == NULL)
throw OpenMMException("AmoebaGeneralizedKirkwoodForce requires the System to also contain an AmoebaMultipoleForce");
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
int paddedNumAtoms = cu.getPaddedNumAtoms();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
params = CudaArray::create<float2>(cu, paddedNumAtoms, "amoebaGkParams");
bornRadii = new CudaArray(cu, paddedNumAtoms, elementSize, "bornRadii");
field = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "gkField");
bornSum = CudaArray::create<long long>(cu, paddedNumAtoms, "bornSum");
bornForce = CudaArray::create<long long>(cu, paddedNumAtoms, "bornForce");
inducedDipoleS = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipoleS");
inducedDipolePolarS = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolarS");
cu.addAutoclearBuffer(*field);
cu.addAutoclearBuffer(*bornSum);
cu.addAutoclearBuffer(*bornForce);
vector<float2> paramsVector(paddedNumAtoms);
for (int i = 0; i < force.getNumParticles(); i++) {
double charge, radius, scalingFactor;
force.getParticleParameters(i, charge, radius, scalingFactor);
paramsVector[i] = make_float2((float) radius, (float) (scalingFactor*radius));
// Make sure the charge matches the one specified by the AmoebaMultipoleForce.
double charge2, thole, damping, polarity;
int axisType, atomX, atomY, atomZ;
vector<double> dipole, quadrupole;
multipoles->getMultipoleParameters(i, charge2, dipole, quadrupole, axisType, atomZ, atomX, atomY, thole, damping, polarity);
if (charge != charge2)
throw OpenMMException("AmoebaGeneralizedKirkwoodForce and AmoebaMultipoleForce must specify the same charge for every atom");
}
params->upload(paramsVector);
// Create the kernels.
map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(paddedNumAtoms);
defines["THREAD_BLOCK_SIZE"] = cu.intToString(nb.getForceThreadBlockSize());
defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
defines["GK_C"] = cu.doubleToString(2.455);
double solventDielectric = force.getSolventDielectric();
defines["GK_FC"] = cu.doubleToString(1*(1-solventDielectric)/(0+1*solventDielectric));
defines["GK_FD"] = cu.doubleToString(2*(1-solventDielectric)/(1+2*solventDielectric));
defines["GK_FQ"] = cu.doubleToString(3*(1-solventDielectric)/(2+3*solventDielectric));
defines["ENERGY_SCALE_FACTOR"] = cu.doubleToString(138.9354558456/solventDielectric);
stringstream forceSource;
forceSource << CudaKernelSources::vectorOps;
forceSource << CudaAmoebaKernelSources::amoebaGk;
forceSource << "#define F1\n";
forceSource << CudaAmoebaKernelSources::gkPairForce;
forceSource << CudaAmoebaKernelSources::gkEDiffPairForce;
forceSource << "#undef F1\n";
forceSource << "#define F2\n";
forceSource << CudaAmoebaKernelSources::gkPairForce;
forceSource << "#undef F2\n";
forceSource << "#define T1\n";
forceSource << CudaAmoebaKernelSources::gkPairForce;
forceSource << CudaAmoebaKernelSources::gkEDiffPairForce;
forceSource << "#undef T1\n";
forceSource << "#define T2\n";
forceSource << CudaAmoebaKernelSources::gkPairForce;
forceSource << "#undef T2\n";
forceSource << "#define T3\n";
forceSource << CudaAmoebaKernelSources::gkEDiffPairForce;
forceSource << "#undef T3\n";
forceSource << "#define B1\n";
forceSource << "#define B2\n";
forceSource << CudaAmoebaKernelSources::gkPairForce;
CUmodule module = cu.createModule(forceSource.str(), defines);
computeBornSumKernel = cu.getKernel(module, "computeBornSum");
reduceBornSumKernel = cu.getKernel(module, "reduceBornSum");
gkForceKernel = cu.getKernel(module, "computeGKForces");
chainRuleKernel = cu.getKernel(module, "computeChainRuleForce");
ediffKernel = cu.getKernel(module, "computeEDiffForce");
cu.addForce(new ForceInfo(force));
}
double CudaCalcAmoebaGeneralizedKirkwoodForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// Since GK is so tightly entwined with the electrostatics, this method does nothing, and the force calculation
// is driven by AmoebaMultipoleForce.
return 0.0;
}
void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::computeBornRadii() {
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
int numTiles = nb.getNumTiles();
int numForceThreadBlocks = nb.getNumForceThreadBlocks();
int forceThreadBlockSize = nb.getForceThreadBlockSize();
void* computeBornSumArgs[] = {&bornSum->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&params->getDevicePointer(), &numTiles};
cu.executeKernel(computeBornSumKernel, computeBornSumArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
void* reduceBornSumArgs[] = {&bornSum->getDevicePointer(), &params->getDevicePointer(), &bornRadii->getDevicePointer()};
cu.executeKernel(reduceBornSumKernel, reduceBornSumArgs, cu.getNumAtoms());
vector<float> r;
bornRadii->download(r);
printf("radii\n");
for (int i = 0; i < cu.getNumAtoms(); i++)
printf("%d %g\n", i, r[i]);
}
void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::finishComputation(CudaArray& torque, CudaArray& labFrameDipoles, CudaArray& labFrameQuadrupoles,
CudaArray& inducedDipole, CudaArray& inducedDipolePolar, CudaArray& dampingAndThole, CudaArray& covalentFlags, CudaArray& polarizationGroupFlags) {
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
int startTileIndex = nb.getStartTileIndex();
int numTileIndices = nb.getNumTiles();
int numForceThreadBlocks = nb.getNumForceThreadBlocks();
int forceThreadBlockSize = nb.getForceThreadBlockSize();
void* gkForceArgs[] = {&cu.getForce().getDevicePointer(), &torque.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&cu.getPosq().getDevicePointer(), &startTileIndex, &numTileIndices, &labFrameDipoles.getDevicePointer(),
&labFrameQuadrupoles.getDevicePointer(), &inducedDipole.getDevicePointer(), &inducedDipolePolar.getDevicePointer(),
&bornRadii->getDevicePointer(), &bornForce->getDevicePointer()};
cu.executeKernel(gkForceKernel, gkForceArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
// Compute cavity term...
void* chainRuleArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &startTileIndex, &numTileIndices,
&params->getDevicePointer(), &bornRadii->getDevicePointer(), &bornForce->getDevicePointer()};
cu.executeKernel(chainRuleKernel, chainRuleArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
void* ediffArgs[] = {&cu.getForce().getDevicePointer(), &torque.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&cu.getPosq().getDevicePointer(), &nb.getExclusionIndices().getDevicePointer(), &nb.getExclusionRowIndices().getDevicePointer(),
&covalentFlags.getDevicePointer(), &polarizationGroupFlags.getDevicePointer(), &startTileIndex, &numTileIndices,
&labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(), &inducedDipole.getDevicePointer(),
&inducedDipolePolar.getDevicePointer(), &inducedDipoleS->getDevicePointer(), &inducedDipolePolarS->getDevicePointer(),
&dampingAndThole.getDevicePointer()};
cu.executeKernel(ediffKernel, ediffArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
/* -------------------------------------------------------------------------- *
* AmoebaVdw *
......
......@@ -37,6 +37,8 @@
namespace OpenMM {
class CudaCalcAmoebaGeneralizedKirkwoodForceKernel;
/**
* This kernel is invoked by AmoebaHarmonicBondForce to calculate the forces acting on the system and the energy of the system.
*/
......@@ -427,6 +429,7 @@ private:
CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel;
CUfunction pmeUpdateBsplinesKernel, pmeAtomRangeKernel, pmeZIndexKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeConvolutionKernel, pmeFixedPotentialKernel, pmeInducedPotentialKernel;
CUfunction pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel, computePotentialKernel;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel* gkKernel;
static const int PmeOrder = 5;
};
......@@ -453,10 +456,38 @@ public:
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Perform the computation of Born radii.
*/
void computeBornRadii();
/**
* Perform the final parts of the force/energy computation.
*/
void finishComputation(CudaArray& torque, CudaArray& labFrameDipoles, CudaArray& labFrameQuadrupoles, CudaArray& inducedDipole, CudaArray& inducedDipolePolar, CudaArray& dampingAndThole, CudaArray& covalentFlags, CudaArray& polarizationGroupFlags);
CudaArray* getBornRadii() {
return bornRadii;
}
CudaArray* getField() {
return field;
}
CudaArray* getInducedDipoles() {
return inducedDipoleS;
}
CudaArray* getInducedDipolesPolar() {
return inducedDipolePolarS;
}
private:
class ForceInfo;
CudaContext& cu;
System& system;
CudaArray* params;
CudaArray* bornSum;
CudaArray* bornRadii;
CudaArray* bornForce;
CudaArray* field;
CudaArray* inducedDipoleS;
CudaArray* inducedDipolePolarS;
CUfunction computeBornSumKernel, reduceBornSumKernel, gkForceKernel, chainRuleKernel, ediffKernel;
};
/**
......
#define TILE_SIZE 32
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
/**
* Reduce the Born sums to compute the Born radii.
*/
extern "C" __global__ void reduceBornSum(const long long* __restrict__ bornSum, const float2* __restrict__ params, real* __restrict__ bornRadii) {
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// Get summed Born data
real sum = RECIP(0xFFFFFFFF)*bornSum[index];
// Now calculate Born radius.
float radius = params[index].x;
radius = RECIP(radius*radius*radius);
sum = radius-sum;
sum = (sum <= 0 ? (real) 1000 : POW(sum, -1/(real) 3));
bornRadii[index] = sum;
}
}
/**
* Data structure used by computeBornSum().
*/
typedef struct {
real3 pos;
real bornSum;
float radius, scaledRadius, padding;
} AtomData1;
__device__ void computeBornSumOneInteraction(AtomData1& atom1, AtomData1& atom2, float& bornSum) {
if (atom1.radius <= 0)
return; // Ignore this interaction
real3 delta = atom2.pos - atom1.pos;
real r2 = dot(delta, delta);
real r = SQRT(r2);
float sk = atom2.scaledRadius;
real sk2 = sk*sk;
if (atom1.radius+r < sk) {
real lik = atom1.radius;
real uik = sk - r;
bornSum -= RECIP(uik*uik*uik) - RECIP(lik*lik*lik);
}
real uik = r+sk;
real lik;
if (atom1.radius+r < sk)
lik = sk-r;
else if (r < atom1.radius+sk)
lik = atom1.radius;
else
lik = r-sk;
real l2 = lik*lik;
real l4 = l2*l2;
real lr = lik*r;
real l4r = l4*r;
real u2 = uik*uik;
real u4 = u2*u2;
real ur = uik*r;
real u4r = u4*r;
real term = (3*(r2-sk2)+6*u2-8*ur)/u4r - (3*(r2-sk2)+6*l2-8*lr)/l4r;
bornSum += term/16;
}
/**
* Compute the Born sum.
*/
extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ bornSum, const real4* __restrict__ posq,
const float2* __restrict__ params, unsigned int numTiles) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
unsigned int pos = warp*numTiles/totalWarps;
unsigned int end = (warp+1)*numTiles/totalWarps;
unsigned int lasty = 0xFFFFFFFF;
__shared__ AtomData1 localData[THREAD_BLOCK_SIZE];
do {
// Extract the coordinates of this tile
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
unsigned int x, y;
AtomData1 data;
data.bornSum = 0;
if (pos < end) {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
unsigned int atom1 = x*TILE_SIZE + tgx;
data.pos = trimTo3(posq[atom1]);
float2 params1 = params[atom1];
data.radius = params1.x;
data.scaledRadius = params1.y;
if (pos >= end)
; // This warp is done.
else if (x == y) {
// This tile is on the diagonal.
localData[threadIdx.x].pos = data.pos;
localData[threadIdx.x].radius = params1.x;
localData[threadIdx.x].scaledRadius = params1.y;
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2)
computeBornSumOneInteraction(data, localData[tbx+j], data.bornSum);
}
}
else {
// This is an off-diagonal tile.
if (lasty != y) {
unsigned int j = y*TILE_SIZE + tgx;
real4 tempPosq = posq[j];
localData[threadIdx.x].pos = trimTo3(tempPosq);
float2 tempParams = params[j];
localData[threadIdx.x].radius = tempParams.x;
localData[threadIdx.x].scaledRadius = tempParams.y;
}
localData[threadIdx.x].bornSum = 0;
// Compute the full set of interactions in this tile.
unsigned int tj = tgx;
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
computeBornSumOneInteraction(data, localData[tbx+j], data.bornSum);
computeBornSumOneInteraction(localData[tbx+j], data, localData[tbx+j].bornSum);
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
}
}
// Write results.
if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&bornSum[offset], static_cast<unsigned long long>((long long) (data.bornSum*0xFFFFFFFF)));
}
if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx;
atomicAdd(&bornSum[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].bornSum*0xFFFFFFFF)));
}
lasty = y;
pos++;
} while (pos < end);
}
/**
* Data structure used by computeGKForces().
*/
typedef struct {
real3 pos, force, dipole, inducedDipole, inducedDipolePolar;
real quadrupoleXX, quadrupoleXY, quadrupoleXZ;
real quadrupoleYY, quadrupoleYZ, quadrupoleZZ;
real q, bornRadius, bornForce;
} AtomData2;
__device__ void computeOneInteractionF1(AtomData2& atom1, volatile AtomData2& atom2, real& outputEnergy, real3& force);
__device__ void computeOneInteractionF2(AtomData2& atom1, volatile AtomData2& atom2, real& outputEnergy, real3& force);
__device__ void computeOneInteractionT1(AtomData2& atom1, volatile AtomData2& atom2, real3& torque);
__device__ void computeOneInteractionT2(AtomData2& atom1, volatile AtomData2& atom2, real3& torque);
__device__ void computeOneInteractionB1B2(AtomData2& atom1, volatile AtomData2& atom2);
inline __device__ void loadAtomData2(AtomData2& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const real* __restrict__ bornRadius) {
float4 atomPosq = posq[atom];
data.pos = trimTo3(atomPosq);
data.q = atomPosq.w;
data.dipole.x = labFrameDipole[atom*3];
data.dipole.y = labFrameDipole[atom*3+1];
data.dipole.z = labFrameDipole[atom*3+2];
data.quadrupoleXX = labFrameQuadrupole[atom*5];
data.quadrupoleXY = labFrameQuadrupole[atom*5+1];
data.quadrupoleXZ = labFrameQuadrupole[atom*5+2];
data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
data.quadrupoleZZ = -(data.quadrupoleXX+data.quadrupoleYY);
data.inducedDipole.x = inducedDipole[atom*3];
data.inducedDipole.y = inducedDipole[atom*3+1];
data.inducedDipole.z = inducedDipole[atom*3+2];
data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
data.bornRadius = bornRadius[atom];
}
inline __device__ void zeroAtomData(AtomData2& data) {
data.force = make_real3(0);
data.bornForce = 0;
}
/**
* Compute electrostatic interactions.
*/
extern "C" __global__ void computeGKForces(
unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer,
const real4* __restrict__ posq, unsigned int startTileIndex, unsigned int numTileIndices, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar,
const real* __restrict__ bornRadii, unsigned long long* __restrict__ bornForce) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int numTiles = numTileIndices;
unsigned int pos = startTileIndex+warp*numTiles/totalWarps;
unsigned int end = startTileIndex+(warp+1)*numTiles/totalWarps;
real energy = 0;
__shared__ AtomData2 localData[THREAD_BLOCK_SIZE];
do {
// Extract the coordinates of this tile
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
unsigned int x, y;
AtomData2 data;
if (pos < end) {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
unsigned int atom1 = x*TILE_SIZE + tgx;
loadAtomData2(data, atom1, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, bornRadii);
zeroAtomData(data);
if (pos >= end)
; // This warp is done.
else if (x == y) {
// This tile is on the diagonal.
localData[threadIdx.x].pos = data.pos;
localData[threadIdx.x].q = data.q;
localData[threadIdx.x].dipole = data.dipole;
localData[threadIdx.x].quadrupoleXX = data.quadrupoleXX;
localData[threadIdx.x].quadrupoleXY = data.quadrupoleXY;
localData[threadIdx.x].quadrupoleXZ = data.quadrupoleXZ;
localData[threadIdx.x].quadrupoleYY = data.quadrupoleYY;
localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ;
localData[threadIdx.x].quadrupoleZZ = data.quadrupoleZZ;
localData[threadIdx.x].inducedDipole = data.inducedDipole;
localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar;
localData[threadIdx.x].bornRadius = data.bornRadius;
// Compute forces.
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
real tempEnergy;
computeOneInteractionF1(data, localData[tbx+j], tempEnergy, tempForce);
computeOneInteractionF2(data, localData[tbx+j], tempEnergy, tempForce);
data.force += tempForce;
energy += 0.5f*tempEnergy;
}
}
data.force *= 0.5f;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
// Compute torques.
zeroAtomData(data);
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempTorque;
computeOneInteractionT1(data, localData[tbx+j], tempTorque);
computeOneInteractionT2(data, localData[tbx+j], tempTorque);
data.force += tempTorque;
}
}
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
// Chain rule terms?
zeroAtomData(data);
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS)
computeOneInteractionB1B2(data, localData[tbx+j]);
}
atomicAdd(&bornForce[atom1], static_cast<unsigned long long>((long long) (data.bornForce*0xFFFFFFFF)));
}
else {
// This is an off-diagonal tile.
unsigned int j = y*TILE_SIZE + tgx;
loadAtomData2(localData[threadIdx.x], j, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, bornRadii);
zeroAtomData(localData[threadIdx.x]);
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
real tempEnergy;
computeOneInteractionF1(data, localData[tbx+tj], tempEnergy, tempForce);
computeOneInteractionF2(data, localData[tbx+tj], tempEnergy, tempForce);
data.force += tempForce;
localData[tbx+tj].force -= tempForce;
energy += tempEnergy;
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= 0.5f;
localData[threadIdx.x].force *= 0.5f;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
}
// Compute torques.
zeroAtomData(data);
zeroAtomData(localData[threadIdx.x]);
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempTorque;
computeOneInteractionT1(data, localData[tbx+tj], tempTorque);
computeOneInteractionT2(data, localData[tbx+tj], tempTorque);
data.force += tempTorque;
computeOneInteractionT1(localData[tbx+tj], data, tempTorque);
computeOneInteractionT2(localData[tbx+tj], data, tempTorque);
localData[tbx+tj].force += tempTorque;
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
}
// Chain rule terms?
zeroAtomData(data);
zeroAtomData(localData[threadIdx.x]);
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS)
computeOneInteractionB1B2(data, localData[tbx+tj]);
tj = (tj + 1) & (TILE_SIZE - 1);
}
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&bornForce[offset], static_cast<unsigned long long>((long long) (data.bornForce*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&bornForce[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].bornForce*0xFFFFFFFF)));
}
}
}
pos++;
} while (pos < end);
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy*0.5f;
}
/**
* Data structure used by computeChainRuleForce().
*/
typedef struct {
real3 pos, force;
real radius, scaledRadius, bornRadius, bornForce;
} AtomData3;
inline __device__ void loadAtomData3(AtomData3& data, int atom, const real4* __restrict__ posq, const float2* __restrict__ params, const real* __restrict__ bornRadius, const long long* __restrict__ bornForce) {
data.pos = trimTo3(posq[atom]);
data.bornRadius = bornRadius[atom];
float2 params1 = params[atom];
data.radius = params1.x;
data.scaledRadius = params1.y;
data.bornForce = bornForce[atom]/(real) 0xFFFFFFFF;
}
__device__ void computeBornChainRuleInteraction(AtomData3& atom1, AtomData3& atom2, real3& force) {
real third = 1/(real) 3;
real pi43 = 4*third*M_PI;
real factor = -POW(M_PI, third)*POW(6, 2/(real) 3)/9;
real term = pi43/(atom1.bornRadius*atom1.bornRadius*atom1.bornRadius);
term = factor/POW(term, 4/(real) 3);
real3 delta = atom2.pos-atom1.pos;
float sk = atom2.scaledRadius;
real sk2 = sk*sk;
real r2 = dot(delta, delta);
real r = SQRT(r2);
real de = 0;
if (atom1.radius+r < sk) {
real uik = sk-r;
real uik4 = uik*uik;
uik4 = uik4*uik4;
de = -4*M_PI/uik4;
real lik = sk - r;
real lik4 = lik*lik;
lik4 = lik4*lik4;
de += 0.25f*M_PI*(sk2-4*sk*r+17*r2)/(r2*lik4);
}
else if (r < atom1.radius+sk) {
real lik = atom1.radius;
real lik4 = lik*lik;
lik4 = lik4*lik4;
de += 0.25f*M_PI*(2*atom1.radius*atom1.radius-sk2-r2)/(r2*lik4);
}
else {
real lik = r-sk;
real lik4 = lik*lik;
lik4 = lik4*lik4;
de += 0.25f*M_PI*(sk2-4*sk*r+r2)/(r2*lik4);
}
real uik = r+sk;
real uik4 = uik*uik;
uik4 = uik4*uik4;
de -= 0.25f*M_PI*(sk2+4*sk*r+r2)/(r2*uik4);
real dbr = term*de/r;
de = dbr*atom1.bornForce;
force = delta*de;
}
/**
* Compute electrostatic interactions.
*/
extern "C" __global__ void computeChainRuleForce(
unsigned long long* __restrict__ forceBuffers, const real4* __restrict__ posq, unsigned int startTileIndex, unsigned int numTileIndices,
const float2* __restrict__ params, const real* __restrict__ bornRadii, const long long* __restrict__ bornForce) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int numTiles = numTileIndices;
unsigned int pos = startTileIndex+warp*numTiles/totalWarps;
unsigned int end = startTileIndex+(warp+1)*numTiles/totalWarps;
__shared__ AtomData3 localData[THREAD_BLOCK_SIZE];
do {
// Extract the coordinates of this tile
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
unsigned int x, y;
AtomData3 data;
if (pos < end) {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
unsigned int atom1 = x*TILE_SIZE + tgx;
loadAtomData3(data, atom1, posq, params, bornRadii, bornForce);
data.force = make_real3(0);
if (pos >= end)
; // This warp is done.
else if (x == y) {
// This tile is on the diagonal.
localData[threadIdx.x].pos = data.pos;
localData[threadIdx.x].radius = data.radius;
localData[threadIdx.x].scaledRadius = data.scaledRadius;
localData[threadIdx.x].bornRadius = data.bornRadius;
localData[threadIdx.x].bornForce = data.bornForce;
// Compute forces.
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
computeBornChainRuleInteraction(data, localData[tbx+j], tempForce);
data.force -= tempForce;
localData[tbx+j].force += tempForce;
}
}
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
}
else {
// This is an off-diagonal tile.
unsigned int j = y*TILE_SIZE + tgx;
loadAtomData3(localData[threadIdx.x], j, posq, params, bornRadii, bornForce);
localData[threadIdx.x].force = make_real3(0);
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
computeBornChainRuleInteraction(data, localData[tbx+tj], tempForce);
data.force -= tempForce;
localData[tbx+tj].force += tempForce;
computeBornChainRuleInteraction(localData[tbx+tj], data, tempForce);
data.force += tempForce;
localData[tbx+tj].force -= tempForce;
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
}
}
}
pos++;
} while (pos < end);
}
typedef struct {
real3 pos, force, dipole, inducedDipole, inducedDipolePolar, inducedDipoleS, inducedDipolePolarS;
real q, quadrupoleXX, quadrupoleXY, quadrupoleXZ;
real quadrupoleYY, quadrupoleYZ, quadrupoleZZ;
float thole, damp, padding;
} AtomData4;
__device__ void computeOneEDiffInteractionF1(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real& outputEnergy, real3& outputForce);
__device__ void computeOneEDiffInteractionT1(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real3& outputForce);
__device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real3& outputForce);
inline __device__ void loadAtomData4(AtomData4& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar,
const real* __restrict__ inducedDipoleS, const real* __restrict__ inducedDipolePolarS, const float2* __restrict__ dampingAndThole) {
real4 atomPosq = posq[atom];
data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
data.q = atomPosq.w;
data.dipole.x = labFrameDipole[atom*3];
data.dipole.y = labFrameDipole[atom*3+1];
data.dipole.z = labFrameDipole[atom*3+2];
data.quadrupoleXX = labFrameQuadrupole[atom*5];
data.quadrupoleXY = labFrameQuadrupole[atom*5+1];
data.quadrupoleXZ = labFrameQuadrupole[atom*5+2];
data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
data.quadrupoleZZ = -(data.quadrupoleXX+data.quadrupoleYY);
data.inducedDipole.x = inducedDipole[atom*3];
data.inducedDipole.y = inducedDipole[atom*3+1];
data.inducedDipole.z = inducedDipole[atom*3+2];
data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
data.inducedDipoleS.x = inducedDipoleS[atom*3];
data.inducedDipoleS.y = inducedDipoleS[atom*3+1];
data.inducedDipoleS.z = inducedDipoleS[atom*3+2];
data.inducedDipolePolarS.x = inducedDipolePolarS[atom*3];
data.inducedDipolePolarS.y = inducedDipolePolarS[atom*3+1];
data.inducedDipolePolarS.z = inducedDipolePolarS[atom*3+2];
float2 temp = dampingAndThole[atom];
data.damp = temp.x;
data.thole = temp.y;
}
__device__ real computeDScaleFactor(unsigned int polarizationGroup) {
return (polarizationGroup & 1 ? 0 : 1);
}
__device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGroup) {
bool x = (covalent.x & 1);
bool y = (covalent.y & 1);
bool p = (polarizationGroup & 1);
return (x && y ? 0.0f : (x && p ? 0.5f : 1.0f));
}
/**
* Compute electrostatic interactions.
*/
extern "C" __global__ void computeEDiffForce(
unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer,
const real4* __restrict__ posq, const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices,
const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, unsigned int startTileIndex, unsigned int numTileIndices,
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const real* __restrict__ inducedDipoleS, const real* __restrict__ inducedDipolePolarS,
const float2* __restrict__ dampingAndThole) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int numTiles = numTileIndices;
unsigned int pos = startTileIndex+warp*numTiles/totalWarps;
unsigned int end = startTileIndex+(warp+1)*numTiles/totalWarps;
real energy = 0;
__shared__ AtomData4 localData[THREAD_BLOCK_SIZE];
__shared__ unsigned int exclusionRange[2*WARPS_PER_GROUP];
__shared__ int exclusionIndex[WARPS_PER_GROUP];
do {
// Extract the coordinates of this tile
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
const unsigned int localGroupIndex = threadIdx.x/TILE_SIZE;
unsigned int x, y;
AtomData4 data;
if (pos < end) {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
unsigned int atom1 = x*TILE_SIZE + tgx;
loadAtomData4(data, atom1, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, inducedDipoleS, inducedDipolePolarS, dampingAndThole);
data.force = make_real3(0);
// Locate the exclusion data for this tile.
if (tgx < 2)
exclusionRange[2*localGroupIndex+tgx] = exclusionRowIndices[x+tgx];
if (tgx == 0)
exclusionIndex[localGroupIndex] = -1;
for (unsigned int i = exclusionRange[2*localGroupIndex]+tgx; i < exclusionRange[2*localGroupIndex+1]; i += TILE_SIZE)
if (exclusionIndices[i] == y)
exclusionIndex[localGroupIndex] = i*TILE_SIZE;
bool hasExclusions = (exclusionIndex[localGroupIndex] > -1);
if (pos >= end)
; // This warp is done.
else if (x == y) {
// This tile is on the diagonal.
localData[threadIdx.x].pos = data.pos;
localData[threadIdx.x].q = data.q;
localData[threadIdx.x].dipole = data.dipole;
localData[threadIdx.x].quadrupoleXX = data.quadrupoleXX;
localData[threadIdx.x].quadrupoleXY = data.quadrupoleXY;
localData[threadIdx.x].quadrupoleXZ = data.quadrupoleXZ;
localData[threadIdx.x].quadrupoleYY = data.quadrupoleYY;
localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ;
localData[threadIdx.x].quadrupoleZZ = data.quadrupoleZZ;
localData[threadIdx.x].inducedDipole = data.inducedDipole;
localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar;
localData[threadIdx.x].inducedDipoleS = data.inducedDipoleS;
localData[threadIdx.x].inducedDipolePolarS = data.inducedDipolePolarS;
localData[threadIdx.x].thole = data.thole;
localData[threadIdx.x].damp = data.damp;
uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
// Compute forces.
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
real tempEnergy;
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
computeOneEDiffInteractionF1(data, localData[tbx+j], d, p, tempEnergy, tempForce);
energy += 0.25f*tempEnergy;
data.force += tempForce;
}
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
}
data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
// Compute torques.
data.force = make_real3(0);
covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempTorque;
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
computeOneEDiffInteractionT1(data, localData[tbx+j], d, p, tempTorque);
data.force += tempTorque;
}
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
}
data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
}
else {
// This is an off-diagonal tile.
unsigned int j = y*TILE_SIZE + tgx;
loadAtomData4(localData[threadIdx.x], j, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, inducedDipoleS, inducedDipolePolarS, dampingAndThole);
localData[threadIdx.x].force = make_real3(0);
uint2 covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0));
unsigned int polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0);
covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx));
covalent.y = (covalent.y >> tgx) | (covalent.y << (TILE_SIZE - tgx));
polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx));
// Compute forces.
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
real tempEnergy;
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
computeOneEDiffInteractionF1(data, localData[tbx+tj], d, p, tempEnergy, tempForce);
energy += 0.5f*tempEnergy;
data.force += tempForce;
localData[tbx+tj].force -= tempForce;
}
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
}
// Compute torques.
data.force = make_real3(0);
localData[threadIdx.x].force = make_real3(0);
covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0));
polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0);
covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx));
covalent.y = (covalent.y >> tgx) | (covalent.y << (TILE_SIZE - tgx));
polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx));
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempTorque;
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
computeOneEDiffInteractionT1(data, localData[tbx+tj], d, p, tempTorque);
data.force += tempTorque;
computeOneEDiffInteractionT3(data, localData[tbx+tj], d, p, tempTorque);
localData[tbx+tj].force += tempTorque;
}
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
}
}
}
pos++;
} while (pos < end);
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy*ENERGY_SCALE_FACTOR;
}
#if defined F1
__device__ void computeOneEDiffInteractionF1(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real& outputEnergy, real3& outputForce) {
#elif defined T1
__device__ void computeOneEDiffInteractionT1(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real3& outputForce) {
#elif defined T3
__device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real3& outputForce) {
#endif
const float uscale = 1;
// deltaR
real xr = atom2.pos.x - atom1.pos.x;
real yr = atom2.pos.y - atom1.pos.y;
real zr = atom2.pos.z - atom1.pos.z;
real r22 = xr*xr + yr*yr + zr*zr;
real r = SQRT(r22);
real rr1 = RECIP(r);
real rr2 = rr1*rr1;
real rr3 = rr1*rr2;
real scale3 = 1;
real scale5 = 1;
real scale7 = 1;
#ifdef F1
real ddsc3_1 = 0;
real ddsc3_2 = 0;
real ddsc3_3 = 0;
real ddsc5_1 = 0;
real ddsc5_2 = 0;
real ddsc5_3 = 0;
real ddsc7_1 = 0;
real ddsc7_2 = 0;
real ddsc7_3 = 0;
real ftm2i1 = 0;
real ftm2i2 = 0;
real ftm2i3 = 0;
#endif
// apply Thole polarization damping to scale factors
real damp = atom1.damp*atom2.damp;
if (damp != 0) {
real pgamma = atom2.thole > atom1.thole ? atom1.thole : atom2.thole;
real ratio = (r/damp);
damp = -pgamma*ratio*ratio*ratio;
if (damp > -50) {
real dampE = EXP(damp);
real damp2 = damp*damp;
scale3 = 1 - dampE;
scale5 = 1 - (1 - damp)*dampE;
scale7 = 1 - (1 - damp + 0.6f*damp2)*dampE;
#ifdef F1
ddsc3_1 = -3*damp*EXP(damp)*xr*rr2*rr3;
ddsc3_2 = -3*damp*EXP(damp)*yr*rr2*rr3;
ddsc3_3 = -3*damp*EXP(damp)*zr*rr2*rr3;
ddsc5_1 = -3*damp*ddsc3_1*rr2;
ddsc5_2 = -3*damp*ddsc3_2*rr2;
ddsc5_3 = -3*damp*ddsc3_3*rr2;
ddsc7_1 = -5*(0.2f+0.6f*damp)*ddsc5_1*rr2;
ddsc7_2 = -5*(0.2f+0.6f*damp)*ddsc5_2*rr2;
ddsc7_3 = -5*(0.2f+0.6f*damp)*ddsc5_3*rr2;
#endif
}
}
real scale3i = 3*scale3*uscale*rr3*rr2;
real scale5i = 3*scale5*uscale*rr3*rr2;
#ifdef APPLY_SCALE
real dsc3 = scale3*dScale*rr3;
real dsc5 = 3*scale5*dScale*rr3*rr2;
real dsc7 = 15*scale7*dScale*rr3*rr3*rr1;
real psc3 = scale3*pScale*rr3;
real psc5 = 3*scale5*pScale*rr3*rr2;
real psc7 = 15*scale7*pScale*rr3*rr3*rr1;
#else
real psc3 = scale3*rr3;
real psc5 = 3*scale5*rr3*rr2;
real psc7 = 15*scale7*rr3*rr3*rr1;
#endif
#ifdef T1
real dixr1 = atom1.dipole.y*zr - atom1.dipole.z*yr;
real dixr2 = atom1.dipole.z*xr - atom1.dipole.x*zr;
real dixr3 = atom1.dipole.x*yr - atom1.dipole.y*xr;
#endif
#ifdef T3
real dkxr1 = atom2.dipole.y*zr - atom2.dipole.z*yr;
real dkxr2 = atom2.dipole.z*xr - atom2.dipole.x*zr;
real dkxr3 = atom2.dipole.x*yr - atom2.dipole.y*xr;
#endif
real qir1 = atom1.quadrupoleXX*xr + atom1.quadrupoleXY*yr + atom1.quadrupoleXZ*zr;
real qir2 = atom1.quadrupoleXY*xr + atom1.quadrupoleYY*yr + atom1.quadrupoleYZ*zr;
real qir3 = atom1.quadrupoleXZ*xr + atom1.quadrupoleYZ*yr + atom1.quadrupoleZZ*zr;
real qkr1 = atom2.quadrupoleXX*xr + atom2.quadrupoleXY*yr + atom2.quadrupoleXZ*zr;
real qkr2 = atom2.quadrupoleXY*xr + atom2.quadrupoleYY*yr + atom2.quadrupoleYZ*zr;
real qkr3 = atom2.quadrupoleXZ*xr + atom2.quadrupoleYZ*yr + atom2.quadrupoleZZ*zr;
#ifdef T1
real rxqir1 = yr*qir3 - zr*qir2;
real rxqir2 = zr*qir1 - xr*qir3;
real rxqir3 = xr*qir2 - yr*qir1;
#endif
#ifdef T3
real rxqkr1 = yr*qkr3 - zr*qkr2;
real rxqkr2 = zr*qkr1 - xr*qkr3;
real rxqkr3 = xr*qkr2 - yr*qkr1;
#endif
// get intermediate variables for permanent energy terms
real sc3 = atom1.dipole.x*xr + atom1.dipole.y*yr + atom1.dipole.z*zr;
real sc4 = atom2.dipole.x*xr + atom2.dipole.y*yr + atom2.dipole.z*zr;
real sc5 = qir1*xr + qir2*yr + qir3*zr;
real sc6 = qkr1*xr + qkr2*yr + qkr3*zr;
#ifdef T1
real dixuk1 = atom1.dipole.y*atom2.inducedDipoleS.z - atom1.dipole.z*atom2.inducedDipoleS.y;
real dixuk2 = atom1.dipole.z*atom2.inducedDipoleS.x - atom1.dipole.x*atom2.inducedDipoleS.z;
real dixuk3 = atom1.dipole.x*atom2.inducedDipoleS.y - atom1.dipole.y*atom2.inducedDipoleS.x;
real dixukp1 = atom1.dipole.y*atom2.inducedDipolePolarS.z - atom1.dipole.z*atom2.inducedDipolePolarS.y;
real dixukp2 = atom1.dipole.z*atom2.inducedDipolePolarS.x - atom1.dipole.x*atom2.inducedDipolePolarS.z;
real dixukp3 = atom1.dipole.x*atom2.inducedDipolePolarS.y - atom1.dipole.y*atom2.inducedDipolePolarS.x;
#endif
#ifdef T3
real dkxui1 = atom2.dipole.y*atom1.inducedDipoleS.z - atom2.dipole.z*atom1.inducedDipoleS.y;
real dkxui2 = atom2.dipole.z*atom1.inducedDipoleS.x - atom2.dipole.x*atom1.inducedDipoleS.z;
real dkxui3 = atom2.dipole.x*atom1.inducedDipoleS.y - atom2.dipole.y*atom1.inducedDipoleS.x;
real dkxuip1 = atom2.dipole.y*atom1.inducedDipolePolarS.z - atom2.dipole.z*atom1.inducedDipolePolarS.y;
real dkxuip2 = atom2.dipole.z*atom1.inducedDipolePolarS.x - atom2.dipole.x*atom1.inducedDipolePolarS.z;
real dkxuip3 = atom2.dipole.x*atom1.inducedDipolePolarS.y - atom2.dipole.y*atom1.inducedDipolePolarS.x;
#endif
#if defined F1 || defined T1
real qiuk1 = atom1.quadrupoleXX*atom2.inducedDipoleS.x + atom1.quadrupoleXY*atom2.inducedDipoleS.y + atom1.quadrupoleXZ*atom2.inducedDipoleS.z;
real qiuk2 = atom1.quadrupoleXY*atom2.inducedDipoleS.x + atom1.quadrupoleYY*atom2.inducedDipoleS.y + atom1.quadrupoleYZ*atom2.inducedDipoleS.z;
real qiuk3 = atom1.quadrupoleXZ*atom2.inducedDipoleS.x + atom1.quadrupoleYZ*atom2.inducedDipoleS.y + atom1.quadrupoleZZ*atom2.inducedDipoleS.z;
real qiukp1 = atom1.quadrupoleXX*atom2.inducedDipolePolarS.x + atom1.quadrupoleXY*atom2.inducedDipolePolarS.y + atom1.quadrupoleXZ*atom2.inducedDipolePolarS.z;
real qiukp2 = atom1.quadrupoleXY*atom2.inducedDipolePolarS.x + atom1.quadrupoleYY*atom2.inducedDipolePolarS.y + atom1.quadrupoleYZ*atom2.inducedDipolePolarS.z;
real qiukp3 = atom1.quadrupoleXZ*atom2.inducedDipolePolarS.x + atom1.quadrupoleYZ*atom2.inducedDipolePolarS.y + atom1.quadrupoleZZ*atom2.inducedDipolePolarS.z;
#if defined F1
qiuk1 -= atom1.quadrupoleXX*atom2.inducedDipole.x + atom1.quadrupoleXY*atom2.inducedDipole.y + atom1.quadrupoleXZ*atom2.inducedDipole.z;
qiuk2 -= atom1.quadrupoleXY*atom2.inducedDipole.x + atom1.quadrupoleYY*atom2.inducedDipole.y + atom1.quadrupoleYZ*atom2.inducedDipole.z;
qiuk3 -= atom1.quadrupoleXZ*atom2.inducedDipole.x + atom1.quadrupoleYZ*atom2.inducedDipole.y + atom1.quadrupoleZZ*atom2.inducedDipole.z;
qiukp1 -= atom1.quadrupoleXX*atom2.inducedDipolePolar.x + atom1.quadrupoleXY*atom2.inducedDipolePolar.y + atom1.quadrupoleXZ*atom2.inducedDipolePolar.z;
qiukp2 -= atom1.quadrupoleXY*atom2.inducedDipolePolar.x + atom1.quadrupoleYY*atom2.inducedDipolePolar.y + atom1.quadrupoleYZ*atom2.inducedDipolePolar.z;
qiukp3 -= atom1.quadrupoleXZ*atom2.inducedDipolePolar.x + atom1.quadrupoleYZ*atom2.inducedDipolePolar.y + atom1.quadrupoleZZ*atom2.inducedDipolePolar.z;
#ifdef APPLY_SCALE
ftm2i1 -= psc5*qiuk1 + dsc5*qiukp1;
ftm2i2 -= psc5*qiuk2 + dsc5*qiukp2;
ftm2i3 -= psc5*qiuk3 + dsc5*qiukp3;
#else
ftm2i1 -= psc5*(qiuk1 + qiukp1);
ftm2i2 -= psc5*(qiuk2 + qiukp2);
ftm2i3 -= psc5*(qiuk3 + qiukp3);
#endif
#endif
#endif
#if defined F1 || defined T3
real qkui1 = atom2.quadrupoleXX*atom1.inducedDipoleS.x + atom2.quadrupoleXY*atom1.inducedDipoleS.y + atom2.quadrupoleXZ*atom1.inducedDipoleS.z;
real qkui2 = atom2.quadrupoleXY*atom1.inducedDipoleS.x + atom2.quadrupoleYY*atom1.inducedDipoleS.y + atom2.quadrupoleYZ*atom1.inducedDipoleS.z;
real qkui3 = atom2.quadrupoleXZ*atom1.inducedDipoleS.x + atom2.quadrupoleYZ*atom1.inducedDipoleS.y + atom2.quadrupoleZZ*atom1.inducedDipoleS.z;
real qkuip1 = atom2.quadrupoleXX*atom1.inducedDipolePolarS.x + atom2.quadrupoleXY*atom1.inducedDipolePolarS.y + atom2.quadrupoleXZ*atom1.inducedDipolePolarS.z;
real qkuip2 = atom2.quadrupoleXY*atom1.inducedDipolePolarS.x + atom2.quadrupoleYY*atom1.inducedDipolePolarS.y + atom2.quadrupoleYZ*atom1.inducedDipolePolarS.z;
real qkuip3 = atom2.quadrupoleXZ*atom1.inducedDipolePolarS.x + atom2.quadrupoleYZ*atom1.inducedDipolePolarS.y + atom2.quadrupoleZZ*atom1.inducedDipolePolarS.z;
#if defined F1
qkui1 -= atom2.quadrupoleXX*atom1.inducedDipole.x + atom2.quadrupoleXY*atom1.inducedDipole.y + atom2.quadrupoleXZ*atom1.inducedDipole.z;
qkui2 -= atom2.quadrupoleXY*atom1.inducedDipole.x + atom2.quadrupoleYY*atom1.inducedDipole.y + atom2.quadrupoleYZ*atom1.inducedDipole.z;
qkui3 -= atom2.quadrupoleXZ*atom1.inducedDipole.x + atom2.quadrupoleYZ*atom1.inducedDipole.y + atom2.quadrupoleZZ*atom1.inducedDipole.z;
qkuip1 -= atom2.quadrupoleXX*atom1.inducedDipolePolar.x + atom2.quadrupoleXY*atom1.inducedDipolePolar.y + atom2.quadrupoleXZ*atom1.inducedDipolePolar.z;
qkuip2 -= atom2.quadrupoleXY*atom1.inducedDipolePolar.x + atom2.quadrupoleYY*atom1.inducedDipolePolar.y + atom2.quadrupoleYZ*atom1.inducedDipolePolar.z;
qkuip3 -= atom2.quadrupoleXZ*atom1.inducedDipolePolar.x + atom2.quadrupoleYZ*atom1.inducedDipolePolar.y + atom2.quadrupoleZZ*atom1.inducedDipolePolar.z;
#ifdef APPLY_SCALE
ftm2i1 += psc5*qkui1 + dsc5*qkuip1;
ftm2i2 += psc5*qkui2 + dsc5*qkuip2;
ftm2i3 += psc5*qkui3 + dsc5*qkuip3;
#else
ftm2i1 += psc5*(qkui1 + qkuip1);
ftm2i2 += psc5*(qkui2 + qkuip2);
ftm2i3 += psc5*(qkui3 + qkuip3);
#endif
#endif
#endif
#ifdef T3
real uixqkr1 = atom1.inducedDipoleS.y*qkr3 - atom1.inducedDipoleS.z*qkr2;
real uixqkr2 = atom1.inducedDipoleS.z*qkr1 - atom1.inducedDipoleS.x*qkr3;
real uixqkr3 = atom1.inducedDipoleS.x*qkr2 - atom1.inducedDipoleS.y*qkr1;
real uixqkrp1 = atom1.inducedDipolePolarS.y*qkr3 - atom1.inducedDipolePolarS.z*qkr2;
real uixqkrp2 = atom1.inducedDipolePolarS.z*qkr1 - atom1.inducedDipolePolarS.x*qkr3;
real uixqkrp3 = atom1.inducedDipolePolarS.x*qkr2 - atom1.inducedDipolePolarS.y*qkr1;
real rxqkuip1 = yr*qkuip3 - zr*qkuip2;
real rxqkuip2 = zr*qkuip1 - xr*qkuip3;
real rxqkuip3 = xr*qkuip2 - yr*qkuip1;
real rxqkui1 = yr*qkui3 - zr*qkui2;
real rxqkui2 = zr*qkui1 - xr*qkui3;
real rxqkui3 = xr*qkui2 - yr*qkui1;
#endif
#ifdef T1
real ukxqir1 = atom2.inducedDipoleS.y*qir3 - atom2.inducedDipoleS.z*qir2;
real ukxqir2 = atom2.inducedDipoleS.z*qir1 - atom2.inducedDipoleS.x*qir3;
real ukxqir3 = atom2.inducedDipoleS.x*qir2 - atom2.inducedDipoleS.y*qir1;
real ukxqirp1 = atom2.inducedDipolePolarS.y*qir3 - atom2.inducedDipolePolarS.z*qir2;
real ukxqirp2 = atom2.inducedDipolePolarS.z*qir1 - atom2.inducedDipolePolarS.x*qir3;
real ukxqirp3 = atom2.inducedDipolePolarS.x*qir2 - atom2.inducedDipolePolarS.y*qir1;
real rxqiuk1 = yr*qiuk3 - zr*qiuk2;
real rxqiuk2 = zr*qiuk1 - xr*qiuk3;
real rxqiuk3 = xr*qiuk2 - yr*qiuk1;
real rxqiukp1 = yr*qiukp3 - zr*qiukp2;
real rxqiukp2 = zr*qiukp1 - xr*qiukp3;
real rxqiukp3 = xr*qiukp2 - yr*qiukp1;
#endif
// get intermediate variables for induction energy terms
real sci3 = atom1.inducedDipoleS.x*xr + atom1.inducedDipoleS.y*yr + atom1.inducedDipoleS.z*zr;
real sci4 = atom2.inducedDipoleS.x*xr + atom2.inducedDipoleS.y*yr + atom2.inducedDipoleS.z*zr;
#ifdef F1
ftm2i1 += 0.5f*scale5i*(sci4*atom1.inducedDipolePolarS.x + sci3*atom2.inducedDipolePolarS.x);
ftm2i2 += 0.5f*scale5i*(sci4*atom1.inducedDipolePolarS.y + sci3*atom2.inducedDipolePolarS.y);
ftm2i3 += 0.5f*scale5i*(sci4*atom1.inducedDipolePolarS.z + sci3*atom2.inducedDipolePolarS.z);
#endif
real sci3Y = atom1.inducedDipole.x*xr + atom1.inducedDipole.y*yr + atom1.inducedDipole.z*zr;
real sci4Y = atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr;
#ifdef F1
ftm2i1 -= 0.5f*scale5i*(sci3Y*atom2.inducedDipolePolar.x + sci4Y*atom1.inducedDipolePolar.x);
ftm2i2 -= 0.5f*scale5i*(sci3Y*atom2.inducedDipolePolar.y + sci4Y*atom1.inducedDipolePolar.y);
ftm2i3 -= 0.5f*scale5i*(sci3Y*atom2.inducedDipolePolar.z + sci4Y*atom1.inducedDipolePolar.z);
#endif
real sci7 = qir1*atom2.inducedDipoleS.x + qir2*atom2.inducedDipoleS.y + qir3*atom2.inducedDipoleS.z;
real sci8 = qkr1*atom1.inducedDipoleS.x + qkr2*atom1.inducedDipoleS.y + qkr3*atom1.inducedDipoleS.z;
real scip1 = atom1.inducedDipolePolarS.x*atom2.dipole.x + atom1.inducedDipolePolarS.y*atom2.dipole.y + atom1.inducedDipolePolarS.z*atom2.dipole.z +
atom1.dipole.x*atom2.inducedDipolePolarS.x + atom1.dipole.y*atom2.inducedDipolePolarS.y + atom1.dipole.z*atom2.inducedDipolePolarS.z;
real scip2 = atom1.inducedDipoleS.x*atom2.inducedDipolePolarS.x + atom1.inducedDipoleS.y*atom2.inducedDipolePolarS.y + atom1.inducedDipoleS.z*atom2.inducedDipolePolarS.z +
atom1.inducedDipolePolarS.x*atom2.inducedDipoleS.x + atom1.inducedDipolePolarS.y*atom2.inducedDipoleS.y + atom1.inducedDipolePolarS.z*atom2.inducedDipoleS.z;
sci7 -= qir1*atom2.inducedDipole.x + qir2*atom2.inducedDipole.y + qir3*atom2.inducedDipole.z;
sci8 -= qkr1*atom1.inducedDipole.x + qkr2*atom1.inducedDipole.y + qkr3*atom1.inducedDipole.z;
scip1 -= atom1.inducedDipolePolar.x*atom2.dipole.x + atom1.inducedDipolePolar.y*atom2.dipole.y + atom1.inducedDipolePolar.z*atom2.dipole.z +
atom1.dipole.x*atom2.inducedDipolePolar.x + atom1.dipole.y*atom2.inducedDipolePolar.y + atom1.dipole.z*atom2.inducedDipolePolar.z;
scip2 -= atom1.inducedDipole.x*atom2.inducedDipolePolar.x + atom1.inducedDipole.y*atom2.inducedDipolePolar.y + atom1.inducedDipole.z*atom2.inducedDipolePolar.z +
atom1.inducedDipolePolar.x*atom2.inducedDipole.x + atom1.inducedDipolePolar.y*atom2.inducedDipole.y + atom1.inducedDipolePolar.z*atom2.inducedDipole.z;
real scip3 = atom1.inducedDipolePolarS.x*xr + atom1.inducedDipolePolarS.y*yr + atom1.inducedDipolePolarS.z*zr;
real scip4 = atom2.inducedDipolePolarS.x*xr + atom2.inducedDipolePolarS.y*yr + atom2.inducedDipolePolarS.z*zr;
real gfi1 = -2.5f*(sci3*scip4+scip3*sci4)*scale5i;
#ifdef F1
ftm2i1 += 0.5f*scale5i*(scip4*atom1.inducedDipoleS.x + scip3*atom2.inducedDipoleS.x);
ftm2i2 += 0.5f*scale5i*(scip4*atom1.inducedDipoleS.y + scip3*atom2.inducedDipoleS.y);
ftm2i3 += 0.5f*scale5i*(scip4*atom1.inducedDipoleS.z + scip3*atom2.inducedDipoleS.z);
#endif
real scip3Y = atom1.inducedDipolePolar.x*xr + atom1.inducedDipolePolar.y*yr + atom1.inducedDipolePolar.z*zr;
real scip4Y = atom2.inducedDipolePolar.x*xr + atom2.inducedDipolePolar.y*yr + atom2.inducedDipolePolar.z*zr;
gfi1 += 2.5f*(sci3Y*scip4Y + scip3Y*sci4Y)*scale5i;
#ifdef F1
ftm2i1 -= 0.5f*scale5i*(scip3Y*atom2.inducedDipole.x + scip4Y*atom1.inducedDipole.x);
ftm2i2 -= 0.5f*scale5i*(scip3Y*atom2.inducedDipole.y + scip4Y*atom1.inducedDipole.y);
ftm2i3 -= 0.5f*scale5i*(scip3Y*atom2.inducedDipole.z + scip4Y*atom1.inducedDipole.z);
#endif
sci3Y = sci3 - sci3Y;
sci4Y = sci4 - sci4Y;
scip3Y = scip3 - scip3Y;
scip4Y = scip4 - scip4Y;
real scip7 = qir1*atom2.inducedDipolePolarS.x + qir2*atom2.inducedDipolePolarS.y + qir3*atom2.inducedDipolePolarS.z;
scip7 -= qir1*atom2.inducedDipolePolar.x + qir2*atom2.inducedDipolePolar.y + qir3*atom2.inducedDipolePolar.z;
real scip8 = qkr1*atom1.inducedDipolePolarS.x + qkr2*atom1.inducedDipolePolarS.y + qkr3*atom1.inducedDipolePolarS.z;
scip8 -= qkr1*atom1.inducedDipolePolar.x + qkr2*atom1.inducedDipolePolar.y + qkr3*atom1.inducedDipolePolar.z;
real sci1 = atom1.inducedDipoleS.x*atom2.dipole.x + atom1.inducedDipoleS.y*atom2.dipole.y +
atom1.inducedDipoleS.z*atom2.dipole.z + atom1.dipole.x*atom2.inducedDipoleS.x +
atom1.dipole.y*atom2.inducedDipoleS.y + atom1.dipole.z*atom2.inducedDipoleS.z;
sci1 -= atom1.inducedDipole.x*atom2.dipole.x + atom1.inducedDipole.y*atom2.dipole.y +
atom1.inducedDipole.z*atom2.dipole.z + atom1.dipole.x*atom2.inducedDipole.x +
atom1.dipole.y*atom2.inducedDipole.y + atom1.dipole.z*atom2.inducedDipole.z;
real gli1 = atom2.q*sci3Y - atom1.q*sci4Y + sci1;
real gli2 = -sc3*sci4Y - sci3Y*sc4 + 2*(sci7-sci8);
real gli3 = sci3Y*sc6 - sci4Y*sc5;
real glip1 = atom2.q*scip3Y - atom1.q*scip4Y + scip1;
real glip2 = -sc3*scip4Y - scip3Y*sc4 + 2*(scip7-scip8);
real glip3 = scip3Y*sc6 - scip4Y*sc5;
#ifdef F1
#ifdef APPLY_SCALE
ftm2i1 -= 0.5f*((gli1*pScale + glip1*dScale)*ddsc3_1 + (gli2*pScale + glip2*dScale)*ddsc5_1 + (gli3*pScale+glip3*dScale)*ddsc7_1);
ftm2i2 -= 0.5f*((gli1*pScale + glip1*dScale)*ddsc3_2 + (gli2*pScale + glip2*dScale)*ddsc5_2 + (gli3*pScale+glip3*dScale)*ddsc7_2);
ftm2i3 -= 0.5f*((gli1*pScale + glip1*dScale)*ddsc3_3 + (gli2*pScale + glip2*dScale)*ddsc5_3 + (gli3*pScale+glip3*dScale)*ddsc7_3);
#else
ftm2i1 -= 0.5f*((gli1 + glip1)*ddsc3_1 + (gli2 + glip2)*ddsc5_1 + (gli3 + glip3)*ddsc7_1);
ftm2i2 -= 0.5f*((gli1 + glip1)*ddsc3_2 + (gli2 + glip2)*ddsc5_2 + (gli3 + glip3)*ddsc7_2);
ftm2i3 -= 0.5f*((gli1 + glip1)*ddsc3_3 + (gli2 + glip2)*ddsc5_3 + (gli3 + glip3)*ddsc7_3);
#endif
outputEnergy = gli1*psc3 + gli2*psc5 + gli3*psc7;
#endif
#ifdef APPLY_SCALE
gfi1 += 1.5f*(gli1*psc3 + glip1*dsc3);
gfi1 += 2.5f*(gli2*psc5 + glip2*dsc5);
gfi1 += 3.5f*(gli3*psc7 + glip3*dsc7);
#else
gfi1 += 1.5f*psc3*(gli1 + glip1);
gfi1 += 2.5f*psc5*(gli2 + glip2);
gfi1 += 3.5f*psc7*(gli3 + glip3);
#endif
gfi1 *= rr2;
gfi1 += 0.5f*scip2*scale3i;
#if defined F1 || defined T1
#ifdef APPLY_SCALE
real gfi5 = (sci4Y*psc7+scip4Y*dsc7);
#else
real gfi5 = psc7*(sci4Y + scip4Y);
#endif
#endif
#if defined F1 || defined T3
#ifdef APPLY_SCALE
real gfi6 = -(sci3Y*psc7+scip3Y*dsc7);
#else
real gfi6 = -psc7*(sci3Y + scip3Y);
#endif
#endif
#ifdef F1
ftm2i1 += gfi1*xr;
real diff0 = atom1.inducedDipoleS.x - atom1.inducedDipole.x;
real diff1 = atom1.inducedDipolePolarS.x - atom1.inducedDipolePolar.x;
#ifdef APPLY_SCALE
ftm2i1 += 0.5f*(-atom2.q*(diff0*psc3 + diff1*dsc3) + sc4*(diff0*psc5 + diff1*dsc5) - sc6*(diff0*psc7 + diff1*dsc7));
#else
ftm2i1 += 0.5f*(-atom2.q*psc3*(diff0 + diff1) + sc4*psc5*(diff0 + diff1) - sc6*psc7*(diff0 + diff1));
#endif
diff0 = atom2.inducedDipoleS.x - atom2.inducedDipole.x;
diff1 = atom2.inducedDipolePolarS.x - atom2.inducedDipolePolar.x;
#ifdef APPLY_SCALE
ftm2i1 += 0.5f*(atom1.q*(diff0*psc3 + diff1*dsc3) + sc3*(diff0*psc5 + diff1*dsc5) + sc5*(diff0*psc7 + diff1*dsc7));
ftm2i1 += 0.5f*(sci4Y*psc5+scip4Y*dsc5)*atom1.dipole.x + 0.5f*(sci3Y*psc5+scip3Y*dsc5)*atom2.dipole.x + gfi5*qir1 + gfi6*qkr1;
#else
ftm2i1 += 0.5f*(atom1.q*psc3*(diff0 + diff1) + sc3*psc5*(diff0 + diff1) + sc5*psc7*(diff0 + diff1));
ftm2i1 += 0.5f*psc5*(sci4Y + scip4Y)*atom1.dipole.x + 0.5f*psc5*(sci3Y + scip3Y)*atom2.dipole.x + gfi5*qir1 + gfi6*qkr1;
#endif
ftm2i2 += gfi1*yr;
diff0 = atom1.inducedDipoleS.y - atom1.inducedDipole.y;
diff1 = atom1.inducedDipolePolarS.y - atom1.inducedDipolePolar.y;
#ifdef APPLY_SCALE
ftm2i2 += 0.5f*(-atom2.q*(diff0*psc3 + diff1*dsc3) + sc4*(diff0*psc5 + diff1*dsc5) - sc6*(diff0*psc7 + diff1*dsc7));
#else
ftm2i2 += 0.5f*(-atom2.q*psc3*(diff0 + diff1) + sc4*psc5*(diff0 + diff1) - sc6*psc7*(diff0 + diff1));
#endif
diff0 = atom2.inducedDipoleS.y - atom2.inducedDipole.y;
diff1 = atom2.inducedDipolePolarS.y - atom2.inducedDipolePolar.y;
#ifdef APPLY_SCALE
ftm2i2 += 0.5f*(atom1.q*(diff0*psc3 + diff1*dsc3) + sc3*(diff0*psc5 + diff1*dsc5) + sc5*(diff0*psc7 + diff1*dsc7));
ftm2i2 += 0.5f*(sci4Y*psc5+scip4Y*dsc5)*atom1.dipole.y + 0.5f*(sci3Y*psc5+scip3Y*dsc5)*atom2.dipole.y + gfi5*qir2 + gfi6*qkr2;
#else
ftm2i2 += 0.5f*(atom1.q*psc3*(diff0 + diff1) + sc3*psc5*(diff0 + diff1) + sc5*psc7*(diff0 + diff1));
ftm2i2 += 0.5f*psc5*(sci4Y +scip4Y)*atom1.dipole.y + 0.5f*psc5*(sci3Y +scip3Y)*atom2.dipole.y + gfi5*qir2 + gfi6*qkr2;
#endif
ftm2i3 += gfi1*zr;
diff0 = atom1.inducedDipoleS.z - atom1.inducedDipole.z;
diff1 = atom1.inducedDipolePolarS.z - atom1.inducedDipolePolar.z;
#ifdef APPLY_SCALE
ftm2i3 += 0.5f*(-atom2.q*(diff0*psc3 + diff1*dsc3) + sc4*(diff0*psc5 + diff1*dsc5) - sc6*(diff0*psc7 + diff1*dsc7));
#else
ftm2i3 += 0.5f*(-atom2.q*psc3*(diff0 + diff1) + sc4*psc5*(diff0 + diff1) - sc6*psc7*(diff0 + diff1));
#endif
diff0 = atom2.inducedDipoleS.z - atom2.inducedDipole.z;
diff1 = atom2.inducedDipolePolarS.z - atom2.inducedDipolePolar.z;
#ifdef APPLY_SCALE
ftm2i3 += 0.5f*(atom1.q*(diff0*psc3 + diff1*dsc3) + sc3*(diff0*psc5 + diff1*dsc5) + sc5*(diff0*psc7 + diff1*dsc7));
ftm2i3 += 0.5f*(sci4Y*psc5+scip4Y*dsc5)*atom1.dipole.z + 0.5f*(sci3Y*psc5+scip3Y*dsc5)*atom2.dipole.z + gfi5*qir3 + gfi6*qkr3;
#else
ftm2i3 += 0.5f*(atom1.q*psc3*(diff0 + diff1) + sc3*psc5*(diff0 + diff1) + sc5*psc7*(diff0 + diff1));
ftm2i3 += 0.5f*psc5*(sci4Y + scip4Y)*atom1.dipole.z + 0.5f*psc5*(sci3Y+scip3Y)*atom2.dipole.z + gfi5*qir3 + gfi6*qkr3;
#endif
// intermediate values needed for partially excluded interactions
// correction to convert mutual to direct polarization force
#ifdef DIRECT_POLARIZATION
real gfd = (scip2*scale3i - 5*rr2*(scip3*sci4+sci3*scip4)*scale5i);
real fdir1 = gfd*xr + scale5i* (sci4*atom1.inducedDipolePolarS.x+scip4*atom1.inducedDipoleS.x + sci3*atom2.inducedDipolePolarS.x+scip3*atom2.inducedDipoleS.x);
real fdir2 = gfd*yr + scale5i* (sci4*atom1.inducedDipolePolarS.y+scip4*atom1.inducedDipoleS.y + sci3*atom2.inducedDipolePolarS.y+scip3*atom2.inducedDipoleS.y);
real fdir3 = gfd*zr + scale5i* (sci4*atom1.inducedDipolePolarS.z+scip4*atom1.inducedDipoleS.z + sci3*atom2.inducedDipolePolarS.z+scip3*atom2.inducedDipoleS.z);
ftm2i1 -= 0.5f*fdir1;
ftm2i2 -= 0.5f*fdir2;
ftm2i3 -= 0.5f*fdir3;
real sci3X = sci3 - sci3Y;
real sci4X = sci4 - sci4Y;
real scip3X = scip3 - scip3Y;
real scip4X = scip4 - scip4Y;
gfd = -5*rr2*(scip3X*sci4X+sci3X*scip4X)*scale5i;
fdir1 = gfd*xr + scale5i*(sci4X*atom1.inducedDipolePolar.x + scip4X*atom1.inducedDipole.x + sci3X*atom2.inducedDipolePolar.x + scip3X*atom2.inducedDipole.x);
fdir2 = gfd*yr + scale5i*(sci4X*atom1.inducedDipolePolar.y + scip4X*atom1.inducedDipole.y + sci3X*atom2.inducedDipolePolar.y + scip3X*atom2.inducedDipole.y);
fdir3 = gfd*zr + scale5i*(sci4X*atom1.inducedDipolePolar.z + scip4X*atom1.inducedDipole.z + sci3X*atom2.inducedDipolePolar.z + scip3X*atom2.inducedDipole.z);
ftm2i1 += 0.5f*fdir1;
ftm2i2 += 0.5f*fdir2;
ftm2i3 += 0.5f*fdir3;
#else
real findmp1 = uscale*(scip2*ddsc3_1 - ddsc5_1*(sci3*scip4+scip3*sci4));
real findmp2 = uscale*(scip2*ddsc3_2 - ddsc5_2*(sci3*scip4+scip3*sci4));
real findmp3 = uscale*(scip2*ddsc3_3 - ddsc5_3*(sci3*scip4+scip3*sci4));
ftm2i1 -= 0.5f*findmp1;
ftm2i2 -= 0.5f*findmp2;
ftm2i3 -= 0.5f*findmp3;
real sci3X = sci3 - sci3Y;
real sci4X = sci4 - sci4Y;
real scip3X = scip3 - scip3Y;
real scip4X = scip4 - scip4Y;
ftm2i1 += 0.5f*uscale*(-ddsc5_1*(sci3X*scip4X+scip3X*sci4X));
ftm2i2 += 0.5f*uscale*(-ddsc5_2*(sci3X*scip4X+scip3X*sci4X));
ftm2i3 += 0.5f*uscale*(-ddsc5_3*(sci3X*scip4X+scip3X*sci4X));
#endif
#endif
#ifdef T1
#ifdef APPLY_SCALE
real gti2 = 0.5f*(sci4Y*psc5 + scip4Y*dsc5);
real ttm2i1 = -(dixuk1*psc3+dixukp1*dsc3)*0.5f + gti2*dixr1 + ((ukxqir1+rxqiuk1)*psc5 +(ukxqirp1+rxqiukp1)*dsc5) - gfi5*rxqir1;
real ttm2i2 = -(dixuk2*psc3+dixukp2*dsc3)*0.5f + gti2*dixr2 + ((ukxqir2+rxqiuk2)*psc5 +(ukxqirp2+rxqiukp2)*dsc5) - gfi5*rxqir2;
real ttm2i3 = -(dixuk3*psc3+dixukp3*dsc3)*0.5f + gti2*dixr3 + ((ukxqir3+rxqiuk3)*psc5 +(ukxqirp3+rxqiukp3)*dsc5) - gfi5*rxqir3;
#else
real gti2 = 0.5f*psc5*(sci4Y + scip4Y);
real ttm2i1 = -psc3*(dixuk1 + dixukp1)*0.5f + gti2*dixr1 + psc5*((ukxqir1+rxqiuk1) + (ukxqirp1+rxqiukp1)) - gfi5*rxqir1;
real ttm2i2 = -psc3*(dixuk2 + dixukp2)*0.5f + gti2*dixr2 + psc5*((ukxqir2+rxqiuk2) + (ukxqirp2+rxqiukp2)) - gfi5*rxqir2;
real ttm2i3 = -psc3*(dixuk3 + dixukp3)*0.5f + gti2*dixr3 + psc5*((ukxqir3+rxqiuk3) + (ukxqirp3+rxqiukp3)) - gfi5*rxqir3;
#endif
#endif
#ifdef T3
#ifdef APPLY_SCALE
real gti3 = 0.5f*(sci3Y*psc5 + scip3Y*dsc5);
real ttm3i1 = -(dkxui1*psc3+dkxuip1*dsc3)*0.5f + gti3*dkxr1 - ((uixqkr1+rxqkui1)*psc5 +(uixqkrp1+rxqkuip1)*dsc5) - gfi6*rxqkr1;
real ttm3i2 = -(dkxui2*psc3+dkxuip2*dsc3)*0.5f + gti3*dkxr2 - ((uixqkr2+rxqkui2)*psc5 +(uixqkrp2+rxqkuip2)*dsc5) - gfi6*rxqkr2;
real ttm3i3 = -(dkxui3*psc3+dkxuip3*dsc3)*0.5f + gti3*dkxr3 - ((uixqkr3+rxqkui3)*psc5 +(uixqkrp3+rxqkuip3)*dsc5) - gfi6*rxqkr3;
#else
real gti3 = 0.5f*psc5*(sci3Y + scip3Y);
real ttm3i1 = -psc3*(dkxui1 + dkxuip1)*0.5f + gti3*dkxr1 - psc5*((uixqkr1+rxqkui1) + (uixqkrp1+rxqkuip1)) - gfi6*rxqkr1;
real ttm3i2 = -psc3*(dkxui2 + dkxuip2)*0.5f + gti3*dkxr2 - psc5*((uixqkr2+rxqkui2) + (uixqkrp2+rxqkuip2)) - gfi6*rxqkr2;
real ttm3i3 = -psc3*(dkxui3 + dkxuip3)*0.5f + gti3*dkxr3 - psc5*((uixqkr3+rxqkui3) + (uixqkrp3+rxqkuip3)) - gfi6*rxqkr3;
#endif
#endif
// update force and torque on site k
#ifdef F1
outputForce.x = -ftm2i1;
outputForce.y = -ftm2i2;
outputForce.z = -ftm2i3;
#endif
#ifdef T1
outputForce.x = ttm2i1;
outputForce.y = ttm2i2;
outputForce.z = ttm2i3;
#endif
#ifdef T3
outputForce.x = ttm3i1;
outputForce.y = ttm3i2;
outputForce.z = ttm3i3;
#endif
// construct auxiliary vectors for induced terms
#ifdef T1
dixuk1 = atom1.dipole.y*atom2.inducedDipole.z - atom1.dipole.z*atom2.inducedDipole.y;
dixuk2 = atom1.dipole.z*atom2.inducedDipole.x - atom1.dipole.x*atom2.inducedDipole.z;
dixuk3 = atom1.dipole.x*atom2.inducedDipole.y - atom1.dipole.y*atom2.inducedDipole.x;
dixukp1 = atom1.dipole.y*atom2.inducedDipolePolar.z - atom1.dipole.z*atom2.inducedDipolePolar.y;
dixukp2 = atom1.dipole.z*atom2.inducedDipolePolar.x - atom1.dipole.x*atom2.inducedDipolePolar.z;
dixukp3 = atom1.dipole.x*atom2.inducedDipolePolar.y - atom1.dipole.y*atom2.inducedDipolePolar.x;
#endif
#ifdef T3
dkxui1 = atom2.dipole.y*atom1.inducedDipole.z - atom2.dipole.z*atom1.inducedDipole.y;
dkxui2 = atom2.dipole.z*atom1.inducedDipole.x - atom2.dipole.x*atom1.inducedDipole.z;
dkxui3 = atom2.dipole.x*atom1.inducedDipole.y - atom2.dipole.y*atom1.inducedDipole.x;
dkxuip1 = atom2.dipole.y*atom1.inducedDipolePolar.z - atom2.dipole.z*atom1.inducedDipolePolar.y;
dkxuip2 = atom2.dipole.z*atom1.inducedDipolePolar.x - atom2.dipole.x*atom1.inducedDipolePolar.z;
dkxuip3 = atom2.dipole.x*atom1.inducedDipolePolar.y - atom2.dipole.y*atom1.inducedDipolePolar.x;
#endif
#if defined T1
qiuk1 = atom1.quadrupoleXX*atom2.inducedDipole.x + atom1.quadrupoleXY*atom2.inducedDipole.y + atom1.quadrupoleXZ*atom2.inducedDipole.z;
qiuk2 = atom1.quadrupoleXY*atom2.inducedDipole.x + atom1.quadrupoleYY*atom2.inducedDipole.y + atom1.quadrupoleYZ*atom2.inducedDipole.z;
qiuk3 = atom1.quadrupoleXZ*atom2.inducedDipole.x + atom1.quadrupoleYZ*atom2.inducedDipole.y + atom1.quadrupoleZZ*atom2.inducedDipole.z;
qiukp1 = atom1.quadrupoleXX*atom2.inducedDipolePolar.x + atom1.quadrupoleXY*atom2.inducedDipolePolar.y + atom1.quadrupoleXZ*atom2.inducedDipolePolar.z;
qiukp2 = atom1.quadrupoleXY*atom2.inducedDipolePolar.x + atom1.quadrupoleYY*atom2.inducedDipolePolar.y + atom1.quadrupoleYZ*atom2.inducedDipolePolar.z;
qiukp3 = atom1.quadrupoleXZ*atom2.inducedDipolePolar.x + atom1.quadrupoleYZ*atom2.inducedDipolePolar.y + atom1.quadrupoleZZ*atom2.inducedDipolePolar.z;
#endif
#if defined T3
qkui1 = atom2.quadrupoleXX*atom1.inducedDipole.x + atom2.quadrupoleXY*atom1.inducedDipole.y + atom2.quadrupoleXZ*atom1.inducedDipole.z;
qkui2 = atom2.quadrupoleXY*atom1.inducedDipole.x + atom2.quadrupoleYY*atom1.inducedDipole.y + atom2.quadrupoleYZ*atom1.inducedDipole.z;
qkui3 = atom2.quadrupoleXZ*atom1.inducedDipole.x + atom2.quadrupoleYZ*atom1.inducedDipole.y + atom2.quadrupoleZZ*atom1.inducedDipole.z;
qkuip1 = atom2.quadrupoleXX*atom1.inducedDipolePolar.x + atom2.quadrupoleXY*atom1.inducedDipolePolar.y + atom2.quadrupoleXZ*atom1.inducedDipolePolar.z;
qkuip2 = atom2.quadrupoleXY*atom1.inducedDipolePolar.x + atom2.quadrupoleYY*atom1.inducedDipolePolar.y + atom2.quadrupoleYZ*atom1.inducedDipolePolar.z;
qkuip3 = atom2.quadrupoleXZ*atom1.inducedDipolePolar.x + atom2.quadrupoleYZ*atom1.inducedDipolePolar.y + atom2.quadrupoleZZ*atom1.inducedDipolePolar.z;
#endif
#ifdef T3
uixqkr1 = atom1.inducedDipole.y*qkr3 - atom1.inducedDipole.z*qkr2;
uixqkr2 = atom1.inducedDipole.z*qkr1 - atom1.inducedDipole.x*qkr3;
uixqkr3 = atom1.inducedDipole.x*qkr2 - atom1.inducedDipole.y*qkr1;
uixqkrp1 = atom1.inducedDipolePolar.y*qkr3 - atom1.inducedDipolePolar.z*qkr2;
uixqkrp2 = atom1.inducedDipolePolar.z*qkr1 - atom1.inducedDipolePolar.x*qkr3;
uixqkrp3 = atom1.inducedDipolePolar.x*qkr2 - atom1.inducedDipolePolar.y*qkr1;
#endif
#ifdef T1
ukxqir1 = atom2.inducedDipole.y*qir3 - atom2.inducedDipole.z*qir2;
ukxqir2 = atom2.inducedDipole.z*qir1 - atom2.inducedDipole.x*qir3;
ukxqir3 = atom2.inducedDipole.x*qir2 - atom2.inducedDipole.y*qir1;
ukxqirp1 = atom2.inducedDipolePolar.y*qir3 - atom2.inducedDipolePolar.z*qir2;
ukxqirp2 = atom2.inducedDipolePolar.z*qir1 - atom2.inducedDipolePolar.x*qir3;
ukxqirp3 = atom2.inducedDipolePolar.x*qir2 - atom2.inducedDipolePolar.y*qir1;
rxqiuk1 = yr*qiuk3 - zr*qiuk2;
rxqiuk2 = zr*qiuk1 - xr*qiuk3;
rxqiuk3 = xr*qiuk2 - yr*qiuk1;
rxqiukp1 = yr*qiukp3 - zr*qiukp2;
rxqiukp2 = zr*qiukp1 - xr*qiukp3;
rxqiukp3 = xr*qiukp2 - yr*qiukp1;
#endif
#ifdef T3
rxqkui1 = yr*qkui3 - zr*qkui2;
rxqkui2 = zr*qkui1 - xr*qkui3;
rxqkui3 = xr*qkui2 - yr*qkui1;
rxqkuip1 = yr*qkuip3 - zr*qkuip2;
rxqkuip2 = zr*qkuip1 - xr*qkuip3;
rxqkuip3 = xr*qkuip2 - yr*qkuip1;
#endif
#ifdef T1
#ifdef APPLY_SCALE
ttm2i1 = -(dixuk1*psc3+dixukp1*dsc3)*0.5f + ((ukxqir1+rxqiuk1)*psc5 +(ukxqirp1+rxqiukp1)*dsc5);
ttm2i2 = -(dixuk2*psc3+dixukp2*dsc3)*0.5f + ((ukxqir2+rxqiuk2)*psc5 +(ukxqirp2+rxqiukp2)*dsc5);
ttm2i3 = -(dixuk3*psc3+dixukp3*dsc3)*0.5f + ((ukxqir3+rxqiuk3)*psc5 +(ukxqirp3+rxqiukp3)*dsc5);
#else
ttm2i1 = -psc3*(dixuk1+dixukp1)*0.5f + psc5*((ukxqir1+rxqiuk1) + (ukxqirp1+rxqiukp1));
ttm2i2 = -psc3*(dixuk2+dixukp2)*0.5f + psc5*((ukxqir2+rxqiuk2) + (ukxqirp2+rxqiukp2));
ttm2i3 = -psc3*(dixuk3+dixukp3)*0.5f + psc5*((ukxqir3+rxqiuk3) + (ukxqirp3+rxqiukp3));
#endif
#endif
#ifdef T3
#ifdef APPLY_SCALE
ttm3i1 = -(dkxui1*psc3+dkxuip1*dsc3)*0.5f - ((uixqkr1+rxqkui1)*psc5 +(uixqkrp1+rxqkuip1)*dsc5);
ttm3i2 = -(dkxui2*psc3+dkxuip2*dsc3)*0.5f - ((uixqkr2+rxqkui2)*psc5 +(uixqkrp2+rxqkuip2)*dsc5);
ttm3i3 = -(dkxui3*psc3+dkxuip3*dsc3)*0.5f - ((uixqkr3+rxqkui3)*psc5 +(uixqkrp3+rxqkuip3)*dsc5);
#else
ttm3i1 = -psc3*(dkxui1 + dkxuip1)*0.5f - psc5*((uixqkr1+rxqkui1) + (uixqkrp1+rxqkuip1));
ttm3i2 = -psc3*(dkxui2 + dkxuip2)*0.5f - psc5*((uixqkr2+rxqkui2) + (uixqkrp2+rxqkuip2));
ttm3i3 = -psc3*(dkxui3 + dkxuip3)*0.5f - psc5*((uixqkr3+rxqkui3) + (uixqkrp3+rxqkuip3));
#endif
#endif
// update force and torque on site k;
#ifdef T1
outputForce.x -= ttm2i1;
outputForce.y -= ttm2i2;
outputForce.z -= ttm2i3;
#endif
#ifdef T3
outputForce.x -= ttm3i1;
outputForce.y -= ttm3i2;
outputForce.z -= ttm3i3;
#endif
}
/**
* This defines three different closely related functions, depending on which constant (F1, T1, or T3) is defined.
*/
#if defined F1
__device__ void computeOneInteractionF1(AtomData2& atom1, volatile AtomData2& atom2, real& outputEnergy, real3& force) {
#elif defined F2
__device__ void computeOneInteractionF2(AtomData2& atom1, volatile AtomData2& atom2, real& outputEnergy, real3& force) {
#elif defined T1
__device__ void computeOneInteractionT1(AtomData2& atom1, volatile AtomData2& atom2, real3& torque) {
#elif defined T2
__device__ void computeOneInteractionT2(AtomData2& atom1, volatile AtomData2& atom2, real3& torque) {
#elif defined B1 && defined B2
__device__ void computeOneInteractionB1B2(AtomData2& atom1, volatile AtomData2& atom2) {
#endif
#if defined F2 || defined B2
real sxi = atom1.inducedDipole.x + atom1.inducedDipolePolar.x;
real syi = atom1.inducedDipole.y + atom1.inducedDipolePolar.y;
real szi = atom1.inducedDipole.z + atom1.inducedDipolePolar.z;
#endif
#if defined F2 || defined T2 || defined B2
real sxk = atom2.inducedDipole.x + atom2.inducedDipolePolar.x;
real syk = atom2.inducedDipole.y + atom2.inducedDipolePolar.y;
real szk = atom2.inducedDipole.z + atom2.inducedDipolePolar.z;
#endif
// decide whether to compute the current interaction;
real xr = atom2.pos.x - atom1.pos.x;
real yr = atom2.pos.y - atom1.pos.y;
real zr = atom2.pos.z - atom1.pos.z;
real xr2 = xr*xr;
real yr2 = yr*yr;
real zr2 = zr*zr;
real r2 = xr2 + yr2 + zr2;
real rb2 = atom1.bornRadius*atom2.bornRadius;
real expterm = EXP(-r2/(GK_C*rb2));
real expc = expterm/GK_C;
real expcr = r2*expterm/(GK_C*GK_C*rb2*rb2);
real dexpc = -2 / (GK_C*rb2);
real dexpcr = 2 / (GK_C*rb2*rb2);
real dgfdr = 0.5f*expterm*(1 + r2/(rb2*GK_C));
real gf2 = 1 / (r2 + rb2*expterm);
real gf = SQRT(gf2);
real gf3 = gf2*gf;
real gf5 = gf3*gf2;
real gf7 = gf5*gf2;
real gf9 = gf7*gf2;
real gf11 = gf9*gf2;
// reaction potential auxiliary terms;
real a00 = gf;
real a10 = -gf3;
real a20 = 3*gf5;
real a30 = -15*gf7;
real a40 = 105*gf9;
real a50 = -945*gf11;
// Born radii derivatives of reaction potential auxiliary terms;
real b00 = dgfdr*a10;
real b10 = dgfdr*a20;
real b20 = dgfdr*a30;
real b30 = dgfdr*a40;
real b40 = dgfdr*a50;
// reaction potential gradient auxiliary terms;
real expc1 = 1 - expc;
real a01 = expc1*a10;
real a11 = expc1*a20;
real a21 = expc1*a30;
real a31 = expc1*a40;
real a41 = expc1*a50;
// Born radii derivs of reaction potential gradient auxiliary terms;
real b01 = b10 - expcr*a10 - expc*b10;
real b11 = b20 - expcr*a20 - expc*b20;
real b21 = b30 - expcr*a30 - expc*b30;
real b31 = b40 - expcr*a40 - expc*b40;
// 2nd reaction potential gradient auxiliary terms;
real expcdexpc = -expc*dexpc;
real a02 = expc1*a11 + expcdexpc*a10;
real a12 = expc1*a21 + expcdexpc*a20;
real a22 = expc1*a31 + expcdexpc*a30;
real a32 = expc1*a41 + expcdexpc*a40;
// Born radii derivatives of the 2nd reaction potential
// gradient auxiliary terms
real b02 = b11 - (expcr*(a11 + dexpc*a10) + expc*(b11 + dexpcr*a10 + dexpc*b10));
real b12 = b21 - (expcr*(a21 + dexpc*a20) + expc*(b21 + dexpcr*a20 + dexpc*b20));
real b22 = b31 - (expcr*(a31 + dexpc*a30) + expc*(b31 + dexpcr*a30 + dexpc*b30));
// 3rd reaction potential gradient auxiliary terms
expcdexpc = 2*expcdexpc;
real a03 = expc1*a12 + expcdexpc*a11;
real a13 = expc1*a22 + expcdexpc*a21;
real a23 = expc1*a32 + expcdexpc*a31;
expcdexpc = -expc*dexpc*dexpc;
a03 = a03 + expcdexpc*a10;
a13 = a13 + expcdexpc*a20;
a23 = a23 + expcdexpc*a30;
// multiply the auxillary terms by their dieletric functions;
a00 *= GK_FC;
a01 *= GK_FC;
a02 *= GK_FC;
a03 *= GK_FC;
b00 *= GK_FC;
b01 *= GK_FC;
b02 *= GK_FC;
a10 *= GK_FD;
a11 *= GK_FD;
a12 *= GK_FD;
a13 *= GK_FD;
b10 *= GK_FD;
b11 *= GK_FD;
b12 *= GK_FD;
a20 *= GK_FQ;
a21 *= GK_FQ;
a22 *= GK_FQ;
a23 *= GK_FQ;
b20 *= GK_FQ;
b21 *= GK_FQ;
b22 *= GK_FQ;
// unweighted reaction potential tensor
#if defined F2
real energy = -a10*atom2.q*(atom1.inducedDipole.x*xr + atom1.inducedDipole.y*yr + atom1.inducedDipole.z*zr);
energy += a10*atom1.q*(atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr);
#endif
#if defined F1
real energy = 2*atom1.q*atom2.q*a00;
energy += -a10*atom2.q*(atom1.dipole.x*xr + atom1.dipole.y*yr + atom1.dipole.z*zr);
energy += a10*atom1.q*(atom2.dipole.x*xr + atom2.dipole.y*yr + atom2.dipole.z*zr);
energy += a20*atom2.q*(atom1.quadrupoleXX*xr2 + atom1.quadrupoleYY*yr2 + atom1.quadrupoleZZ*zr2 + 2*(atom1.quadrupoleXY*xr*yr + atom1.quadrupoleXZ*xr*zr + atom1.quadrupoleYZ*yr*zr));
energy += a20*atom1.q*(atom2.quadrupoleXX*xr2 + atom2.quadrupoleYY*yr2 + atom2.quadrupoleZZ*zr2 + 2*(atom2.quadrupoleXY*xr*yr + atom2.quadrupoleXZ*xr*zr + atom2.quadrupoleYZ*yr*zr));
#endif
// Born radii derivs of unweighted reaction potential tensor
#if defined B1
real dsumdrB1 = 2*(atom1.q*atom2.q*b00);
dsumdrB1 = b10*atom2.q*(atom1.dipole.x*xr + atom1.dipole.y*yr + atom1.dipole.z*zr);
dsumdrB1 += b10*atom1.q*(atom2.dipole.x*xr + atom2.dipole.y*yr + atom2.dipole.z*zr);
#endif
#if defined B2
real dsumdrB2 = -b10*atom2.q*(sxi*xr + syi*yr + szi*zr);
dsumdrB2 += b10*atom1.q*(sxk*xr + syk*yr + szk*zr);
#endif
#if defined B1
real gqxx21 = xr*xr;
real gqyy21 = yr*yr;
real gqzz21 = zr*zr;
real gqxy21 = xr*yr;
real gqxz21 = xr*zr;
real gqyz21 = yr*zr;
dsumdrB1 += b20*atom2.q*(atom1.quadrupoleXX*gqxx21 + atom1.quadrupoleYY*gqyy21 + atom1.quadrupoleZZ*gqzz21 + 2*(atom1.quadrupoleXY*gqxy21 + atom1.quadrupoleXZ*gqxz21 + atom1.quadrupoleYZ*gqyz21));
dsumdrB1 += b20*atom1.q*(atom2.quadrupoleXX*gqxx21 + atom2.quadrupoleYY*gqyy21 + atom2.quadrupoleZZ*gqzz21 + 2*(atom2.quadrupoleXY*gqxy21 + atom2.quadrupoleXZ*gqxz21 + atom2.quadrupoleYZ*gqyz21));
#endif
#if defined F1
energy += a01*atom1.q*(atom2.dipole.x*xr + atom2.dipole.y*yr + atom2.dipole.z*zr);
energy = a01*atom2.q*(atom1.dipole.x*xr + atom1.dipole.y*yr + atom1.dipole.z*zr);
real factor = a01*2*atom1.q*atom2.q;
real dedx = factor*xr;
real dedy = factor*yr;
real dedz = factor*zr;
#endif
#if defined F2
energy += a01*atom1.q*(atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr);
energy = a01*atom2.q*(atom1.inducedDipole.x*xr + atom1.inducedDipole.y*yr + atom1.inducedDipole.z*zr);
#endif
#if defined F1 || defined F2 || defined T1 || defined T2
real gux2 = a10 + xr*xr*a11;
real gux3 = xr*yr*a11;
real gux4 = xr*zr*a11;
real guy3 = a10 + yr*yr*a11;
real guy4 = yr*zr*a11;
real guz4 = a10 + zr*zr*a11;
#if defined T1
real guy2 = gux3;
real guz2 = gux4;
real guz3 = guy4;
#endif
#if defined T2
real fid1 = sxk*gux2 + syk*gux3 + szk*gux4;
real fid2 = sxk*gux3 + syk*guy3 + szk*guy4;
real fid3 = sxk*gux4 + syk*guy4 + szk*guz4;
real trqi1 = atom1.dipole.y*fid3 - atom1.dipole.z*fid2;
real trqi2 = atom1.dipole.z*fid1 - atom1.dipole.x*fid3;
real trqi3 = atom1.dipole.x*fid2 - atom1.dipole.y*fid1;
#endif
#if defined F1
energy = 2*(atom1.dipole.x*(atom2.dipole.x*gux2 + atom2.dipole.y*gux3 + atom2.dipole.z*gux4) +
atom1.dipole.y*(atom2.dipole.x*gux3 + atom2.dipole.y*guy3 + atom2.dipole.z*guy4) +
atom1.dipole.z*(atom2.dipole.x*gux4 + atom2.dipole.y*guy4 + atom2.dipole.z*guz4));
dedx = atom2.q*(atom1.dipole.x*gux2 + atom1.dipole.y*gux3 + atom1.dipole.z*gux4);
dedx += atom1.q*(atom2.dipole.x*gux2 + atom2.dipole.y*gux3 + atom2.dipole.z*gux4);
dedy = atom2.q*(atom1.dipole.x*gux3 + atom1.dipole.y*guy3 + atom1.dipole.z*guy4);
dedy += atom1.q*(atom2.dipole.x*gux3 + atom2.dipole.y*guy3 + atom2.dipole.z*guy4);
dedz = atom2.q*(atom1.dipole.x*gux4 + atom1.dipole.y*guy4 + atom1.dipole.z*guz4);
dedz += atom1.q*(atom2.dipole.x*gux4 + atom2.dipole.y*guy4 + atom2.dipole.z*guz4);
#endif
#if defined F2
energy = 2*(
atom1.dipole.x*(atom2.inducedDipole.x*gux2 + atom2.inducedDipole.y*gux3 + atom2.inducedDipole.z*gux4) +
atom1.dipole.y*(atom2.inducedDipole.x*gux3 + atom2.inducedDipole.y*guy3 + atom2.inducedDipole.z*guy4) +
atom1.dipole.z*(atom2.inducedDipole.x*gux4 + atom2.inducedDipole.y*guy4 + atom2.inducedDipole.z*guz4) +
atom2.dipole.x*(atom1.inducedDipole.x*gux2 + atom1.inducedDipole.y*gux3 + atom1.inducedDipole.z*gux4) +
atom2.dipole.y*(atom1.inducedDipole.x*gux3 + atom1.inducedDipole.y*guy3 + atom1.inducedDipole.z*guy4) +
atom2.dipole.z*(atom1.inducedDipole.x*gux4 + atom1.inducedDipole.y*guy4 + atom1.inducedDipole.z*guz4));
real dpdx = atom1.q*(sxk*gux2 + syk*gux3 + szk*gux4);
dpdx = atom2.q*(sxi*gux2 + syi*gux3 + szi*gux4);
real dpdy = atom1.q*(sxk*gux3 + syk*guy3 + szk*guy4);
dpdy = atom2.q*(sxi*gux3 + syi*guy3 + szi*guy4);
real dpdz = atom1.q*(sxk*gux4 + syk*guy4 + szk*guz4);
dpdz = atom2.q*(sxi*gux4 + syi*guy4 + szi*guz4);
#endif
real gqxx2 = xr*(2*a20 + xr*xr*a21);
real gqxx3 = yr*xr*xr*a21;
real gqxx4 = zr*xr*xr*a21;
real gqyy2 = xr*yr*yr*a21;
real gqyy3 = yr*(2*a20 + yr*yr*a21);
real gqyy4 = zr*yr*yr*a21;
real gqzz2 = xr*zr*zr*a21;
real gqzz3 = yr*zr*zr*a21;
real gqzz4 = zr*(2*a20 + zr*zr*a21);
real gqxy2 = yr*(a20 + xr*xr*a21);
real gqxy3 = xr*(a20 + yr*yr*a21);
real gqxy4 = zr*xr*yr*a21;
real gqxz2 = zr*(a20 + xr*xr*a21);
real gqxz4 = xr*(a20 + zr*zr*a21);
real gqyz3 = zr*(a20 + yr*yr*a21);
real gqyz4 = yr*(a20 + zr*zr*a21);
#if defined T1 || defined T2
real gqxz3 = gqxy4;
real gqyz2 = gqxy4;
#endif
#if defined F1
energy += atom2.dipole.x*(atom1.quadrupoleXX*gqxx2 + atom1.quadrupoleYY*gqyy2 + atom1.quadrupoleZZ*gqzz2 + 2*(atom1.quadrupoleXY*gqxy2 + atom1.quadrupoleXZ*gqxz2 + atom1.quadrupoleYZ*gqxy4)) +
atom2.dipole.y*(atom1.quadrupoleXX*gqxx3 + atom1.quadrupoleYY*gqyy3 + atom1.quadrupoleZZ*gqzz3 + 2*(atom1.quadrupoleXY*gqxy3 + atom1.quadrupoleXZ*gqxy4 + atom1.quadrupoleYZ*gqyz3)) +
atom2.dipole.z*(atom1.quadrupoleXX*gqxx4 + atom1.quadrupoleYY*gqyy4 + atom1.quadrupoleZZ*gqzz4 + 2*(atom1.quadrupoleXY*gqxy4 + atom1.quadrupoleXZ*gqxz4 + atom1.quadrupoleYZ*gqyz4));
energy = atom1.dipole.x*(atom2.quadrupoleXX*gqxx2 + atom2.quadrupoleYY*gqyy2 + atom2.quadrupoleZZ*gqzz2 + 2*(atom2.quadrupoleXY*gqxy2 + atom2.quadrupoleXZ*gqxz2 + atom2.quadrupoleYZ*gqxy4)) +
atom1.dipole.y*(atom2.quadrupoleXX*gqxx3 + atom2.quadrupoleYY*gqyy3 + atom2.quadrupoleZZ*gqzz3 + 2*(atom2.quadrupoleXY*gqxy3 + atom2.quadrupoleXZ*gqxy4 + atom2.quadrupoleYZ*gqyz3)) +
atom1.dipole.z*(atom2.quadrupoleXX*gqxx4 + atom2.quadrupoleYY*gqyy4 + atom2.quadrupoleZZ*gqzz4 + 2*(atom2.quadrupoleXY*gqxy4 + atom2.quadrupoleXZ*gqxz4 + atom2.quadrupoleYZ*gqyz4));
dedx += atom2.q*(atom1.quadrupoleXX*gqxx2 + atom1.quadrupoleYY*gqyy2 + atom1.quadrupoleZZ*gqzz2 + 2*(atom1.quadrupoleXY*gqxy2 + atom1.quadrupoleXZ*gqxz2 + atom1.quadrupoleYZ*gqxy4));
dedx += atom1.q*(atom2.quadrupoleXX*gqxx2 + atom2.quadrupoleYY*gqyy2 + atom2.quadrupoleZZ*gqzz2 + 2*(atom2.quadrupoleXY*gqxy2 + atom2.quadrupoleXZ*gqxz2 + atom2.quadrupoleYZ*gqxy4));
dedy += atom2.q*(atom1.quadrupoleXX*gqxx3 + atom1.quadrupoleYY*gqyy3 + atom1.quadrupoleZZ*gqzz3 + 2*(atom1.quadrupoleXY*gqxy3 + atom1.quadrupoleXZ*gqxy4 + atom1.quadrupoleYZ*gqyz3));
dedy += atom1.q*(atom2.quadrupoleXX*gqxx3 + atom2.quadrupoleYY*gqyy3 + atom2.quadrupoleZZ*gqzz3 + 2*(atom2.quadrupoleXY*gqxy3 + atom2.quadrupoleXZ*gqxy4 + atom2.quadrupoleYZ*gqyz3));
dedz += atom2.q*(atom1.quadrupoleXX*gqxx4 + atom1.quadrupoleYY*gqyy4 + atom1.quadrupoleZZ*gqzz4 + 2*(atom1.quadrupoleXY*gqxy4 + atom1.quadrupoleXZ*gqxz4 + atom1.quadrupoleYZ*gqyz4));
dedz += atom1.q*(atom2.quadrupoleXX*gqxx4 + atom2.quadrupoleYY*gqyy4 + atom2.quadrupoleZZ*gqzz4 + 2*(atom2.quadrupoleXY*gqxy4 + atom2.quadrupoleXZ*gqxz4 + atom2.quadrupoleYZ*gqyz4));
#endif
#if defined F2
energy += atom2.inducedDipole.x*(atom1.quadrupoleXX*gqxx2 + atom1.quadrupoleYY*gqyy2 + atom1.quadrupoleZZ*gqzz2 + 2*(atom1.quadrupoleXY*gqxy2 + atom1.quadrupoleXZ*gqxz2 + atom1.quadrupoleYZ*gqxy4)) +
atom2.inducedDipole.y*(atom1.quadrupoleXX*gqxx3 + atom1.quadrupoleYY*gqyy3 + atom1.quadrupoleZZ*gqzz3 + 2*(atom1.quadrupoleXY*gqxy3 + atom1.quadrupoleXZ*gqxy4 + atom1.quadrupoleYZ*gqyz3)) +
atom2.inducedDipole.z*(atom1.quadrupoleXX*gqxx4 + atom1.quadrupoleYY*gqyy4 + atom1.quadrupoleZZ*gqzz4 + 2*(atom1.quadrupoleXY*gqxy4 + atom1.quadrupoleXZ*gqxz4 + atom1.quadrupoleYZ*gqyz4));
energy = atom1.inducedDipole.x*(atom2.quadrupoleXX*gqxx2 + atom2.quadrupoleYY*gqyy2 + atom2.quadrupoleZZ*gqzz2 + 2*(atom2.quadrupoleXY*gqxy2 + atom2.quadrupoleXZ*gqxz2 + atom2.quadrupoleYZ*gqxy4)) +
atom1.inducedDipole.y*(atom2.quadrupoleXX*gqxx3 + atom2.quadrupoleYY*gqyy3 + atom2.quadrupoleZZ*gqzz3 + 2*(atom2.quadrupoleXY*gqxy3 + atom2.quadrupoleXZ*gqxy4 + atom2.quadrupoleYZ*gqyz3)) +
atom1.inducedDipole.z*(atom2.quadrupoleXX*gqxx4 + atom2.quadrupoleYY*gqyy4 + atom2.quadrupoleZZ*gqzz4 + 2*(atom2.quadrupoleXY*gqxy4 + atom2.quadrupoleXZ*gqxz4 + atom2.quadrupoleYZ*gqyz4));
#endif
#endif
// Born derivs of the unweighted reaction potential gradient tensor
#if defined B1
dsumdrB1 += b01*atom1.q*(atom2.dipole.x*xr + atom2.dipole.y*yr + atom2.dipole.z*zr);
dsumdrB1 = b01*atom2.q*(atom1.dipole.x*xr + atom1.dipole.y*yr + atom1.dipole.z*zr);
#endif
#if defined B2
dsumdrB2 += b01*atom1.q*(sxk*xr+ syk*yr + szk*zr);
dsumdrB2 = b01*atom2.q*(sxi*xr+ syi*yr + szi*zr);
#endif
#if defined B1 || defined B2
real gux22 = b10 + xr2*b11;
real gux23 = xr*yr*b11;
real gux24 = xr*zr*b11;
real guy22 = gux23;
real guy23 = b10 + yr2*b11;
real guy24 = yr*zr*b11;
real guz22 = gux24;
real guz23 = guy24;
real guz24 = b10 + zr2*b11;
#if defined B1
dsumdrB1 = 2*(atom1.dipole.x*(atom2.dipole.x*gux22 + atom2.dipole.y*guy22 + atom2.dipole.z*guz22) +
atom1.dipole.y*(atom2.dipole.x*gux23 + atom2.dipole.y*guy23 + atom2.dipole.z*guz23) +
atom1.dipole.z*(atom2.dipole.x*gux24 + atom2.dipole.y*guy24 + atom2.dipole.z*guz24));
#endif
#if defined B2
dsumdrB2 = 2*(atom1.dipole.x*(sxk*gux22 + syk*guy22 + szk*guz22) +
atom1.dipole.y*(sxk*gux23 + syk*guy23 + szk*guz23) +
atom1.dipole.z*(sxk*gux24 + syk*guy24 + szk*guz24) +
atom2.dipole.x*(sxi*gux22 + syi*guy22 + szi*guz22) +
atom2.dipole.y*(sxi*gux23 + syi*guy23 + szi*guz23) +
atom2.dipole.z*(sxi*gux24 + syi*guy24 + szi*guz24));
#ifndef DIRECT_POLARIZATION
dsumdrB2 = 2*(atom1.inducedDipole.x*(atom2.inducedDipolePolar.x*gux22 + atom2.inducedDipolePolar.y*gux23 + atom2.inducedDipolePolar.z*gux24)
+ atom1.inducedDipole.y*(atom2.inducedDipolePolar.x*guy22 + atom2.inducedDipolePolar.y*guy23 + atom2.inducedDipolePolar.z*guy24)
+ atom1.inducedDipole.z*(atom2.inducedDipolePolar.x*guz22 + atom2.inducedDipolePolar.y*guz23 + atom2.inducedDipolePolar.z*guz24)
+ atom2.inducedDipole.x*(atom1.inducedDipolePolar.x*gux22 + atom1.inducedDipolePolar.y*gux23 + atom1.inducedDipolePolar.z*gux24)
+ atom2.inducedDipole.y*(atom1.inducedDipolePolar.x*guy22 + atom1.inducedDipolePolar.y*guy23 + atom1.inducedDipolePolar.z*guy24)
+ atom2.inducedDipole.z*(atom1.inducedDipolePolar.x*guz22 + atom1.inducedDipolePolar.y*guz23 + atom1.inducedDipolePolar.z*guz24));
#endif
#endif
real gqxx22 = xr*(2*b20 + xr2*b21);
real gqxx23 = yr*xr2*b21;
real gqxx24 = zr*xr2*b21;
real gqyy22 = xr*yr2*b21;
real gqyy23 = yr*(2*b20 + yr2*b21);
real gqyy24 = zr*yr2*b21;
real gqzz22 = xr*zr2*b21;
real gqzz23 = yr*zr2*b21;
real gqzz24 = zr*(2*b20 + zr2*b21);
real gqxy22 = yr*(b20 + xr2*b21);
real gqxy23 = xr*(b20 + yr2*b21);
real gqxy24 = zr*xr*yr*b21;
real gqxz22 = zr*(b20 + xr2*b21);
real gqxz23 = gqxy24;
real gqxz24 = xr*(b20 + zr2*b21);
real gqyz22 = gqxy24;
real gqyz23 = zr*(b20 + yr2*b21);
real gqyz24 = yr*(b20 + zr2*b21);
#if defined B1
dsumdrB1 += atom2.dipole.x*(atom1.quadrupoleXX*gqxx22 + atom1.quadrupoleYY*gqyy22 + atom1.quadrupoleZZ*gqzz22 + 2*(atom1.quadrupoleXY*gqxy22 + atom1.quadrupoleXZ*gqxz22 + atom1.quadrupoleYZ*gqyz22)) +
atom2.dipole.y*(atom1.quadrupoleXX*gqxx23 + atom1.quadrupoleYY*gqyy23 + atom1.quadrupoleZZ*gqzz23 + 2*(atom1.quadrupoleXY*gqxy23 + atom1.quadrupoleXZ*gqxz23 + atom1.quadrupoleYZ*gqyz23)) +
atom2.dipole.z*(atom1.quadrupoleXX*gqxx24 + atom1.quadrupoleYY*gqyy24 + atom1.quadrupoleZZ*gqzz24 + 2*(atom1.quadrupoleXY*gqxy24 + atom1.quadrupoleXZ*gqxz24 + atom1.quadrupoleYZ*gqyz24));
dsumdrB1 = atom1.dipole.x*(atom2.quadrupoleXX*gqxx22 + atom2.quadrupoleYY*gqyy22 + atom2.quadrupoleZZ*gqzz22 + 2*(atom2.quadrupoleXY*gqxy22 + atom2.quadrupoleXZ*gqxz22 + atom2.quadrupoleYZ*gqyz22)) +
atom1.dipole.y*(atom2.quadrupoleXX*gqxx23 + atom2.quadrupoleYY*gqyy23 + atom2.quadrupoleZZ*gqzz23 + 2*(atom2.quadrupoleXY*gqxy23 + atom2.quadrupoleXZ*gqxz23 + atom2.quadrupoleYZ*gqyz23)) +
atom1.dipole.z*(atom2.quadrupoleXX*gqxx24 + atom2.quadrupoleYY*gqyy24 + atom2.quadrupoleZZ*gqzz24 + 2*(atom2.quadrupoleXY*gqxy24 + atom2.quadrupoleXZ*gqxz24 + atom2.quadrupoleYZ*gqyz24));
#endif
#if defined B2
dsumdrB2 += sxk*(atom1.quadrupoleXX*gqxx22 + atom1.quadrupoleYY*gqyy22 + atom1.quadrupoleZZ*gqzz22 + 2*(atom1.quadrupoleXY*gqxy22 + atom1.quadrupoleXZ*gqxz22 + atom1.quadrupoleYZ*gqyz22)) +
syk*(atom1.quadrupoleXX*gqxx23 + atom1.quadrupoleYY*gqyy23 + atom1.quadrupoleZZ*gqzz23 + 2*(atom1.quadrupoleXY*gqxy23 + atom1.quadrupoleXZ*gqxz23 + atom1.quadrupoleYZ*gqyz23)) +
szk*(atom1.quadrupoleXX*gqxx24 + atom1.quadrupoleYY*gqyy24 + atom1.quadrupoleZZ*gqzz24 + 2*(atom1.quadrupoleXY*gqxy24 + atom1.quadrupoleXZ*gqxz24 + atom1.quadrupoleYZ*gqyz24));
dsumdrB2 = sxi*(atom2.quadrupoleXX*gqxx22 + atom2.quadrupoleYY*gqyy22 + atom2.quadrupoleZZ*gqzz22 + 2*(atom2.quadrupoleXY*gqxy22 + atom2.quadrupoleXZ*gqxz22 + atom2.quadrupoleYZ*gqyz22)) +
syi*(atom2.quadrupoleXX*gqxx23 + atom2.quadrupoleYY*gqyy23 + atom2.quadrupoleZZ*gqzz23 + 2*(atom2.quadrupoleXY*gqxy23 + atom2.quadrupoleXZ*gqxz23 + atom2.quadrupoleYZ*gqyz23)) +
szi*(atom2.quadrupoleXX*gqxx24 + atom2.quadrupoleYY*gqyy24 + atom2.quadrupoleZZ*gqzz24 + 2*(atom2.quadrupoleXY*gqxy24 + atom2.quadrupoleXZ*gqxz24 + atom2.quadrupoleYZ*gqyz24));
#endif
#endif
// unweighted 2nd reaction potential gradient tensor;
#if defined F1 || defined F2 || defined T1
real gc5 = a01 + xr2*a02;
real gc6 = xr*yr*a02;
real gc7 = xr*zr*a02;
real gc8 = a01 + yr2*a02;
real gc9 = yr*zr*a02;
real gc10 = a01 + zr2*a02;
#if defined F1
energy += atom1.q*(atom2.quadrupoleXX*gc5 + atom2.quadrupoleYY*gc8 + atom2.quadrupoleZZ*gc10 + 2*(atom2.quadrupoleXY*gc6 + atom2.quadrupoleXZ*gc7 + atom2.quadrupoleYZ*gc9));
energy += atom2.q*(atom1.quadrupoleXX*gc5 + atom1.quadrupoleYY*gc8 + atom1.quadrupoleZZ*gc10 + 2*(atom1.quadrupoleXY*gc6 + atom1.quadrupoleXZ*gc7 + atom1.quadrupoleYZ*gc9));
dedx += atom1.q*(atom2.dipole.x*gc5 + atom2.dipole.y*gc6 + atom2.dipole.z*gc7);
dedx = atom2.q*(atom1.dipole.x*gc5 + atom1.dipole.y*gc6 + atom1.dipole.z*gc7);
dedy += atom1.q*(atom2.dipole.x*gc6 + atom2.dipole.y*gc8 + atom2.dipole.z*gc9);
dedy = atom2.q*(atom1.dipole.x*gc6 + atom1.dipole.y*gc8 + atom1.dipole.z*gc9);
dedz += atom1.q*(atom2.dipole.x*gc7 + atom2.dipole.y*gc9 + atom2.dipole.z*gc10);
dedz = atom2.q*(atom1.dipole.x*gc7 + atom1.dipole.y*gc9 + atom1.dipole.z*gc10);
#endif
#if defined F2
dpdx += atom1.q*(sxk*gc5 + syk*gc6 + szk*gc7);
dpdx = atom2.q*(sxi*gc5 + syi*gc6 + szi*gc7);
dpdy += atom1.q*(sxk*gc6 + syk*gc8 + szk*gc9);
dpdy = atom2.q*(sxi*gc6 + syi*gc8 + szi*gc9);
dpdz += atom1.q*(sxk*gc7 + syk*gc9 + szk*gc10);
dpdz = atom2.q*(sxi*gc7 + syi*gc9 + szi*gc10);
#endif
#endif
#if defined F1 || defined F2 || defined T1 || defined T2
real gux5 = xr*(3*a11 + xr2*a12);
real gux6 = yr*(a11 + xr2*a12);
real gux7 = zr*(a11 + xr2*a12);
real gux8 = xr*(a11 + yr2*a12);
real gux9 = zr*xr*yr*a12;
real gux10 = xr*(a11 + zr2*a12);
real guy5 = yr*(a11 + xr2*a12);
real guy6 = xr*(a11 + yr2*a12);
real guy7 = gux9;
real guy8 = yr*(3*a11 + yr2*a12);
real guy9 = zr*(a11 + yr2*a12);
real guy10 = yr*(a11 + zr2*a12);
real guz5 = zr*(a11 + xr2*a12);
real guz6 = gux9;
real guz7 = xr*(a11 + zr2*a12);
real guz8 = zr*(a11 + yr2*a12);
real guz9 = yr*(a11 + zr2*a12);
real guz10 = zr*(3*a11 + zr2*a12);
#if defined F1
energy = atom1.dipole.x*(atom2.quadrupoleXX*gux5 + atom2.quadrupoleYY*gux8 + atom2.quadrupoleZZ*gux10 + 2*(atom2.quadrupoleXY*gux6 + atom2.quadrupoleXZ*gux7 + atom2.quadrupoleYZ*gux9)) +
atom1.dipole.y*(atom2.quadrupoleXX*guy5 + atom2.quadrupoleYY*guy8 + atom2.quadrupoleZZ*guy10 + 2*(atom2.quadrupoleXY*guy6 + atom2.quadrupoleXZ*guy7 + atom2.quadrupoleYZ*guy9)) +
atom1.dipole.z*(atom2.quadrupoleXX*guz5 + atom2.quadrupoleYY*guz8 + atom2.quadrupoleZZ*guz10 + 2*(atom2.quadrupoleXY*guz6 + atom2.quadrupoleXZ*guz7 + atom2.quadrupoleYZ*guz9));
energy += atom2.dipole.x*(atom1.quadrupoleXX*gux5 + atom1.quadrupoleYY*gux8 + atom1.quadrupoleZZ*gux10 + 2*(atom1.quadrupoleXY*gux6 + atom1.quadrupoleXZ*gux7 + atom1.quadrupoleYZ*gux9)) +
atom2.dipole.y*(atom1.quadrupoleXX*guy5 + atom1.quadrupoleYY*guy8 + atom1.quadrupoleZZ*guy10 + 2*(atom1.quadrupoleXY*guy6 + atom1.quadrupoleXZ*guy7 + atom1.quadrupoleYZ*guy9)) +
atom2.dipole.z*(atom1.quadrupoleXX*guz5 + atom1.quadrupoleYY*guz8 + atom1.quadrupoleZZ*guz10 + 2*(atom1.quadrupoleXY*guz6 + atom1.quadrupoleXZ*guz7 + atom1.quadrupoleYZ*guz9));
dedx = 2*(atom1.dipole.x*(atom2.dipole.x*gux5 + atom2.dipole.y*guy5 + atom2.dipole.z*guz5) +
atom1.dipole.y*(atom2.dipole.x*gux6 + atom2.dipole.y*guy6 + atom2.dipole.z*guz6) +
atom1.dipole.z*(atom2.dipole.x*gux7 + atom2.dipole.y*guy7 + atom2.dipole.z*guz7));
dedy = 2*(atom1.dipole.x*(atom2.dipole.x*gux6 + atom2.dipole.y*guy6 + atom2.dipole.z*guz6) +
atom1.dipole.y*(atom2.dipole.x*gux8 + atom2.dipole.y*guy8 + atom2.dipole.z*guz8) +
atom1.dipole.z*(atom2.dipole.x*gux9 + atom2.dipole.y*guy9 + atom2.dipole.z*guz9));
dedz = 2*(atom1.dipole.x*(atom2.dipole.x*gux7 + atom2.dipole.y*guy7 + atom2.dipole.z*guz7) +
atom1.dipole.y*(atom2.dipole.x*gux9 + atom2.dipole.y*guy9 + atom2.dipole.z*guz9) +
atom1.dipole.z*(atom2.dipole.x*gux10 + atom2.dipole.y*guy10 + atom2.dipole.z*guz10));
#endif
#if defined F2
energy = atom1.inducedDipole.x*(atom2.quadrupoleXX*gux5 + atom2.quadrupoleYY*gux8 + atom2.quadrupoleZZ*gux10 + 2*(atom2.quadrupoleXY*gux6 + atom2.quadrupoleXZ*gux7 + atom2.quadrupoleYZ*gux9)) +
atom1.inducedDipole.y*(atom2.quadrupoleXX*guy5 + atom2.quadrupoleYY*guy8 + atom2.quadrupoleZZ*guy10 + 2*(atom2.quadrupoleXY*guy6 + atom2.quadrupoleXZ*guy7 + atom2.quadrupoleYZ*guy9)) +
atom1.inducedDipole.z*(atom2.quadrupoleXX*guz5 + atom2.quadrupoleYY*guz8 + atom2.quadrupoleZZ*guz10 + 2*(atom2.quadrupoleXY*guz6 + atom2.quadrupoleXZ*guz7 + atom2.quadrupoleYZ*guz9));
energy += atom2.inducedDipole.x*(atom1.quadrupoleXX*gux5 + atom1.quadrupoleYY*gux8 + atom1.quadrupoleZZ*gux10 + 2*(atom1.quadrupoleXY*gux6 + atom1.quadrupoleXZ*gux7 + atom1.quadrupoleYZ*gux9)) +
atom2.inducedDipole.y*(atom1.quadrupoleXX*guy5 + atom1.quadrupoleYY*guy8 + atom1.quadrupoleZZ*guy10 + 2*(atom1.quadrupoleXY*guy6 + atom1.quadrupoleXZ*guy7 + atom1.quadrupoleYZ*guy9)) +
atom2.inducedDipole.z*(atom1.quadrupoleXX*guz5 + atom1.quadrupoleYY*guz8 + atom1.quadrupoleZZ*guz10 + 2*(atom1.quadrupoleXY*guz6 + atom1.quadrupoleXZ*guz7 + atom1.quadrupoleYZ*guz9));
dpdx = 2*(atom1.dipole.x*(sxk*gux5 + syk*guy5 + szk*guz5) + atom1.dipole.y*(sxk*gux6 + syk*guy6 + szk*guz6) + atom1.dipole.z*(sxk*gux7 + syk*guy7 + szk*guz7) +
atom2.dipole.x*(sxi*gux5 + syi*guy5 + szi*guz5) + atom2.dipole.y*(sxi*gux6 + syi*guy6 + szi*guz6) + atom2.dipole.z*(sxi*gux7 + syi*guy7 + szi*guz7));
dpdy = 2*(atom1.dipole.x*(sxk*gux6 + syk*guy6 + szk*guz6) + atom1.dipole.y*(sxk*gux8 + syk*guy8 + szk*guz8) + atom1.dipole.z*(sxk*gux9 + syk*guy9 + szk*guz9) +
atom2.dipole.x*(sxi*gux6 + syi*guy6 + szi*guz6) + atom2.dipole.y*(sxi*gux8 + syi*guy8 + szi*guz8) + atom2.dipole.z*(sxi*gux9 + syi*guy9 + szi*guz9));
dpdz = 2*(atom1.dipole.x*(sxk*gux7 + syk*guy7 + szk*guz7) + atom1.dipole.y*(sxk*gux9 + syk*guy9 + szk*guz9) + atom1.dipole.z*(sxk*gux10 + syk*guy10 + szk*guz10) +
atom2.dipole.x*(sxi*gux7 + syi*guy7 + szi*guz7) + atom2.dipole.y*(sxi*gux9 + syi*guy9 + szi*guz9) + atom2.dipole.z*(sxi*gux10 + syi*guy10 + szi*guz10));
#ifndef DIRECT_POLARIZATION
dpdx = 2*(atom1.inducedDipole.x*(atom2.inducedDipolePolar.x*gux5 + atom2.inducedDipolePolar.y*gux6 + atom2.inducedDipolePolar.z*gux7)
+ atom1.inducedDipole.y*(atom2.inducedDipolePolar.x*guy5 + atom2.inducedDipolePolar.y*guy6 + atom2.inducedDipolePolar.z*guy7)
+ atom1.inducedDipole.z*(atom2.inducedDipolePolar.x*guz5 + atom2.inducedDipolePolar.y*guz6 + atom2.inducedDipolePolar.z*guz7)
+ atom2.inducedDipole.x*(atom1.inducedDipolePolar.x*gux5 + atom1.inducedDipolePolar.y*gux6 + atom1.inducedDipolePolar.z*gux7)
+ atom2.inducedDipole.y*(atom1.inducedDipolePolar.x*guy5 + atom1.inducedDipolePolar.y*guy6 + atom1.inducedDipolePolar.z*guy7)
+ atom2.inducedDipole.z*(atom1.inducedDipolePolar.x*guz5 + atom1.inducedDipolePolar.y*guz6 + atom1.inducedDipolePolar.z*guz7));
dpdy = 2*(atom1.inducedDipole.x*(atom2.inducedDipolePolar.x*gux6 + atom2.inducedDipolePolar.y*gux8 + atom2.inducedDipolePolar.z*gux9)
+ atom1.inducedDipole.y*(atom2.inducedDipolePolar.x*guy6 + atom2.inducedDipolePolar.y*guy8 + atom2.inducedDipolePolar.z*guy9)
+ atom1.inducedDipole.z*(atom2.inducedDipolePolar.x*guz6 + atom2.inducedDipolePolar.y*guz8 + atom2.inducedDipolePolar.z*guz9)
+ atom2.inducedDipole.x*(atom1.inducedDipolePolar.x*gux6 + atom1.inducedDipolePolar.y*gux8 + atom1.inducedDipolePolar.z*gux9)
+ atom2.inducedDipole.y*(atom1.inducedDipolePolar.x*guy6 + atom1.inducedDipolePolar.y*guy8 + atom1.inducedDipolePolar.z*guy9)
+ atom2.inducedDipole.z*(atom1.inducedDipolePolar.x*guz6 + atom1.inducedDipolePolar.y*guz8 + atom1.inducedDipolePolar.z*guz9));
dpdz = 2*(atom1.inducedDipole.x*(atom2.inducedDipolePolar.x*gux7 + atom2.inducedDipolePolar.y*gux9 + atom2.inducedDipolePolar.z*gux10)
+ atom1.inducedDipole.y*(atom2.inducedDipolePolar.x*guy7 + atom2.inducedDipolePolar.y*guy9 + atom2.inducedDipolePolar.z*guy10)
+ atom1.inducedDipole.z*(atom2.inducedDipolePolar.x*guz7 + atom2.inducedDipolePolar.y*guz9 + atom2.inducedDipolePolar.z*guz10)
+ atom2.inducedDipole.x*(atom1.inducedDipolePolar.x*gux7 + atom1.inducedDipolePolar.y*gux9 + atom1.inducedDipolePolar.z*gux10)
+ atom2.inducedDipole.y*(atom1.inducedDipolePolar.x*guy7 + atom1.inducedDipolePolar.y*guy9 + atom1.inducedDipolePolar.z*guy10)
+ atom2.inducedDipole.z*(atom1.inducedDipolePolar.x*guz7 + atom1.inducedDipolePolar.y*guz9 + atom1.inducedDipolePolar.z*guz10));
#endif
#endif
#endif
#if defined F1 || defined F2 || defined T1
real gqxx5 = 2*a20 + xr2*(5*a21 + xr2*a22);
real gqxx6 = yr*xr*(2*a21 + xr2*a22);
real gqxx7 = zr*xr*(2*a21 + xr2*a22);
real gqxx8 = xr2*(a21 + yr2*a22);
real gqxx9 = zr*yr*xr2*a22;
real gqxx10 = xr2*(a21 + zr2*a22);
real gqyy5 = yr2*(a21 + xr2*a22);
real gqyy6 = xr*yr*(2*a21 + yr2*a22);
real gqyy7 = xr*zr*yr2*a22;
real gqyy8 = 2*a20 + yr2*(5*a21 + yr2*a22);
real gqyy9 = yr*zr*(2*a21 + yr2*a22);
real gqyy10 = yr2*(a21 + zr2*a22);
real gqzz5 = zr2*(a21 + xr2*a22);
real gqzz6 = xr*yr*zr2*a22;
real gqzz7 = xr*zr*(2*a21 + zr2*a22);
real gqzz8 = zr2*(a21 + yr2*a22);
real gqzz9 = yr*zr*(2*a21 + zr2*a22);
real gqzz10 = 2*a20 + zr2*(5*a21 + zr2*a22);
real gqxy5 = xr*yr*(3*a21 + xr2*a22);
real gqxy6 = a20 + (xr2 + yr2)*a21 + xr2*yr2*a22;
real gqxy7 = zr*yr*(a21 + xr2*a22);
real gqxy8 = xr*yr*(3*a21 + yr2*a22);
real gqxy9 = zr*xr*(a21 + yr2*a22);
real gqxy10 = xr*yr*(a21 + zr2*a22);
real gqxz5 = xr*zr*(3*a21 + xr2*a22);
real gqxz6 = yr*zr*(a21 + xr2*a22);
real gqxz7 = a20 + (xr2 + zr2)*a21 + xr2*zr2*a22;
real gqxz8 = xr*zr*(a21 + yr2*a22);
real gqxz9 = xr*yr*(a21 + zr2*a22);
real gqxz10 = xr*zr*(3*a21 + zr2*a22);
real gqyz5 = zr*yr*(a21 + xr2*a22);
real gqyz6 = xr*zr*(a21 + yr2*a22);
real gqyz7 = xr*yr*(a21 + zr2*a22);
real gqyz8 = yr*zr*(3*a21 + yr2*a22);
real gqyz9 = a20 + (yr2 + zr2)*a21 + yr2*zr2*a22;
real gqyz10 = yr*zr*(3*a21 + zr2*a22);
#if defined F1
energy += atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx5 + atom2.quadrupoleYY*gqxx8 + atom2.quadrupoleZZ*gqxx10 + 2*(atom2.quadrupoleXY*gqxx6 + atom2.quadrupoleXZ*gqxx7 + atom2.quadrupoleYZ*gqxx9))
+ atom1.quadrupoleYY*(atom2.quadrupoleXX*gqyy5 + atom2.quadrupoleYY*gqyy8 + atom2.quadrupoleZZ*gqyy10 + 2*(atom2.quadrupoleXY*gqyy6 + atom2.quadrupoleXZ*gqyy7 + atom2.quadrupoleYZ*gqyy9))
+ atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqzz5 + atom2.quadrupoleYY*gqzz8 + atom2.quadrupoleZZ*gqzz10 + 2*(atom2.quadrupoleXY*gqzz6 + atom2.quadrupoleXZ*gqzz7 + atom2.quadrupoleYZ*gqzz9))
+ 2*(atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxy5 + atom2.quadrupoleYY*gqxy8 + atom2.quadrupoleZZ*gqxy10
+ 2*(atom2.quadrupoleXY*gqxy6 + atom2.quadrupoleXZ*gqxy7 + atom2.quadrupoleYZ*gqxy9))
+ atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxz5 + atom2.quadrupoleYY*gqxz8 + atom2.quadrupoleZZ*gqxz10
+ 2*(atom2.quadrupoleXY*gqxz6 + atom2.quadrupoleXZ*gqxz7 + atom2.quadrupoleYZ*gqxz9))
+ atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqyz5 + atom2.quadrupoleYY*gqyz8 + atom2.quadrupoleZZ*gqyz10
+ 2*(atom2.quadrupoleXY*gqyz6 + atom2.quadrupoleXZ*gqyz7 + atom2.quadrupoleYZ*gqyz9)));
energy += atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx5 + atom2.quadrupoleYY*gqyy5 + atom2.quadrupoleZZ*gqzz5 + 2*(atom2.quadrupoleXY*gqxy5 + atom2.quadrupoleXZ*gqxz5 + atom2.quadrupoleYZ*gqyz5))
+ atom1.quadrupoleYY*(atom2.quadrupoleXX*gqxx8 + atom2.quadrupoleYY*gqyy8 + atom2.quadrupoleZZ*gqzz8
+ 2*(atom2.quadrupoleXY*gqxy8 + atom2.quadrupoleXZ*gqxz8 + atom2.quadrupoleYZ*gqyz8))
+ atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqxx10 + atom2.quadrupoleYY*gqyy10 + atom2.quadrupoleZZ*gqzz10
+ 2*(atom2.quadrupoleXY*gqxy10 + atom2.quadrupoleXZ*gqxz10 + atom2.quadrupoleYZ*gqyz10))
+ 2*(atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxx6 + atom2.quadrupoleYY*gqyy6 + atom2.quadrupoleZZ*gqzz6
+ 2*(atom2.quadrupoleXY*gqxy6 + atom2.quadrupoleXZ*gqxz6 + atom2.quadrupoleYZ*gqyz6))
+ atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxx7 + atom2.quadrupoleYY*gqyy7 + atom2.quadrupoleZZ*gqzz7
+ 2*(atom2.quadrupoleXY*gqxy7 + atom2.quadrupoleXZ*gqxz7 + atom2.quadrupoleYZ*gqyz7))
+ atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqxx9 + atom2.quadrupoleYY*gqyy9 + atom2.quadrupoleZZ*gqzz9
+ 2*(atom2.quadrupoleXY*gqxy9 + atom2.quadrupoleXZ*gqxz9 + atom2.quadrupoleYZ*gqyz9)));
dedx += atom2.dipole.x*(atom1.quadrupoleXX*gqxx5 + atom1.quadrupoleYY*gqyy5 + atom1.quadrupoleZZ*gqzz5 + 2*(atom1.quadrupoleXY*gqxy5 + atom1.quadrupoleXZ*gqxz5 + atom1.quadrupoleYZ*gqyz5)) +
atom2.dipole.y*(atom1.quadrupoleXX*gqxx6 + atom1.quadrupoleYY*gqyy6 + atom1.quadrupoleZZ*gqzz6 + 2*(atom1.quadrupoleXY*gqxy6 + atom1.quadrupoleXZ*gqxz6 + atom1.quadrupoleYZ*gqyz6)) +
atom2.dipole.z*(atom1.quadrupoleXX*gqxx7 + atom1.quadrupoleYY*gqyy7 + atom1.quadrupoleZZ*gqzz7 + 2*(atom1.quadrupoleXY*gqxy7 + atom1.quadrupoleXZ*gqxz7 + atom1.quadrupoleYZ*gqyz7));
dedx = atom1.dipole.x*(atom2.quadrupoleXX*gqxx5 + atom2.quadrupoleYY*gqyy5 + atom2.quadrupoleZZ*gqzz5 + 2*(atom2.quadrupoleXY*gqxy5 + atom2.quadrupoleXZ*gqxz5 + atom2.quadrupoleYZ*gqyz5)) +
atom1.dipole.y*(atom2.quadrupoleXX*gqxx6 + atom2.quadrupoleYY*gqyy6 + atom2.quadrupoleZZ*gqzz6 + 2*(atom2.quadrupoleXY*gqxy6 + atom2.quadrupoleXZ*gqxz6 + atom2.quadrupoleYZ*gqyz6)) +
atom1.dipole.z*(atom2.quadrupoleXX*gqxx7 + atom2.quadrupoleYY*gqyy7 + atom2.quadrupoleZZ*gqzz7 + 2*(atom2.quadrupoleXY*gqxy7 + atom2.quadrupoleXZ*gqxz7 + atom2.quadrupoleYZ*gqyz7));
dedy += atom2.dipole.x*(atom1.quadrupoleXX*gqxx6 + atom1.quadrupoleYY*gqyy6 + atom1.quadrupoleZZ*gqzz6 + 2*(atom1.quadrupoleXY*gqxy6 + atom1.quadrupoleXZ*gqxz6 + atom1.quadrupoleYZ*gqyz6)) +
atom2.dipole.y*(atom1.quadrupoleXX*gqxx8 + atom1.quadrupoleYY*gqyy8 + atom1.quadrupoleZZ*gqzz8 + 2*(atom1.quadrupoleXY*gqxy8 + atom1.quadrupoleXZ*gqxz8 + atom1.quadrupoleYZ*gqyz8)) +
atom2.dipole.z*(atom1.quadrupoleXX*gqxx9 + atom1.quadrupoleYY*gqyy9 + atom1.quadrupoleZZ*gqzz9 + 2*(atom1.quadrupoleXY*gqxy9 + atom1.quadrupoleXZ*gqxz9 + atom1.quadrupoleYZ*gqyz9));
dedy = atom1.dipole.x*(atom2.quadrupoleXX*gqxx6 + atom2.quadrupoleYY*gqyy6 + atom2.quadrupoleZZ*gqzz6 + 2*(atom2.quadrupoleXY*gqxy6 + atom2.quadrupoleXZ*gqxz6 + atom2.quadrupoleYZ*gqyz6)) +
atom1.dipole.y*(atom2.quadrupoleXX*gqxx8 + atom2.quadrupoleYY*gqyy8 + atom2.quadrupoleZZ*gqzz8 + 2*(atom2.quadrupoleXY*gqxy8 + atom2.quadrupoleXZ*gqxz8 + atom2.quadrupoleYZ*gqyz8)) +
atom1.dipole.z*(atom2.quadrupoleXX*gqxx9 + atom2.quadrupoleYY*gqyy9 + atom2.quadrupoleZZ*gqzz9 + 2*(atom2.quadrupoleXY*gqxy9 + atom2.quadrupoleXZ*gqxz9 + atom2.quadrupoleYZ*gqyz9));
dedz += atom2.dipole.x*(atom1.quadrupoleXX*gqxx7 + atom1.quadrupoleYY*gqyy7 + atom1.quadrupoleZZ*gqzz7 + 2*(atom1.quadrupoleXY*gqxy7 + atom1.quadrupoleXZ*gqxz7 + atom1.quadrupoleYZ*gqyz7)) +
atom2.dipole.y*(atom1.quadrupoleXX*gqxx9 + atom1.quadrupoleYY*gqyy9 + atom1.quadrupoleZZ*gqzz9 + 2*(atom1.quadrupoleXY*gqxy9 + atom1.quadrupoleXZ*gqxz9 + atom1.quadrupoleYZ*gqyz9)) +
atom2.dipole.z*(atom1.quadrupoleXX*gqxx10 + atom1.quadrupoleYY*gqyy10 + atom1.quadrupoleZZ*gqzz10 + 2*(atom1.quadrupoleXY*gqxy10 + atom1.quadrupoleXZ*gqxz10 + atom1.quadrupoleYZ*gqyz10));
dedz = atom1.dipole.x*(atom2.quadrupoleXX*gqxx7 + atom2.quadrupoleYY*gqyy7 + atom2.quadrupoleZZ*gqzz7 + 2*(atom2.quadrupoleXY*gqxy7 + atom2.quadrupoleXZ*gqxz7 + atom2.quadrupoleYZ*gqyz7)) +
atom1.dipole.y*(atom2.quadrupoleXX*gqxx9 + atom2.quadrupoleYY*gqyy9 + atom2.quadrupoleZZ*gqzz9 + 2*(atom2.quadrupoleXY*gqxy9 + atom2.quadrupoleXZ*gqxz9 + atom2.quadrupoleYZ*gqyz9)) +
atom1.dipole.z*(atom2.quadrupoleXX*gqxx10 + atom2.quadrupoleYY*gqyy10 + atom2.quadrupoleZZ*gqzz10 + 2*(atom2.quadrupoleXY*gqxy10 + atom2.quadrupoleXZ*gqxz10 + atom2.quadrupoleYZ*gqyz10));
#endif
#if defined F2
dpdx += sxk*(atom1.quadrupoleXX*gqxx5 + atom1.quadrupoleYY*gqyy5 + atom1.quadrupoleZZ*gqzz5 + 2*(atom1.quadrupoleXY*gqxy5 + atom1.quadrupoleXZ*gqxz5 + atom1.quadrupoleYZ*gqyz5)) +
syk*(atom1.quadrupoleXX*gqxx6 + atom1.quadrupoleYY*gqyy6 + atom1.quadrupoleZZ*gqzz6 + 2*(atom1.quadrupoleXY*gqxy6 + atom1.quadrupoleXZ*gqxz6 + atom1.quadrupoleYZ*gqyz6)) +
szk*(atom1.quadrupoleXX*gqxx7 + atom1.quadrupoleYY*gqyy7 + atom1.quadrupoleZZ*gqzz7 + 2*(atom1.quadrupoleXY*gqxy7 + atom1.quadrupoleXZ*gqxz7 + atom1.quadrupoleYZ*gqyz7));
dpdx = sxi*(atom2.quadrupoleXX*gqxx5 + atom2.quadrupoleYY*gqyy5 + atom2.quadrupoleZZ*gqzz5 + 2*(atom2.quadrupoleXY*gqxy5 + atom2.quadrupoleXZ*gqxz5 + atom2.quadrupoleYZ*gqyz5)) +
syi*(atom2.quadrupoleXX*gqxx6 + atom2.quadrupoleYY*gqyy6 + atom2.quadrupoleZZ*gqzz6 + 2*(atom2.quadrupoleXY*gqxy6 + atom2.quadrupoleXZ*gqxz6 + atom2.quadrupoleYZ*gqyz6)) +
szi*(atom2.quadrupoleXX*gqxx7 + atom2.quadrupoleYY*gqyy7 + atom2.quadrupoleZZ*gqzz7 + 2*(atom2.quadrupoleXY*gqxy7 + atom2.quadrupoleXZ*gqxz7 + atom2.quadrupoleYZ*gqyz7));
dpdy += sxk*(atom1.quadrupoleXX*gqxx6 + atom1.quadrupoleYY*gqyy6 + atom1.quadrupoleZZ*gqzz6 + 2*(atom1.quadrupoleXY*gqxy6 + atom1.quadrupoleXZ*gqxz6 + atom1.quadrupoleYZ*gqyz6)) +
syk*(atom1.quadrupoleXX*gqxx8 + atom1.quadrupoleYY*gqyy8 + atom1.quadrupoleZZ*gqzz8 + 2*(atom1.quadrupoleXY*gqxy8 + atom1.quadrupoleXZ*gqxz8 + atom1.quadrupoleYZ*gqyz8)) +
szk*(atom1.quadrupoleXX*gqxx9 + atom1.quadrupoleYY*gqyy9 + atom1.quadrupoleZZ*gqzz9 + 2*(atom1.quadrupoleXY*gqxy9 + atom1.quadrupoleXZ*gqxz9 + atom1.quadrupoleYZ*gqyz9));
dpdy = sxi*(atom2.quadrupoleXX*gqxx6 + atom2.quadrupoleYY*gqyy6 + atom2.quadrupoleZZ*gqzz6 + 2*(atom2.quadrupoleXY*gqxy6 + atom2.quadrupoleXZ*gqxz6 + atom2.quadrupoleYZ*gqyz6)) +
syi*(atom2.quadrupoleXX*gqxx8 + atom2.quadrupoleYY*gqyy8 + atom2.quadrupoleZZ*gqzz8 + 2*(atom2.quadrupoleXY*gqxy8 + atom2.quadrupoleXZ*gqxz8 + atom2.quadrupoleYZ*gqyz8)) +
szi*(atom2.quadrupoleXX*gqxx9 + atom2.quadrupoleYY*gqyy9 + atom2.quadrupoleZZ*gqzz9 + 2*(atom2.quadrupoleXY*gqxy9 + atom2.quadrupoleXZ*gqxz9 + atom2.quadrupoleYZ*gqyz9));
dpdz = sxi*(atom2.quadrupoleXX*gqxx7 + atom2.quadrupoleYY*gqyy7 + atom2.quadrupoleZZ*gqzz7 + 2*(atom2.quadrupoleXY*gqxy7 + atom2.quadrupoleXZ*gqxz7 + atom2.quadrupoleYZ*gqyz7)) +
syi*(atom2.quadrupoleXX*gqxx9 + atom2.quadrupoleYY*gqyy9 + atom2.quadrupoleZZ*gqzz9 + 2*(atom2.quadrupoleXY*gqxy9 + atom2.quadrupoleXZ*gqxz9 + atom2.quadrupoleYZ*gqyz9)) +
szi*(atom2.quadrupoleXX*gqxx10 + atom2.quadrupoleYY*gqyy10 + atom2.quadrupoleZZ*gqzz10 + 2*(atom2.quadrupoleXY*gqxy10 + atom2.quadrupoleXZ*gqxz10 + atom2.quadrupoleYZ*gqyz10));
dpdz += sxk*(atom1.quadrupoleXX*gqxx7 + atom1.quadrupoleYY*gqyy7 + atom1.quadrupoleZZ*gqzz7 + 2*(atom1.quadrupoleXY*gqxy7 + atom1.quadrupoleXZ*gqxz7 + atom1.quadrupoleYZ*gqyz7)) +
syk*(atom1.quadrupoleXX*gqxx9 + atom1.quadrupoleYY*gqyy9 + atom1.quadrupoleZZ*gqzz9 + 2*(atom1.quadrupoleXY*gqxy9 + atom1.quadrupoleXZ*gqxz9 + atom1.quadrupoleYZ*gqyz9)) +
szk*(atom1.quadrupoleXX*gqxx10 + atom1.quadrupoleYY*gqyy10 + atom1.quadrupoleZZ*gqzz10 + 2*(atom1.quadrupoleXY*gqxy10 + atom1.quadrupoleXZ*gqxz10 + atom1.quadrupoleYZ*gqyz10));
#endif
#endif
// Born radii derivatives of the unweighted 2nd reaction;
// potential gradient tensor;
#if defined B1
real gc25 = b01 + xr2*b02;
real gc26 = xr*yr*b02;
real gc27 = xr*zr*b02;
real gc28 = b01 + yr2*b02;
real gc29 = yr*zr*b02;
real gc30 = b01 + zr2*b02;
dsumdrB1 += atom1.q*(atom2.quadrupoleXX*gc25 + atom2.quadrupoleYY*gc28 + atom2.quadrupoleZZ*gc30 + 2*(atom2.quadrupoleXY*gc26 + atom2.quadrupoleXZ*gc27 + atom2.quadrupoleYZ*gc29));
dsumdrB1 += atom2.q*(atom1.quadrupoleXX*gc25 + atom1.quadrupoleYY*gc28 + atom1.quadrupoleZZ*gc30 + 2*(atom1.quadrupoleXY*gc26 + atom1.quadrupoleXZ*gc27 + atom1.quadrupoleYZ*gc29));
#endif
#if defined B1 || defined B2
real gux25 = xr*(3*b11 + xr2*b12);
real gux26 = yr*(b11 + xr2*b12);
real gux27 = zr*(b11 + xr2*b12);
real gux28 = xr*(b11 + yr2*b12);
real gux29 = zr*xr*yr*b12;
real gux30 = xr*(b11 + zr2*b12);
real guy25 = yr*(b11 + xr2*b12);
real guy26 = xr*(b11 + yr2*b12);
real guy27 = gux29;
real guy28 = yr*(3*b11 + yr2*b12);
real guy29 = zr*(b11 + yr2*b12);
real guy30 = yr*(b11 + zr2*b12);
real guz25 = zr*(b11 + xr2*b12);
real guz26 = gux29;
real guz27 = xr*(b11 + zr2*b12);
real guz28 = zr*(b11 + yr2*b12);
real guz29 = yr*(b11 + zr2*b12);
real guz30 = zr*(3*b11 + zr2*b12);
#endif
#if defined B2
dsumdrB2 = sxi*(atom2.quadrupoleXX*gux25 + atom2.quadrupoleYY*gux28 + atom2.quadrupoleZZ*gux30 + 2*(atom2.quadrupoleXY*gux26 + atom2.quadrupoleXZ*gux27 + atom2.quadrupoleYZ*gux29)) +
syi*(atom2.quadrupoleXX*guy25 + atom2.quadrupoleYY*guy28 + atom2.quadrupoleZZ*guy30 + 2*(atom2.quadrupoleXY*guy26 + atom2.quadrupoleXZ*guy27 + atom2.quadrupoleYZ*guy29)) +
szi*(atom2.quadrupoleXX*guz25 + atom2.quadrupoleYY*guz28 + atom2.quadrupoleZZ*guz30 + 2*(atom2.quadrupoleXY*guz26 + atom2.quadrupoleXZ*guz27 + atom2.quadrupoleYZ*guz29));
dsumdrB2 += sxk*(atom1.quadrupoleXX*gux25 + atom1.quadrupoleYY*gux28 + atom1.quadrupoleZZ*gux30 + 2*(atom1.quadrupoleXY*gux26 + atom1.quadrupoleXZ*gux27 + atom1.quadrupoleYZ*gux29)) +
syk*(atom1.quadrupoleXX*guy25 + atom1.quadrupoleYY*guy28 + atom1.quadrupoleZZ*guy30 + 2*(atom1.quadrupoleXY*guy26 + atom1.quadrupoleXZ*guy27 + atom1.quadrupoleYZ*guy29)) +
szk*(atom1.quadrupoleXX*guz25 + atom1.quadrupoleYY*guz28 + atom1.quadrupoleZZ*guz30 + 2*(atom1.quadrupoleXY*guz26 + atom1.quadrupoleXZ*guz27 + atom1.quadrupoleYZ*guz29));
#endif
#if defined B1
dsumdrB1 = atom1.dipole.x*(atom2.quadrupoleXX*gux25 + atom2.quadrupoleYY*gux28 + atom2.quadrupoleZZ*gux30 + 2*(atom2.quadrupoleXY*gux26 + atom2.quadrupoleXZ*gux27 + atom2.quadrupoleYZ*gux29)) +
atom1.dipole.y*(atom2.quadrupoleXX*guy25 + atom2.quadrupoleYY*guy28 + atom2.quadrupoleZZ*guy30 + 2*(atom2.quadrupoleXY*guy26 + atom2.quadrupoleXZ*guy27 + atom2.quadrupoleYZ*guy29)) +
atom1.dipole.z*(atom2.quadrupoleXX*guz25 + atom2.quadrupoleYY*guz28 + atom2.quadrupoleZZ*guz30 + 2*(atom2.quadrupoleXY*guz26 + atom2.quadrupoleXZ*guz27 + atom2.quadrupoleYZ*guz29));
dsumdrB1 += atom2.dipole.x*(atom1.quadrupoleXX*gux25 + atom1.quadrupoleYY*gux28 + atom1.quadrupoleZZ*gux30 + 2*(atom1.quadrupoleXY*gux26 + atom1.quadrupoleXZ*gux27 + atom1.quadrupoleYZ*gux29)) +
atom2.dipole.y*(atom1.quadrupoleXX*guy25 + atom1.quadrupoleYY*guy28 + atom1.quadrupoleZZ*guy30 + 2*(atom1.quadrupoleXY*guy26 + atom1.quadrupoleXZ*guy27 + atom1.quadrupoleYZ*guy29)) +
atom2.dipole.z*(atom1.quadrupoleXX*guz25 + atom1.quadrupoleYY*guz28 + atom1.quadrupoleZZ*guz30 + 2*(atom1.quadrupoleXY*guz26 + atom1.quadrupoleXZ*guz27 + atom1.quadrupoleYZ*guz29));
real gqxx25 = 2*b20 + xr2*(5*b21 + xr2*b22);
real gqxx26 = yr*xr*(2*b21 + xr2*b22);
real gqxx27 = zr*xr*(2*b21 + xr2*b22);
real gqxx28 = xr2*(b21 + yr2*b22);
real gqxx29 = zr*yr*xr2*b22;
real gqxx30 = xr2*(b21 + zr2*b22);
real gqyy25 = yr2*(b21 + xr2*b22);
real gqyy26 = xr*yr*(2*b21 + yr2*b22);
real gqyy27 = xr*zr*yr2*b22;
real gqyy28 = 2*b20 + yr2*(5*b21 + yr2*b22);
real gqyy29 = yr*zr*(2*b21 + yr2*b22);
real gqyy30 = yr2*(b21 + zr2*b22);
real gqzz25 = zr2*(b21 + xr2*b22);
real gqzz26 = xr*yr*zr2*b22;
real gqzz27 = xr*zr*(2*b21 + zr2*b22);
real gqzz28 = zr2*(b21 + yr2*b22);
real gqzz29 = yr*zr*(2*b21 + zr2*b22);
real gqzz30 = 2*b20 + zr2*(5*b21 + zr2*b22);
real gqxy25 = xr*yr*(3*b21 + xr2*b22);
real gqxy26 = b20 + (xr2 + yr2)*b21 + xr2*yr2*b22;
real gqxy27 = zr*yr*(b21 + xr2*b22);
real gqxy28 = xr*yr*(3*b21 + yr2*b22);
real gqxy29 = zr*xr*(b21 + yr2*b22);
real gqxy30 = xr*yr*(b21 + zr2*b22);
real gqxz25 = xr*zr*(3*b21 + xr2*b22);
real gqxz26 = yr*zr*(b21 + xr2*b22);
real gqxz27 = b20 + (xr2 + zr2)*b21 + xr2*zr2*b22;
real gqxz28 = xr*zr*(b21 + yr2*b22);
real gqxz29 = xr*yr*(b21 + zr2*b22);
real gqxz30 = xr*zr*(3*b21 + zr2*b22);
real gqyz25 = zr*yr*(b21 + xr2*b22);
real gqyz26 = xr*zr*(b21 + yr2*b22);
real gqyz27 = xr*yr*(b21 + zr2*b22);
real gqyz28 = yr*zr*(3*b21 + yr2*b22);
real gqyz29 = b20 + (yr2 + zr2)*b21 + yr2*zr2*b22;
real gqyz30 = yr*zr*(3*b21 + zr2*b22);
dsumdrB1 +=
atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx25 + atom2.quadrupoleYY*gqxx28 + atom2.quadrupoleZZ*gqxx30 + 2*(atom2.quadrupoleXY*gqxx26 + atom2.quadrupoleXZ*gqxx27 + atom2.quadrupoleYZ*gqxx29)) +
atom1.quadrupoleYY*(atom2.quadrupoleXX*gqyy25 + atom2.quadrupoleYY*gqyy28 + atom2.quadrupoleZZ*gqyy30 + 2*(atom2.quadrupoleXY*gqyy26 + atom2.quadrupoleXZ*gqyy27 + atom2.quadrupoleYZ*gqyy29)) +
atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqzz25 + atom2.quadrupoleYY*gqzz28 + atom2.quadrupoleZZ*gqzz30 + 2*(atom2.quadrupoleXY*gqzz26 + atom2.quadrupoleXZ*gqzz27 + atom2.quadrupoleYZ*gqzz29));
dsumdrB1 += 2*(
atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxy25 + atom2.quadrupoleYY*gqxy28 + atom2.quadrupoleZZ*gqxy30 + 2*(atom2.quadrupoleXY*gqxy26 + atom2.quadrupoleXZ*gqxy27 + atom2.quadrupoleYZ*gqxy29)) +
atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxz25 + atom2.quadrupoleYY*gqxz28 + atom2.quadrupoleZZ*gqxz30 + 2*(atom2.quadrupoleXY*gqxz26 + atom2.quadrupoleXZ*gqxz27 + atom2.quadrupoleYZ*gqxz29)) +
atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqyz25 + atom2.quadrupoleYY*gqyz28 + atom2.quadrupoleZZ*gqyz30 + 2*(atom2.quadrupoleXY*gqyz26 + atom2.quadrupoleXZ*gqyz27 + atom2.quadrupoleYZ*gqyz29)));
dsumdrB1 +=
atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx25 + atom2.quadrupoleYY*gqyy25 + atom2.quadrupoleZZ*gqzz25 + 2*(atom2.quadrupoleXY*gqxy25 + atom2.quadrupoleXZ*gqxz25 + atom2.quadrupoleYZ*gqyz25)) +
atom1.quadrupoleYY*(atom2.quadrupoleXX*gqxx28 + atom2.quadrupoleYY*gqyy28 + atom2.quadrupoleZZ*gqzz28 + 2*(atom2.quadrupoleXY*gqxy28 + atom2.quadrupoleXZ*gqxz28 + atom2.quadrupoleYZ*gqyz28)) +
atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqxx30 + atom2.quadrupoleYY*gqyy30 + atom2.quadrupoleZZ*gqzz30 + 2*(atom2.quadrupoleXY*gqxy30 + atom2.quadrupoleXZ*gqxz30 + atom2.quadrupoleYZ*gqyz30));
dsumdrB1 += 2*(
atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxx26 + atom2.quadrupoleYY*gqyy26 + atom2.quadrupoleZZ*gqzz26 + 2*(atom2.quadrupoleXY*gqxy26 + atom2.quadrupoleXZ*gqxz26 + atom2.quadrupoleYZ*gqyz26)) +
atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxx27 + atom2.quadrupoleYY*gqyy27 + atom2.quadrupoleZZ*gqzz27 + 2*(atom2.quadrupoleXY*gqxy27 + atom2.quadrupoleXZ*gqxz27 + atom2.quadrupoleYZ*gqyz27)) +
atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqxx29 + atom2.quadrupoleYY*gqyy29 + atom2.quadrupoleZZ*gqzz29 + 2*(atom2.quadrupoleXY*gqxy29 + atom2.quadrupoleXZ*gqxz29 + atom2.quadrupoleYZ*gqyz29)));
dsumdrB1 *= 0.5f;
atom1.bornForce += atom2.bornRadius*dsumdrB1;
atom2.bornForce += atom1.bornRadius*dsumdrB1;
#endif
// unweighted 3rd reaction potential gradient tensor;
#if defined F1
real gc11 = xr*(3*a02 + xr2*a03);
real gc12 = yr*(a02 + xr2*a03);
real gc13 = zr*(a02 + xr2*a03);
real gc14 = xr*(a02 + yr2*a03);
real gc15 = xr*yr*zr*a03;
real gc16 = xr*(a02 + zr2*a03);
real gc17 = yr*(3*a02 + yr2*a03);
real gc18 = zr*(a02 + yr2*a03);
real gc19 = yr*(a02 + zr2*a03);
real gc20 = zr*(3*a02 + zr2*a03);
dedx += atom1.q*(atom2.quadrupoleXX*gc11 + atom2.quadrupoleYY*gc14 + atom2.quadrupoleZZ*gc16 + 2*(atom2.quadrupoleXY*gc12 + atom2.quadrupoleXZ*gc13 + atom2.quadrupoleYZ*gc15));
dedx += atom2.q*(atom1.quadrupoleXX*gc11 + atom1.quadrupoleYY*gc14 + atom1.quadrupoleZZ*gc16 + 2*(atom1.quadrupoleXY*gc12 + atom1.quadrupoleXZ*gc13 + atom1.quadrupoleYZ*gc15));
dedy += atom1.q*(atom2.quadrupoleXX*gc12 + atom2.quadrupoleYY*gc17 + atom2.quadrupoleZZ*gc19 + 2*(atom2.quadrupoleXY*gc14 + atom2.quadrupoleXZ*gc15 + atom2.quadrupoleYZ*gc18));
dedy += atom2.q*(atom1.quadrupoleXX*gc12 + atom1.quadrupoleYY*gc17 + atom1.quadrupoleZZ*gc19 + 2*(atom1.quadrupoleXY*gc14 + atom1.quadrupoleXZ*gc15 + atom1.quadrupoleYZ*gc18));
dedz += atom1.q*(atom2.quadrupoleXX*gc13 + atom2.quadrupoleYY*gc18 + atom2.quadrupoleZZ*gc20 + 2*(atom2.quadrupoleXY*gc15 + atom2.quadrupoleXZ*gc16 + atom2.quadrupoleYZ*gc19));
dedz += atom2.q*(atom1.quadrupoleXX*gc13 + atom1.quadrupoleYY*gc18 + atom1.quadrupoleZZ*gc20 + 2*(atom1.quadrupoleXY*gc15 + atom1.quadrupoleXZ*gc16 + atom1.quadrupoleYZ*gc19));
#endif
#if defined F1 || defined F2
real gux11 = 3*a11 + xr2*(6*a12 + xr2*a13);
real gux12 = xr*yr*(3*a12 + xr2*a13);
real gux13 = xr*zr*(3*a12 + xr2*a13);
real gux14 = a11 + (xr2 + yr2)*a12 + xr2*yr2*a13;
real gux15 = yr*zr*(a12 + xr2*a13);
real gux16 = a11 + (xr2 + zr2)*a12 + xr2*zr2*a13;
real gux17 = xr*yr*(3*a12 + yr2*a13);
real gux18 = xr*zr*(a12 + yr2*a13);
real gux19 = xr*yr*(a12 + zr2*a13);
real gux20 = xr*zr*(3*a12 + zr2*a13);
real guy11 = gux12;
real guy12 = gux14;
real guy13 = gux15;
real guy14 = gux17;
real guy15 = gux18;
real guy16 = gux19;
real guy17 = 3*a11 + yr2*(6*a12 + yr2*a13);
real guy18 = yr*zr*(3*a12 + yr2*a13);
real guy19 = a11 + (yr2 + zr2)*a12 + yr2*zr2*a13;
real guy20 = yr*zr*(3*a12 + zr2*a13);
real guz11 = gux13;
real guz12 = gux15;
real guz13 = gux16;
real guz14 = gux18;
real guz15 = gux19;
real guz16 = gux20;
real guz17 = guy18;
real guz18 = guy19;
real guz19 = guy20;
real guz20 = 3*a11 + zr2*(6*a12 + zr2*a13);
#if defined F1
dedx = atom1.dipole.x*(atom2.quadrupoleXX*gux11 + atom2.quadrupoleYY*gux14 + atom2.quadrupoleZZ*gux16 + 2*(atom2.quadrupoleXY*gux12 + atom2.quadrupoleXZ*gux13 + atom2.quadrupoleYZ*gux15)) +
atom1.dipole.y*(atom2.quadrupoleXX*guy11 + atom2.quadrupoleYY*guy14 + atom2.quadrupoleZZ*guy16 + 2*(atom2.quadrupoleXY*guy12 + atom2.quadrupoleXZ*guy13 + atom2.quadrupoleYZ*guy15)) +
atom1.dipole.z*(atom2.quadrupoleXX*guz11 + atom2.quadrupoleYY*guz14 + atom2.quadrupoleZZ*guz16 + 2*(atom2.quadrupoleXY*guz12 + atom2.quadrupoleXZ*guz13 + atom2.quadrupoleYZ*guz15));
dedx += atom2.dipole.x*(atom1.quadrupoleXX*gux11 + atom1.quadrupoleYY*gux14 + atom1.quadrupoleZZ*gux16 + 2*(atom1.quadrupoleXY*gux12 + atom1.quadrupoleXZ*gux13 + atom1.quadrupoleYZ*gux15)) +
atom2.dipole.y*(atom1.quadrupoleXX*guy11 + atom1.quadrupoleYY*guy14 + atom1.quadrupoleZZ*guy16 + 2*(atom1.quadrupoleXY*guy12 + atom1.quadrupoleXZ*guy13 + atom1.quadrupoleYZ*guy15)) +
atom2.dipole.z*(atom1.quadrupoleXX*guz11 + atom1.quadrupoleYY*guz14 + atom1.quadrupoleZZ*guz16 + 2*(atom1.quadrupoleXY*guz12 + atom1.quadrupoleXZ*guz13 + atom1.quadrupoleYZ*guz15));
dedy = atom1.dipole.x*(atom2.quadrupoleXX*gux12 + atom2.quadrupoleYY*gux17 + atom2.quadrupoleZZ*gux19 + 2*(atom2.quadrupoleXY*gux14 + atom2.quadrupoleXZ*gux15 + atom2.quadrupoleYZ*gux18)) +
atom1.dipole.y*(atom2.quadrupoleXX*guy12 + atom2.quadrupoleYY*guy17 + atom2.quadrupoleZZ*guy19 + 2*(atom2.quadrupoleXY*guy14 + atom2.quadrupoleXZ*guy15 + atom2.quadrupoleYZ*guy18)) +
atom1.dipole.z*(atom2.quadrupoleXX*guz12 + atom2.quadrupoleYY*guz17 + atom2.quadrupoleZZ*guz19 + 2*(atom2.quadrupoleXY*guz14 + atom2.quadrupoleXZ*guz15 + atom2.quadrupoleYZ*guz18));
dedy += atom2.dipole.x*(atom1.quadrupoleXX*gux12 + atom1.quadrupoleYY*gux17 + atom1.quadrupoleZZ*gux19 + 2*(atom1.quadrupoleXY*gux14 + atom1.quadrupoleXZ*gux15 + atom1.quadrupoleYZ*gux18)) +
atom2.dipole.y*(atom1.quadrupoleXX*guy12 + atom1.quadrupoleYY*guy17 + atom1.quadrupoleZZ*guy19 + 2*(atom1.quadrupoleXY*guy14 + atom1.quadrupoleXZ*guy15 + atom1.quadrupoleYZ*guy18)) +
atom2.dipole.z*(atom1.quadrupoleXX*guz12 + atom1.quadrupoleYY*guz17 + atom1.quadrupoleZZ*guz19 + 2*(atom1.quadrupoleXY*guz14 + atom1.quadrupoleXZ*guz15 + atom1.quadrupoleYZ*guz18));
dedz = atom1.dipole.x*(atom2.quadrupoleXX*gux13 + atom2.quadrupoleYY*gux18 + atom2.quadrupoleZZ*gux20 + 2*(atom2.quadrupoleXY*gux15 + atom2.quadrupoleXZ*gux16 + atom2.quadrupoleYZ*gux19)) +
atom1.dipole.y*(atom2.quadrupoleXX*guy13 + atom2.quadrupoleYY*guy18 + atom2.quadrupoleZZ*guy20 + 2*(atom2.quadrupoleXY*guy15 + atom2.quadrupoleXZ*guy16 + atom2.quadrupoleYZ*guy19)) +
atom1.dipole.z*(atom2.quadrupoleXX*guz13 + atom2.quadrupoleYY*guz18 + atom2.quadrupoleZZ*guz20 + 2*(atom2.quadrupoleXY*guz15 + atom2.quadrupoleXZ*guz16 + atom2.quadrupoleYZ*guz19));
dedz += atom2.dipole.x*(atom1.quadrupoleXX*gux13 + atom1.quadrupoleYY*gux18 + atom1.quadrupoleZZ*gux20 + 2*(atom1.quadrupoleXY*gux15 + atom1.quadrupoleXZ*gux16 + atom1.quadrupoleYZ*gux19)) +
atom2.dipole.y*(atom1.quadrupoleXX*guy13 + atom1.quadrupoleYY*guy18 + atom1.quadrupoleZZ*guy20 + 2*(atom1.quadrupoleXY*guy15 + atom1.quadrupoleXZ*guy16 + atom1.quadrupoleYZ*guy19)) +
atom2.dipole.z*(atom1.quadrupoleXX*guz13 + atom1.quadrupoleYY*guz18 + atom1.quadrupoleZZ*guz20 + 2*(atom1.quadrupoleXY*guz15 + atom1.quadrupoleXZ*guz16 + atom1.quadrupoleYZ*guz19));
#endif
#if defined F2
dpdx = sxi*(atom2.quadrupoleXX*gux11 + atom2.quadrupoleYY*gux14 + atom2.quadrupoleZZ*gux16 + 2*(atom2.quadrupoleXY*gux12 + atom2.quadrupoleXZ*gux13 + atom2.quadrupoleYZ*gux15)) +
syi*(atom2.quadrupoleXX*guy11 + atom2.quadrupoleYY*guy14 + atom2.quadrupoleZZ*guy16 + 2*(atom2.quadrupoleXY*guy12 + atom2.quadrupoleXZ*guy13 + atom2.quadrupoleYZ*guy15)) +
szi*(atom2.quadrupoleXX*guz11 + atom2.quadrupoleYY*guz14 + atom2.quadrupoleZZ*guz16 + 2*(atom2.quadrupoleXY*guz12 + atom2.quadrupoleXZ*guz13 + atom2.quadrupoleYZ*guz15));
dpdx += sxk*(atom1.quadrupoleXX*gux11 + atom1.quadrupoleYY*gux14 + atom1.quadrupoleZZ*gux16 + 2*(atom1.quadrupoleXY*gux12 + atom1.quadrupoleXZ*gux13 + atom1.quadrupoleYZ*gux15)) +
syk*(atom1.quadrupoleXX*guy11 + atom1.quadrupoleYY*guy14 + atom1.quadrupoleZZ*guy16 + 2*(atom1.quadrupoleXY*guy12 + atom1.quadrupoleXZ*guy13 + atom1.quadrupoleYZ*guy15)) +
szk*(atom1.quadrupoleXX*guz11 + atom1.quadrupoleYY*guz14 + atom1.quadrupoleZZ*guz16 + 2*(atom1.quadrupoleXY*guz12 + atom1.quadrupoleXZ*guz13 + atom1.quadrupoleYZ*guz15));
dpdy = sxi*(atom2.quadrupoleXX*gux12 + atom2.quadrupoleYY*gux17 + atom2.quadrupoleZZ*gux19 + 2*(atom2.quadrupoleXY*gux14 + atom2.quadrupoleXZ*gux15 + atom2.quadrupoleYZ*gux18)) +
syi*(atom2.quadrupoleXX*guy12 + atom2.quadrupoleYY*guy17 + atom2.quadrupoleZZ*guy19 + 2*(atom2.quadrupoleXY*guy14 + atom2.quadrupoleXZ*guy15 + atom2.quadrupoleYZ*guy18)) +
szi*(atom2.quadrupoleXX*guz12 + atom2.quadrupoleYY*guz17 + atom2.quadrupoleZZ*guz19 + 2*(atom2.quadrupoleXY*guz14 + atom2.quadrupoleXZ*guz15 + atom2.quadrupoleYZ*guz18));
dpdy += sxk*(atom1.quadrupoleXX*gux12 + atom1.quadrupoleYY*gux17 + atom1.quadrupoleZZ*gux19 + 2*(atom1.quadrupoleXY*gux14 + atom1.quadrupoleXZ*gux15 + atom1.quadrupoleYZ*gux18)) +
syk*(atom1.quadrupoleXX*guy12 + atom1.quadrupoleYY*guy17 + atom1.quadrupoleZZ*guy19 + 2*(atom1.quadrupoleXY*guy14 + atom1.quadrupoleXZ*guy15 + atom1.quadrupoleYZ*guy18)) +
szk*(atom1.quadrupoleXX*guz12 + atom1.quadrupoleYY*guz17 + atom1.quadrupoleZZ*guz19 + 2*(atom1.quadrupoleXY*guz14 + atom1.quadrupoleXZ*guz15 + atom1.quadrupoleYZ*guz18));
dpdz = sxi*(atom2.quadrupoleXX*gux13 + atom2.quadrupoleYY*gux18 + atom2.quadrupoleZZ*gux20 + 2*(atom2.quadrupoleXY*gux15 + atom2.quadrupoleXZ*gux16 + atom2.quadrupoleYZ*gux19)) +
syi*(atom2.quadrupoleXX*guy13 + atom2.quadrupoleYY*guy18 + atom2.quadrupoleZZ*guy20 + 2*(atom2.quadrupoleXY*guy15 + atom2.quadrupoleXZ*guy16 + atom2.quadrupoleYZ*guy19)) +
szi*(atom2.quadrupoleXX*guz13 + atom2.quadrupoleYY*guz18 + atom2.quadrupoleZZ*guz20 + 2*(atom2.quadrupoleXY*guz15 + atom2.quadrupoleXZ*guz16 + atom2.quadrupoleYZ*guz19));
dpdz += sxk*(atom1.quadrupoleXX*gux13 + atom1.quadrupoleYY*gux18 + atom1.quadrupoleZZ*gux20 + 2*(atom1.quadrupoleXY*gux15 + atom1.quadrupoleXZ*gux16 + atom1.quadrupoleYZ*gux19)) +
syk*(atom1.quadrupoleXX*guy13 + atom1.quadrupoleYY*guy18 + atom1.quadrupoleZZ*guy20 + 2*(atom1.quadrupoleXY*guy15 + atom1.quadrupoleXZ*guy16 + atom1.quadrupoleYZ*guy19)) +
szk*(atom1.quadrupoleXX*guz13 + atom1.quadrupoleYY*guz18 + atom1.quadrupoleZZ*guz20 + 2*(atom1.quadrupoleXY*guz15 + atom1.quadrupoleXZ*guz16 + atom1.quadrupoleYZ*guz19));
#endif
#endif
#if defined F1
real gqxx11 = xr*(12*a21 + xr2*(9*a22 + xr2*a23));
real gqxx12 = yr*(2*a21 + xr2*(5*a22 + xr2*a23));
real gqxx13 = zr*(2*a21 + xr2*(5*a22 + xr2*a23));
real gqxx14 = xr*(2*a21 + yr2*2*a22 + xr2*(a22 + yr2*a23));
real gqxx15 = xr*yr*zr*(2*a22 + xr2*a23);
real gqxx16 = xr*(2*a21 + zr2*2*a22 + xr2*(a22 + zr2*a23));
real gqxx17 = yr*xr2*(3*a22 + yr2*a23);
real gqxx18 = zr*xr2*(a22 + yr2*a23);
real gqxx19 = yr*xr2*(a22 + zr2*a23);
real gqxx20 = zr*xr2*(3*a22 + zr2*a23);
real gqxy11 = yr*(3*a21 + xr2*(6*a22 + xr2*a23));
real gqxy12 = xr*(3*(a21 + yr2*a22) + xr2*(a22 + yr2*a23));
real gqxy13 = xr*yr*zr*(3*a22 + xr2*a23);
real gqxy14 = yr*(3*(a21 + xr2*a22) + yr2*(a22 + xr2*a23));
real gqxy15 = zr*(a21 + (yr2 + xr2)*a22 + yr2*xr2*a23);
real gqxy16 = yr*(a21 + (xr2 + zr2)*a22 + xr2*zr2*a23);
real gqxy17 = xr*(3*(a21 + yr2*a22) + yr2*(3*a22 + yr2*a23));
real gqxy18 = xr*yr*zr*(3*a22 + yr2*a23);
real gqxy19 = xr*(a21 + (yr2 + zr2)*a22 + yr2*zr2*a23);
real gqxy20 = xr*yr*zr*(3*a22 + zr2*a23);
real gqxz11 = zr*(3*a21 + xr2*(6*a22 + xr2*a23));
real gqxz12 = xr*yr*zr*(3*a22 + xr2*a23);
real gqxz13 = xr*(3*(a21 + zr2*a22) + xr2*(a22 + zr2*a23));
real gqxz14 = zr*(a21 + (xr2 + yr2)*a22 + xr2*yr2*a23);
real gqxz15 = yr*(a21 + (xr2 + zr2)*a22 + zr2*xr2*a23);
real gqxz16 = zr*(3*(a21 + xr2*a22) + zr2*(a22 + xr2*a23));
real gqxz17 = xr*yr*zr*(3*a22 + yr2*a23);
real gqxz18 = xr*(a21 + (zr2 + yr2)*a22 + zr2*yr2*a23);
real gqxz19 = xr*yr*zr*(3*a22 + zr2*a23);
real gqxz20 = xr*(3*a21 + zr2*(6*a22 + zr2*a23));
real gqyy11 = xr*yr2*(3*a22 + xr2*a23);
real gqyy12 = yr*(2*a21 + xr2*2*a22 + yr2*(a22 + xr2*a23));
real gqyy13 = zr*yr2*(a22 + xr2*a23);
real gqyy14 = xr*(2*a21 + yr2*(5*a22 + yr2*a23));
real gqyy15 = xr*yr*zr*(2*a22 + yr2*a23);
real gqyy16 = xr*yr2*(a22 + zr2*a23);
real gqyy17 = yr*(12*a21 + yr2*(9*a22 + yr2*a23));
real gqyy18 = zr*(2*a21 + yr2*(5*a22 + yr2*a23));
real gqyy19 = yr*(2*a21 + zr2*2*a22 + yr2*(a22 + zr2*a23));
real gqyy20 = zr*yr2*(3*a22 + zr2*a23);
real gqyz11 = xr*yr*zr*(3*a22 + xr2*a23);
real gqyz12 = zr*(a21 + (xr2 + yr2)*a22 + xr2*yr2*a23);
real gqyz13 = yr*(a21 + (xr2 + zr2)*a22 + xr2*zr2*a23);
real gqyz14 = xr*yr*zr*(3*a22 + yr2*a23);
real gqyz15 = xr*(a21 + (yr2 + zr2)*a22 + yr2*zr2*a23);
real gqyz16 = xr*yr*zr*(3*a22 + zr2*a23);
real gqyz17 = zr*(3*a21 + yr2*(6*a22 + yr2*a23));
real gqyz18 = yr*(3*(a21 + zr2*a22) + yr2*(a22 + zr2*a23));
real gqyz19 = zr*(3*(a21 + yr2*a22) + zr2*(a22 + yr2*a23));
real gqyz20 = yr*(3*a21 + zr2*(6*a22 + zr2*a23));
real gqzz11 = xr*zr2*(3*a22 + xr2*a23);
real gqzz12 = yr*(zr2*a22 + xr2*(zr2*a23));
real gqzz13 = zr*(2*a21 + xr2*2*a22 + zr2*(a22 + xr2*a23));
real gqzz14 = xr*zr2*(a22 + yr2*a23);
real gqzz15 = xr*yr*zr*(2*a22 + zr2*a23);
real gqzz16 = xr*(2*a21 + zr2*(5*a22 + zr2*a23));
real gqzz17 = yr*zr2*(3*a22 + yr2*a23);
real gqzz18 = zr*(2*a21 + yr2*2*a22 + zr2*(a22 + yr2*a23));
real gqzz19 = yr*(2*a21 + zr2*(5*a22 + zr2*a23));
real gqzz20 = zr*(12*a21 + zr2*(9*a22 + zr2*a23));
dedx += atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx11 + atom2.quadrupoleYY*gqxx14 + atom2.quadrupoleZZ*gqxx16 + 2*(atom2.quadrupoleXY*gqxx12 + atom2.quadrupoleXZ*gqxx13 + atom2.quadrupoleYZ*gqxx15)) +
atom1.quadrupoleYY*(atom2.quadrupoleXX*gqyy11 + atom2.quadrupoleYY*gqyy14 + atom2.quadrupoleZZ*gqyy16 + 2*(atom2.quadrupoleXY*gqyy12 + atom2.quadrupoleXZ*gqyy13 + atom2.quadrupoleYZ*gqyy15)) +
atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqzz11 + atom2.quadrupoleYY*gqzz14 + atom2.quadrupoleZZ*gqzz16 + 2*(atom2.quadrupoleXY*gqzz12 + atom2.quadrupoleXZ*gqzz13 + atom2.quadrupoleYZ*gqzz15)) +
2*(
atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxy11 + atom2.quadrupoleYY*gqxy14 + atom2.quadrupoleZZ*gqxy16 + 2*(atom2.quadrupoleXY*gqxy12 + atom2.quadrupoleXZ*gqxy13 + atom2.quadrupoleYZ*gqxy15)) +
atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxz11 + atom2.quadrupoleYY*gqxz14 + atom2.quadrupoleZZ*gqxz16 + 2*(atom2.quadrupoleXY*gqxz12 + atom2.quadrupoleXZ*gqxz13 + atom2.quadrupoleYZ*gqxz15)) +
atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqyz11 + atom2.quadrupoleYY*gqyz14 + atom2.quadrupoleZZ*gqyz16 + 2*(atom2.quadrupoleXY*gqyz12 + atom2.quadrupoleXZ*gqyz13 + atom2.quadrupoleYZ*gqyz15))) +
atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx11 + atom2.quadrupoleYY*gqyy11 + atom2.quadrupoleZZ*gqzz11 + 2*(atom2.quadrupoleXY*gqxy11 + atom2.quadrupoleXZ*gqxz11 + atom2.quadrupoleYZ*gqyz11)) +
atom1.quadrupoleYY*(atom2.quadrupoleXX*gqxx14 + atom2.quadrupoleYY*gqyy14 + atom2.quadrupoleZZ*gqzz14 + 2*(atom2.quadrupoleXY*gqxy14 + atom2.quadrupoleXZ*gqxz14 + atom2.quadrupoleYZ*gqyz14)) +
atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqxx16 + atom2.quadrupoleYY*gqyy16 + atom2.quadrupoleZZ*gqzz16 + 2*(atom2.quadrupoleXY*gqxy16 + atom2.quadrupoleXZ*gqxz16 + atom2.quadrupoleYZ*gqyz16)) +
2*(
atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxx12 + atom2.quadrupoleYY*gqyy12 + atom2.quadrupoleZZ*gqzz12 + 2*(atom2.quadrupoleXY*gqxy12 + atom2.quadrupoleXZ*gqxz12 + atom2.quadrupoleYZ*gqyz12)) +
atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxx13 + atom2.quadrupoleYY*gqyy13 + atom2.quadrupoleZZ*gqzz13 + 2*(atom2.quadrupoleXY*gqxy13 + atom2.quadrupoleXZ*gqxz13 + atom2.quadrupoleYZ*gqyz13)) +
atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqxx15 + atom2.quadrupoleYY*gqyy15 + atom2.quadrupoleZZ*gqzz15 + 2*(atom2.quadrupoleXY*gqxy15 + atom2.quadrupoleXZ*gqxz15 + atom2.quadrupoleYZ*gqyz15)));
dedy += atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx12 + atom2.quadrupoleYY*gqxx17 + atom2.quadrupoleZZ*gqxx19 + 2*(atom2.quadrupoleXY*gqxx14 + atom2.quadrupoleXZ*gqxx15 + atom2.quadrupoleYZ*gqxx18)) +
atom1.quadrupoleYY*(atom2.quadrupoleXX*gqyy12 + atom2.quadrupoleYY*gqyy17 + atom2.quadrupoleZZ*gqyy19 + 2*(atom2.quadrupoleXY*gqyy14 + atom2.quadrupoleXZ*gqyy15 + atom2.quadrupoleYZ*gqyy18)) +
atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqzz12 + atom2.quadrupoleYY*gqzz17 + atom2.quadrupoleZZ*gqzz19 + 2*(atom2.quadrupoleXY*gqzz14 + atom2.quadrupoleXZ*gqzz15 + atom2.quadrupoleYZ*gqzz18)) +
2*(
atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxy12 + atom2.quadrupoleYY*gqxy17 + atom2.quadrupoleZZ*gqxy19 + 2*(atom2.quadrupoleXY*gqxy14 + atom2.quadrupoleXZ*gqxy15 + atom2.quadrupoleYZ*gqxy18)) +
atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxz12 + atom2.quadrupoleYY*gqxz17 + atom2.quadrupoleZZ*gqxz19 + 2*(atom2.quadrupoleXY*gqxz14 + atom2.quadrupoleXZ*gqxz15 + atom2.quadrupoleYZ*gqxz18)) +
atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqyz12 + atom2.quadrupoleYY*gqyz17 + atom2.quadrupoleZZ*gqyz19 + 2*(atom2.quadrupoleXY*gqyz14 + atom2.quadrupoleXZ*gqyz15 + atom2.quadrupoleYZ*gqyz18))) +
atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx12 + atom2.quadrupoleYY*gqyy12 + atom2.quadrupoleZZ*gqzz12 + 2*(atom2.quadrupoleXY*gqxy12 + atom2.quadrupoleXZ*gqxz12 + atom2.quadrupoleYZ*gqyz12)) +
atom1.quadrupoleYY*(atom2.quadrupoleXX*gqxx17 + atom2.quadrupoleYY*gqyy17 + atom2.quadrupoleZZ*gqzz17 + 2*(atom2.quadrupoleXY*gqxy17 + atom2.quadrupoleXZ*gqxz17 + atom2.quadrupoleYZ*gqyz17)) +
atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqxx19 + atom2.quadrupoleYY*gqyy19 + atom2.quadrupoleZZ*gqzz19 + 2*(atom2.quadrupoleXY*gqxy19 + atom2.quadrupoleXZ*gqxz19 + atom2.quadrupoleYZ*gqyz19)) +
2*(
atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxx14 + atom2.quadrupoleYY*gqyy14 + atom2.quadrupoleZZ*gqzz14 + 2*(atom2.quadrupoleXY*gqxy14 + atom2.quadrupoleXZ*gqxz14 + atom2.quadrupoleYZ*gqyz14)) +
atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxx15 + atom2.quadrupoleYY*gqyy15 + atom2.quadrupoleZZ*gqzz15 + 2*(atom2.quadrupoleXY*gqxy15 + atom2.quadrupoleXZ*gqxz15 + atom2.quadrupoleYZ*gqyz15)) +
atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqxx18 + atom2.quadrupoleYY*gqyy18 + atom2.quadrupoleZZ*gqzz18 + 2*(atom2.quadrupoleXY*gqxy18 + atom2.quadrupoleXZ*gqxz18 + atom2.quadrupoleYZ*gqyz18)));
dedz += atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx13 + atom2.quadrupoleYY*gqxx18 + atom2.quadrupoleZZ*gqxx20 + 2*(atom2.quadrupoleXY*gqxx15 + atom2.quadrupoleXZ*gqxx16 + atom2.quadrupoleYZ*gqxx19)) +
atom1.quadrupoleYY*(atom2.quadrupoleXX*gqyy13 + atom2.quadrupoleYY*gqyy18 + atom2.quadrupoleZZ*gqyy20 + 2*(atom2.quadrupoleXY*gqyy15 + atom2.quadrupoleXZ*gqyy16 + atom2.quadrupoleYZ*gqyy19)) +
atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqzz13 + atom2.quadrupoleYY*gqzz18 + atom2.quadrupoleZZ*gqzz20 + 2*(atom2.quadrupoleXY*gqzz15 + atom2.quadrupoleXZ*gqzz16 + atom2.quadrupoleYZ*gqzz19)) +
2*(
atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxy13 + atom2.quadrupoleYY*gqxy18 + atom2.quadrupoleZZ*gqxy20 + 2*(atom2.quadrupoleXY*gqxy15 + atom2.quadrupoleXZ*gqxy16 + atom2.quadrupoleYZ*gqxy19)) +
atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxz13 + atom2.quadrupoleYY*gqxz18 + atom2.quadrupoleZZ*gqxz20 + 2*(atom2.quadrupoleXY*gqxz15 + atom2.quadrupoleXZ*gqxz16 + atom2.quadrupoleYZ*gqxz19)) +
atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqyz13 + atom2.quadrupoleYY*gqyz18 + atom2.quadrupoleZZ*gqyz20 + 2*(atom2.quadrupoleXY*gqyz15 + atom2.quadrupoleXZ*gqyz16 + atom2.quadrupoleYZ*gqyz19))) +
atom1.quadrupoleXX*(atom2.quadrupoleXX*gqxx13 + atom2.quadrupoleYY*gqyy13 + atom2.quadrupoleZZ*gqzz13 + 2*(atom2.quadrupoleXY*gqxy13 + atom2.quadrupoleXZ*gqxz13 + atom2.quadrupoleYZ*gqyz13)) +
atom1.quadrupoleYY*(atom2.quadrupoleXX*gqxx18 + atom2.quadrupoleYY*gqyy18 + atom2.quadrupoleZZ*gqzz18 + 2*(atom2.quadrupoleXY*gqxy18 + atom2.quadrupoleXZ*gqxz18 + atom2.quadrupoleYZ*gqyz18)) +
atom1.quadrupoleZZ*(atom2.quadrupoleXX*gqxx20 + atom2.quadrupoleYY*gqyy20 + atom2.quadrupoleZZ*gqzz20 + 2*(atom2.quadrupoleXY*gqxy20 + atom2.quadrupoleXZ*gqxz20 + atom2.quadrupoleYZ*gqyz20)) +
2*(
atom1.quadrupoleXY*(atom2.quadrupoleXX*gqxx15 + atom2.quadrupoleYY*gqyy15 + atom2.quadrupoleZZ*gqzz15 + 2*(atom2.quadrupoleXY*gqxy15 + atom2.quadrupoleXZ*gqxz15 + atom2.quadrupoleYZ*gqyz15)) +
atom1.quadrupoleXZ*(atom2.quadrupoleXX*gqxx16 + atom2.quadrupoleYY*gqyy16 + atom2.quadrupoleZZ*gqzz16 + 2*(atom2.quadrupoleXY*gqxy16 + atom2.quadrupoleXZ*gqxz16 + atom2.quadrupoleYZ*gqyz16)) +
atom1.quadrupoleYZ*(atom2.quadrupoleXX*gqxx19 + atom2.quadrupoleYY*gqyy19 + atom2.quadrupoleZZ*gqzz19 + 2*(atom2.quadrupoleXY*gqxy19 + atom2.quadrupoleXZ*gqxz19 + atom2.quadrupoleYZ*gqyz19)));
#endif
#if defined T1
if (xr != 0 || yr != 0 || zr != 0) {
real gux1 = xr*a10;
real guy1 = yr*a10;
real guz1 = zr*a10;
real gc2 = xr*a01;
real gc3 = yr*a01;
real gc4 = zr*a01;
real fid1 = atom2.dipole.x*gux2 + atom2.dipole.y*gux3 + atom2.dipole.z*gux4 + 0.5f*(atom2.q*gux1 + atom2.quadrupoleXX*gux5 + atom2.quadrupoleYY*gux8 + atom2.quadrupoleZZ*gux10 +
2*(atom2.quadrupoleXY*gux6 + atom2.quadrupoleXZ*gux7 + atom2.quadrupoleYZ*gux9) +
atom2.q*gc2 + atom2.quadrupoleXX*gqxx2 + atom2.quadrupoleYY*gqyy2 + atom2.quadrupoleZZ*gqzz2 +
2*(atom2.quadrupoleXY*gqxy2 + atom2.quadrupoleXZ*gqxz2 + atom2.quadrupoleYZ*gqyz2));
real fid2 = atom2.dipole.x*guy2 + atom2.dipole.y*guy3 + atom2.dipole.z*guy4 + 0.5f*(atom2.q*guy1 + atom2.quadrupoleXX*guy5 + atom2.quadrupoleYY*guy8 + atom2.quadrupoleZZ*guy10 +
2*(atom2.quadrupoleXY*guy6 + atom2.quadrupoleXZ*guy7 + atom2.quadrupoleYZ*guy9) +
atom2.q*gc3 + atom2.quadrupoleXX*gqxx3 + atom2.quadrupoleYY*gqyy3 + atom2.quadrupoleZZ*gqzz3 +
2*(atom2.quadrupoleXY*gqxy3 + atom2.quadrupoleXZ*gqxz3 + atom2.quadrupoleYZ*gqyz3));
real fid3 = atom2.dipole.x*guz2 + atom2.dipole.y*guz3 + atom2.dipole.z*guz4 + 0.5f*(atom2.q*guz1 + atom2.quadrupoleXX*guz5 + atom2.quadrupoleYY*guz8 + atom2.quadrupoleZZ*guz10 +
2*(atom2.quadrupoleXY*guz6 + atom2.quadrupoleXZ*guz7 + atom2.quadrupoleYZ*guz9) +
atom2.q*gc4 + atom2.quadrupoleXX*gqxx4 + atom2.quadrupoleYY*gqyy4 + atom2.quadrupoleZZ*gqzz4 +
2*(atom2.quadrupoleXY*gqxy4 + atom2.quadrupoleXZ*gqxz4 + atom2.quadrupoleYZ*gqyz4));
real trq1 = atom1.dipole.y*fid3 - atom1.dipole.z*fid2;
real trq2 = atom1.dipole.z*fid1 - atom1.dipole.x*fid3;
real trq3 = atom1.dipole.x*fid2 - atom1.dipole.y*fid1;
// torque on quadrupoles due to permanent reaction field gradient
real fidg11 =
(atom2.q*xr2*a20 + atom2.dipole.x*gqxx2 + atom2.dipole.y*gqxx3 + atom2.dipole.z*gqxx4
+ atom2.quadrupoleXX*gqxx5 + atom2.quadrupoleYY*gqxx8 + atom2.quadrupoleZZ*gqxx10
+ 2*(atom2.quadrupoleXY*gqxx6 + atom2.quadrupoleXZ*gqxx7 + atom2.quadrupoleYZ*gqxx9)
+ atom2.q*gc5 + atom2.dipole.x*gux5 + atom2.dipole.y*guy5 + atom2.dipole.z*guz5
+ atom2.quadrupoleXX*gqxx5 + atom2.quadrupoleYY*gqyy5 + atom2.quadrupoleZZ*gqzz5
+ 2*(atom2.quadrupoleXY*gqxy5 + atom2.quadrupoleXZ*gqxz5 + atom2.quadrupoleYZ*gqyz5));
real fidg12 =
(atom2.q*xr*yr*a20 + atom2.dipole.x*gqxy2 + atom2.dipole.y*gqxy3 + atom2.dipole.z*gqxy4
+ atom2.quadrupoleXX*gqxy5 + atom2.quadrupoleYY*gqxy8 + atom2.quadrupoleZZ*gqxy10
+ 2*(atom2.quadrupoleXY*gqxy6 + atom2.quadrupoleXZ*gqxy7 + atom2.quadrupoleYZ*gqxy9)
+ atom2.q*gc6 + atom2.dipole.x*gux6 + atom2.dipole.y*guy6 + atom2.dipole.z*guz6
+ atom2.quadrupoleXX*gqxx6 + atom2.quadrupoleYY*gqyy6 + atom2.quadrupoleZZ*gqzz6
+ 2*(atom2.quadrupoleXY*gqxy6 + atom2.quadrupoleXZ*gqxz6 + atom2.quadrupoleYZ*gqyz6));
real fidg13 =
(atom2.q*xr*zr*a20 + atom2.dipole.x*gqxz2 + atom2.dipole.y*gqxz3 + atom2.dipole.z*gqxz4
+ atom2.quadrupoleXX*gqxz5 + atom2.quadrupoleYY*gqxz8 + atom2.quadrupoleZZ*gqxz10
+ 2*(atom2.quadrupoleXY*gqxz6 + atom2.quadrupoleXZ*gqxz7 + atom2.quadrupoleYZ*gqxz9)
+ atom2.q*gc7 + atom2.dipole.x*gux7 + atom2.dipole.y*guy7 + atom2.dipole.z*guz7
+ atom2.quadrupoleXX*gqxx7 + atom2.quadrupoleYY*gqyy7 + atom2.quadrupoleZZ*gqzz7
+ 2*(atom2.quadrupoleXY*gqxy7 + atom2.quadrupoleXZ*gqxz7 + atom2.quadrupoleYZ*gqyz7));
real fidg22 =
(atom2.q*yr2*a20 + atom2.dipole.x*gqyy2 + atom2.dipole.y*gqyy3 + atom2.dipole.z*gqyy4
+ atom2.quadrupoleXX*gqyy5 + atom2.quadrupoleYY*gqyy8 + atom2.quadrupoleZZ*gqyy10
+ 2*(atom2.quadrupoleXY*gqyy6 + atom2.quadrupoleXZ*gqyy7 + atom2.quadrupoleYZ*gqyy9)
+ atom2.q*gc8 + atom2.dipole.x*gux8 + atom2.dipole.y*guy8 + atom2.dipole.z*guz8
+ atom2.quadrupoleXX*gqxx8 + atom2.quadrupoleYY*gqyy8 + atom2.quadrupoleZZ*gqzz8
+ 2*(atom2.quadrupoleXY*gqxy8 + atom2.quadrupoleXZ*gqxz8 + atom2.quadrupoleYZ*gqyz8));
real fidg23 =
(atom2.q*yr*zr*a20 + atom2.dipole.x*gqyz2 + atom2.dipole.y*gqyz3 + atom2.dipole.z*gqyz4
+ atom2.quadrupoleXX*gqyz5 + atom2.quadrupoleYY*gqyz8 + atom2.quadrupoleZZ*gqyz10
+ 2*(atom2.quadrupoleXY*gqyz6 + atom2.quadrupoleXZ*gqyz7 + atom2.quadrupoleYZ*gqyz9)
+ atom2.q*gc9 + atom2.dipole.x*gux9 + atom2.dipole.y*guy9 + atom2.dipole.z*guz9
+ atom2.quadrupoleXX*gqxx9 + atom2.quadrupoleYY*gqyy9 + atom2.quadrupoleZZ*gqzz9
+ 2*(atom2.quadrupoleXY*gqxy9 + atom2.quadrupoleXZ*gqxz9 + atom2.quadrupoleYZ*gqyz9));
real fidg33 =
(atom2.q*zr2*a20 + atom2.dipole.x*gqzz2 + atom2.dipole.y*gqzz3 + atom2.dipole.z*gqzz4
+ atom2.quadrupoleXX*gqzz5 + atom2.quadrupoleYY*gqzz8 + atom2.quadrupoleZZ*gqzz10
+ 2*(atom2.quadrupoleXY*gqzz6 + atom2.quadrupoleXZ*gqzz7 + atom2.quadrupoleYZ*gqzz9)
+ atom2.q*gc10 + atom2.dipole.x*gux10 + atom2.dipole.y*guy10 + atom2.dipole.z*guz10
+ atom2.quadrupoleXX*gqxx10 + atom2.quadrupoleYY*gqyy10 + atom2.quadrupoleZZ*gqzz10
+ 2*(atom2.quadrupoleXY*gqxy10 + atom2.quadrupoleXZ*gqxz10 + atom2.quadrupoleYZ*gqyz10));
trq1 = (atom1.quadrupoleXY*fidg13 + atom1.quadrupoleYY*fidg23 + atom1.quadrupoleYZ*fidg33 -atom1.quadrupoleXZ*fidg12-atom1.quadrupoleYZ*fidg22-atom1.quadrupoleZZ*fidg23);
trq2 = (atom1.quadrupoleXZ*fidg11 + atom1.quadrupoleYZ*fidg12 + atom1.quadrupoleZZ*fidg13 -atom1.quadrupoleXX*fidg13-atom1.quadrupoleXY*fidg23-atom1.quadrupoleXZ*fidg33);
trq3 = (atom1.quadrupoleXX*fidg12 + atom1.quadrupoleXY*fidg22 + atom1.quadrupoleXZ*fidg23 -atom1.quadrupoleXY*fidg11-atom1.quadrupoleYY*fidg12-atom1.quadrupoleYZ*fidg13);
torque.x = trq1;
torque.y = trq2;
torque.z = trq3;
} else {
torque.x = 0;
torque.y = 0;
torque.z = 0;
}
#endif
#if defined B2
dsumdrB2 *= 0.5f;
atom1.bornForce += 0.5f*atom2.bornRadius*dsumdrB2;
atom2.bornForce += 0.5f*atom1.bornRadius*dsumdrB2;
#endif
#if defined T2
// torque due to induced reaction field gradient on quadrupoles;
real fidg11 = sxk*gqxx2 + syk*gqxx3 + szk*gqxx4 + sxk*gux5 + syk*guy5 + szk*guz5;
real fidg12 = sxk*gqxy2 + syk*gqxy3 + szk*gqxy4 + sxk*gux6 + syk*guy6 + szk*guz6;
real fidg13 = sxk*gqxz2 + syk*gqxz3 + szk*gqxz4 + sxk*gux7 + syk*guy7 + szk*guz7;
real fidg22 = sxk*gqyy2 + syk*gqyy3 + szk*gqyy4 + sxk*gux8 + syk*guy8 + szk*guz8;
real fidg23 = sxk*gqyz2 + syk*gqyz3 + szk*gqyz4 + sxk*gux9 + syk*guy9 + szk*guz9;
real fidg33 = sxk*gqzz2 + syk*gqzz3 + szk*gqzz4 + sxk*gux10 + syk*guy10 + szk*guz10;
trqi1 = atom1.quadrupoleXY*fidg13 + atom1.quadrupoleYY*fidg23 + atom1.quadrupoleYZ*fidg33
-atom1.quadrupoleXZ*fidg12 - atom1.quadrupoleYZ*fidg22 - atom1.quadrupoleZZ*fidg23;
trqi2 = atom1.quadrupoleXZ*fidg11 + atom1.quadrupoleYZ*fidg12 + atom1.quadrupoleZZ*fidg13
-atom1.quadrupoleXX*fidg13 - atom1.quadrupoleXY*fidg23 - atom1.quadrupoleXZ*fidg33;
trqi3 = atom1.quadrupoleXX*fidg12 + atom1.quadrupoleXY*fidg22 + atom1.quadrupoleXZ*fidg23
-atom1.quadrupoleXY*fidg11 - atom1.quadrupoleYY*fidg12 - atom1.quadrupoleYZ*fidg13;
torque.x += 0.5f*trqi1;
torque.y += 0.5f*trqi2;
torque.z += 0.5f*trqi3;
#endif
#if defined F1
outputEnergy += energy;
if ((xr != 0 || yr != 0 || zr != 0)) {
force.x = dedx;
force.y = dedy;
force.z = dedz;
} else {
force.x = force.y = force.z = 0;
}
#endif
#if defined F2
outputEnergy += 0.5f*energy;
dpdx *= 0.5f;
dpdy *= 0.5f;
dpdz *= 0.5f;
if ((xr != 0 || yr != 0 || zr != 0)) {
force.x += dpdx;
force.y += dpdy;
force.z += dpdz;
}
#endif
}
......@@ -135,8 +135,8 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ;
localData[threadIdx.x].inducedDipole = data.inducedDipole;
localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar;
localData[threadIdx.x].thole = data.thole; // IS THIS CORRECT?
localData[threadIdx.x].damp = data.damp; // IS THIS CORRECT?
localData[threadIdx.x].thole = data.thole;
localData[threadIdx.x].damp = data.damp;
uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
......@@ -260,6 +260,8 @@ extern "C" __global__ void computeElectrostatics(
// Compute torques.
data.force = make_real3(0);
localData[threadIdx.x].force = make_real3(0);
for (j = 0; j < TILE_SIZE; j++) {
if ((flags&(1<<j)) != 0) {
int atom2 = tbx+j;
......@@ -362,6 +364,8 @@ extern "C" __global__ void computeElectrostatics(
// Compute torques.
data.force = make_real3(0);
localData[threadIdx.x].force = make_real3(0);
covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0));
polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0);
covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx));
......
......@@ -7,6 +7,10 @@ typedef struct {
real quadrupoleXX, quadrupoleXY, quadrupoleXZ;
real quadrupoleYY, quadrupoleYZ, quadrupoleZZ;
float thole, damp;
#ifdef USE_GK
real3 gkField;
real bornRadius;
#endif
} AtomData;
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const float2* __restrict__ dampingAndThole) {
......@@ -178,6 +182,205 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
}
#endif
#ifdef USE_GK
__device__ void computeOneGkInteraction(AtomData& atom1, AtomData& atom2, real3 delta, real3* fields) {
real a[4][4];
real gc[5];
real gux[11],guy[11],guz[11];
real gqxx[5],gqxy[5];
real gqxz[5],gqyy[5];
real gqyz[5],gqzz[5];
real ci = atom1.posq.w;
real ck = atom2.posq.w;
real uxi = atom1.dipole.x;
real uyi = atom1.dipole.y;
real uzi = atom1.dipole.z;
real uxk = atom2.dipole.x;
real uyk = atom2.dipole.y;
real uzk = atom2.dipole.z;
real qxxi = atom1.quadrupoleXX;
real qxyi = atom1.quadrupoleXY;
real qxzi = atom1.quadrupoleXZ;
real qyyi = atom1.quadrupoleYY;
real qyzi = atom1.quadrupoleYZ;
real qzzi = atom1.quadrupoleZZ;
real qxxk = atom2.quadrupoleXX;
real qxyk = atom2.quadrupoleXY;
real qxzk = atom2.quadrupoleXZ;
real qyyk = atom2.quadrupoleYY;
real qyzk = atom2.quadrupoleYZ;
real qzzk = atom2.quadrupoleZZ;
real xr2 = delta.x*delta.x;
real yr2 = delta.y*delta.y;
real zr2 = delta.z*delta.z;
real r2 = xr2 + yr2 + zr2;
real rb2 = atom1.bornRadius*atom2.bornRadius;
real expterm = EXP(-r2/(GK_C*rb2));
real expc = expterm / GK_C;
real dexpc = -2/(GK_C*rb2);
real gf2 = RECIP(r2+rb2*expterm);
real gf = SQRT(gf2);
real gf3 = gf2*gf;
real gf5 = gf3*gf2;
real gf7 = gf5*gf2;
// reaction potential auxiliary terms
a[0][0] = gf;
a[1][0] = -gf3;
a[2][0] = 3*gf5;
a[3][0] = -15*gf7;
// reaction potential gradient auxiliary terms
real expc1 = 1 - expc;
a[0][1] = expc1*a[1][0];
a[1][1] = expc1*a[2][0];
a[2][1] = expc1*a[3][0];
// dipole second reaction potential gradient auxiliary term
real expcdexpc = -expc*dexpc;
a[1][2] = expc1*a[2][1] + expcdexpc*a[2][0];
// multiply the auxillary terms by dielectric functions;
a[0][1] = GK_FC*a[0][1];
a[1][0] = GK_FD*a[1][0];
a[1][1] = GK_FD*a[1][1];
a[1][2] = GK_FD*a[1][2];
a[2][0] = GK_FQ*a[2][0];
a[2][1] = GK_FQ*a[2][1];
// unweighted dipole reaction potential tensor
gux[1] = delta.x*a[1][0];
guy[1] = delta.y*a[1][0];
guz[1] = delta.z*a[1][0];
// unweighted reaction potential gradient tensor
gc[2] = delta.x*a[0][1];
gc[3] = delta.y*a[0][1];
gc[4] = delta.z*a[0][1];
gux[2] = a[1][0] + xr2*a[1][1];
gux[3] = delta.x*delta.y*a[1][1];
gux[4] = delta.x*delta.z*a[1][1];
guy[2] = gux[3];
guy[3] = a[1][0] + yr2*a[1][1];
guy[4] = delta.y*delta.z*a[1][1];
guz[2] = gux[4];
guz[3] = guy[4];
guz[4] = a[1][0] + zr2*a[1][1];
gqxx[2] = delta.x*(2*a[2][0]+xr2*a[2][1]);
gqxx[3] = delta.y*xr2*a[2][1];
gqxx[4] = delta.z*xr2*a[2][1];
gqyy[2] = delta.x*yr2*a[2][1];
gqyy[3] = delta.y*(2*a[2][0]+yr2*a[2][1]);
gqyy[4] = delta.z*yr2*a[2][1];
gqzz[2] = delta.x*zr2*a[2][1];
gqzz[3] = delta.y*zr2*a[2][1];
gqzz[4] = delta.z*(2*a[2][0]+zr2*a[2][1]);
gqxy[2] = delta.y*(a[2][0]+xr2*a[2][1]);
gqxy[3] = delta.x*(a[2][0]+yr2*a[2][1]);
gqxy[4] = delta.z*delta.x*delta.y*a[2][1];
gqxz[2] = delta.z*(a[2][0]+xr2*a[2][1]);
gqxz[3] = gqxy[4];
gqxz[4] = delta.x*(a[2][0]+zr2*a[2][1]);
gqyz[2] = gqxy[4];
gqyz[3] = delta.z*(a[2][0]+yr2*a[2][1]);
gqyz[4] = delta.y*(a[2][0]+zr2*a[2][1]);
// unweighted dipole second reaction potential gradient tensor
gux[5] = delta.x*(3*a[1][1]+xr2*a[1][2]);
gux[6] = delta.y*(a[1][1]+xr2*a[1][2]);
gux[7] = delta.z*(a[1][1]+xr2*a[1][2]);
gux[8] = delta.x*(a[1][1]+yr2*a[1][2]);
gux[9] = delta.z*delta.x*delta.y*a[1][2];
gux[10] = delta.x*(a[1][1]+zr2*a[1][2]);
guy[5] = delta.y*(a[1][1]+xr2*a[1][2]);
guy[6] = delta.x*(a[1][1]+yr2*a[1][2]);
guy[7] = gux[9];
guy[8] = delta.y*(3*a[1][1]+yr2*a[1][2]);
guy[9] = delta.z*(a[1][1]+yr2*a[1][2]);
guy[10] = delta.y*(a[1][1]+zr2*a[1][2]);
guz[5] = delta.z*(a[1][1]+xr2*a[1][2]);
guz[6] = gux[9];
guz[7] = delta.x*(a[1][1]+zr2*a[1][2]);
guz[8] = delta.z*(a[1][1]+yr2*a[1][2]);
guz[9] = delta.y*(a[1][1]+zr2*a[1][2]);
guz[10] = delta.z*(3*a[1][1]+zr2*a[1][2]);
// generalized Kirkwood permanent reaction field
fields[0].x = uxk*gux[2] + uyk*gux[3] + uzk*gux[4]
+ 0.5f*(ck*gux[1] + qxxk*gux[5]
+ qyyk*gux[8] + qzzk*gux[10]
+ 2*(qxyk*gux[6]+qxzk*gux[7]
+ qyzk*gux[9]))
+ 0.5f*(ck*gc[2] + qxxk*gqxx[2]
+ qyyk*gqyy[2] + qzzk*gqzz[2]
+ 2*(qxyk*gqxy[2]+qxzk*gqxz[2]
+ qyzk*gqyz[2]));
fields[0].y = uxk*guy[2] + uyk*guy[3] + uzk*guy[4]
+ 0.5f*(ck*guy[1] + qxxk*guy[5]
+ qyyk*guy[8] + qzzk*guy[10]
+ 2*(qxyk*guy[6]+qxzk*guy[7]
+ qyzk*guy[9]))
+ 0.5f*(ck*gc[3] + qxxk*gqxx[3]
+ qyyk*gqyy[3] + qzzk*gqzz[3]
+ 2*(qxyk*gqxy[3]+qxzk*gqxz[3]
+ qyzk*gqyz[3]));
fields[0].z = uxk*guz[2] + uyk*guz[3] + uzk*guz[4]
+ 0.5f*(ck*guz[1] + qxxk*guz[5]
+ qyyk*guz[8] + qzzk*guz[10]
+ 2*(qxyk*guz[6]+qxzk*guz[7]
+ qyzk*guz[9]))
+ 0.5f*(ck*gc[4] + qxxk*gqxx[4]
+ qyyk*gqyy[4] + qzzk*gqzz[4]
+ 2*(qxyk*gqxy[4]+qxzk*gqxz[4]
+ qyzk*gqyz[4]));
fields[1].x = uxi*gux[2] + uyi*gux[3] + uzi*gux[4]
- 0.5f*(ci*gux[1] + qxxi*gux[5]
+ qyyi*gux[8] + qzzi*gux[10]
+ 2*(qxyi*gux[6]+qxzi*gux[7]
+ qyzi*gux[9]))
- 0.5f*(ci*gc[2] + qxxi*gqxx[2]
+ qyyi*gqyy[2] + qzzi*gqzz[2]
+ 2*(qxyi*gqxy[2]+qxzi*gqxz[2]
+ qyzi*gqyz[2]));
fields[1].y = uxi*guy[2] + uyi*guy[3] + uzi*guy[4]
- 0.5f*(ci*guy[1] + qxxi*guy[5]
+ qyyi*guy[8] + qzzi*guy[10]
+ 2*(qxyi*guy[6]+qxzi*guy[7]
+ qyzi*guy[9]))
- 0.5f*(ci*gc[3] + qxxi*gqxx[3]
+ qyyi*gqyy[3] + qzzi*gqzz[3]
+ 2*(qxyi*gqxy[3]+qxzi*gqxz[3]
+ qyzi*gqyz[3]));
fields[1].z = uxi*guz[2] + uyi*guz[3] + uzi*guz[4]
- 0.5f*(ci*guz[1] + qxxi*guz[5]
+ qyyi*guz[8] + qzzi*guz[10]
+ 2*(qxyi*guz[6]+qxzi*guz[7]
+ qyzi*guz[9]))
- 0.5f*(ci*gc[4] + qxxi*gqxx[4]
+ qyyi*gqyy[4] + qzzi*gqzz[4]
+ 2*(qxyi*gqxy[4]+qxzi*gqxz[4]
+ qyzi*gqyz[4]));
}
#endif
__device__ real computeDScaleFactor(unsigned int polarizationGroup) {
return (polarizationGroup & 1 ? 0 : 1);
}
......@@ -198,6 +401,8 @@ extern "C" __global__ void computeFixedField(
const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const unsigned int* __restrict__ interactionFlags,
#elif defined USE_GK
const real* __restrict__ bornRadii, unsigned long long* __restrict__ gkFieldBuffers,
#endif
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const float2* __restrict__ dampingAndThole) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
......@@ -246,6 +451,9 @@ extern "C" __global__ void computeFixedField(
}
unsigned int atom1 = x*TILE_SIZE + tgx;
loadAtomData(data, atom1, posq, labFrameDipole, labFrameQuadrupole, dampingAndThole);
#ifdef USE_GK
data.bornRadius = bornRadii[atom1];
#endif
// Locate the exclusion data for this tile.
......@@ -271,26 +479,32 @@ extern "C" __global__ void computeFixedField(
localData[localAtomIndex].quadrupoleYY = data.quadrupoleYY;
localData[localAtomIndex].quadrupoleYZ = data.quadrupoleYZ;
localData[localAtomIndex].quadrupoleZZ = data.quadrupoleZZ;
localData[localAtomIndex].thole = data.thole; // IS THIS CORRECT?
localData[localAtomIndex].damp = data.damp; // IS THIS CORRECT?
localData[localAtomIndex].thole = data.thole;
localData[localAtomIndex].damp = data.damp;
#ifdef USE_GK
localData[localAtomIndex].bornRadius = data.bornRadius;
#endif
uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+j;
real3 delta = make_real3(localData[atom2].posq.x-data.posq.x, localData[atom2].posq.y-data.posq.y, localData[atom2].posq.z-data.posq.z);
real3 delta = trimTo3(localData[tbx+j].posq-data.posq);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
real3 fields[4];
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
computeOneInteraction(data, localData[atom2], delta, d, p, fields);
atom2 = y*TILE_SIZE+j;
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 fields[4];
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
computeOneInteraction(data, localData[tbx+j], delta, d, p, fields);
data.field += fields[0];
data.fieldPolar += fields[1];
#ifdef USE_GK
computeOneGkInteraction(data, localData[tbx+j], delta, fields);
data.gkField += fields[0];
#endif
}
covalent.x >>= 1;
covalent.y >>= 1;
......@@ -305,6 +519,10 @@ extern "C" __global__ void computeFixedField(
loadAtomData(localData[localAtomIndex], j, posq, labFrameDipole, labFrameQuadrupole, dampingAndThole);
localData[localAtomIndex].field = make_real3(0);
localData[localAtomIndex].fieldPolar = make_real3(0);
#ifdef USE_GK
localData[localAtomIndex].bornRadius = bornRadii[j];
localData[localAtomIndex].gkField = make_real3(0);
#endif
#ifdef USE_CUTOFF
unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF);
if (!hasExclusions && flags != 0xFFFFFFFF) {
......@@ -385,22 +603,27 @@ extern "C" __global__ void computeFixedField(
polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx));
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+tj;
real3 delta = make_real3(localData[atom2].posq.x-data.posq.x, localData[atom2].posq.y-data.posq.y, localData[atom2].posq.z-data.posq.z);
real3 delta = trimTo3(localData[tbx+tj].posq-data.posq);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
real3 fields[4];
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
computeOneInteraction(data, localData[atom2], delta, d, p, fields);
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 fields[4];
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
computeOneInteraction(data, localData[tbx+tj], delta, d, p, fields);
data.field += fields[0];
data.fieldPolar += fields[1];
localData[atom2].field += fields[2];
localData[atom2].fieldPolar += fields[3];
localData[tbx+tj].field += fields[2];
localData[tbx+tj].fieldPolar += fields[3];
#ifdef USE_GK
computeOneGkInteraction(data, localData[tbx+tj], delta, fields);
data.gkField += fields[0];
localData[tbx+tj].gkField += fields[1];
#endif
}
covalent.x >>= 1;
covalent.y >>= 1;
......@@ -421,6 +644,11 @@ extern "C" __global__ void computeFixedField(
atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0xFFFFFFFF)));
#ifdef USE_GK
atomicAdd(&gkFieldBuffers[offset], static_cast<unsigned long long>((long long) (data.gkField.x*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.gkField.y*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.gkField.z*0xFFFFFFFF)));
#endif
}
if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx;
......@@ -430,6 +658,11 @@ extern "C" __global__ void computeFixedField(
atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0xFFFFFFFF)));
#ifdef USE_GK
atomicAdd(&gkFieldBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.x*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.y*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.z*0xFFFFFFFF)));
#endif
}
pos++;
} while (pos < end);
......
......@@ -207,6 +207,9 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
}
extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fieldBuffers, const long long* __restrict__ fieldPolarBuffers,
#ifdef USE_GK
const long long* __restrict__ gkFieldBuffers, real* __restrict__ inducedDipoleS, real* __restrict__ inducedDipolePolarS,
#endif
real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability) {
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) {
real scale = polarizability[atom]/(real) 0xFFFFFFFF;
......@@ -216,16 +219,17 @@ extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fi
inducedDipolePolar[3*atom] = scale*fieldPolarBuffers[atom];
inducedDipolePolar[3*atom+1] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS];
inducedDipolePolar[3*atom+2] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2];
#ifdef USE_GK
inducedDipoleS[3*atom] = scale*(fieldBuffers[atom]+gkFieldBuffers[atom]);
inducedDipoleS[3*atom+1] = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]);
inducedDipoleS[3*atom+2] = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]);
inducedDipolePolarS[3*atom] = scale*(fieldPolarBuffers[atom]+gkFieldBuffers[atom]);
inducedDipolePolarS[3*atom+1] = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]);
inducedDipolePolarS[3*atom+2] = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]);
#endif
}
}
/**
* Convert a real4 to a real3 by removing its last element.
*/
inline __device__ real3 trim(real4 v) {
return make_real3(v.x, v.y, v.z);
}
/**
* Normalize a vector and return what its magnitude was.
*/
......@@ -274,11 +278,11 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for
// NoAxisType
if (axisType < 5 && particles.z >= 0) {
real3 atomPos = trim(posq[atom]);
vector[U] = atomPos - trim(posq[axisAtom]);
real3 atomPos = trimTo3(posq[atom]);
vector[U] = atomPos - trimTo3(posq[axisAtom]);
norms[U] = normVector(vector[U]);
if (axisType != 4 && particles.x >= 0)
vector[V] = atomPos - trim(posq[particles.x]);
vector[V] = atomPos - trimTo3(posq[particles.x]);
else
vector[V] = make_real3(0.1f);
norms[V] = normVector(vector[V]);
......@@ -288,7 +292,7 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for
if (axisType < 2 || axisType > 3)
vector[W] = cross(vector[U], vector[V]);
else
vector[W] = atomPos - trim(posq[particles.y]);
vector[W] = atomPos - trimTo3(posq[particles.y]);
norms[W] = normVector(vector[W]);
vector[UV] = cross(vector[V], vector[U]);
......
......@@ -260,8 +260,8 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ;
localData[threadIdx.x].inducedDipole = data.inducedDipole;
localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar;
localData[threadIdx.x].thole = data.thole; // IS THIS CORRECT?
localData[threadIdx.x].damp = data.damp; // IS THIS CORRECT?
localData[threadIdx.x].thole = data.thole;
localData[threadIdx.x].damp = data.damp;
uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment