Commit 72ab8864 authored by peastman's avatar peastman
Browse files

Continuing CUDA implementation of spherical harmonics for multipoles

parent fa21870f
......@@ -1150,6 +1150,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
if (maxInducedIterations > 0) {
defines["THREAD_BLOCK_SIZE"] = cu.intToString(inducedFieldThreads);
defines["MAX_PREV_DIIS_DIPOLES"] = cu.intToString(MaxPrevDIISDipoles);
defines["USE_MUTUAL_POLARIZATION"] = "1";
module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleInducedField, defines);
computeInducedFieldKernel = cu.getKernel(module, "computeInducedField");
updateInducedFieldKernel = cu.getKernel(module, "updateInducedFieldByDIIS");
......@@ -1159,10 +1160,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
stringstream electrostaticsSource;
if (usePME) {
electrostaticsSource << CudaKernelSources::vectorOps;
electrostaticsSource << CudaAmoebaKernelSources::sphericalMultipoles;
electrostaticsSource << CudaAmoebaKernelSources::pmeMultipoleElectrostatics;
electrostaticsSource << (hasQuadrupoles ? CudaAmoebaKernelSources::pmeElectrostaticPairForce : CudaAmoebaKernelSources::pmeElectrostaticPairForceNoQuadrupoles);
electrostaticsSource << "#define APPLY_SCALE\n";
electrostaticsSource << (hasQuadrupoles ? CudaAmoebaKernelSources::pmeElectrostaticPairForce : CudaAmoebaKernelSources::pmeElectrostaticPairForceNoQuadrupoles);
electrostaticsThreadMemory = 24*elementSize+3*sizeof(float)+3*sizeof(int)/(double) cu.TileSize;
if (!useShuffle)
electrostaticsThreadMemory += 3*elementSize;
......@@ -1659,8 +1658,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&nb.getInteractingTiles().getDevicePointer(), &nb.getInteractionCount().getDevicePointer(),
cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
&maxTiles, &nb.getBlockCenters().getDevicePointer(), &nb.getInteractingAtoms().getDevicePointer(),
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(),
&inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()};
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &sphericalDipoles->getDevicePointer(), &sphericalQuadrupoles->getDevicePointer(),
&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*electrostaticsThreads, electrostaticsThreads);
void* pmeTransformInducedPotentialArgs[] = {&pmePhidp->getDevicePointer(), &pmeCphi->getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeTransformPotentialKernel, pmeTransformInducedPotentialArgs, cu.getNumAtoms());
......
......@@ -10,10 +10,10 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
sphericalDipoles[offset+2] = molecularDipoles[offset+1]; // y -> Q_11s
offset = 5*atom;
sphericalQuadrupoles[offset+0] = -3.0f*(molecularQuadrupoles[offset+0]+molecularQuadrupoles[offset+3]); // zz -> Q_20
sphericalQuadrupoles[offset+1] = (2*SQRT(3))*molecularQuadrupoles[offset+2]; // xz -> Q_21c
sphericalQuadrupoles[offset+2] = (2*SQRT(3))*molecularQuadrupoles[offset+4]; // yz -> Q_21s
sphericalQuadrupoles[offset+3] = SQRT(3)*(molecularQuadrupoles[offset+0]-molecularQuadrupoles[offset+3]); // xx-yy -> Q_22c
sphericalQuadrupoles[offset+4] = (2*SQRT(3))*molecularQuadrupoles[offset+1]; // xy -> Q_22s
sphericalQuadrupoles[offset+1] = (2*SQRT((real) 3))*molecularQuadrupoles[offset+2]; // xz -> Q_21c
sphericalQuadrupoles[offset+2] = (2*SQRT((real) 3))*molecularQuadrupoles[offset+4]; // yz -> Q_21s
sphericalQuadrupoles[offset+3] = SQRT((real) 3)*(molecularQuadrupoles[offset+0]-molecularQuadrupoles[offset+3]); // xx-yy -> Q_22c
sphericalQuadrupoles[offset+4] = (2*SQRT((real) 3))*molecularQuadrupoles[offset+1]; // xy -> Q_22s
// get coordinates of this atom and the z & x axis atoms
// compute the vector between the atoms and 1/sqrt(d2), d2 is distance between
......@@ -236,32 +236,32 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
sphericalQuadrupole[4] *= -1;
}
real rotatedQuadrupole[5] = {0, 0, 0, 0, 0};
real sqrtThree = SQRT(3);
rotatedQuadrupole[0] += sphericalQuadrupole[0]*0.5f*(3.0f*vectorZ.z*vectorZ.z - 1.0f);
rotatedQuadrupole[1] += sphericalQuadrupole[0]*sqrtThree*vectorZ.z*vectorZ.x;
rotatedQuadrupole[2] += sphericalQuadrupole[0]*sqrtThree*vectorZ.z*vectorZ.y;
rotatedQuadrupole[3] += sphericalQuadrupole[0]*0.5f*sqrtThree*(vectorZ.x*vectorZ.x - vectorZ.y*vectorZ.y);
rotatedQuadrupole[4] += sphericalQuadrupole[0]*sqrtThree*vectorZ.x*vectorZ.y;
rotatedQuadrupole[0] += sphericalQuadrupole[1]*sqrtThree*vectorZ.z*vectorX.z;
rotatedQuadrupole[1] += sphericalQuadrupole[1]*(vectorZ.x*vectorX.z + vectorZ.z*vectorX.x);
rotatedQuadrupole[2] += sphericalQuadrupole[1]*(vectorZ.y*vectorX.z + vectorZ.z*vectorX.y);
rotatedQuadrupole[3] += sphericalQuadrupole[1]*(vectorZ.x*vectorX.x - vectorZ.y*vectorX.y);
rotatedQuadrupole[4] += sphericalQuadrupole[1]*(vectorZ.y*vectorX.x + vectorZ.x*vectorX.y);
rotatedQuadrupole[0] += sphericalQuadrupole[2]*sqrtThree*vectorZ.z*vectorY.z;
rotatedQuadrupole[1] += sphericalQuadrupole[2]*(vectorZ.x*vectorY.z + vectorZ.z*vectorY.x);
rotatedQuadrupole[2] += sphericalQuadrupole[2]*(vectorZ.y*vectorY.z + vectorZ.z*vectorY.y);
rotatedQuadrupole[3] += sphericalQuadrupole[2]*(vectorZ.x*vectorY.x - vectorZ.y*vectorY.y);
rotatedQuadrupole[4] += sphericalQuadrupole[2]*(vectorZ.y*vectorY.x + vectorZ.x*vectorY.y);
rotatedQuadrupole[0] += sphericalQuadrupole[3]*0.5f*sqrtThree*(vectorX.z*vectorX.z - vectorY.z*vectorY.z);
rotatedQuadrupole[1] += sphericalQuadrupole[3]*(vectorX.z*vectorX.x - vectorY.z*vectorY.x);
rotatedQuadrupole[2] += sphericalQuadrupole[3]*(vectorX.z*vectorX.y - vectorY.z*vectorY.y);
rotatedQuadrupole[3] += sphericalQuadrupole[3]*0.5f*(vectorX.x*vectorX.x - vectorX.y*vectorX.y - vectorY.x*vectorY.x + vectorY.y*vectorY.y);
rotatedQuadrupole[4] += sphericalQuadrupole[3]*(vectorX.x*vectorX.y - vectorY.x*vectorY.y);
rotatedQuadrupole[0] += sphericalQuadrupole[4]*sqrtThree*vectorX.z*vectorY.z;
rotatedQuadrupole[1] += sphericalQuadrupole[4]*(vectorX.x*vectorY.z + vectorX.z*vectorY.x);
rotatedQuadrupole[2] += sphericalQuadrupole[4]*(vectorX.y*vectorY.z + vectorX.z*vectorY.y);
rotatedQuadrupole[3] += sphericalQuadrupole[4]*(vectorX.x*vectorY.x - vectorX.y*vectorY.y);
rotatedQuadrupole[4] += sphericalQuadrupole[4]*(vectorX.y*vectorY.x + vectorX.x*vectorY.y);
real sqrtThree = SQRT((real) 3);
rotatedQuadrupole[0] += sphericalQuadrupole[0]*0.5f*(3.0f*vectorZ.z*vectorZ.z - 1.0f) +
sphericalQuadrupole[1]*sqrtThree*vectorZ.z*vectorX.z +
sphericalQuadrupole[2]*sqrtThree*vectorZ.z*vectorY.z +
sphericalQuadrupole[3]*0.5f*sqrtThree*(vectorX.z*vectorX.z - vectorY.z*vectorY.z) +
sphericalQuadrupole[4]*sqrtThree*vectorX.z*vectorY.z;
rotatedQuadrupole[1] += sphericalQuadrupole[0]*sqrtThree*vectorZ.z*vectorZ.x +
sphericalQuadrupole[1]*(vectorZ.x*vectorX.z + vectorZ.z*vectorX.x) +
sphericalQuadrupole[2]*(vectorZ.x*vectorY.z + vectorZ.z*vectorY.x) +
sphericalQuadrupole[3]*(vectorX.z*vectorX.x - vectorY.z*vectorY.x) +
sphericalQuadrupole[4]*(vectorX.x*vectorY.z + vectorX.z*vectorY.x);
rotatedQuadrupole[2] += sphericalQuadrupole[0]*sqrtThree*vectorZ.z*vectorZ.y +
sphericalQuadrupole[1]*(vectorZ.y*vectorX.z + vectorZ.z*vectorX.y) +
sphericalQuadrupole[2]*(vectorZ.y*vectorY.z + vectorZ.z*vectorY.y) +
sphericalQuadrupole[3]*(vectorX.z*vectorX.y - vectorY.z*vectorY.y) +
sphericalQuadrupole[4]*(vectorX.y*vectorY.z + vectorX.z*vectorY.y);
rotatedQuadrupole[3] += sphericalQuadrupole[0]*0.5f*sqrtThree*(vectorZ.x*vectorZ.x - vectorZ.y*vectorZ.y) +
sphericalQuadrupole[1]*(vectorZ.x*vectorX.x - vectorZ.y*vectorX.y) +
sphericalQuadrupole[2]*(vectorZ.x*vectorY.x - vectorZ.y*vectorY.y) +
sphericalQuadrupole[3]*0.5f*(vectorX.x*vectorX.x - vectorX.y*vectorX.y - vectorY.x*vectorY.x + vectorY.y*vectorY.y) +
sphericalQuadrupole[4]*(vectorX.x*vectorY.x - vectorX.y*vectorY.y);
rotatedQuadrupole[4] += sphericalQuadrupole[0]*sqrtThree*vectorZ.x*vectorZ.y +
sphericalQuadrupole[1]*(vectorZ.y*vectorX.x + vectorZ.x*vectorX.y) +
sphericalQuadrupole[2]*(vectorZ.y*vectorY.x + vectorZ.x*vectorY.y) +
sphericalQuadrupole[3]*(vectorX.x*vectorX.y - vectorY.x*vectorY.y) +
sphericalQuadrupole[4]*(vectorX.y*vectorY.x + vectorX.x*vectorY.y);
sphericalQuadrupoles[offset] = rotatedQuadrupole[0];
sphericalQuadrupoles[offset+1] = rotatedQuadrupole[1];
sphericalQuadrupoles[offset+2] = rotatedQuadrupole[2];
......
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef struct {
real3 pos, force, torque, dipole, inducedDipole, inducedDipolePolar;
real3 pos, force, torque, inducedDipole, inducedDipolePolar, sphericalDipole;
real q;
float thole, damp;
#ifdef INCLUDE_QUADRUPOLES
real quadrupoleXX, quadrupoleXY, quadrupoleXZ, quadrupoleYY, quadrupoleYZ;
float padding;
real sphericalQuadrupole[5];
#endif
} AtomData;
__device__ void computeOneInteractionF1(AtomData& atom1, volatile AtomData& atom2, real4 delta, real4 bn, real bn5, float forceFactor, float dScale, float pScale, float mScale, real3& force, real& energy);
__device__ void computeOneInteractionF2(AtomData& atom1, volatile AtomData& atom2, real4 delta, real4 bn, float forceFactor, float dScale, float pScale, float mScale, real3& force, real& energy);
__device__ void computeOneInteractionT1(AtomData& atom1, volatile AtomData& atom2, const real4 delta, const real4 bn, float dScale, float pScale, float mScale);
__device__ void computeOneInteractionT2(AtomData& atom1, volatile AtomData& atom2, const real4 delta, const real4 bn, float dScale, float pScale, float mScale);
__device__ void computeOneInteractionF1NoScale(AtomData& atom1, volatile AtomData& atom2, real4 delta, real4 bn, real bn5, float forceFactor, real3& force, real& energy);
__device__ void computeOneInteractionF2NoScale(AtomData& atom1, volatile AtomData& atom2, real4 delta, real4 bn, float forceFactor, real3& force, real& energy);
__device__ void computeOneInteractionT1NoScale(AtomData& atom1, volatile AtomData& atom2, const real4 delta, const real4 bn);
__device__ void computeOneInteractionT2NoScale(AtomData& atom1, volatile AtomData& atom2, const real4 delta, const real4 bn);
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ sphericalDipole,
const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar,
const float2* __restrict__ dampingAndThole) {
real4 atomPosq = posq[atom];
data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
data.q = atomPosq.w;
data.dipole.x = labFrameDipole[atom*3];
data.dipole.y = labFrameDipole[atom*3+1];
data.dipole.z = labFrameDipole[atom*3+2];
data.sphericalDipole.x = sphericalDipole[atom*3];
data.sphericalDipole.y = sphericalDipole[atom*3+1];
data.sphericalDipole.z = sphericalDipole[atom*3+2];
#ifdef INCLUDE_QUADRUPOLES
data.quadrupoleXX = labFrameQuadrupole[atom*5];
data.quadrupoleXY = labFrameQuadrupole[atom*5+1];
data.quadrupoleXZ = labFrameQuadrupole[atom*5+2];
data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
data.sphericalQuadrupole[0] = sphericalQuadrupole[atom*5];
data.sphericalQuadrupole[1] = sphericalQuadrupole[atom*5+1];
data.sphericalQuadrupole[2] = sphericalQuadrupole[atom*5+2];
data.sphericalQuadrupole[3] = sphericalQuadrupole[atom*5+3];
data.sphericalQuadrupole[4] = sphericalQuadrupole[atom*5+4];
#endif
data.inducedDipole.x = inducedDipole[atom*3];
data.inducedDipole.y = inducedDipole[atom*3+1];
......@@ -66,7 +57,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
__device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool hasExclusions, float dScale, float pScale, float mScale, float forceFactor,
real& energy, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 delta;
real3 delta;
delta.x = atom2.pos.x - atom1.pos.x;
delta.y = atom2.pos.y - atom1.pos.y;
delta.z = atom2.pos.z - atom1.pos.z;
......@@ -74,113 +65,405 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
// periodic box
APPLY_PERIODIC_TO_DELTA(delta)
delta.w = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (delta.w > CUTOFF_SQUARED)
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 > CUTOFF_SQUARED)
return;
real r = SQRT(delta.w);
real ralpha = EWALD_ALPHA*r;
real rInv = RSQRT(r2);
real r = r2*rInv;
real qiRotationMatrix[3][3];
buildQIRotationMatrix(delta, rInv, qiRotationMatrix);
real3 qiUindI = 0.5f*make_real3(qiRotationMatrix[0][1]*atom1.inducedDipole.x + qiRotationMatrix[0][2]*atom1.inducedDipole.y + qiRotationMatrix[0][0]*atom1.inducedDipole.z,
qiRotationMatrix[1][1]*atom1.inducedDipole.x + qiRotationMatrix[1][2]*atom1.inducedDipole.y + qiRotationMatrix[1][0]*atom1.inducedDipole.z,
qiRotationMatrix[2][1]*atom1.inducedDipole.x + qiRotationMatrix[2][2]*atom1.inducedDipole.y + qiRotationMatrix[2][0]*atom1.inducedDipole.z);
real3 qiUindJ = 0.5f*make_real3(qiRotationMatrix[0][1]*atom2.inducedDipole.x + qiRotationMatrix[0][2]*atom2.inducedDipole.y + qiRotationMatrix[0][0]*atom2.inducedDipole.z,
qiRotationMatrix[1][1]*atom2.inducedDipole.x + qiRotationMatrix[1][2]*atom2.inducedDipole.y + qiRotationMatrix[1][0]*atom2.inducedDipole.z,
qiRotationMatrix[2][1]*atom2.inducedDipole.x + qiRotationMatrix[2][2]*atom2.inducedDipole.y + qiRotationMatrix[2][0]*atom2.inducedDipole.z);
real3 qiUinpI = 0.5f*make_real3(qiRotationMatrix[0][1]*atom1.inducedDipolePolar.x + qiRotationMatrix[0][2]*atom1.inducedDipolePolar.y + qiRotationMatrix[0][0]*atom1.inducedDipolePolar.z,
qiRotationMatrix[1][1]*atom1.inducedDipolePolar.x + qiRotationMatrix[1][2]*atom1.inducedDipolePolar.y + qiRotationMatrix[1][0]*atom1.inducedDipolePolar.z,
qiRotationMatrix[2][1]*atom1.inducedDipolePolar.x + qiRotationMatrix[2][2]*atom1.inducedDipolePolar.y + qiRotationMatrix[2][0]*atom1.inducedDipolePolar.z);
real3 qiUinpJ = 0.5f*make_real3(qiRotationMatrix[0][1]*atom2.inducedDipolePolar.x + qiRotationMatrix[0][2]*atom2.inducedDipolePolar.y + qiRotationMatrix[0][0]*atom2.inducedDipolePolar.z,
qiRotationMatrix[1][1]*atom2.inducedDipolePolar.x + qiRotationMatrix[1][2]*atom2.inducedDipolePolar.y + qiRotationMatrix[1][0]*atom2.inducedDipolePolar.z,
qiRotationMatrix[2][1]*atom2.inducedDipolePolar.x + qiRotationMatrix[2][2]*atom2.inducedDipolePolar.y + qiRotationMatrix[2][0]*atom2.inducedDipolePolar.z);
real3 rotatedDipole1 = rotateDipole(atom1.sphericalDipole, qiRotationMatrix);
real3 rotatedDipole2 = rotateDipole(atom2.sphericalDipole, qiRotationMatrix);
real rotatedQuadrupole1[] = {0, 0, 0, 0, 0};
real rotatedQuadrupole2[] = {0, 0, 0, 0, 0};
#ifdef INCLUDE_QUADRUPOLES
rotateQuadupoles(qiRotationMatrix, atom1.sphericalQuadrupole, atom2.sphericalQuadrupole, rotatedQuadrupole1, rotatedQuadrupole2);
#endif
real qiQI[9] = {atom1.q, rotatedDipole1.x, rotatedDipole1.y, rotatedDipole1.z, rotatedQuadrupole1[0], rotatedQuadrupole1[1], rotatedQuadrupole1[2], rotatedQuadrupole1[3], rotatedQuadrupole1[4]};
real qiQJ[9] = {atom2.q, rotatedDipole2.x, rotatedDipole2.y, rotatedDipole2.z, rotatedQuadrupole2[0], rotatedQuadrupole2[1], rotatedQuadrupole2[2], rotatedQuadrupole2[3], rotatedQuadrupole2[4]};
// The Qtilde{x,y,z} torque intermediates for atoms I and J, which are used to obtain the torques on the permanent moments.
const real sqrtThree = SQRT((real) 3);
real qiQIX[9] = {0, qiQI[3], 0, -qiQI[1], sqrtThree*qiQI[6], qiQI[8], -sqrtThree*qiQI[4] - qiQI[7], qiQI[6], -qiQI[5]};
real qiQIY[9] = {0, -qiQI[2], qiQI[1], 0, -sqrtThree*qiQI[5], sqrtThree*qiQI[4] - qiQI[7], -qiQI[8], qiQI[5], qiQI[6]};
real qiQIZ[9] = {0, 0, -qiQI[3], qiQI[2], 0, -qiQI[6], qiQI[5], -2*qiQI[8], 2*qiQI[7]};
real qiQJX[9] = {0, qiQJ[3], 0, -qiQJ[1], sqrtThree*qiQJ[6], qiQJ[8], -sqrtThree*qiQJ[4] - qiQJ[7], qiQJ[6], -qiQJ[5]};
real qiQJY[9] = {0, -qiQJ[2], qiQJ[1], 0, -sqrtThree*qiQJ[5], sqrtThree*qiQJ[4] - qiQJ[7], -qiQJ[8], qiQJ[5], qiQJ[6]};
real qiQJZ[9] = {0, 0, -qiQJ[3], qiQJ[2], 0, -qiQJ[6], qiQJ[5], -2*qiQJ[8], 2*qiQJ[7]};
real alsq2 = 2*EWALD_ALPHA*EWALD_ALPHA;
real alsq2n = 0;
if (EWALD_ALPHA > 0)
alsq2n = RECIP(SQRT_PI*EWALD_ALPHA);
// The field derivatives at I due to permanent and induced moments on J, and vice-versa.
// Also, their derivatives w.r.t. R, which are needed for force calculations
real Vij[9], Vji[9], VjiR[9], VijR[9];
// The field derivatives at I due to only permanent moments on J, and vice-versa.
real Vijp[3], Vijd[3], Vjip[3], Vjid[3];
real rInvVec[7], alphaRVec[8], bVec[5];
// The rInvVec array is defined such that the ith element is R^-i, with the
// dieleectric constant folded in, to avoid conversions later.
rInvVec[1] = rInv;
for(int i = 2; i < 7; ++i)
rInvVec[i] = rInvVec[i-1] * rInv;
// The alpharVec array is defined such that the ith element is (alpha R)^i,
// where kappa (alpha in OpenMM parlance) is the Ewald attenuation parameter.
real ralpha = EWALD_ALPHA*r;
real exp2a = EXP(-(ralpha*ralpha));
real rr1 = RECIP(r);
delta.w = rr1;
#ifdef USE_DOUBLE_PRECISION
const real erfcAlphaR = erfc(ralpha);
const real erfAlphaR = erf(ralpha);
#else
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 1.5e-7.
const real t = RECIP(1.0f+0.3275911f*ralpha);
const real erfcAlphaR = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*exp2a;
const real erfAlphaR = 1-(0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*exp2a;
#endif
real bn0 = erfcAlphaR*rr1;
energy += forceFactor*atom1.q*atom2.q*bn0;
real rr2 = rr1*rr1;
alsq2n *= alsq2;
real4 bn;
bn.x = (bn0+alsq2n*exp2a)*rr2;
alsq2n *= alsq2;
bn.y = (3*bn.x+alsq2n*exp2a)*rr2;
alsq2n *= alsq2;
bn.z = (5*bn.y+alsq2n*exp2a)*rr2;
alsq2n *= alsq2;
bn.w = (7*bn.z+alsq2n*exp2a)*rr2;
alsq2n *= alsq2;
real bn5 = (9*bn.w+alsq2n*exp2a)*rr2;
alphaRVec[1] = ralpha;
for (int i = 2; i < 8; ++i)
alphaRVec[i] = alphaRVec[i-1]*ralpha;
real3 force;
real X = 2*EXP(-alphaRVec[2])/SQRT_PI;
real uScale = 1; // Delete!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1
if (hasExclusions) {
computeOneInteractionF1(atom1, atom2, delta, bn, bn5, forceFactor, dScale, pScale, mScale, force, energy);
computeOneInteractionF2(atom1, atom2, delta, bn, forceFactor, dScale, pScale, mScale, force, energy);
}
else {
computeOneInteractionF1NoScale(atom1, atom2, delta, bn, bn5, forceFactor, force, energy);
computeOneInteractionF2NoScale(atom1, atom2, delta, bn, forceFactor, force, energy);
int doubleFactorial = 1, facCount = 1;
real tmp = alphaRVec[1];
bVec[1] = -erfAlphaR;
for(int i=2; i < 5; ++i){
bVec[i] = bVec[i-1] + tmp * X / (real)(doubleFactorial);
facCount = facCount + 2;
doubleFactorial = doubleFactorial * facCount;
tmp *= 2*alphaRVec[2];
}
atom1.force += force;
if (forceFactor == 1)
atom2.force -= force;
if (hasExclusions) {
computeOneInteractionT1(atom1, atom2, delta, bn, dScale, pScale, mScale);
computeOneInteractionT2(atom1, atom2, delta, bn, dScale, pScale, mScale);
}
else {
computeOneInteractionT1NoScale(atom1, atom2, delta, bn);
computeOneInteractionT2NoScale(atom1, atom2, delta, bn);
real dmp = atom1.damp*atom2.damp;
real a = min(atom1.thole, atom2.thole);
real u = fabs(dmp) > 1.0e-5f ? r/dmp : 1e10f;
real au3 = a*u*u*u;
real expau3 = au3 < 50 ? EXP(-au3) : 0;
real a2u6 = au3*au3;
real a3u9 = a2u6*au3;
// Thole damping factors for energies
real thole_c = 1 - expau3;
real thole_d0 = 1 - expau3*(1 + 1.5f*au3);
real thole_d1 = 1 - expau3;
real thole_q0 = 1 - expau3*(1 + au3 + a2u6);
real thole_q1 = 1 - expau3*(1 + au3);
// Thole damping factors for derivatives
real dthole_c = 1 - expau3*(1 + 1.5f*au3);
real dthole_d0 = 1 - expau3*(1 + au3 + 1.5f*a2u6);
real dthole_d1 = 1 - expau3*(1 + au3);
real dthole_q0 = 1 - expau3*(1 + au3 + 0.25*a2u6 + 0.75*a3u9);
real dthole_q1 = 1 - expau3*(1 + au3 + 0.75*a2u6);
// Now we compute the (attenuated) Coulomb operator and its derivatives, contracted with
// permanent moments and induced dipoles. Note that the coefficient of the permanent force
// terms is half of the expected value; this is because we compute the interaction of I with
// the sum of induced and permanent moments on J, as well as the interaction of J with I's
// permanent and induced moments; doing so double counts the permanent-permanent interaction.
real ePermCoef, dPermCoef, eUIndCoef, dUIndCoef, eUInpCoef, dUInpCoef;
// C-C terms (m=0)
ePermCoef = rInvVec[1]*(mScale + bVec[2] - alphaRVec[1]*X);
dPermCoef = -0.5f*(mScale + bVec[2])*rInvVec[2];
Vij[0] = ePermCoef*qiQJ[0];
Vji[0] = ePermCoef*qiQI[0];
VijR[0] = dPermCoef*qiQJ[0];
VjiR[0] = dPermCoef*qiQI[0];
// C-D and C-Uind terms (m=0)
ePermCoef = rInvVec[2]*(mScale + bVec[2]);
eUIndCoef = rInvVec[2]*(dScale*thole_c + bVec[2]);
eUInpCoef = rInvVec[2]*(pScale*thole_c + bVec[2]);
dPermCoef = -rInvVec[3]*(mScale + bVec[2] + alphaRVec[3]*X);
dUIndCoef = -2*rInvVec[3]*(dScale*dthole_c + bVec[2] + alphaRVec[3]*X);
dUInpCoef = -2*rInvVec[3]*(pScale*dthole_c + bVec[2] + alphaRVec[3]*X);
Vij[0] += -(ePermCoef*qiQJ[1] + eUIndCoef*qiUindJ.x + eUInpCoef*qiUinpJ.x);
Vji[1] = -(ePermCoef*qiQI[0]);
VijR[0] += -(dPermCoef*qiQJ[1] + dUIndCoef*qiUindJ.x + dUInpCoef*qiUinpJ.x);
VjiR[1] = -(dPermCoef*qiQI[0]);
Vjip[0] = -(eUInpCoef*qiQI[0]);
Vjid[0] = -(eUIndCoef*qiQI[0]);
// D-C and Uind-C terms (m=0)
Vij[1] = ePermCoef*qiQJ[0];
Vji[0] += ePermCoef*qiQI[1] + eUIndCoef*qiUindI.x + eUInpCoef*qiUinpI.x;
VijR[1] = dPermCoef*qiQJ[0];
VjiR[0] += dPermCoef*qiQI[1] + dUIndCoef*qiUindI.x + dUInpCoef*qiUinpI.x;
Vijp[0] = eUInpCoef*qiQJ[0];
Vijd[0] = eUIndCoef*qiQJ[0];
// D-D and D-Uind terms (m=0)
const real twoThirds = (real) 2/3;
ePermCoef = -twoThirds*rInvVec[3]*(3*(mScale + bVec[3]) + alphaRVec[3]*X);
eUIndCoef = -twoThirds*rInvVec[3]*(3*(dScale*thole_d0 + bVec[3]) + alphaRVec[3]*X);
eUInpCoef = -twoThirds*rInvVec[3]*(3*(pScale*thole_d0 + bVec[3]) + alphaRVec[3]*X);
dPermCoef = rInvVec[4]*(3*(mScale + bVec[3]) + 2*alphaRVec[5]*X);
dUIndCoef = rInvVec[4]*(6*(dScale*dthole_d0 + bVec[3]) + 4*alphaRVec[5]*X);
dUInpCoef = rInvVec[4]*(6*(pScale*dthole_d0 + bVec[3]) + 4*alphaRVec[5]*X);
Vij[1] += ePermCoef*qiQJ[1] + eUIndCoef*qiUindJ.x + eUInpCoef*qiUinpJ.x;
Vji[1] += ePermCoef*qiQI[1] + eUIndCoef*qiUindI.x + eUInpCoef*qiUinpI.x;
VijR[1] += dPermCoef*qiQJ[1] + dUIndCoef*qiUindJ.x + dUInpCoef*qiUinpJ.x;
VjiR[1] += dPermCoef*qiQI[1] + dUIndCoef*qiUindI.x + dUInpCoef*qiUinpI.x;
Vijp[0] += eUInpCoef*qiQJ[1];
Vijd[0] += eUIndCoef*qiQJ[1];
Vjip[0] += eUInpCoef*qiQI[1];
Vjid[0] += eUIndCoef*qiQI[1];
// D-D and D-Uind terms (m=1)
ePermCoef = rInvVec[3]*(mScale + bVec[3] - twoThirds*alphaRVec[3]*X);
eUIndCoef = rInvVec[3]*(dScale*thole_d1 + bVec[3] - twoThirds*alphaRVec[3]*X);
eUInpCoef = rInvVec[3]*(pScale*thole_d1 + bVec[3] - twoThirds*alphaRVec[3]*X);
dPermCoef = -1.5f*rInvVec[4]*(mScale + bVec[3]);
dUIndCoef = -3*rInvVec[4]*(dScale*dthole_d1 + bVec[3]);
dUInpCoef = -3*rInvVec[4]*(pScale*dthole_d1 + bVec[3]);
Vij[2] = ePermCoef*qiQJ[2] + eUIndCoef*qiUindJ.y + eUInpCoef*qiUinpJ.y;
Vji[2] = ePermCoef*qiQI[2] + eUIndCoef*qiUindI.y + eUInpCoef*qiUinpI.y;
VijR[2] = dPermCoef*qiQJ[2] + dUIndCoef*qiUindJ.y + dUInpCoef*qiUinpJ.y;
VjiR[2] = dPermCoef*qiQI[2] + dUIndCoef*qiUindI.y + dUInpCoef*qiUinpI.y;
Vij[3] = ePermCoef*qiQJ[3] + eUIndCoef*qiUindJ.z + eUInpCoef*qiUinpJ.z;
Vji[3] = ePermCoef*qiQI[3] + eUIndCoef*qiUindI.z + eUInpCoef*qiUinpI.z;
VijR[3] = dPermCoef*qiQJ[3] + dUIndCoef*qiUindJ.z + dUInpCoef*qiUinpJ.z;
VjiR[3] = dPermCoef*qiQI[3] + dUIndCoef*qiUindI.z + dUInpCoef*qiUinpI.z;
Vijp[1] = eUInpCoef*qiQJ[2];
Vijd[1] = eUIndCoef*qiQJ[2];
Vjip[1] = eUInpCoef*qiQI[2];
Vjid[1] = eUIndCoef*qiQI[2];
Vijp[2] = eUInpCoef*qiQJ[3];
Vijd[2] = eUIndCoef*qiQJ[3];
Vjip[2] = eUInpCoef*qiQI[3];
Vjid[2] = eUIndCoef*qiQI[3];
// C-Q terms (m=0)
ePermCoef = (mScale + bVec[3])*rInvVec[3];
dPermCoef = -((real) 1/3)*rInvVec[4]*(4.5*(mScale + bVec[3]) + 2*alphaRVec[5]*X);
Vij[0] += ePermCoef*qiQJ[4];
Vji[4] = ePermCoef*qiQI[0];
VijR[0] += dPermCoef*qiQJ[4];
VjiR[4] = dPermCoef*qiQI[0];
// Q-C terms (m=0)
Vij[4] = ePermCoef*qiQJ[0];
Vji[0] += ePermCoef*qiQI[4];
VijR[4] = dPermCoef*qiQJ[0];
VjiR[0] += dPermCoef*qiQI[4];
// D-Q and Uind-Q terms (m=0)
const real fourThirds = (real) 4/3;
ePermCoef = rInvVec[4]*(3*(mScale + bVec[3]) + fourThirds*alphaRVec[5]*X);
eUIndCoef = rInvVec[4]*(3*(dScale*thole_q0 + bVec[3]) + fourThirds*alphaRVec[5]*X);
eUInpCoef = rInvVec[4]*(3*(pScale*thole_q0 + bVec[3]) + fourThirds*alphaRVec[5]*X);
dPermCoef = -fourThirds*rInvVec[5]*(4.5f*(mScale + bVec[3]) + (1 + alphaRVec[2])*alphaRVec[5]*X);
dUIndCoef = -fourThirds*rInvVec[5]*(9*(dScale*dthole_q0 + bVec[3]) + 2*(1 + alphaRVec[2])*alphaRVec[5]*X);
dUInpCoef = -fourThirds*rInvVec[5]*(9*(pScale*dthole_q0 + bVec[3]) + 2*(1 + alphaRVec[2])*alphaRVec[5]*X);
Vij[1] += ePermCoef*qiQJ[4];
Vji[4] += ePermCoef*qiQI[1] + eUIndCoef*qiUindI.x + eUInpCoef*qiUinpI.x;
VijR[1] += dPermCoef*qiQJ[4];
VjiR[4] += dPermCoef*qiQI[1] + dUIndCoef*qiUindI.x + dUInpCoef*qiUinpI.x;
Vijp[0] += eUInpCoef*qiQJ[4];
Vijd[0] += eUIndCoef*qiQJ[4];
// Q-D and Q-Uind terms (m=0)
Vij[4] += -(ePermCoef*qiQJ[1] + eUIndCoef*qiUindJ.x + eUInpCoef*qiUinpJ.x);
Vji[1] += -(ePermCoef*qiQI[4]);
VijR[4] += -(dPermCoef*qiQJ[1] + dUIndCoef*qiUindJ.x + dUInpCoef*qiUinpJ.x);
VjiR[1] += -(dPermCoef*qiQI[4]);
Vjip[0] += -(eUInpCoef*qiQI[4]);
Vjid[0] += -(eUIndCoef*qiQI[4]);
// D-Q and Uind-Q terms (m=1)
ePermCoef = -sqrtThree*rInvVec[4]*(mScale + bVec[3]);
eUIndCoef = -sqrtThree*rInvVec[4]*(dScale*thole_q1 + bVec[3]);
eUInpCoef = -sqrtThree*rInvVec[4]*(pScale*thole_q1 + bVec[3]);
const real fourSqrtOneThird = 4/sqrt((real) 3);
dPermCoef = fourSqrtOneThird*rInvVec[5]*(1.5f*(mScale + bVec[3]) + 0.5f*alphaRVec[5]*X);
dUIndCoef = fourSqrtOneThird*rInvVec[5]*(3*(dScale*dthole_q1 + bVec[3]) + alphaRVec[5]*X);
dUInpCoef = fourSqrtOneThird*rInvVec[5]*(3*(pScale*dthole_q1 + bVec[3]) + alphaRVec[5]*X);
Vij[2] += ePermCoef*qiQJ[5];
Vji[5] = ePermCoef*qiQI[2] + eUIndCoef*qiUindI.y + eUInpCoef*qiUinpI.y;
VijR[2] += dPermCoef*qiQJ[5];
VjiR[5] = dPermCoef*qiQI[2] + dUIndCoef*qiUindI.y + dUInpCoef*qiUinpI.y;
Vij[3] += ePermCoef*qiQJ[6];
Vji[6] = ePermCoef*qiQI[3] + eUIndCoef*qiUindI.z + eUInpCoef*qiUinpI.z;
VijR[3] += dPermCoef*qiQJ[6];
VjiR[6] = dPermCoef*qiQI[3] + dUIndCoef*qiUindI.z + dUInpCoef*qiUinpI.z;
Vijp[1] += eUInpCoef*qiQJ[5];
Vijd[1] += eUIndCoef*qiQJ[5];
Vijp[2] += eUInpCoef*qiQJ[6];
Vijd[2] += eUIndCoef*qiQJ[6];
// D-Q and Uind-Q terms (m=1)
Vij[5] = -(ePermCoef*qiQJ[2] + eUIndCoef*qiUindJ.y + eUInpCoef*qiUinpJ.y);
Vji[2] += -(ePermCoef*qiQI[5]);
VijR[5] = -(dPermCoef*qiQJ[2] + dUIndCoef*qiUindJ.y + dUInpCoef*qiUinpJ.y);
VjiR[2] += -(dPermCoef*qiQI[5]);
Vij[6] = -(ePermCoef*qiQJ[3] + eUIndCoef*qiUindJ.z + eUInpCoef*qiUinpJ.z);
Vji[3] += -(ePermCoef*qiQI[6]);
VijR[6] = -(dPermCoef*qiQJ[3] + dUIndCoef*qiUindJ.z + dUInpCoef*qiUinpJ.z);
VjiR[3] += -(dPermCoef*qiQI[6]);
Vjip[1] += -(eUInpCoef*qiQI[5]);
Vjid[1] += -(eUIndCoef*qiQI[5]);
Vjip[2] += -(eUInpCoef*qiQI[6]);
Vjid[2] += -(eUIndCoef*qiQI[6]);
// Q-Q terms (m=0)
ePermCoef = rInvVec[5]*(6*(mScale + bVec[4]) + ((real) 4/45)*(-3 + 10*alphaRVec[2])*alphaRVec[5]*X);
dPermCoef = -rInvVec[6]*(135*(mScale + bVec[4]) + 4*(1 + 2*alphaRVec[2])*alphaRVec[7]*X)/9;
Vij[4] += ePermCoef*qiQJ[4];
Vji[4] += ePermCoef*qiQI[4];
VijR[4] += dPermCoef*qiQJ[4];
VjiR[4] += dPermCoef*qiQI[4];
// Q-Q terms (m=1)
const real fourOverFifteen = (real) 4/15;
ePermCoef = -fourOverFifteen*rInvVec[5]*(15*(mScale + bVec[4]) + alphaRVec[5]*X);
dPermCoef = rInvVec[6]*(10*(mScale + bVec[4]) + fourThirds*alphaRVec[7]*X);
Vij[5] += ePermCoef*qiQJ[5];
Vji[5] += ePermCoef*qiQI[5];
VijR[5] += dPermCoef*qiQJ[5];
VjiR[5] += dPermCoef*qiQI[5];
Vij[6] += ePermCoef*qiQJ[6];
Vji[6] += ePermCoef*qiQI[6];
VijR[6] += dPermCoef*qiQJ[6];
VjiR[6] += dPermCoef*qiQI[6];
// Q-Q terms (m=2)
ePermCoef = rInvVec[5]*(mScale + bVec[4] - fourOverFifteen*alphaRVec[5]*X);
dPermCoef = -2.5*(mScale + bVec[4])*rInvVec[6];
Vij[7] = ePermCoef*qiQJ[7];
Vji[7] = ePermCoef*qiQI[7];
VijR[7] = dPermCoef*qiQJ[7];
VjiR[7] = dPermCoef*qiQI[7];
Vij[8] = ePermCoef*qiQJ[8];
Vji[8] = ePermCoef*qiQI[8];
VijR[8] = dPermCoef*qiQJ[8];
VjiR[8] = dPermCoef*qiQI[8];
// Evaluate the energies, forces and torques due to permanent+induced moments
// interacting with just the permanent moments.
energy += forceFactor*0.5f*(qiQI[0]*Vij[0] + qiQJ[0]*Vji[0]);
real fIZ = qiQI[0]*VijR[0];
real fJZ = qiQJ[0]*VjiR[0];
real EIX = 0, EIY = 0, EIZ = 0, EJX = 0, EJY = 0, EJZ = 0;
for(int i = 1; i < 9; ++i){
energy += forceFactor*0.5f*(qiQI[i]*Vij[i] + qiQJ[i]*Vji[i]);
fIZ += qiQI[i]*VijR[i];
fJZ += qiQJ[i]*VjiR[i];
EIX += qiQIX[i]*Vij[i];
EIY += qiQIY[i]*Vij[i];
EIZ += qiQIZ[i]*Vij[i];
EJX += qiQJX[i]*Vji[i];
EJY += qiQJY[i]*Vji[i];
EJZ += qiQJZ[i]*Vji[i];
}
// Define the torque intermediates for the induced dipoles. These are simply the induced dipole torque
// intermediates dotted with the field due to permanent moments only, at each center. We inline the
// induced dipole torque intermediates here, for simplicity. N.B. There are no torques on the dipoles
// themselves, so we accumulate the torque intermediates into separate variables to allow them to be
// used only in the force calculation.
//
// The torque about the x axis (needed to obtain the y force on the induced dipoles, below)
// qiUindIx[0] = qiQUindI[2]; qiUindIx[1] = 0; qiUindIx[2] = -qiQUindI[0]
real iEIX = qiUinpI.z*Vijp[0] + qiUindI.z*Vijd[0] - qiUinpI.x*Vijp[2] - qiUindI.x*Vijd[2];
real iEJX = qiUinpJ.z*Vjip[0] + qiUindJ.z*Vjid[0] - qiUinpJ.x*Vjip[2] - qiUindJ.x*Vjid[2];
// The torque about the y axis (needed to obtain the x force on the induced dipoles, below)
// qiUindIy[0] = -qiQUindI[1]; qiUindIy[1] = qiQUindI[0]; qiUindIy[2] = 0
real iEIY = qiUinpI.x*Vijp[1] + qiUindI.x*Vijd[1] - qiUinpI.y*Vijp[0] - qiUindI.y*Vijd[0];
real iEJY = qiUinpJ.x*Vjip[1] + qiUindJ.x*Vjid[1] - qiUinpJ.y*Vjip[0] - qiUindJ.y*Vjid[0];
#ifdef USE_MUTUAL_POLARIZATION
// Uind-Uind terms (m=0)
real eCoef = -fourThirds*rInvVec[3]*(3*(uScale*thole_d0 + bVec[3]) + alphaRVec[3]*X);
real dCoef = rInvVec[4]*(6*(uScale*dthole_d0 + bVec[3]) + 4*alphaRVec[5]*X);
iEIX += eCoef*(qiUinpI.z*qiUindJ.x + qiUindI.z*qiUinpJ.x);
iEJX += eCoef*(qiUinpJ.z*qiUindI.x + qiUindJ.z*qiUinpI.x);
iEIY -= eCoef*(qiUinpI.y*qiUindJ.x + qiUindI.y*qiUinpJ.x);
iEJY -= eCoef*(qiUinpJ.y*qiUindI.x + qiUindJ.y*qiUinpI.x);
fIZ += dCoef*(qiUinpI.x*qiUindJ.x + qiUindI.x*qiUinpJ.x);
fIZ += dCoef*(qiUinpJ.x*qiUindI.x + qiUindJ.x*qiUinpI.x);
// Uind-Uind terms (m=1)
eCoef = 2*rInvVec[3]*(uScale*thole_d1 + bVec[3] - twoThirds*alphaRVec[3]*X);
dCoef = -3*rInvVec[4]*(uScale*dthole_d1 + bVec[3]);
iEIX -= eCoef*(qiUinpI.x*qiUindJ.z + qiUindI.x*qiUinpJ.z);
iEJX -= eCoef*(qiUinpJ.x*qiUindI.z + qiUindJ.x*qiUinpI.z);
iEIY += eCoef*(qiUinpI.x*qiUindJ.y + qiUindI.x*qiUinpJ.y);
iEJY += eCoef*(qiUinpJ.x*qiUindI.y + qiUindJ.x*qiUinpI.y);
fIZ += dCoef*(qiUinpI.y*qiUindJ.y + qiUindI.y*qiUinpJ.y + qiUinpI.z*qiUindJ.z + qiUindI.z*qiUinpJ.z);
fIZ += dCoef*(qiUinpJ.y*qiUindI.y + qiUindJ.y*qiUinpI.y + qiUinpJ.z*qiUindI.z + qiUindJ.z*qiUinpI.z);
#endif
// The quasi-internal frame forces and torques. Note that the induced torque intermediates are
// used in the force expression, but not in the torques; the induced dipoles are isotropic.
real qiForce[3] = {rInv*(EIY+EJY+iEIY+iEJY), -rInv*(EIX+EJX+iEIX+iEJX), -(fJZ+fIZ)};
real qiTorqueI[3] = {-EIX, -EIY, -EIZ};
real qiTorqueJ[3] = {-EJX, -EJY, -EJZ};
real3 force = make_real3(qiRotationMatrix[1][1]*qiForce[0] + qiRotationMatrix[2][1]*qiForce[1] + qiRotationMatrix[0][1]*qiForce[2],
qiRotationMatrix[1][2]*qiForce[0] + qiRotationMatrix[2][2]*qiForce[1] + qiRotationMatrix[0][2]*qiForce[2],
qiRotationMatrix[1][0]*qiForce[0] + qiRotationMatrix[2][0]*qiForce[1] + qiRotationMatrix[0][0]*qiForce[2]);
atom1.force += force;
atom1.torque += make_real3(qiRotationMatrix[1][1]*qiTorqueI[0] + qiRotationMatrix[2][1]*qiTorqueI[1] + qiRotationMatrix[0][1]*qiTorqueI[2],
qiRotationMatrix[1][2]*qiTorqueI[0] + qiRotationMatrix[2][2]*qiTorqueI[1] + qiRotationMatrix[0][2]*qiTorqueI[2],
qiRotationMatrix[1][0]*qiTorqueI[0] + qiRotationMatrix[2][0]*qiTorqueI[1] + qiRotationMatrix[0][0]*qiTorqueI[2]);
if (forceFactor == 1) {
// T3 == T1 w/ particles I and J reversed
// T4 == T2 w/ particles I and J reversed
delta.x = -delta.x;
delta.y = -delta.y;
delta.z = -delta.z;
if (hasExclusions) {
computeOneInteractionT1(atom2, atom1, delta, bn, dScale, pScale, mScale);
computeOneInteractionT2(atom2, atom1, delta, bn, dScale, pScale, mScale);
}
else {
computeOneInteractionT1NoScale(atom2, atom1, delta, bn);
computeOneInteractionT2NoScale(atom2, atom1, delta, bn);
}
atom2.force -= force;
atom2.torque += make_real3(qiRotationMatrix[1][1]*qiTorqueJ[0] + qiRotationMatrix[2][1]*qiTorqueJ[1] + qiRotationMatrix[0][1]*qiTorqueJ[2],
qiRotationMatrix[1][2]*qiTorqueJ[0] + qiRotationMatrix[2][2]*qiTorqueJ[1] + qiRotationMatrix[0][2]*qiTorqueJ[2],
qiRotationMatrix[1][0]*qiTorqueJ[0] + qiRotationMatrix[2][0]*qiTorqueJ[1] + qiRotationMatrix[0][0]*qiTorqueJ[2]);
}
}
/**
* Compute the self energy and self torque.
*/
__device__ void computeSelfEnergyAndTorque(AtomData& atom1, real& energy) {
__device__ void computeSelfEnergyAndTorque(AtomData& atom1, int atomIndex, real& energy, const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole) {
real term = 2*EWALD_ALPHA*EWALD_ALPHA;
real fterm = -EWALD_ALPHA/SQRT_PI;
real cii = atom1.q*atom1.q;
real dii = dot(atom1.dipole, atom1.dipole);
real3 dipole = make_real3(labFrameDipole[atomIndex*3], labFrameDipole[atomIndex*3+1], labFrameDipole[atomIndex*3+2]);
real dii = dot(dipole, dipole);
#ifdef INCLUDE_QUADRUPOLES
real qii = 2*(atom1.quadrupoleXX*atom1.quadrupoleXX +
atom1.quadrupoleYY*atom1.quadrupoleYY +
atom1.quadrupoleXX*atom1.quadrupoleYY +
atom1.quadrupoleXY*atom1.quadrupoleXY +
atom1.quadrupoleXZ*atom1.quadrupoleXZ +
atom1.quadrupoleYZ*atom1.quadrupoleYZ);
real quadrupoleXX = labFrameQuadrupole[atomIndex*5];
real quadrupoleXY = labFrameQuadrupole[atomIndex*5+1];
real quadrupoleXZ = labFrameQuadrupole[atomIndex*5+2];
real quadrupoleYY = labFrameQuadrupole[atomIndex*5+3];
real quadrupoleYZ = labFrameQuadrupole[atomIndex*5+4];
real qii = 2*(quadrupoleXX*quadrupoleXX +
quadrupoleYY*quadrupoleYY +
quadrupoleXX*quadrupoleYY +
quadrupoleXY*quadrupoleXY +
quadrupoleXZ*quadrupoleXZ +
quadrupoleYZ*quadrupoleYZ);
#else
real qii = 0;
#endif
real uii = dot(atom1.dipole, atom1.inducedDipole);
real uii = dot(dipole, atom1.inducedDipole);
real selfEnergy = (cii + term*(dii/3 + 2*term*qii/5));
selfEnergy += term*uii/3;
selfEnergy *= fterm;
......@@ -189,7 +472,7 @@ __device__ void computeSelfEnergyAndTorque(AtomData& atom1, real& energy) {
// self-torque for PME
real3 ui = atom1.inducedDipole+atom1.inducedDipolePolar;
atom1.torque += ((2/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI)*cross(atom1.dipole, ui);
atom1.torque += ((2/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI)*cross(dipole, ui);
}
/**
......@@ -204,8 +487,9 @@ extern "C" __global__ void computeElectrostatics(
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const unsigned int* __restrict__ interactingAtoms,
#endif
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ sphericalDipole,
const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar,
const float2* __restrict__ dampingAndThole) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
......@@ -223,7 +507,7 @@ extern "C" __global__ void computeElectrostatics(
const unsigned int y = tileIndices.y;
AtomData data;
unsigned int atom1 = x*TILE_SIZE + tgx;
loadAtomData(data, atom1, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
loadAtomData(data, atom1, posq, sphericalDipole, sphericalQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
data.force = make_real3(0);
data.torque = make_real3(0);
uint2 covalent = covalentFlags[pos*TILE_SIZE+tgx];
......@@ -233,13 +517,13 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].pos = data.pos;
localData[threadIdx.x].q = data.q;
localData[threadIdx.x].dipole = data.dipole;
localData[threadIdx.x].sphericalDipole = data.sphericalDipole;
#ifdef INCLUDE_QUADRUPOLES
localData[threadIdx.x].quadrupoleXX = data.quadrupoleXX;
localData[threadIdx.x].quadrupoleXY = data.quadrupoleXY;
localData[threadIdx.x].quadrupoleXZ = data.quadrupoleXZ;
localData[threadIdx.x].quadrupoleYY = data.quadrupoleYY;
localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ;
localData[threadIdx.x].sphericalQuadrupole[0] = data.sphericalQuadrupole[0];
localData[threadIdx.x].sphericalQuadrupole[1] = data.sphericalQuadrupole[1];
localData[threadIdx.x].sphericalQuadrupole[2] = data.sphericalQuadrupole[2];
localData[threadIdx.x].sphericalQuadrupole[3] = data.sphericalQuadrupole[3];
localData[threadIdx.x].sphericalQuadrupole[4] = data.sphericalQuadrupole[4];
#endif
localData[threadIdx.x].inducedDipole = data.inducedDipole;
localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar;
......@@ -258,7 +542,7 @@ extern "C" __global__ void computeElectrostatics(
}
}
if (atom1 < NUM_ATOMS)
computeSelfEnergyAndTorque(data, energy);
computeSelfEnergyAndTorque(data, atom1, energy, labFrameDipole, labFrameQuadrupole);
data.force *= -ENERGY_SCALE_FACTOR;
data.torque *= ENERGY_SCALE_FACTOR;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
......@@ -272,7 +556,7 @@ extern "C" __global__ void computeElectrostatics(
// This is an off-diagonal tile.
unsigned int j = y*TILE_SIZE + tgx;
loadAtomData(localData[threadIdx.x], j, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
loadAtomData(localData[threadIdx.x], j, posq, sphericalDipole, sphericalQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
localData[threadIdx.x].force = make_real3(0);
localData[threadIdx.x].torque = make_real3(0);
unsigned int tj = tgx;
......@@ -366,7 +650,7 @@ extern "C" __global__ void computeElectrostatics(
// Load atom data for this tile.
AtomData data;
loadAtomData(data, atom1, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
loadAtomData(data, atom1, posq, sphericalDipole, sphericalQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
data.force = make_real3(0);
data.torque = make_real3(0);
#ifdef USE_CUTOFF
......@@ -375,7 +659,7 @@ extern "C" __global__ void computeElectrostatics(
unsigned int j = y*TILE_SIZE + tgx;
#endif
atomIndices[threadIdx.x] = j;
loadAtomData(localData[threadIdx.x], j, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
loadAtomData(localData[threadIdx.x], j, posq, sphericalDipole, sphericalQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
localData[threadIdx.x].force = make_real3(0);
localData[threadIdx.x].torque = make_real3(0);
......
__device__ void buildQIRotationMatrix(real3 deltaR, real rInv, real (&rotationMatrix)[3][3]) {
real3 vectorZ = deltaR*rInv;
real3 vectorX = vectorZ;
if (deltaR.y != 0 || deltaR.z != 0)
vectorX.x += 1;
else
vectorX.y += 1;
vectorX -= vectorZ*dot(vectorX, vectorZ);
vectorX = normalize(vectorX);
real3 vectorY = cross(vectorZ, vectorX);
// Reorder the Cartesian {x,y,z} dipole rotation matrix, to account
// for spherical harmonic ordering {z,x,y}.
rotationMatrix[0][0] = vectorZ.z;
rotationMatrix[0][1] = vectorZ.x;
rotationMatrix[0][2] = vectorZ.y;
rotationMatrix[1][0] = vectorX.z;
rotationMatrix[1][1] = vectorX.x;
rotationMatrix[1][2] = vectorX.y;
rotationMatrix[2][0] = vectorY.z;
rotationMatrix[2][1] = vectorY.x;
rotationMatrix[2][2] = vectorY.y;
}
__device__ real3 rotateDipole(real3& dipole, const real (&rotationMatrix)[3][3]) {
return make_real3(rotationMatrix[0][0]*dipole.x + rotationMatrix[0][1]*dipole.y + rotationMatrix[0][2]*dipole.z,
rotationMatrix[1][0]*dipole.x + rotationMatrix[1][1]*dipole.y + rotationMatrix[1][2]*dipole.z,
rotationMatrix[2][0]*dipole.x + rotationMatrix[2][1]*dipole.y + rotationMatrix[2][2]*dipole.z);
}
__device__ void rotateQuadupoles(const real (&rotationMatrix)[3][3], const real* quad1, const real* quad2, real* rotated1, real* rotated2) {
real sqrtThree = SQRT((real) 3);
real element;
element = 0.5f*(3.0f*rotationMatrix[0][0]*rotationMatrix[0][0] - 1.0f);
rotated1[0] += quad1[0]*element;
rotated2[0] += quad2[0]*element;
element = sqrtThree*rotationMatrix[0][0]*rotationMatrix[0][1];
rotated1[0] += quad1[1]*element;
rotated2[0] += quad2[1]*element;
element = sqrtThree*rotationMatrix[0][0]*rotationMatrix[0][2];
rotated1[0] += quad1[2]*element;
rotated2[0] += quad2[2]*element;
element = 0.5f*sqrtThree*(rotationMatrix[0][1]*rotationMatrix[0][1] - rotationMatrix[0][2]*rotationMatrix[0][2]);
rotated1[0] += quad1[3]*element;
rotated2[0] += quad2[3]*element;
element = sqrtThree*rotationMatrix[0][1]*rotationMatrix[0][2];
rotated1[0] += quad1[4]*element;
rotated2[0] += quad2[4]*element;
element = sqrtThree*rotationMatrix[0][0]*rotationMatrix[1][0];
rotated1[1] += quad1[0]*element;
rotated2[1] += quad2[0]*element;
element = rotationMatrix[1][0]*rotationMatrix[0][1] + rotationMatrix[0][0]*rotationMatrix[1][1];
rotated1[1] += quad1[1]*element;
rotated2[1] += quad2[1]*element;
element = rotationMatrix[1][0]*rotationMatrix[0][2] + rotationMatrix[0][0]*rotationMatrix[1][2];
rotated1[1] += quad1[2]*element;
rotated2[1] += quad2[2]*element;
element = rotationMatrix[0][1]*rotationMatrix[1][1] - rotationMatrix[0][2]*rotationMatrix[1][2];
rotated1[1] += quad1[3]*element;
rotated2[1] += quad2[3]*element;
element = rotationMatrix[1][1]*rotationMatrix[0][2] + rotationMatrix[0][1]*rotationMatrix[1][2];
rotated1[1] += quad1[4]*element;
rotated2[1] += quad2[4]*element;
element = sqrtThree*rotationMatrix[0][0]*rotationMatrix[2][0];
rotated1[2] += quad1[0]*element;
rotated2[2] += quad2[0]*element;
element = rotationMatrix[2][0]*rotationMatrix[0][1] + rotationMatrix[0][0]*rotationMatrix[2][1];
rotated1[2] += quad1[1]*element;
rotated2[2] += quad2[1]*element;
element = rotationMatrix[2][0]*rotationMatrix[0][2] + rotationMatrix[0][0]*rotationMatrix[2][2];
rotated1[2] += quad1[2]*element;
rotated2[2] += quad2[2]*element;
element = rotationMatrix[0][1]*rotationMatrix[2][1] - rotationMatrix[0][2]*rotationMatrix[2][2];
rotated1[2] += quad1[3]*element;
rotated2[2] += quad2[3]*element;
element = rotationMatrix[2][1]*rotationMatrix[0][2] + rotationMatrix[0][1]*rotationMatrix[2][2];
rotated1[2] += quad1[4]*element;
rotated2[2] += quad2[4]*element;
element = 0.5f*sqrtThree*(rotationMatrix[1][0]*rotationMatrix[1][0] - rotationMatrix[2][0]*rotationMatrix[2][0]);
rotated1[3] += quad1[0]*element;
rotated2[3] += quad2[0]*element;
element = rotationMatrix[1][0]*rotationMatrix[1][1] - rotationMatrix[2][0]*rotationMatrix[2][1];
rotated1[3] += quad1[1]*element;
rotated2[3] += quad2[1]*element;
element = rotationMatrix[1][0]*rotationMatrix[1][2] - rotationMatrix[2][0]*rotationMatrix[2][2];
rotated1[3] += quad1[2]*element;
rotated2[3] += quad2[2]*element;
element = 0.5f*(rotationMatrix[1][1]*rotationMatrix[1][1] - rotationMatrix[2][1]*rotationMatrix[2][1] - rotationMatrix[1][2]*rotationMatrix[1][2] + rotationMatrix[2][2]*rotationMatrix[2][2]);
rotated1[3] += quad1[3]*element;
rotated2[3] += quad2[3]*element;
element = rotationMatrix[1][1]*rotationMatrix[1][2] - rotationMatrix[2][1]*rotationMatrix[2][2];
rotated1[3] += quad1[4]*element;
rotated2[3] += quad2[4]*element;
element = sqrtThree*rotationMatrix[1][0]*rotationMatrix[2][0];
rotated1[4] += quad1[0]*element;
rotated2[4] += quad2[0]*element;
element = rotationMatrix[2][0]*rotationMatrix[1][1] + rotationMatrix[1][0]*rotationMatrix[2][1];
rotated1[4] += quad1[1]*element;
rotated2[4] += quad2[1]*element;
element = rotationMatrix[2][0]*rotationMatrix[1][2] + rotationMatrix[1][0]*rotationMatrix[2][2];
rotated1[4] += quad1[2]*element;
rotated2[4] += quad2[2]*element;
element = rotationMatrix[1][1]*rotationMatrix[2][1] - rotationMatrix[1][2]*rotationMatrix[2][2];
rotated1[4] += quad1[3]*element;
rotated2[4] += quad2[3]*element;
element = rotationMatrix[2][1]*rotationMatrix[1][2] + rotationMatrix[1][1]*rotationMatrix[2][2];
rotated1[4] += quad1[4]*element;
rotated2[4] += quad2[4]*element;
}
......@@ -6676,6 +6676,6 @@ RealOpenMM AmoebaReferencePmeMultipoleForce::calculateElectrostatic(const vector
energy += computeReciprocalSpaceInducedDipoleForceAndEnergy(getPolarizationType(), particleData, forces, torques);
energy += computeReciprocalSpaceFixedMultipoleForceAndEnergy(particleData, forces, torques);
energy += calculatePmeSelfEnergy(particleData);
return energy;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment