"platforms/vscode:/vscode.git/clone" did not exist on "160a491ec2701ef149e5b15083e5f9b1a5714002"
Commit 474f600e authored by peastman's avatar peastman
Browse files

CUDA implementation of spherical harmonics w/o PME

parent 39e640c0
......@@ -1163,23 +1163,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
electrostaticsSource << CudaAmoebaKernelSources::sphericalMultipoles;
electrostaticsSource << CudaAmoebaKernelSources::pmeMultipoleElectrostatics;
electrostaticsThreadMemory = 24*elementSize+3*sizeof(float)+3*sizeof(int)/(double) cu.TileSize;
if (!useShuffle)
electrostaticsThreadMemory += 3*elementSize;
}
else {
electrostaticsSource << CudaKernelSources::vectorOps;
electrostaticsSource << CudaAmoebaKernelSources::sphericalMultipoles;
electrostaticsSource << CudaAmoebaKernelSources::multipoleElectrostatics;
electrostaticsSource << "#define F1\n";
electrostaticsSource << (hasQuadrupoles ? CudaAmoebaKernelSources::electrostaticPairForce : CudaAmoebaKernelSources::electrostaticPairForceNoQuadrupoles);
electrostaticsSource << "#undef F1\n";
electrostaticsSource << "#define T1\n";
electrostaticsSource << (hasQuadrupoles ? CudaAmoebaKernelSources::electrostaticPairForce : CudaAmoebaKernelSources::electrostaticPairForceNoQuadrupoles);
electrostaticsSource << "#undef T1\n";
electrostaticsSource << "#define T3\n";
electrostaticsSource << (hasQuadrupoles ? CudaAmoebaKernelSources::electrostaticPairForce : CudaAmoebaKernelSources::electrostaticPairForceNoQuadrupoles);
electrostaticsThreadMemory = 21*elementSize+2*sizeof(float)+3*sizeof(int)/(double) cu.TileSize;
if (!useShuffle)
electrostaticsThreadMemory += 3*elementSize;
electrostaticsThreadMemory = 24*elementSize+2*sizeof(float)+3*sizeof(int)/(double) cu.TileSize;
if (gk != NULL)
electrostaticsThreadMemory += 4*elementSize;
}
......@@ -1503,8 +1492,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void* electrostaticsArgs[] = {&cu.getForce().getDevicePointer(), &torque->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&cu.getPosq().getDevicePointer(), &covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &startTileIndex, &numTileIndices,
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(),
&inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()};
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &sphericalDipoles->getDevicePointer(), &sphericalQuadrupoles->getDevicePointer(),
&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*electrostaticsThreads, electrostaticsThreads);
if (gkKernel != NULL)
gkKernel->finishComputation(*torque, *labFrameDipoles, *labFrameQuadrupoles, *inducedDipole, *inducedDipolePolar, *dampingAndThole, *covalentFlags, *polarizationGroupFlags);
......
/**
* This defines three different closely related functions, depending on which constant (F1, T1, or T3) is defined.
*/
#if defined F1
__device__ void computeOneInteractionF1(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real& energy, real3& outputForce) {
#elif defined T1
__device__ void computeOneInteractionT1(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real3& outputForce) {
#else
__device__ void computeOneInteractionT3(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real3& outputForce) {
#endif
#ifdef F1
const float uScale = 1;
real ddsc3_0 = 0;
real ddsc3_1 = 0;
real ddsc3_2 = 0;
real ddsc5_0 = 0;
real ddsc5_1 = 0;
real ddsc5_2 = 0;
real ddsc7_0 = 0;
real ddsc7_1 = 0;
real ddsc7_2 = 0;
#endif
real xr = atom2.posq.x - atom1.posq.x;
real yr = atom2.posq.y - atom1.posq.y;
real zr = atom2.posq.z - atom1.posq.z;
real r2 = xr*xr + yr*yr + zr*zr;
real r = SQRT(r2);
real rr1 = RECIP(r);
real rr2 = rr1*rr1;
real rr3 = rr1*rr2;
real rr5 = 3*rr3*rr2;
real rr7 = 5*rr5*rr2;
real rr9 = 7*rr7*rr2;
#ifdef F1
real rr11 = 9*rr9*rr2;
#endif
real scale3 = 1;
real scale5 = 1;
real scale7 = 1;
real pdamp = atom1.damp*atom2.damp;
if (pdamp != 0) {
real ratio = r/pdamp;
float pGamma = atom2.thole > atom1.thole ? atom1.thole : atom2.thole;
real damp = ratio*ratio*ratio*pGamma;
real dampExp = EXP(-damp);
real damp1 = damp + 1;
real damp2 = damp*damp;
scale3 = 1 - dampExp;
scale5 = 1 - damp1*dampExp;
scale7 = 1 - (damp1 + 0.6f*damp2)*dampExp;
#ifdef F1
real factor = 3*damp*dampExp*rr2;
real factor7 = -0.2f + 0.6f*damp;
ddsc3_0 = factor*xr;
ddsc5_0 = ddsc3_0*damp;
ddsc7_0 = ddsc5_0*factor7;
ddsc3_1 = factor*yr;
ddsc5_1 = ddsc3_1*damp;
ddsc7_1 = ddsc5_1*factor7;
ddsc3_2 = factor*zr;
ddsc5_2 = ddsc3_2*damp;
ddsc7_2 = ddsc5_2*factor7;
#endif
}
#if defined F1
real scale3i = rr3*scale3*uScale;
real scale5i = rr5*scale5*uScale;
#endif
real dsc3 = rr3*scale3*dScale;
real psc3 = rr3*scale3*pScale;
real dsc5 = rr5*scale5*dScale;
real psc5 = rr5*scale5*pScale;
real dsc7 = rr7*scale7*dScale;
real psc7 = rr7*scale7*pScale;
real atom2quadrupoleZZ = -(atom2.quadrupoleXX+atom2.quadrupoleYY);
real qJr_0 = atom2.quadrupoleXX*xr + atom2.quadrupoleXY*yr + atom2.quadrupoleXZ*zr;
real qJr_1 = atom2.quadrupoleXY*xr + atom2.quadrupoleYY*yr + atom2.quadrupoleYZ*zr;
real qJr_2 = atom2.quadrupoleXZ*xr + atom2.quadrupoleYZ*yr + atom2quadrupoleZZ*zr;
real atom1quadrupoleZZ = -(atom1.quadrupoleXX+atom1.quadrupoleYY);
real qIr_0 = atom1.quadrupoleXX*xr + atom1.quadrupoleXY*yr + atom1.quadrupoleXZ*zr;
real qIr_1 = atom1.quadrupoleXY*xr + atom1.quadrupoleYY*yr + atom1.quadrupoleYZ*zr;
real qIr_2 = atom1.quadrupoleXZ*xr + atom1.quadrupoleYZ*yr + atom1quadrupoleZZ*zr;
#if defined F1
real sc2 = atom1.dipole.x*atom2.dipole.x + atom1.dipole.y*atom2.dipole.y + atom1.dipole.z*atom2.dipole.z;
#endif
#if defined F1 || defined T1
real sc4 = atom2.dipole.x*xr + atom2.dipole.y*yr + atom2.dipole.z*zr;
real sc6 = qJr_0*xr + qJr_1*yr + qJr_2*zr;
#endif
#if defined F1 || defined T3
real sc3 = atom1.dipole.x*xr + atom1.dipole.y*yr + atom1.dipole.z*zr;
real sc5 = qIr_0*xr + qIr_1*yr + qIr_2*zr;
#endif
#if defined F1
real sc7 = qIr_0*atom2.dipole.x + qIr_1*atom2.dipole.y + qIr_2*atom2.dipole.z;
real sc8 = qJr_0*atom1.dipole.x + qJr_1*atom1.dipole.y + qJr_2*atom1.dipole.z;
real sc9 = qIr_0*qJr_0 + qIr_1*qJr_1 + qIr_2*qJr_2;
real sc10 = atom1.quadrupoleXX*atom2.quadrupoleXX + atom1.quadrupoleXY*atom2.quadrupoleXY + atom1.quadrupoleXZ*atom2.quadrupoleXZ +
atom1.quadrupoleXY*atom2.quadrupoleXY + atom1.quadrupoleYY*atom2.quadrupoleYY + atom1.quadrupoleYZ*atom2.quadrupoleYZ +
atom1.quadrupoleXZ*atom2.quadrupoleXZ + atom1.quadrupoleYZ*atom2.quadrupoleYZ + atom1quadrupoleZZ*atom2quadrupoleZZ;
real sci1 = atom1.inducedDipole.x*atom2.dipole.x + atom1.inducedDipole.y*atom2.dipole.y + atom1.inducedDipole.z*atom2.dipole.z +
atom2.inducedDipole.x*atom1.dipole.x + atom2.inducedDipole.y*atom1.dipole.y + atom2.inducedDipole.z*atom1.dipole.z;
#endif
#if defined F1 || defined T3
real sci3 = atom1.inducedDipole.x*xr + atom1.inducedDipole.y*yr + atom1.inducedDipole.z*zr;
#endif
#if defined F1
real sci7 = qIr_0*atom2.inducedDipole.x + qIr_1*atom2.inducedDipole.y + qIr_2*atom2.inducedDipole.z;
real sci8 = qJr_0*atom1.inducedDipole.x + qJr_1*atom1.inducedDipole.y + qJr_2*atom1.inducedDipole.z;
#endif
#if defined F1 || defined T1
real sci4 = atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr;
#endif
#if defined F1
real scip1 = atom1.inducedDipolePolar.x*atom2.dipole.x + atom1.inducedDipolePolar.y*atom2.dipole.y + atom1.inducedDipolePolar.z*atom2.dipole.z +
atom2.inducedDipolePolar.x*atom1.dipole.x + atom2.inducedDipolePolar.y*atom1.dipole.y + atom2.inducedDipolePolar.z*atom1.dipole.z;
real scip2 = atom1.inducedDipole.x*atom2.inducedDipolePolar.x + atom1.inducedDipole.y*atom2.inducedDipolePolar.y + atom1.inducedDipole.z*atom2.inducedDipolePolar.z +
atom2.inducedDipole.x*atom1.inducedDipolePolar.x + atom2.inducedDipole.y*atom1.inducedDipolePolar.y + atom2.inducedDipole.z*atom1.inducedDipolePolar.z;
#endif
#if defined F1 || defined T3
real scip3 = ((atom1.inducedDipolePolar.x)*(xr) + (atom1.inducedDipolePolar.y)*(yr) + (atom1.inducedDipolePolar.z)*(zr));
#endif
#if defined F1 || defined T1
real scip4 = ((atom2.inducedDipolePolar.x)*(xr) + (atom2.inducedDipolePolar.y)*(yr) + (atom2.inducedDipolePolar.z)*(zr));
#endif
#ifdef F1
real scip7 = ((qIr_0)*(atom2.inducedDipolePolar.x) + (qIr_1)*(atom2.inducedDipolePolar.y) + (qIr_2)*(atom2.inducedDipolePolar.z));
real scip8 = ((qJr_0)*(atom1.inducedDipolePolar.x) + (qJr_1)*(atom1.inducedDipolePolar.y) + (qJr_2)*(atom1.inducedDipolePolar.z));
real gli1 = atom2.posq.w*sci3 - atom1.posq.w*sci4;
real gli6 = sci1;
real glip1 = atom2.posq.w*scip3 - atom1.posq.w*scip4;
real glip6 = scip1;
real gli2 = -sc3*sci4 - sci3*sc4;
real gli3 = sci3*sc6 - sci4*sc5;
real gli7 = 2*(sci7-sci8);
real glip2 = -sc3*scip4 - scip3*sc4;
real glip3 = scip3*sc6 - scip4*sc5;
real glip7 = 2*(scip7-scip8);
real factor3 = rr3*((gli1 + gli6)*pScale + (glip1 + glip6)*dScale);
real factor5 = rr5*((gli2 + gli7)*pScale + (glip2 + glip7)*dScale);
real factor7 = rr7*(gli3*pScale + glip3*dScale);
real ftm2i_0 = -0.5f*(factor3*ddsc3_0 + factor5*ddsc5_0 + factor7*ddsc7_0);
real ftm2i_1 = -0.5f*(factor3*ddsc3_1 + factor5*ddsc5_1 + factor7*ddsc7_1);
real ftm2i_2 = -0.5f*(factor3*ddsc3_2 + factor5*ddsc5_2 + factor7*ddsc7_2);
real gl0 = atom1.posq.w*atom2.posq.w;
real gl1 = atom2.posq.w*sc3 - atom1.posq.w*sc4;
real gl2 = atom1.posq.w*sc6 + atom2.posq.w*sc5 - sc3*sc4;
real gl3 = sc3*sc6 - sc4*sc5;
real gl4 = sc5*sc6;
real gl6 = sc2;
real gl7 = 2*(sc7-sc8);
real gl8 = 2*sc10;
real gl5 = -4*sc9;
real gf1 = rr3*gl0 + rr5*(gl1+gl6) + rr7*(gl2+gl7+gl8) + rr9*(gl3+gl5) + rr11*gl4;
#endif
#if defined F1 || defined T1
real gf2 = -atom2.posq.w*rr3 + sc4*rr5 - sc6*rr7;
real gf5 = 2*(-atom2.posq.w*rr5+sc4*rr7-sc6*rr9);
#endif
#if defined F1 || defined T3
real gf3 = atom1.posq.w*rr3 + sc3*rr5 + sc5*rr7;
real gf6 = 2*(-atom1.posq.w*rr5-sc3*rr7-sc5*rr9);
#endif
#ifdef F1
real em = mScale*(rr1*gl0 + rr3*(gl1+gl6) + rr5*(gl2+gl7+gl8) + rr7*(gl3+gl5) + rr9*gl4);
real ei = 0.5f*((gli1+gli6)*psc3 + (gli2+gli7)*psc5 + gli3*psc7);
energy = em+ei;
#endif
#if defined F1 || defined T1
real qIdJ_0 = atom1.quadrupoleXX*atom2.dipole.x + atom1.quadrupoleXY*atom2.dipole.y + atom1.quadrupoleXZ*atom2.dipole.z;
real qIdJ_1 = atom1.quadrupoleXY*atom2.dipole.x + atom1.quadrupoleYY*atom2.dipole.y + atom1.quadrupoleYZ*atom2.dipole.z;
real qIdJ_2 = atom1.quadrupoleXZ*atom2.dipole.x + atom1.quadrupoleYZ*atom2.dipole.y + atom1quadrupoleZZ*atom2.dipole.z;
real qIqJr_0 = atom1.quadrupoleXX*qJr_0 + atom1.quadrupoleXY*qJr_1 + atom1.quadrupoleXZ*qJr_2;
real qIqJr_1 = atom1.quadrupoleXY*qJr_0 + atom1.quadrupoleYY*qJr_1 + atom1.quadrupoleYZ*qJr_2;
real qIqJr_2 = atom1.quadrupoleXZ*qJr_0 + atom1.quadrupoleYZ*qJr_1 + atom1quadrupoleZZ*qJr_2;
#endif
#ifdef F1
real qkqir_0 = atom2.quadrupoleXX*qIr_0 + atom2.quadrupoleXY*qIr_1 + atom2.quadrupoleXZ*qIr_2;
real qkqir_1 = atom2.quadrupoleXY*qIr_0 + atom2.quadrupoleYY*qIr_1 + atom2.quadrupoleYZ*qIr_2;
real qkqir_2 = atom2.quadrupoleXZ*qIr_0 + atom2.quadrupoleYZ*qIr_1 + atom2quadrupoleZZ*qIr_2;
real qkdi_0 = atom2.quadrupoleXX*atom1.dipole.x + atom2.quadrupoleXY*atom1.dipole.y + atom2.quadrupoleXZ*atom1.dipole.z;
real qkdi_1 = atom2.quadrupoleXY*atom1.dipole.x + atom2.quadrupoleYY*atom1.dipole.y + atom2.quadrupoleYZ*atom1.dipole.z;
real qkdi_2 = atom2.quadrupoleXZ*atom1.dipole.x + atom2.quadrupoleYZ*atom1.dipole.y + atom2quadrupoleZZ*atom1.dipole.z;
real ftm2_0 = mScale*(gf1*xr + gf2*atom1.dipole.x + gf3*atom2.dipole.x + 2*rr5*(qkdi_0 - qIdJ_0) + gf5*qIr_0 + gf6*qJr_0 + 4*rr7*(qIqJr_0 + qkqir_0));
real ftm2_1 = mScale*(gf1*yr + gf2*atom1.dipole.y + gf3*atom2.dipole.y + 2*rr5*(qkdi_1 - qIdJ_1) + gf5*qIr_1 + gf6*qJr_1 + 4*rr7*(qIqJr_1 + qkqir_1));
real ftm2_2 = mScale*(gf1*zr + gf2*atom1.dipole.z + gf3*atom2.dipole.z + 2*rr5*(qkdi_2 - qIdJ_2) + gf5*qIr_2 + gf6*qJr_2 + 4*rr7*(qIqJr_2 + qkqir_2));
real gfi1 = rr2*(1.5f*((gli1+gli6)*psc3 + (glip1+glip6)*dsc3 + scip2*scale3i) + 2.5f*((gli7+gli2)*psc5 + (glip7+glip2)*dsc5 - (sci3*scip4+scip3*sci4)*scale5i) + 3.5f*(gli3*psc7+glip3*dsc7));
ftm2i_0 += gfi1*xr;
ftm2i_1 += gfi1*yr;
ftm2i_2 += gfi1*zr;
#endif
#if defined F1 || defined T1
real gfi5 = (sci4*psc7 + scip4*dsc7);
#endif
#if defined F1 || defined T3
real gfi6 = -(sci3*psc7 + scip3*dsc7);
#endif
#if defined F1 || defined T1
real qIuJ_0 = atom1.quadrupoleXX*atom2.inducedDipole.x + atom1.quadrupoleXY*atom2.inducedDipole.y + atom1.quadrupoleXZ*atom2.inducedDipole.z;
real qIuJ_1 = atom1.quadrupoleXY*atom2.inducedDipole.x + atom1.quadrupoleYY*atom2.inducedDipole.y + atom1.quadrupoleYZ*atom2.inducedDipole.z;
real qIuJ_2 = atom1.quadrupoleXZ*atom2.inducedDipole.x + atom1.quadrupoleYZ*atom2.inducedDipole.y + atom1quadrupoleZZ*atom2.inducedDipole.z;
real qIuJp_0 = atom1.quadrupoleXX*atom2.inducedDipolePolar.x + atom1.quadrupoleXY*atom2.inducedDipolePolar.y + atom1.quadrupoleXZ*atom2.inducedDipolePolar.z;
real qIuJp_1 = atom1.quadrupoleXY*atom2.inducedDipolePolar.x + atom1.quadrupoleYY*atom2.inducedDipolePolar.y + atom1.quadrupoleYZ*atom2.inducedDipolePolar.z;
real qIuJp_2 = atom1.quadrupoleXZ*atom2.inducedDipolePolar.x + atom1.quadrupoleYZ*atom2.inducedDipolePolar.y + atom1quadrupoleZZ*atom2.inducedDipolePolar.z;
#endif
#if defined T3
real qJuIp_0 = atom2.quadrupoleXX*atom1.inducedDipolePolar.x + atom2.quadrupoleXY*atom1.inducedDipolePolar.y + atom2.quadrupoleXZ*atom1.inducedDipolePolar.z;
real qJuIp_1 = atom2.quadrupoleXY*atom1.inducedDipolePolar.x + atom2.quadrupoleYY*atom1.inducedDipolePolar.y + atom2.quadrupoleYZ*atom1.inducedDipolePolar.z;
real qJuIp_2 = atom2.quadrupoleXZ*atom1.inducedDipolePolar.x + atom2.quadrupoleYZ*atom1.inducedDipolePolar.y + atom2quadrupoleZZ*atom1.inducedDipolePolar.z;
real qJuI_0 = atom2.quadrupoleXX*atom1.inducedDipole.x + atom2.quadrupoleXY*atom1.inducedDipole.y + atom2.quadrupoleXZ*atom1.inducedDipole.z;
real qJuI_1 = atom2.quadrupoleXY*atom1.inducedDipole.x + atom2.quadrupoleYY*atom1.inducedDipole.y + atom2.quadrupoleYZ*atom1.inducedDipole.z;
real qJuI_2 = atom2.quadrupoleXZ*atom1.inducedDipole.x + atom2.quadrupoleYZ*atom1.inducedDipole.y + atom2quadrupoleZZ*atom1.inducedDipole.z;
#endif
#ifdef F1
real qkui_0 = atom2.quadrupoleXX*atom1.inducedDipole.x + atom2.quadrupoleXY*atom1.inducedDipole.y + atom2.quadrupoleXZ*atom1.inducedDipole.z;
real qkui_1 = atom2.quadrupoleXY*atom1.inducedDipole.x + atom2.quadrupoleYY*atom1.inducedDipole.y + atom2.quadrupoleYZ*atom1.inducedDipole.z;
real qkui_2 = atom2.quadrupoleXZ*atom1.inducedDipole.x + atom2.quadrupoleYZ*atom1.inducedDipole.y + atom2quadrupoleZZ*atom1.inducedDipole.z;
real qkuip_0 = atom2.quadrupoleXX*atom1.inducedDipolePolar.x + atom2.quadrupoleXY*atom1.inducedDipolePolar.y + atom2.quadrupoleXZ*atom1.inducedDipolePolar.z;
real qkuip_1 = atom2.quadrupoleXY*atom1.inducedDipolePolar.x + atom2.quadrupoleYY*atom1.inducedDipolePolar.y + atom2.quadrupoleYZ*atom1.inducedDipolePolar.z;
real qkuip_2 = atom2.quadrupoleXZ*atom1.inducedDipolePolar.x + atom2.quadrupoleYZ*atom1.inducedDipolePolar.y + atom2quadrupoleZZ*atom1.inducedDipolePolar.z;
ftm2i_0 += 0.5f*(-atom2.posq.w*(atom1.inducedDipole.x*psc3 + atom1.inducedDipolePolar.x*dsc3) +
sc4*(atom1.inducedDipole.x*psc5 + atom1.inducedDipolePolar.x*dsc5) -
sc6*(atom1.inducedDipole.x*psc7 + atom1.inducedDipolePolar.x*dsc7)) +
0.5f*(atom1.posq.w*(atom2.inducedDipole.x*psc3+atom2.inducedDipolePolar.x*dsc3) +
sc3*(atom2.inducedDipole.x*psc5 +atom2.inducedDipolePolar.x*dsc5) +
sc5*(atom2.inducedDipole.x*psc7 +atom2.inducedDipolePolar.x*dsc7)) +
scale5i*(sci4*atom1.inducedDipolePolar.x+scip4*atom1.inducedDipole.x +
sci3*atom2.inducedDipolePolar.x+scip3*atom2.inducedDipole.x)*0.5f +
0.5f*(sci4*psc5+scip4*dsc5)*atom1.dipole.x +
0.5f*(sci3*psc5+scip3*dsc5)*atom2.dipole.x +
((qkui_0-qIuJ_0)*psc5 + (qkuip_0-qIuJp_0)*dsc5) +
gfi5*qIr_0 + gfi6*qJr_0;
ftm2i_1 += 0.5f*(-atom2.posq.w*(atom1.inducedDipole.y*psc3 + atom1.inducedDipolePolar.y*dsc3) +
sc4*(atom1.inducedDipole.y*psc5 + atom1.inducedDipolePolar.y*dsc5) -
sc6*(atom1.inducedDipole.y*psc7 + atom1.inducedDipolePolar.y*dsc7)) +
(atom1.posq.w*(atom2.inducedDipole.y*psc3+atom2.inducedDipolePolar.y*dsc3) +
sc3*(atom2.inducedDipole.y*psc5+atom2.inducedDipolePolar.y*dsc5) +
sc5*(atom2.inducedDipole.y*psc7+atom2.inducedDipolePolar.y*dsc7))*0.5f +
scale5i*(sci4*atom1.inducedDipolePolar.y+scip4*atom1.inducedDipole.y + sci3*atom2.inducedDipolePolar.y+scip3*atom2.inducedDipole.y)*0.5f +
0.5f*(sci4*psc5+scip4*dsc5)*atom1.dipole.y +
0.5f*(sci3*psc5+scip3*dsc5)*atom2.dipole.y +
((qkui_1-qIuJ_1)*psc5 + (qkuip_1-qIuJp_1)*dsc5) +
gfi5*qIr_1 + gfi6*qJr_1;
ftm2i_2 += 0.5f*(-atom2.posq.w*(atom1.inducedDipole.z*psc3 + atom1.inducedDipolePolar.z*dsc3) +
sc4*(atom1.inducedDipole.z*psc5 + atom1.inducedDipolePolar.z*dsc5) -
sc6*(atom1.inducedDipole.z*psc7 + atom1.inducedDipolePolar.z*dsc7)) +
(atom1.posq.w*(atom2.inducedDipole.z*psc3+atom2.inducedDipolePolar.z*dsc3) +
sc3*(atom2.inducedDipole.z*psc5+atom2.inducedDipolePolar.z*dsc5) +
sc5*(atom2.inducedDipole.z*psc7+atom2.inducedDipolePolar.z*dsc7))*0.5f +
scale5i*(sci4*atom1.inducedDipolePolar.z+scip4*atom1.inducedDipole.z +
sci3*atom2.inducedDipolePolar.z+scip3*atom2.inducedDipole.z)*0.5f +
0.5f*(sci4*psc5+scip4*dsc5)*atom1.dipole.z +
0.5f*(sci3*psc5+scip3*dsc5)*atom2.dipole.z +
((qkui_2-qIuJ_2)*psc5 + (qkuip_2-qIuJp_2)*dsc5) +
gfi5*qIr_2 + gfi6*qJr_2;
#ifdef DIRECT_POLARIZATION
real gfd = 0.5*(3*rr2*scip2*scale3i - 5*rr2*(scip3*sci4+sci3*scip4)*scale5i);
real temp5 = 0.5*scale5i;
real fdir_0 = gfd*xr + temp5*(sci4*atom1.inducedDipolePolar.x + scip4*atom1.inducedDipole.x + sci3*atom2.inducedDipolePolar.x + scip3*atom2.inducedDipole.x);
real fdir_1 = gfd*yr + temp5*(sci4*atom1.inducedDipolePolar.y + scip4*atom1.inducedDipole.y + sci3*atom2.inducedDipolePolar.y + scip3*atom2.inducedDipole.y);
real fdir_2 = gfd*zr + temp5*(sci4*atom1.inducedDipolePolar.z + scip4*atom1.inducedDipole.z + sci3*atom2.inducedDipolePolar.z + scip3*atom2.inducedDipole.z);
ftm2i_0 -= fdir_0;
ftm2i_1 -= fdir_1;
ftm2i_2 -= fdir_2;
#else
real scaleF = 0.5f*uScale;
real inducedFactor3 = scip2*rr3*scaleF;
real inducedFactor5 = (sci3*scip4+scip3*sci4)*rr5*scaleF;
real findmp_0 = inducedFactor3*ddsc3_0 - inducedFactor5*ddsc5_0;
real findmp_1 = inducedFactor3*ddsc3_1 - inducedFactor5*ddsc5_1;
real findmp_2 = inducedFactor3*ddsc3_2 - inducedFactor5*ddsc5_2;
ftm2i_0 -= findmp_0;
ftm2i_1 -= findmp_1;
ftm2i_2 -= findmp_2;
#endif
#endif
#if defined T1
real gti2 = 0.5f*(sci4*psc5+scip4*dsc5);
real gti5 = gfi5;
#endif
#if defined T3
real gti3 = 0.5f*(sci3*psc5+scip3*dsc5);
real gti6 = gfi6;
#endif
#if defined T1 || defined T3
real dixdk_0 = atom1.dipole.y*atom2.dipole.z - atom1.dipole.z*atom2.dipole.y;
real dixdk_1 = atom1.dipole.z*atom2.dipole.x - atom1.dipole.x*atom2.dipole.z;
real dixdk_2 = atom1.dipole.x*atom2.dipole.y - atom1.dipole.y*atom2.dipole.x;
#if defined T1
real dixuk_0 = atom1.dipole.y*atom2.inducedDipole.z - atom1.dipole.z*atom2.inducedDipole.y;
real dixuk_1 = atom1.dipole.z*atom2.inducedDipole.x - atom1.dipole.x*atom2.inducedDipole.z;
real dixuk_2 = atom1.dipole.x*atom2.inducedDipole.y - atom1.dipole.y*atom2.inducedDipole.x;
#endif
#endif
#ifdef T1
real dixukp_0 = atom1.dipole.y*atom2.inducedDipolePolar.z - atom1.dipole.z*atom2.inducedDipolePolar.y;
real dixukp_1 = atom1.dipole.z*atom2.inducedDipolePolar.x - atom1.dipole.x*atom2.inducedDipolePolar.z;
real dixukp_2 = atom1.dipole.x*atom2.inducedDipolePolar.y - atom1.dipole.y*atom2.inducedDipolePolar.x;
#endif
#ifdef T1
real dixr_0 = atom1.dipole.y*zr - atom1.dipole.z*yr;
real dixr_1 = atom1.dipole.z*xr - atom1.dipole.x*zr;
real dixr_2 = atom1.dipole.x*yr - atom1.dipole.y*xr;
#endif
#ifdef T1
real rxqiukp_0 = yr*qIuJp_2 - zr*qIuJp_1;
real rxqiukp_1 = zr*qIuJp_0 - xr*qIuJp_2;
real rxqiukp_2 = xr*qIuJp_1 - yr*qIuJp_0;
real rxqir_0 = yr*qIr_2 - zr*qIr_1;
real rxqir_1 = zr*qIr_0 - xr*qIr_2;
real rxqir_2 = xr*qIr_1 - yr*qIr_0;
real rxqiuk_0 = yr*qIuJ_2 - zr*qIuJ_1;
real rxqiuk_1 = zr*qIuJ_0 - xr*qIuJ_2;
real rxqiuk_2 = xr*qIuJ_1 - yr*qIuJ_0;
real ukxqir_0 = atom2.inducedDipole.y*qIr_2 - atom2.inducedDipole.z*qIr_1;
real ukxqir_1 = atom2.inducedDipole.z*qIr_0 - atom2.inducedDipole.x*qIr_2;
real ukxqir_2 = atom2.inducedDipole.x*qIr_1 - atom2.inducedDipole.y*qIr_0;
real ukxqirp_0 = atom2.inducedDipolePolar.y*qIr_2 - atom2.inducedDipolePolar.z*qIr_1;
real ukxqirp_1 = atom2.inducedDipolePolar.z*qIr_0 - atom2.inducedDipolePolar.x*qIr_2;
real ukxqirp_2 = atom2.inducedDipolePolar.x*qIr_1 - atom2.inducedDipolePolar.y*qIr_0;
real dixqkr_0 = atom1.dipole.y*qJr_2 - atom1.dipole.z*qJr_1;
real dixqkr_1 = atom1.dipole.z*qJr_0 - atom1.dipole.x*qJr_2;
real dixqkr_2 = atom1.dipole.x*qJr_1 - atom1.dipole.y*qJr_0;
real dkxqir_0 = atom2.dipole.y*qIr_2 - atom2.dipole.z*qIr_1;
real dkxqir_1 = atom2.dipole.z*qIr_0 - atom2.dipole.x*qIr_2;
real dkxqir_2 = atom2.dipole.x*qIr_1 - atom2.dipole.y*qIr_0;
real rxqikr_0 = yr*qIqJr_2 - zr*qIqJr_1;
real rxqikr_1 = zr*qIqJr_0 - xr*qIqJr_2;
real rxqikr_2 = xr*qIqJr_1 - yr*qIqJr_0;
real rxqidk_0 = yr*qIdJ_2 - zr*qIdJ_1;
real rxqidk_1 = zr*qIdJ_0 - xr*qIdJ_2;
real rxqidk_2 = xr*qIdJ_1 - yr*qIdJ_0;
real qkrxqir_0 = qJr_1*qIr_2 - qJr_2*qIr_1;
real qkrxqir_1 = qJr_2*qIr_0 - qJr_0*qIr_2;
real qkrxqir_2 = qJr_0*qIr_1 - qJr_1*qIr_0;
#endif
#if defined T1 || defined T3
real qixqk_0 = atom1.quadrupoleXY*atom2.quadrupoleXZ + atom1.quadrupoleYY*atom2.quadrupoleYZ + atom1.quadrupoleYZ*atom2quadrupoleZZ -
atom1.quadrupoleXZ*atom2.quadrupoleXY - atom1.quadrupoleYZ*atom2.quadrupoleYY - atom1quadrupoleZZ*atom2.quadrupoleYZ;
real qixqk_1 = atom1.quadrupoleXZ*atom2.quadrupoleXX + atom1.quadrupoleYZ*atom2.quadrupoleXY + atom1quadrupoleZZ*atom2.quadrupoleXZ -
atom1.quadrupoleXX*atom2.quadrupoleXZ - atom1.quadrupoleXY*atom2.quadrupoleYZ - atom1.quadrupoleXZ*atom2quadrupoleZZ;
real qixqk_2 = atom1.quadrupoleXX*atom2.quadrupoleXY + atom1.quadrupoleXY*atom2.quadrupoleYY + atom1.quadrupoleXZ*atom2.quadrupoleYZ -
atom1.quadrupoleXY*atom2.quadrupoleXX - atom1.quadrupoleYY*atom2.quadrupoleXY - atom1.quadrupoleYZ*atom2.quadrupoleXZ;
#endif
#ifdef T1
real ttm2_0 = -rr3*dixdk_0 + gf2*dixr_0-gf5*rxqir_0 + 2*rr5*(dixqkr_0 + dkxqir_0 + rxqidk_0-2*qixqk_0) - 4*rr7*(rxqikr_0 + qkrxqir_0);
real ttm2_1 = -rr3*dixdk_1 + gf2*dixr_1-gf5*rxqir_1 + 2*rr5*(dixqkr_1 + dkxqir_1 + rxqidk_1-2*qixqk_1) - 4*rr7*(rxqikr_1 + qkrxqir_1);
real ttm2_2 = -rr3*dixdk_2 + gf2*dixr_2-gf5*rxqir_2 + 2*rr5*(dixqkr_2 + dkxqir_2 + rxqidk_2-2*qixqk_2) - 4*rr7*(rxqikr_2 + qkrxqir_2);
real ttm2i_0 = -(dixuk_0*psc3+dixukp_0*dsc3)*0.5f + gti2*dixr_0 + ((ukxqir_0+ rxqiuk_0)*psc5 + (ukxqirp_0 + rxqiukp_0)*dsc5) - gti5*rxqir_0;
real ttm2i_1 = -(dixuk_1*psc3+dixukp_1*dsc3)*0.5f + gti2*dixr_1 + ((ukxqir_1+ rxqiuk_1)*psc5 + (ukxqirp_1 + rxqiukp_1)*dsc5) - gti5*rxqir_1;
real ttm2i_2 = -(dixuk_2*psc3+dixukp_2*dsc3)*0.5f + gti2*dixr_2 + ((ukxqir_2+ rxqiuk_2)*psc5 + (ukxqirp_2 + rxqiukp_2)*dsc5) - gti5*rxqir_2;
#endif
#ifdef T3
real qJqIr_0 = atom2.quadrupoleXX*qIr_0 + atom2.quadrupoleXY*qIr_1 + atom2.quadrupoleXZ*qIr_2;
real qJqIr_1 = atom2.quadrupoleXY*qIr_0 + atom2.quadrupoleYY*qIr_1 + atom2.quadrupoleYZ*qIr_2;
real qJqIr_2 = atom2.quadrupoleXZ*qIr_0 + atom2.quadrupoleYZ*qIr_1 + atom2quadrupoleZZ*qIr_2;
real qJdI_0 = atom2.quadrupoleXX*atom1.dipole.x + atom2.quadrupoleXY*atom1.dipole.y + atom2.quadrupoleXZ*atom1.dipole.z;
real qJdI_1 = atom2.quadrupoleXY*atom1.dipole.x + atom2.quadrupoleYY*atom1.dipole.y + atom2.quadrupoleYZ*atom1.dipole.z;
real qJdI_2 = atom2.quadrupoleXZ*atom1.dipole.x + atom2.quadrupoleYZ*atom1.dipole.y + atom2quadrupoleZZ*atom1.dipole.z;
real dkxr_0 = atom2.dipole.y*zr - atom2.dipole.z*yr;
real dkxr_1 = atom2.dipole.z*xr - atom2.dipole.x*zr;
real dkxr_2 = atom2.dipole.x*yr - atom2.dipole.y*xr;
real rxqkr_0 = yr*qJr_2 - zr*qJr_1;
real rxqkr_1 = zr*qJr_0 - xr*qJr_2;
real rxqkr_2 = xr*qJr_1 - yr*qJr_0;
real dixqkr_0 = atom1.dipole.y*qJr_2 - atom1.dipole.z*qJr_1;
real dixqkr_1 = atom1.dipole.z*qJr_0 - atom1.dipole.x*qJr_2;
real dixqkr_2 = atom1.dipole.x*qJr_1 - atom1.dipole.y*qJr_0;
real dkxqir_0 = atom2.dipole.y*qIr_2 - atom2.dipole.z*qIr_1;
real dkxqir_1 = atom2.dipole.z*qIr_0 - atom2.dipole.x*qIr_2;
real dkxqir_2 = atom2.dipole.x*qIr_1 - atom2.dipole.y*qIr_0;
real rxqkdi_0 = yr*qJdI_2 - zr*qJdI_1;
real rxqkdi_1 = zr*qJdI_0 - xr*qJdI_2;
real rxqkdi_2 = xr*qJdI_1 - yr*qJdI_0;
real rxqkir_0 = yr*qJqIr_2 - zr*qJqIr_1;
real rxqkir_1 = zr*qJqIr_0 - xr*qJqIr_2;
real rxqkir_2 = xr*qJqIr_1 - yr*qJqIr_0;
real qkrxqir_0 = qJr_1*qIr_2 - qJr_2*qIr_1;
real qkrxqir_1 = qJr_2*qIr_0 - qJr_0*qIr_2;
real qkrxqir_2 = qJr_0*qIr_1 - qJr_1*qIr_0;
real dkxui_0 = atom2.dipole.y*atom1.inducedDipole.z - atom2.dipole.z*atom1.inducedDipole.y;
real dkxui_1 = atom2.dipole.z*atom1.inducedDipole.x - atom2.dipole.x*atom1.inducedDipole.z;
real dkxui_2 = atom2.dipole.x*atom1.inducedDipole.y - atom2.dipole.y*atom1.inducedDipole.x;
real dkxuip_0 = atom2.dipole.y*atom1.inducedDipolePolar.z - atom2.dipole.z*atom1.inducedDipolePolar.y;
real dkxuip_1 = atom2.dipole.z*atom1.inducedDipolePolar.x - atom2.dipole.x*atom1.inducedDipolePolar.z;
real dkxuip_2 = atom2.dipole.x*atom1.inducedDipolePolar.y - atom2.dipole.y*atom1.inducedDipolePolar.x;
real uixqkrp_0 = atom1.inducedDipolePolar.y*qJr_2 - atom1.inducedDipolePolar.z*qJr_1;
real uixqkrp_1 = atom1.inducedDipolePolar.z*qJr_0 - atom1.inducedDipolePolar.x*qJr_2;
real uixqkrp_2 = atom1.inducedDipolePolar.x*qJr_1 - atom1.inducedDipolePolar.y*qJr_0;
real uixqkr_0 = atom1.inducedDipole.y*qJr_2 - atom1.inducedDipole.z*qJr_1;
real uixqkr_1 = atom1.inducedDipole.z*qJr_0 - atom1.inducedDipole.x*qJr_2;
real uixqkr_2 = atom1.inducedDipole.x*qJr_1 - atom1.inducedDipole.y*qJr_0;
real rxqkuip_0 = yr*qJuIp_2 - zr*qJuIp_1;
real rxqkuip_1 = zr*qJuIp_0 - xr*qJuIp_2;
real rxqkuip_2 = xr*qJuIp_1 - yr*qJuIp_0;
real rxqkui_0 = yr*qJuI_2 - zr*qJuI_1;
real rxqkui_1 = zr*qJuI_0 - xr*qJuI_2;
real rxqkui_2 = xr*qJuI_1 - yr*qJuI_0;
real ttm3_0 = rr3*dixdk_0 + gf3*dkxr_0 - gf6*rxqkr_0 - 2*rr5*(dixqkr_0 + dkxqir_0 + rxqkdi_0 - 2*qixqk_0) - 4*rr7*(rxqkir_0 - qkrxqir_0);
real ttm3_1 = rr3*dixdk_1 + gf3*dkxr_1 - gf6*rxqkr_1 - 2*rr5*(dixqkr_1 + dkxqir_1 + rxqkdi_1 - 2*qixqk_1) - 4*rr7*(rxqkir_1 - qkrxqir_1);
real ttm3_2 = rr3*dixdk_2 + gf3*dkxr_2 - gf6*rxqkr_2 - 2*rr5*(dixqkr_2 + dkxqir_2 + rxqkdi_2 - 2*qixqk_2) - 4*rr7*(rxqkir_2 - qkrxqir_2);
real ttm3i_0 = -(dkxui_0*psc3+ dkxuip_0*dsc3)*0.5f + gti3*dkxr_0 - ((uixqkr_0 + rxqkui_0)*psc5 + (uixqkrp_0 + rxqkuip_0)*dsc5) - gti6*rxqkr_0;
real ttm3i_1 = -(dkxui_1*psc3+ dkxuip_1*dsc3)*0.5f + gti3*dkxr_1 - ((uixqkr_1 + rxqkui_1)*psc5 + (uixqkrp_1 + rxqkuip_1)*dsc5) - gti6*rxqkr_1;
real ttm3i_2 = -(dkxui_2*psc3+ dkxuip_2*dsc3)*0.5f + gti3*dkxr_2 - ((uixqkr_2 + rxqkui_2)*psc5 + (uixqkrp_2 + rxqkuip_2)*dsc5) - gti6*rxqkr_2;
#endif
if (mScale < 1) {
#ifdef T1
ttm2_0 *= mScale;
ttm2_1 *= mScale;
ttm2_2 *= mScale;
#endif
#ifdef T3
ttm3_0 *= mScale;
ttm3_1 *= mScale;
ttm3_2 *= mScale;
#endif
}
#ifdef F1
outputForce.x = -(ftm2_0+ftm2i_0);
outputForce.y = -(ftm2_1+ftm2i_1);
outputForce.z = -(ftm2_2+ftm2i_2);
#endif
#ifdef T1
outputForce.x = (ttm2_0 + ttm2i_0);
outputForce.y = (ttm2_1 + ttm2i_1);
outputForce.z = (ttm2_2 + ttm2i_2);
#endif
#ifdef T3
outputForce.x = (ttm3_0 + ttm3i_0);
outputForce.y = (ttm3_1 + ttm3i_1);
outputForce.z = (ttm3_2 + ttm3i_2);
#endif
}
/**
* This defines three different closely related functions, depending on which constant (F1, T1, or T3) is defined.
*/
#if defined F1
__device__ void computeOneInteractionF1(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real& energy, real3& outputForce) {
#elif defined T1
__device__ void computeOneInteractionT1(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real3& outputForce) {
#else
__device__ void computeOneInteractionT3(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real3& outputForce) {
#endif
#ifdef F1
const float uScale = 1;
real ddsc3_0 = 0;
real ddsc3_1 = 0;
real ddsc3_2 = 0;
real ddsc5_0 = 0;
real ddsc5_1 = 0;
real ddsc5_2 = 0;
real ddsc7_0 = 0;
real ddsc7_1 = 0;
real ddsc7_2 = 0;
#endif
real xr = atom2.posq.x - atom1.posq.x;
real yr = atom2.posq.y - atom1.posq.y;
real zr = atom2.posq.z - atom1.posq.z;
real r2 = xr*xr + yr*yr + zr*zr;
real r = SQRT(r2);
real rr1 = RECIP(r);
real rr2 = rr1*rr1;
real rr3 = rr1*rr2;
real rr5 = 3*rr3*rr2;
real rr7 = 5*rr5*rr2;
real rr9 = 7*rr7*rr2;
#ifdef F1
real rr11 = 9*rr9*rr2;
#endif
real scale3 = 1;
real scale5 = 1;
real scale7 = 1;
real pdamp = atom1.damp*atom2.damp;
if (pdamp != 0) {
real ratio = r/pdamp;
float pGamma = atom2.thole > atom1.thole ? atom1.thole : atom2.thole;
real damp = ratio*ratio*ratio*pGamma;
real dampExp = EXP(-damp);
real damp1 = damp + 1;
real damp2 = damp*damp;
scale3 = 1 - dampExp;
scale5 = 1 - damp1*dampExp;
scale7 = 1 - (damp1 + 0.6f*damp2)*dampExp;
#ifdef F1
real factor = 3*damp*dampExp*rr2;
real factor7 = -0.2f + 0.6f*damp;
ddsc3_0 = factor*xr;
ddsc5_0 = ddsc3_0*damp;
ddsc7_0 = ddsc5_0*factor7;
ddsc3_1 = factor*yr;
ddsc5_1 = ddsc3_1*damp;
ddsc7_1 = ddsc5_1*factor7;
ddsc3_2 = factor*zr;
ddsc5_2 = ddsc3_2*damp;
ddsc7_2 = ddsc5_2*factor7;
#endif
}
#if defined F1
real scale3i = rr3*scale3*uScale;
real scale5i = rr5*scale5*uScale;
#endif
real dsc3 = rr3*scale3*dScale;
real psc3 = rr3*scale3*pScale;
real dsc5 = rr5*scale5*dScale;
real psc5 = rr5*scale5*pScale;
real dsc7 = rr7*scale7*dScale;
real psc7 = rr7*scale7*pScale;
#if defined F1
real sc2 = atom1.dipole.x*atom2.dipole.x + atom1.dipole.y*atom2.dipole.y + atom1.dipole.z*atom2.dipole.z;
#endif
#if defined F1 || defined T1
real sc4 = atom2.dipole.x*xr + atom2.dipole.y*yr + atom2.dipole.z*zr;
#endif
#if defined F1 || defined T3
real sc3 = atom1.dipole.x*xr + atom1.dipole.y*yr + atom1.dipole.z*zr;
#endif
#if defined F1
real sci1 = atom1.inducedDipole.x*atom2.dipole.x + atom1.inducedDipole.y*atom2.dipole.y + atom1.inducedDipole.z*atom2.dipole.z +
atom2.inducedDipole.x*atom1.dipole.x + atom2.inducedDipole.y*atom1.dipole.y + atom2.inducedDipole.z*atom1.dipole.z;
#endif
#if defined F1 || defined T3
real sci3 = atom1.inducedDipole.x*xr + atom1.inducedDipole.y*yr + atom1.inducedDipole.z*zr;
#endif
#if defined F1 || defined T1
real sci4 = atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr;
#endif
#if defined F1
real scip1 = atom1.inducedDipolePolar.x*atom2.dipole.x + atom1.inducedDipolePolar.y*atom2.dipole.y + atom1.inducedDipolePolar.z*atom2.dipole.z +
atom2.inducedDipolePolar.x*atom1.dipole.x + atom2.inducedDipolePolar.y*atom1.dipole.y + atom2.inducedDipolePolar.z*atom1.dipole.z;
real scip2 = atom1.inducedDipole.x*atom2.inducedDipolePolar.x + atom1.inducedDipole.y*atom2.inducedDipolePolar.y + atom1.inducedDipole.z*atom2.inducedDipolePolar.z +
atom2.inducedDipole.x*atom1.inducedDipolePolar.x + atom2.inducedDipole.y*atom1.inducedDipolePolar.y + atom2.inducedDipole.z*atom1.inducedDipolePolar.z;
#endif
#if defined F1 || defined T3
real scip3 = ((atom1.inducedDipolePolar.x)*(xr) + (atom1.inducedDipolePolar.y)*(yr) + (atom1.inducedDipolePolar.z)*(zr));
#endif
#if defined F1 || defined T1
real scip4 = ((atom2.inducedDipolePolar.x)*(xr) + (atom2.inducedDipolePolar.y)*(yr) + (atom2.inducedDipolePolar.z)*(zr));
#endif
#ifdef F1
real gli1 = atom2.posq.w*sci3 - atom1.posq.w*sci4;
real gli6 = sci1;
real glip1 = atom2.posq.w*scip3 - atom1.posq.w*scip4;
real glip6 = scip1;
real gli2 = -sc3*sci4 - sci3*sc4;
real glip2 = -sc3*scip4 - scip3*sc4;
real factor3 = rr3*((gli1 + gli6)*pScale + (glip1 + glip6)*dScale);
real factor5 = rr5*(gli2*pScale + glip2*dScale);
real ftm2i_0 = -0.5f*(factor3*ddsc3_0 + factor5*ddsc5_0);
real ftm2i_1 = -0.5f*(factor3*ddsc3_1 + factor5*ddsc5_1);
real ftm2i_2 = -0.5f*(factor3*ddsc3_2 + factor5*ddsc5_2);
real gl0 = atom1.posq.w*atom2.posq.w;
real gl1 = atom2.posq.w*sc3 - atom1.posq.w*sc4;
real gl2 = -sc3*sc4;
real gl6 = sc2;
real gf1 = rr3*gl0 + rr5*(gl1+gl6) + rr7*gl2;
#endif
#if defined F1 || defined T1
real gf2 = -atom2.posq.w*rr3 + sc4*rr5;
real gf5 = 2*(-atom2.posq.w*rr5+sc4*rr7);
#endif
#if defined F1 || defined T3
real gf3 = atom1.posq.w*rr3 + sc3*rr5;
real gf6 = 2*(-atom1.posq.w*rr5-sc3*rr7);
#endif
#ifdef F1
real em = mScale*(rr1*gl0 + rr3*(gl1+gl6) + rr5*gl2);
real ei = 0.5f*((gli1+gli6)*psc3 + gli2*psc5);
energy = em+ei;
#endif
#ifdef F1
real ftm2_0 = mScale*(gf1*xr + gf2*atom1.dipole.x + gf3*atom2.dipole.x);
real ftm2_1 = mScale*(gf1*yr + gf2*atom1.dipole.y + gf3*atom2.dipole.y);
real ftm2_2 = mScale*(gf1*zr + gf2*atom1.dipole.z + gf3*atom2.dipole.z);
real gfi1 = rr2*(1.5f*((gli1+gli6)*psc3 + (glip1+glip6)*dsc3 + scip2*scale3i) + 2.5f*(gli2*psc5 + glip2*dsc5 - (sci3*scip4+scip3*sci4)*scale5i));
ftm2i_0 += gfi1*xr;
ftm2i_1 += gfi1*yr;
ftm2i_2 += gfi1*zr;
#endif
#if defined F1 || defined T1
real gfi5 = (sci4*psc7 + scip4*dsc7);
#endif
#if defined F1 || defined T3
real gfi6 = -(sci3*psc7 + scip3*dsc7);
#endif
#ifdef F1
ftm2i_0 += 0.5f*(-atom2.posq.w*(atom1.inducedDipole.x*psc3 + atom1.inducedDipolePolar.x*dsc3) +
sc4*(atom1.inducedDipole.x*psc5 + atom1.inducedDipolePolar.x*dsc5)) +
0.5f*(atom1.posq.w*(atom2.inducedDipole.x*psc3+atom2.inducedDipolePolar.x*dsc3) +
sc3*(atom2.inducedDipole.x*psc5 +atom2.inducedDipolePolar.x*dsc5)) +
scale5i*(sci4*atom1.inducedDipolePolar.x+scip4*atom1.inducedDipole.x +
sci3*atom2.inducedDipolePolar.x+scip3*atom2.inducedDipole.x)*0.5f +
0.5f*(sci4*psc5+scip4*dsc5)*atom1.dipole.x +
0.5f*(sci3*psc5+scip3*dsc5)*atom2.dipole.x;
ftm2i_1 += 0.5f*(-atom2.posq.w*(atom1.inducedDipole.y*psc3 + atom1.inducedDipolePolar.y*dsc3) +
sc4*(atom1.inducedDipole.y*psc5 + atom1.inducedDipolePolar.y*dsc5)) +
(atom1.posq.w*(atom2.inducedDipole.y*psc3+atom2.inducedDipolePolar.y*dsc3) +
sc3*(atom2.inducedDipole.y*psc5+atom2.inducedDipolePolar.y*dsc5))*0.5f +
scale5i*(sci4*atom1.inducedDipolePolar.y+scip4*atom1.inducedDipole.y + sci3*atom2.inducedDipolePolar.y+scip3*atom2.inducedDipole.y)*0.5f +
0.5f*(sci4*psc5+scip4*dsc5)*atom1.dipole.y +
0.5f*(sci3*psc5+scip3*dsc5)*atom2.dipole.y;
ftm2i_2 += 0.5f*(-atom2.posq.w*(atom1.inducedDipole.z*psc3 + atom1.inducedDipolePolar.z*dsc3) +
sc4*(atom1.inducedDipole.z*psc5 + atom1.inducedDipolePolar.z*dsc5)) +
(atom1.posq.w*(atom2.inducedDipole.z*psc3+atom2.inducedDipolePolar.z*dsc3) +
sc3*(atom2.inducedDipole.z*psc5+atom2.inducedDipolePolar.z*dsc5))*0.5f +
scale5i*(sci4*atom1.inducedDipolePolar.z+scip4*atom1.inducedDipole.z +
sci3*atom2.inducedDipolePolar.z+scip3*atom2.inducedDipole.z)*0.5f +
0.5f*(sci4*psc5+scip4*dsc5)*atom1.dipole.z +
0.5f*(sci3*psc5+scip3*dsc5)*atom2.dipole.z;
#ifdef DIRECT_POLARIZATION
real gfd = 0.5*(3*rr2*scip2*scale3i - 5*rr2*(scip3*sci4+sci3*scip4)*scale5i);
real temp5 = 0.5*scale5i;
real fdir_0 = gfd*xr + temp5*(sci4*atom1.inducedDipolePolar.x + scip4*atom1.inducedDipole.x + sci3*atom2.inducedDipolePolar.x + scip3*atom2.inducedDipole.x);
real fdir_1 = gfd*yr + temp5*(sci4*atom1.inducedDipolePolar.y + scip4*atom1.inducedDipole.y + sci3*atom2.inducedDipolePolar.y + scip3*atom2.inducedDipole.y);
real fdir_2 = gfd*zr + temp5*(sci4*atom1.inducedDipolePolar.z + scip4*atom1.inducedDipole.z + sci3*atom2.inducedDipolePolar.z + scip3*atom2.inducedDipole.z);
ftm2i_0 -= fdir_0;
ftm2i_1 -= fdir_1;
ftm2i_2 -= fdir_2;
#else
real scaleF = 0.5f*uScale;
real inducedFactor3 = scip2*rr3*scaleF;
real inducedFactor5 = (sci3*scip4+scip3*sci4)*rr5*scaleF;
real findmp_0 = inducedFactor3*ddsc3_0 - inducedFactor5*ddsc5_0;
real findmp_1 = inducedFactor3*ddsc3_1 - inducedFactor5*ddsc5_1;
real findmp_2 = inducedFactor3*ddsc3_2 - inducedFactor5*ddsc5_2;
ftm2i_0 -= findmp_0;
ftm2i_1 -= findmp_1;
ftm2i_2 -= findmp_2;
#endif
#endif
#if defined T1
real gti2 = 0.5f*(sci4*psc5+scip4*dsc5);
real gti5 = gfi5;
#endif
#if defined T3
real gti3 = 0.5f*(sci3*psc5+scip3*dsc5);
real gti6 = gfi6;
#endif
#if defined T1 || defined T3
real dixdk_0 = atom1.dipole.y*atom2.dipole.z - atom1.dipole.z*atom2.dipole.y;
real dixdk_1 = atom1.dipole.z*atom2.dipole.x - atom1.dipole.x*atom2.dipole.z;
real dixdk_2 = atom1.dipole.x*atom2.dipole.y - atom1.dipole.y*atom2.dipole.x;
#if defined T1
real dixuk_0 = atom1.dipole.y*atom2.inducedDipole.z - atom1.dipole.z*atom2.inducedDipole.y;
real dixuk_1 = atom1.dipole.z*atom2.inducedDipole.x - atom1.dipole.x*atom2.inducedDipole.z;
real dixuk_2 = atom1.dipole.x*atom2.inducedDipole.y - atom1.dipole.y*atom2.inducedDipole.x;
#endif
#endif
#ifdef T1
real dixukp_0 = atom1.dipole.y*atom2.inducedDipolePolar.z - atom1.dipole.z*atom2.inducedDipolePolar.y;
real dixukp_1 = atom1.dipole.z*atom2.inducedDipolePolar.x - atom1.dipole.x*atom2.inducedDipolePolar.z;
real dixukp_2 = atom1.dipole.x*atom2.inducedDipolePolar.y - atom1.dipole.y*atom2.inducedDipolePolar.x;
#endif
#ifdef T1
real dixr_0 = atom1.dipole.y*zr - atom1.dipole.z*yr;
real dixr_1 = atom1.dipole.z*xr - atom1.dipole.x*zr;
real dixr_2 = atom1.dipole.x*yr - atom1.dipole.y*xr;
#endif
#ifdef T1
real ttm2_0 = -rr3*dixdk_0 + gf2*dixr_0;
real ttm2_1 = -rr3*dixdk_1 + gf2*dixr_1;
real ttm2_2 = -rr3*dixdk_2 + gf2*dixr_2;
real ttm2i_0 = -(dixuk_0*psc3+dixukp_0*dsc3)*0.5f + gti2*dixr_0;
real ttm2i_1 = -(dixuk_1*psc3+dixukp_1*dsc3)*0.5f + gti2*dixr_1;
real ttm2i_2 = -(dixuk_2*psc3+dixukp_2*dsc3)*0.5f + gti2*dixr_2;
#endif
#ifdef T3
real dkxr_0 = atom2.dipole.y*zr - atom2.dipole.z*yr;
real dkxr_1 = atom2.dipole.z*xr - atom2.dipole.x*zr;
real dkxr_2 = atom2.dipole.x*yr - atom2.dipole.y*xr;
real dkxui_0 = atom2.dipole.y*atom1.inducedDipole.z - atom2.dipole.z*atom1.inducedDipole.y;
real dkxui_1 = atom2.dipole.z*atom1.inducedDipole.x - atom2.dipole.x*atom1.inducedDipole.z;
real dkxui_2 = atom2.dipole.x*atom1.inducedDipole.y - atom2.dipole.y*atom1.inducedDipole.x;
real dkxuip_0 = atom2.dipole.y*atom1.inducedDipolePolar.z - atom2.dipole.z*atom1.inducedDipolePolar.y;
real dkxuip_1 = atom2.dipole.z*atom1.inducedDipolePolar.x - atom2.dipole.x*atom1.inducedDipolePolar.z;
real dkxuip_2 = atom2.dipole.x*atom1.inducedDipolePolar.y - atom2.dipole.y*atom1.inducedDipolePolar.x;
real ttm3_0 = rr3*dixdk_0 + gf3*dkxr_0;
real ttm3_1 = rr3*dixdk_1 + gf3*dkxr_1;
real ttm3_2 = rr3*dixdk_2 + gf3*dkxr_2;
real ttm3i_0 = -(dkxui_0*psc3+ dkxuip_0*dsc3)*0.5f + gti3*dkxr_0;
real ttm3i_1 = -(dkxui_1*psc3+ dkxuip_1*dsc3)*0.5f + gti3*dkxr_1;
real ttm3i_2 = -(dkxui_2*psc3+ dkxuip_2*dsc3)*0.5f + gti3*dkxr_2;
#endif
if (mScale < 1) {
#ifdef T1
ttm2_0 *= mScale;
ttm2_1 *= mScale;
ttm2_2 *= mScale;
#endif
#ifdef T3
ttm3_0 *= mScale;
ttm3_1 *= mScale;
ttm3_2 *= mScale;
#endif
}
#ifdef F1
outputForce.x = -(ftm2_0+ftm2i_0);
outputForce.y = -(ftm2_1+ftm2i_1);
outputForce.z = -(ftm2_2+ftm2i_2);
#endif
#ifdef T1
outputForce.x = (ttm2_0 + ttm2i_0);
outputForce.y = (ttm2_1 + ttm2i_1);
outputForce.z = (ttm2_2 + ttm2i_2);
#endif
#ifdef T3
outputForce.x = (ttm3_0 + ttm3i_0);
outputForce.y = (ttm3_1 + ttm3i_1);
outputForce.z = (ttm3_2 + ttm3i_2);
#endif
}
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef struct {
real4 posq;
real3 force, dipole, inducedDipole, inducedDipolePolar;
real3 pos, force, torque, inducedDipole, inducedDipolePolar, sphericalDipole;
real q;
float thole, damp;
#ifdef INCLUDE_QUADRUPOLES
real quadrupoleXX, quadrupoleXY, quadrupoleXZ;
real quadrupoleYY, quadrupoleYZ;
real sphericalQuadrupole[5];
#endif
float thole, damp;
} AtomData;
__device__ void computeOneInteractionF1(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real& energy, real3& outputForce);
__device__ void computeOneInteractionT1(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real3& outputForce);
__device__ void computeOneInteractionT3(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real3& outputForce);
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
data.posq = posq[atom];
data.dipole.x = labFrameDipole[atom*3];
data.dipole.y = labFrameDipole[atom*3+1];
data.dipole.z = labFrameDipole[atom*3+2];
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ sphericalDipole,
const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
real4 atomPosq = posq[atom];
data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
data.q = atomPosq.w;
data.sphericalDipole.x = sphericalDipole[atom*3];
data.sphericalDipole.y = sphericalDipole[atom*3+1];
data.sphericalDipole.z = sphericalDipole[atom*3+2];
#ifdef INCLUDE_QUADRUPOLES
data.quadrupoleXX = labFrameQuadrupole[atom*5];
data.quadrupoleXY = labFrameQuadrupole[atom*5+1];
data.quadrupoleXZ = labFrameQuadrupole[atom*5+2];
data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
data.sphericalQuadrupole[0] = sphericalQuadrupole[atom*5];
data.sphericalQuadrupole[1] = sphericalQuadrupole[atom*5+1];
data.sphericalQuadrupole[2] = sphericalQuadrupole[atom*5+2];
data.sphericalQuadrupole[3] = sphericalQuadrupole[atom*5+3];
data.sphericalQuadrupole[4] = sphericalQuadrupole[atom*5+4];
#endif
data.inducedDipole.x = inducedDipole[atom*3];
data.inducedDipole.y = inducedDipole[atom*3+1];
......@@ -57,6 +54,322 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
return (x && y ? 0.0f : (x && p ? 0.5f : 1.0f));
}
__device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool hasExclusions, float dScale, float pScale, float mScale, float forceFactor, real& energy) {
// Compute the displacement.
real3 delta;
delta.x = atom2.pos.x - atom1.pos.x;
delta.y = atom2.pos.y - atom1.pos.y;
delta.z = atom2.pos.z - atom1.pos.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real rInv = RSQRT(r2);
real r = r2*rInv;
// Rotate the various dipoles and quadrupoles.
real qiRotationMatrix[3][3];
buildQIRotationMatrix(delta, rInv, qiRotationMatrix);
real3 qiUindI = 0.5f*make_real3(qiRotationMatrix[0][1]*atom1.inducedDipole.x + qiRotationMatrix[0][2]*atom1.inducedDipole.y + qiRotationMatrix[0][0]*atom1.inducedDipole.z,
qiRotationMatrix[1][1]*atom1.inducedDipole.x + qiRotationMatrix[1][2]*atom1.inducedDipole.y + qiRotationMatrix[1][0]*atom1.inducedDipole.z,
qiRotationMatrix[2][1]*atom1.inducedDipole.x + qiRotationMatrix[2][2]*atom1.inducedDipole.y + qiRotationMatrix[2][0]*atom1.inducedDipole.z);
real3 qiUindJ = 0.5f*make_real3(qiRotationMatrix[0][1]*atom2.inducedDipole.x + qiRotationMatrix[0][2]*atom2.inducedDipole.y + qiRotationMatrix[0][0]*atom2.inducedDipole.z,
qiRotationMatrix[1][1]*atom2.inducedDipole.x + qiRotationMatrix[1][2]*atom2.inducedDipole.y + qiRotationMatrix[1][0]*atom2.inducedDipole.z,
qiRotationMatrix[2][1]*atom2.inducedDipole.x + qiRotationMatrix[2][2]*atom2.inducedDipole.y + qiRotationMatrix[2][0]*atom2.inducedDipole.z);
real3 qiUinpI = 0.5f*make_real3(qiRotationMatrix[0][1]*atom1.inducedDipolePolar.x + qiRotationMatrix[0][2]*atom1.inducedDipolePolar.y + qiRotationMatrix[0][0]*atom1.inducedDipolePolar.z,
qiRotationMatrix[1][1]*atom1.inducedDipolePolar.x + qiRotationMatrix[1][2]*atom1.inducedDipolePolar.y + qiRotationMatrix[1][0]*atom1.inducedDipolePolar.z,
qiRotationMatrix[2][1]*atom1.inducedDipolePolar.x + qiRotationMatrix[2][2]*atom1.inducedDipolePolar.y + qiRotationMatrix[2][0]*atom1.inducedDipolePolar.z);
real3 qiUinpJ = 0.5f*make_real3(qiRotationMatrix[0][1]*atom2.inducedDipolePolar.x + qiRotationMatrix[0][2]*atom2.inducedDipolePolar.y + qiRotationMatrix[0][0]*atom2.inducedDipolePolar.z,
qiRotationMatrix[1][1]*atom2.inducedDipolePolar.x + qiRotationMatrix[1][2]*atom2.inducedDipolePolar.y + qiRotationMatrix[1][0]*atom2.inducedDipolePolar.z,
qiRotationMatrix[2][1]*atom2.inducedDipolePolar.x + qiRotationMatrix[2][2]*atom2.inducedDipolePolar.y + qiRotationMatrix[2][0]*atom2.inducedDipolePolar.z);
real3 rotatedDipole1 = rotateDipole(atom1.sphericalDipole, qiRotationMatrix);
real3 rotatedDipole2 = rotateDipole(atom2.sphericalDipole, qiRotationMatrix);
real rotatedQuadrupole1[] = {0, 0, 0, 0, 0};
real rotatedQuadrupole2[] = {0, 0, 0, 0, 0};
#ifdef INCLUDE_QUADRUPOLES
rotateQuadupoles(qiRotationMatrix, atom1.sphericalQuadrupole, atom2.sphericalQuadrupole, rotatedQuadrupole1, rotatedQuadrupole2);
#endif
// The field derivatives at I due to permanent and induced moments on J, and vice-versa.
// Also, their derivatives w.r.t. R, which are needed for force calculations
real Vij[9], Vji[9], VjiR[9], VijR[9];
// The field derivatives at I due to only permanent moments on J, and vice-versa.
real Vijp[3], Vijd[3], Vjip[3], Vjid[3];
real rInvVec[7];
// The rInvVec array is defined such that the ith element is R^-i, with the
// dieleectric constant folded in, to avoid conversions later.
rInvVec[1] = rInv;
for (int i = 2; i < 7; ++i)
rInvVec[i] = rInvVec[i-1] * rInv;
real dmp = atom1.damp*atom2.damp;
real a = min(atom1.thole, atom2.thole);
real u = fabs(dmp) > 1.0e-5f ? r/dmp : 1e10f;
real au3 = a*u*u*u;
real expau3 = au3 < 50 ? EXP(-au3) : 0;
real a2u6 = au3*au3;
real a3u9 = a2u6*au3;
// Thole damping factors for energies
real thole_c = 1 - expau3;
real thole_d0 = 1 - expau3*(1 + 1.5f*au3);
real thole_d1 = 1 - expau3;
real thole_q0 = 1 - expau3*(1 + au3 + a2u6);
real thole_q1 = 1 - expau3*(1 + au3);
// Thole damping factors for derivatives
real dthole_c = 1 - expau3*(1 + 1.5f*au3);
real dthole_d0 = 1 - expau3*(1 + au3 + 1.5f*a2u6);
real dthole_d1 = 1 - expau3*(1 + au3);
real dthole_q0 = 1 - expau3*(1 + au3 + 0.25f*a2u6 + 0.75f*a3u9);
real dthole_q1 = 1 - expau3*(1 + au3 + 0.75f*a2u6);
// Now we compute the (attenuated) Coulomb operator and its derivatives, contracted with
// permanent moments and induced dipoles. Note that the coefficient of the permanent force
// terms is half of the expected value; this is because we compute the interaction of I with
// the sum of induced and permanent moments on J, as well as the interaction of J with I's
// permanent and induced moments; doing so double counts the permanent-permanent interaction.
real ePermCoef, dPermCoef, eUIndCoef, dUIndCoef, eUInpCoef, dUInpCoef;
// C-C terms (m=0)
ePermCoef = rInvVec[1]*mScale;
dPermCoef = -0.5f*mScale*rInvVec[2];
Vij[0] = ePermCoef*atom2.q;
Vji[0] = ePermCoef*atom1.q;
VijR[0] = dPermCoef*atom2.q;
VjiR[0] = dPermCoef*atom1.q;
// C-D and C-Uind terms (m=0)
ePermCoef = rInvVec[2]*mScale;
eUIndCoef = rInvVec[2]*pScale*thole_c;
eUInpCoef = rInvVec[2]*dScale*thole_c;
dPermCoef = -rInvVec[3]*mScale;
dUIndCoef = -2*rInvVec[3]*pScale*dthole_c;
dUInpCoef = -2*rInvVec[3]*dScale*dthole_c;
Vij[0] += -(ePermCoef*rotatedDipole2.x + eUIndCoef*qiUindJ.x + eUInpCoef*qiUinpJ.x);
Vji[1] = -(ePermCoef*atom1.q);
VijR[0] += -(dPermCoef*rotatedDipole2.x + dUIndCoef*qiUindJ.x + dUInpCoef*qiUinpJ.x);
VjiR[1] = -(dPermCoef*atom1.q);
Vjip[0] = -(eUInpCoef*atom1.q);
Vjid[0] = -(eUIndCoef*atom1.q);
// D-C and Uind-C terms (m=0)
Vij[1] = ePermCoef*atom2.q;
Vji[0] += ePermCoef*rotatedDipole1.x + eUIndCoef*qiUindI.x + eUInpCoef*qiUinpI.x;
VijR[1] = dPermCoef*atom2.q;
VjiR[0] += dPermCoef*rotatedDipole1.x + dUIndCoef*qiUindI.x + dUInpCoef*qiUinpI.x;
Vijp[0] = eUInpCoef*atom2.q;
Vijd[0] = eUIndCoef*atom2.q;
// D-D and D-Uind terms (m=0)
ePermCoef = -2*rInvVec[3]*mScale;
eUIndCoef = -2*rInvVec[3]*pScale*thole_d0;
eUInpCoef = -2*rInvVec[3]*dScale*thole_d0;
dPermCoef = 3*rInvVec[4]*mScale;
dUIndCoef = 6*rInvVec[4]*pScale*dthole_d0;
dUInpCoef = 6*rInvVec[4]*dScale*dthole_d0;
Vij[1] += ePermCoef*rotatedDipole2.x + eUIndCoef*qiUindJ.x + eUInpCoef*qiUinpJ.x;
Vji[1] += ePermCoef*rotatedDipole1.x + eUIndCoef*qiUindI.x + eUInpCoef*qiUinpI.x;
VijR[1] += dPermCoef*rotatedDipole2.x + dUIndCoef*qiUindJ.x + dUInpCoef*qiUinpJ.x;
VjiR[1] += dPermCoef*rotatedDipole1.x + dUIndCoef*qiUindI.x + dUInpCoef*qiUinpI.x;
Vijp[0] += eUInpCoef*rotatedDipole2.x;
Vijd[0] += eUIndCoef*rotatedDipole2.x;
Vjip[0] += eUInpCoef*rotatedDipole1.x;
Vjid[0] += eUIndCoef*rotatedDipole1.x;
// D-D and D-Uind terms (m=1)
ePermCoef = rInvVec[3]*mScale;
eUIndCoef = rInvVec[3]*pScale*thole_d1;
eUInpCoef = rInvVec[3]*dScale*thole_d1;
dPermCoef = -1.5f*rInvVec[4]*mScale;
dUIndCoef = -3*rInvVec[4]*pScale*dthole_d1;
dUInpCoef = -3*rInvVec[4]*dScale*dthole_d1;
Vij[2] = ePermCoef*rotatedDipole2.y + eUIndCoef*qiUindJ.y + eUInpCoef*qiUinpJ.y;
Vji[2] = ePermCoef*rotatedDipole1.y + eUIndCoef*qiUindI.y + eUInpCoef*qiUinpI.y;
VijR[2] = dPermCoef*rotatedDipole2.y + dUIndCoef*qiUindJ.y + dUInpCoef*qiUinpJ.y;
VjiR[2] = dPermCoef*rotatedDipole1.y + dUIndCoef*qiUindI.y + dUInpCoef*qiUinpI.y;
Vij[3] = ePermCoef*rotatedDipole2.z + eUIndCoef*qiUindJ.z + eUInpCoef*qiUinpJ.z;
Vji[3] = ePermCoef*rotatedDipole1.z + eUIndCoef*qiUindI.z + eUInpCoef*qiUinpI.z;
VijR[3] = dPermCoef*rotatedDipole2.z + dUIndCoef*qiUindJ.z + dUInpCoef*qiUinpJ.z;
VjiR[3] = dPermCoef*rotatedDipole1.z + dUIndCoef*qiUindI.z + dUInpCoef*qiUinpI.z;
Vijp[1] = eUInpCoef*rotatedDipole2.y;
Vijd[1] = eUIndCoef*rotatedDipole2.y;
Vjip[1] = eUInpCoef*rotatedDipole1.y;
Vjid[1] = eUIndCoef*rotatedDipole1.y;
Vijp[2] = eUInpCoef*rotatedDipole2.z;
Vijd[2] = eUIndCoef*rotatedDipole2.z;
Vjip[2] = eUInpCoef*rotatedDipole1.z;
Vjid[2] = eUIndCoef*rotatedDipole1.z;
// C-Q terms (m=0)
ePermCoef = mScale*rInvVec[3];
dPermCoef = -1.5f*rInvVec[4]*mScale;
Vij[0] += ePermCoef*rotatedQuadrupole2[0];
Vji[4] = ePermCoef*atom1.q;
VijR[0] += dPermCoef*rotatedQuadrupole2[0];
VjiR[4] = dPermCoef*atom1.q;
// Q-C terms (m=0)
Vij[4] = ePermCoef*atom2.q;
Vji[0] += ePermCoef*rotatedQuadrupole1[0];
VijR[4] = dPermCoef*atom2.q;
VjiR[0] += dPermCoef*rotatedQuadrupole1[0];
// D-Q and Uind-Q terms (m=0)
ePermCoef = rInvVec[4]*3.0*mScale;
eUIndCoef = rInvVec[4]*3.0*pScale*thole_q0;
eUInpCoef = rInvVec[4]*3.0*dScale*thole_q0;
dPermCoef = -6*rInvVec[5]*mScale;
dUIndCoef = -12*rInvVec[5]*pScale*dthole_q0;
dUInpCoef = -12*rInvVec[5]*dScale*dthole_q0;
Vij[1] += ePermCoef*rotatedQuadrupole2[0];
Vji[4] += ePermCoef*rotatedDipole1.x + eUIndCoef*qiUindI.x + eUInpCoef*qiUinpI.x;
VijR[1] += dPermCoef*rotatedQuadrupole2[0];
VjiR[4] += dPermCoef*rotatedDipole1.x + dUIndCoef*qiUindI.x + dUInpCoef*qiUinpI.x;
Vijp[0] += eUInpCoef*rotatedQuadrupole2[0];
Vijd[0] += eUIndCoef*rotatedQuadrupole2[0];
// Q-D and Q-Uind terms (m=0)
Vij[4] += -(ePermCoef*rotatedDipole2.x + eUIndCoef*qiUindJ.x + eUInpCoef*qiUinpJ.x);
Vji[1] += -(ePermCoef*rotatedQuadrupole1[0]);
VijR[4] += -(dPermCoef*rotatedDipole2.x + dUIndCoef*qiUindJ.x + dUInpCoef*qiUinpJ.x);
VjiR[1] += -(dPermCoef*rotatedQuadrupole1[0]);
Vjip[0] += -(eUInpCoef*rotatedQuadrupole1[0]);
Vjid[0] += -(eUIndCoef*rotatedQuadrupole1[0]);
// D-Q and Uind-Q terms (m=1)
const real sqrtThree = SQRT((real) 3);
ePermCoef = -sqrtThree*rInvVec[4]*mScale;
eUIndCoef = -sqrtThree*rInvVec[4]*pScale*thole_q1;
eUInpCoef = -sqrtThree*rInvVec[4]*dScale*thole_q1;
dPermCoef = 2*sqrtThree*rInvVec[5]*mScale;
dUIndCoef = 4*sqrtThree*rInvVec[5]*pScale*dthole_q1;
dUInpCoef = 4*sqrtThree*rInvVec[5]*dScale*dthole_q1;
Vij[2] += ePermCoef*rotatedQuadrupole2[1];
Vji[5] = ePermCoef*rotatedDipole1.y + eUIndCoef*qiUindI.y + eUInpCoef*qiUinpI.y;
VijR[2] += dPermCoef*rotatedQuadrupole2[1];
VjiR[5] = dPermCoef*rotatedDipole1.y + dUIndCoef*qiUindI.y + dUInpCoef*qiUinpI.y;
Vij[3] += ePermCoef*rotatedQuadrupole2[2];
Vji[6] = ePermCoef*rotatedDipole1.z + eUIndCoef*qiUindI.z + eUInpCoef*qiUinpI.z;
VijR[3] += dPermCoef*rotatedQuadrupole2[2];
VjiR[6] = dPermCoef*rotatedDipole1.z + dUIndCoef*qiUindI.z + dUInpCoef*qiUinpI.z;
Vijp[1] += eUInpCoef*rotatedQuadrupole2[1];
Vijd[1] += eUIndCoef*rotatedQuadrupole2[1];
Vijp[2] += eUInpCoef*rotatedQuadrupole2[2];
Vijd[2] += eUIndCoef*rotatedQuadrupole2[2];
// D-Q and Uind-Q terms (m=1)
Vij[5] = -(ePermCoef*rotatedDipole2.y + eUIndCoef*qiUindJ.y + eUInpCoef*qiUinpJ.y);
Vji[2] += -(ePermCoef*rotatedQuadrupole1[1]);
VijR[5] = -(dPermCoef*rotatedDipole2.y + dUIndCoef*qiUindJ.y + dUInpCoef*qiUinpJ.y);
VjiR[2] += -(dPermCoef*rotatedQuadrupole1[1]);
Vij[6] = -(ePermCoef*rotatedDipole2.z + eUIndCoef*qiUindJ.z + eUInpCoef*qiUinpJ.z);
Vji[3] += -(ePermCoef*rotatedQuadrupole1[2]);
VijR[6] = -(dPermCoef*rotatedDipole2.z + dUIndCoef*qiUindJ.z + dUInpCoef*qiUinpJ.z);
VjiR[3] += -(dPermCoef*rotatedQuadrupole1[2]);
Vjip[1] += -(eUInpCoef*rotatedQuadrupole1[1]);
Vjid[1] += -(eUIndCoef*rotatedQuadrupole1[1]);
Vjip[2] += -(eUInpCoef*rotatedQuadrupole1[2]);
Vjid[2] += -(eUIndCoef*rotatedQuadrupole1[2]);
// Q-Q terms (m=0)
ePermCoef = 6*rInvVec[5]*mScale;
dPermCoef = -15*rInvVec[6]*mScale;
Vij[4] += ePermCoef*rotatedQuadrupole2[0];
Vji[4] += ePermCoef*rotatedQuadrupole1[0];
VijR[4] += dPermCoef*rotatedQuadrupole2[0];
VjiR[4] += dPermCoef*rotatedQuadrupole1[0];
// Q-Q terms (m=1)
ePermCoef = -4*rInvVec[5]*mScale;
dPermCoef = 10*rInvVec[6]*mScale;
Vij[5] += ePermCoef*rotatedQuadrupole2[1];
Vji[5] += ePermCoef*rotatedQuadrupole1[1];
VijR[5] += dPermCoef*rotatedQuadrupole2[1];
VjiR[5] += dPermCoef*rotatedQuadrupole1[1];
Vij[6] += ePermCoef*rotatedQuadrupole2[2];
Vji[6] += ePermCoef*rotatedQuadrupole1[2];
VijR[6] += dPermCoef*rotatedQuadrupole2[2];
VjiR[6] += dPermCoef*rotatedQuadrupole1[2];
// Q-Q terms (m=2)
ePermCoef = rInvVec[5]*mScale;
dPermCoef = -2.5f*rInvVec[6]*mScale;
Vij[7] = ePermCoef*rotatedQuadrupole2[3];
Vji[7] = ePermCoef*rotatedQuadrupole1[3];
VijR[7] = dPermCoef*rotatedQuadrupole2[3];
VjiR[7] = dPermCoef*rotatedQuadrupole1[3];
Vij[8] = ePermCoef*rotatedQuadrupole2[4];
Vji[8] = ePermCoef*rotatedQuadrupole1[4];
VijR[8] = dPermCoef*rotatedQuadrupole2[4];
VjiR[8] = dPermCoef*rotatedQuadrupole1[4];
// Evaluate the energies, forces and torques due to permanent+induced moments
// interacting with just the permanent moments.
energy += forceFactor*0.5f*(
atom1.q*Vij[0] + rotatedDipole1.x*Vij[1] + rotatedDipole1.y*Vij[2] + rotatedDipole1.z*Vij[3] + rotatedQuadrupole1[0]*Vij[4] + rotatedQuadrupole1[1]*Vij[5] + rotatedQuadrupole1[2]*Vij[6] + rotatedQuadrupole1[3]*Vij[7] + rotatedQuadrupole1[4]*Vij[8] +
atom2.q*Vji[0] + rotatedDipole2.x*Vji[1] + rotatedDipole2.y*Vji[2] + rotatedDipole2.z*Vji[3] + rotatedQuadrupole2[0]*Vji[4] + rotatedQuadrupole2[1]*Vji[5] + rotatedQuadrupole2[2]*Vji[6] + rotatedQuadrupole2[3]*Vji[7] + rotatedQuadrupole2[4]*Vji[8]);
real fIZ = atom1.q*VijR[0] + rotatedDipole1.x*VijR[1] + rotatedDipole1.y*VijR[2] + rotatedDipole1.z*VijR[3] + rotatedQuadrupole1[0]*VijR[4] + rotatedQuadrupole1[1]*VijR[5] + rotatedQuadrupole1[2]*VijR[6] + rotatedQuadrupole1[3]*VijR[7] + rotatedQuadrupole1[4]*VijR[8];
real fJZ = atom2.q*VjiR[0] + rotatedDipole2.x*VjiR[1] + rotatedDipole2.y*VjiR[2] + rotatedDipole2.z*VjiR[3] + rotatedQuadrupole2[0]*VjiR[4] + rotatedQuadrupole2[1]*VjiR[5] + rotatedQuadrupole2[2]*VjiR[6] + rotatedQuadrupole2[3]*VjiR[7] + rotatedQuadrupole2[4]*VjiR[8];
real EIX = rotatedDipole1.z*Vij[1] - rotatedDipole1.x*Vij[3] + sqrtThree*rotatedQuadrupole1[2]*Vij[4] + rotatedQuadrupole1[4]*Vij[5] - (sqrtThree*rotatedQuadrupole1[0]+rotatedQuadrupole1[3])*Vij[6] + rotatedQuadrupole1[2]*Vij[7] - rotatedQuadrupole1[1]*Vij[8];
real EIY = -rotatedDipole1.y*Vij[1] + rotatedDipole1.x*Vij[2] - sqrtThree*rotatedQuadrupole1[1]*Vij[4] + (sqrtThree*rotatedQuadrupole1[0]-rotatedQuadrupole1[3])*Vij[5] - rotatedQuadrupole1[4]*Vij[6] + rotatedQuadrupole1[1]*Vij[7] + rotatedQuadrupole1[2]*Vij[8];
real EIZ = -rotatedDipole1.z*Vij[2] + rotatedDipole1.y*Vij[3] - rotatedQuadrupole1[2]*Vij[5] + rotatedQuadrupole1[1]*Vij[6] - 2*rotatedQuadrupole1[4]*Vij[7] + 2*rotatedQuadrupole1[3]*Vij[8];
real EJX = rotatedDipole2.z*Vji[1] - rotatedDipole2.x*Vji[3] + sqrtThree*rotatedQuadrupole2[2]*Vji[4] + rotatedQuadrupole2[4]*Vji[5] - (sqrtThree*rotatedQuadrupole2[0]+rotatedQuadrupole2[3])*Vji[6] + rotatedQuadrupole2[2]*Vji[7] - rotatedQuadrupole2[1]*Vji[8];
real EJY = -rotatedDipole2.y*Vji[1] + rotatedDipole2.x*Vji[2] - sqrtThree*rotatedQuadrupole2[1]*Vji[4] + (sqrtThree*rotatedQuadrupole2[0]-rotatedQuadrupole2[3])*Vji[5] - rotatedQuadrupole2[4]*Vji[6] + rotatedQuadrupole2[1]*Vji[7] + rotatedQuadrupole2[2]*Vji[8];
real EJZ = -rotatedDipole2.z*Vji[2] + rotatedDipole2.y*Vji[3] - rotatedQuadrupole2[2]*Vji[5] + rotatedQuadrupole2[1]*Vji[6] - 2*rotatedQuadrupole2[4]*Vji[7] + 2*rotatedQuadrupole2[3]*Vji[8];
// Define the torque intermediates for the induced dipoles. These are simply the induced dipole torque
// intermediates dotted with the field due to permanent moments only, at each center. We inline the
// induced dipole torque intermediates here, for simplicity. N.B. There are no torques on the dipoles
// themselves, so we accumulate the torque intermediates into separate variables to allow them to be
// used only in the force calculation.
//
// The torque about the x axis (needed to obtain the y force on the induced dipoles, below)
// qiUindIx[0] = qiQUindI[2]; qiUindIx[1] = 0; qiUindIx[2] = -qiQUindI[0]
real iEIX = qiUinpI.z*Vijp[0] + qiUindI.z*Vijd[0] - qiUinpI.x*Vijp[2] - qiUindI.x*Vijd[2];
real iEJX = qiUinpJ.z*Vjip[0] + qiUindJ.z*Vjid[0] - qiUinpJ.x*Vjip[2] - qiUindJ.x*Vjid[2];
// The torque about the y axis (needed to obtain the x force on the induced dipoles, below)
// qiUindIy[0] = -qiQUindI[1]; qiUindIy[1] = qiQUindI[0]; qiUindIy[2] = 0
real iEIY = qiUinpI.x*Vijp[1] + qiUindI.x*Vijd[1] - qiUinpI.y*Vijp[0] - qiUindI.y*Vijd[0];
real iEJY = qiUinpJ.x*Vjip[1] + qiUindJ.x*Vjid[1] - qiUinpJ.y*Vjip[0] - qiUindJ.y*Vjid[0];
#ifdef USE_MUTUAL_POLARIZATION
// Uind-Uind terms (m=0)
real eCoef = -4*rInvVec[3]*thole_d0;
real dCoef = 6*rInvVec[4]*dthole_d0;
iEIX += eCoef*(qiUinpI.z*qiUindJ.x + qiUindI.z*qiUinpJ.x);
iEJX += eCoef*(qiUinpJ.z*qiUindI.x + qiUindJ.z*qiUinpI.x);
iEIY -= eCoef*(qiUinpI.y*qiUindJ.x + qiUindI.y*qiUinpJ.x);
iEJY -= eCoef*(qiUinpJ.y*qiUindI.x + qiUindJ.y*qiUinpI.x);
fIZ += dCoef*(qiUinpI.x*qiUindJ.x + qiUindI.x*qiUinpJ.x);
fIZ += dCoef*(qiUinpJ.x*qiUindI.x + qiUindJ.x*qiUinpI.x);
// Uind-Uind terms (m=1)
eCoef = 2*rInvVec[3]*thole_d1;
dCoef = -3*rInvVec[4]*dthole_d1;
iEIX -= eCoef*(qiUinpI.x*qiUindJ.z + qiUindI.x*qiUinpJ.z);
iEJX -= eCoef*(qiUinpJ.x*qiUindI.z + qiUindJ.x*qiUinpI.z);
iEIY += eCoef*(qiUinpI.x*qiUindJ.y + qiUindI.x*qiUinpJ.y);
iEJY += eCoef*(qiUinpJ.x*qiUindI.y + qiUindJ.x*qiUinpI.y);
fIZ += dCoef*(qiUinpI.y*qiUindJ.y + qiUindI.y*qiUinpJ.y + qiUinpI.z*qiUindJ.z + qiUindI.z*qiUinpJ.z);
fIZ += dCoef*(qiUinpJ.y*qiUindI.y + qiUindJ.y*qiUinpI.y + qiUinpJ.z*qiUindI.z + qiUindJ.z*qiUinpI.z);
#endif
// The quasi-internal frame forces and torques. Note that the induced torque intermediates are
// used in the force expression, but not in the torques; the induced dipoles are isotropic.
real qiForce[3] = {rInv*(EIY+EJY+iEIY+iEJY), -rInv*(EIX+EJX+iEIX+iEJX), -(fJZ+fIZ)};
real qiTorqueI[3] = {-EIX, -EIY, -EIZ};
real qiTorqueJ[3] = {-EJX, -EJY, -EJZ};
real3 force = make_real3(qiRotationMatrix[1][1]*qiForce[0] + qiRotationMatrix[2][1]*qiForce[1] + qiRotationMatrix[0][1]*qiForce[2],
qiRotationMatrix[1][2]*qiForce[0] + qiRotationMatrix[2][2]*qiForce[1] + qiRotationMatrix[0][2]*qiForce[2],
qiRotationMatrix[1][0]*qiForce[0] + qiRotationMatrix[2][0]*qiForce[1] + qiRotationMatrix[0][0]*qiForce[2]);
atom1.force += force;
atom1.torque += make_real3(qiRotationMatrix[1][1]*qiTorqueI[0] + qiRotationMatrix[2][1]*qiTorqueI[1] + qiRotationMatrix[0][1]*qiTorqueI[2],
qiRotationMatrix[1][2]*qiTorqueI[0] + qiRotationMatrix[2][2]*qiTorqueI[1] + qiRotationMatrix[0][2]*qiTorqueI[2],
qiRotationMatrix[1][0]*qiTorqueI[0] + qiRotationMatrix[2][0]*qiTorqueI[1] + qiRotationMatrix[0][0]*qiTorqueI[2]);
if (forceFactor == 1) {
atom2.force -= force;
atom2.torque += make_real3(qiRotationMatrix[1][1]*qiTorqueJ[0] + qiRotationMatrix[2][1]*qiTorqueJ[1] + qiRotationMatrix[0][1]*qiTorqueJ[2],
qiRotationMatrix[1][2]*qiTorqueJ[0] + qiRotationMatrix[2][2]*qiTorqueJ[1] + qiRotationMatrix[0][2]*qiTorqueJ[2],
qiRotationMatrix[1][0]*qiTorqueJ[0] + qiRotationMatrix[2][0]*qiTorqueJ[1] + qiRotationMatrix[0][0]*qiTorqueJ[2]);
}
}
/**
* Compute electrostatic interactions.
*/
......@@ -69,7 +382,8 @@ extern "C" __global__ void computeElectrostatics(
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const unsigned int* __restrict__ interactingAtoms,
#endif
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ sphericalDipole,
const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
......@@ -78,7 +392,6 @@ extern "C" __global__ void computeElectrostatics(
real energy = 0;
__shared__ AtomData localData[THREAD_BLOCK_SIZE];
// First loop: process tiles that contain exclusions.
const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
......@@ -89,21 +402,23 @@ extern "C" __global__ void computeElectrostatics(
const unsigned int y = tileIndices.y;
AtomData data;
unsigned int atom1 = x*TILE_SIZE + tgx;
loadAtomData(data, atom1, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
loadAtomData(data, atom1, posq, sphericalDipole, sphericalQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
data.force = make_real3(0);
data.torque = make_real3(0);
uint2 covalent = covalentFlags[pos*TILE_SIZE+tgx];
unsigned int polarizationGroup = polarizationGroupFlags[pos*TILE_SIZE+tgx];
if (x == y) {
// This tile is on the diagonal.
localData[threadIdx.x].posq = data.posq;
localData[threadIdx.x].dipole = data.dipole;
localData[threadIdx.x].pos = data.pos;
localData[threadIdx.x].q = data.q;
localData[threadIdx.x].sphericalDipole = data.sphericalDipole;
#ifdef INCLUDE_QUADRUPOLES
localData[threadIdx.x].quadrupoleXX = data.quadrupoleXX;
localData[threadIdx.x].quadrupoleXY = data.quadrupoleXY;
localData[threadIdx.x].quadrupoleXZ = data.quadrupoleXZ;
localData[threadIdx.x].quadrupoleYY = data.quadrupoleYY;
localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ;
localData[threadIdx.x].sphericalQuadrupole[0] = data.sphericalQuadrupole[0];
localData[threadIdx.x].sphericalQuadrupole[1] = data.sphericalQuadrupole[1];
localData[threadIdx.x].sphericalQuadrupole[2] = data.sphericalQuadrupole[2];
localData[threadIdx.x].sphericalQuadrupole[3] = data.sphericalQuadrupole[3];
localData[threadIdx.x].sphericalQuadrupole[4] = data.sphericalQuadrupole[4];
#endif
localData[threadIdx.x].inducedDipole = data.inducedDipole;
localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar;
......@@ -115,101 +430,57 @@ extern "C" __global__ void computeElectrostatics(
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
real tempEnergy;
float d = computeDScaleFactor(polarizationGroup, j);
float p = computePScaleFactor(covalent, polarizationGroup, j);
float m = computeMScaleFactor(covalent, j);
computeOneInteractionF1(data, localData[tbx+j], d, p, m, tempEnergy, tempForce);
data.force += tempForce;
energy += 0.5f*tempEnergy;
computeOneInteraction(data, localData[tbx+j], true, d, p, m, 0.5f, energy);
}
}
data.force *= ENERGY_SCALE_FACTOR;
data.force *= -ENERGY_SCALE_FACTOR;
data.torque *= ENERGY_SCALE_FACTOR;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
// Compute torques.
data.force = make_real3(0);
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
float d = computeDScaleFactor(polarizationGroup, j);
float p = computePScaleFactor(covalent, polarizationGroup, j);
float m = computeMScaleFactor(covalent, j);
computeOneInteractionT1(data, localData[tbx+j], d, p, m, tempForce);
data.force += tempForce;
}
}
data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.torque.x*0x100000000)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0x100000000)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0x100000000)));
}
else {
// This is an off-diagonal tile.
unsigned int j = y*TILE_SIZE + tgx;
loadAtomData(localData[threadIdx.x], j, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
loadAtomData(localData[threadIdx.x], j, posq, sphericalDipole, sphericalQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
localData[threadIdx.x].force = make_real3(0);
localData[threadIdx.x].torque = make_real3(0);
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
real tempEnergy;
float d = computeDScaleFactor(polarizationGroup, tj);
float p = computePScaleFactor(covalent, polarizationGroup, tj);
float m = computeMScaleFactor(covalent, tj);
computeOneInteractionF1(data, localData[tbx+tj], d, p, m, tempEnergy, tempForce);
data.force += tempForce;
localData[tbx+tj].force -= tempForce;
energy += tempEnergy;
computeOneInteraction(data, localData[tbx+tj], true, d, p, m, 1, energy);
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
data.force *= -ENERGY_SCALE_FACTOR;
data.torque *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= -ENERGY_SCALE_FACTOR;
localData[threadIdx.x].torque *= ENERGY_SCALE_FACTOR;
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.torque.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0x100000000)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
// Compute torques.
data.force = make_real3(0);
localData[threadIdx.x].force = make_real3(0);
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
float d = computeDScaleFactor(polarizationGroup, tj);
float p = computePScaleFactor(covalent, polarizationGroup, tj);
float m = computeMScaleFactor(covalent, tj);
computeOneInteractionT1(data, localData[tbx+tj], d, p, m, tempForce);
data.force += tempForce;
computeOneInteractionT3(data, localData[tbx+tj], d, p, m, tempForce);
localData[tbx+tj].force += tempForce;
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.z*0x100000000)));
}
}
......@@ -272,16 +543,18 @@ extern "C" __global__ void computeElectrostatics(
// Load atom data for this tile.
AtomData data;
loadAtomData(data, atom1, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
loadAtomData(data, atom1, posq, sphericalDipole, sphericalQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
data.force = make_real3(0);
data.torque = make_real3(0);
#ifdef USE_CUTOFF
unsigned int j = (numTiles <= maxTiles ? interactingAtoms[pos*TILE_SIZE+tgx] : y*TILE_SIZE + tgx);
#else
unsigned int j = y*TILE_SIZE + tgx;
#endif
atomIndices[threadIdx.x] = j;
loadAtomData(localData[threadIdx.x], j, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
loadAtomData(localData[threadIdx.x], j, posq, sphericalDipole, sphericalQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
localData[threadIdx.x].force = make_real3(0);
localData[threadIdx.x].torque = make_real3(0);
// Compute forces.
......@@ -289,21 +562,24 @@ extern "C" __global__ void computeElectrostatics(
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = atomIndices[tbx+tj];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
real tempEnergy;
computeOneInteractionF1(data, localData[tbx+tj], 1, 1, 1, tempEnergy, tempForce);
data.force += tempForce;
localData[tbx+tj].force -= tempForce;
energy += tempEnergy;
computeOneInteraction(data, localData[tbx+tj], false, 1, 1, 1, 1, energy);
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
data.force *= -ENERGY_SCALE_FACTOR;
data.torque *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= -ENERGY_SCALE_FACTOR;
localData[threadIdx.x].torque *= ENERGY_SCALE_FACTOR;
// Write results.
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.torque.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0x100000000)));
#ifdef USE_CUTOFF
offset = atomIndices[threadIdx.x];
#else
......@@ -312,36 +588,9 @@ extern "C" __global__ void computeElectrostatics(
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
// Compute torques.
data.force = make_real3(0);
localData[threadIdx.x].force = make_real3(0);
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
computeOneInteractionT1(data, localData[tbx+tj], 1, 1, 1, tempForce);
data.force += tempForce;
computeOneInteractionT3(data, localData[tbx+tj], 1, 1, 1, tempForce);
localData[tbx+tj].force += tempForce;
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
#ifdef USE_CUTOFF
offset = atomIndices[threadIdx.x];
#else
offset = y*TILE_SIZE + tgx;
#endif
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.z*0x100000000)));
}
pos++;
}
......
__device__ void
#ifdef APPLY_SCALE
computeOneInteractionF1(
#else
computeOneInteractionF1NoScale(
#endif
AtomData& atom1, volatile AtomData& atom2, real4 delta, real4 bn, real bn5, float forceFactor,
#ifdef APPLY_SCALE
float dScale, float pScale, float mScale,
#endif
real3& force, real& energy) {
real xr = delta.x;
real yr = delta.y;
real zr = delta.z;
#ifdef APPLY_SCALE
real rr1 = delta.w;
#endif
// set the permanent multipole and induced dipole values;
real ci = atom1.q;
real di1 = atom1.dipole.x;
real di2 = atom1.dipole.y;
real di3 = atom1.dipole.z;
real qi1 = atom1.quadrupoleXX;
real qi2 = atom1.quadrupoleXY;
real qi3 = atom1.quadrupoleXZ;
real qi5 = atom1.quadrupoleYY;
real qi6 = atom1.quadrupoleYZ;
real qi9 = -(atom1.quadrupoleXX + atom1.quadrupoleYY);
real ck = atom2.q;
real dk1 = atom2.dipole.x;
real dk2 = atom2.dipole.y;
real dk3 = atom2.dipole.z;
real qk1 = atom2.quadrupoleXX;
real qk2 = atom2.quadrupoleXY;
real qk3 = atom2.quadrupoleXZ;
real qk5 = atom2.quadrupoleYY;
real qk6 = atom2.quadrupoleYZ;
real qk9 = -(atom2.quadrupoleXX + atom2.quadrupoleYY);
real bn1 = bn.x;
real bn2 = bn.y;
real bn3 = bn.z;
real bn4 = bn.w;
#ifdef APPLY_SCALE
real offset = 1-mScale;
real rr3 = rr1*rr1*rr1;
real gf4 = 2*(bn2 - 3*offset*rr3*rr1*rr1);
#else
real gf4 = 2*bn2;
#endif
real qidk1 = qi1*dk1 + qi2*dk2 + qi3*dk3;
real qkdi1 = qk1*di1 + qk2*di2 + qk3*di3;
real ftm21 = gf4*(qkdi1-qidk1);
real qidk2 = qi2*dk1 + qi5*dk2 + qi6*dk3;
real qkdi2 = qk2*di1 + qk5*di2 + qk6*di3;
real ftm22 = gf4*(qkdi2-qidk2);
real qidk3 = qi3*dk1 + qi6*dk2 + qi9*dk3;
real qkdi3 = qk3*di1 + qk6*di2 + qk9*di3;
real ftm23 = gf4*(qkdi3-qidk3);
real qir1 = qi1*xr + qi2*yr + qi3*zr;
real qir2 = qi2*xr + qi5*yr + qi6*zr;
real qir3 = qi3*xr + qi6*yr + qi9*zr;
real qkr1 = qk1*xr + qk2*yr + qk3*zr;
real qkr2 = qk2*xr + qk5*yr + qk6*zr;
real qkr3 = qk3*xr + qk6*yr + qk9*zr;
#ifdef APPLY_SCALE
real gf7 = 4*(bn3 - 15*offset*rr3*rr3*rr1);
#else
real gf7 = 4*bn3;
#endif
real qiqkr1 = qi1*qkr1 + qi2*qkr2 + qi3*qkr3;
real qkqir1 = qk1*qir1 + qk2*qir2 + qk3*qir3;
ftm21 += gf7*(qiqkr1+qkqir1);
real qiqkr2 = qi2*qkr1 + qi5*qkr2 + qi6*qkr3;
real qkqir2 = qk2*qir1 + qk5*qir2 + qk6*qir3;
ftm22 += gf7*(qiqkr2+qkqir2);
real qiqkr3 = qi3*qkr1 + qi6*qkr2 + qi9*qkr3;
real qkqir3 = qk3*qir1 + qk6*qir2 + qk9*qir3;
ftm23 += gf7*(qiqkr3+qkqir3);
// calculate the scalar products for permanent components
real gl6 = di1*dk1 + di2*dk2 + di3*dk3;
real gl7 = 2*(qir1*dk1 + qir2*dk2 + qir3*dk3 - (qkr1*di1 + qkr2*di2 + qkr3*di3));
real gl5 = -4*(qir1*qkr1 + qir2*qkr2 + qir3*qkr3);
real gl8 = 2*(qi1*qk1 + qi2*qk2 + qi3*qk3 + qi2*qk2 + qi5*qk5 + qi6*qk6 + qi3*qk3 + qi6*qk6 + qi9*qk9);
real sc3 = di1*xr + di2*yr + di3*zr;
real sc5 = qir1*xr + qir2*yr + qir3*zr;
real sc4 = dk1*xr + dk2*yr + dk3*zr;
real sc6 = qkr1*xr + qkr2*yr + qkr3*zr;
real gl0 = ci*ck;
real gl1 = ck*sc3 - ci*sc4;
real gl2 = ci*sc6 + ck*sc5 - sc3*sc4;
real gl3 = sc3*sc6 - sc4*sc5;
real gl4 = sc5*sc6;
#ifdef APPLY_SCALE
energy += forceFactor*(-offset*rr1*gl0 + (bn1-offset*rr3)*(gl1+gl6) + (bn2-offset*(3*rr3*rr1*rr1))*(gl2+gl7+gl8) + (bn3-offset*(15*rr3*rr3*rr1))*(gl3+gl5) + (bn4-offset*(105*rr3*rr3*rr3))*gl4);
#else
energy += forceFactor*(bn1*(gl1+gl6) + bn2*(gl2+gl7+gl8) + bn3*(gl3+gl5) + bn4*gl4);
#endif
real gf1 = bn1*gl0 + bn2*(gl1+gl6) + bn3*(gl2+gl7+gl8) + bn4*(gl3+gl5) + bn5*gl4;
#ifdef APPLY_SCALE
gf1 -= offset*(rr3*gl0 + (3*rr3*rr1*rr1)*(gl1+gl6) + (15*rr3*rr3*rr1)*(gl2+gl7+gl8) + (105*rr3*rr3*rr3)*(gl3+gl5) + (945*rr3*rr3*rr3*rr1*rr1)*gl4);
#endif
ftm21 += gf1*xr;
ftm22 += gf1*yr;
ftm23 += gf1*zr;
#ifdef APPLY_SCALE
real gf2 = -ck*bn1 + sc4*bn2 - sc6*bn3 - offset*(-ck*rr3 + sc4*(3*rr3*rr1*rr1) - sc6*(15*rr3*rr3*rr1));
#else
real gf2 = -ck*bn1 + sc4*bn2 - sc6*bn3;
#endif
ftm21 += gf2*di1;
ftm22 += gf2*di2;
ftm23 += gf2*di3;
#ifdef APPLY_SCALE
real gf5 = 2*(-ck*bn2+sc4*bn3-sc6*bn4 - offset*(-ck*(3*rr3*rr1*rr1)+sc4*(15*rr3*rr3*rr1)-sc6*(105*rr3*rr3*rr3)));
#else
real gf5 = 2*(-ck*bn2+sc4*bn3-sc6*bn4);
#endif
ftm21 += gf5*qir1;
ftm22 += gf5*qir2;
ftm23 += gf5*qir3;
#ifdef APPLY_SCALE
real gf3 = ci*bn1 + sc3*bn2 + sc5*bn3 - offset*(ci*rr3 + sc3*(3*rr3*rr1*rr1) + sc5*(15*rr3*rr3*rr1));
#else
real gf3 = ci*bn1 + sc3*bn2 + sc5*bn3;
#endif
ftm21 += gf3*dk1;
ftm22 += gf3*dk2;
ftm23 += gf3*dk3;
#ifdef APPLY_SCALE
real gf6 = 2*(-ci*bn2-sc3*bn3-sc5*bn4 - offset*(-ci*(3*rr3*rr1*rr1)-sc3*(15*rr3*rr3*rr1)-sc5*(105*rr3*rr3*rr3)));
#else
real gf6 = 2*(-ci*bn2-sc3*bn3-sc5*bn4);
#endif
ftm21 += gf6*qkr1;
ftm22 += gf6*qkr2;
ftm23 += gf6*qkr3;
force.x = ftm21;
force.y = ftm22;
force.z = ftm23;
}
__device__ void
#ifdef APPLY_SCALE
computeOneInteractionF2(
#else
computeOneInteractionF2NoScale(
#endif
AtomData& atom1, volatile AtomData& atom2, real4 delta, real4 bn, float forceFactor,
#ifdef APPLY_SCALE
float dScale, float pScale, float mScale,
#endif
real3& force, real& energy) {
const float uScale = 1;
real xr = delta.x;
real yr = delta.y;
real zr = delta.z;
real rr1 = delta.w;
// set the permanent multipole and induced dipole values;
real ci = atom1.q;
real di1 = atom1.dipole.x;
real di2 = atom1.dipole.y;
real di3 = atom1.dipole.z;
real qi1 = atom1.quadrupoleXX;
real qi2 = atom1.quadrupoleXY;
real qi3 = atom1.quadrupoleXZ;
real qi5 = atom1.quadrupoleYY;
real qi6 = atom1.quadrupoleYZ;
real qi9 = -(atom1.quadrupoleXX + atom1.quadrupoleYY);
real bn1 = bn.x;
real bn2 = bn.y;
real bn3 = bn.z;
real bn4 = bn.w;
real damp = atom1.damp*atom2.damp;
if (damp != 0) {
real pgamma = atom1.thole < atom2.thole ? atom1.thole : atom2.thole;
real ratio = RECIP(rr1*damp);
damp = -pgamma*ratio*ratio*ratio;
}
real scale5 = (damp == 0) ? 1 : (1 - (1-damp)*EXP(damp));
real rr5 = rr1*rr1;
rr5 = 3*rr1*rr5*rr5;
#ifdef APPLY_SCALE
real psc5 = rr5*(1 - scale5*pScale);
real dsc5 = rr5*(1 - scale5*dScale);
real usc5 = rr5*(1 - scale5*uScale);
#else
real psc5 = rr5*(1 - scale5);
#endif
real qiuk1 = qi1*atom2.inducedDipole.x + qi2*atom2.inducedDipole.y + qi3*atom2.inducedDipole.z;
real qiukp1 = qi1*atom2.inducedDipolePolar.x + qi2*atom2.inducedDipolePolar.y + qi3*atom2.inducedDipolePolar.z;
real ftm21 = -bn2*(qiuk1+qiukp1);
#ifdef APPLY_SCALE
ftm21 += qiuk1*psc5 + qiukp1*dsc5;
#else
ftm21 += (qiuk1 + qiukp1)*psc5;
#endif
real qiuk2 = qi2*atom2.inducedDipole.x + qi5*atom2.inducedDipole.y + qi6*atom2.inducedDipole.z;
real qiukp2 = qi2*atom2.inducedDipolePolar.x + qi5*atom2.inducedDipolePolar.y + qi6*atom2.inducedDipolePolar.z;
real ftm22 = -bn2*(qiuk2+qiukp2);
#ifdef APPLY_SCALE
ftm22 += ((qiuk2)*psc5 + (qiukp2)*dsc5);
#else
ftm22 += (qiuk2 + qiukp2)*psc5;
#endif
real qiuk3 = qi3*atom2.inducedDipole.x + qi6*atom2.inducedDipole.y + qi9*atom2.inducedDipole.z;
real qiukp3 = qi3*atom2.inducedDipolePolar.x + qi6*atom2.inducedDipolePolar.y + qi9*atom2.inducedDipolePolar.z;
real ftm23 = -bn2*(qiuk3+qiukp3);
#ifdef APPLY_SCALE
ftm23 += ((qiuk3)*psc5 + (qiukp3)*dsc5);
#else
ftm23 += (qiuk3 + qiukp3)*psc5;
#endif
real expdamp = EXP(damp);
real scale3 = (damp == 0) ? 1 : (1 - expdamp);
real rr3 = rr1*rr1*rr1;
#ifdef APPLY_SCALE
real psc3 = rr3*(1 - scale3*pScale);
real dsc3 = rr3*(1 - scale3*dScale);
real usc3 = rr3*(1 - scale3*uScale);
#else
real psc3 = rr3*(1 - scale3);
#endif
real scale7 = (damp == 0) ? 1 : (1 - (1-damp+0.6f*damp*damp)*expdamp);
#ifdef APPLY_SCALE
real psc7 = (15*rr3*rr3*rr1)*(1 - scale7*pScale);
real dsc7 = (15*rr3*rr3*rr1)*(1 - scale7*dScale);
#else
real psc7 = (15*rr3*rr3*rr1)*(1 - scale7);
#endif
real qir1 = qi1*xr + qi2*yr + qi3*zr;
real qir2 = qi2*xr + qi5*yr + qi6*zr;
real qir3 = qi3*xr + qi6*yr + qi9*zr;
real sc3 = di1*xr + di2*yr + di3*zr;
real sc5 = qir1*xr + qir2*yr + qir3*zr;
real gfi3 = ci*bn1 + sc3*bn2 + sc5*bn3;
real prefactor1;
prefactor1 = 0.5f*(ci*psc3 + sc3*psc5 + sc5*psc7 - gfi3);
ftm21 -= prefactor1*atom2.inducedDipole.x;
ftm22 -= prefactor1*atom2.inducedDipole.y;
ftm23 -= prefactor1*atom2.inducedDipole.z;
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(ci*dsc3 + sc3*dsc5 + sc5*dsc7 - gfi3);
#endif
ftm21 -= prefactor1*atom2.inducedDipolePolar.x;
ftm22 -= prefactor1*atom2.inducedDipolePolar.y;
ftm23 -= prefactor1*atom2.inducedDipolePolar.z;
real sci4 = atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr;
energy += forceFactor*0.5f*sci4*((psc3-bn1)*ci + (psc5-bn2)*sc3 + (psc7-bn3)*sc5);
real scip4 = atom2.inducedDipolePolar.x*xr + atom2.inducedDipolePolar.y*yr + atom2.inducedDipolePolar.z*zr;
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(bn2 - usc5);
#else
prefactor1 = 0.5f*(bn2 - psc5);
#endif
ftm21 += prefactor1*((sci4*atom1.inducedDipolePolar.x + scip4*atom1.inducedDipole.x));
ftm22 += prefactor1*((sci4*atom1.inducedDipolePolar.y + scip4*atom1.inducedDipole.y));
ftm23 += prefactor1*((sci4*atom1.inducedDipolePolar.z + scip4*atom1.inducedDipole.z));
#endif
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(bn2*(sci4+scip4) - (sci4*psc5+scip4*dsc5));
#else
sci4 += scip4;
prefactor1 = 0.5f*sci4*(bn2 - psc5);
#endif
ftm21 += prefactor1*di1;
ftm22 += prefactor1*di2;
ftm23 += prefactor1*di3;
#ifdef APPLY_SCALE
real gfi5 = bn3*(sci4+scip4) - (sci4*psc7+scip4*dsc7);
#else
real gfi5 = sci4*(bn3 - psc7);
#endif
ftm21 += gfi5*qir1;
ftm22 += gfi5*qir2;
ftm23 += gfi5*qir3;
real sci7 = qir1*atom2.inducedDipole.x + qir2*atom2.inducedDipole.y + qir3*atom2.inducedDipole.z;
energy += forceFactor*(bn2-psc5)*sci7;
real scip7 = qir1*atom2.inducedDipolePolar.x + qir2*atom2.inducedDipolePolar.y + qir3*atom2.inducedDipolePolar.z;
#ifdef APPLY_SCALE
real gli1 = -ci*sci4;
real gli2 = -sc3*sci4 + 2*sci7;
real gli3 = -sci4*sc5;
real glip1 = -ci*scip4;
real glip2 = -sc3*scip4 + 2*scip7;
real glip3 = -scip4*sc5;
#else
real gli1 = -ci*sci4;
real gli2 = -sc3*sci4 + 2*(sci7 + scip7);
real gli3 = -sci4*sc5;
#endif
#ifdef APPLY_SCALE
real gfi1 = (bn2*(gli1+glip1) + bn3*(gli2+glip2) + bn4*(gli3+glip3));
gfi1 -= (rr1*rr1)*(3*(gli1*psc3 + glip1*dsc3) + 5*(gli2*psc5 + glip2*dsc5) + 7*(gli3*psc7+glip3*dsc7));
#else
real gfi1 = bn2*gli1 + bn3*gli2 + bn4*gli3;
gfi1 -= (rr1*rr1)*(3*gli1*psc3 + 5*gli2*psc5 + 7*gli3*psc7);
#endif
gfi1 *= 0.5f;
ftm21 += gfi1*xr;
ftm22 += gfi1*yr;
ftm23 += gfi1*zr;
{
real expdamp = EXP(damp);
real temp3 = -1.5f*damp*expdamp*rr1*rr1;
real temp5 = -damp;
real temp7 = -0.2f - 0.6f*damp;
real ddsc31 = temp3*xr;
real ddsc32 = temp3*yr;
real ddsc33 = temp3*zr;
real ddsc51 = temp5*ddsc31;
real ddsc52 = temp5*ddsc32;
real ddsc53 = temp5*ddsc33;
real ddsc71 = temp7*ddsc51;
real ddsc72 = temp7*ddsc52;
real ddsc73 = temp7*ddsc53;
real rr3 = rr1*rr1*rr1;
#ifdef APPLY_SCALE
temp3 = (gli1*pScale + glip1*dScale);
temp5 = (3*rr1*rr1)*(gli2*pScale + glip2*dScale);
temp7 = (15*rr3*rr1)*(gli3*pScale + glip3*dScale);
#else
temp3 = gli1;
temp5 = (3*rr1*rr1)*gli2;
temp7 = (15*rr3*rr1)*gli3;
#endif
ftm21 -= rr3*(temp3*ddsc31 + temp5*ddsc51 + temp7*ddsc71);
ftm22 -= rr3*(temp3*ddsc32 + temp5*ddsc52 + temp7*ddsc72);
ftm23 -= rr3*(temp3*ddsc33 + temp5*ddsc53 + temp7*ddsc73);
}
//K
real qk1 = atom2.quadrupoleXX;
real qk2 = atom2.quadrupoleXY;
real qk3 = atom2.quadrupoleXZ;
real qk5 = atom2.quadrupoleYY;
real qk6 = atom2.quadrupoleYZ;
real qk9 = -(qk1 + qk5);
real qkui1 = qk1*atom1.inducedDipole.x + qk2*atom1.inducedDipole.y + qk3*atom1.inducedDipole.z;
real qkuip1 = qk1*atom1.inducedDipolePolar.x + qk2*atom1.inducedDipolePolar.y + qk3*atom1.inducedDipolePolar.z;
ftm21 += bn2*(qkui1+qkuip1);
#ifdef APPLY_SCALE
ftm21 -= (qkui1*psc5 + qkuip1*dsc5);
#else
ftm21 -= (qkui1 + qkuip1)*psc5;
#endif
real qkui2 = qk2*atom1.inducedDipole.x + qk5*atom1.inducedDipole.y + qk6*atom1.inducedDipole.z;
real qkuip2 = qk2*atom1.inducedDipolePolar.x + qk5*atom1.inducedDipolePolar.y + qk6*atom1.inducedDipolePolar.z;
ftm22 += bn2*(qkui2+qkuip2);
#ifdef APPLY_SCALE
ftm22 -= ((qkui2)*psc5 + (qkuip2)*dsc5);
#else
ftm22 -= (qkui2 + qkuip2)*psc5;
#endif
real qkui3 = qk3*atom1.inducedDipole.x + qk6*atom1.inducedDipole.y + qk9*atom1.inducedDipole.z;
real qkuip3 = qk3*atom1.inducedDipolePolar.x + qk6*atom1.inducedDipolePolar.y + qk9*atom1.inducedDipolePolar.z;
ftm23 += bn2*(qkui3+qkuip3);
#ifdef APPLY_SCALE
ftm23 -= ((qkui3)*psc5 + (qkuip3)*dsc5);
#else
ftm23 -= (qkui3 + qkuip3)*psc5;
#endif
real qkr1 = qk1*xr + qk2*yr + qk3*zr;
real qkr2 = qk2*xr + qk5*yr + qk6*zr;
real qkr3 = qk3*xr + qk6*yr + qk9*zr;
real dk1 = atom2.dipole.x;
real dk2 = atom2.dipole.y;
real dk3 = atom2.dipole.z;
real sc4 = dk1*xr + dk2*yr + dk3*zr;
real sc6 = qkr1*xr + qkr2*yr + qkr3*zr;
real ck = atom2.q;
real gfi2 = (-ck*bn1 + sc4*bn2 - sc6*bn3);
prefactor1 = 0.5f*(ck*psc3 - sc4*psc5 + sc6*psc7 + gfi2);
ftm21 += prefactor1*atom1.inducedDipole.x;
ftm22 += prefactor1*atom1.inducedDipole.y;
ftm23 += prefactor1*atom1.inducedDipole.z;
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(ck*dsc3 - sc4*dsc5 + sc6*dsc7 + gfi2);
#endif
ftm21 += prefactor1*atom1.inducedDipolePolar.x;
ftm22 += prefactor1*atom1.inducedDipolePolar.y;
ftm23 += prefactor1*atom1.inducedDipolePolar.z;
real sci3 = atom1.inducedDipole.x*xr + atom1.inducedDipole.y*yr + atom1.inducedDipole.z*zr;
energy += forceFactor*0.5f*sci3*(ck*(bn1-psc3) - sc4*(bn2-psc5) + sc6*(bn3-psc7));
real scip3 = atom1.inducedDipolePolar.x*xr + atom1.inducedDipolePolar.y*yr + atom1.inducedDipolePolar.z*zr;
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(bn2 - usc5);
#else
prefactor1 = 0.5f*(bn2 - psc5);
#endif
ftm21 += prefactor1*(sci3*atom2.inducedDipolePolar.x + scip3*atom2.inducedDipole.x);
ftm22 += prefactor1*(sci3*atom2.inducedDipolePolar.y + scip3*atom2.inducedDipole.y);
ftm23 += prefactor1*(sci3*atom2.inducedDipolePolar.z + scip3*atom2.inducedDipole.z);
real sci34;
sci4 = atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr;
scip4 = atom2.inducedDipolePolar.x*xr + atom2.inducedDipolePolar.y*yr + atom2.inducedDipolePolar.z*zr;
sci34 = (sci3*scip4+scip3*sci4);
#ifdef APPLY_SCALE
gfi1 = sci34*(usc5*(5*rr1*rr1) -bn3);
#else
gfi1 = sci34*(psc5*(5*rr1*rr1) -bn3);
#endif
#else
gfi1 = 0;
#endif
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(bn2*(sci3+scip3) - (sci3*psc5+scip3*dsc5));
#else
sci3 += scip3;
prefactor1 = 0.5f*sci3*(bn2 - psc5);
#endif
ftm21 += prefactor1*dk1;
ftm22 += prefactor1*dk2;
ftm23 += prefactor1*dk3;
#ifdef APPLY_SCALE
real gfi6 = -bn3*(sci3+scip3) + (sci3*psc7+scip3*dsc7);
#else
real gfi6 = sci3*(psc7 - bn3);
#endif
ftm21 += gfi6*qkr1;
ftm22 += gfi6*qkr2;
ftm23 += gfi6*qkr3;
real sci1 = atom1.inducedDipole.x*dk1 + atom1.inducedDipole.y*dk2 + atom1.inducedDipole.z*dk3 + di1*atom2.inducedDipole.x + di2*atom2.inducedDipole.y + di3*atom2.inducedDipole.z;
energy += forceFactor*0.5f*(sci1*(bn1-psc3));
real sci8 = qkr1*atom1.inducedDipole.x + qkr2*atom1.inducedDipole.y + qkr3*atom1.inducedDipole.z;
energy -= forceFactor*sci8*(bn2-psc5);
real scip1 = atom1.inducedDipolePolar.x*dk1 + atom1.inducedDipolePolar.y*dk2 + atom1.inducedDipolePolar.z*dk3 + di1*atom2.inducedDipolePolar.x + di2*atom2.inducedDipolePolar.y + di3*atom2.inducedDipolePolar.z;
#ifndef APPLY_SCALE
sci1 += scip1;
#endif
real scip2 = atom1.inducedDipole.x*atom2.inducedDipolePolar.x +
atom1.inducedDipole.y*atom2.inducedDipolePolar.y +
atom1.inducedDipole.z*atom2.inducedDipolePolar.z +
atom2.inducedDipole.x*atom1.inducedDipolePolar.x +
atom2.inducedDipole.y*atom1.inducedDipolePolar.y +
atom2.inducedDipole.z*atom1.inducedDipolePolar.z;
real scip8 = qkr1*atom1.inducedDipolePolar.x + qkr2*atom1.inducedDipolePolar.y + qkr3*atom1.inducedDipolePolar.z;
#ifndef APPLY_SCALE
sci8 += scip8;
#endif
gli1 = ck*sci3 + sci1;
gli2 = -(sci3*sc4 + 2*sci8);
gli3 = sci3*sc6;
#ifdef APPLY_SCALE
glip1 = ck*scip3 + scip1;
glip2 = -(scip3*sc4 + 2*scip8);
glip3 = scip3*sc6;
#endif
#ifdef APPLY_SCALE
gfi1 += (bn2*(gli1+glip1) + bn3*(gli2+glip2) + bn4*(gli3+glip3));
gfi1 -= (rr1*rr1)*(3*(gli1*psc3 + glip1*dsc3) + 5*(gli2*psc5 + glip2*dsc5) + 7*(gli3*psc7+glip3*dsc7));
#else
gfi1 += (bn2*gli1 + bn3*gli2 + bn4*gli3);
gfi1 -= (rr1*rr1)*(3*gli1*psc3 + 5*gli2*psc5 + 7*gli3*psc7);
#endif
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
gfi1 += scip2*(bn2 - (3*rr1*rr1)*usc3);
#else
gfi1 += scip2*(bn2 - (3*rr1*rr1)*psc3);
#endif
#endif
gfi1 *= 0.5f;
ftm21 += gfi1*xr;
ftm22 += gfi1*yr;
ftm23 += gfi1*zr;
{
real expdamp = EXP(damp);
real temp3 = -1.5f*damp*expdamp*rr1*rr1;
real temp5 = -damp;
real temp7 = -0.2f - 0.6f*damp;
real ddsc31 = temp3*xr;
real ddsc32 = temp3*yr;
real ddsc33 = temp3*zr;
real ddsc51 = temp5*ddsc31;
real ddsc52 = temp5*ddsc32;
real ddsc53 = temp5*ddsc33;
real ddsc71 = temp7*ddsc51;
real ddsc72 = temp7*ddsc52;
real ddsc73 = temp7*ddsc53;
real rr3 = rr1*rr1*rr1;
#ifdef APPLY_SCALE
temp3 = gli1*pScale + glip1*dScale;
temp5 = (3*rr1*rr1)*(gli2*pScale + glip2*dScale);
temp7 = (15*rr3*rr1)*(gli3*pScale + glip3*dScale);
#else
temp3 = gli1;
temp5 = (3*rr1*rr1)*gli2;
temp7 = (15*rr3*rr1)*(gli3);
#endif
ftm21 -= rr3*(temp3*ddsc31 + temp5*ddsc51 + temp7*ddsc71);
ftm22 -= rr3*(temp3*ddsc32 + temp5*ddsc52 + temp7*ddsc72);
ftm23 -= rr3*(temp3*ddsc33 + temp5*ddsc53 + temp7*ddsc73);
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
temp3 = uScale*scip2;
temp5 = -(3*rr1*rr1)*uScale*sci34;
#else
temp3 = scip2;
temp5 = -(3*rr1*rr1)*sci34;
#endif
ftm21 -= rr3*(temp3*ddsc31 + temp5*ddsc51);
ftm22 -= rr3*(temp3*ddsc32 + temp5*ddsc52);
ftm23 -= rr3*(temp3*ddsc33 + temp5*ddsc53);
#endif
}
force.x += ftm21;
force.y += ftm22;
force.z += ftm23;
}
__device__ void
#ifdef APPLY_SCALE
computeOneInteractionT1(
#else
computeOneInteractionT1NoScale(
#endif
AtomData& atom1, volatile AtomData& atom2, const real4 delta, const real4 bn
#ifdef APPLY_SCALE
, float dScale, float pScale, float mScale
#endif
) {
real xr = delta.x;
real yr = delta.y;
real zr = delta.z;
#ifdef APPLY_SCALE
real rr1 = delta.w;
#endif
// set the permanent multipole and induced dipole values;
real di1 = atom1.dipole.x;
real di2 = atom1.dipole.y;
real di3 = atom1.dipole.z;
real qi1 = atom1.quadrupoleXX;
real qi2 = atom1.quadrupoleXY;
real qi3 = atom1.quadrupoleXZ;
real qi5 = atom1.quadrupoleYY;
real qi6 = atom1.quadrupoleYZ;
//real qi9 = atom1.labFrameQuadrupole[5];
real qi9 = -(atom1.quadrupoleXX + atom1.quadrupoleYY);
real ck = atom2.q;
real dk1 = atom2.dipole.x;
real dk2 = atom2.dipole.y;
real dk3 = atom2.dipole.z;
real qk1 = atom2.quadrupoleXX;
real qk2 = atom2.quadrupoleXY;
real qk3 = atom2.quadrupoleXZ;
real qk5 = atom2.quadrupoleYY;
real qk6 = atom2.quadrupoleYZ;
//real qk9 = atom2.labFrameQuadrupole[5];
real qk9 = -(atom2.quadrupoleXX + atom2.quadrupoleYY);
real bn1 = bn.x;
real bn2 = bn.y;
real bn3 = bn.z;
real bn4 = bn.w;
// apply Thole polarization damping to scale factors
#ifdef APPLY_SCALE
real rr2 = rr1*rr1;
real rr3 = rr1*rr2;
real rr5 = 3*rr3*rr2;
real rr7 = 5*rr5*rr2;
real rr9 = 7*rr7*rr2;
real scale = 1-mScale;
real prefactor = scale*rr3 - bn1;
#else
real prefactor = -bn1;
#endif
real dixdk1 = di2*dk3 - di3*dk2;
real ttm21 = prefactor*dixdk1;
real dixdk2 = di3*dk1 - di1*dk3;
real ttm22 = prefactor*dixdk2;
real dixdk3 = di1*dk2 - di2*dk1;
real ttm23 = prefactor*dixdk3;
real qir1 = qi1*xr + qi2*yr + qi3*zr;
real qir2 = qi2*xr + qi5*yr + qi6*zr;
real qir3 = qi3*xr + qi6*yr + qi9*zr;
real qkr1 = qk1*xr + qk2*yr + qk3*zr;
real qkr2 = qk2*xr + qk5*yr + qk6*zr;
real qkr3 = qk3*xr + qk6*yr + qk9*zr;
real qiqkr1 = qi1*qkr1 + qi2*qkr2 + qi3*qkr3;
real qiqkr2 = qi2*qkr1 + qi5*qkr2 + qi6*qkr3;
real qiqkr3 = qi3*qkr1 + qi6*qkr2 + qi9*qkr3;
real rxqikr1 = yr*qiqkr3 - zr*qiqkr2;
real qkrxqir1 = qkr2*qir3 - qkr3*qir2;
#ifdef APPLY_SCALE
prefactor = 4*(bn3 - scale*rr7);
#else
prefactor = 4*bn3;
#endif
ttm21 -= prefactor*(rxqikr1+qkrxqir1);
real rxqikr2 = zr*qiqkr1 - xr*qiqkr3;
real qkrxqir2 = qkr3*qir1 - qkr1*qir3;
ttm22 -= prefactor*(rxqikr2+qkrxqir2);
real rxqikr3 = xr*qiqkr2 - yr*qiqkr1;
real qkrxqir3 = qkr1*qir2 - qkr2*qir1;
ttm23 -= prefactor*(rxqikr3+qkrxqir3);
real qidk1 = qi1*dk1 + qi2*dk2 + qi3*dk3;
real qidk2 = qi2*dk1 + qi5*dk2 + qi6*dk3;
real qidk3 = qi3*dk1 + qi6*dk2 + qi9*dk3;
real dixqkr1 = di2*qkr3 - di3*qkr2;
real dkxqir1 = dk2*qir3 - dk3*qir2;
real rxqidk1 = yr*qidk3 - zr*qidk2;
real qixqk1 = qi2*qk3 + qi5*qk6 + qi6*qk9 - qi3*qk2 - qi6*qk5 - qi9*qk6;
#ifdef APPLY_SCALE
prefactor = 2*(bn2 - scale*rr5);
#else
prefactor = 2*bn2;
#endif
ttm21 += prefactor*(dixqkr1+dkxqir1+rxqidk1-2*qixqk1);
real dixqkr2 = di3*qkr1 - di1*qkr3;
real dkxqir2 = dk3*qir1 - dk1*qir3;
real rxqidk2 = zr*qidk1 - xr*qidk3;
real qixqk2 = qi3*qk1 + qi6*qk2 + qi9*qk3 - qi1*qk3 - qi2*qk6 - qi3*qk9;
ttm22 += prefactor*(dixqkr2+dkxqir2+rxqidk2-2*qixqk2);
real dixqkr3 = di1*qkr2 - di2*qkr1;
real dkxqir3 = dk1*qir2 - dk2*qir1;
real rxqidk3 = xr*qidk2 - yr*qidk1;
real qixqk3 = qi1*qk2 + qi2*qk5 + qi3*qk6 - qi2*qk1 - qi5*qk2 - qi6*qk3;
ttm23 += prefactor*(dixqkr3+dkxqir3+rxqidk3-2*qixqk3);
real sc4 = dk1*xr + dk2*yr + dk3*zr;
real sc6 = qkr1*xr + qkr2*yr + qkr3*zr;
real gf2 = -ck*bn1 + sc4*bn2 - sc6*bn3;
#ifdef APPLY_SCALE
real gfr2 = -ck*rr3 + sc4*rr5 - sc6*rr7;
prefactor = (gf2 - scale*gfr2);
#else
prefactor = gf2;
#endif
ttm21 += prefactor*(di2*zr - di3*yr);
ttm22 += prefactor*(di3*xr - di1*zr);
ttm23 += prefactor*(di1*yr - di2*xr);
real gf5 = (-ck*bn2+sc4*bn3-sc6*bn4);
#ifdef APPLY_SCALE
real gfr5 = (-ck*rr5+sc4*rr7-sc6*rr9);
prefactor = 2*(gf5 - scale*gfr5);
#else
prefactor = 2*gf5;
#endif
real rxqir1 = yr*qir3 - zr*qir2;
real rxqir2 = zr*qir1 - xr*qir3;
real rxqir3 = xr*qir2 - yr*qir1;
ttm21 -= prefactor*rxqir1;
ttm22 -= prefactor*rxqir2;
ttm23 -= prefactor*rxqir3;
atom1.torque.x += ttm21;
atom1.torque.y += ttm22;
atom1.torque.z += ttm23;
}
__device__ void
#ifdef APPLY_SCALE
computeOneInteractionT2(
#else
computeOneInteractionT2NoScale(
#endif
AtomData& atom1, volatile AtomData& atom2, const real4 delta, const real4 bn
#ifdef APPLY_SCALE
, float dScale, float pScale, float mScale
#endif
) {
real xr = delta.x;
real yr = delta.y;
real zr = delta.z;
real rr1 = delta.w;
// set the permanent multipole and induced dipole values;
real di1 = atom1.dipole.x;
real di2 = atom1.dipole.y;
real di3 = atom1.dipole.z;
real qi1 = atom1.quadrupoleXX;
real qi2 = atom1.quadrupoleXY;
real qi3 = atom1.quadrupoleXZ;
real qi5 = atom1.quadrupoleYY;
real qi6 = atom1.quadrupoleYZ;
real qi9 = -(atom1.quadrupoleXX + atom1.quadrupoleYY);
real bn1 = bn.x;
real bn2 = bn.y;
real bn3 = bn.z;
// apply Thole polarization damping to scale factors
real scale3 = 1;
real scale5 = 1;
real scale7 = 1;
real damp = atom1.damp*atom2.damp;
if (damp != 0) {
real pgamma = atom1.thole < atom2.thole ? atom1.thole : atom2.thole;
real ratio = RECIP(rr1*damp);
damp = -pgamma*ratio*ratio*ratio;
real expdamp = EXP(damp);
scale3 = 1 - expdamp;
scale5 = 1 - (1-damp)*expdamp;
scale7 = 1 - (1-damp+0.6f*damp*damp)*expdamp;
}
real rr3 = rr1*rr1*rr1;
#ifdef APPLY_SCALE
real dsc3 = rr3*(1 - scale3*dScale);
real dsc5 = (3*rr3*rr1*rr1)* (1 - scale5*dScale);
real dsc7 = (15*rr3*rr3*rr1)*(1 - scale7*dScale);
real psc3 = rr3*(1 - scale3*pScale);
real psc5 = (3*rr3*rr1*rr1)*(1 - scale5*pScale);
real psc7 = (15*rr3*rr3*rr1)*(1 - scale7*pScale);
#else
real psc3 = rr3*(1 - scale3);
real psc5 = (3*rr3*rr1*rr1)*(1 - scale5);
real psc7 = (15*rr3*rr3*rr1)*(1 - scale7);
#endif
real prefactor1 = 0.5f*(psc3 - bn1);
#ifdef APPLY_SCALE
real prefactor2 = 0.5f*(dsc3 - bn1);
#endif
real dixuk1 = di2*atom2.inducedDipole.z - di3*atom2.inducedDipole.y;
real dixukp1 = di2*atom2.inducedDipolePolar.z - di3*atom2.inducedDipolePolar.y;
#ifdef APPLY_SCALE
real ttm2i1 = prefactor1*dixuk1 + prefactor2*dixukp1;
#else
real ttm2i1 = prefactor1*(dixuk1 + dixukp1);
#endif
real dixuk2 = di3*atom2.inducedDipole.x - di1*atom2.inducedDipole.z;
real dixukp2 = di3*atom2.inducedDipolePolar.x - di1*atom2.inducedDipolePolar.z;
#ifdef APPLY_SCALE
real ttm2i2 = prefactor1*dixuk2 + prefactor2*dixukp2;
#else
real ttm2i2 = prefactor1*(dixuk2 + dixukp2);
#endif
real dixuk3 = di1*atom2.inducedDipole.y - di2*atom2.inducedDipole.x;
real dixukp3 = di1*atom2.inducedDipolePolar.y - di2*atom2.inducedDipolePolar.x;
#ifdef APPLY_SCALE
real ttm2i3 = prefactor1*dixuk3 + prefactor2*dixukp3;
#else
real ttm2i3 = prefactor1*(dixuk3 + dixukp3);
#endif
real sci4 = atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr;
real scip4 = atom2.inducedDipolePolar.x*xr + atom2.inducedDipolePolar.y*yr + atom2.inducedDipolePolar.z*zr;
real gti2 = bn2*(sci4+scip4);
#ifdef APPLY_SCALE
real gtri2 = (sci4*psc5+scip4*dsc5);
#else
real gtri2 = psc5*(sci4+scip4);
#endif
prefactor1 = 0.5f*(gti2 - gtri2);
ttm2i1 += prefactor1*(di2*zr - di3*yr);
ttm2i2 += prefactor1*(di3*xr - di1*zr);
ttm2i3 += prefactor1*(di1*yr - di2*xr);
real qir1 = qi1*xr + qi2*yr + qi3*zr;
real qir2 = qi2*xr + qi5*yr + qi6*zr;
real qir3 = qi3*xr + qi6*yr + qi9*zr;
#ifdef APPLY_SCALE
prefactor1 = sci4*psc7 + scip4*dsc7 - bn3*(sci4+scip4);
#else
prefactor1 = psc7*(sci4+scip4) - bn3*(sci4+scip4);
#endif
ttm2i1 += prefactor1*(yr*qir3 - zr*qir2);
ttm2i2 += prefactor1*(zr*qir1 - xr*qir3);
ttm2i3 += prefactor1*(xr*qir2 - yr*qir1);
real qiuk1 = qi1*atom2.inducedDipole.x + qi2*atom2.inducedDipole.y + qi3*atom2.inducedDipole.z;
real qiuk2 = qi2*atom2.inducedDipole.x + qi5*atom2.inducedDipole.y + qi6*atom2.inducedDipole.z;
real qiuk3 = qi3*atom2.inducedDipole.x + qi6*atom2.inducedDipole.y + qi9*atom2.inducedDipole.z;
real qiukp1 = qi1*atom2.inducedDipolePolar.x + qi2*atom2.inducedDipolePolar.y + qi3*atom2.inducedDipolePolar.z;
real qiukp2 = qi2*atom2.inducedDipolePolar.x + qi5*atom2.inducedDipolePolar.y + qi6*atom2.inducedDipolePolar.z;
real qiukp3 = qi3*atom2.inducedDipolePolar.x + qi6*atom2.inducedDipolePolar.y + qi9*atom2.inducedDipolePolar.z;
prefactor1 = (bn2 - psc5);
#ifdef APPLY_SCALE
prefactor2 = (bn2 - dsc5);
#endif
real ukxqir1 = atom2.inducedDipole.y*qir3 - atom2.inducedDipole.z*qir2;
real ukxqirp1 = atom2.inducedDipolePolar.y*qir3 - atom2.inducedDipolePolar.z*qir2;
real rxqiuk1 = yr*qiuk3 - zr*qiuk2;
real rxqiukp1 = yr*qiukp3 - zr*qiukp2;
#ifdef APPLY_SCALE
ttm2i1 += prefactor1*(ukxqir1 + rxqiuk1) + prefactor2*(ukxqirp1 + rxqiukp1);
#else
ttm2i1 += prefactor1*(ukxqir1 + rxqiuk1 + ukxqirp1 + rxqiukp1);
#endif
real ukxqir2 = atom2.inducedDipole.z*qir1 - atom2.inducedDipole.x*qir3;
real ukxqirp2 = atom2.inducedDipolePolar.z*qir1 - atom2.inducedDipolePolar.x*qir3;
real rxqiuk2 = zr*qiuk1 - xr*qiuk3;
real rxqiukp2 = zr*qiukp1 - xr*qiukp3;
#ifdef APPLY_SCALE
ttm2i2 += prefactor1*(ukxqir2 + rxqiuk2) + prefactor2*(ukxqirp2 + rxqiukp2);
#else
ttm2i2 += prefactor1*(ukxqir2 + rxqiuk2 + ukxqirp2 + rxqiukp2);
#endif
real ukxqir3 = atom2.inducedDipole.x*qir2 - atom2.inducedDipole.y*qir1;
real ukxqirp3 = atom2.inducedDipolePolar.x*qir2 - atom2.inducedDipolePolar.y*qir1;
real rxqiuk3 = xr*qiuk2 - yr*qiuk1;
real rxqiukp3 = xr*qiukp2 - yr*qiukp1;
#ifdef APPLY_SCALE
ttm2i3 += prefactor1*(ukxqir3 + rxqiuk3) + prefactor2*(ukxqirp3 + rxqiukp3);
#else
ttm2i3 += prefactor1*(ukxqir3 + rxqiuk3 + ukxqirp3 + rxqiukp3);
#endif
atom1.torque.x += ttm2i1;
atom1.torque.y += ttm2i2;
atom1.torque.z += ttm2i3;
}
__device__ void
#ifdef APPLY_SCALE
computeOneInteractionF1(
#else
computeOneInteractionF1NoScale(
#endif
AtomData& atom1, volatile AtomData& atom2, real4 delta, real4 bn, real bn5, float forceFactor,
#ifdef APPLY_SCALE
float dScale, float pScale, float mScale,
#endif
real3& force, real& energy) {
real xr = delta.x;
real yr = delta.y;
real zr = delta.z;
#ifdef APPLY_SCALE
real rr1 = delta.w;
#endif
// set the permanent multipole and induced dipole values;
real ci = atom1.q;
real di1 = atom1.dipole.x;
real di2 = atom1.dipole.y;
real di3 = atom1.dipole.z;
real ck = atom2.q;
real dk1 = atom2.dipole.x;
real dk2 = atom2.dipole.y;
real dk3 = atom2.dipole.z;
real bn1 = bn.x;
real bn2 = bn.y;
real bn3 = bn.z;
real bn4 = bn.w;
#ifdef APPLY_SCALE
real offset = 1-mScale;
real rr3 = rr1*rr1*rr1;
real gf4 = 2*(bn2 - 3*offset*rr3*rr1*rr1);
#else
real gf4 = 2*bn2;
#endif
real ftm21 = 0;
real ftm22 = 0;
real ftm23 = 0;
// calculate the scalar products for permanent components
real gl6 = di1*dk1 + di2*dk2 + di3*dk3;
real sc3 = di1*xr + di2*yr + di3*zr;
real sc4 = dk1*xr + dk2*yr + dk3*zr;
real gl0 = ci*ck;
real gl1 = ck*sc3 - ci*sc4;
real gl2 = -sc3*sc4;
#ifdef APPLY_SCALE
energy += forceFactor*(-offset*rr1*gl0 + (bn1-offset*rr3)*(gl1+gl6) + (bn2-offset*(3*rr3*rr1*rr1))*gl2);
#else
energy += forceFactor*(bn1*(gl1+gl6) + bn2*gl2);
#endif
real gf1 = bn1*gl0 + bn2*(gl1+gl6) + bn3*gl2;
#ifdef APPLY_SCALE
gf1 -= offset*(rr3*gl0 + (3*rr3*rr1*rr1)*(gl1+gl6) + (15*rr3*rr3*rr1)*gl2);
#endif
ftm21 += gf1*xr;
ftm22 += gf1*yr;
ftm23 += gf1*zr;
#ifdef APPLY_SCALE
real gf2 = -ck*bn1 + sc4*bn2 - offset*(-ck*rr3 + sc4*(3*rr3*rr1*rr1));
#else
real gf2 = -ck*bn1 + sc4*bn2;
#endif
ftm21 += gf2*di1;
ftm22 += gf2*di2;
ftm23 += gf2*di3;
#ifdef APPLY_SCALE
real gf3 = ci*bn1 + sc3*bn2 - offset*(ci*rr3 + sc3*(3*rr3*rr1*rr1));
#else
real gf3 = ci*bn1 + sc3*bn2;
#endif
ftm21 += gf3*dk1;
ftm22 += gf3*dk2;
ftm23 += gf3*dk3;
force.x = ftm21;
force.y = ftm22;
force.z = ftm23;
}
__device__ void
#ifdef APPLY_SCALE
computeOneInteractionF2(
#else
computeOneInteractionF2NoScale(
#endif
AtomData& atom1, volatile AtomData& atom2, real4 delta, real4 bn, float forceFactor,
#ifdef APPLY_SCALE
float dScale, float pScale, float mScale,
#endif
real3& force, real& energy) {
const float uScale = 1;
real xr = delta.x;
real yr = delta.y;
real zr = delta.z;
real rr1 = delta.w;
// set the permanent multipole and induced dipole values;
real ci = atom1.q;
real di1 = atom1.dipole.x;
real di2 = atom1.dipole.y;
real di3 = atom1.dipole.z;
real bn1 = bn.x;
real bn2 = bn.y;
real bn3 = bn.z;
real bn4 = bn.w;
real damp = atom1.damp*atom2.damp;
if (damp != 0) {
real pgamma = atom1.thole < atom2.thole ? atom1.thole : atom2.thole;
real ratio = RECIP(rr1*damp);
damp = -pgamma*ratio*ratio*ratio;
}
real scale5 = (damp == 0) ? 1 : (1 - (1-damp)*EXP(damp));
real rr5 = rr1*rr1;
rr5 = 3*rr1*rr5*rr5;
#ifdef APPLY_SCALE
real psc5 = rr5*(1 - scale5*pScale);
real dsc5 = rr5*(1 - scale5*dScale);
real usc5 = rr5*(1 - scale5*uScale);
#else
real psc5 = rr5*(1 - scale5);
#endif
real ftm21 = 0;
real ftm22 = 0;
real ftm23 = 0;
real expdamp = EXP(damp);
real scale3 = (damp == 0) ? 1 : (1 - expdamp);
real rr3 = rr1*rr1*rr1;
#ifdef APPLY_SCALE
real psc3 = rr3*(1 - scale3*pScale);
real dsc3 = rr3*(1 - scale3*dScale);
real usc3 = rr3*(1 - scale3*uScale);
#else
real psc3 = rr3*(1 - scale3);
#endif
real scale7 = (damp == 0) ? 1 : (1 - (1-damp+0.6f*damp*damp)*expdamp);
#ifdef APPLY_SCALE
real psc7 = (15*rr3*rr3*rr1)*(1 - scale7*pScale);
real dsc7 = (15*rr3*rr3*rr1)*(1 - scale7*dScale);
#else
real psc7 = (15*rr3*rr3*rr1)*(1 - scale7);
#endif
real sc3 = di1*xr + di2*yr + di3*zr;
real gfi3 = ci*bn1 + sc3*bn2;
real prefactor1;
prefactor1 = 0.5f*(ci*psc3 + sc3*psc5 - gfi3);
ftm21 -= prefactor1*atom2.inducedDipole.x;
ftm22 -= prefactor1*atom2.inducedDipole.y;
ftm23 -= prefactor1*atom2.inducedDipole.z;
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(ci*dsc3 + sc3*dsc5 - gfi3);
#endif
ftm21 -= prefactor1*atom2.inducedDipolePolar.x;
ftm22 -= prefactor1*atom2.inducedDipolePolar.y;
ftm23 -= prefactor1*atom2.inducedDipolePolar.z;
real sci4 = atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr;
energy += forceFactor*0.5f*sci4*((psc3-bn1)*ci + (psc5-bn2)*sc3);
real scip4 = atom2.inducedDipolePolar.x*xr + atom2.inducedDipolePolar.y*yr + atom2.inducedDipolePolar.z*zr;
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(bn2 - usc5);
#else
prefactor1 = 0.5f*(bn2 - psc5);
#endif
ftm21 += prefactor1*((sci4*atom1.inducedDipolePolar.x + scip4*atom1.inducedDipole.x));
ftm22 += prefactor1*((sci4*atom1.inducedDipolePolar.y + scip4*atom1.inducedDipole.y));
ftm23 += prefactor1*((sci4*atom1.inducedDipolePolar.z + scip4*atom1.inducedDipole.z));
#endif
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(bn2*(sci4+scip4) - (sci4*psc5+scip4*dsc5));
#else
sci4 += scip4;
prefactor1 = 0.5f*sci4*(bn2 - psc5);
#endif
ftm21 += prefactor1*di1;
ftm22 += prefactor1*di2;
ftm23 += prefactor1*di3;
#ifdef APPLY_SCALE
real gli1 = -ci*sci4;
real gli2 = -sc3*sci4;
real glip1 = -ci*scip4;
real glip2 = -sc3*scip4;
#else
real gli1 = -ci*sci4;
real gli2 = -sc3*sci4;
#endif
#ifdef APPLY_SCALE
real gfi1 = (bn2*(gli1+glip1) + bn3*(gli2+glip2));
gfi1 -= (rr1*rr1)*(3*(gli1*psc3 + glip1*dsc3) + 5*(gli2*psc5 + glip2*dsc5));
#else
real gfi1 = bn2*gli1 + bn3*gli2;
gfi1 -= (rr1*rr1)*(3*gli1*psc3 + 5*gli2*psc5);
#endif
gfi1 *= 0.5f;
ftm21 += gfi1*xr;
ftm22 += gfi1*yr;
ftm23 += gfi1*zr;
{
real expdamp = EXP(damp);
real temp3 = -1.5f*damp*expdamp*rr1*rr1;
real temp5 = -damp;
real temp7 = -0.2f - 0.6f*damp;
real ddsc31 = temp3*xr;
real ddsc32 = temp3*yr;
real ddsc33 = temp3*zr;
real ddsc51 = temp5*ddsc31;
real ddsc52 = temp5*ddsc32;
real ddsc53 = temp5*ddsc33;
real ddsc71 = temp7*ddsc51;
real ddsc72 = temp7*ddsc52;
real ddsc73 = temp7*ddsc53;
real rr3 = rr1*rr1*rr1;
#ifdef APPLY_SCALE
temp3 = (gli1*pScale + glip1*dScale);
temp5 = (3*rr1*rr1)*(gli2*pScale + glip2*dScale);
#else
temp3 = gli1;
temp5 = (3*rr1*rr1)*gli2;
#endif
ftm21 -= rr3*(temp3*ddsc31 + temp5*ddsc51);
ftm22 -= rr3*(temp3*ddsc32 + temp5*ddsc52);
ftm23 -= rr3*(temp3*ddsc33 + temp5*ddsc53);
}
//K
real dk1 = atom2.dipole.x;
real dk2 = atom2.dipole.y;
real dk3 = atom2.dipole.z;
real sc4 = dk1*xr + dk2*yr + dk3*zr;
real ck = atom2.q;
real gfi2 = (-ck*bn1 + sc4*bn2);
prefactor1 = 0.5f*(ck*psc3 - sc4*psc5 + gfi2);
ftm21 += prefactor1*atom1.inducedDipole.x;
ftm22 += prefactor1*atom1.inducedDipole.y;
ftm23 += prefactor1*atom1.inducedDipole.z;
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(ck*dsc3 - sc4*dsc5 + gfi2);
#endif
ftm21 += prefactor1*atom1.inducedDipolePolar.x;
ftm22 += prefactor1*atom1.inducedDipolePolar.y;
ftm23 += prefactor1*atom1.inducedDipolePolar.z;
real sci3 = atom1.inducedDipole.x*xr + atom1.inducedDipole.y*yr + atom1.inducedDipole.z*zr;
energy += forceFactor*0.5f*sci3*(ck*(bn1-psc3) - sc4*(bn2-psc5));
real scip3 = atom1.inducedDipolePolar.x*xr + atom1.inducedDipolePolar.y*yr + atom1.inducedDipolePolar.z*zr;
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(bn2 - usc5);
#else
prefactor1 = 0.5f*(bn2 - psc5);
#endif
ftm21 += prefactor1*(sci3*atom2.inducedDipolePolar.x + scip3*atom2.inducedDipole.x);
ftm22 += prefactor1*(sci3*atom2.inducedDipolePolar.y + scip3*atom2.inducedDipole.y);
ftm23 += prefactor1*(sci3*atom2.inducedDipolePolar.z + scip3*atom2.inducedDipole.z);
real sci34;
sci4 = atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr;
scip4 = atom2.inducedDipolePolar.x*xr + atom2.inducedDipolePolar.y*yr + atom2.inducedDipolePolar.z*zr;
sci34 = (sci3*scip4+scip3*sci4);
#ifdef APPLY_SCALE
gfi1 = sci34*(usc5*(5*rr1*rr1) -bn3);
#else
gfi1 = sci34*(psc5*(5*rr1*rr1) -bn3);
#endif
#else
gfi1 = 0;
#endif
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(bn2*(sci3+scip3) - (sci3*psc5+scip3*dsc5));
#else
sci3 += scip3;
prefactor1 = 0.5f*sci3*(bn2 - psc5);
#endif
ftm21 += prefactor1*dk1;
ftm22 += prefactor1*dk2;
ftm23 += prefactor1*dk3;
#ifdef APPLY_SCALE
real gfi6 = -bn3*(sci3+scip3) + (sci3*psc7+scip3*dsc7);
#else
real gfi6 = sci3*(psc7 - bn3);
#endif
real sci1 = atom1.inducedDipole.x*dk1 + atom1.inducedDipole.y*dk2 + atom1.inducedDipole.z*dk3 + di1*atom2.inducedDipole.x + di2*atom2.inducedDipole.y + di3*atom2.inducedDipole.z;
energy += forceFactor*0.5f*(sci1*(bn1-psc3));
real scip1 = atom1.inducedDipolePolar.x*dk1 + atom1.inducedDipolePolar.y*dk2 + atom1.inducedDipolePolar.z*dk3 + di1*atom2.inducedDipolePolar.x + di2*atom2.inducedDipolePolar.y + di3*atom2.inducedDipolePolar.z;
#ifndef APPLY_SCALE
sci1 += scip1;
#endif
real scip2 = atom1.inducedDipole.x*atom2.inducedDipolePolar.x +
atom1.inducedDipole.y*atom2.inducedDipolePolar.y +
atom1.inducedDipole.z*atom2.inducedDipolePolar.z +
atom2.inducedDipole.x*atom1.inducedDipolePolar.x +
atom2.inducedDipole.y*atom1.inducedDipolePolar.y +
atom2.inducedDipole.z*atom1.inducedDipolePolar.z;
gli1 = ck*sci3 + sci1;
gli2 = -sci3*sc4;
#ifdef APPLY_SCALE
glip1 = ck*scip3 + scip1;
glip2 = -scip3*sc4;
#endif
#ifdef APPLY_SCALE
gfi1 += (bn2*(gli1+glip1) + bn3*(gli2+glip2));
gfi1 -= (rr1*rr1)*(3*(gli1*psc3 + glip1*dsc3) + 5*(gli2*psc5 + glip2*dsc5));
#else
gfi1 += (bn2*gli1 + bn3*gli2);
gfi1 -= (rr1*rr1)*(3*gli1*psc3 + 5*gli2*psc5);
#endif
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
gfi1 += scip2*(bn2 - (3*rr1*rr1)*usc3);
#else
gfi1 += scip2*(bn2 - (3*rr1*rr1)*psc3);
#endif
#endif
gfi1 *= 0.5f;
ftm21 += gfi1*xr;
ftm22 += gfi1*yr;
ftm23 += gfi1*zr;
{
real expdamp = EXP(damp);
real temp3 = -1.5f*damp*expdamp*rr1*rr1;
real temp5 = -damp;
real temp7 = -0.2f - 0.6f*damp;
real ddsc31 = temp3*xr;
real ddsc32 = temp3*yr;
real ddsc33 = temp3*zr;
real ddsc51 = temp5*ddsc31;
real ddsc52 = temp5*ddsc32;
real ddsc53 = temp5*ddsc33;
real ddsc71 = temp7*ddsc51;
real ddsc72 = temp7*ddsc52;
real ddsc73 = temp7*ddsc53;
real rr3 = rr1*rr1*rr1;
#ifdef APPLY_SCALE
temp3 = gli1*pScale + glip1*dScale;
temp5 = (3*rr1*rr1)*(gli2*pScale + glip2*dScale);
#else
temp3 = gli1;
temp5 = (3*rr1*rr1)*gli2;
#endif
ftm21 -= rr3*(temp3*ddsc31 + temp5*ddsc51);
ftm22 -= rr3*(temp3*ddsc32 + temp5*ddsc52);
ftm23 -= rr3*(temp3*ddsc33 + temp5*ddsc53);
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
temp3 = uScale*scip2;
temp5 = -(3*rr1*rr1)*uScale*sci34;
#else
temp3 = scip2;
temp5 = -(3*rr1*rr1)*sci34;
#endif
ftm21 -= rr3*(temp3*ddsc31 + temp5*ddsc51);
ftm22 -= rr3*(temp3*ddsc32 + temp5*ddsc52);
ftm23 -= rr3*(temp3*ddsc33 + temp5*ddsc53);
#endif
}
force.x += ftm21;
force.y += ftm22;
force.z += ftm23;
}
__device__ void
#ifdef APPLY_SCALE
computeOneInteractionT1(
#else
computeOneInteractionT1NoScale(
#endif
AtomData& atom1, volatile AtomData& atom2, const real4 delta, const real4 bn
#ifdef APPLY_SCALE
, float dScale, float pScale, float mScale
#endif
) {
real xr = delta.x;
real yr = delta.y;
real zr = delta.z;
#ifdef APPLY_SCALE
real rr1 = delta.w;
#endif
// set the permanent multipole and induced dipole values;
real di1 = atom1.dipole.x;
real di2 = atom1.dipole.y;
real di3 = atom1.dipole.z;
real ck = atom2.q;
real dk1 = atom2.dipole.x;
real dk2 = atom2.dipole.y;
real dk3 = atom2.dipole.z;
real bn1 = bn.x;
real bn2 = bn.y;
real bn3 = bn.z;
real bn4 = bn.w;
// apply Thole polarization damping to scale factors
#ifdef APPLY_SCALE
real rr2 = rr1*rr1;
real rr3 = rr1*rr2;
real rr5 = 3*rr3*rr2;
real rr7 = 5*rr5*rr2;
real rr9 = 7*rr7*rr2;
real scale = 1-mScale;
real prefactor = scale*rr3 - bn1;
#else
real prefactor = -bn1;
#endif
real dixdk1 = di2*dk3 - di3*dk2;
real ttm21 = prefactor*dixdk1;
real dixdk2 = di3*dk1 - di1*dk3;
real ttm22 = prefactor*dixdk2;
real dixdk3 = di1*dk2 - di2*dk1;
real ttm23 = prefactor*dixdk3;
real sc4 = dk1*xr + dk2*yr + dk3*zr;
real sc6 = 0;
real gf2 = -ck*bn1 + sc4*bn2;
#ifdef APPLY_SCALE
real gfr2 = -ck*rr3 + sc4*rr5;
prefactor = (gf2 - scale*gfr2);
#else
prefactor = gf2;
#endif
ttm21 += prefactor*(di2*zr - di3*yr);
ttm22 += prefactor*(di3*xr - di1*zr);
ttm23 += prefactor*(di1*yr - di2*xr);
atom1.torque.x += ttm21;
atom1.torque.y += ttm22;
atom1.torque.z += ttm23;
}
__device__ void
#ifdef APPLY_SCALE
computeOneInteractionT2(
#else
computeOneInteractionT2NoScale(
#endif
AtomData& atom1, volatile AtomData& atom2, const real4 delta, const real4 bn
#ifdef APPLY_SCALE
, float dScale, float pScale, float mScale
#endif
) {
real xr = delta.x;
real yr = delta.y;
real zr = delta.z;
real rr1 = delta.w;
// set the permanent multipole and induced dipole values;
real di1 = atom1.dipole.x;
real di2 = atom1.dipole.y;
real di3 = atom1.dipole.z;
real bn1 = bn.x;
real bn2 = bn.y;
real bn3 = bn.z;
// apply Thole polarization damping to scale factors
real scale3 = 1;
real scale5 = 1;
real scale7 = 1;
real damp = atom1.damp*atom2.damp;
if (damp != 0) {
real pgamma = atom1.thole < atom2.thole ? atom1.thole : atom2.thole;
real ratio = RECIP(rr1*damp);
damp = -pgamma*ratio*ratio*ratio;
real expdamp = EXP(damp);
scale3 = 1 - expdamp;
scale5 = 1 - (1-damp)*expdamp;
scale7 = 1 - (1-damp+0.6f*damp*damp)*expdamp;
}
real rr3 = rr1*rr1*rr1;
#ifdef APPLY_SCALE
real dsc3 = rr3*(1 - scale3*dScale);
real dsc5 = (3*rr3*rr1*rr1)* (1 - scale5*dScale);
real dsc7 = (15*rr3*rr3*rr1)*(1 - scale7*dScale);
real psc3 = rr3*(1 - scale3*pScale);
real psc5 = (3*rr3*rr1*rr1)*(1 - scale5*pScale);
real psc7 = (15*rr3*rr3*rr1)*(1 - scale7*pScale);
#else
real psc3 = rr3*(1 - scale3);
real psc5 = (3*rr3*rr1*rr1)*(1 - scale5);
real psc7 = (15*rr3*rr3*rr1)*(1 - scale7);
#endif
real prefactor1 = 0.5f*(psc3 - bn1);
#ifdef APPLY_SCALE
real prefactor2 = 0.5f*(dsc3 - bn1);
#endif
real dixuk1 = di2*atom2.inducedDipole.z - di3*atom2.inducedDipole.y;
real dixukp1 = di2*atom2.inducedDipolePolar.z - di3*atom2.inducedDipolePolar.y;
#ifdef APPLY_SCALE
real ttm2i1 = prefactor1*dixuk1 + prefactor2*dixukp1;
#else
real ttm2i1 = prefactor1*(dixuk1 + dixukp1);
#endif
real dixuk2 = di3*atom2.inducedDipole.x - di1*atom2.inducedDipole.z;
real dixukp2 = di3*atom2.inducedDipolePolar.x - di1*atom2.inducedDipolePolar.z;
#ifdef APPLY_SCALE
real ttm2i2 = prefactor1*dixuk2 + prefactor2*dixukp2;
#else
real ttm2i2 = prefactor1*(dixuk2 + dixukp2);
#endif
real dixuk3 = di1*atom2.inducedDipole.y - di2*atom2.inducedDipole.x;
real dixukp3 = di1*atom2.inducedDipolePolar.y - di2*atom2.inducedDipolePolar.x;
#ifdef APPLY_SCALE
real ttm2i3 = prefactor1*dixuk3 + prefactor2*dixukp3;
#else
real ttm2i3 = prefactor1*(dixuk3 + dixukp3);
#endif
real sci4 = atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr;
real scip4 = atom2.inducedDipolePolar.x*xr + atom2.inducedDipolePolar.y*yr + atom2.inducedDipolePolar.z*zr;
real gti2 = bn2*(sci4+scip4);
#ifdef APPLY_SCALE
real gtri2 = (sci4*psc5+scip4*dsc5);
#else
real gtri2 = psc5*(sci4+scip4);
#endif
prefactor1 = 0.5f*(gti2 - gtri2);
ttm2i1 += prefactor1*(di2*zr - di3*yr);
ttm2i2 += prefactor1*(di3*xr - di1*zr);
ttm2i3 += prefactor1*(di1*yr - di2*xr);
atom1.torque.x += ttm2i1;
atom1.torque.y += ttm2i2;
atom1.torque.z += ttm2i3;
}
......@@ -6980,7 +6980,7 @@ static void compareForcesEnergy(std::string& testName, double expectedEnergy, do
const std::vector<Vec3>& expectedForces,
const std::vector<Vec3>& forces, double tolerance) {
for (unsigned int ii = 0; ii < forces.size(); ii++) {
ASSERT_EQUAL_VEC_MOD(expectedForces[ii], forces[ii], tolerance, testName);
ASSERT_EQUAL_VEC(expectedForces[ii], forces[ii], tolerance);
}
ASSERT_EQUAL_TOL_MOD(expectedEnergy, energy, tolerance, testName);
}
......
......@@ -6983,7 +6983,7 @@ static void compareForcesEnergy(std::string& testName, double expectedEnergy, do
const std::vector<Vec3>& forces, double tolerance) {
 
for (unsigned int ii = 0; ii < forces.size(); ii++) {
ASSERT_EQUAL_VEC_MOD(expectedForces[ii], forces[ii], tolerance, testName);
ASSERT_EQUAL_VEC(expectedForces[ii], forces[ii], tolerance);
}
ASSERT_EQUAL_TOL_MOD(expectedEnergy, energy, tolerance, testName);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment