Commit fd473eea authored by Peter Eastman's avatar Peter Eastman
Browse files

Merge branch 'master' into nucleic

parents 0a751b5b 6a985cfd
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef struct {
real4 posq;
real3 force, dipole, inducedDipole, inducedDipolePolar;
real3 pos, force, torque, inducedDipole, inducedDipolePolar, sphericalDipole;
real q;
float thole, damp;
#ifdef INCLUDE_QUADRUPOLES
real quadrupoleXX, quadrupoleXY, quadrupoleXZ;
real quadrupoleYY, quadrupoleYZ;
real sphericalQuadrupole[5];
#endif
float thole, damp;
} AtomData;
__device__ void computeOneInteractionF1(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real& energy, real3& outputForce);
__device__ void computeOneInteractionT1(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real3& outputForce);
__device__ void computeOneInteractionT3(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real3& outputForce);
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
data.posq = posq[atom];
data.dipole.x = labFrameDipole[atom*3];
data.dipole.y = labFrameDipole[atom*3+1];
data.dipole.z = labFrameDipole[atom*3+2];
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ sphericalDipole,
const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
real4 atomPosq = posq[atom];
data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
data.q = atomPosq.w;
data.sphericalDipole.x = sphericalDipole[atom*3];
data.sphericalDipole.y = sphericalDipole[atom*3+1];
data.sphericalDipole.z = sphericalDipole[atom*3+2];
#ifdef INCLUDE_QUADRUPOLES
data.quadrupoleXX = labFrameQuadrupole[atom*5];
data.quadrupoleXY = labFrameQuadrupole[atom*5+1];
data.quadrupoleXZ = labFrameQuadrupole[atom*5+2];
data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
data.sphericalQuadrupole[0] = sphericalQuadrupole[atom*5];
data.sphericalQuadrupole[1] = sphericalQuadrupole[atom*5+1];
data.sphericalQuadrupole[2] = sphericalQuadrupole[atom*5+2];
data.sphericalQuadrupole[3] = sphericalQuadrupole[atom*5+3];
data.sphericalQuadrupole[4] = sphericalQuadrupole[atom*5+4];
#endif
data.inducedDipole.x = inducedDipole[atom*3];
data.inducedDipole.y = inducedDipole[atom*3+1];
......@@ -57,11 +54,327 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
return (x && y ? 0.0f : (x && p ? 0.5f : 1.0f));
}
__device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool hasExclusions, float dScale, float pScale, float mScale, float forceFactor, mixed& energy) {
// Compute the displacement.
real3 delta;
delta.x = atom2.pos.x - atom1.pos.x;
delta.y = atom2.pos.y - atom1.pos.y;
delta.z = atom2.pos.z - atom1.pos.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real rInv = RSQRT(r2);
real r = r2*rInv;
// Rotate the various dipoles and quadrupoles.
real qiRotationMatrix[3][3];
buildQIRotationMatrix(delta, rInv, qiRotationMatrix);
real3 qiUindI = 0.5f*make_real3(qiRotationMatrix[0][1]*atom1.inducedDipole.x + qiRotationMatrix[0][2]*atom1.inducedDipole.y + qiRotationMatrix[0][0]*atom1.inducedDipole.z,
qiRotationMatrix[1][1]*atom1.inducedDipole.x + qiRotationMatrix[1][2]*atom1.inducedDipole.y + qiRotationMatrix[1][0]*atom1.inducedDipole.z,
qiRotationMatrix[2][1]*atom1.inducedDipole.x + qiRotationMatrix[2][2]*atom1.inducedDipole.y + qiRotationMatrix[2][0]*atom1.inducedDipole.z);
real3 qiUindJ = 0.5f*make_real3(qiRotationMatrix[0][1]*atom2.inducedDipole.x + qiRotationMatrix[0][2]*atom2.inducedDipole.y + qiRotationMatrix[0][0]*atom2.inducedDipole.z,
qiRotationMatrix[1][1]*atom2.inducedDipole.x + qiRotationMatrix[1][2]*atom2.inducedDipole.y + qiRotationMatrix[1][0]*atom2.inducedDipole.z,
qiRotationMatrix[2][1]*atom2.inducedDipole.x + qiRotationMatrix[2][2]*atom2.inducedDipole.y + qiRotationMatrix[2][0]*atom2.inducedDipole.z);
real3 qiUinpI = 0.5f*make_real3(qiRotationMatrix[0][1]*atom1.inducedDipolePolar.x + qiRotationMatrix[0][2]*atom1.inducedDipolePolar.y + qiRotationMatrix[0][0]*atom1.inducedDipolePolar.z,
qiRotationMatrix[1][1]*atom1.inducedDipolePolar.x + qiRotationMatrix[1][2]*atom1.inducedDipolePolar.y + qiRotationMatrix[1][0]*atom1.inducedDipolePolar.z,
qiRotationMatrix[2][1]*atom1.inducedDipolePolar.x + qiRotationMatrix[2][2]*atom1.inducedDipolePolar.y + qiRotationMatrix[2][0]*atom1.inducedDipolePolar.z);
real3 qiUinpJ = 0.5f*make_real3(qiRotationMatrix[0][1]*atom2.inducedDipolePolar.x + qiRotationMatrix[0][2]*atom2.inducedDipolePolar.y + qiRotationMatrix[0][0]*atom2.inducedDipolePolar.z,
qiRotationMatrix[1][1]*atom2.inducedDipolePolar.x + qiRotationMatrix[1][2]*atom2.inducedDipolePolar.y + qiRotationMatrix[1][0]*atom2.inducedDipolePolar.z,
qiRotationMatrix[2][1]*atom2.inducedDipolePolar.x + qiRotationMatrix[2][2]*atom2.inducedDipolePolar.y + qiRotationMatrix[2][0]*atom2.inducedDipolePolar.z);
real3 rotatedDipole1 = rotateDipole(atom1.sphericalDipole, qiRotationMatrix);
real3 rotatedDipole2 = rotateDipole(atom2.sphericalDipole, qiRotationMatrix);
real rotatedQuadrupole1[] = {0, 0, 0, 0, 0};
real rotatedQuadrupole2[] = {0, 0, 0, 0, 0};
#ifdef INCLUDE_QUADRUPOLES
rotateQuadupoles(qiRotationMatrix, atom1.sphericalQuadrupole, atom2.sphericalQuadrupole, rotatedQuadrupole1, rotatedQuadrupole2);
#endif
// The field derivatives at I due to permanent and induced moments on J, and vice-versa.
// Also, their derivatives w.r.t. R, which are needed for force calculations
real Vij[9], Vji[9], VjiR[9], VijR[9];
// The field derivatives at I due to only permanent moments on J, and vice-versa.
real Vijp[3], Vijd[3], Vjip[3], Vjid[3];
real rInvVec[7];
// The rInvVec array is defined such that the ith element is R^-i, with the
// dieleectric constant folded in, to avoid conversions later.
rInvVec[1] = rInv;
for (int i = 2; i < 7; ++i)
rInvVec[i] = rInvVec[i-1] * rInv;
real dmp = atom1.damp*atom2.damp;
real a = min(atom1.thole, atom2.thole);
real u = fabs(dmp) > 1.0e-5f ? r/dmp : 1e10f;
real au3 = a*u*u*u;
real expau3 = au3 < 50 ? EXP(-au3) : 0;
real a2u6 = au3*au3;
real a3u9 = a2u6*au3;
// Thole damping factors for energies
real thole_c = 1 - expau3;
real thole_d0 = 1 - expau3*(1 + 1.5f*au3);
real thole_d1 = 1 - expau3;
real thole_q0 = 1 - expau3*(1 + au3 + a2u6);
real thole_q1 = 1 - expau3*(1 + au3);
// Thole damping factors for derivatives
real dthole_c = 1 - expau3*(1 + 1.5f*au3);
real dthole_d0 = 1 - expau3*(1 + au3 + 1.5f*a2u6);
real dthole_d1 = 1 - expau3*(1 + au3);
real dthole_q0 = 1 - expau3*(1 + au3 + 0.25f*a2u6 + 0.75f*a3u9);
real dthole_q1 = 1 - expau3*(1 + au3 + 0.75f*a2u6);
// Now we compute the (attenuated) Coulomb operator and its derivatives, contracted with
// permanent moments and induced dipoles. Note that the coefficient of the permanent force
// terms is half of the expected value; this is because we compute the interaction of I with
// the sum of induced and permanent moments on J, as well as the interaction of J with I's
// permanent and induced moments; doing so double counts the permanent-permanent interaction.
real ePermCoef, dPermCoef, eUIndCoef, dUIndCoef, eUInpCoef, dUInpCoef;
// C-C terms (m=0)
ePermCoef = rInvVec[1]*mScale;
dPermCoef = -0.5f*mScale*rInvVec[2];
Vij[0] = ePermCoef*atom2.q;
Vji[0] = ePermCoef*atom1.q;
VijR[0] = dPermCoef*atom2.q;
VjiR[0] = dPermCoef*atom1.q;
// C-D and C-Uind terms (m=0)
ePermCoef = rInvVec[2]*mScale;
eUIndCoef = rInvVec[2]*pScale*thole_c;
eUInpCoef = rInvVec[2]*dScale*thole_c;
dPermCoef = -rInvVec[3]*mScale;
dUIndCoef = -2*rInvVec[3]*pScale*dthole_c;
dUInpCoef = -2*rInvVec[3]*dScale*dthole_c;
Vij[0] += -(ePermCoef*rotatedDipole2.x + eUIndCoef*qiUindJ.x + eUInpCoef*qiUinpJ.x);
Vji[1] = -(ePermCoef*atom1.q);
VijR[0] += -(dPermCoef*rotatedDipole2.x + dUIndCoef*qiUindJ.x + dUInpCoef*qiUinpJ.x);
VjiR[1] = -(dPermCoef*atom1.q);
Vjip[0] = -(eUInpCoef*atom1.q);
Vjid[0] = -(eUIndCoef*atom1.q);
// D-C and Uind-C terms (m=0)
Vij[1] = ePermCoef*atom2.q;
Vji[0] += ePermCoef*rotatedDipole1.x + eUIndCoef*qiUindI.x + eUInpCoef*qiUinpI.x;
VijR[1] = dPermCoef*atom2.q;
VjiR[0] += dPermCoef*rotatedDipole1.x + dUIndCoef*qiUindI.x + dUInpCoef*qiUinpI.x;
Vijp[0] = eUInpCoef*atom2.q;
Vijd[0] = eUIndCoef*atom2.q;
// D-D and D-Uind terms (m=0)
ePermCoef = -2*rInvVec[3]*mScale;
eUIndCoef = -2*rInvVec[3]*pScale*thole_d0;
eUInpCoef = -2*rInvVec[3]*dScale*thole_d0;
dPermCoef = 3*rInvVec[4]*mScale;
dUIndCoef = 6*rInvVec[4]*pScale*dthole_d0;
dUInpCoef = 6*rInvVec[4]*dScale*dthole_d0;
Vij[1] += ePermCoef*rotatedDipole2.x + eUIndCoef*qiUindJ.x + eUInpCoef*qiUinpJ.x;
Vji[1] += ePermCoef*rotatedDipole1.x + eUIndCoef*qiUindI.x + eUInpCoef*qiUinpI.x;
VijR[1] += dPermCoef*rotatedDipole2.x + dUIndCoef*qiUindJ.x + dUInpCoef*qiUinpJ.x;
VjiR[1] += dPermCoef*rotatedDipole1.x + dUIndCoef*qiUindI.x + dUInpCoef*qiUinpI.x;
Vijp[0] += eUInpCoef*rotatedDipole2.x;
Vijd[0] += eUIndCoef*rotatedDipole2.x;
Vjip[0] += eUInpCoef*rotatedDipole1.x;
Vjid[0] += eUIndCoef*rotatedDipole1.x;
// D-D and D-Uind terms (m=1)
ePermCoef = rInvVec[3]*mScale;
eUIndCoef = rInvVec[3]*pScale*thole_d1;
eUInpCoef = rInvVec[3]*dScale*thole_d1;
dPermCoef = -1.5f*rInvVec[4]*mScale;
dUIndCoef = -3*rInvVec[4]*pScale*dthole_d1;
dUInpCoef = -3*rInvVec[4]*dScale*dthole_d1;
Vij[2] = ePermCoef*rotatedDipole2.y + eUIndCoef*qiUindJ.y + eUInpCoef*qiUinpJ.y;
Vji[2] = ePermCoef*rotatedDipole1.y + eUIndCoef*qiUindI.y + eUInpCoef*qiUinpI.y;
VijR[2] = dPermCoef*rotatedDipole2.y + dUIndCoef*qiUindJ.y + dUInpCoef*qiUinpJ.y;
VjiR[2] = dPermCoef*rotatedDipole1.y + dUIndCoef*qiUindI.y + dUInpCoef*qiUinpI.y;
Vij[3] = ePermCoef*rotatedDipole2.z + eUIndCoef*qiUindJ.z + eUInpCoef*qiUinpJ.z;
Vji[3] = ePermCoef*rotatedDipole1.z + eUIndCoef*qiUindI.z + eUInpCoef*qiUinpI.z;
VijR[3] = dPermCoef*rotatedDipole2.z + dUIndCoef*qiUindJ.z + dUInpCoef*qiUinpJ.z;
VjiR[3] = dPermCoef*rotatedDipole1.z + dUIndCoef*qiUindI.z + dUInpCoef*qiUinpI.z;
Vijp[1] = eUInpCoef*rotatedDipole2.y;
Vijd[1] = eUIndCoef*rotatedDipole2.y;
Vjip[1] = eUInpCoef*rotatedDipole1.y;
Vjid[1] = eUIndCoef*rotatedDipole1.y;
Vijp[2] = eUInpCoef*rotatedDipole2.z;
Vijd[2] = eUIndCoef*rotatedDipole2.z;
Vjip[2] = eUInpCoef*rotatedDipole1.z;
Vjid[2] = eUIndCoef*rotatedDipole1.z;
// C-Q terms (m=0)
ePermCoef = mScale*rInvVec[3];
dPermCoef = -1.5f*rInvVec[4]*mScale;
Vij[0] += ePermCoef*rotatedQuadrupole2[0];
Vji[4] = ePermCoef*atom1.q;
VijR[0] += dPermCoef*rotatedQuadrupole2[0];
VjiR[4] = dPermCoef*atom1.q;
// Q-C terms (m=0)
Vij[4] = ePermCoef*atom2.q;
Vji[0] += ePermCoef*rotatedQuadrupole1[0];
VijR[4] = dPermCoef*atom2.q;
VjiR[0] += dPermCoef*rotatedQuadrupole1[0];
// D-Q and Uind-Q terms (m=0)
ePermCoef = rInvVec[4]*3.0*mScale;
eUIndCoef = rInvVec[4]*3.0*pScale*thole_q0;
eUInpCoef = rInvVec[4]*3.0*dScale*thole_q0;
dPermCoef = -6*rInvVec[5]*mScale;
dUIndCoef = -12*rInvVec[5]*pScale*dthole_q0;
dUInpCoef = -12*rInvVec[5]*dScale*dthole_q0;
Vij[1] += ePermCoef*rotatedQuadrupole2[0];
Vji[4] += ePermCoef*rotatedDipole1.x + eUIndCoef*qiUindI.x + eUInpCoef*qiUinpI.x;
VijR[1] += dPermCoef*rotatedQuadrupole2[0];
VjiR[4] += dPermCoef*rotatedDipole1.x + dUIndCoef*qiUindI.x + dUInpCoef*qiUinpI.x;
Vijp[0] += eUInpCoef*rotatedQuadrupole2[0];
Vijd[0] += eUIndCoef*rotatedQuadrupole2[0];
// Q-D and Q-Uind terms (m=0)
Vij[4] += -(ePermCoef*rotatedDipole2.x + eUIndCoef*qiUindJ.x + eUInpCoef*qiUinpJ.x);
Vji[1] += -(ePermCoef*rotatedQuadrupole1[0]);
VijR[4] += -(dPermCoef*rotatedDipole2.x + dUIndCoef*qiUindJ.x + dUInpCoef*qiUinpJ.x);
VjiR[1] += -(dPermCoef*rotatedQuadrupole1[0]);
Vjip[0] += -(eUInpCoef*rotatedQuadrupole1[0]);
Vjid[0] += -(eUIndCoef*rotatedQuadrupole1[0]);
// D-Q and Uind-Q terms (m=1)
const real sqrtThree = SQRT((real) 3);
ePermCoef = -sqrtThree*rInvVec[4]*mScale;
eUIndCoef = -sqrtThree*rInvVec[4]*pScale*thole_q1;
eUInpCoef = -sqrtThree*rInvVec[4]*dScale*thole_q1;
dPermCoef = 2*sqrtThree*rInvVec[5]*mScale;
dUIndCoef = 4*sqrtThree*rInvVec[5]*pScale*dthole_q1;
dUInpCoef = 4*sqrtThree*rInvVec[5]*dScale*dthole_q1;
Vij[2] += ePermCoef*rotatedQuadrupole2[1];
Vji[5] = ePermCoef*rotatedDipole1.y + eUIndCoef*qiUindI.y + eUInpCoef*qiUinpI.y;
VijR[2] += dPermCoef*rotatedQuadrupole2[1];
VjiR[5] = dPermCoef*rotatedDipole1.y + dUIndCoef*qiUindI.y + dUInpCoef*qiUinpI.y;
Vij[3] += ePermCoef*rotatedQuadrupole2[2];
Vji[6] = ePermCoef*rotatedDipole1.z + eUIndCoef*qiUindI.z + eUInpCoef*qiUinpI.z;
VijR[3] += dPermCoef*rotatedQuadrupole2[2];
VjiR[6] = dPermCoef*rotatedDipole1.z + dUIndCoef*qiUindI.z + dUInpCoef*qiUinpI.z;
Vijp[1] += eUInpCoef*rotatedQuadrupole2[1];
Vijd[1] += eUIndCoef*rotatedQuadrupole2[1];
Vijp[2] += eUInpCoef*rotatedQuadrupole2[2];
Vijd[2] += eUIndCoef*rotatedQuadrupole2[2];
// D-Q and Uind-Q terms (m=1)
Vij[5] = -(ePermCoef*rotatedDipole2.y + eUIndCoef*qiUindJ.y + eUInpCoef*qiUinpJ.y);
Vji[2] += -(ePermCoef*rotatedQuadrupole1[1]);
VijR[5] = -(dPermCoef*rotatedDipole2.y + dUIndCoef*qiUindJ.y + dUInpCoef*qiUinpJ.y);
VjiR[2] += -(dPermCoef*rotatedQuadrupole1[1]);
Vij[6] = -(ePermCoef*rotatedDipole2.z + eUIndCoef*qiUindJ.z + eUInpCoef*qiUinpJ.z);
Vji[3] += -(ePermCoef*rotatedQuadrupole1[2]);
VijR[6] = -(dPermCoef*rotatedDipole2.z + dUIndCoef*qiUindJ.z + dUInpCoef*qiUinpJ.z);
VjiR[3] += -(dPermCoef*rotatedQuadrupole1[2]);
Vjip[1] += -(eUInpCoef*rotatedQuadrupole1[1]);
Vjid[1] += -(eUIndCoef*rotatedQuadrupole1[1]);
Vjip[2] += -(eUInpCoef*rotatedQuadrupole1[2]);
Vjid[2] += -(eUIndCoef*rotatedQuadrupole1[2]);
// Q-Q terms (m=0)
ePermCoef = 6*rInvVec[5]*mScale;
dPermCoef = -15*rInvVec[6]*mScale;
Vij[4] += ePermCoef*rotatedQuadrupole2[0];
Vji[4] += ePermCoef*rotatedQuadrupole1[0];
VijR[4] += dPermCoef*rotatedQuadrupole2[0];
VjiR[4] += dPermCoef*rotatedQuadrupole1[0];
// Q-Q terms (m=1)
ePermCoef = -4*rInvVec[5]*mScale;
dPermCoef = 10*rInvVec[6]*mScale;
Vij[5] += ePermCoef*rotatedQuadrupole2[1];
Vji[5] += ePermCoef*rotatedQuadrupole1[1];
VijR[5] += dPermCoef*rotatedQuadrupole2[1];
VjiR[5] += dPermCoef*rotatedQuadrupole1[1];
Vij[6] += ePermCoef*rotatedQuadrupole2[2];
Vji[6] += ePermCoef*rotatedQuadrupole1[2];
VijR[6] += dPermCoef*rotatedQuadrupole2[2];
VjiR[6] += dPermCoef*rotatedQuadrupole1[2];
// Q-Q terms (m=2)
ePermCoef = rInvVec[5]*mScale;
dPermCoef = -2.5f*rInvVec[6]*mScale;
Vij[7] = ePermCoef*rotatedQuadrupole2[3];
Vji[7] = ePermCoef*rotatedQuadrupole1[3];
VijR[7] = dPermCoef*rotatedQuadrupole2[3];
VjiR[7] = dPermCoef*rotatedQuadrupole1[3];
Vij[8] = ePermCoef*rotatedQuadrupole2[4];
Vji[8] = ePermCoef*rotatedQuadrupole1[4];
VijR[8] = dPermCoef*rotatedQuadrupole2[4];
VjiR[8] = dPermCoef*rotatedQuadrupole1[4];
// Evaluate the energies, forces and torques due to permanent+induced moments
// interacting with just the permanent moments.
energy += forceFactor*0.5f*(
atom1.q*Vij[0] + rotatedDipole1.x*Vij[1] + rotatedDipole1.y*Vij[2] + rotatedDipole1.z*Vij[3] + rotatedQuadrupole1[0]*Vij[4] + rotatedQuadrupole1[1]*Vij[5] + rotatedQuadrupole1[2]*Vij[6] + rotatedQuadrupole1[3]*Vij[7] + rotatedQuadrupole1[4]*Vij[8] +
atom2.q*Vji[0] + rotatedDipole2.x*Vji[1] + rotatedDipole2.y*Vji[2] + rotatedDipole2.z*Vji[3] + rotatedQuadrupole2[0]*Vji[4] + rotatedQuadrupole2[1]*Vji[5] + rotatedQuadrupole2[2]*Vji[6] + rotatedQuadrupole2[3]*Vji[7] + rotatedQuadrupole2[4]*Vji[8]);
real fIZ = atom1.q*VijR[0] + rotatedDipole1.x*VijR[1] + rotatedDipole1.y*VijR[2] + rotatedDipole1.z*VijR[3] + rotatedQuadrupole1[0]*VijR[4] + rotatedQuadrupole1[1]*VijR[5] + rotatedQuadrupole1[2]*VijR[6] + rotatedQuadrupole1[3]*VijR[7] + rotatedQuadrupole1[4]*VijR[8];
real fJZ = atom2.q*VjiR[0] + rotatedDipole2.x*VjiR[1] + rotatedDipole2.y*VjiR[2] + rotatedDipole2.z*VjiR[3] + rotatedQuadrupole2[0]*VjiR[4] + rotatedQuadrupole2[1]*VjiR[5] + rotatedQuadrupole2[2]*VjiR[6] + rotatedQuadrupole2[3]*VjiR[7] + rotatedQuadrupole2[4]*VjiR[8];
real EIX = rotatedDipole1.z*Vij[1] - rotatedDipole1.x*Vij[3] + sqrtThree*rotatedQuadrupole1[2]*Vij[4] + rotatedQuadrupole1[4]*Vij[5] - (sqrtThree*rotatedQuadrupole1[0]+rotatedQuadrupole1[3])*Vij[6] + rotatedQuadrupole1[2]*Vij[7] - rotatedQuadrupole1[1]*Vij[8];
real EIY = -rotatedDipole1.y*Vij[1] + rotatedDipole1.x*Vij[2] - sqrtThree*rotatedQuadrupole1[1]*Vij[4] + (sqrtThree*rotatedQuadrupole1[0]-rotatedQuadrupole1[3])*Vij[5] - rotatedQuadrupole1[4]*Vij[6] + rotatedQuadrupole1[1]*Vij[7] + rotatedQuadrupole1[2]*Vij[8];
real EIZ = -rotatedDipole1.z*Vij[2] + rotatedDipole1.y*Vij[3] - rotatedQuadrupole1[2]*Vij[5] + rotatedQuadrupole1[1]*Vij[6] - 2*rotatedQuadrupole1[4]*Vij[7] + 2*rotatedQuadrupole1[3]*Vij[8];
real EJX = rotatedDipole2.z*Vji[1] - rotatedDipole2.x*Vji[3] + sqrtThree*rotatedQuadrupole2[2]*Vji[4] + rotatedQuadrupole2[4]*Vji[5] - (sqrtThree*rotatedQuadrupole2[0]+rotatedQuadrupole2[3])*Vji[6] + rotatedQuadrupole2[2]*Vji[7] - rotatedQuadrupole2[1]*Vji[8];
real EJY = -rotatedDipole2.y*Vji[1] + rotatedDipole2.x*Vji[2] - sqrtThree*rotatedQuadrupole2[1]*Vji[4] + (sqrtThree*rotatedQuadrupole2[0]-rotatedQuadrupole2[3])*Vji[5] - rotatedQuadrupole2[4]*Vji[6] + rotatedQuadrupole2[1]*Vji[7] + rotatedQuadrupole2[2]*Vji[8];
real EJZ = -rotatedDipole2.z*Vji[2] + rotatedDipole2.y*Vji[3] - rotatedQuadrupole2[2]*Vji[5] + rotatedQuadrupole2[1]*Vji[6] - 2*rotatedQuadrupole2[4]*Vji[7] + 2*rotatedQuadrupole2[3]*Vji[8];
// Define the torque intermediates for the induced dipoles. These are simply the induced dipole torque
// intermediates dotted with the field due to permanent moments only, at each center. We inline the
// induced dipole torque intermediates here, for simplicity. N.B. There are no torques on the dipoles
// themselves, so we accumulate the torque intermediates into separate variables to allow them to be
// used only in the force calculation.
//
// The torque about the x axis (needed to obtain the y force on the induced dipoles, below)
// qiUindIx[0] = qiQUindI[2]; qiUindIx[1] = 0; qiUindIx[2] = -qiQUindI[0]
real iEIX = qiUinpI.z*Vijp[0] + qiUindI.z*Vijd[0] - qiUinpI.x*Vijp[2] - qiUindI.x*Vijd[2];
real iEJX = qiUinpJ.z*Vjip[0] + qiUindJ.z*Vjid[0] - qiUinpJ.x*Vjip[2] - qiUindJ.x*Vjid[2];
// The torque about the y axis (needed to obtain the x force on the induced dipoles, below)
// qiUindIy[0] = -qiQUindI[1]; qiUindIy[1] = qiQUindI[0]; qiUindIy[2] = 0
real iEIY = qiUinpI.x*Vijp[1] + qiUindI.x*Vijd[1] - qiUinpI.y*Vijp[0] - qiUindI.y*Vijd[0];
real iEJY = qiUinpJ.x*Vjip[1] + qiUindJ.x*Vjid[1] - qiUinpJ.y*Vjip[0] - qiUindJ.y*Vjid[0];
#ifdef USE_MUTUAL_POLARIZATION
// Uind-Uind terms (m=0)
real eCoef = -4*rInvVec[3]*thole_d0;
real dCoef = 6*rInvVec[4]*dthole_d0;
iEIX += eCoef*(qiUinpI.z*qiUindJ.x + qiUindI.z*qiUinpJ.x);
iEJX += eCoef*(qiUinpJ.z*qiUindI.x + qiUindJ.z*qiUinpI.x);
iEIY -= eCoef*(qiUinpI.y*qiUindJ.x + qiUindI.y*qiUinpJ.x);
iEJY -= eCoef*(qiUinpJ.y*qiUindI.x + qiUindJ.y*qiUinpI.x);
fIZ += dCoef*(qiUinpI.x*qiUindJ.x + qiUindI.x*qiUinpJ.x);
fIZ += dCoef*(qiUinpJ.x*qiUindI.x + qiUindJ.x*qiUinpI.x);
// Uind-Uind terms (m=1)
eCoef = 2*rInvVec[3]*thole_d1;
dCoef = -3*rInvVec[4]*dthole_d1;
iEIX -= eCoef*(qiUinpI.x*qiUindJ.z + qiUindI.x*qiUinpJ.z);
iEJX -= eCoef*(qiUinpJ.x*qiUindI.z + qiUindJ.x*qiUinpI.z);
iEIY += eCoef*(qiUinpI.x*qiUindJ.y + qiUindI.x*qiUinpJ.y);
iEJY += eCoef*(qiUinpJ.x*qiUindI.y + qiUindJ.x*qiUinpI.y);
fIZ += dCoef*(qiUinpI.y*qiUindJ.y + qiUindI.y*qiUinpJ.y + qiUinpI.z*qiUindJ.z + qiUindI.z*qiUinpJ.z);
fIZ += dCoef*(qiUinpJ.y*qiUindI.y + qiUindJ.y*qiUinpI.y + qiUinpJ.z*qiUindI.z + qiUindJ.z*qiUinpI.z);
#endif
// The quasi-internal frame forces and torques. Note that the induced torque intermediates are
// used in the force expression, but not in the torques; the induced dipoles are isotropic.
real qiForce[3] = {rInv*(EIY+EJY+iEIY+iEJY), -rInv*(EIX+EJX+iEIX+iEJX), -(fJZ+fIZ)};
real qiTorqueI[3] = {-EIX, -EIY, -EIZ};
real qiTorqueJ[3] = {-EJX, -EJY, -EJZ};
real3 force = make_real3(qiRotationMatrix[1][1]*qiForce[0] + qiRotationMatrix[2][1]*qiForce[1] + qiRotationMatrix[0][1]*qiForce[2],
qiRotationMatrix[1][2]*qiForce[0] + qiRotationMatrix[2][2]*qiForce[1] + qiRotationMatrix[0][2]*qiForce[2],
qiRotationMatrix[1][0]*qiForce[0] + qiRotationMatrix[2][0]*qiForce[1] + qiRotationMatrix[0][0]*qiForce[2]);
atom1.force += force;
atom1.torque += make_real3(qiRotationMatrix[1][1]*qiTorqueI[0] + qiRotationMatrix[2][1]*qiTorqueI[1] + qiRotationMatrix[0][1]*qiTorqueI[2],
qiRotationMatrix[1][2]*qiTorqueI[0] + qiRotationMatrix[2][2]*qiTorqueI[1] + qiRotationMatrix[0][2]*qiTorqueI[2],
qiRotationMatrix[1][0]*qiTorqueI[0] + qiRotationMatrix[2][0]*qiTorqueI[1] + qiRotationMatrix[0][0]*qiTorqueI[2]);
if (forceFactor == 1) {
atom2.force -= force;
atom2.torque += make_real3(qiRotationMatrix[1][1]*qiTorqueJ[0] + qiRotationMatrix[2][1]*qiTorqueJ[1] + qiRotationMatrix[0][1]*qiTorqueJ[2],
qiRotationMatrix[1][2]*qiTorqueJ[0] + qiRotationMatrix[2][2]*qiTorqueJ[1] + qiRotationMatrix[0][2]*qiTorqueJ[2],
qiRotationMatrix[1][0]*qiTorqueJ[0] + qiRotationMatrix[2][0]*qiTorqueJ[1] + qiRotationMatrix[0][0]*qiTorqueJ[2]);
}
}
/**
* Compute electrostatic interactions.
*/
extern "C" __global__ void computeElectrostatics(
unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer,
unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer,
const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF
......@@ -69,16 +382,15 @@ extern "C" __global__ void computeElectrostatics(
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const unsigned int* __restrict__ interactingAtoms,
#endif
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ sphericalDipole, const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
real energy = 0;
mixed energy = 0;
__shared__ AtomData localData[THREAD_BLOCK_SIZE];
// First loop: process tiles that contain exclusions.
const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
......@@ -89,21 +401,23 @@ extern "C" __global__ void computeElectrostatics(
const unsigned int y = tileIndices.y;
AtomData data;
unsigned int atom1 = x*TILE_SIZE + tgx;
loadAtomData(data, atom1, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
loadAtomData(data, atom1, posq, sphericalDipole, sphericalQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
data.force = make_real3(0);
data.torque = make_real3(0);
uint2 covalent = covalentFlags[pos*TILE_SIZE+tgx];
unsigned int polarizationGroup = polarizationGroupFlags[pos*TILE_SIZE+tgx];
if (x == y) {
// This tile is on the diagonal.
localData[threadIdx.x].posq = data.posq;
localData[threadIdx.x].dipole = data.dipole;
localData[threadIdx.x].pos = data.pos;
localData[threadIdx.x].q = data.q;
localData[threadIdx.x].sphericalDipole = data.sphericalDipole;
#ifdef INCLUDE_QUADRUPOLES
localData[threadIdx.x].quadrupoleXX = data.quadrupoleXX;
localData[threadIdx.x].quadrupoleXY = data.quadrupoleXY;
localData[threadIdx.x].quadrupoleXZ = data.quadrupoleXZ;
localData[threadIdx.x].quadrupoleYY = data.quadrupoleYY;
localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ;
localData[threadIdx.x].sphericalQuadrupole[0] = data.sphericalQuadrupole[0];
localData[threadIdx.x].sphericalQuadrupole[1] = data.sphericalQuadrupole[1];
localData[threadIdx.x].sphericalQuadrupole[2] = data.sphericalQuadrupole[2];
localData[threadIdx.x].sphericalQuadrupole[3] = data.sphericalQuadrupole[3];
localData[threadIdx.x].sphericalQuadrupole[4] = data.sphericalQuadrupole[4];
#endif
localData[threadIdx.x].inducedDipole = data.inducedDipole;
localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar;
......@@ -115,101 +429,57 @@ extern "C" __global__ void computeElectrostatics(
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
real tempEnergy;
float d = computeDScaleFactor(polarizationGroup, j);
float p = computePScaleFactor(covalent, polarizationGroup, j);
float m = computeMScaleFactor(covalent, j);
computeOneInteractionF1(data, localData[tbx+j], d, p, m, tempEnergy, tempForce);
data.force += tempForce;
energy += 0.5f*tempEnergy;
computeOneInteraction(data, localData[tbx+j], true, d, p, m, 0.5f, energy);
}
}
data.force *= ENERGY_SCALE_FACTOR;
data.force *= -ENERGY_SCALE_FACTOR;
data.torque *= ENERGY_SCALE_FACTOR;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
// Compute torques.
data.force = make_real3(0);
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
float d = computeDScaleFactor(polarizationGroup, j);
float p = computePScaleFactor(covalent, polarizationGroup, j);
float m = computeMScaleFactor(covalent, j);
computeOneInteractionT1(data, localData[tbx+j], d, p, m, tempForce);
data.force += tempForce;
}
}
data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.torque.x*0x100000000)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0x100000000)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0x100000000)));
}
else {
// This is an off-diagonal tile.
unsigned int j = y*TILE_SIZE + tgx;
loadAtomData(localData[threadIdx.x], j, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
loadAtomData(localData[threadIdx.x], j, posq, sphericalDipole, sphericalQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
localData[threadIdx.x].force = make_real3(0);
localData[threadIdx.x].torque = make_real3(0);
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
real tempEnergy;
float d = computeDScaleFactor(polarizationGroup, tj);
float p = computePScaleFactor(covalent, polarizationGroup, tj);
float m = computeMScaleFactor(covalent, tj);
computeOneInteractionF1(data, localData[tbx+tj], d, p, m, tempEnergy, tempForce);
data.force += tempForce;
localData[tbx+tj].force -= tempForce;
energy += tempEnergy;
computeOneInteraction(data, localData[tbx+tj], true, d, p, m, 1, energy);
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
data.force *= -ENERGY_SCALE_FACTOR;
data.torque *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= -ENERGY_SCALE_FACTOR;
localData[threadIdx.x].torque *= ENERGY_SCALE_FACTOR;
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.torque.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0x100000000)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
// Compute torques.
data.force = make_real3(0);
localData[threadIdx.x].force = make_real3(0);
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
float d = computeDScaleFactor(polarizationGroup, tj);
float p = computePScaleFactor(covalent, polarizationGroup, tj);
float m = computeMScaleFactor(covalent, tj);
computeOneInteractionT1(data, localData[tbx+tj], d, p, m, tempForce);
data.force += tempForce;
computeOneInteractionT3(data, localData[tbx+tj], d, p, m, tempForce);
localData[tbx+tj].force += tempForce;
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.z*0x100000000)));
}
}
......@@ -272,16 +542,18 @@ extern "C" __global__ void computeElectrostatics(
// Load atom data for this tile.
AtomData data;
loadAtomData(data, atom1, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
loadAtomData(data, atom1, posq, sphericalDipole, sphericalQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
data.force = make_real3(0);
data.torque = make_real3(0);
#ifdef USE_CUTOFF
unsigned int j = (numTiles <= maxTiles ? interactingAtoms[pos*TILE_SIZE+tgx] : y*TILE_SIZE + tgx);
#else
unsigned int j = y*TILE_SIZE + tgx;
#endif
atomIndices[threadIdx.x] = j;
loadAtomData(localData[threadIdx.x], j, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
loadAtomData(localData[threadIdx.x], j, posq, sphericalDipole, sphericalQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
localData[threadIdx.x].force = make_real3(0);
localData[threadIdx.x].torque = make_real3(0);
// Compute forces.
......@@ -289,21 +561,24 @@ extern "C" __global__ void computeElectrostatics(
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = atomIndices[tbx+tj];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
real tempEnergy;
computeOneInteractionF1(data, localData[tbx+tj], 1, 1, 1, tempEnergy, tempForce);
data.force += tempForce;
localData[tbx+tj].force -= tempForce;
energy += tempEnergy;
computeOneInteraction(data, localData[tbx+tj], false, 1, 1, 1, 1, energy);
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
data.force *= -ENERGY_SCALE_FACTOR;
data.torque *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= -ENERGY_SCALE_FACTOR;
localData[threadIdx.x].torque *= ENERGY_SCALE_FACTOR;
// Write results.
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.torque.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0x100000000)));
#ifdef USE_CUTOFF
offset = atomIndices[threadIdx.x];
#else
......@@ -312,36 +587,9 @@ extern "C" __global__ void computeElectrostatics(
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
// Compute torques.
data.force = make_real3(0);
localData[threadIdx.x].force = make_real3(0);
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
computeOneInteractionT1(data, localData[tbx+tj], 1, 1, 1, tempForce);
data.force += tempForce;
computeOneInteractionT3(data, localData[tbx+tj], 1, 1, 1, tempForce);
localData[tbx+tj].force += tempForce;
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
#ifdef USE_CUTOFF
offset = atomIndices[threadIdx.x];
#else
offset = y*TILE_SIZE + tgx;
#endif
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.z*0x100000000)));
}
pos++;
}
......
......@@ -545,6 +545,9 @@ extern "C" __global__ void computeFixedField(
localData[tbx+tj].field += fields[2];
localData[tbx+tj].fieldPolar += fields[3];
#ifdef USE_GK
}
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 fields[2];
computeOneGkInteraction(data, localData[tbx+tj], delta, fields);
data.gkField += fields[0];
localData[tbx+tj].gkField += fields[1];
......@@ -684,6 +687,9 @@ extern "C" __global__ void computeFixedField(
localData[tbx+tj].field += fields[2];
localData[tbx+tj].fieldPolar += fields[3];
#ifdef USE_GK
}
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 fields[2];
computeOneGkInteraction(data, localData[tbx+tj], delta, fields);
data.gkField += fields[0];
localData[tbx+tj].gkField += fields[1];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment