Unverified Commit 1eec1e15 authored by peastman's avatar peastman Committed by GitHub
Browse files

Created HippoNonbondedForce (#2296)

* Created API for HIPPO force field

* Beginning of reference implementation of HIPPO

* Continuing reference implementation of HIPPO

* Continuing reference implementation of HIPPO

* Continuing reference implementation of HIPPO

* Continuing reference implementation of HIPPO

* Continuing reference implementation of HIPPO

* Continuing reference implementation of HIPPO

* Continuing reference implementation of HIPPO

* Completed reference of HIPPO with no cutoff

* Beginning cutoffs/PME for reference implementation of HIPPO

* Continuing PME for reference implementation of HIPPO

* Continuing PME for reference implementation of HIPPO

* Continuing PME for reference implementation of HIPPO

* Completed reference implementation of HIPPO

* Cleanup and optimization to HIPPO reference

* Further cleanup to HIPPO

* Combined direct space interactions into a single loop

* Compute direct space interactions in quasi-internal frame

* Beginning of CUDA implementation of HIPPO

* Continuing CUDA implementation of HIPPO

* Continuing CUDA implementation of HIPPO

* Continuing CUDA implementation of HIPPO

* Continuing CUDA implementation of HIPPO

* Continuing CUDA implementation of HIPPO

* Continuing CUDA implementation of HIPPO

* Continuing CUDA implementation of HIPPO

* Continuing CUDA implementation of HIPPO

* Continuing CUDA implementation of HIPPO

* Continuing CUDA implementation of HIPPO

* Continuing CUDA implementation of HIPPO

* Continuing CUDA implementation of HIPPO

* Continuing CUDA implementation of HIPPO

* Finished CUDA implementation of HIPPO

* More features and test cases for HippoNonbondedForce

* Serialization and Python API for HippoNonbondedForce

* Fixed sign error in computing forces
parent 0b22cac1
#ifdef USE_CUTOFF
unsigned int includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED);
#else
unsigned int includeInteraction = (!isExcluded);
#endif
// Convert to quasi-internal coordinates.
real mat[3][3];
formQIRotationMatrix(delta, rInv, mat);
real3 labForce = make_real3(0);
real3 qiDipole1 = rotateVectorToQI(dipole1, mat);
real3 qiDipole2 = rotateVectorToQI(dipole2, mat);
real3 qiInducedDipole1 = rotateVectorToQI(inducedDipole1, mat);
real3 qiInducedDipole2 = rotateVectorToQI(inducedDipole2, mat);
real qiQXX1, qiQXY1, qiQXZ1, qiQYY1, qiQYZ1, qiQZZ1;
real qiQXX2, qiQXY2, qiQXZ2, qiQYY2, qiQYZ2, qiQZZ2;
rotateQuadrupoleToQI(qXX1, qXY1, qXZ1, qYY1, qYZ1, qiQXX1, qiQXY1, qiQXZ1, qiQYY1, qiQYZ1, qiQZZ1, mat);
rotateQuadrupoleToQI(qXX2, qXY2, qXZ2, qYY2, qYZ2, qiQXX2, qiQXY2, qiQXZ2, qiQYY2, qiQYZ2, qiQZZ2, mat);
// Compute intermediates used for dipole and quadrupole calculations.
real dir = qiDipole1.z*r;
real3 qxI = make_real3(qiQXX1, qiQXY1, qiQXZ1);
real3 qyI = make_real3(qiQXY1, qiQYY1, qiQYZ1);
real3 qzI = make_real3(qiQXZ1, qiQYZ1, qiQZZ1);
real3 qi = r*make_real3(qiQXZ1, qiQYZ1, qiQZZ1);
real qir = qi.z*r;
real dkr = qiDipole2.z*r;
real3 qxK = make_real3(qiQXX2, qiQXY2, qiQXZ2);
real3 qyK = make_real3(qiQXY2, qiQYY2, qiQYZ2);
real3 qzK = make_real3(qiQXZ2, qiQYZ2, qiQZZ2);
real3 qk = r*make_real3(qiQXZ2, qiQYZ2, qiQZZ2);
real qkr = qk.z*r;
real dik = dot(qiDipole1, qiDipole2);
real qik = dot(qi, qk);
real diqk = dot(qiDipole1, qk);
real dkqi = dot(qiDipole2, qi);
real qiqk = 2*(qxI.y*qxK.y+qxI.z*qxK.z+qyI.z*qyK.z) + qxI.x*qxK.x + qyI.y*qyK.y + qzI.z*qzK.z;
real3 dirCross = make_real3(qiDipole1.y*r, -qiDipole1.x*r, 0);
real3 dkrCross = make_real3(qiDipole2.y*r, -qiDipole2.x*r, 0);
real3 dikCross = cross(qiDipole1, qiDipole2);
real3 qirCross = make_real3(qi.y*r, -qi.x*r, 0);
real3 qkrCross = make_real3(qk.y*r, -qk.x*r, 0);
real3 qikCross = cross(qk, qi);
real3 qikTemp = make_real3(dot(qxI, qk), dot(qyI, qk), dot(qzI, qk));
real3 qkiTemp = make_real3(dot(qxK, qi), dot(qyK, qi), dot(qzK, qi));
real3 qikrCross = make_real3(-r*qikTemp.y, r*qikTemp.x, 0);
real3 qkirCross = make_real3(-r*qkiTemp.y, r*qkiTemp.x, 0);
real3 diqkTemp = make_real3(dot(qiDipole1, qxK), dot(qiDipole1, qyK), dot(qiDipole1, qzK));
real3 dkqiTemp = make_real3(dot(qiDipole2, qxI), dot(qiDipole2, qyI), dot(qiDipole2, qzI));
real3 diqkrCross = make_real3(-r*diqkTemp.y, r*diqkTemp.x, 0);
real3 dkqirCross = make_real3(-r*dkqiTemp.y, r*dkqiTemp.x, 0);
real3 dqik = cross(qiDipole1, qk) + cross(qiDipole2, qi) - 2*(cross(qxI, qxK) + cross(qyI, qyK) + cross(qzI, qzK));
// Get reciprocal distance terms for this interaction.
real rInv2 = rInv*rInv;
real rr1 = rInv;
real rr3 = rr1*rInv2;
real rr5 = 3*rr3*rInv2;
real rr7 = 5*rr5*rInv2;
real rr9 = 7*rr7*rInv2;
real rr11 = 9*rr9*rInv2;
// Compute damping factors for multipole interactions.
real fdampI1, fdampI3, fdampI5, fdampI7, fdampI9;
real fdampK1, fdampK3, fdampK5, fdampK7, fdampK9;
real fdampIK1, fdampIK3, fdampIK5, fdampIK7, fdampIK9, fdampIK11;
computeOverlapDampingFactors(alpha1, alpha2, r, fdampI1, fdampI3, fdampI5, fdampI7, fdampI9, fdampK1, fdampK3, fdampK5, fdampK7, fdampK9,
fdampIK1, fdampIK3, fdampIK5, fdampIK7, fdampIK9, fdampIK11);
#if USE_EWALD
// Calculate the error function damping terms.
real ralpha = PME_ALPHA*r;
real bn0 = erfc(ralpha)*rInv;
real alsq2 = 2*PME_ALPHA*PME_ALPHA;
real alsq2n = 1/(SQRT_PI*PME_ALPHA);
real exp2a = EXP(-(ralpha*ralpha));
alsq2n *= alsq2;
real bn1 = (bn0+alsq2n*exp2a)*rInv2;
alsq2n *= alsq2;
real bn2 = (3*bn1+alsq2n*exp2a)*rInv2;
alsq2n *= alsq2;
real bn3 = (5*bn2+alsq2n*exp2a)*rInv2;
alsq2n *= alsq2;
real bn4 = (7*bn3+alsq2n*exp2a)*rInv2;
alsq2n *= alsq2;
real bn5 = (9*bn4+alsq2n*exp2a)*rInv2;
#endif
// Compute the fixed multipole interaction.
{
// Find damped multipole intermediates and energy value.
real term1 = coreCharge1*coreCharge2;
real term1i = coreCharge2*valenceCharge1;
real term2i = coreCharge2*dir;
real term3i = coreCharge2*qir;
real term1k = coreCharge1*valenceCharge2;
real term2k = -coreCharge1*dkr;
real term3k = coreCharge1*qkr;
real term1ik = valenceCharge1*valenceCharge2;
real term2ik = valenceCharge2*dir - valenceCharge1*dkr + dik;
real term3ik = valenceCharge1*qkr + valenceCharge2*qir - dir*dkr + 2*(dkqi-diqk+qiqk);
real term4ik = dir*qkr - dkr*qir - 4*qik;
real term5ik = qir*qkr;
#ifndef COMPUTING_EXCEPTIONS
real multipoleMultipoleScale = 1;
#endif
#if USE_EWALD
real rr1i = bn0 - (1-multipoleMultipoleScale*fdampI1)*rr1;
real rr3i = bn1 - (1-multipoleMultipoleScale*fdampI3)*rr3;
real rr5i = bn2 - (1-multipoleMultipoleScale*fdampI5)*rr5;
real rr7i = bn3 - (1-multipoleMultipoleScale*fdampI7)*rr7;
real rr1k = bn0 - (1-multipoleMultipoleScale*fdampK1)*rr1;
real rr3k = bn1 - (1-multipoleMultipoleScale*fdampK3)*rr3;
real rr5k = bn2 - (1-multipoleMultipoleScale*fdampK5)*rr5;
real rr7k = bn3 - (1-multipoleMultipoleScale*fdampK7)*rr7;
real rr1ik = bn0 - (1-multipoleMultipoleScale*fdampIK1)*rr1;
real rr3ik = bn1 - (1-multipoleMultipoleScale*fdampIK3)*rr3;
real rr5ik = bn2 - (1-multipoleMultipoleScale*fdampIK5)*rr5;
real rr7ik = bn3 - (1-multipoleMultipoleScale*fdampIK7)*rr7;
real rr9ik = bn4 - (1-multipoleMultipoleScale*fdampIK9)*rr9;
real rr11ik = bn5 - (1-multipoleMultipoleScale*fdampIK11)*rr11;
real rr1s = bn0 - (1-multipoleMultipoleScale)*rr1;
real rr3s = bn1 - (1-multipoleMultipoleScale)*rr3;
real scale = ENERGY_SCALE_FACTOR;
#else
real rr1i = fdampI1*rr1;
real rr3i = fdampI3*rr3;
real rr5i = fdampI5*rr5;
real rr7i = fdampI7*rr7;
real rr1k = fdampK1*rr1;
real rr3k = fdampK3*rr3;
real rr5k = fdampK5*rr5;
real rr7k = fdampK7*rr7;
real rr1ik = fdampIK1*rr1;
real rr3ik = fdampIK3*rr3;
real rr5ik = fdampIK5*rr5;
real rr7ik = fdampIK7*rr7;
real rr9ik = fdampIK9*rr9;
real rr11ik = fdampIK11*rr11;
real rr1s = rr1;
real rr3s = rr3;
real scale = ENERGY_SCALE_FACTOR*multipoleMultipoleScale;
#endif
real elecEnergy = scale*(term1*rr1s + term4ik*rr7ik + term5ik*rr9ik +
term1i*rr1i + term1k*rr1k + term1ik*rr1ik +
term2i*rr3i + term2k*rr3k + term2ik*rr3ik +
term3i*rr5i + term3k*rr5k + term3ik*rr5ik);
// Find damped multipole intermediates for force and torque.
real de = term1*rr3s + term4ik*rr9ik + term5ik*rr11ik +
term1i*rr3i + term1k*rr3k + term1ik*rr3ik +
term2i*rr5i + term2k*rr5k + term2ik*rr5ik +
term3i*rr7i + term3k*rr7k + term3ik*rr7ik;
term1 = -coreCharge2*rr3i - valenceCharge2*rr3ik + dkr*rr5ik - qkr*rr7ik;
real term2 = coreCharge1*rr3k + valenceCharge1*rr3ik + dir*rr5ik + qir*rr7ik;
real term3 = 2*rr5ik;
real term4 = -2*(coreCharge2*rr5i+valenceCharge2*rr5ik-dkr*rr7ik+qkr*rr9ik);
real term5 = -2*(coreCharge1*rr5k+valenceCharge1*rr5ik+dir*rr7ik+qir*rr9ik);
real term6 = 4*rr7ik;
// Compute the force and torque.
real3 elecForce = -scale*(de*make_real3(0, 0, r) + term1*qiDipole1 + term2*qiDipole2 +
term3*(diqkTemp-dkqiTemp) + term4*qi + term5*qk + term6*(qikTemp+qkiTemp));
real3 tI = scale*(-rr3ik*dikCross + term1*dirCross + term3*(dqik+dkqirCross) + term4*qirCross - term6*(qikrCross+qikCross));
real3 tK = scale*(rr3ik*dikCross + term2*dkrCross - term3*(dqik+diqkrCross) + term5*qkrCross - term6*(qkirCross-qikCross));
tempEnergy += includeInteraction ? elecEnergy : 0;
tempForce += includeInteraction ? elecForce : make_real3(0);
tempTorque1 += includeInteraction ? tI : make_real3(0);
tempTorque2 += includeInteraction ? tK : make_real3(0);
}
// Compute the induced dipole interactions.
{
real uir = qiInducedDipole1.z*r;
real ukr = qiInducedDipole2.z*r;
// Apply charge penetration damping to scale factors.
#ifndef COMPUTING_EXCEPTIONS
real dipoleMultipoleScale = 1;
real dipoleDipoleScale = 1;
#endif
#if USE_EWALD
real rr3core = ENERGY_SCALE_FACTOR*(bn1 - (1-dipoleMultipoleScale)*rr3);
real rr5core = ENERGY_SCALE_FACTOR*(bn2 - (1-dipoleMultipoleScale)*rr5);
real rr3i = ENERGY_SCALE_FACTOR*(bn1 - (1-dipoleMultipoleScale*fdampI3)*rr3);
real rr5i = ENERGY_SCALE_FACTOR*(bn2 - (1-dipoleMultipoleScale*fdampI5)*rr5);
real rr7i = ENERGY_SCALE_FACTOR*(bn3 - (1-dipoleMultipoleScale*fdampI7)*rr7);
real rr9i = ENERGY_SCALE_FACTOR*(bn4 - (1-dipoleMultipoleScale*fdampI9)*rr9);
real rr3k = ENERGY_SCALE_FACTOR*(bn1 - (1-dipoleMultipoleScale*fdampK3)*rr3);
real rr5k = ENERGY_SCALE_FACTOR*(bn2 - (1-dipoleMultipoleScale*fdampK5)*rr5);
real rr7k = ENERGY_SCALE_FACTOR*(bn3 - (1-dipoleMultipoleScale*fdampK7)*rr7);
real rr9k = ENERGY_SCALE_FACTOR*(bn4 - (1-dipoleMultipoleScale*fdampK9)*rr9);
real rr5ik = ENERGY_SCALE_FACTOR*(bn2 - (1-dipoleDipoleScale*fdampIK5)*rr5);
real rr7ik = ENERGY_SCALE_FACTOR*(bn3 - (1-dipoleDipoleScale*fdampIK7)*rr7);
#else
real dmScale = ENERGY_SCALE_FACTOR*dipoleMultipoleScale;
real ddScale = ENERGY_SCALE_FACTOR*dipoleDipoleScale;
real rr3core = rr3*dmScale;
real rr5core = rr5*dmScale;
real rr3i = rr3*fdampI3*dmScale;
real rr5i = rr5*fdampI5*dmScale;
real rr7i = rr7*fdampI7*dmScale;
real rr9i = rr9*fdampI9*dmScale;
real rr3k = rr3*fdampK3*dmScale;
real rr5k = rr5*fdampK5*dmScale;
real rr7k = rr7*fdampK7*dmScale;
real rr9k = rr9*fdampK9*dmScale;
real rr5ik = rr5*fdampIK5*ddScale;
real rr7ik = rr7*fdampIK7*ddScale;
#endif
// Get the induced dipole field used for dipole torques.
real3 torqueField1 = rr3i*qiInducedDipole2;
torqueField1.z -= rr5i*ukr*r;
real3 torqueField2 = rr3k*qiInducedDipole1;
torqueField2.z -= rr5k*uir*r;
// Get induced dipole field gradient used for quadrupole torques.
real3 dtorqueField1 = 2*r*rr5i*qiInducedDipole2;
dtorqueField1.z -= r2*rr7i*ukr;
real3 dtorqueField2 = -2*r*rr5k*qiInducedDipole1;
dtorqueField2.z += r2*rr7k*uir;
// Get the field gradient for direct polarization force
real t1XX = valenceCharge1*rr3i + coreCharge1*rr3core + dir*rr5i - qxI.x*2*rr5i + qi.z*r*rr7i;
real t2XX = valenceCharge2*rr3k + coreCharge2*rr3core - dkr*rr5k - qxK.x*2*rr5k + qk.z*r*rr7k;
real t1YY = valenceCharge1*rr3i + coreCharge1*rr3core + dir*rr5i - qyI.y*2*rr5i + qi.z*r*rr7i;
real t2YY = valenceCharge2*rr3k + coreCharge2*rr3core - dkr*rr5k - qyK.y*2*rr5k + qk.z*r*rr7k;
real t1ZZ = valenceCharge1*(rr3i-rr5i*r2) + coreCharge1*(rr3core-rr5core*r2) + qiDipole1.z*2*rr5i*r -
dir*(rr7i*r2-rr5i) - qzI.z*2*rr5i + qi.z*5*rr7i*r - qir*rr9i*r2;
real t2ZZ = valenceCharge2*(rr3k-rr5k*r2) + coreCharge2*(rr3core-rr5core*r2) - qiDipole2.z*2*rr5k*r +
dkr*(rr7k*r2-rr5k) - qzK.z*2*rr5k + qk.z*5*rr7k*r - qkr*rr9k*r2;
real t1XY = -qxI.y*2*rr5i;
real t2XY = -qxK.y*2*rr5k;
real t1XZ = qiDipole1.x*rr5i*r - qxI.z*2*rr5i + qi.x*2*rr7i*r;
real t2XZ = -qiDipole2.x*rr5k*r - qxK.z*2*rr5k + qk.x*2*rr7k*r;
real t1YZ = qiDipole1.y*rr5i*r - qyI.z*2*rr5i + qi.y*2*rr7i*r;
real t2YZ = -qiDipole2.y*rr5k*r - qyK.z*2*rr5k + qk.y*2*rr7k*r;
// Get the dEp/dR terms for chgpen direct polarization force.
real depx = t1XX*qiInducedDipole2.x + t1XY*qiInducedDipole2.y + t1XZ*qiInducedDipole2.z - t2XX*qiInducedDipole1.x - t2XY*qiInducedDipole1.y - t2XZ*qiInducedDipole1.z;
real depy = t1XY*qiInducedDipole2.x + t1YY*qiInducedDipole2.y + t1YZ*qiInducedDipole2.z - t2XY*qiInducedDipole1.x - t2YY*qiInducedDipole1.y - t2YZ*qiInducedDipole1.z;
real depz = t1XZ*qiInducedDipole2.x + t1YZ*qiInducedDipole2.y + t1ZZ*qiInducedDipole2.z - t2XZ*qiInducedDipole1.x - t2YZ*qiInducedDipole1.y - t2ZZ*qiInducedDipole1.z;
real3 indForce = make_real3(depx, depy, depz);
// Torque is induced field and gradient cross permanent moments.
real3 tI = cross(torqueField1, qiDipole1);
tI.x += -qxI.y*dtorqueField1.x - 2*qyI.z*dtorqueField1.z + (qzI.z-qyI.y)*dtorqueField1.y;
tI.y += qxI.y*dtorqueField1.y + 2*qxI.z*dtorqueField1.z + (qxI.x-qzI.z)*dtorqueField1.x;
tI.z += qyI.z*dtorqueField1.x - qxI.z*dtorqueField1.y;
real3 tK = cross(torqueField2, qiDipole2);
tK.x += -qxK.y*dtorqueField2.x - 2*qyK.z*dtorqueField2.z + (qzK.z-qyK.y)*dtorqueField2.y;
tK.y += qxK.y*dtorqueField2.y + 2*qxK.z*dtorqueField2.z + (qxK.x-qzK.z)*dtorqueField2.x;
tK.z += qyK.z*dtorqueField2.x - qxK.z*dtorqueField2.y;
tempForce -= includeInteraction ? indForce : make_real3(0);
tempTorque1 += includeInteraction ? tI : make_real3(0);
tempTorque2 += includeInteraction ? tK : make_real3(0);
// Get the dtau/dr terms used for OPT polarization force.
real coeff[] = {EXTRAPOLATION_COEFFICIENTS_SUM};
for (int j = 0; j < MAX_EXTRAPOLATION_ORDER-1; j++) {
real3 extDipole1 = (atom1 < NUM_ATOMS ? extrapolatedDipole[j*NUM_ATOMS+atom1] : make_real3(0));
real uirm = dot(extDipole1, delta);
for (int m = 0; m < MAX_EXTRAPOLATION_ORDER-1-j; m++) {
real3 extDipole2 = (atom2 < NUM_ATOMS ? extrapolatedDipole[m*NUM_ATOMS+atom2] : make_real3(0));
real ukrm = dot(extDipole2, delta);
real term1 = 2*rr5ik;
real term2 = term1*delta.x;
real term3 = rr5ik - rr7ik*delta.x*delta.x;
real tixx = extDipole1.x*term2 + uirm*term3;
real tkxx = extDipole2.x*term2 + ukrm*term3;
term2 = term1*delta.y;
term3 = rr5ik - rr7ik*delta.y*delta.y;
real tiyy = extDipole1.y*term2 + uirm*term3;
real tkyy = extDipole2.y*term2 + ukrm*term3;
term2 = term1*delta.z;
term3 = rr5ik - rr7ik*delta.z*delta.z;
real tizz = extDipole1.z*term2 + uirm*term3;
real tkzz = extDipole2.z*term2 + ukrm*term3;
term1 = rr5ik*delta.y;
term2 = rr5ik*delta.x;
term3 = delta.y * (rr7ik*delta.x);
real tixy = extDipole1.x*term1 + extDipole1.y*term2 - uirm*term3;
real tkxy = extDipole2.x*term1 + extDipole2.y*term2 - ukrm*term3;
term1 = rr5ik*delta.z;
term3 = delta.z * (rr7ik*delta.x);
real tixz = extDipole1.x*term1 + extDipole1.z*term2 - uirm*term3;
real tkxz = extDipole2.x*term1 + extDipole2.z*term2 - ukrm*term3;
term2 = rr5ik*delta.y;
term3 = delta.z * (rr7ik*delta.y);
real tiyz = extDipole1.y*term1 + extDipole1.z*term2 - uirm*term3;
real tkyz = extDipole2.y*term1 + extDipole2.z*term2 - ukrm*term3;
real depx = tixx*extDipole2.x + tkxx*extDipole1.x + tixy*extDipole2.y + tkxy*extDipole1.y + tixz*extDipole2.z + tkxz*extDipole1.z;
real depy = tixy*extDipole2.x + tkxy*extDipole1.x + tiyy*extDipole2.y + tkyy*extDipole1.y + tiyz*extDipole2.z + tkyz*extDipole1.z;
real depz = tixz*extDipole2.x + tkxz*extDipole1.x + tiyz*extDipole2.y + tkyz*extDipole1.y + tizz*extDipole2.z + tkzz*extDipole1.z;
labForce += (coeff[j+m+1]/2)*make_real3(depx, depy, depz);
}
}
}
// Compute the repulsion interaction.
{
// Compute damping coefficients.
real fdamp1, fdamp3, fdamp5, fdamp7, fdamp9, fdamp11;
computeRepulsionDampingFactors(pauliAlpha1, pauliAlpha2, r, fdamp1, fdamp3, fdamp5, fdamp7, fdamp9, fdamp11);
// Calculate intermediate terms needed for the energy
real eterm1 = pauliQ1*pauliQ2;
real eterm2 = pauliQ2*dir - pauliQ1*dkr + dik;
real eterm3 = pauliQ1*qkr + pauliQ2*qir - dir*dkr + 2*(dkqi-diqk+qiqk);
real eterm4 = dir*qkr - dkr*qir - 4*qik;
real eterm5 = qir*qkr;
real eterm = eterm1*fdamp1 + eterm2*fdamp3 + eterm3*fdamp5 + eterm4*fdamp7 + eterm5*fdamp9;
// Compute the energy.
real sizik = pauliK1*pauliK2;
#ifdef COMPUTING_EXCEPTIONS
sizik *= repulsionScale;
#endif
real repEnergy = sizik*eterm*rr1;
// Calculate intermediate terms for force and torque
real de = eterm1*fdamp3 + eterm2*fdamp5 + eterm3*fdamp7 + eterm4*fdamp9 + eterm5*fdamp11;
real term1 = -pauliQ2*fdamp3 + dkr*fdamp5 - qkr*fdamp7;
real term2 = pauliQ1*fdamp3 + dir*fdamp5 + qir*fdamp7;
real term3 = 2*fdamp5;
real term4 = 2*(-pauliQ2*fdamp5 + dkr*fdamp7 - qkr*fdamp9);
real term5 = 2*(-pauliQ1*fdamp5 - dir*fdamp7 - qir*fdamp9);
real term6 = 4*fdamp7;
// Compute the force and torque.
real3 repForce = make_real3(0, 0, de*r) + term1*qiDipole1 + term2*qiDipole2 + term3*(diqkTemp-dkqiTemp)
+ term4*qi + term5*qk + term6*(qikTemp+qkiTemp);
repForce = -sizik*(repForce*rr1 + make_real3(0, 0, eterm*rr3*r));
real3 tI = -fdamp3*dikCross + term1*dirCross + term3*(dqik+dkqirCross) + term4*qirCross - term6*(qikrCross+qikCross);
real3 tK = fdamp3*dikCross + term2*dkrCross - term3*(dqik+diqkrCross) + term5*qkrCross - term6*(qkirCross-qikCross);
tI *= sizik*rr1;
tK *= sizik*rr1;
#ifdef USE_CUTOFF
if (r > SWITCH_CUTOFF) {
real x = r-SWITCH_CUTOFF;
real switchValue = 1+x*x*x*(SWITCH_C3+x*(SWITCH_C4+x*SWITCH_C5));
real switchDeriv = x*x*(3*SWITCH_C3+x*(4*SWITCH_C4+x*5*SWITCH_C5));
repForce *= switchValue;
repForce.z += repEnergy*switchDeriv;
repEnergy *= switchValue;
tI *= switchValue;
tK *= switchValue;
}
#endif
tempEnergy += includeInteraction ? repEnergy : 0;
tempForce += includeInteraction ? repForce : make_real3(0);
tempTorque1 += includeInteraction ? tI : make_real3(0);
tempTorque2 += includeInteraction ? tK : make_real3(0);
}
// Compute the dispersion force and energy.
{
real rInv6 = rInv2*rInv2*rInv2;
#ifndef COMPUTING_EXCEPTIONS
real dispersionScale = 1;
#endif
real fdamp, ddamp;
computeDispersionDampingFactors(alpha1, alpha2, r, fdamp, ddamp);
#if USE_EWALD
real ralpha2 = r2*DPME_ALPHA*DPME_ALPHA;
real ralpha4 = ralpha2*ralpha2;
real expterm = EXP(-ralpha2);
real expa = expterm * (1 + ralpha2 + ralpha4/2);
real scale = dispersionScale*fdamp*fdamp - 1;
real cick = c61*c62;
real dispEnergy = -cick*(expa+scale)*rInv6;
real rterm = -ralpha2*ralpha4*expterm*rInv;
real dispForce = -6*dispEnergy*rInv - cick*rInv6*(rterm + 2*dispersionScale*fdamp*ddamp);
#else
real dispEnergy = -dispersionScale*c61*c62*rInv6;
real dispForce = -6*dispEnergy*rInv;
dispForce = dispForce*fdamp*fdamp + 2*dispEnergy*fdamp*ddamp;
dispEnergy *= fdamp*fdamp;
#endif
tempEnergy += includeInteraction ? dispEnergy : 0;
tempForce.z += includeInteraction ? dispForce : 0;
}
// Compute the charge transfer force and energy.
{
real term1 = epsilon1*EXP(-damping2*r);
real term2 = epsilon2*EXP(-damping1*r);
real ctEnergy = -(term1+term2);
real ctForce = (term1*damping2 + term2*damping1)*rInv;
#ifdef USE_CUTOFF
if (r > SWITCH_CUTOFF) {
real x = r-SWITCH_CUTOFF;
real switchValue = 1+x*x*x*(SWITCH_C3+x*(SWITCH_C4+x*SWITCH_C5));
real switchDeriv = x*x*(3*SWITCH_C3+x*(4*SWITCH_C4+x*5*SWITCH_C5));
ctForce = ctForce*switchValue - ctEnergy*switchDeriv*r;
ctEnergy *= switchValue;
}
#endif
#ifdef COMPUTING_EXCEPTIONS
ctForce *= multipoleMultipoleScale;
ctEnergy *= multipoleMultipoleScale;
#endif
tempEnergy += includeInteraction ? ctEnergy : 0;
tempForce.z += includeInteraction ? ctForce*r : 0;
}
// Rotate back to the lab frame.
if (includeInteraction) {
tempForce = rotateVectorFromQI(tempForce, mat) - labForce;
tempTorque1 = rotateVectorFromQI(tempTorque1, mat);
tempTorque2 = rotateVectorFromQI(tempTorque2, mat);
}
// Functions that are called from hippoInteraction.cu.
__device__ void formQIRotationMatrix(real3 deltaR, real rInv, real (&rotationMatrix)[3][3]) {
real3 vectorZ = deltaR*rInv;
real3 vectorX = make_real3(0);
if (fabs(vectorZ.y) > fabs(vectorZ.x))
vectorX.x = 1;
else
vectorX.y = 1;
vectorX -= vectorZ*dot(vectorZ, vectorX);
vectorX = normalize(vectorX);
real3 vectorY = cross(vectorZ, vectorX);
rotationMatrix[0][0] = vectorX.x;
rotationMatrix[0][1] = vectorX.y;
rotationMatrix[0][2] = vectorX.z;
rotationMatrix[1][0] = vectorY.x;
rotationMatrix[1][1] = vectorY.y;
rotationMatrix[1][2] = vectorY.z;
rotationMatrix[2][0] = vectorZ.x;
rotationMatrix[2][1] = vectorZ.y;
rotationMatrix[2][2] = vectorZ.z;
}
__device__ real3 rotateVectorToQI(real3 v, const real (&mat)[3][3]) {
return make_real3(mat[0][0]*v.x + mat[0][1]*v.y + mat[0][2]*v.z,
mat[1][0]*v.x + mat[1][1]*v.y + mat[1][2]*v.z,
mat[2][0]*v.x + mat[2][1]*v.y + mat[2][2]*v.z);
}
__device__ real3 rotateVectorFromQI(real3 v, const real (&mat)[3][3]) {
return make_real3(mat[0][0]*v.x + mat[1][0]*v.y + mat[2][0]*v.z,
mat[0][1]*v.x + mat[1][1]*v.y + mat[2][1]*v.z,
mat[0][2]*v.x + mat[1][2]*v.y + mat[2][2]*v.z);
}
__device__ void rotateQuadrupoleToQI(real qXX, real qXY, real qXZ, real qYY, real qYZ,
real &qiQXX, real &qiQXY, real &qiQXZ, real &qiQYY, real &qiQYZ, real &qiQZZ, const real (&mat)[3][3]) {
real qZZ = -qXX-qYY;
qiQXX = mat[0][0]*(mat[0][0]*qXX + 2*(mat[0][1]*qXY + mat[0][2]*qXZ)) + mat[0][1]*(mat[0][1]*qYY + 2*mat[0][2]*qYZ) + mat[0][2]*mat[0][2]*qZZ;
qiQYY = mat[1][0]*(mat[1][0]*qXX + 2*(mat[1][1]*qXY + mat[1][2]*qXZ)) + mat[1][1]*(mat[1][1]*qYY + 2*mat[1][2]*qYZ) + mat[1][2]*mat[1][2]*qZZ;
qiQXY = mat[0][0]*mat[1][0]*qXX + mat[0][1]*mat[1][1]*qYY + mat[0][2]*mat[1][2]*qZZ + (mat[0][0]*mat[1][1] + mat[0][1]*mat[1][0])*qXY + (mat[0][0]*mat[1][2] + mat[0][2]*mat[1][0])*qXZ + (mat[0][1]*mat[1][2] + mat[0][2]*mat[1][1])*qYZ;
qiQXZ = mat[0][0]*mat[2][0]*qXX + mat[0][1]*mat[2][1]*qYY + mat[0][2]*mat[2][2]*qZZ + (mat[0][0]*mat[2][1] + mat[0][1]*mat[2][0])*qXY + (mat[0][0]*mat[2][2] + mat[0][2]*mat[2][0])*qXZ + (mat[0][1]*mat[2][2] + mat[0][2]*mat[2][1])*qYZ;
qiQYZ = mat[1][0]*mat[2][0]*qXX + mat[1][1]*mat[2][1]*qYY + mat[1][2]*mat[2][2]*qZZ + (mat[1][0]*mat[2][1] + mat[1][1]*mat[2][0])*qXY + (mat[1][0]*mat[2][2] + mat[1][2]*mat[2][0])*qXZ + (mat[1][1]*mat[2][2] + mat[1][2]*mat[2][1])*qYZ;
qiQZZ = -qiQXX-qiQYY;
}
__device__ void computeOverlapDampingFactors(real alphaI, real alphaJ, real r,
real& fdampI1, real& fdampI3, real& fdampI5, real& fdampI7, real& fdampI9,
real& fdampJ1, real& fdampJ3, real& fdampJ5, real& fdampJ7, real& fdampJ9,
real& fdampIJ1, real& fdampIJ3, real& fdampIJ5, real& fdampIJ7, real& fdampIJ9, real& fdampIJ11) {
real arI = alphaI*r;
real arI2 = arI*arI;
real arI3 = arI2*arI;
real arI4 = arI2*arI2;
real arI5 = arI3*arI2;
real arI6 = arI3*arI3;
real expARI = EXP(-arI);
real one = 1;
real two = 2;
real three = 3;
real four = 4;
real five = 5;
real seven = 7;
real eleven = 11;
fdampI1 = 1 - (1 + arI*(one/2))*expARI;
fdampI3 = 1 - (1 + arI + arI2*(one/2))*expARI;
fdampI5 = 1 - (1 + arI + arI2*(one/2) + arI3*(one/6))*expARI;
fdampI7 = 1 - (1 + arI + arI2*(one/2) + arI3*(one/6) + arI4*(one/30))*expARI;
fdampI9 = 1 - (1 + arI + arI2*(one/2) + arI3*(one/6) + arI4*(four/105) + arI5*(one/210))*expARI;
if (alphaI == alphaJ) {
fdampJ1 = fdampI1;
fdampJ3 = fdampI3;
fdampJ5 = fdampI5;
fdampJ7 = fdampI7;
fdampJ9 = fdampI9;
real arI7 = arI4*arI3;
real arI8 = arI4*arI4;
fdampIJ1 = 1 - (1 + arI*(eleven/16) + arI2*(three/16) + arI3*(one/48))*expARI;
fdampIJ3 = 1 - (1 + arI + arI2*(one/2) + arI3*(seven/48) + arI4*(one/48))*expARI;
fdampIJ5 = 1 - (1 + arI + arI2*(one/2) + arI3*(one/6) + arI4*(one/24) + arI5*(one/144))*expARI;
fdampIJ7 = 1 - (1 + arI + arI2*(one/2) + arI3*(one/6) + arI4*(one/24) + arI5*(one/120) + arI6*(one/720))*expARI;
fdampIJ9 = 1 - (1 + arI + arI2*(one/2) + arI3*(one/6) + arI4*(one/24) + arI5*(one/120) + arI6*(one/720) + arI7*(one/5040))*expARI;
fdampIJ11 = 1 - (1 + arI + arI2*(one/2) + arI3*(one/6) + arI4*(one/24) + arI5*(one/120) + arI6*(one/720) + arI7*(one/5040) + arI8*(one/45360))*expARI;
}
else {
real arJ = alphaJ*r;
real arJ2 = arJ*arJ;
real arJ3 = arJ2*arJ;
real arJ4 = arJ2*arJ2;
real arJ5 = arJ3*arJ2;
real arJ6 = arJ3*arJ3;
real expARJ = EXP(-arJ);
real aI2 = alphaI*alphaI;
real aJ2 = alphaJ*alphaJ;
real A = aJ2/(aJ2-aI2);
real B = aI2/(aI2-aJ2);
real A2 = A*A;
real B2 = B*B;
fdampJ1 = 1 - (1 + arJ*(one/2))*expARJ;
fdampJ3 = 1 - (1 + arJ + arJ2*(one/2))*expARJ;
fdampJ5 = 1 - (1 + arJ + arJ2*(one/2) + arJ3*(one/6))*expARJ;
fdampJ7 = 1 - (1 + arJ + arJ2*(one/2) + arJ3*(one/6) + arJ4*(one/30))*expARJ;
fdampJ9 = 1 - (1 + arJ + arJ2*(one/2) + arJ3*(one/6) + 4*arJ4*(one/105) + arJ5*(one/210))*expARJ;
fdampIJ1 = 1 - A2*(1 + 2*B + arI*(one/2))*expARI -
B2*(1 + 2*A + arJ*(one/2))*expARJ;
fdampIJ3 = 1 - A2*(1 + arI + arI2*(one/2))*expARI -
B2*(1 + arJ + arJ2*(one/2))*expARJ -
2*A2*B*(1 + arI)*expARI -
2*B2*A*(1 + arJ)*expARJ;
fdampIJ5 = 1 - A2*(1 + arI + arI2*(one/2) + arI3*(one/6))*expARI -
B2*(1 + arJ + arJ2*(one/2) + arJ3*(one/6))*expARJ -
2*A2*B*(1 + arI + arI2*(one/3))*expARI -
2*B2*A*(1 + arJ + arJ2*(one/3))*expARJ;
fdampIJ7 = 1 - A2*(1 + arI + arI2*(one/2) + arI3*(one/6) + arI4*(one/30))*expARI -
B2*(1 + arJ + arJ2*(one/2) + arJ3*(one/6) + arJ4*(one/30))*expARJ -
2*A2*B*(1 + arI + arI2*(two/5) + arI3*(one/15))*expARI -
2*B2*A*(1 + arJ + arJ2*(two/5) + arJ3*(one/15))*expARJ;
fdampIJ9 = 1 - A2*(1 + arI + arI2*(one/2) + arI3*(one/6) + arI4*4*(one/105) + arI5*(one/210))*expARI -
B2*(1 + arJ + arJ2*(one/2) + arJ3*(one/6) + arJ4*4*(one/105) + arJ5*(one/210))*expARJ -
2*A2*B*(1 + arI + arI2*(three/7) + arI3*(two/21) + arI4*(one/105))*expARI -
2*B2*A*(1 + arJ + arJ2*(three/7) + arJ3*(two/21) + arJ4*(one/105))*expARJ;
fdampIJ11 = 1 - A2*(1 + arI + arI2*(one/2) + arI3*(one/6) + arI4*(five/126) + arI5*(two/315) + arI6*(one/1890))*expARI -
B2*(1 + arJ + arJ2*(one/2) + arJ3*(one/6) + arJ4*(five/126) + arJ5*(two/315) + arJ6*(one/1890))*expARJ -
2*A2*B*(1 + arI + arI2*(four/9) + arI3*(one/9) + arI4*(one/63) + arI5*(one/945))*expARI -
2*B2*A*(1 + arJ + arJ2*(four/9) + arJ3*(one/9) + arJ4*(one/63) + arJ5*(one/945))*expARJ;
}
}
__device__ void computeDispersionDampingFactors(real alphaI, real alphaJ, real r, real& fdamp, real& ddamp) {
real arI = alphaI*r;
real arI2 = arI*arI;
real arI3 = arI2*arI;
real expARI = EXP(-arI);
real fdamp3, fdamp5;
real one = 1;
real seven = 7;
if (alphaI == alphaJ) {
real arI4 = arI3*arI;
real arI5 = arI4*arI;
fdamp3 = 1 - (1 + arI + arI2*(one/2) + arI3*(seven/48) + arI4*(one/48))*expARI;
fdamp5 = 1 - (1 + arI + arI2*(one/2) + arI3*(one/6) + arI4*(one/24) + arI5*(one/144))*expARI;
ddamp = alphaI*(arI5 - 3*arI3 - 3*arI2)*expARI*(one/96);
}
else {
real arJ = alphaJ*r;
real arJ2 = arJ*arJ;
real arJ3 = arJ2*arJ;
real expARJ = EXP(-arJ);
real aI2 = alphaI*alphaI;
real aJ2 = alphaJ*alphaJ;
real A = aJ2/(aJ2-aI2);
real B = aI2/(aI2-aJ2);
real A2 = A*A;
real B2 = B*B;
fdamp3 = 1 - A2*(1 + arI + arI2*(one/2))*expARI -
B2*(1 + arJ + arJ2*(one/2))*expARJ -
2*A2*B*(1 + arI)*expARI -
2*B2*A*(1 + arJ)*expARJ;
fdamp5 = 1 - A2*(1 + arI + arI2*(one/2) + arI3*(one/6))*expARI -
B2*(1 + arJ + arJ2*(one/2) + arJ3*(one/6))*expARJ -
2*A2*B*(1 + arI + arI2*(one/3))*expARI -
2*B2*A*(1 + arJ + arJ2*(one/3))*expARJ;
ddamp = (A2*arI2*alphaI*expARI*(r*alphaI + 4*B - 1) +
(B2*arJ2*alphaJ*expARJ*(r*alphaJ + 4*A - 1)))*(one/4);
}
fdamp = 1.5f*fdamp5 - 0.5f*fdamp3;
}
__device__ void computeRepulsionDampingFactors(real pauliAlphaI, real pauliAlphaJ, real r,
real& fdamp1, real& fdamp3, real& fdamp5, real& fdamp7, real& fdamp9, real& fdamp11) {
real r2 = r*r;
real r3 = r2*r;
real r4 = r2*r2;
real r5 = r3*r2;
real r6 = r3*r3;
real aI2 = 0.5f*pauliAlphaI;
real arI2 = aI2*r;
real expI = EXP(-arI2);
real aI2_2 = aI2*aI2;
real aI2_3 = aI2_2*aI2;
real aI2_4 = aI2_2*aI2_2;
real aI2_5 = aI2_3*aI2_2;
real aI2_6 = aI2_3*aI2_3;
real fexp, fexp1, fexp2, fexp3, fexp4, fexp5, pre;
real one = 1;
real two = 2;
real four = 4;
real eight = 8;
real twelve = 12;
real sixteen = 16;
if (pauliAlphaI == pauliAlphaJ) {
real r7 = r4*r3;
real r8 = r4*r4;
real aI2_7 = aI2_4*aI2_3;
pre = 128;
fexp = (r + aI2*r2 + aI2_2*r3*(one/3))*expI;
fexp1 = (aI2_2*r3 + aI2_3*r4)*expI*(one/3);
fexp2 = aI2_4*expI*r5*(one/9);
fexp3 = aI2_5*expI*r6*(one/45);
fexp4 = (aI2_5*r6 + aI2_6*r7)*expI*(one/315);
fexp5 = (aI2_5*r6 + aI2_6*r7 + aI2_7*r8*(one/3))*expI*(one/945);
}
else {
real aJ2 = 0.5f*pauliAlphaJ;
real arJ2 = aJ2*r;
real expJ = EXP(-arJ2);
real aJ2_2 = aJ2*aJ2;
real aJ2_3 = aJ2_2*aJ2;
real aJ2_4 = aJ2_2*aJ2_2;
real aJ2_5 = aJ2_3*aJ2_2;
real aJ2_6 = aJ2_3*aJ2_3;
real scale = 1/(aI2_2-aJ2_2);
pre = 8192*aI2_3*aJ2_3*(scale*scale*scale*scale);
real tmp = 4*aI2*aJ2*scale;
fexp = (arI2-tmp)*expJ + (arJ2+tmp)*expI;
fexp1 = (aI2*aJ2*r2 - 4*aI2*aJ2_2*r*scale - 4*aI2*aJ2*scale)*expJ +
(aI2*aJ2*r2 + 4*aI2_2*aJ2*r*scale + 4*aI2*aJ2*scale)*expI;
fexp2 = (aI2*aJ2*r2*(one/3) + aI2*aJ2_2*r3*(one/3) - (four/3)*aI2*aJ2_3*r2*scale - 4*aI2*aJ2_2*r*scale - 4*aI2*aJ2*scale)*expJ +
(aI2*aJ2*r2*(one/3) + aI2_2*aJ2*r3*(one/3) + (four/3)*aI2_3*aJ2*r2*scale + 4*aI2_2*aJ2*r*scale + 4*aI2*aJ2*scale)*expI;
fexp3 = (aI2*aJ2_3*r4*(one/15) + aI2*aJ2_2*r3*(one/5) + aI2*aJ2*r2*(one/5) - (four/15)*aI2*aJ2_4*r3*scale - (eight/5)*aI2*aJ2_3*r2*scale - 4*aI2*aJ2_2*r*scale - 4*scale*aI2*aJ2)*expJ +
(aI2_3*aJ2*r4*(one/15) + aI2_2*aJ2*r3*(one/5) + aI2*aJ2*r2*(one/5) + (four/15)*aI2_4*aJ2*r3*scale + (eight/5)*aI2_3*aJ2*r2*scale + 4*aI2_2*aJ2*r*scale + 4*scale*aI2*aJ2)*expI;
fexp4 = (aI2*aJ2_4*r5*(one/105) + (two/35)*aI2*aJ2_3*r4 + aI2*aJ2_2*r3*(one/7) + aI2*aJ2*r2*(one/7) - (four/105)*aI2*aJ2_5*r4*scale - (eight/21)*aI2*aJ2_4*r3*scale - (twelve/7)*aI2*aJ2_3*r2*scale - 4*aI2*aJ2_2*r*scale - 4*aI2*aJ2*scale)*expJ +
(aI2_4*aJ2*r5*(one/105) + (two/35)*aI2_3*aJ2*r4 + aI2_2*aJ2*r3*(one/7) + aI2*aJ2*r2*(one/7) + (four/105)*aI2_5*aJ2*r4*scale + (eight/21)*aI2_4*aJ2*r3*scale + (twelve/7)*aI2_3*aJ2*r2*scale + 4*aI2_2*aJ2*r*scale + 4*aI2*aJ2*scale)*expI;
fexp5 = (aI2*aJ2_5*r6*(one/945) + (two/189)*aI2*aJ2_4*r5 + aI2*aJ2_3*r4*(one/21) + aI2*aJ2_2*r3*(one/9) + aI2*aJ2*r2*(one/9) - (four/945)*aI2*aJ2_6*r5*scale - (four/63)*aI2*aJ2_5*r4*scale - (four/9)*aI2*aJ2_4*r3*scale - (sixteen/9)*aI2*aJ2_3*r2*scale - 4*aI2*aJ2_2*r*scale - 4*aI2*aJ2*scale)*expJ +
(aI2_5*aJ2*r6*(one/945) + (two/189)*aI2_4*aJ2*r5 + aI2_3*aJ2*r4*(one/21) + aI2_2*aJ2*r3*(one/9) + aI2*aJ2*r2*(one/9) + (four/945)*aI2_6*aJ2*r5*scale + (four/63)*aI2_5*aJ2*r4*scale + (four/9)*aI2_4*aJ2*r3*scale + (sixteen/9)*aI2_3*aJ2*r2*scale + 4*aI2_2*aJ2*r*scale + 4*aI2*aJ2*scale)*expI;
}
fexp = fexp/r;
fexp1 = fexp1/r3;
fexp2 = 3*fexp2/r5;
fexp3 = 15*fexp3/(r5*r2);
fexp4 = 105*fexp4/(r5*r4);
fexp5 = 945*fexp5/(r5*r6);
fdamp1 = 0.5f*pre*fexp*fexp;
fdamp3 = pre*fexp*fexp1;
fdamp5 = pre*(fexp*fexp2 + fexp1*fexp1);
fdamp7 = pre*(fexp*fexp3 + 3*fexp1*fexp2);
fdamp9 = pre*(fexp*fexp4 + 4*fexp1*fexp3 + 3*fexp2*fexp2);
fdamp11 = pre*(fexp*fexp5 + 5*fexp1*fexp4 + 10*fexp2*fexp3);
}
extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4* __restrict__ multipoleParticles, real3* __restrict__ localDipoles,
real* __restrict__ localQuadrupoles, real3* __restrict__ labDipoles, real* __restrict__ labQXX, real* __restrict__ labQXY, real* __restrict__ labQXZ,
real* __restrict__ labQYY, real* __restrict__ labQYZ) {
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) {
int4 particles = multipoleParticles[atom];
if (particles.z >= 0) {
real4 thisParticlePos = posq[atom];
real4 posZ = posq[particles.z];
real3 vectorZ = normalize(make_real3(posZ.x-thisParticlePos.x, posZ.y-thisParticlePos.y, posZ.z-thisParticlePos.z));
int axisType = particles.w;
real4 posX;
real3 vectorX;
if (axisType >= 4) {
if (fabs(vectorZ.x) < 0.866)
vectorX = make_real3(1, 0, 0);
else
vectorX = make_real3(0, 1, 0);
}
else {
posX = posq[particles.x];
vectorX = make_real3(posX.x-thisParticlePos.x, posX.y-thisParticlePos.y, posX.z-thisParticlePos.z);
}
// branch based on axis type
if (axisType == 1) {
// bisector
vectorX = normalize(vectorX);
vectorZ += vectorX;
vectorZ = normalize(vectorZ);
}
else if (axisType == 2 || axisType == 3) {
// z-bisect
if (particles.y >= 0 && particles.y < NUM_ATOMS) {
real4 posY = posq[particles.y];
real3 vectorY = make_real3(posY.x-thisParticlePos.x, posY.y-thisParticlePos.y, posY.z-thisParticlePos.z);
vectorY = normalize(vectorY);
vectorX = normalize(vectorX);
if (axisType == 2) {
vectorX += vectorY;
vectorX = normalize(vectorX);
}
else {
// 3-fold
vectorZ += vectorX + vectorY;
vectorZ = normalize(vectorZ);
}
}
}
// x = x - (x.z)z
vectorX -= dot(vectorZ, vectorX)*vectorZ;
vectorX = normalize(vectorX);
real3 vectorY = cross(vectorZ, vectorX);
// use identity rotation matrix for unrecognized axis types
if (axisType < 0 || axisType > 4) {
vectorX.x = 1;
vectorX.y = 0;
vectorX.z = 0;
vectorY.x = 0;
vectorY.y = 1;
vectorY.z = 0;
vectorZ.x = 0;
vectorZ.y = 0;
vectorZ.z = 1;
}
// Check the chirality and see whether it needs to be reversed
bool reverse = false;
if (axisType == 0 && particles.x >= 0 && particles.y >=0 && particles.z >= 0) {
real4 posY = posq[particles.y];
real delta[4][3];
delta[0][0] = thisParticlePos.x - posY.x;
delta[0][1] = thisParticlePos.y - posY.y;
delta[0][2] = thisParticlePos.z - posY.z;
delta[1][0] = posZ.x - posY.x;
delta[1][1] = posZ.y - posY.y;
delta[1][2] = posZ.z - posY.z;
delta[2][0] = posX.x - posY.x;
delta[2][1] = posX.y - posY.y;
delta[2][2] = posX.z - posY.z;
delta[3][0] = delta[1][1]*delta[2][2] - delta[1][2]*delta[2][1];
delta[3][1] = delta[2][1]*delta[0][2] - delta[2][2]*delta[0][1];
delta[3][2] = delta[0][1]*delta[1][2] - delta[0][2]*delta[1][1];
real volume = delta[3][0]*delta[0][0] + delta[3][1]*delta[1][0] + delta[3][2]*delta[2][0];
reverse = (volume < 0);
}
// Transform the dipole
real3 dipole = localDipoles[atom];
if (reverse)
dipole.y *= -1;
labDipoles[atom] = make_real3(dipole.x*vectorX.x + dipole.y*vectorY.x + dipole.z*vectorZ.x,
dipole.x*vectorX.y + dipole.y*vectorY.y + dipole.z*vectorZ.y,
dipole.x*vectorX.z + dipole.y*vectorY.z + dipole.z*vectorZ.z);
// Transform the quadrupole
real mPoleXX = localQuadrupoles[5*atom];
real mPoleXY = localQuadrupoles[5*atom+1];
real mPoleXZ = localQuadrupoles[5*atom+2];
real mPoleYY = localQuadrupoles[5*atom+3];
real mPoleYZ = localQuadrupoles[5*atom+4];
real mPoleZZ = -(mPoleXX+mPoleYY);
if (reverse) {
mPoleXY *= -1;
mPoleYZ *= -1;
}
labQXX[atom] = vectorX.x*(vectorX.x*mPoleXX + vectorY.x*mPoleXY + vectorZ.x*mPoleXZ)
+ vectorY.x*(vectorX.x*mPoleXY + vectorY.x*mPoleYY + vectorZ.x*mPoleYZ)
+ vectorZ.x*(vectorX.x*mPoleXZ + vectorY.x*mPoleYZ + vectorZ.x*mPoleZZ);
labQXY[atom] = vectorX.x*(vectorX.y*mPoleXX + vectorY.y*mPoleXY + vectorZ.y*mPoleXZ)
+ vectorY.x*(vectorX.y*mPoleXY + vectorY.y*mPoleYY + vectorZ.y*mPoleYZ)
+ vectorZ.x*(vectorX.y*mPoleXZ + vectorY.y*mPoleYZ + vectorZ.y*mPoleZZ);
labQXZ[atom] = vectorX.x*(vectorX.z*mPoleXX + vectorY.z*mPoleXY + vectorZ.z*mPoleXZ)
+ vectorY.x*(vectorX.z*mPoleXY + vectorY.z*mPoleYY + vectorZ.z*mPoleYZ)
+ vectorZ.x*(vectorX.z*mPoleXZ + vectorY.z*mPoleYZ + vectorZ.z*mPoleZZ);
labQYY[atom] = vectorX.y*(vectorX.y*mPoleXX + vectorY.y*mPoleXY + vectorZ.y*mPoleXZ)
+ vectorY.y*(vectorX.y*mPoleXY + vectorY.y*mPoleYY + vectorZ.y*mPoleYZ)
+ vectorZ.y*(vectorX.y*mPoleXZ + vectorY.y*mPoleYZ + vectorZ.y*mPoleZZ);
labQYZ[atom] = vectorX.y*(vectorX.z*mPoleXX + vectorY.z*mPoleXY + vectorZ.z*mPoleXZ)
+ vectorY.y*(vectorX.z*mPoleXY + vectorY.z*mPoleYY + vectorZ.z*mPoleYZ)
+ vectorZ.y*(vectorX.z*mPoleXZ + vectorY.z*mPoleYZ + vectorZ.z*mPoleZZ);
}
else {
labDipoles[atom] = make_real3(localDipoles[atom].x, localDipoles[atom].y, localDipoles[atom].z);
labQXX[atom] = localQuadrupoles[5*atom];
labQXY[atom] = localQuadrupoles[5*atom+1];
labQXZ[atom] = localQuadrupoles[5*atom+2];
labQYY[atom] = localQuadrupoles[5*atom+3];
labQYZ[atom] = localQuadrupoles[5*atom+4];
}
}
}
extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fieldBuffers, real* __restrict__ inducedDipole, const real* __restrict__ polarizability) {
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) {
real scale = polarizability[atom]/(real) 0x100000000;
inducedDipole[3*atom] = scale*fieldBuffers[atom];
inducedDipole[3*atom+1] = scale*fieldBuffers[atom+PADDED_NUM_ATOMS];
inducedDipole[3*atom+2] = scale*fieldBuffers[atom+PADDED_NUM_ATOMS*2];
}
}
/**
* Normalize a vector and return what its magnitude was.
*/
inline __device__ real normVector(real3& v) {
real n = SQRT(dot(v, v));
v *= (n > 0 ? RECIP(n) : 0);
return n;
}
/**
* Compute the force on each particle due to the torque.
*/
extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ forceBuffers, const long long* __restrict__ torqueBuffers,
const real4* __restrict__ posq, const int4* __restrict__ multipoleParticles) {
const int U = 0;
const int V = 1;
const int W = 2;
const int R = 3;
const int S = 4;
const int UV = 5;
const int UW = 6;
const int VW = 7;
const int UR = 8;
const int US = 9;
const int VS = 10;
const int WS = 11;
const int LastVectorIndex = 12;
const int X = 0;
const int Y = 1;
const int Z = 2;
const int I = 3;
const real torqueScale = RECIP((double) 0x100000000);
real3 forces[4];
real norms[LastVectorIndex];
real3 vector[LastVectorIndex];
real angles[LastVectorIndex][2];
for (int atom = blockIdx.x*blockDim.x + threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) {
int4 particles = multipoleParticles[atom];
int axisAtom = particles.z;
int axisType = particles.w;
// NoAxisType
if (axisType < 5 && particles.z >= 0) {
real3 atomPos = trimTo3(posq[atom]);
vector[U] = atomPos - trimTo3(posq[axisAtom]);
norms[U] = normVector(vector[U]);
if (axisType != 4 && particles.x >= 0)
vector[V] = atomPos - trimTo3(posq[particles.x]);
else {
if (fabs(vector[U].x/norms[U]) < 0.866)
vector[V] = make_real3(1, 0, 0);
else
vector[V] = make_real3(0, 1, 0);
}
norms[V] = normVector(vector[V]);
// W = UxV
if (axisType < 2 || axisType > 3)
vector[W] = cross(vector[U], vector[V]);
else
vector[W] = trimTo3(posq[particles.y]) - atomPos;
norms[W] = normVector(vector[W]);
vector[UV] = cross(vector[V], vector[U]);
vector[UW] = cross(vector[W], vector[U]);
vector[VW] = cross(vector[W], vector[V]);
norms[UV] = normVector(vector[UV]);
norms[UW] = normVector(vector[UW]);
norms[VW] = normVector(vector[VW]);
angles[UV][0] = dot(vector[U], vector[V]);
angles[UV][1] = SQRT(1 - angles[UV][0]*angles[UV][0]);
angles[UW][0] = dot(vector[U], vector[W]);
angles[UW][1] = SQRT(1 - angles[UW][0]*angles[UW][0]);
angles[VW][0] = dot(vector[V], vector[W]);
angles[VW][1] = SQRT(1 - angles[VW][0]*angles[VW][0]);
real dphi[3];
real3 torque = make_real3(torqueScale*torqueBuffers[atom], torqueScale*torqueBuffers[atom+PADDED_NUM_ATOMS], torqueScale*torqueBuffers[atom+PADDED_NUM_ATOMS*2]);
dphi[U] = -dot(vector[U], torque);
dphi[V] = -dot(vector[V], torque);
dphi[W] = -dot(vector[W], torque);
// z-then-x and bisector
if (axisType == 0 || axisType == 1) {
real factor1 = dphi[V]/(norms[U]*angles[UV][1]);
real factor2 = dphi[W]/(norms[U]);
real factor3 = -dphi[U]/(norms[V]*angles[UV][1]);
real factor4 = 0;
if (axisType == 1) {
factor2 *= 0.5f;
factor4 = 0.5f*dphi[W]/(norms[V]);
}
forces[Z] = vector[UV]*factor1 + factor2*vector[UW];
forces[X] = vector[UV]*factor3 + factor4*vector[VW];
forces[I] = -(forces[X]+forces[Z]);
forces[Y] = make_real3(0);
}
else if (axisType == 2) {
// z-bisect
vector[R] = vector[V] + vector[W];
vector[S] = cross(vector[U], vector[R]);
norms[R] = normVector(vector[R]);
norms[S] = normVector(vector[S]);
vector[UR] = cross(vector[R], vector[U]);
vector[US] = cross(vector[S], vector[U]);
vector[VS] = cross(vector[S], vector[V]);
vector[WS] = cross(vector[S], vector[W]);
norms[UR] = normVector(vector[UR]);
norms[US] = normVector(vector[US]);
norms[VS] = normVector(vector[VS]);
norms[WS] = normVector(vector[WS]);
angles[UR][0] = dot(vector[U], vector[R]);
angles[UR][1] = SQRT(1 - angles[UR][0]*angles[UR][0]);
angles[US][0] = dot(vector[U], vector[S]);
angles[US][1] = SQRT(1 - angles[US][0]*angles[US][0]);
angles[VS][0] = dot(vector[V], vector[S]);
angles[VS][1] = SQRT(1 - angles[VS][0]*angles[VS][0]);
angles[WS][0] = dot(vector[W], vector[S]);
angles[WS][1] = SQRT(1 - angles[WS][0]*angles[WS][0]);
real3 t1 = vector[V] - vector[S]*angles[VS][0];
real3 t2 = vector[W] - vector[S]*angles[WS][0];
normVector(t1);
normVector(t2);
real ut1cos = dot(vector[U], t1);
real ut1sin = SQRT(1 - ut1cos*ut1cos);
real ut2cos = dot(vector[U], t2);
real ut2sin = SQRT(1 - ut2cos*ut2cos);
real dphiR = -dot(vector[R], torque);
real dphiS = -dot(vector[S], torque);
real factor1 = dphiR/(norms[U]*angles[UR][1]);
real factor2 = dphiS/(norms[U]);
real factor3 = dphi[U]/(norms[V]*(ut1sin+ut2sin));
real factor4 = dphi[U]/(norms[W]*(ut1sin+ut2sin));
forces[Z] = vector[UR]*factor1 + factor2*vector[US];
forces[X] = (angles[VS][1]*vector[S] - angles[VS][0]*t1)*factor3;
forces[Y] = (angles[WS][1]*vector[S] - angles[WS][0]*t2)*factor4;
forces[I] = -(forces[X] + forces[Y] + forces[Z]);
}
else if (axisType == 3) {
// 3-fold
forces[Z] = (vector[UW]*dphi[W]/(norms[U]*angles[UW][1]) +
vector[UV]*dphi[V]/(norms[U]*angles[UV][1]) -
vector[UW]*dphi[U]/(norms[U]*angles[UW][1]) -
vector[UV]*dphi[U]/(norms[U]*angles[UV][1]))/3;
forces[X] = (vector[VW]*dphi[W]/(norms[V]*angles[VW][1]) -
vector[UV]*dphi[U]/(norms[V]*angles[UV][1]) -
vector[VW]*dphi[V]/(norms[V]*angles[VW][1]) +
vector[UV]*dphi[V]/(norms[V]*angles[UV][1]))/3;
forces[Y] = (-vector[UW]*dphi[U]/(norms[W]*angles[UW][1]) -
vector[VW]*dphi[V]/(norms[W]*angles[VW][1]) +
vector[UW]*dphi[W]/(norms[W]*angles[UW][1]) +
vector[VW]*dphi[W]/(norms[W]*angles[VW][1]))/3;
forces[I] = -(forces[X] + forces[Y] + forces[Z]);
}
else if (axisType == 4) {
// z-only
forces[Z] = vector[UV]*dphi[V]/(norms[U]*angles[UV][1]) + vector[UW]*dphi[W]/norms[U];
forces[X] = make_real3(0);
forces[Y] = make_real3(0);
forces[I] = -forces[Z];
}
else {
forces[Z] = make_real3(0);
forces[X] = make_real3(0);
forces[Y] = make_real3(0);
forces[I] = make_real3(0);
}
// Store results
atomicAdd(&forceBuffers[particles.z], static_cast<unsigned long long>((long long) (forces[Z].x*0x100000000)));
atomicAdd(&forceBuffers[particles.z+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Z].y*0x100000000)));
atomicAdd(&forceBuffers[particles.z+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Z].z*0x100000000)));
if (axisType != 4) {
atomicAdd(&forceBuffers[particles.x], static_cast<unsigned long long>((long long) (forces[X].x*0x100000000)));
atomicAdd(&forceBuffers[particles.x+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[X].y*0x100000000)));
atomicAdd(&forceBuffers[particles.x+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[X].z*0x100000000)));
}
if ((axisType == 2 || axisType == 3) && particles.y > -1) {
atomicAdd(&forceBuffers[particles.y], static_cast<unsigned long long>((long long) (forces[Y].x*0x100000000)));
atomicAdd(&forceBuffers[particles.y+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Y].y*0x100000000)));
atomicAdd(&forceBuffers[particles.y+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Y].z*0x100000000)));
}
atomicAdd(&forceBuffers[atom], static_cast<unsigned long long>((long long) (forces[I].x*0x100000000)));
atomicAdd(&forceBuffers[atom+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[I].y*0x100000000)));
atomicAdd(&forceBuffers[atom+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[I].z*0x100000000)));
}
}
}
real fdamp3, fdamp5;
computeMutualFieldDampingFactors(alpha1, alpha2, r, fdamp3, fdamp5);
#ifdef COMPUTING_EXCEPTIONS
fdamp3 *= scale;
fdamp5 *= scale;
#endif
real invR2 = invR*invR;
real invR3 = invR*invR2;
#if USE_EWALD
real ralpha = PME_ALPHA*r;
real bn0 = erfc(ralpha)*invR;
real alsq2 = 2*PME_ALPHA*PME_ALPHA;
real alsq2n = 1/(SQRT_PI*PME_ALPHA);
real exp2a = EXP(-(ralpha*ralpha));
alsq2n *= alsq2;
real bn1 = (bn0+alsq2n*exp2a)*invR2;
alsq2n *= alsq2;
real bn2 = (3*bn1+alsq2n*exp2a)*invR2;
real scale3 = -bn1 + (1-fdamp3)*invR3;
real scale5 = bn2 - 3*(1-fdamp5)*invR3*invR2;
#else
real scale3 = -fdamp3*invR3;
real scale5 = 3*fdamp5*invR3*invR2;
#endif
tempField1 = inducedDipole2*scale3 + delta*scale5*dot(inducedDipole2, delta);
tempField2 = inducedDipole1*scale3 + delta*scale5*dot(inducedDipole1, delta);
This diff is collapsed.
#define COMPUTING_EXCEPTIONS
/**
* Compute exceptions for HIPPO.
*/
extern "C" __global__ void computeNonbondedExceptions(
unsigned long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, unsigned long long* __restrict__ torqueBuffers,
const real4* __restrict__ posq, const real3* __restrict__ extDipole, const int2* __restrict__ exceptionAtoms, const real* __restrict__ mmScale,
const real* __restrict__ dmScale, const real* __restrict__ ddScale, const real* __restrict__ dispScale, const real* __restrict__ repScale,
const real* __restrict__ coreCharge, const real* __restrict__ valenceCharge, const real* __restrict__ alpha, const real* __restrict__ epsilon,
const real* __restrict__ damping, const real* __restrict__ c6, const real* __restrict__ pauliK, const real* __restrict__ pauliQ,
const real* __restrict__ pauliAlpha, const real3* __restrict__ dipole, const real3* __restrict__ inducedDipole, const real* __restrict__ qXX,
const real* __restrict__ qXY, const real* __restrict__ qXZ, const real* __restrict__ qYY, const real* __restrict__ qYZ,
const real3* __restrict__ extrapolatedDipole
#ifdef USE_CUTOFF
, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVec
#endif
) {
mixed energy = 0;
const bool isExcluded = false;
const real interactionScale = 1.0f;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_EXCEPTIONS; index += blockDim.x*gridDim.x) {
int2 atoms = exceptionAtoms[index];
int atom1 = atoms.x;
int atom2 = atoms.y;
real4 pos1 = posq[atom1];
real4 pos2 = posq[atom2];
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
real coreCharge1 = coreCharge[atom1];
real valenceCharge1 = valenceCharge[atom1];
real alpha1 = alpha[atom1];
real epsilon1 = epsilon[atom1];
real damping1 = damping[atom1];
real c61 = c6[atom1];
real pauliK1 = pauliK[atom1];
real pauliQ1 = pauliQ[atom1];
real pauliAlpha1 = pauliAlpha[atom1];
real3 dipole1 = dipole[atom1];
real3 inducedDipole1 = inducedDipole[atom1];
real qXX1 = qXX[atom1];
real qXY1 = qXY[atom1];
real qXZ1 = qXZ[atom1];
real qYY1 = qYY[atom1];
real qYZ1 = qYZ[atom1];
real coreCharge2 = coreCharge[atom2];
real valenceCharge2 = valenceCharge[atom2];
real alpha2 = alpha[atom2];
real epsilon2 = epsilon[atom2];
real damping2 = damping[atom2];
real c62 = c6[atom2];
real pauliK2 = pauliK[atom2];
real pauliQ2 = pauliQ[atom2];
real pauliAlpha2 = pauliAlpha[atom2];
real3 dipole2 = dipole[atom2];
real3 inducedDipole2 = inducedDipole[atom2];
real qXX2 = qXX[atom2];
real qXY2 = qXY[atom2];
real qXZ2 = qXZ[atom2];
real qYY2 = qYY[atom2];
real qYZ2 = qYZ[atom2];
real multipoleMultipoleScale = mmScale[index];
real dipoleMultipoleScale = dmScale[index];
real dipoleDipoleScale = ddScale[index];
real repulsionScale = repScale[index];
real dispersionScale = dispScale[index];
real rInv = RSQRT(r2);
real r = r2*rInv;
real3 tempForce = make_real3(0);
real3 tempTorque1 = make_real3(0);
real3 tempTorque2 = make_real3(0);
real tempEnergy = 0.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (tempForce.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (tempForce.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (tempForce.z*0x100000000)));
atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>((long long) (-tempForce.x*0x100000000)));
atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (-tempForce.y*0x100000000)));
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (-tempForce.z*0x100000000)));
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (tempTorque1.x*0x100000000)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (tempTorque1.y*0x100000000)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (tempTorque1.z*0x100000000)));
atomicAdd(&torqueBuffers[atom2], static_cast<unsigned long long>((long long) (tempTorque2.x*0x100000000)));
atomicAdd(&torqueBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (tempTorque2.y*0x100000000)));
atomicAdd(&torqueBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (tempTorque2.z*0x100000000)));
#ifdef USE_CUTOFF
}
#endif
}
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
#ifndef HIPPO
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE) #define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef struct { typedef struct {
...@@ -827,9 +828,12 @@ extern "C" __global__ void updateInducedFieldByDIIS(real* __restrict__ inducedDi ...@@ -827,9 +828,12 @@ extern "C" __global__ void updateInducedFieldByDIIS(real* __restrict__ inducedDi
inducedDipolePolar[index] = sumPolar; inducedDipolePolar[index] = sumPolar;
} }
} }
#endif // not HIPPO
extern "C" __global__ void initExtrapolatedDipoles(real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, real* __restrict__ extrapolatedDipole, extern "C" __global__ void initExtrapolatedDipoles(real* __restrict__ inducedDipole, real* __restrict__ extrapolatedDipole
real* __restrict__ extrapolatedDipolePolar, long long* __restrict__ inducedDipoleFieldGradient, long long* __restrict__ inducedDipoleFieldGradientPolar #ifndef HIPPO
, real* __restrict__ inducedDipolePolar, real* __restrict__ extrapolatedDipolePolar, long long* __restrict__ inducedDipoleFieldGradient, long long* __restrict__ inducedDipoleFieldGradientPolar
#endif
#ifdef USE_GK #ifdef USE_GK
, real* __restrict__ inducedDipoleGk, real* __restrict__ inducedDipoleGkPolar, real* __restrict__ extrapolatedDipoleGk, real* __restrict__ extrapolatedDipoleGkPolar, , real* __restrict__ inducedDipoleGk, real* __restrict__ inducedDipoleGkPolar, real* __restrict__ extrapolatedDipoleGk, real* __restrict__ extrapolatedDipoleGkPolar,
real* __restrict__ inducedDipoleFieldGradientGk, real* __restrict__ inducedDipoleFieldGradientGkPolar real* __restrict__ inducedDipoleFieldGradientGk, real* __restrict__ inducedDipoleFieldGradientGkPolar
...@@ -837,12 +841,15 @@ extern "C" __global__ void initExtrapolatedDipoles(real* __restrict__ inducedDip ...@@ -837,12 +841,15 @@ extern "C" __global__ void initExtrapolatedDipoles(real* __restrict__ inducedDip
) { ) {
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 3*NUM_ATOMS; index += blockDim.x*gridDim.x) { for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 3*NUM_ATOMS; index += blockDim.x*gridDim.x) {
extrapolatedDipole[index] = inducedDipole[index]; extrapolatedDipole[index] = inducedDipole[index];
#ifndef HIPPO
extrapolatedDipolePolar[index] = inducedDipolePolar[index]; extrapolatedDipolePolar[index] = inducedDipolePolar[index];
#endif
#ifdef USE_GK #ifdef USE_GK
extrapolatedDipoleGk[index] = inducedDipoleGk[index]; extrapolatedDipoleGk[index] = inducedDipoleGk[index];
extrapolatedDipoleGkPolar[index] = inducedDipoleGkPolar[index]; extrapolatedDipoleGkPolar[index] = inducedDipoleGkPolar[index];
#endif #endif
} }
#ifndef HIPPO
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 6*NUM_ATOMS; index += blockDim.x*gridDim.x) { for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 6*NUM_ATOMS; index += blockDim.x*gridDim.x) {
inducedDipoleFieldGradient[index] = 0; inducedDipoleFieldGradient[index] = 0;
inducedDipoleFieldGradientPolar[index] = 0; inducedDipoleFieldGradientPolar[index] = 0;
...@@ -851,17 +858,25 @@ extern "C" __global__ void initExtrapolatedDipoles(real* __restrict__ inducedDip ...@@ -851,17 +858,25 @@ extern "C" __global__ void initExtrapolatedDipoles(real* __restrict__ inducedDip
inducedDipoleFieldGradientGkPolar[index] = 0; inducedDipoleFieldGradientGkPolar[index] = 0;
#endif #endif
} }
#endif
} }
extern "C" __global__ void iterateExtrapolatedDipoles(int order, real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, real* __restrict__ extrapolatedDipole, extern "C" __global__ void iterateExtrapolatedDipoles(int order, real* __restrict__ inducedDipole, real* __restrict__ extrapolatedDipole, long long* __restrict__ inducedDipoleField,
real* __restrict__ extrapolatedDipolePolar, long long* __restrict__ inducedDipoleFieldGradient, long long* __restrict__ inducedDipoleFieldGradientPolar, #ifndef HIPPO
long long* __restrict__ inducedDipoleField, long long* __restrict__ inducedDipoleFieldPolar, real* __restrict__ extrapolatedDipoleFieldGradient, real* __restrict__ extrapolatedDipoleFieldGradientPolar, real* __restrict__ inducedDipolePolar, real* __restrict__ extrapolatedDipolePolar, long long* __restrict__ inducedDipoleFieldPolar, long long* __restrict__ inducedDipoleFieldGradient,
long long* __restrict__ inducedDipoleFieldGradientPolar, real* __restrict__ extrapolatedDipoleFieldGradient, real* __restrict__ extrapolatedDipoleFieldGradientPolar,
#endif
#ifdef USE_GK #ifdef USE_GK
real* __restrict__ inducedDipoleGk, real* __restrict__ inducedDipoleGkPolar, real* __restrict__ extrapolatedDipoleGk, real* __restrict__ extrapolatedDipoleGkPolar, real* __restrict__ inducedDipoleGk, real* __restrict__ inducedDipoleGkPolar, real* __restrict__ extrapolatedDipoleGk, real* __restrict__ extrapolatedDipoleGkPolar,
real* __restrict__ inducedDipoleFieldGradientGk, real* __restrict__ inducedDipoleFieldGradientGkPolar, long long* __restrict__ inducedDipoleFieldGk, real* __restrict__ inducedDipoleFieldGradientGk, real* __restrict__ inducedDipoleFieldGradientGkPolar, long long* __restrict__ inducedDipoleFieldGk,
long long* __restrict__ inducedDipoleFieldGkPolar, real* __restrict__ extrapolatedDipoleFieldGradientGk, real* __restrict__ extrapolatedDipoleFieldGradientGkPolar, long long* __restrict__ inducedDipoleFieldGkPolar, real* __restrict__ extrapolatedDipoleFieldGradientGk, real* __restrict__ extrapolatedDipoleFieldGradientGkPolar,
#endif #endif
const float* __restrict__ polarizability) { #ifdef HIPPO
const real* __restrict__ polarizability
#else
const float* __restrict__ polarizability
#endif
) {
const real fieldScale = 1/(real) 0x100000000; const real fieldScale = 1/(real) 0x100000000;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 3*NUM_ATOMS; index += blockDim.x*gridDim.x) { for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 3*NUM_ATOMS; index += blockDim.x*gridDim.x) {
int atom = index/3; int atom = index/3;
...@@ -871,9 +886,11 @@ extern "C" __global__ void iterateExtrapolatedDipoles(int order, real* __restric ...@@ -871,9 +886,11 @@ extern "C" __global__ void iterateExtrapolatedDipoles(int order, real* __restric
real value = inducedDipoleField[fieldIndex]*fieldScale*polar; real value = inducedDipoleField[fieldIndex]*fieldScale*polar;
inducedDipole[index] = value; inducedDipole[index] = value;
extrapolatedDipole[order*3*NUM_ATOMS+index] = value; extrapolatedDipole[order*3*NUM_ATOMS+index] = value;
#ifndef HIPPO
value = inducedDipoleFieldPolar[fieldIndex]*fieldScale*polar; value = inducedDipoleFieldPolar[fieldIndex]*fieldScale*polar;
inducedDipolePolar[index] = value; inducedDipolePolar[index] = value;
extrapolatedDipolePolar[order*3*NUM_ATOMS+index] = value; extrapolatedDipolePolar[order*3*NUM_ATOMS+index] = value;
#endif
#ifdef USE_GK #ifdef USE_GK
value = inducedDipoleFieldGk[fieldIndex]*fieldScale*polar; value = inducedDipoleFieldGk[fieldIndex]*fieldScale*polar;
inducedDipoleGk[index] = value; inducedDipoleGk[index] = value;
...@@ -883,6 +900,7 @@ extern "C" __global__ void iterateExtrapolatedDipoles(int order, real* __restric ...@@ -883,6 +900,7 @@ extern "C" __global__ void iterateExtrapolatedDipoles(int order, real* __restric
extrapolatedDipoleGkPolar[order*3*NUM_ATOMS+index] = value; extrapolatedDipoleGkPolar[order*3*NUM_ATOMS+index] = value;
#endif #endif
} }
#ifndef HIPPO
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 6*NUM_ATOMS; index += blockDim.x*gridDim.x) { for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 6*NUM_ATOMS; index += blockDim.x*gridDim.x) {
int index2 = (order-1)*6*NUM_ATOMS+index; int index2 = (order-1)*6*NUM_ATOMS+index;
extrapolatedDipoleFieldGradient[index2] = fieldScale*inducedDipoleFieldGradient[index]; extrapolatedDipoleFieldGradient[index2] = fieldScale*inducedDipoleFieldGradient[index];
...@@ -892,10 +910,13 @@ extern "C" __global__ void iterateExtrapolatedDipoles(int order, real* __restric ...@@ -892,10 +910,13 @@ extern "C" __global__ void iterateExtrapolatedDipoles(int order, real* __restric
extrapolatedDipoleFieldGradientGkPolar[index2] = fieldScale*inducedDipoleFieldGradientGkPolar[index]; extrapolatedDipoleFieldGradientGkPolar[index2] = fieldScale*inducedDipoleFieldGradientGkPolar[index];
#endif #endif
} }
#endif
} }
extern "C" __global__ void computeExtrapolatedDipoles(real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, real* __restrict__ extrapolatedDipole, extern "C" __global__ void computeExtrapolatedDipoles(real* __restrict__ inducedDipole, real* __restrict__ extrapolatedDipole
real* __restrict__ extrapolatedDipolePolar #ifndef HIPPO
, real* __restrict__ inducedDipolePolar, real* __restrict__ extrapolatedDipolePolar
#endif
#ifdef USE_GK #ifdef USE_GK
, real* __restrict__ inducedDipoleGk, real* __restrict__ inducedDipoleGkPolar, real* __restrict__ extrapolatedDipoleGk, real* __restrict__ extrapolatedDipoleGkPolar , real* __restrict__ inducedDipoleGk, real* __restrict__ inducedDipoleGkPolar, real* __restrict__ extrapolatedDipoleGk, real* __restrict__ extrapolatedDipoleGkPolar
#endif #endif
...@@ -905,14 +926,18 @@ extern "C" __global__ void computeExtrapolatedDipoles(real* __restrict__ induced ...@@ -905,14 +926,18 @@ extern "C" __global__ void computeExtrapolatedDipoles(real* __restrict__ induced
real sum = 0, sumPolar = 0, sumGk = 0, sumGkPolar = 0; real sum = 0, sumPolar = 0, sumGk = 0, sumGkPolar = 0;
for (int order = 0; order < MAX_EXTRAPOLATION_ORDER; order++) { for (int order = 0; order < MAX_EXTRAPOLATION_ORDER; order++) {
sum += extrapolatedDipole[order*3*NUM_ATOMS+index]*coeff[order]; sum += extrapolatedDipole[order*3*NUM_ATOMS+index]*coeff[order];
#ifndef HIPPO
sumPolar += extrapolatedDipolePolar[order*3*NUM_ATOMS+index]*coeff[order]; sumPolar += extrapolatedDipolePolar[order*3*NUM_ATOMS+index]*coeff[order];
#endif
#ifdef USE_GK #ifdef USE_GK
sumGk += extrapolatedDipoleGk[order*3*NUM_ATOMS+index]*coeff[order]; sumGk += extrapolatedDipoleGk[order*3*NUM_ATOMS+index]*coeff[order];
sumGkPolar += extrapolatedDipoleGkPolar[order*3*NUM_ATOMS+index]*coeff[order]; sumGkPolar += extrapolatedDipoleGkPolar[order*3*NUM_ATOMS+index]*coeff[order];
#endif #endif
} }
inducedDipole[index] = sum; inducedDipole[index] = sum;
#ifndef HIPPO
inducedDipolePolar[index] = sumPolar; inducedDipolePolar[index] = sumPolar;
#endif
#ifdef USE_GK #ifdef USE_GK
inducedDipoleGk[index] = sumGk; inducedDipoleGk[index] = sumGk;
inducedDipoleGkPolar[index] = sumGkPolar; inducedDipoleGkPolar[index] = sumGkPolar;
...@@ -970,3 +995,13 @@ extern "C" __global__ void addExtrapolatedFieldGradientToForce(long long* __rest ...@@ -970,3 +995,13 @@ extern "C" __global__ void addExtrapolatedFieldGradientToForce(long long* __rest
forceBuffers[atom+PADDED_NUM_ATOMS*2] += (long long) (fz*0x100000000); forceBuffers[atom+PADDED_NUM_ATOMS*2] += (long long) (fz*0x100000000);
} }
} }
#ifdef HIPPO
extern "C" __global__ void computePolarizationEnergy(mixed* __restrict__ energyBuffer, const real3* __restrict__ inducedDipole,
const real3* __restrict__ extrapolatedDipole, const real* __restrict__ polarizability) {
mixed energy = 0;
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x)
energy -= (ENERGY_SCALE_FACTOR/2)*dot(extrapolatedDipole[atom], inducedDipole[atom])/polarizability[atom];
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
#endif
\ No newline at end of file
...@@ -59,6 +59,7 @@ extern "C" OPENMM_EXPORT void registerKernelFactories() { ...@@ -59,6 +59,7 @@ extern "C" OPENMM_EXPORT void registerKernelFactories() {
platform.registerKernelFactory(CalcAmoebaMultipoleForceKernel::Name(), factory); platform.registerKernelFactory(CalcAmoebaMultipoleForceKernel::Name(), factory);
platform.registerKernelFactory(CalcAmoebaGeneralizedKirkwoodForceKernel::Name(), factory); platform.registerKernelFactory(CalcAmoebaGeneralizedKirkwoodForceKernel::Name(), factory);
platform.registerKernelFactory(CalcAmoebaWcaDispersionForceKernel::Name(), factory); platform.registerKernelFactory(CalcAmoebaWcaDispersionForceKernel::Name(), factory);
platform.registerKernelFactory(CalcHippoNonbondedForceKernel::Name(), factory);
} }
} }
} }
...@@ -105,5 +106,8 @@ KernelImpl* AmoebaReferenceKernelFactory::createKernelImpl(std::string name, con ...@@ -105,5 +106,8 @@ KernelImpl* AmoebaReferenceKernelFactory::createKernelImpl(std::string name, con
if (name == CalcAmoebaWcaDispersionForceKernel::Name()) if (name == CalcAmoebaWcaDispersionForceKernel::Name())
return new ReferenceCalcAmoebaWcaDispersionForceKernel(name, platform, context.getSystem()); return new ReferenceCalcAmoebaWcaDispersionForceKernel(name, platform, context.getSystem());
if (name == CalcHippoNonbondedForceKernel::Name())
return new ReferenceCalcHippoNonbondedForceKernel(name, platform, context.getSystem());
throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str()); throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
} }
\ No newline at end of file
#ifndef OPENMM_HIPPO_NONBONDED_FORCE_PROXY_H_
#define OPENMM_HIPPO_NONBONDED_FORCE_PROXY_H_
/* -------------------------------------------------------------------------- *
* OpenMMAmoeba *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/internal/windowsExportAmoeba.h"
#include "openmm/serialization/SerializationProxy.h"
namespace OpenMM {
/**
* This is a proxy for serializing HippoNonbondedForce objects.
*/
class OPENMM_EXPORT_AMOEBA HippoNonbondedForceProxy : public SerializationProxy {
public:
HippoNonbondedForceProxy();
void serialize(const void* object, SerializationNode& node) const;
void* deserialize(const SerializationNode& node) const;
};
} // namespace OpenMM
#endif /*OPENMM_HIPPO_NONBONDED_FORCE_PROXY_H_*/
...@@ -51,6 +51,7 @@ ...@@ -51,6 +51,7 @@
#include "openmm/AmoebaTorsionTorsionForce.h" #include "openmm/AmoebaTorsionTorsionForce.h"
#include "openmm/AmoebaVdwForce.h" #include "openmm/AmoebaVdwForce.h"
#include "openmm/AmoebaWcaDispersionForce.h" #include "openmm/AmoebaWcaDispersionForce.h"
#include "openmm/HippoNonbondedForce.h"
#include "openmm/serialization/SerializationProxy.h" #include "openmm/serialization/SerializationProxy.h"
...@@ -65,6 +66,7 @@ ...@@ -65,6 +66,7 @@
#include "openmm/serialization/AmoebaTorsionTorsionForceProxy.h" #include "openmm/serialization/AmoebaTorsionTorsionForceProxy.h"
#include "openmm/serialization/AmoebaVdwForceProxy.h" #include "openmm/serialization/AmoebaVdwForceProxy.h"
#include "openmm/serialization/AmoebaWcaDispersionForceProxy.h" #include "openmm/serialization/AmoebaWcaDispersionForceProxy.h"
#include "openmm/serialization/HippoNonbondedForceProxy.h"
#if defined(WIN32) #if defined(WIN32)
#include <windows.h> #include <windows.h>
...@@ -92,4 +94,5 @@ extern "C" OPENMM_EXPORT_AMOEBA void registerAmoebaSerializationProxies() { ...@@ -92,4 +94,5 @@ extern "C" OPENMM_EXPORT_AMOEBA void registerAmoebaSerializationProxies() {
SerializationProxy::registerProxy(typeid(AmoebaTorsionTorsionForce), new AmoebaTorsionTorsionForceProxy()); SerializationProxy::registerProxy(typeid(AmoebaTorsionTorsionForce), new AmoebaTorsionTorsionForceProxy());
SerializationProxy::registerProxy(typeid(AmoebaVdwForce), new AmoebaVdwForceProxy()); SerializationProxy::registerProxy(typeid(AmoebaVdwForce), new AmoebaVdwForceProxy());
SerializationProxy::registerProxy(typeid(AmoebaWcaDispersionForce), new AmoebaWcaDispersionForceProxy()); SerializationProxy::registerProxy(typeid(AmoebaWcaDispersionForce), new AmoebaWcaDispersionForceProxy());
SerializationProxy::registerProxy(typeid(HippoNonbondedForce), new HippoNonbondedForceProxy());
} }
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment