Commit 3d1b2186 authored by peastman
Browse files

Further optimizations to NonbondedForce

parent 86d09347
{
#if USE_EWALD #if USE_EWALD
bool needCorrection = hasExclusions && isExcluded && atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS; bool needCorrection = hasExclusions && isExcluded && atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS;
if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) { unsigned int includeInteraction = ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection);
const real alphaR = EWALD_ALPHA*r; const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR); const real expAlphaRSqr = EXP(-alphaR*alphaR);
const real prefactor = 138.935456f*posq1.w*posq2.w*invR; const real prefactor = 138.935456f*posq1.w*posq2.w*invR;
...@@ -44,16 +45,14 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) { ...@@ -44,16 +45,14 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
} }
#endif #endif
tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI); tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += ljEnergy + prefactor*erfcAlphaR; tempEnergy += includeInteraction ? ljEnergy + prefactor*erfcAlphaR : 0;
#else #else
tempForce = prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI); tempForce = prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += prefactor*erfcAlphaR; tempEnergy += includeInteraction ? prefactor*erfcAlphaR : 0;
#endif #endif
} }
dEdR += tempForce*invR*invR; dEdR += includeInteraction ? tempForce*invR*invR : 0;
}
#else #else
{
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED); unsigned int includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED);
#else #else
...@@ -91,5 +90,5 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) { ...@@ -91,5 +90,5 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
#endif #endif
#endif #endif
dEdR += includeInteraction ? tempForce*invR*invR : 0; dEdR += includeInteraction ? tempForce*invR*invR : 0;
#endif
} }
#endif
\ No newline at end of file
...@@ -228,57 +228,51 @@ extern "C" __global__ void computeNonbonded( ...@@ -228,57 +228,51 @@ extern "C" __global__ void computeNonbonded(
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z; delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF real invR = RSQRT(r2);
if (r2 < CUTOFF_SQUARED) { real r = r2*invR;
#endif LOAD_ATOM2_PARAMETERS
real invR = RSQRT(r2); atom2 = y*TILE_SIZE+tj;
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+tj;
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
real dEdR = 0.0f; real dEdR = 0.0f;
#else #else
real3 dEdR1 = make_real3(0); real3 dEdR1 = make_real3(0);
real3 dEdR2 = make_real3(0); real3 dEdR2 = make_real3(0);
#endif #endif
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1)); bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
#endif #endif
real tempEnergy = 0.0f; real tempEnergy = 0.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
energy += tempEnergy; energy += tempEnergy;
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
delta *= dEdR; delta *= dEdR;
force.x -= delta.x; force.x -= delta.x;
force.y -= delta.y; force.y -= delta.y;
force.z -= delta.z; force.z -= delta.z;
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
shflForce.x += delta.x; shflForce.x += delta.x;
shflForce.y += delta.y; shflForce.y += delta.y;
shflForce.z += delta.z; shflForce.z += delta.z;
#else #else
localData[tbx+tj].fx += delta.x; localData[tbx+tj].fx += delta.x;
localData[tbx+tj].fy += delta.y; localData[tbx+tj].fy += delta.y;
localData[tbx+tj].fz += delta.z; localData[tbx+tj].fz += delta.z;
#endif #endif
#else // !USE_SYMMETRIC #else // !USE_SYMMETRIC
force.x -= dEdR1.x; force.x -= dEdR1.x;
force.y -= dEdR1.y; force.y -= dEdR1.y;
force.z -= dEdR1.z; force.z -= dEdR1.z;
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
shflForce.x += dEdR2.x; shflForce.x += dEdR2.x;
shflForce.y += dEdR2.y; shflForce.y += dEdR2.y;
shflForce.z += dEdR2.z; shflForce.z += dEdR2.z;
#else #else
localData[tbx+tj].fx += dEdR2.x; localData[tbx+tj].fx += dEdR2.x;
localData[tbx+tj].fy += dEdR2.y; localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z; localData[tbx+tj].fz += dEdR2.z;
#endif #endif
#endif // end USE_SYMMETRIC #endif // end USE_SYMMETRIC
#ifdef USE_CUTOFF
}
#endif
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
excl >>= 1; excl >>= 1;
#endif #endif
...@@ -431,53 +425,51 @@ extern "C" __global__ void computeNonbonded( ...@@ -431,53 +425,51 @@ extern "C" __global__ void computeNonbonded(
#endif #endif
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z); real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) { real invR = RSQRT(r2);
real invR = RSQRT(r2); real r = r2*invR;
real r = r2*invR; LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS atom2 = atomIndices[tbx+tj];
atom2 = atomIndices[tbx+tj];
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
real dEdR = 0.0f; real dEdR = 0.0f;
#else #else
real3 dEdR1 = make_real3(0); real3 dEdR1 = make_real3(0);
real3 dEdR2 = make_real3(0); real3 dEdR2 = make_real3(0);
#endif #endif
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS); bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS);
#endif #endif
real tempEnergy = 0.0f; real tempEnergy = 0.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
energy += tempEnergy; energy += tempEnergy;
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
delta *= dEdR; delta *= dEdR;
force.x -= delta.x; force.x -= delta.x;
force.y -= delta.y; force.y -= delta.y;
force.z -= delta.z; force.z -= delta.z;
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
shflForce.x += delta.x; shflForce.x += delta.x;
shflForce.y += delta.y; shflForce.y += delta.y;
shflForce.z += delta.z; shflForce.z += delta.z;
#else #else
localData[tbx+tj].fx += delta.x; localData[tbx+tj].fx += delta.x;
localData[tbx+tj].fy += delta.y; localData[tbx+tj].fy += delta.y;
localData[tbx+tj].fz += delta.z; localData[tbx+tj].fz += delta.z;
#endif #endif
#else // !USE_SYMMETRIC #else // !USE_SYMMETRIC
force.x -= dEdR1.x; force.x -= dEdR1.x;
force.y -= dEdR1.y; force.y -= dEdR1.y;
force.z -= dEdR1.z; force.z -= dEdR1.z;
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
shflForce.x += dEdR2.x; shflForce.x += dEdR2.x;
shflForce.y += dEdR2.y; shflForce.y += dEdR2.y;
shflForce.z += dEdR2.z; shflForce.z += dEdR2.z;
#else #else
localData[tbx+tj].fx += dEdR2.x; localData[tbx+tj].fx += dEdR2.x;
localData[tbx+tj].fy += dEdR2.y; localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z; localData[tbx+tj].fz += dEdR2.z;
#endif #endif
#endif // end USE_SYMMETRIC #endif // end USE_SYMMETRIC
}
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA SHUFFLE_WARP_DATA
#endif #endif
...@@ -503,57 +495,51 @@ extern "C" __global__ void computeNonbonded( ...@@ -503,57 +495,51 @@ extern "C" __global__ void computeNonbonded(
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z; delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF real invR = RSQRT(r2);
if (r2 < CUTOFF_SQUARED) { real r = r2*invR;
#endif LOAD_ATOM2_PARAMETERS
real invR = RSQRT(r2); atom2 = atomIndices[tbx+tj];
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj];
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
real dEdR = 0.0f; real dEdR = 0.0f;
#else #else
real3 dEdR1 = make_real3(0); real3 dEdR1 = make_real3(0);
real3 dEdR2 = make_real3(0); real3 dEdR2 = make_real3(0);
#endif #endif
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS); bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS);
#endif #endif
real tempEnergy = 0.0f; real tempEnergy = 0.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
energy += tempEnergy; energy += tempEnergy;
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
delta *= dEdR; delta *= dEdR;
force.x -= delta.x; force.x -= delta.x;
force.y -= delta.y; force.y -= delta.y;
force.z -= delta.z; force.z -= delta.z;
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
shflForce.x += delta.x; shflForce.x += delta.x;
shflForce.y += delta.y; shflForce.y += delta.y;
shflForce.z += delta.z; shflForce.z += delta.z;
#else #else
localData[tbx+tj].fx += delta.x; localData[tbx+tj].fx += delta.x;
localData[tbx+tj].fy += delta.y; localData[tbx+tj].fy += delta.y;
localData[tbx+tj].fz += delta.z; localData[tbx+tj].fz += delta.z;
#endif #endif
#else // !USE_SYMMETRIC #else // !USE_SYMMETRIC
force.x -= dEdR1.x; force.x -= dEdR1.x;
force.y -= dEdR1.y; force.y -= dEdR1.y;
force.z -= dEdR1.z; force.z -= dEdR1.z;
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
shflForce.x += dEdR2.x; shflForce.x += dEdR2.x;
shflForce.y += dEdR2.y; shflForce.y += dEdR2.y;
shflForce.z += dEdR2.z; shflForce.z += dEdR2.z;
#else #else
localData[tbx+tj].fx += dEdR2.x; localData[tbx+tj].fx += dEdR2.x;
localData[tbx+tj].fy += dEdR2.y; localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z; localData[tbx+tj].fz += dEdR2.z;
#endif #endif
#endif // end USE_SYMMETRIC #endif // end USE_SYMMETRIC
#ifdef USE_CUTOFF
}
#endif
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA SHUFFLE_WARP_DATA
#endif #endif
......
...@@ -633,7 +633,7 @@ private: ...@@ -633,7 +633,7 @@ private:
std::map<std::string, std::string> pmeDefines; std::map<std::string, std::string> pmeDefines;
std::vector<std::pair<int, int> > exceptionAtoms; std::vector<std::pair<int, int> > exceptionAtoms;
double ewaldSelfEnergy, dispersionCoefficient, alpha; double ewaldSelfEnergy, dispersionCoefficient, alpha;
bool hasCoulomb, hasLJ; bool hasCoulomb, hasLJ, usePmeQueue;
static const int PmeOrder = 5; static const int PmeOrder = 5;
}; };
......
...@@ -1609,12 +1609,16 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1609,12 +1609,16 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
pmeAtomGridIndex = OpenCLArray::create<mm_int2>(cl, numParticles, "pmeAtomGridIndex"); pmeAtomGridIndex = OpenCLArray::create<mm_int2>(cl, numParticles, "pmeAtomGridIndex");
sort = new OpenCLSort(cl, new SortTrait(), cl.getNumAtoms()); sort = new OpenCLSort(cl, new SortTrait(), cl.getNumAtoms());
fft = new OpenCLFFT3D(cl, gridSizeX, gridSizeY, gridSizeZ); fft = new OpenCLFFT3D(cl, gridSizeX, gridSizeY, gridSizeZ);
pmeQueue = cl::CommandQueue(cl.getContext(), cl.getDevice()); string vendor = cl.getDevice().getInfo<CL_DEVICE_VENDOR>();
int recipForceGroup = force.getReciprocalSpaceForceGroup(); usePmeQueue = (vendor.size() >= 6 && vendor.substr(0, 6) == "NVIDIA");
if (recipForceGroup < 0) if (usePmeQueue) {
recipForceGroup = force.getForceGroup(); pmeQueue = cl::CommandQueue(cl.getContext(), cl.getDevice());
cl.addPreComputation(new SyncQueuePreComputation(cl, pmeQueue, recipForceGroup)); int recipForceGroup = force.getReciprocalSpaceForceGroup();
cl.addPostComputation(new SyncQueuePostComputation(cl, pmeSyncEvent, recipForceGroup)); if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup();
cl.addPreComputation(new SyncQueuePreComputation(cl, pmeQueue, recipForceGroup));
cl.addPostComputation(new SyncQueuePostComputation(cl, pmeSyncEvent, recipForceGroup));
}
// Initialize the b-spline moduli. // Initialize the b-spline moduli.
...@@ -1794,7 +1798,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1794,7 +1798,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(ewaldForcesKernel, cl.getNumAtoms()); cl.executeKernel(ewaldForcesKernel, cl.getNumAtoms());
} }
if (pmeGrid != NULL && includeReciprocal) { if (pmeGrid != NULL && includeReciprocal) {
cl.setQueue(pmeQueue); if (usePmeQueue)
cl.setQueue(pmeQueue);
setPeriodicBoxSizeArg(cl, pmeUpdateBsplinesKernel, 4); setPeriodicBoxSizeArg(cl, pmeUpdateBsplinesKernel, 4);
setInvPeriodicBoxSizeArg(cl, pmeUpdateBsplinesKernel, 5); setInvPeriodicBoxSizeArg(cl, pmeUpdateBsplinesKernel, 5);
cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms()); cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms());
...@@ -1837,8 +1842,10 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1837,8 +1842,10 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeInterpolateForceKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1); cl.executeKernel(pmeInterpolateForceKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
else else
cl.executeKernel(pmeInterpolateForceKernel, cl.getNumAtoms()); cl.executeKernel(pmeInterpolateForceKernel, cl.getNumAtoms());
pmeQueue.enqueueMarker(&pmeSyncEvent); if (usePmeQueue) {
cl.restoreDefaultQueue(); pmeQueue.enqueueMarker(&pmeSyncEvent);
cl.restoreDefaultQueue();
}
} }
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0); double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if (dispersionCoefficient != 0.0 && includeDirect) { if (dispersionCoefficient != 0.0 && includeDirect) {
......
...@@ -573,6 +573,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -573,6 +573,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines["USE_EXCLUSIONS"] = "1"; defines["USE_EXCLUSIONS"] = "1";
if (isSymmetric) if (isSymmetric)
defines["USE_SYMMETRIC"] = "1"; defines["USE_SYMMETRIC"] = "1";
if (useCutoff && context.getSIMDWidth() < 32)
defines["PRUNE_BY_CUTOFF"] = "1";
defines["FORCE_WORK_GROUP_SIZE"] = context.intToString(forceThreadBlockSize); defines["FORCE_WORK_GROUP_SIZE"] = context.intToString(forceThreadBlockSize);
defines["CUTOFF_SQUARED"] = context.doubleToString(cutoff*cutoff); defines["CUTOFF_SQUARED"] = context.doubleToString(cutoff*cutoff);
defines["CUTOFF"] = context.doubleToString(cutoff); defines["CUTOFF"] = context.doubleToString(cutoff);
......
{
#ifdef USE_DOUBLE_PRECISION
unsigned long includeInteraction;
#else
unsigned int includeInteraction;
#endif
#if USE_EWALD #if USE_EWALD
bool needCorrection = hasExclusions && isExcluded && atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS; bool needCorrection = hasExclusions && isExcluded && atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS;
if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) { includeInteraction = ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection);
const real alphaR = EWALD_ALPHA*r; const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR); const real expAlphaRSqr = EXP(-alphaR*alphaR);
const real prefactor = 138.935456f*posq1.w*posq2.w*invR; const real prefactor = 138.935456f*posq1.w*posq2.w*invR;
...@@ -44,21 +50,14 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) { ...@@ -44,21 +50,14 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
} }
#endif #endif
tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI); tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += ljEnergy + prefactor*erfcAlphaR; tempEnergy += select((real) 0, ljEnergy + prefactor*erfcAlphaR, includeInteraction);
#else #else
tempForce = prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI); tempForce = prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += prefactor*erfcAlphaR; tempEnergy += select((real) 0, prefactor*erfcAlphaR, includeInteraction);
#endif #endif
} }
dEdR += tempForce*invR*invR; dEdR += select((real) 0, tempForce*invR*invR, includeInteraction);
}
#else
{
#ifdef USE_DOUBLE_PRECISION
unsigned long includeInteraction;
#else #else
unsigned int includeInteraction;
#endif
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED); includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED);
#else #else
...@@ -97,5 +96,5 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) { ...@@ -97,5 +96,5 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
#endif #endif
#endif #endif
dEdR += select((real) 0, tempForce*invR*invR, includeInteraction); dEdR += select((real) 0, tempForce*invR*invR, includeInteraction);
#endif
} }
#endif
\ No newline at end of file
...@@ -124,7 +124,7 @@ __kernel void computeNonbonded( ...@@ -124,7 +124,7 @@ __kernel void computeNonbonded(
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
...@@ -155,7 +155,7 @@ __kernel void computeNonbonded( ...@@ -155,7 +155,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y; localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z; localData[tbx+tj].fz += dEdR2.z;
#endif #endif
#ifdef USE_CUTOFF #ifdef PRUNE_BY_CUTOFF
} }
#endif #endif
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
...@@ -295,7 +295,9 @@ __kernel void computeNonbonded( ...@@ -295,7 +295,9 @@ __kernel void computeNonbonded(
real4 posq2 = (real4) (localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q); real4 posq2 = (real4) (localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = r2*invR; real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
...@@ -324,7 +326,9 @@ __kernel void computeNonbonded( ...@@ -324,7 +326,9 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y; localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z; localData[tbx+tj].fz += dEdR2.z;
#endif #endif
#ifdef PRUNE_BY_CUTOFF
} }
#endif
tj = (tj + 1) & (TILE_SIZE - 1); tj = (tj + 1) & (TILE_SIZE - 1);
SYNC_WARPS; SYNC_WARPS;
} }
...@@ -343,7 +347,7 @@ __kernel void computeNonbonded( ...@@ -343,7 +347,7 @@ __kernel void computeNonbonded(
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
...@@ -374,7 +378,7 @@ __kernel void computeNonbonded( ...@@ -374,7 +378,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y; localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z; localData[tbx+tj].fz += dEdR2.z;
#endif #endif
#ifdef USE_CUTOFF #ifdef PRUNE_BY_CUTOFF
} }
#endif #endif
tj = (tj + 1) & (TILE_SIZE - 1); tj = (tj + 1) & (TILE_SIZE - 1);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment