"...reference/src/SimTKReference/SimTKOpenMMUtilities.cpp" did not exist on "1d3ffd7bd7ad5f8f4d7589ef02040f0edce07328"
Unverified Commit 0a4867a9 authored by peastman's avatar peastman Committed by GitHub
Browse files

Merge pull request #2318 from peastman/exclusions

Revised how exclusions are computed with PME
parents df52ff73 de4c9900
...@@ -678,6 +678,8 @@ private: ...@@ -678,6 +678,8 @@ private:
CudaArray charges; CudaArray charges;
CudaArray sigmaEpsilon; CudaArray sigmaEpsilon;
CudaArray exceptionParams; CudaArray exceptionParams;
CudaArray exclusionAtoms;
CudaArray exclusionParams;
CudaArray baseParticleParams; CudaArray baseParticleParams;
CudaArray baseExceptionParams; CudaArray baseExceptionParams;
CudaArray particleParamOffsets; CudaArray particleParamOffsets;
...@@ -707,7 +709,7 @@ private: ...@@ -707,7 +709,7 @@ private:
CudaFFT3D* dispersionFft; CudaFFT3D* dispersionFft;
cufftHandle dispersionFftForward; cufftHandle dispersionFftForward;
cufftHandle dispersionFftBackward; cufftHandle dispersionFftBackward;
CUfunction computeParamsKernel; CUfunction computeParamsKernel, computeExclusionParamsKernel;
CUfunction ewaldSumsKernel; CUfunction ewaldSumsKernel;
CUfunction ewaldForcesKernel; CUfunction ewaldForcesKernel;
CUfunction pmeGridIndexKernel; CUfunction pmeGridIndexKernel;
......
...@@ -1665,6 +1665,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1665,6 +1665,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0); hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0);
if (hasOffsets) if (hasOffsets)
paramsDefines["HAS_OFFSETS"] = "1"; paramsDefines["HAS_OFFSETS"] = "1";
if (usePosqCharges)
paramsDefines["USE_POSQ_CHARGES"] = "1";
if (nonbondedMethod == Ewald) { if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters. // Compute the Ewald parameters.
...@@ -1929,7 +1931,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1929,7 +1931,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Evaluate the actual bspline moduli for X/Y/Z. // Evaluate the actual bspline moduli for X/Y/Z.
for(int dim = 0; dim < 3; dim++) { for (int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? xsize : dim == 1 ? ysize : zsize); int ndata = (dim == 0 ? xsize : dim == 1 ? ysize : zsize);
vector<double> moduli(ndata); vector<double> moduli(ndata);
for (int i = 0; i < ndata; i++) { for (int i = 0; i < ndata; i++) {
...@@ -1957,6 +1959,37 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1957,6 +1959,37 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
} }
} }
// Add code to subtract off the reciprocal part of excluded interactions.
if ((nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) && pmeio == NULL) {
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumExceptions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumExceptions()/numContexts;
int numExclusions = endIndex-startIndex;
if (numExclusions > 0) {
paramsDefines["HAS_EXCLUSIONS"] = "1";
vector<vector<int> > atoms(numExclusions, vector<int>(2));
exclusionAtoms.initialize<int2>(cu, numExclusions, "exclusionAtoms");
exclusionParams.initialize<float4>(cu, numExclusions, "exclusionParams");
vector<int2> exclusionAtomsVec(numExclusions);
for (int i = 0; i < numExclusions; i++) {
int j = i+startIndex;
exclusionAtomsVec[i] = make_int2(exclusions[j].first, exclusions[j].second);
atoms[i][0] = exclusions[j].first;
atoms[i][1] = exclusions[j].second;
}
exclusionAtoms.upload(exclusionAtomsVec);
map<string, string> replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exclusionParams.getDevicePointer(), "float4");
replacements["EWALD_ALPHA"] = cu.doubleToString(alpha);
replacements["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
replacements["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME)
replacements["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::pmeExclusions, replacements), force.getForceGroup());
}
}
// Add the interaction to the default nonbonded kernel. // Add the interaction to the default nonbonded kernel.
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines); string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
...@@ -1967,7 +2000,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1967,7 +2000,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (usePosqCharges) { if (usePosqCharges) {
replacements["CHARGE1"] = "posq1.w"; replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w"; replacements["CHARGE2"] = "posq2.w";
paramsDefines["USE_POSQ_CHARGES"] = "1";
} }
else { else {
replacements["CHARGE1"] = prefix+"charge1"; replacements["CHARGE1"] = prefix+"charge1";
...@@ -2078,6 +2110,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -2078,6 +2110,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
CUmodule module = cu.createModule(CudaKernelSources::nonbondedParameters, paramsDefines); CUmodule module = cu.createModule(CudaKernelSources::nonbondedParameters, paramsDefines);
computeParamsKernel = cu.getKernel(module, "computeParameters"); computeParamsKernel = cu.getKernel(module, "computeParameters");
computeExclusionParamsKernel = cu.getKernel(module, "computeExclusionParameters");
info = new ForceInfo(force); info = new ForceInfo(force);
cu.addForce(info); cu.addForce(info);
} }
...@@ -2104,8 +2137,9 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF ...@@ -2104,8 +2137,9 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
vector<void*> paramsArgs = {&cu.getEnergyBuffer().getDevicePointer(), &computeSelfEnergy, &globalParams.getDevicePointer(), &numAtoms, vector<void*> paramsArgs = {&cu.getEnergyBuffer().getDevicePointer(), &computeSelfEnergy, &globalParams.getDevicePointer(), &numAtoms,
&baseParticleParams.getDevicePointer(), &cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(), &baseParticleParams.getDevicePointer(), &cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
&particleParamOffsets.getDevicePointer(), &particleOffsetIndices.getDevicePointer()}; &particleParamOffsets.getDevicePointer(), &particleOffsetIndices.getDevicePointer()};
int numExceptions;
if (exceptionParams.isInitialized()) { if (exceptionParams.isInitialized()) {
int numExceptions = exceptionParams.getSize(); numExceptions = exceptionParams.getSize();
paramsArgs.push_back(&numExceptions); paramsArgs.push_back(&numExceptions);
paramsArgs.push_back(&baseExceptionParams.getDevicePointer()); paramsArgs.push_back(&baseExceptionParams.getDevicePointer());
paramsArgs.push_back(&exceptionParams.getDevicePointer()); paramsArgs.push_back(&exceptionParams.getDevicePointer());
...@@ -2113,6 +2147,12 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF ...@@ -2113,6 +2147,12 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
paramsArgs.push_back(&exceptionOffsetIndices.getDevicePointer()); paramsArgs.push_back(&exceptionOffsetIndices.getDevicePointer());
} }
cu.executeKernel(computeParamsKernel, &paramsArgs[0], cu.getPaddedNumAtoms()); cu.executeKernel(computeParamsKernel, &paramsArgs[0], cu.getPaddedNumAtoms());
if (exclusionParams.isInitialized()) {
int numExclusions = exclusionParams.getSize();
vector<void*> exclusionParamsArgs = {&cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
&numExclusions, &exclusionAtoms.getDevicePointer(), &exclusionParams.getDevicePointer()};
cu.executeKernel(computeExclusionParamsKernel, &exclusionParamsArgs[0], numExclusions);
}
if (usePmeStream) { if (usePmeStream) {
cuEventRecord(paramsSyncEvent, cu.getCurrentStream()); cuEventRecord(paramsSyncEvent, cu.getCurrentStream());
cuStreamWaitEvent(pmeStream, paramsSyncEvent, 0); cuStreamWaitEvent(pmeStream, paramsSyncEvent, 0);
......
{ {
#if USE_EWALD #if USE_EWALD
bool needCorrection = hasExclusions && isExcluded && atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS; unsigned int includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED);
unsigned int includeInteraction = ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection);
const real alphaR = EWALD_ALPHA*r; const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR); const real expAlphaRSqr = EXP(-alphaR*alphaR);
#if HAS_COULOMB #if HAS_COULOMB
...@@ -22,89 +21,58 @@ ...@@ -22,89 +21,58 @@
#endif #endif
real tempForce = 0.0f; real tempForce = 0.0f;
#if HAS_LENNARD_JONES #if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
// present in reciprocal space. The real terms have an additive contribution real sig2 = invR*sig;
// added in, but for excluded terms the multiplicative term is just subtracted. sig2 *= sig2;
// These factors are needed in both clauses of the needCorrection statement, so real sig6 = sig2*sig2*sig2;
// I declare them up here. real eps = SIGMA_EPSILON1.y*SIGMA_EPSILON2.y;
#if DO_LJPME real epssig6 = sig6*eps;
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r; tempForce = epssig6*(12.0f*sig6 - 6.0f);
const real dar2 = dispersionAlphaR*dispersionAlphaR; real ljEnergy = epssig6*(sig6 - 1.0f);
const real dar4 = dar2*dar2; #if USE_LJ_SWITCH
const real dar6 = dar4*dar2; if (r > LJ_SWITCH_CUTOFF) {
const real invR2 = invR*invR; real x = r-LJ_SWITCH_CUTOFF;
const real expDar2 = EXP(-dar2); real switchValue = 1+x*x*x*(LJ_SWITCH_C3+x*(LJ_SWITCH_C4+x*LJ_SWITCH_C5));
const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2; real switchDeriv = x*x*(3*LJ_SWITCH_C3+x*(4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5));
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y; tempForce = tempForce*switchValue - ljEnergy*switchDeriv*r;
const real coef = invR2*invR2*invR2*c6; ljEnergy *= switchValue;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
#endif
#endif
if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
#if HAS_COULOMB
if (1-erfcAlphaR > 1e-6) {
real erfAlphaR = ERF(alphaR); // Our erfc approximation is not accurate enough when r is very small, which happens with Drude particles.
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += -prefactor*erfAlphaR;
}
else {
includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*CHARGE1*CHARGE2;
}
#endif
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
tempEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
#endif
} }
else { #endif
#if HAS_LENNARD_JONES
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real eps = SIGMA_EPSILON1.y*SIGMA_EPSILON2.y;
real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f);
#if USE_LJ_SWITCH
if (r > LJ_SWITCH_CUTOFF) {
real x = r-LJ_SWITCH_CUTOFF;
real switchValue = 1+x*x*x*(LJ_SWITCH_C3+x*(LJ_SWITCH_C4+x*LJ_SWITCH_C5));
real switchDeriv = x*x*(3*LJ_SWITCH_C3+x*(4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5));
tempForce = tempForce*switchValue - ljEnergy*switchDeriv*r;
ljEnergy *= switchValue;
}
#endif
#if DO_LJPME #if DO_LJPME
// The multiplicative grid term // The multiplicative term to correct for the multiplicative terms that are always
ljEnergy += coef*(1.0f - expDar2*eprefac); // present in reciprocal space.
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac); const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
// The potential shift accounts for the step at the cutoff introduced by the const real dar2 = dispersionAlphaR*dispersionAlphaR;
// transition from additive to multiplicative combintion rules and is only const real dar4 = dar2*dar2;
// needed for the real (not excluded) terms. By addin these terms to ljEnergy const real dar6 = dar4*dar2;
// instead of tempEnergy here, the includeInteraction mask is correctly applied. const real invR2 = invR*invR;
sig2 = sig*sig; const real expDar2 = EXP(-dar2);
sig6 = sig2*sig2*sig2*INVCUT6; const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2;
epssig6 = eps*sig6; const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
// The additive part of the potential shift const real coef = invR2*invR2*invR2*c6;
ljEnergy += epssig6*(1.0f - sig6); const real eprefac = 1.0f + dar2 + 0.5f*dar4;
// The multiplicative part of the potential shift const real dprefac = eprefac + dar6/6.0f;
ljEnergy += MULTSHIFT6*c6; // The multiplicative grid term
ljEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2 = sig*sig;
sig6 = sig2*sig2*sig2*INVCUT6;
epssig6 = eps*sig6;
// The additive part of the potential shift
ljEnergy += epssig6*(1.0f - sig6);
// The multiplicative part of the potential shift
ljEnergy += MULTSHIFT6*c6;
#endif #endif
tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI); tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += includeInteraction ? ljEnergy + prefactor*erfcAlphaR : 0; tempEnergy += includeInteraction ? ljEnergy + prefactor*erfcAlphaR : 0;
#else #else
tempForce = prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI); tempForce = prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += includeInteraction ? prefactor*erfcAlphaR : 0; tempEnergy += includeInteraction ? prefactor*erfcAlphaR : 0;
#endif #endif
}
dEdR += includeInteraction ? tempForce*invR*invR : 0; dEdR += includeInteraction ? tempForce*invR*invR : 0;
#else #else
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -113,7 +81,7 @@ ...@@ -113,7 +81,7 @@
unsigned int includeInteraction = (!isExcluded); unsigned int includeInteraction = (!isExcluded);
#endif #endif
real tempForce = 0.0f; real tempForce = 0.0f;
#if HAS_LENNARD_JONES #if HAS_LENNARD_JONES
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x; real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig; real sig2 = invR*sig;
sig2 *= sig2; sig2 *= sig2;
...@@ -131,7 +99,7 @@ ...@@ -131,7 +99,7 @@
} }
#endif #endif
tempEnergy += ljEnergy; tempEnergy += ljEnergy;
#endif #endif
#if HAS_COULOMB #if HAS_COULOMB
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const real prefactor = 138.935456f*CHARGE1*CHARGE2; const real prefactor = 138.935456f*CHARGE1*CHARGE2;
......
...@@ -63,3 +63,28 @@ extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, b ...@@ -63,3 +63,28 @@ extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, b
if (includeSelfEnergy) if (includeSelfEnergy)
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
} }
/**
* Compute parameters for subtracting the reciprocal part of excluded interactions.
*/
extern "C" __global__ void computeExclusionParameters(real4* __restrict__ posq, real* __restrict__ charge, float2* __restrict__ sigmaEpsilon,
int numExclusions, const int2* __restrict__ exclusionAtoms, float4* __restrict__ exclusionParams) {
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < numExclusions; i += blockDim.x*gridDim.x) {
int2 atoms = exclusionAtoms[i];
#ifdef USE_POSQ_CHARGES
real chargeProd = posq[atoms.x].w*posq[atoms.y].w;
#else
real chargeProd = charge[atoms.x]*charge[atoms.y];
#endif
#ifdef INCLUDE_LJPME
float2 sigEps1 = sigmaEpsilon[atoms.x];
float2 sigEps2 = sigmaEpsilon[atoms.y];
float sigma = sigEps1.x*sigEps2.x;
float epsilon = sigEps1.y*sigEps2.y;
#else
float sigma = 0;
float epsilon = 0;
#endif
exclusionParams[i] = make_float4((float) (138.935456f*chargeProd), sigma, epsilon, 0);
}
}
\ No newline at end of file
const float4 exclusionParams = PARAMS[index];
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
const real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
const real r = SQRT(r2);
const real invR = RECIP(r);
const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR);
real tempForce = 0.0f;
if (alphaR > 1e-6f) {
const real erfAlphaR = ERF(alphaR);
const real prefactor = exclusionParams.x*invR;
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
energy -= prefactor*erfAlphaR;
}
else {
energy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*exclusionParams.x;
}
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const real c6 = 64*exclusionParams.y*exclusionParams.y*exclusionParams.y*exclusionParams.z;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
energy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
if (r > 0)
delta *= tempForce*invR*invR;
real3 force1 = -delta;
real3 force2 = delta;
...@@ -655,6 +655,8 @@ private: ...@@ -655,6 +655,8 @@ private:
OpenCLArray charges; OpenCLArray charges;
OpenCLArray sigmaEpsilon; OpenCLArray sigmaEpsilon;
OpenCLArray exceptionParams; OpenCLArray exceptionParams;
OpenCLArray exclusionAtoms;
OpenCLArray exclusionParams;
OpenCLArray baseParticleParams; OpenCLArray baseParticleParams;
OpenCLArray baseExceptionParams; OpenCLArray baseExceptionParams;
OpenCLArray particleParamOffsets; OpenCLArray particleParamOffsets;
...@@ -683,7 +685,7 @@ private: ...@@ -683,7 +685,7 @@ private:
Kernel cpuPme; Kernel cpuPme;
PmeIO* pmeio; PmeIO* pmeio;
SyncQueuePostComputation* syncQueue; SyncQueuePostComputation* syncQueue;
cl::Kernel computeParamsKernel; cl::Kernel computeParamsKernel, computeExclusionParamsKernel;
cl::Kernel ewaldSumsKernel; cl::Kernel ewaldSumsKernel;
cl::Kernel ewaldForcesKernel; cl::Kernel ewaldForcesKernel;
cl::Kernel pmeAtomRangeKernel; cl::Kernel pmeAtomRangeKernel;
......
...@@ -1655,6 +1655,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1655,6 +1655,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0); hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0);
if (hasOffsets) if (hasOffsets)
paramsDefines["HAS_OFFSETS"] = "1"; paramsDefines["HAS_OFFSETS"] = "1";
if (usePosqCharges)
paramsDefines["USE_POSQ_CHARGES"] = "1";
if (nonbondedMethod == Ewald) { if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters. // Compute the Ewald parameters.
...@@ -1853,7 +1855,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1853,7 +1855,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// Evaluate the actual bspline moduli for X/Y/Z. // Evaluate the actual bspline moduli for X/Y/Z.
for(int dim = 0; dim < 3; dim++) { for (int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? xsize : dim == 1 ? ysize : zsize); int ndata = (dim == 0 ? xsize : dim == 1 ? ysize : zsize);
vector<cl_double> moduli(ndata); vector<cl_double> moduli(ndata);
for (int i = 0; i < ndata; i++) { for (int i = 0; i < ndata; i++) {
...@@ -1883,6 +1885,37 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1883,6 +1885,37 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
} }
} }
// Add code to subtract off the reciprocal part of excluded interactions.
if ((nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) && pmeio == NULL) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumExceptions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumExceptions()/numContexts;
int numExclusions = endIndex-startIndex;
if (numExclusions > 0) {
paramsDefines["HAS_EXCLUSIONS"] = "1";
vector<vector<int> > atoms(numExclusions, vector<int>(2));
exclusionAtoms.initialize<mm_int2>(cl, numExclusions, "exclusionAtoms");
exclusionParams.initialize<mm_float4>(cl, numExclusions, "exclusionParams");
vector<mm_int2> exclusionAtomsVec(numExclusions);
for (int i = 0; i < numExclusions; i++) {
int j = i+startIndex;
exclusionAtomsVec[i] = mm_int2(exclusions[j].first, exclusions[j].second);
atoms[i][0] = exclusions[j].first;
atoms[i][1] = exclusions[j].second;
}
exclusionAtoms.upload(exclusionAtomsVec);
map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(exclusionParams.getDeviceBuffer(), "float4");
replacements["EWALD_ALPHA"] = cl.doubleToString(alpha);
replacements["TWO_OVER_SQRT_PI"] = cl.doubleToString(2.0/sqrt(M_PI));
replacements["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME)
replacements["EWALD_DISPERSION_ALPHA"] = cl.doubleToString(dispersionAlpha);
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::pmeExclusions, replacements), force.getForceGroup());
}
}
// Add the interaction to the default nonbonded kernel. // Add the interaction to the default nonbonded kernel.
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines); string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
...@@ -1893,7 +1926,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1893,7 +1926,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
if (usePosqCharges) { if (usePosqCharges) {
replacements["CHARGE1"] = "posq1.w"; replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w"; replacements["CHARGE2"] = "posq2.w";
paramsDefines["USE_POSQ_CHARGES"] = "1";
} }
else { else {
replacements["CHARGE1"] = prefix+"charge1"; replacements["CHARGE1"] = prefix+"charge1";
...@@ -2004,6 +2036,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -2004,6 +2036,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
cl::Program program = cl.createProgram(OpenCLKernelSources::nonbondedParameters, paramsDefines); cl::Program program = cl.createProgram(OpenCLKernelSources::nonbondedParameters, paramsDefines);
computeParamsKernel = cl::Kernel(program, "computeParameters"); computeParamsKernel = cl::Kernel(program, "computeParameters");
computeExclusionParamsKernel = cl::Kernel(program, "computeExclusionParameters");
info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force); info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force);
cl.addForce(info); cl.addForce(info);
} }
...@@ -2012,21 +2045,31 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2012,21 +2045,31 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU); bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
if (!hasInitializedKernel) { if (!hasInitializedKernel) {
hasInitializedKernel = true; hasInitializedKernel = true;
computeParamsKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer()); int index = 0;
computeParamsKernel.setArg<cl::Buffer>(2, globalParams.getDeviceBuffer()); computeParamsKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
computeParamsKernel.setArg<cl_int>(3, cl.getPaddedNumAtoms()); index++;
computeParamsKernel.setArg<cl::Buffer>(4, baseParticleParams.getDeviceBuffer()); computeParamsKernel.setArg<cl::Buffer>(index++, globalParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(5, cl.getPosq().getDeviceBuffer()); computeParamsKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
computeParamsKernel.setArg<cl::Buffer>(6, charges.getDeviceBuffer()); computeParamsKernel.setArg<cl::Buffer>(index++, baseParticleParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(7, sigmaEpsilon.getDeviceBuffer()); computeParamsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(8, particleParamOffsets.getDeviceBuffer()); computeParamsKernel.setArg<cl::Buffer>(index++, charges.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(9, particleOffsetIndices.getDeviceBuffer()); computeParamsKernel.setArg<cl::Buffer>(index++, sigmaEpsilon.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, particleParamOffsets.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, particleOffsetIndices.getDeviceBuffer());
if (exceptionParams.isInitialized()) { if (exceptionParams.isInitialized()) {
computeParamsKernel.setArg<cl_int>(10, exceptionParams.getSize()); computeParamsKernel.setArg<cl_int>(index++, exceptionParams.getSize());
computeParamsKernel.setArg<cl::Buffer>(11, baseExceptionParams.getDeviceBuffer()); computeParamsKernel.setArg<cl::Buffer>(index++, baseExceptionParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(12, exceptionParams.getDeviceBuffer()); computeParamsKernel.setArg<cl::Buffer>(index++, exceptionParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(13, exceptionParamOffsets.getDeviceBuffer()); computeParamsKernel.setArg<cl::Buffer>(index++, exceptionParamOffsets.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(14, exceptionOffsetIndices.getDeviceBuffer()); computeParamsKernel.setArg<cl::Buffer>(index++, exceptionOffsetIndices.getDeviceBuffer());
}
if (exclusionParams.isInitialized()) {
computeExclusionParamsKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
computeExclusionParamsKernel.setArg<cl::Buffer>(1, charges.getDeviceBuffer());
computeExclusionParamsKernel.setArg<cl::Buffer>(2, sigmaEpsilon.getDeviceBuffer());
computeExclusionParamsKernel.setArg<cl_int>(3, exclusionParams.getSize());
computeExclusionParamsKernel.setArg<cl::Buffer>(4, exclusionAtoms.getDeviceBuffer());
computeExclusionParamsKernel.setArg<cl::Buffer>(5, exclusionParams.getDeviceBuffer());
} }
if (cosSinSums.isInitialized()) { if (cosSinSums.isInitialized()) {
ewaldSumsKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer()); ewaldSumsKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
...@@ -2176,6 +2219,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2176,6 +2219,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (recomputeParams || hasOffsets) { if (recomputeParams || hasOffsets) {
computeParamsKernel.setArg<cl_int>(1, includeEnergy && includeReciprocal); computeParamsKernel.setArg<cl_int>(1, includeEnergy && includeReciprocal);
cl.executeKernel(computeParamsKernel, cl.getPaddedNumAtoms()); cl.executeKernel(computeParamsKernel, cl.getPaddedNumAtoms());
if (exclusionParams.isInitialized())
cl.executeKernel(computeExclusionParamsKernel, exclusionParams.getSize());
if (usePmeQueue) { if (usePmeQueue) {
vector<cl::Event> events(1); vector<cl::Event> events(1);
cl.getQueue().enqueueMarker(&events[0]); cl.getQueue().enqueueMarker(&events[0]);
...@@ -2350,9 +2395,9 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2350,9 +2395,9 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeDispersionSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1); cl.executeKernel(pmeDispersionSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
} }
else { else {
if (!hasCoulomb)
sort->sort(pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) { if (cl.getSupports64BitGlobalAtomics()) {
if (!hasCoulomb)
sort->sort(pmeAtomGridIndex);
cl.clearBuffer(pmeGrid2); cl.clearBuffer(pmeGrid2);
setPeriodicBoxArgs(cl, pmeDispersionSpreadChargeKernel, 5); setPeriodicBoxArgs(cl, pmeDispersionSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
...@@ -2369,6 +2414,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2369,6 +2414,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeDispersionFinishSpreadChargeKernel, gridSizeX*gridSizeY*gridSizeZ); cl.executeKernel(pmeDispersionFinishSpreadChargeKernel, gridSizeX*gridSizeY*gridSizeZ);
} }
else { else {
sort->sort(pmeAtomGridIndex);
cl.clearBuffer(pmeGrid1); cl.clearBuffer(pmeGrid1);
cl.executeKernel(pmeDispersionAtomRangeKernel, cl.getNumAtoms()); cl.executeKernel(pmeDispersionAtomRangeKernel, cl.getNumAtoms());
setPeriodicBoxSizeArg(cl, pmeDispersionZIndexKernel, 2); setPeriodicBoxSizeArg(cl, pmeDispersionZIndexKernel, 2);
......
...@@ -5,8 +5,7 @@ ...@@ -5,8 +5,7 @@
unsigned int includeInteraction; unsigned int includeInteraction;
#endif #endif
#if USE_EWALD #if USE_EWALD
bool needCorrection = hasExclusions && isExcluded && atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS; includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED);
includeInteraction = ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection);
const real alphaR = EWALD_ALPHA*r; const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR); const real expAlphaRSqr = EXP(-alphaR*alphaR);
#if HAS_COULOMB #if HAS_COULOMB
...@@ -27,89 +26,58 @@ ...@@ -27,89 +26,58 @@
#endif #endif
real tempForce = 0; real tempForce = 0;
#if HAS_LENNARD_JONES #if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
// present in reciprocal space. The real terms have an additive contribution real sig2 = invR*sig;
// added in, but for excluded terms the multiplicative term is just subtracted. sig2 *= sig2;
// These factors are needed in both clauses of the needCorrection statement, so real sig6 = sig2*sig2*sig2;
// I declare them up here. real eps = SIGMA_EPSILON1.y*SIGMA_EPSILON2.y;
#if DO_LJPME real epssig6 = sig6*eps;
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r; tempForce = epssig6*(12.0f*sig6 - 6.0f);
const real dar2 = dispersionAlphaR*dispersionAlphaR; real ljEnergy = epssig6*(sig6 - 1.0f);
const real dar4 = dar2*dar2; #if USE_LJ_SWITCH
const real dar6 = dar4*dar2; if (r > LJ_SWITCH_CUTOFF) {
const real invR2 = invR*invR; real x = r-LJ_SWITCH_CUTOFF;
const real expDar2 = EXP(-dar2); real switchValue = 1+x*x*x*(LJ_SWITCH_C3+x*(LJ_SWITCH_C4+x*LJ_SWITCH_C5));
const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2; real switchDeriv = x*x*(3*LJ_SWITCH_C3+x*(4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5));
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y; tempForce = tempForce*switchValue - ljEnergy*switchDeriv*r;
const real coef = invR2*invR2*invR2*c6; ljEnergy *= switchValue;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
#endif
#endif
if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
#if HAS_COULOMB
if (1-erfcAlphaR > 1e-6) {
real erfAlphaR = erf(alphaR); // Our erfc approximation is not accurate enough when r is very small, which happens with Drude particles.
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += -prefactor*erfAlphaR;
}
else {
includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*CHARGE1*CHARGE2;
}
#endif
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
tempEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
#endif
} }
else { #endif
#if HAS_LENNARD_JONES
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real eps = SIGMA_EPSILON1.y*SIGMA_EPSILON2.y;
real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f);
#if USE_LJ_SWITCH
if (r > LJ_SWITCH_CUTOFF) {
real x = r-LJ_SWITCH_CUTOFF;
real switchValue = 1+x*x*x*(LJ_SWITCH_C3+x*(LJ_SWITCH_C4+x*LJ_SWITCH_C5));
real switchDeriv = x*x*(3*LJ_SWITCH_C3+x*(4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5));
tempForce = tempForce*switchValue - ljEnergy*switchDeriv*r;
ljEnergy *= switchValue;
}
#endif
#if DO_LJPME #if DO_LJPME
// The multiplicative grid term // The multiplicative term to correct for the multiplicative terms that are always
ljEnergy += coef*(1.0f - expDar2*eprefac); // present in reciprocal space.
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac); const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
// The potential shift accounts for the step at the cutoff introduced by the const real dar2 = dispersionAlphaR*dispersionAlphaR;
// transition from additive to multiplicative combintion rules and is only const real dar4 = dar2*dar2;
// needed for the real (not excluded) terms. By addin these terms to ljEnergy const real dar6 = dar4*dar2;
// instead of tempEnergy here, the includeInteraction mask is correctly applied. const real invR2 = invR*invR;
sig2 = sig*sig; const real expDar2 = EXP(-dar2);
sig6 = sig2*sig2*sig2*INVCUT6; const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2;
epssig6 = eps*sig6; const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
// The additive part of the potential shift const real coef = invR2*invR2*invR2*c6;
ljEnergy += epssig6*(1.0f - sig6); const real eprefac = 1.0f + dar2 + 0.5f*dar4;
// The multiplicative part of the potential shift const real dprefac = eprefac + dar6/6.0f;
ljEnergy += MULTSHIFT6*c6; // The multiplicative grid term
ljEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2 = sig*sig;
sig6 = sig2*sig2*sig2*INVCUT6;
epssig6 = eps*sig6;
// The additive part of the potential shift
ljEnergy += epssig6*(1.0f - sig6);
// The multiplicative part of the potential shift
ljEnergy += MULTSHIFT6*c6;
#endif #endif
tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI); tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += select((real) 0, ljEnergy + prefactor*erfcAlphaR, includeInteraction); tempEnergy += select((real) 0, ljEnergy + prefactor*erfcAlphaR, includeInteraction);
#else #else
tempForce = prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI); tempForce = prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += select((real) 0, prefactor*erfcAlphaR, includeInteraction); tempEnergy += select((real) 0, prefactor*erfcAlphaR, includeInteraction);
#endif #endif
}
dEdR += select((real) 0, tempForce*invR*invR, includeInteraction); dEdR += select((real) 0, tempForce*invR*invR, includeInteraction);
#else #else
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
......
...@@ -63,3 +63,28 @@ __kernel void computeParameters(__global mixed* restrict energyBuffer, int inclu ...@@ -63,3 +63,28 @@ __kernel void computeParameters(__global mixed* restrict energyBuffer, int inclu
if (includeSelfEnergy) if (includeSelfEnergy)
energyBuffer[get_global_id(0)] += energy; energyBuffer[get_global_id(0)] += energy;
} }
/**
* Compute parameters for subtracting the reciprocal part of excluded interactions.
*/
__kernel void computeExclusionParameters(__global real4* restrict posq, __global real* restrict charge, __global float2* restrict sigmaEpsilon,
int numExclusions, __global const int2* restrict exclusionAtoms, __global float4* restrict exclusionParams) {
for (int i = get_global_id(0); i < numExclusions; i += get_global_size(0)) {
int2 atoms = exclusionAtoms[i];
#ifdef USE_POSQ_CHARGES
real chargeProd = posq[atoms.x].w*posq[atoms.y].w;
#else
real chargeProd = charge[atoms.x]*charge[atoms.y];
#endif
#ifdef INCLUDE_LJPME
float2 sigEps1 = sigmaEpsilon[atoms.x];
float2 sigEps2 = sigmaEpsilon[atoms.y];
float sigma = sigEps1.x*sigEps2.x;
float epsilon = sigEps1.y*sigEps2.y;
#else
float sigma = 0;
float epsilon = 0;
#endif
exclusionParams[i] = (float4) ((float) (138.935456f*chargeProd), sigma, epsilon, 0);
}
}
const float4 exclusionParams = PARAMS[index];
real3 delta = (real3) (pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
const real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
const real r = SQRT(r2);
const real invR = RECIP(r);
const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR);
real tempForce = 0.0f;
if (alphaR > 1e-6f) {
const real erfAlphaR = erf(alphaR);
const real prefactor = exclusionParams.x*invR;
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
energy -= prefactor*erfAlphaR;
}
else {
energy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*exclusionParams.x;
}
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const real c6 = 64*exclusionParams.y*exclusionParams.y*exclusionParams.y*exclusionParams.z;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
energy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
if (r > 0)
delta *= tempForce*invR*invR;
real3 force1 = -delta;
real3 force2 = delta;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment