Commit da28ce0e authored by Peter Eastman's avatar Peter Eastman
Browse files

Revised how exclusions are computed with PME

parent df52ff73
......@@ -678,6 +678,8 @@ private:
CudaArray charges;
CudaArray sigmaEpsilon;
CudaArray exceptionParams;
CudaArray exclusionAtoms;
CudaArray exclusionParams;
CudaArray baseParticleParams;
CudaArray baseExceptionParams;
CudaArray particleParamOffsets;
......
......@@ -1665,6 +1665,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0);
if (hasOffsets)
paramsDefines["HAS_OFFSETS"] = "1";
if (usePosqCharges)
paramsDefines["USE_POSQ_CHARGES"] = "1";
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
......@@ -1929,7 +1931,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Evaluate the actual bspline moduli for X/Y/Z.
for(int dim = 0; dim < 3; dim++) {
for (int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? xsize : dim == 1 ? ysize : zsize);
vector<double> moduli(ndata);
for (int i = 0; i < ndata; i++) {
......@@ -1957,6 +1959,37 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
}
// Add code to subtract off the reciprocal part of excluded interactions.
if ((nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) && pmeio == NULL) {
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumExceptions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumExceptions()/numContexts;
int numExclusions = endIndex-startIndex;
if (numExclusions > 0) {
paramsDefines["HAS_EXCLUSIONS"] = "1";
vector<vector<int> > atoms(numExclusions, vector<int>(2));
exclusionAtoms.initialize<int2>(cu, numExclusions, "exclusionAtoms");
exclusionParams.initialize<float4>(cu, numExclusions, "exclusionParams");
vector<int2> exclusionAtomsVec(numExclusions);
for (int i = 0; i < numExclusions; i++) {
int j = i+startIndex;
exclusionAtomsVec[i] = make_int2(exclusions[j].first, exclusions[j].second);
atoms[i][0] = exclusions[j].first;
atoms[i][1] = exclusions[j].second;
}
exclusionAtoms.upload(exclusionAtomsVec);
map<string, string> replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exclusionParams.getDevicePointer(), "float4");
replacements["EWALD_ALPHA"] = cu.doubleToString(alpha);
replacements["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
replacements["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME)
replacements["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::pmeExclusions, replacements), force.getForceGroup());
}
}
// Add the interaction to the default nonbonded kernel.
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
......@@ -1967,7 +2000,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (usePosqCharges) {
replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w";
paramsDefines["USE_POSQ_CHARGES"] = "1";
}
else {
replacements["CHARGE1"] = prefix+"charge1";
......@@ -2104,14 +2136,21 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
vector<void*> paramsArgs = {&cu.getEnergyBuffer().getDevicePointer(), &computeSelfEnergy, &globalParams.getDevicePointer(), &numAtoms,
&baseParticleParams.getDevicePointer(), &cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
&particleParamOffsets.getDevicePointer(), &particleOffsetIndices.getDevicePointer()};
int numExceptions, numExclusions;
if (exceptionParams.isInitialized()) {
int numExceptions = exceptionParams.getSize();
numExceptions = exceptionParams.getSize();
paramsArgs.push_back(&numExceptions);
paramsArgs.push_back(&baseExceptionParams.getDevicePointer());
paramsArgs.push_back(&exceptionParams.getDevicePointer());
paramsArgs.push_back(&exceptionParamOffsets.getDevicePointer());
paramsArgs.push_back(&exceptionOffsetIndices.getDevicePointer());
}
if (exclusionParams.isInitialized()) {
numExclusions = exclusionParams.getSize();
paramsArgs.push_back(&numExclusions);
paramsArgs.push_back(&exclusionAtoms.getDevicePointer());
paramsArgs.push_back(&exclusionParams.getDevicePointer());
}
cu.executeKernel(computeParamsKernel, &paramsArgs[0], cu.getPaddedNumAtoms());
if (usePmeStream) {
cuEventRecord(paramsSyncEvent, cu.getCurrentStream());
......
{
#if USE_EWALD
bool needCorrection = hasExclusions && isExcluded && atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS;
unsigned int includeInteraction = ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection);
unsigned int includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED);
const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR);
#if HAS_COULOMB
......@@ -22,89 +21,58 @@
#endif
real tempForce = 0.0f;
#if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
#endif
#endif
if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
#if HAS_COULOMB
if (1-erfcAlphaR > 1e-6) {
real erfAlphaR = ERF(alphaR); // Our erfc approximation is not accurate enough when r is very small, which happens with Drude particles.
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += -prefactor*erfAlphaR;
}
else {
includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*CHARGE1*CHARGE2;
}
#endif
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
tempEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
#endif
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real eps = SIGMA_EPSILON1.y*SIGMA_EPSILON2.y;
real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f);
#if USE_LJ_SWITCH
if (r > LJ_SWITCH_CUTOFF) {
real x = r-LJ_SWITCH_CUTOFF;
real switchValue = 1+x*x*x*(LJ_SWITCH_C3+x*(LJ_SWITCH_C4+x*LJ_SWITCH_C5));
real switchDeriv = x*x*(3*LJ_SWITCH_C3+x*(4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5));
tempForce = tempForce*switchValue - ljEnergy*switchDeriv*r;
ljEnergy *= switchValue;
}
else {
#if HAS_LENNARD_JONES
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real eps = SIGMA_EPSILON1.y*SIGMA_EPSILON2.y;
real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f);
#if USE_LJ_SWITCH
if (r > LJ_SWITCH_CUTOFF) {
real x = r-LJ_SWITCH_CUTOFF;
real switchValue = 1+x*x*x*(LJ_SWITCH_C3+x*(LJ_SWITCH_C4+x*LJ_SWITCH_C5));
real switchDeriv = x*x*(3*LJ_SWITCH_C3+x*(4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5));
tempForce = tempForce*switchValue - ljEnergy*switchDeriv*r;
ljEnergy *= switchValue;
}
#endif
#endif
#if DO_LJPME
// The multiplicative grid term
ljEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2 = sig*sig;
sig6 = sig2*sig2*sig2*INVCUT6;
epssig6 = eps*sig6;
// The additive part of the potential shift
ljEnergy += epssig6*(1.0f - sig6);
// The multiplicative part of the potential shift
ljEnergy += MULTSHIFT6*c6;
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space.
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
// The multiplicative grid term
ljEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2 = sig*sig;
sig6 = sig2*sig2*sig2*INVCUT6;
epssig6 = eps*sig6;
// The additive part of the potential shift
ljEnergy += epssig6*(1.0f - sig6);
// The multiplicative part of the potential shift
ljEnergy += MULTSHIFT6*c6;
#endif
tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += includeInteraction ? ljEnergy + prefactor*erfcAlphaR : 0;
tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += includeInteraction ? ljEnergy + prefactor*erfcAlphaR : 0;
#else
tempForce = prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += includeInteraction ? prefactor*erfcAlphaR : 0;
tempForce = prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += includeInteraction ? prefactor*erfcAlphaR : 0;
#endif
}
dEdR += includeInteraction ? tempForce*invR*invR : 0;
#else
#ifdef USE_CUTOFF
......@@ -113,7 +81,7 @@
unsigned int includeInteraction = (!isExcluded);
#endif
real tempForce = 0.0f;
#if HAS_LENNARD_JONES
#if HAS_LENNARD_JONES
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
sig2 *= sig2;
......@@ -131,7 +99,7 @@
}
#endif
tempEnergy += ljEnergy;
#endif
#endif
#if HAS_COULOMB
#ifdef USE_CUTOFF
const real prefactor = 138.935456f*CHARGE1*CHARGE2;
......
......@@ -7,6 +7,9 @@ extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, b
#ifdef HAS_EXCEPTIONS
, int numExceptions, const float4* __restrict__ baseExceptionParams, float4* __restrict__ exceptionParams,
float4* __restrict__ exceptionParamOffsets, int* __restrict__ exceptionOffsetIndices
#endif
#ifdef HAS_EXCLUSIONS
, int numExclusions, const int2* __restrict__ exclusionAtoms, float4* __restrict__ exclusionParams
#endif
) {
mixed energy = 0;
......@@ -60,6 +63,29 @@ extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, b
exceptionParams[i] = make_float4((float) (138.935456f*params.x), (float) params.y, (float) (4*params.z), 0);
}
#endif
// Compute parameters for subtracting the reciprocal part of excluded interactions.
#ifdef HAS_EXCLUSIONS
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < numExclusions; i += blockDim.x*gridDim.x) {
int2 atoms = exclusionAtoms[i];
#ifdef USE_POSQ_CHARGES
real chargeProd = posq[atoms.x].w*posq[atoms.y].w;
#else
real chargeProd = charge[atoms.x]*charge[atoms.y];
#endif
#ifdef INCLUDE_LJPME
float2 sigEps1 = sigmaEpsilon[atoms.x];
float2 sigEps2 = sigmaEpsilon[atoms.y];
float sigma = sigEps1.x*sigEps2.x;
float epsilon = sigEps1.y*sigEps2.y;
#else
float sigma = 0;
float epsilon = 0;
#endif
exclusionParams[i] = make_float4((float) (138.935456f*chargeProd), sigma, epsilon, 0);
}
#endif
if (includeSelfEnergy)
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
const float4 exclusionParams = PARAMS[index];
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
const real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
const real invR = RSQRT(r2);
const real r = r2*invR;
const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR);
const real erfAlphaR = ERF(alphaR);
real tempForce = 0.0f;
if (erfAlphaR > 1e-6f) {
const real prefactor = exclusionParams.x*invR;
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
energy -= prefactor*erfAlphaR;
}
else {
energy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*exclusionParams.x;
}
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const real c6 = 64*exclusionParams.y*exclusionParams.y*exclusionParams.y*exclusionParams.z;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
energy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
delta *= tempForce*invR*invR;
real3 force1 = -delta;
real3 force2 = delta;
......@@ -655,6 +655,8 @@ private:
OpenCLArray charges;
OpenCLArray sigmaEpsilon;
OpenCLArray exceptionParams;
OpenCLArray exclusionAtoms;
OpenCLArray exclusionParams;
OpenCLArray baseParticleParams;
OpenCLArray baseExceptionParams;
OpenCLArray particleParamOffsets;
......
......@@ -1655,6 +1655,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0);
if (hasOffsets)
paramsDefines["HAS_OFFSETS"] = "1";
if (usePosqCharges)
paramsDefines["USE_POSQ_CHARGES"] = "1";
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
......@@ -1853,7 +1855,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// Evaluate the actual bspline moduli for X/Y/Z.
for(int dim = 0; dim < 3; dim++) {
for (int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? xsize : dim == 1 ? ysize : zsize);
vector<cl_double> moduli(ndata);
for (int i = 0; i < ndata; i++) {
......@@ -1883,6 +1885,37 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
}
}
// Add code to subtract off the reciprocal part of excluded interactions.
if ((nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) && pmeio == NULL) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumExceptions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumExceptions()/numContexts;
int numExclusions = endIndex-startIndex;
if (numExclusions > 0) {
paramsDefines["HAS_EXCLUSIONS"] = "1";
vector<vector<int> > atoms(numExclusions, vector<int>(2));
exclusionAtoms.initialize<mm_int2>(cl, numExclusions, "exclusionAtoms");
exclusionParams.initialize<mm_float4>(cl, numExclusions, "exclusionParams");
vector<mm_int2> exclusionAtomsVec(numExclusions);
for (int i = 0; i < numExclusions; i++) {
int j = i+startIndex;
exclusionAtomsVec[i] = mm_int2(exclusions[j].first, exclusions[j].second);
atoms[i][0] = exclusions[j].first;
atoms[i][1] = exclusions[j].second;
}
exclusionAtoms.upload(exclusionAtomsVec);
map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(exclusionParams.getDeviceBuffer(), "float4");
replacements["EWALD_ALPHA"] = cl.doubleToString(alpha);
replacements["TWO_OVER_SQRT_PI"] = cl.doubleToString(2.0/sqrt(M_PI));
replacements["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME)
replacements["EWALD_DISPERSION_ALPHA"] = cl.doubleToString(dispersionAlpha);
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::pmeExclusions, replacements), force.getForceGroup());
}
}
// Add the interaction to the default nonbonded kernel.
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
......@@ -1893,7 +1926,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
if (usePosqCharges) {
replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w";
paramsDefines["USE_POSQ_CHARGES"] = "1";
}
else {
replacements["CHARGE1"] = prefix+"charge1";
......@@ -2012,21 +2044,28 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
if (!hasInitializedKernel) {
hasInitializedKernel = true;
computeParamsKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(2, globalParams.getDeviceBuffer());
computeParamsKernel.setArg<cl_int>(3, cl.getPaddedNumAtoms());
computeParamsKernel.setArg<cl::Buffer>(4, baseParticleParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(5, cl.getPosq().getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(6, charges.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(7, sigmaEpsilon.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(8, particleParamOffsets.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(9, particleOffsetIndices.getDeviceBuffer());
int index = 0;
computeParamsKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
index++;
computeParamsKernel.setArg<cl::Buffer>(index++, globalParams.getDeviceBuffer());
computeParamsKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
computeParamsKernel.setArg<cl::Buffer>(index++, baseParticleParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, charges.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, sigmaEpsilon.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, particleParamOffsets.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, particleOffsetIndices.getDeviceBuffer());
if (exceptionParams.isInitialized()) {
computeParamsKernel.setArg<cl_int>(10, exceptionParams.getSize());
computeParamsKernel.setArg<cl::Buffer>(11, baseExceptionParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(12, exceptionParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(13, exceptionParamOffsets.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(14, exceptionOffsetIndices.getDeviceBuffer());
computeParamsKernel.setArg<cl_int>(index++, exceptionParams.getSize());
computeParamsKernel.setArg<cl::Buffer>(index++, baseExceptionParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, exceptionParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, exceptionParamOffsets.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, exceptionOffsetIndices.getDeviceBuffer());
}
if (exclusionParams.isInitialized()) {
computeParamsKernel.setArg<cl_int>(index++, exclusionParams.getSize());
computeParamsKernel.setArg<cl::Buffer>(index++, exclusionAtoms.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, exclusionParams.getDeviceBuffer());
}
if (cosSinSums.isInitialized()) {
ewaldSumsKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
......
......@@ -5,8 +5,7 @@
unsigned int includeInteraction;
#endif
#if USE_EWALD
bool needCorrection = hasExclusions && isExcluded && atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS;
includeInteraction = ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection);
includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED);
const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR);
#if HAS_COULOMB
......@@ -27,89 +26,58 @@
#endif
real tempForce = 0;
#if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
#endif
#endif
if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
#if HAS_COULOMB
if (1-erfcAlphaR > 1e-6) {
real erfAlphaR = erf(alphaR); // Our erfc approximation is not accurate enough when r is very small, which happens with Drude particles.
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += -prefactor*erfAlphaR;
}
else {
includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*CHARGE1*CHARGE2;
}
#endif
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
tempEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
#endif
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real eps = SIGMA_EPSILON1.y*SIGMA_EPSILON2.y;
real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f);
#if USE_LJ_SWITCH
if (r > LJ_SWITCH_CUTOFF) {
real x = r-LJ_SWITCH_CUTOFF;
real switchValue = 1+x*x*x*(LJ_SWITCH_C3+x*(LJ_SWITCH_C4+x*LJ_SWITCH_C5));
real switchDeriv = x*x*(3*LJ_SWITCH_C3+x*(4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5));
tempForce = tempForce*switchValue - ljEnergy*switchDeriv*r;
ljEnergy *= switchValue;
}
else {
#if HAS_LENNARD_JONES
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real eps = SIGMA_EPSILON1.y*SIGMA_EPSILON2.y;
real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f);
#if USE_LJ_SWITCH
if (r > LJ_SWITCH_CUTOFF) {
real x = r-LJ_SWITCH_CUTOFF;
real switchValue = 1+x*x*x*(LJ_SWITCH_C3+x*(LJ_SWITCH_C4+x*LJ_SWITCH_C5));
real switchDeriv = x*x*(3*LJ_SWITCH_C3+x*(4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5));
tempForce = tempForce*switchValue - ljEnergy*switchDeriv*r;
ljEnergy *= switchValue;
}
#endif
#endif
#if DO_LJPME
// The multiplicative grid term
ljEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2 = sig*sig;
sig6 = sig2*sig2*sig2*INVCUT6;
epssig6 = eps*sig6;
// The additive part of the potential shift
ljEnergy += epssig6*(1.0f - sig6);
// The multiplicative part of the potential shift
ljEnergy += MULTSHIFT6*c6;
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space.
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
// The multiplicative grid term
ljEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2 = sig*sig;
sig6 = sig2*sig2*sig2*INVCUT6;
epssig6 = eps*sig6;
// The additive part of the potential shift
ljEnergy += epssig6*(1.0f - sig6);
// The multiplicative part of the potential shift
ljEnergy += MULTSHIFT6*c6;
#endif
tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += select((real) 0, ljEnergy + prefactor*erfcAlphaR, includeInteraction);
tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += select((real) 0, ljEnergy + prefactor*erfcAlphaR, includeInteraction);
#else
tempForce = prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += select((real) 0, prefactor*erfcAlphaR, includeInteraction);
tempForce = prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += select((real) 0, prefactor*erfcAlphaR, includeInteraction);
#endif
}
dEdR += select((real) 0, tempForce*invR*invR, includeInteraction);
#else
#ifdef USE_CUTOFF
......
......@@ -7,6 +7,9 @@ __kernel void computeParameters(__global mixed* restrict energyBuffer, int inclu
#ifdef HAS_EXCEPTIONS
, int numExceptions, __global const float4* restrict baseExceptionParams, __global float4* restrict exceptionParams,
__global float4* restrict exceptionParamOffsets, __global int* restrict exceptionOffsetIndices
#endif
#ifdef HAS_EXCLUSIONS
, int numExclusions, __global const int2* restrict exclusionAtoms, __global float4* restrict exclusionParams
#endif
) {
mixed energy = 0;
......@@ -60,6 +63,29 @@ __kernel void computeParameters(__global mixed* restrict energyBuffer, int inclu
exceptionParams[i] = (float4) ((float) (138.935456f*params.x), (float) params.y, (float) (4*params.z), 0);
}
#endif
// Compute parameters for subtracting the reciprocal part of excluded interactions.
#ifdef HAS_EXCLUSIONS
for (int i = get_global_id(0); i < numExclusions; i += get_global_size(0)) {
int2 atoms = exclusionAtoms[i];
#ifdef USE_POSQ_CHARGES
real chargeProd = posq[atoms.x].w*posq[atoms.y].w;
#else
real chargeProd = charge[atoms.x]*charge[atoms.y];
#endif
#ifdef INCLUDE_LJPME
float2 sigEps1 = sigmaEpsilon[atoms.x];
float2 sigEps2 = sigmaEpsilon[atoms.y];
float sigma = sigEps1.x*sigEps2.x;
float epsilon = sigEps1.y*sigEps2.y;
#else
float sigma = 0;
float epsilon = 0;
#endif
exclusionParams[i] = (float4) ((float) (138.935456f*chargeProd), sigma, epsilon, 0);
}
#endif
if (includeSelfEnergy)
energyBuffer[get_global_id(0)] += energy;
}
const float4 exclusionParams = PARAMS[index];
real3 delta = (real3) (pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
const real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
const real invR = RSQRT(r2);
const real r = r2*invR;
const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR);
const real erfAlphaR = erf(alphaR);
real tempForce = 0.0f;
if (erfAlphaR > 1e-6f) {
const real prefactor = exclusionParams.x*invR;
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
energy -= prefactor*erfAlphaR;
}
else {
energy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*exclusionParams.x;
}
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const real c6 = 64*exclusionParams.y*exclusionParams.y*exclusionParams.y*exclusionParams.z;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
energy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
delta *= tempForce*invR*invR;
real3 force1 = -delta;
real3 force2 = delta;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment