Unverified Commit 0a4867a9 authored by peastman's avatar peastman Committed by GitHub
Browse files

Merge pull request #2318 from peastman/exclusions

Revised how exclusions are computed with PME
parents df52ff73 de4c9900
......@@ -678,6 +678,8 @@ private:
CudaArray charges;
CudaArray sigmaEpsilon;
CudaArray exceptionParams;
CudaArray exclusionAtoms;
CudaArray exclusionParams;
CudaArray baseParticleParams;
CudaArray baseExceptionParams;
CudaArray particleParamOffsets;
......@@ -707,7 +709,7 @@ private:
CudaFFT3D* dispersionFft;
cufftHandle dispersionFftForward;
cufftHandle dispersionFftBackward;
CUfunction computeParamsKernel;
CUfunction computeParamsKernel, computeExclusionParamsKernel;
CUfunction ewaldSumsKernel;
CUfunction ewaldForcesKernel;
CUfunction pmeGridIndexKernel;
......
......@@ -1665,6 +1665,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0);
if (hasOffsets)
paramsDefines["HAS_OFFSETS"] = "1";
if (usePosqCharges)
paramsDefines["USE_POSQ_CHARGES"] = "1";
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
......@@ -1929,7 +1931,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Evaluate the actual bspline moduli for X/Y/Z.
for(int dim = 0; dim < 3; dim++) {
for (int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? xsize : dim == 1 ? ysize : zsize);
vector<double> moduli(ndata);
for (int i = 0; i < ndata; i++) {
......@@ -1957,6 +1959,37 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
}
// Add code to subtract off the reciprocal part of excluded interactions.
if ((nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) && pmeio == NULL) {
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumExceptions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumExceptions()/numContexts;
int numExclusions = endIndex-startIndex;
if (numExclusions > 0) {
paramsDefines["HAS_EXCLUSIONS"] = "1";
vector<vector<int> > atoms(numExclusions, vector<int>(2));
exclusionAtoms.initialize<int2>(cu, numExclusions, "exclusionAtoms");
exclusionParams.initialize<float4>(cu, numExclusions, "exclusionParams");
vector<int2> exclusionAtomsVec(numExclusions);
for (int i = 0; i < numExclusions; i++) {
int j = i+startIndex;
exclusionAtomsVec[i] = make_int2(exclusions[j].first, exclusions[j].second);
atoms[i][0] = exclusions[j].first;
atoms[i][1] = exclusions[j].second;
}
exclusionAtoms.upload(exclusionAtomsVec);
map<string, string> replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exclusionParams.getDevicePointer(), "float4");
replacements["EWALD_ALPHA"] = cu.doubleToString(alpha);
replacements["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
replacements["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME)
replacements["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::pmeExclusions, replacements), force.getForceGroup());
}
}
// Add the interaction to the default nonbonded kernel.
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
......@@ -1967,7 +2000,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (usePosqCharges) {
replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w";
paramsDefines["USE_POSQ_CHARGES"] = "1";
}
else {
replacements["CHARGE1"] = prefix+"charge1";
......@@ -2078,6 +2110,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
CUmodule module = cu.createModule(CudaKernelSources::nonbondedParameters, paramsDefines);
computeParamsKernel = cu.getKernel(module, "computeParameters");
computeExclusionParamsKernel = cu.getKernel(module, "computeExclusionParameters");
info = new ForceInfo(force);
cu.addForce(info);
}
......@@ -2104,8 +2137,9 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
vector<void*> paramsArgs = {&cu.getEnergyBuffer().getDevicePointer(), &computeSelfEnergy, &globalParams.getDevicePointer(), &numAtoms,
&baseParticleParams.getDevicePointer(), &cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
&particleParamOffsets.getDevicePointer(), &particleOffsetIndices.getDevicePointer()};
int numExceptions;
if (exceptionParams.isInitialized()) {
int numExceptions = exceptionParams.getSize();
numExceptions = exceptionParams.getSize();
paramsArgs.push_back(&numExceptions);
paramsArgs.push_back(&baseExceptionParams.getDevicePointer());
paramsArgs.push_back(&exceptionParams.getDevicePointer());
......@@ -2113,6 +2147,12 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
paramsArgs.push_back(&exceptionOffsetIndices.getDevicePointer());
}
cu.executeKernel(computeParamsKernel, &paramsArgs[0], cu.getPaddedNumAtoms());
if (exclusionParams.isInitialized()) {
int numExclusions = exclusionParams.getSize();
vector<void*> exclusionParamsArgs = {&cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
&numExclusions, &exclusionAtoms.getDevicePointer(), &exclusionParams.getDevicePointer()};
cu.executeKernel(computeExclusionParamsKernel, &exclusionParamsArgs[0], numExclusions);
}
if (usePmeStream) {
cuEventRecord(paramsSyncEvent, cu.getCurrentStream());
cuStreamWaitEvent(pmeStream, paramsSyncEvent, 0);
......
{
#if USE_EWALD
bool needCorrection = hasExclusions && isExcluded && atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS;
unsigned int includeInteraction = ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection);
unsigned int includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED);
const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR);
#if HAS_COULOMB
......@@ -21,49 +20,6 @@
const real erfcAlphaR = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expAlphaRSqr;
#endif
real tempForce = 0.0f;
#if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
#endif
#endif
if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
#if HAS_COULOMB
if (1-erfcAlphaR > 1e-6) {
real erfAlphaR = ERF(alphaR); // Our erfc approximation is not accurate enough when r is very small, which happens with Drude particles.
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += -prefactor*erfAlphaR;
}
else {
includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*CHARGE1*CHARGE2;
}
#endif
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
tempEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
#endif
}
else {
#if HAS_LENNARD_JONES
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
......@@ -83,6 +39,19 @@
}
#endif
#if DO_LJPME
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space.
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
// The multiplicative grid term
ljEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
......@@ -104,7 +73,6 @@
tempForce = prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += includeInteraction ? prefactor*erfcAlphaR : 0;
#endif
}
dEdR += includeInteraction ? tempForce*invR*invR : 0;
#else
#ifdef USE_CUTOFF
......@@ -113,7 +81,7 @@
unsigned int includeInteraction = (!isExcluded);
#endif
real tempForce = 0.0f;
#if HAS_LENNARD_JONES
#if HAS_LENNARD_JONES
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
sig2 *= sig2;
......@@ -131,7 +99,7 @@
}
#endif
tempEnergy += ljEnergy;
#endif
#endif
#if HAS_COULOMB
#ifdef USE_CUTOFF
const real prefactor = 138.935456f*CHARGE1*CHARGE2;
......
......@@ -63,3 +63,28 @@ extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, b
if (includeSelfEnergy)
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
/**
* Compute parameters for subtracting the reciprocal part of excluded interactions.
*/
extern "C" __global__ void computeExclusionParameters(real4* __restrict__ posq, real* __restrict__ charge, float2* __restrict__ sigmaEpsilon,
int numExclusions, const int2* __restrict__ exclusionAtoms, float4* __restrict__ exclusionParams) {
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < numExclusions; i += blockDim.x*gridDim.x) {
int2 atoms = exclusionAtoms[i];
#ifdef USE_POSQ_CHARGES
real chargeProd = posq[atoms.x].w*posq[atoms.y].w;
#else
real chargeProd = charge[atoms.x]*charge[atoms.y];
#endif
#ifdef INCLUDE_LJPME
float2 sigEps1 = sigmaEpsilon[atoms.x];
float2 sigEps2 = sigmaEpsilon[atoms.y];
float sigma = sigEps1.x*sigEps2.x;
float epsilon = sigEps1.y*sigEps2.y;
#else
float sigma = 0;
float epsilon = 0;
#endif
exclusionParams[i] = make_float4((float) (138.935456f*chargeProd), sigma, epsilon, 0);
}
}
\ No newline at end of file
const float4 exclusionParams = PARAMS[index];
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
const real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
const real r = SQRT(r2);
const real invR = RECIP(r);
const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR);
real tempForce = 0.0f;
if (alphaR > 1e-6f) {
const real erfAlphaR = ERF(alphaR);
const real prefactor = exclusionParams.x*invR;
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
energy -= prefactor*erfAlphaR;
}
else {
energy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*exclusionParams.x;
}
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const real c6 = 64*exclusionParams.y*exclusionParams.y*exclusionParams.y*exclusionParams.z;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
energy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
if (r > 0)
delta *= tempForce*invR*invR;
real3 force1 = -delta;
real3 force2 = delta;
......@@ -655,6 +655,8 @@ private:
OpenCLArray charges;
OpenCLArray sigmaEpsilon;
OpenCLArray exceptionParams;
OpenCLArray exclusionAtoms;
OpenCLArray exclusionParams;
OpenCLArray baseParticleParams;
OpenCLArray baseExceptionParams;
OpenCLArray particleParamOffsets;
......@@ -683,7 +685,7 @@ private:
Kernel cpuPme;
PmeIO* pmeio;
SyncQueuePostComputation* syncQueue;
cl::Kernel computeParamsKernel;
cl::Kernel computeParamsKernel, computeExclusionParamsKernel;
cl::Kernel ewaldSumsKernel;
cl::Kernel ewaldForcesKernel;
cl::Kernel pmeAtomRangeKernel;
......
......@@ -1655,6 +1655,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0);
if (hasOffsets)
paramsDefines["HAS_OFFSETS"] = "1";
if (usePosqCharges)
paramsDefines["USE_POSQ_CHARGES"] = "1";
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
......@@ -1853,7 +1855,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// Evaluate the actual bspline moduli for X/Y/Z.
for(int dim = 0; dim < 3; dim++) {
for (int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? xsize : dim == 1 ? ysize : zsize);
vector<cl_double> moduli(ndata);
for (int i = 0; i < ndata; i++) {
......@@ -1883,6 +1885,37 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
}
}
// Add code to subtract off the reciprocal part of excluded interactions.
if ((nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) && pmeio == NULL) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumExceptions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumExceptions()/numContexts;
int numExclusions = endIndex-startIndex;
if (numExclusions > 0) {
paramsDefines["HAS_EXCLUSIONS"] = "1";
vector<vector<int> > atoms(numExclusions, vector<int>(2));
exclusionAtoms.initialize<mm_int2>(cl, numExclusions, "exclusionAtoms");
exclusionParams.initialize<mm_float4>(cl, numExclusions, "exclusionParams");
vector<mm_int2> exclusionAtomsVec(numExclusions);
for (int i = 0; i < numExclusions; i++) {
int j = i+startIndex;
exclusionAtomsVec[i] = mm_int2(exclusions[j].first, exclusions[j].second);
atoms[i][0] = exclusions[j].first;
atoms[i][1] = exclusions[j].second;
}
exclusionAtoms.upload(exclusionAtomsVec);
map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(exclusionParams.getDeviceBuffer(), "float4");
replacements["EWALD_ALPHA"] = cl.doubleToString(alpha);
replacements["TWO_OVER_SQRT_PI"] = cl.doubleToString(2.0/sqrt(M_PI));
replacements["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME)
replacements["EWALD_DISPERSION_ALPHA"] = cl.doubleToString(dispersionAlpha);
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::pmeExclusions, replacements), force.getForceGroup());
}
}
// Add the interaction to the default nonbonded kernel.
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
......@@ -1893,7 +1926,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
if (usePosqCharges) {
replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w";
paramsDefines["USE_POSQ_CHARGES"] = "1";
}
else {
replacements["CHARGE1"] = prefix+"charge1";
......@@ -2004,6 +2036,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
cl::Program program = cl.createProgram(OpenCLKernelSources::nonbondedParameters, paramsDefines);
computeParamsKernel = cl::Kernel(program, "computeParameters");
computeExclusionParamsKernel = cl::Kernel(program, "computeExclusionParameters");
info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force);
cl.addForce(info);
}
......@@ -2012,21 +2045,31 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
if (!hasInitializedKernel) {
hasInitializedKernel = true;
computeParamsKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(2, globalParams.getDeviceBuffer());
computeParamsKernel.setArg<cl_int>(3, cl.getPaddedNumAtoms());
computeParamsKernel.setArg<cl::Buffer>(4, baseParticleParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(5, cl.getPosq().getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(6, charges.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(7, sigmaEpsilon.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(8, particleParamOffsets.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(9, particleOffsetIndices.getDeviceBuffer());
int index = 0;
computeParamsKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
index++;
computeParamsKernel.setArg<cl::Buffer>(index++, globalParams.getDeviceBuffer());
computeParamsKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
computeParamsKernel.setArg<cl::Buffer>(index++, baseParticleParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, charges.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, sigmaEpsilon.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, particleParamOffsets.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, particleOffsetIndices.getDeviceBuffer());
if (exceptionParams.isInitialized()) {
computeParamsKernel.setArg<cl_int>(10, exceptionParams.getSize());
computeParamsKernel.setArg<cl::Buffer>(11, baseExceptionParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(12, exceptionParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(13, exceptionParamOffsets.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(14, exceptionOffsetIndices.getDeviceBuffer());
computeParamsKernel.setArg<cl_int>(index++, exceptionParams.getSize());
computeParamsKernel.setArg<cl::Buffer>(index++, baseExceptionParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, exceptionParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, exceptionParamOffsets.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, exceptionOffsetIndices.getDeviceBuffer());
}
if (exclusionParams.isInitialized()) {
computeExclusionParamsKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
computeExclusionParamsKernel.setArg<cl::Buffer>(1, charges.getDeviceBuffer());
computeExclusionParamsKernel.setArg<cl::Buffer>(2, sigmaEpsilon.getDeviceBuffer());
computeExclusionParamsKernel.setArg<cl_int>(3, exclusionParams.getSize());
computeExclusionParamsKernel.setArg<cl::Buffer>(4, exclusionAtoms.getDeviceBuffer());
computeExclusionParamsKernel.setArg<cl::Buffer>(5, exclusionParams.getDeviceBuffer());
}
if (cosSinSums.isInitialized()) {
ewaldSumsKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
......@@ -2176,6 +2219,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (recomputeParams || hasOffsets) {
computeParamsKernel.setArg<cl_int>(1, includeEnergy && includeReciprocal);
cl.executeKernel(computeParamsKernel, cl.getPaddedNumAtoms());
if (exclusionParams.isInitialized())
cl.executeKernel(computeExclusionParamsKernel, exclusionParams.getSize());
if (usePmeQueue) {
vector<cl::Event> events(1);
cl.getQueue().enqueueMarker(&events[0]);
......@@ -2350,9 +2395,9 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeDispersionSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
}
else {
if (cl.getSupports64BitGlobalAtomics()) {
if (!hasCoulomb)
sort->sort(pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) {
cl.clearBuffer(pmeGrid2);
setPeriodicBoxArgs(cl, pmeDispersionSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) {
......@@ -2369,6 +2414,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeDispersionFinishSpreadChargeKernel, gridSizeX*gridSizeY*gridSizeZ);
}
else {
sort->sort(pmeAtomGridIndex);
cl.clearBuffer(pmeGrid1);
cl.executeKernel(pmeDispersionAtomRangeKernel, cl.getNumAtoms());
setPeriodicBoxSizeArg(cl, pmeDispersionZIndexKernel, 2);
......
......@@ -5,8 +5,7 @@
unsigned int includeInteraction;
#endif
#if USE_EWALD
bool needCorrection = hasExclusions && isExcluded && atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS;
includeInteraction = ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection);
includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED);
const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR);
#if HAS_COULOMB
......@@ -26,49 +25,6 @@
const real erfcAlphaR = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expAlphaRSqr;
#endif
real tempForce = 0;
#if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
#endif
#endif
if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
#if HAS_COULOMB
if (1-erfcAlphaR > 1e-6) {
real erfAlphaR = erf(alphaR); // Our erfc approximation is not accurate enough when r is very small, which happens with Drude particles.
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += -prefactor*erfAlphaR;
}
else {
includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*CHARGE1*CHARGE2;
}
#endif
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
tempEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
#endif
}
else {
#if HAS_LENNARD_JONES
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
......@@ -88,6 +44,19 @@
}
#endif
#if DO_LJPME
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space.
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
// The multiplicative grid term
ljEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
......@@ -109,7 +78,6 @@
tempForce = prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += select((real) 0, prefactor*erfcAlphaR, includeInteraction);
#endif
}
dEdR += select((real) 0, tempForce*invR*invR, includeInteraction);
#else
#ifdef USE_CUTOFF
......
......@@ -63,3 +63,28 @@ __kernel void computeParameters(__global mixed* restrict energyBuffer, int inclu
if (includeSelfEnergy)
energyBuffer[get_global_id(0)] += energy;
}
/**
* Compute parameters for subtracting the reciprocal part of excluded interactions.
*/
__kernel void computeExclusionParameters(__global real4* restrict posq, __global real* restrict charge, __global float2* restrict sigmaEpsilon,
int numExclusions, __global const int2* restrict exclusionAtoms, __global float4* restrict exclusionParams) {
for (int i = get_global_id(0); i < numExclusions; i += get_global_size(0)) {
int2 atoms = exclusionAtoms[i];
#ifdef USE_POSQ_CHARGES
real chargeProd = posq[atoms.x].w*posq[atoms.y].w;
#else
real chargeProd = charge[atoms.x]*charge[atoms.y];
#endif
#ifdef INCLUDE_LJPME
float2 sigEps1 = sigmaEpsilon[atoms.x];
float2 sigEps2 = sigmaEpsilon[atoms.y];
float sigma = sigEps1.x*sigEps2.x;
float epsilon = sigEps1.y*sigEps2.y;
#else
float sigma = 0;
float epsilon = 0;
#endif
exclusionParams[i] = (float4) ((float) (138.935456f*chargeProd), sigma, epsilon, 0);
}
}
const float4 exclusionParams = PARAMS[index];
real3 delta = (real3) (pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
const real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
const real r = SQRT(r2);
const real invR = RECIP(r);
const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR);
real tempForce = 0.0f;
if (alphaR > 1e-6f) {
const real erfAlphaR = erf(alphaR);
const real prefactor = exclusionParams.x*invR;
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
energy -= prefactor*erfAlphaR;
}
else {
energy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*exclusionParams.x;
}
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const real c6 = 64*exclusionParams.y*exclusionParams.y*exclusionParams.y*exclusionParams.z;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
energy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
if (r > 0)
delta *= tempForce*invR*invR;
real3 force1 = -delta;
real3 force2 = delta;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment