Using fast approximation for erfc instead of tabulated values

0e5d3fb1 · Peter Eastman · d9029f61 · 0e5d3fb1 · 0e5d3fb1 · 0e5d3fb1
Commit 0e5d3fb1 authored Sep 02, 2011 by Peter Eastman
3 changed files
--- a/platforms/opencl/src/OpenCLKernels.cpp
+++ b/platforms/opencl/src/OpenCLKernels.cpp
@@ -41,7 +41,6 @@
 #include "lepton/Parser.h"
 #include "lepton/ParsedExpression.h"
 #include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
-#include "openmm/internal/MSVC_erfc.h"
 #include <cmath>
 #include <set>

@@ -1154,8 +1153,6 @@ OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
        delete pmeAtomRange;
    if (pmeAtomGridIndex != NULL)
        delete pmeAtomGridIndex;
-    if (erfcTable != NULL)
-        delete erfcTable;
    if (sort != NULL)
        delete sort;
    if (fft != NULL)
@@ -1341,19 +1338,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
    }
    else
        ewaldSelfEnergy = 0.0;
-    
-    // Tabulate values of erfc().
-
-    if (force.getNonbondedMethod() == NonbondedForce::Ewald || force.getNonbondedMethod() == NonbondedForce::PME) {
-        const int tableSize = 2048;
-        defines["ERFC_TABLE_SCALE"] = doubleToString((tableSize-1)/(alpha*force.getCutoffDistance()));
-        erfcTable = new OpenCLArray<cl_float>(cl, tableSize, "ErfcTable", false, CL_MEM_READ_ONLY);
-        vector<cl_float> erfcVector(tableSize);
-        for (int i = 0; i < tableSize; ++i)
-            erfcVector[i] = (float) erfc(i*(alpha*force.getCutoffDistance())/(tableSize-1));
-        erfcTable->upload(erfcVector);
-        cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo("erfcTable", "float", 1, sizeof(cl_float), erfcTable->getDeviceBuffer()));
-    }

    // Add the interaction to the default nonbonded kernel.
    

--- a/platforms/opencl/src/OpenCLKernels.h
+++ b/platforms/opencl/src/OpenCLKernels.h
@@ -477,7 +477,7 @@ public:
    OpenCLCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, System& system) : CalcNonbondedForceKernel(name, platform),
            hasInitializedKernel(false), cl(cl), sigmaEpsilon(NULL), exceptionParams(NULL), exceptionIndices(NULL), cosSinSums(NULL), pmeGrid(NULL),
            pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeBsplineTheta(NULL), pmeBsplineDtheta(NULL), pmeAtomRange(NULL),
-            pmeAtomGridIndex(NULL), erfcTable(NULL), sort(NULL), fft(NULL) {
+            pmeAtomGridIndex(NULL), sort(NULL), fft(NULL) {
    }
    ~OpenCLCalcNonbondedForceKernel();
    /**
@@ -511,7 +511,6 @@ private:
    OpenCLArray<mm_float4>* pmeBsplineDtheta;
    OpenCLArray<cl_int>* pmeAtomRange;
    OpenCLArray<mm_int2>* pmeAtomGridIndex;
-    OpenCLArray<cl_float>* erfcTable;
    OpenCLSort<mm_int2>* sort;
    OpenCLFFT3D* fft;
    cl::Kernel exceptionsKernel;

--- a/platforms/opencl/src/kernels/coulombLennardJones.cl
+++ b/platforms/opencl/src/kernels/coulombLennardJones.cl
@@ -4,15 +4,13 @@ if (!isExcluded || needCorrection) {
    const float prefactor = 138.935456f*posq1.w*posq2.w*invR;
    float alphaR = EWALD_ALPHA*r;
    float erfcAlphaR = 0.0f;
-    if (r2 < CUTOFF_SQUARED) {
-        float normalized = ERFC_TABLE_SCALE*alphaR;
-        int tableIndex = (int) normalized;
-        float fract2 = normalized-tableIndex;
-        float fract1 = 1.0f-fract2;
-        erfcAlphaR = fract1*erfcTable[tableIndex] + fract2*erfcTable[tableIndex+1];
+    if (r2 < CUTOFF_SQUARED || needCorrection) {
+        // This approximation for erfc is eq. 7.1.25 of Abramowitz and Stegun (1964) p. 299 (stated absolute error <= 2.5e-5
+        // for non-negative arguments).  They cite as the original source: C. Hastings, Jr., Approximations for Digital Computers (1955).
+
+        float t = 1.0f/(1.0f+0.47047f*alphaR);
+        erfcAlphaR = (t*(0.3480242f+t*(-0.0958798f+t*0.7478556f)))*exp(-alphaR*alphaR);
    }
-    else if (needCorrection)
-        erfcAlphaR = erfc(alphaR);
    float tempForce = 0.0f;
    if (needCorrection) {
        // Subtract off the part of this interaction that was included in the reciprocal space contribution.