Switched to a slightly faster (and more accurate) approximation for erfc()

4b5b08af · peastman · d7620eff · 4b5b08af · 4b5b08af
Commit 4b5b08af authored Sep 11, 2014 by peastman
Showing 2 changed files with 7 additions and 13 deletions

platforms/cuda/src/kernels/coulombLennardJones.cu +4 -7

platforms/opencl/src/kernels/coulombLennardJones.cl +3 -6

No files found.
--- a/platforms/cuda/src/kernels/coulombLennardJones.cu
+++ b/platforms/cuda/src/kernels/coulombLennardJones.cu
@@ -10,19 +10,16 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
 #else
    // This approximation for erfc is from Abramowitz and Stegun (1964) p. 299.  They cite the following as
    // the original source: C. Hastings, Jr., Approximations for Digital Computers (1955).  It has a maximum
-    // error of 3e-7.
+    // error of 1.5e-7.

-    real t = 1.0f+(0.0705230784f+(0.0422820123f+(0.0092705272f+(0.0001520143f+(0.0002765672f+0.0000430638f*alphaR)*alphaR)*alphaR)*alphaR)*alphaR)*alphaR;
-    t *= t;
-    t *= t;
-    t *= t;
-    const real erfcAlphaR = RECIP(t*t);
+    const real t = RECIP(1.0f+0.3275911f*alphaR);
+    const real erfcAlphaR = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expAlphaRSqr;
 #endif
    real tempForce = 0.0f;
    if (needCorrection) {
        // Subtract off the part of this interaction that was included in the reciprocal space contribution.

-        if (1-erfcAlphaR > 1e-6) {
+        if (1.0f-erfcAlphaR > 1e-6f) {
            real erfAlphaR = ERF(alphaR); // Our erfc approximation is not accurate enough when r is very small, which happens with Drude particles.
            tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
            tempEnergy += -prefactor*erfAlphaR;

--- a/platforms/opencl/src/kernels/coulombLennardJones.cl
+++ b/platforms/opencl/src/kernels/coulombLennardJones.cl
@@ -10,13 +10,10 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
 #else
    // This approximation for erfc is from Abramowitz and Stegun (1964) p. 299.  They cite the following as
    // the original source: C. Hastings, Jr., Approximations for Digital Computers (1955).  It has a maximum
-    // error of 3e-7.
+    // error of 1.5e-7.

-    real t = 1.0f+(0.0705230784f+(0.0422820123f+(0.0092705272f+(0.0001520143f+(0.0002765672f+0.0000430638f*alphaR)*alphaR)*alphaR)*alphaR)*alphaR)*alphaR;
-    t *= t;
-    t *= t;
-    t *= t;
-    const real erfcAlphaR = RECIP(t*t);
+    const real t = RECIP(1.0f+0.3275911f*alphaR);
+    const real erfcAlphaR = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expAlphaRSqr;
 #endif
    real tempForce = 0;
    if (needCorrection) {