Commit 15465af0 authored by peastman's avatar peastman
Browse files

Tabulated erfc() to improve accuracy of energies

parent 4bdbcf4d
/* Portions copyright (c) 2006-2013 Stanford University and Simbios. /* Portions copyright (c) 2006-2015 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -174,8 +174,8 @@ protected: ...@@ -174,8 +174,8 @@ protected:
float alphaEwald; float alphaEwald;
int numRx, numRy, numRz; int numRx, numRy, numRz;
int meshDim[3]; int meshDim[3];
std::vector<float> ewaldScaleTable; std::vector<float> erfcTable, ewaldScaleTable;
float ewaldDX, ewaldDXInv; float ewaldDX, ewaldDXInv, erfcDXInv;
std::vector<double> threadEnergy; std::vector<double> threadEnergy;
// The following variables are used to make information accessible to the individual threads. // The following variables are used to make information accessible to the individual threads.
int numberOfAtoms; int numberOfAtoms;
...@@ -241,7 +241,7 @@ protected: ...@@ -241,7 +241,7 @@ protected:
/** /**
* Compute a fast approximation to erfc(x). * Compute a fast approximation to erfc(x).
*/ */
static float erfcApprox(float x); float erfcApprox(float x);
}; };
} // namespace OpenMM } // namespace OpenMM
......
/* Portions copyright (c) 2006-2014 Stanford University and Simbios. /* Portions copyright (c) 2006-2015 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -87,7 +87,7 @@ protected: ...@@ -87,7 +87,7 @@ protected:
/** /**
* Compute a fast approximation to erfc(x). * Compute a fast approximation to erfc(x).
*/ */
static fvec4 erfcApprox(const fvec4& x); fvec4 erfcApprox(const fvec4& x);
/** /**
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI) * Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
......
/* Portions copyright (c) 2006-2014 Stanford University and Simbios. /* Portions copyright (c) 2006-2015 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -86,7 +86,7 @@ protected: ...@@ -86,7 +86,7 @@ protected:
/** /**
* Compute a fast approximation to erfc(x). * Compute a fast approximation to erfc(x).
*/ */
static fvec8 erfcApprox(const fvec8& x); fvec8 erfcApprox(const fvec8& x);
/** /**
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI) * Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
......
...@@ -171,17 +171,20 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -171,17 +171,20 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
} }
void CpuNonbondedForce::tabulateEwaldScaleFactor() { void CpuNonbondedForce::tabulateEwaldScaleFactor() {
if (tableIsValid) if (tableIsValid)
return; return;
tableIsValid = true; tableIsValid = true;
ewaldDX = cutoffDistance/NUM_TABLE_POINTS; ewaldDX = cutoffDistance/NUM_TABLE_POINTS;
ewaldDXInv = 1.0f/ewaldDX; ewaldDXInv = 1.0f/ewaldDX;
erfcDXInv = 1.0f/(ewaldDX*alphaEwald);
erfcTable.resize(NUM_TABLE_POINTS+4);
ewaldScaleTable.resize(NUM_TABLE_POINTS+4); ewaldScaleTable.resize(NUM_TABLE_POINTS+4);
for (int i = 0; i < NUM_TABLE_POINTS+4; i++) { for (int i = 0; i < NUM_TABLE_POINTS+4; i++) {
double r = i*ewaldDX; double r = i*ewaldDX;
double alphaR = alphaEwald*r; double alphaR = alphaEwald*r;
ewaldScaleTable[i] = erfc(alphaR) + TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR); erfcTable[i] = erfc(alphaR);
ewaldScaleTable[i] = erfcTable[i] + TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR);
} }
} }
...@@ -364,15 +367,15 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -364,15 +367,15 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
float inverseR = 1/r; float inverseR = 1/r;
float chargeProd = ONE_4PI_EPS0*posq[4*i+3]*posq[4*j+3]; float chargeProd = ONE_4PI_EPS0*posq[4*i+3]*posq[4*j+3];
float alphaR = alphaEwald*r; float alphaR = alphaEwald*r;
float erfAlphaR = erf(alphaR); float erfcAlphaR = erfcApprox(alphaR);
if (erfAlphaR > 1e-6f) { if (1-erfcAlphaR > 1e-6f) {
float dEdR = (float) (chargeProd * inverseR * inverseR * inverseR); float dEdR = (float) (chargeProd * inverseR * inverseR * inverseR);
dEdR = (float) (dEdR * (erfAlphaR-TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR))); dEdR = (float) (dEdR * (1.0f-erfcAlphaR-TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR)));
fvec4 result = deltaR*dEdR; fvec4 result = deltaR*dEdR;
(fvec4(forces+4*i)-result).store(forces+4*i); (fvec4(forces+4*i)-result).store(forces+4*i);
(fvec4(forces+4*j)+result).store(forces+4*j); (fvec4(forces+4*j)+result).store(forces+4*j);
if (includeEnergy) if (includeEnergy)
threadEnergy[threadIndex] -= chargeProd*inverseR*erfAlphaR; threadEnergy[threadIndex] -= chargeProd*inverseR*(1.0f-erfcAlphaR);
} }
} }
} }
...@@ -473,14 +476,10 @@ void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& d ...@@ -473,14 +476,10 @@ void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& d
} }
float CpuNonbondedForce::erfcApprox(float x) { float CpuNonbondedForce::erfcApprox(float x) {
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as float x1 = x*erfcDXInv;
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum int index = min((int) floor(x1), NUM_TABLE_POINTS);
// error of 3e-7. float coeff2 = x1-index;
float coeff1 = 1.0f-coeff2;
float t = 1.0f+(0.0705230784f+(0.0422820123f+(0.0092705272f+(0.0001520143f+(0.0002765672f+0.0000430638f*x)*x)*x)*x)*x)*x; return coeff1*erfcTable[index] + coeff2*erfcTable[index+1];
t *= t;
t *= t;
t *= t;
return 1.0f/(t*t);
} }
/* Portions copyright (c) 2006-2014 Stanford University and Simbios. /* Portions copyright (c) 2006-2015 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -393,15 +393,16 @@ void CpuNonbondedForceVec4::getDeltaR(const fvec4& posI, const fvec4& x, const f ...@@ -393,15 +393,16 @@ void CpuNonbondedForceVec4::getDeltaR(const fvec4& posI, const fvec4& x, const f
} }
fvec4 CpuNonbondedForceVec4::erfcApprox(const fvec4& x) { fvec4 CpuNonbondedForceVec4::erfcApprox(const fvec4& x) {
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as fvec4 x1 = x*erfcDXInv;
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum ivec4 index = min(floor(x1), NUM_TABLE_POINTS);
// error of 3e-7. fvec4 coeff2 = x1-index;
fvec4 coeff1 = 1.0f-coeff2;
fvec4 t = 1.0f+(0.0705230784f+(0.0422820123f+(0.0092705272f+(0.0001520143f+(0.0002765672f+0.0000430638f*x)*x)*x)*x)*x)*x; fvec4 t1(&erfcTable[index[0]]);
t *= t; fvec4 t2(&erfcTable[index[1]]);
t *= t; fvec4 t3(&erfcTable[index[2]]);
t *= t; fvec4 t4(&erfcTable[index[3]]);
return 1.0f/(t*t); transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2;
} }
fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) { fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) {
......
/* Portions copyright (c) 2006-2014 Stanford University and Simbios. /* Portions copyright (c) 2006-2015 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -423,15 +423,23 @@ void CpuNonbondedForceVec8::getDeltaR(const fvec4& posI, const fvec8& x, const f ...@@ -423,15 +423,23 @@ void CpuNonbondedForceVec8::getDeltaR(const fvec4& posI, const fvec8& x, const f
} }
fvec8 CpuNonbondedForceVec8::erfcApprox(const fvec8& x) { fvec8 CpuNonbondedForceVec8::erfcApprox(const fvec8& x) {
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as fvec8 x1 = x*erfcDXInv;
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum ivec8 index = min(floor(x1), NUM_TABLE_POINTS);
// error of 3e-7. fvec8 coeff2 = x1-index;
fvec8 coeff1 = 1.0f-coeff2;
fvec8 t = 1.0f+(0.0705230784f+(0.0422820123f+(0.0092705272f+(0.0001520143f+(0.0002765672f+0.0000430638f*x)*x)*x)*x)*x)*x; ivec4 indexLower = index.lowerVec();
t *= t; ivec4 indexUpper = index.upperVec();
t *= t; fvec4 t1(&erfcTable[indexLower[0]]);
t *= t; fvec4 t2(&erfcTable[indexLower[1]]);
return 1.0f/(t*t); fvec4 t3(&erfcTable[indexLower[2]]);
fvec4 t4(&erfcTable[indexLower[3]]);
fvec4 t5(&erfcTable[indexUpper[0]]);
fvec4 t6(&erfcTable[indexUpper[1]]);
fvec4 t7(&erfcTable[indexUpper[2]]);
fvec4 t8(&erfcTable[indexUpper[3]]);
fvec8 s1, s2, s3, s4;
transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
return coeff1*s1 + coeff2*s2;
} }
fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(const fvec8& x) { fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(const fvec8& x) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment