"...ssh:/git@developer.sourcefind.cn:2222/tsoc/openmm.git" did not exist on "6bfe2d8b43a761cf4b29c041db82eafc37a199eb"
Commit fd2708b4 authored by peastman's avatar peastman
Browse files

Merge pull request #212 from peastman/master

Tabulated log(x) to improve GB performance
parents 59893e57 f39a5545
...@@ -101,16 +101,25 @@ private: ...@@ -101,16 +101,25 @@ private:
std::vector<std::vector<float> > threadBornForces; std::vector<std::vector<float> > threadBornForces;
std::vector<float> obcChain; std::vector<float> obcChain;
std::vector<double> threadEnergy; std::vector<double> threadEnergy;
std::vector<float> logTable;
float logDX, logDXInv;
// The following variables are used to make information accessible to the individual threads. // The following variables are used to make information accessible to the individual threads.
float const* posq; float const* posq;
std::vector<std::vector<float> >* threadForce; std::vector<std::vector<float> >* threadForce;
bool includeEnergy; bool includeEnergy;
static const int NUM_TABLE_POINTS;
/** /**
* Compute the displacement and squared distance between a collection of points, optionally using * Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions. * periodic boundary conditions.
*/ */
void getDeltaR(const fvec4& posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const; void getDeltaR(const fvec4& posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;
/**
* Evaluate log(x) using a lookup table for speed.
*/
fvec4 fastLog(fvec4 x);
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -24,12 +24,15 @@ ...@@ -24,12 +24,15 @@
#include "CpuGBSAOBCForce.h" #include "CpuGBSAOBCForce.h"
#include "SimTKOpenMMRealType.h" #include "SimTKOpenMMRealType.h"
#include "openmm/internal/SplineFitter.h"
#include "openmm/internal/vectorize.h" #include "openmm/internal/vectorize.h"
#include <cmath> #include <cmath>
using namespace std; using namespace std;
using namespace OpenMM; using namespace OpenMM;
const int CpuGBSAOBCForce::NUM_TABLE_POINTS = 1025;
class CpuGBSAOBCForce::ComputeTask : public ThreadPool::Task { class CpuGBSAOBCForce::ComputeTask : public ThreadPool::Task {
public: public:
ComputeTask(CpuGBSAOBCForce& owner) : owner(owner) { ComputeTask(CpuGBSAOBCForce& owner) : owner(owner) {
...@@ -41,6 +44,23 @@ public: ...@@ -41,6 +44,23 @@ public:
}; };
CpuGBSAOBCForce::CpuGBSAOBCForce() : cutoff(false), periodic(false) { CpuGBSAOBCForce::CpuGBSAOBCForce() : cutoff(false), periodic(false) {
logDX = 0.5/NUM_TABLE_POINTS;
logDXInv = 1.0f/logDX;
vector<double> x(NUM_TABLE_POINTS+1);
vector<double> y(NUM_TABLE_POINTS+1);
vector<double> deriv;
for (int i = 0; i < NUM_TABLE_POINTS+1; i++) {
x[i] = 0.5+i*0.5/NUM_TABLE_POINTS;
y[i] = log(x[i]);
}
SplineFitter::createNaturalSpline(x, y, deriv);
logTable.resize(4*NUM_TABLE_POINTS);
for (int i = 0; i < NUM_TABLE_POINTS; i++) {
logTable[4*i] = (float) y[i];
logTable[4*i+1] = (float) y[i+1];
logTable[4*i+2] = (float) (deriv[i]*logDX*logDX/6);
logTable[4*i+3] = (float) (deriv[i+1]*logDX*logDX/6);
}
} }
void CpuGBSAOBCForce::setUseCutoff(float distance) { void CpuGBSAOBCForce::setUseCutoff(float distance) {
...@@ -161,8 +181,7 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) { ...@@ -161,8 +181,7 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
fvec4 u_ij2 = u_ij*u_ij; fvec4 u_ij2 = u_ij*u_ij;
fvec4 rInverse = 1.0f/r; fvec4 rInverse = 1.0f/r;
fvec4 r2Inverse = rInverse*rInverse; fvec4 r2Inverse = rInverse*rInverse;
fvec4 ratio = u_ij/l_ij; fvec4 logRatio = fastLog(u_ij/l_ij);
fvec4 logRatio(logf(ratio[0]), logf(ratio[1]), logf(ratio[2]), logf(ratio[3]));
fvec4 term = l_ij - u_ij + 0.25f*r*(u_ij2 - l_ij2) + (0.5f*rInverse*logRatio) + (0.25f*scaledRadiusJ*scaledRadiusJ*rInverse)*(l_ij2 - u_ij2); fvec4 term = l_ij - u_ij + 0.25f*r*(u_ij2 - l_ij2) + (0.5f*rInverse*logRatio) + (0.25f*scaledRadiusJ*scaledRadiusJ*rInverse)*(l_ij2 - u_ij2);
for (int j = 0; j < 4; j++) { for (int j = 0; j < 4; j++) {
if (include[j]) { if (include[j]) {
...@@ -329,8 +348,7 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) { ...@@ -329,8 +348,7 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
fvec4 u_ij2 = u_ij*u_ij; fvec4 u_ij2 = u_ij*u_ij;
fvec4 rInverse = 1.0f/r; fvec4 rInverse = 1.0f/r;
fvec4 r2Inverse = rInverse*rInverse; fvec4 r2Inverse = rInverse*rInverse;
fvec4 ratio = u_ij/l_ij; fvec4 logRatio = fastLog(u_ij/l_ij);
fvec4 logRatio(logf(ratio[0]), logf(ratio[1]), logf(ratio[2]), logf(ratio[3]));
fvec4 t3 = 0.125f*(1.0f + scaledRadiusJ2*r2Inverse)*(l_ij2 - u_ij2) + 0.25f*logRatio*r2Inverse; fvec4 t3 = 0.125f*(1.0f + scaledRadiusJ2*r2Inverse)*(l_ij2 - u_ij2) + 0.25f*logRatio*r2Inverse;
fvec4 de = bornForce*t3*rInverse; fvec4 de = bornForce*t3*rInverse;
fvec4 result[4] = {dx*de, dy*de, dz*de, 0.0f}; fvec4 result[4] = {dx*de, dy*de, dz*de, 0.0f};
...@@ -363,3 +381,25 @@ void CpuGBSAOBCForce::getDeltaR(const fvec4& posI, const fvec4& x, const fvec4& ...@@ -363,3 +381,25 @@ void CpuGBSAOBCForce::getDeltaR(const fvec4& posI, const fvec4& x, const fvec4&
} }
r2 = dx*dx + dy*dy + dz*dz; r2 = dx*dx + dy*dy + dz*dz;
} }
fvec4 CpuGBSAOBCForce::fastLog(fvec4 x) {
// Evaluate log(x) using a lookup table for speed.
float y[4];
fvec4 x1 = (x-0.5f)*logDXInv;
ivec4 index = floor(x1);
fvec4 coeff[4];
coeff[1] = x1-index;
coeff[0] = 1.0f-coeff[1];
coeff[2] = coeff[0]*coeff[0]*coeff[0]-coeff[0];
coeff[3] = coeff[1]*coeff[1]*coeff[1]-coeff[1];
transpose(coeff[0], coeff[1], coeff[2], coeff[3]);
static float maxdiff = 0.0f;
for (int i = 0; i < 4; i++) {
if (index[i] >= 0 && index[i] < NUM_TABLE_POINTS)
y[i] = dot4(coeff[i], fvec4(&logTable[4*index[i]]));
else
y[i] = logf(x[i]);
}
return fvec4(y);
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment