"platforms/common/vscode:/vscode.git/clone" did not exist on "939ecf28a4bee6c45e37a9e64ff53df3e2e0fd79"
Commit b2960968 authored by Peter Eastman's avatar Peter Eastman
Browse files

Check accuracy of native_ functions and only use them if they are accurate enough

parent a0651b5b
...@@ -96,6 +96,40 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste ...@@ -96,6 +96,40 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
utilities = createProgram(OpenCLKernelSources::utilities); utilities = createProgram(OpenCLKernelSources::utilities);
clearBufferKernel = cl::Kernel(utilities, "clearBuffer"); clearBufferKernel = cl::Kernel(utilities, "clearBuffer");
reduceFloat4Kernel = cl::Kernel(utilities, "reduceFloat4Buffer"); reduceFloat4Kernel = cl::Kernel(utilities, "reduceFloat4Buffer");
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
cl::Kernel accuracyKernel(utilities, "determineNativeAccuracy");
OpenCLArray<mm_float4> values(*this, 20, "values", true);
float nextValue = 1e-4;
for (int i = 0; i < values.getSize(); ++i) {
values[i].x = nextValue;
nextValue *= M_PI;
}
values.upload();
accuracyKernel.setArg<cl::Buffer>(0, values.getDeviceBuffer());
accuracyKernel.setArg<cl_int>(1, values.getSize());
executeKernel(accuracyKernel, values.getSize());
values.download();
double maxSqrtError = 0.0, maxRsqrtError = 0.0, maxRecipError = 0.0;
for (int i = 0; i < values.getSize(); ++i) {
double correctSqrt = sqrt(values[i].x);
maxSqrtError = max(maxSqrtError, fabs(correctSqrt-values[i].y)/correctSqrt);
maxRsqrtError = max(maxRsqrtError, fabs(1.0/correctSqrt-values[i].z)*correctSqrt);
maxRecipError = max(maxRecipError, fabs(1.0/values[i].x-values[i].w)/values[i].w);
}
if (maxSqrtError < 1e-6)
compilationOptions += " -DSQRT=native_sqrt";
else
compilationOptions += " -DSQRT=sqrt";
if (maxRsqrtError < 1e-6)
compilationOptions += " -DRSQRT=native_rsqrt";
else
compilationOptions += " -DRSQRT=rsqrt";
if (maxRecipError < 1e-6)
compilationOptions += " -DRECIP=native_recip";
else
compilationOptions += " -DRECIP=1.0f/";
} }
OpenCLContext::~OpenCLContext() { OpenCLContext::~OpenCLContext() {
......
...@@ -14,7 +14,7 @@ __kernel void computeCustomBondForces(int numAtoms, int numBonds, __global float ...@@ -14,7 +14,7 @@ __kernel void computeCustomBondForces(int numAtoms, int numBonds, __global float
// Compute the force. // Compute the force.
float r = native_sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z); float r = SQRT(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
COMPUTE_FORCE COMPUTE_FORCE
delta.xyz *= -dEdR/r; delta.xyz *= -dEdR/r;
......
...@@ -64,7 +64,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene ...@@ -64,7 +64,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
float r = native_sqrt(r2); float r = SQRT(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+j; atom2 = y+baseLocalAtom+j;
float dEdR = 0.0f; float dEdR = 0.0f;
...@@ -140,7 +140,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene ...@@ -140,7 +140,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
float r = native_sqrt(r2); float r = SQRT(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+tj; atom2 = y+baseLocalAtom+tj;
float dEdR = 0.0f; float dEdR = 0.0f;
......
...@@ -63,7 +63,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene ...@@ -63,7 +63,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
float r = native_sqrt(r2); float r = SQRT(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+j; atom2 = y+j;
float dEdR = 0.0f; float dEdR = 0.0f;
...@@ -136,7 +136,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene ...@@ -136,7 +136,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
float r = native_sqrt(r2); float r = SQRT(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+tj; atom2 = y+tj;
float dEdR = 0.0f; float dEdR = 0.0f;
......
...@@ -61,7 +61,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq, ...@@ -61,7 +61,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
float r = native_sqrt(r2); float r = SQRT(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+j; atom2 = y+baseLocalAtom+j;
float tempValue1 = 0.0f; float tempValue1 = 0.0f;
...@@ -134,7 +134,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq, ...@@ -134,7 +134,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
float r = native_sqrt(r2); float r = SQRT(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+tj; atom2 = y+baseLocalAtom+tj;
float tempValue1 = 0.0f; float tempValue1 = 0.0f;
......
...@@ -61,7 +61,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq, ...@@ -61,7 +61,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
float r = native_sqrt(r2); float r = SQRT(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+j; atom2 = y+j;
float tempValue1 = 0.0f; float tempValue1 = 0.0f;
...@@ -122,7 +122,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq, ...@@ -122,7 +122,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
float tempValue1 = 0.0f; float tempValue1 = 0.0f;
float tempValue2 = 0.0f; float tempValue2 = 0.0f;
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
float r = native_sqrt(r2); float r = SQRT(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+j; atom2 = y+j;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
...@@ -177,7 +177,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq, ...@@ -177,7 +177,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
float r = native_sqrt(r2); float r = SQRT(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+tj; atom2 = y+tj;
float tempValue1 = 0.0f; float tempValue1 = 0.0f;
......
...@@ -27,14 +27,14 @@ float4 deltaPeriodic(float4 vec1, float4 vec2) { ...@@ -27,14 +27,14 @@ float4 deltaPeriodic(float4 vec1, float4 vec2) {
*/ */
float computeAngle(float4 vec1, float4 vec2) { float computeAngle(float4 vec1, float4 vec2) {
float dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z; float dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z;
float cosine = dotProduct*native_rsqrt(vec1.w*vec2.w); float cosine = dotProduct*RSQRT(vec1.w*vec2.w);
float angle; float angle;
if (cosine > 0.99f || cosine < -0.99f) { if (cosine > 0.99f || cosine < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead. // We're close to the singularity in acos(), so take the cross product and use asin() instead.
float4 crossProduct = cross(vec1, vec2); float4 crossProduct = cross(vec1, vec2);
float scale = vec1.w*vec2.w; float scale = vec1.w*vec2.w;
angle = asin(native_sqrt(dot(crossProduct, crossProduct)/scale)); angle = asin(SQRT(dot(crossProduct, crossProduct)/scale));
if (cosine < 0.0f) if (cosine < 0.0f)
angle = M_PI-angle; angle = M_PI-angle;
} }
......
...@@ -65,8 +65,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po ...@@ -65,8 +65,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
#else #else
if (atom1 < NUM_ATOMS && y+baseLocalAtom+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y+baseLocalAtom+j < NUM_ATOMS) {
#endif #endif
float invR = native_rsqrt(r2); float invR = RSQRT(r2);
float r = native_recip(invR); float r = RECIP(invR);
float2 params2 = (float2) (localData[baseLocalAtom+j].radius, localData[baseLocalAtom+j].scaledRadius); float2 params2 = (float2) (localData[baseLocalAtom+j].radius, localData[baseLocalAtom+j].scaledRadius);
float rScaledRadiusJ = r+params2.y; float rScaledRadiusJ = r+params2.y;
if ((j != tgx) && (params1.x < rScaledRadiusJ)) { if ((j != tgx) && (params1.x < rScaledRadiusJ)) {
...@@ -133,8 +133,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po ...@@ -133,8 +133,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
#else #else
if (atom1 < NUM_ATOMS && y+baseLocalAtom+tj < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y+baseLocalAtom+tj < NUM_ATOMS) {
#endif #endif
float invR = native_rsqrt(r2); float invR = RSQRT(r2);
float r = native_recip(invR); float r = RECIP(invR);
float2 params2 = (float2) (localData[baseLocalAtom+tj].radius, localData[baseLocalAtom+tj].scaledRadius); float2 params2 = (float2) (localData[baseLocalAtom+tj].radius, localData[baseLocalAtom+tj].scaledRadius);
float rScaledRadiusJ = r+params2.y; float rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -241,14 +241,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e ...@@ -241,14 +241,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float invR = native_rsqrt(r2); float invR = RSQRT(r2);
float r = native_recip(invR); float r = RECIP(invR);
float bornRadius2 = localData[baseLocalAtom+j].bornRadius; float bornRadius2 = localData[baseLocalAtom+j].bornRadius;
float alpha2_ij = bornRadius1*bornRadius2; float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij); float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij); float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm; float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = native_sqrt(denominator2); float denominator = SQRT(denominator2);
float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator; float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2; float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
...@@ -315,14 +315,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e ...@@ -315,14 +315,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float invR = native_rsqrt(r2); float invR = RSQRT(r2);
float r = native_recip(invR); float r = RECIP(invR);
float bornRadius2 = localData[baseLocalAtom+tj].bornRadius; float bornRadius2 = localData[baseLocalAtom+tj].bornRadius;
float alpha2_ij = bornRadius1*bornRadius2; float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij); float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij); float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm; float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = native_sqrt(denominator2); float denominator = SQRT(denominator2);
float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator; float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2; float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......
...@@ -65,8 +65,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po ...@@ -65,8 +65,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
#else #else
if (atom1 < NUM_ATOMS && y+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y+j < NUM_ATOMS) {
#endif #endif
float invR = native_rsqrt(r2); float invR = RSQRT(r2);
float r = native_recip(invR); float r = RECIP(invR);
float2 params2 = (float2) (localData[tbx+j].radius, localData[tbx+j].scaledRadius); float2 params2 = (float2) (localData[tbx+j].radius, localData[tbx+j].scaledRadius);
float rScaledRadiusJ = r+params2.y; float rScaledRadiusJ = r+params2.y;
if ((j != tgx) && (params1.x < rScaledRadiusJ)) { if ((j != tgx) && (params1.x < rScaledRadiusJ)) {
...@@ -130,8 +130,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po ...@@ -130,8 +130,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
#else #else
if (atom1 < NUM_ATOMS && y+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y+j < NUM_ATOMS) {
#endif #endif
float invR = native_rsqrt(r2); float invR = RSQRT(r2);
float r = native_recip(invR); float r = RECIP(invR);
float2 params2 = (float2) (localData[tbx+j].radius, localData[tbx+j].scaledRadius); float2 params2 = (float2) (localData[tbx+j].radius, localData[tbx+j].scaledRadius);
float rScaledRadiusJ = r+params2.y; float rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -198,8 +198,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po ...@@ -198,8 +198,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
#else #else
if (atom1 < NUM_ATOMS && y+tj < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y+tj < NUM_ATOMS) {
#endif #endif
float invR = native_rsqrt(r2); float invR = RSQRT(r2);
float r = native_recip(invR); float r = RECIP(invR);
float2 params2 = (float2) (localData[tbx+tj].radius, localData[tbx+tj].scaledRadius); float2 params2 = (float2) (localData[tbx+tj].radius, localData[tbx+tj].scaledRadius);
float rScaledRadiusJ = r+params2.y; float rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -301,14 +301,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e ...@@ -301,14 +301,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float invR = native_rsqrt(r2); float invR = RSQRT(r2);
float r = native_recip(invR); float r = RECIP(invR);
float bornRadius2 = localData[tbx+j].bornRadius; float bornRadius2 = localData[tbx+j].bornRadius;
float alpha2_ij = bornRadius1*bornRadius2; float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij); float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij); float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm; float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = native_sqrt(denominator2); float denominator = SQRT(denominator2);
float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator; float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2; float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
...@@ -370,14 +370,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e ...@@ -370,14 +370,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float invR = native_rsqrt(r2); float invR = RSQRT(r2);
float r = native_recip(invR); float r = RECIP(invR);
float bornRadius2 = localData[tbx+j].bornRadius; float bornRadius2 = localData[tbx+j].bornRadius;
float alpha2_ij = bornRadius1*bornRadius2; float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij); float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij); float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm; float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = native_sqrt(denominator2); float denominator = SQRT(denominator2);
float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator; float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2; float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
...@@ -436,14 +436,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e ...@@ -436,14 +436,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float invR = native_rsqrt(r2); float invR = RSQRT(r2);
float r = native_recip(invR); float r = RECIP(invR);
float bornRadius2 = localData[tbx+tj].bornRadius; float bornRadius2 = localData[tbx+tj].bornRadius;
float alpha2_ij = bornRadius1*bornRadius2; float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij); float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij); float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm; float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = native_sqrt(denominator2); float denominator = SQRT(denominator2);
float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator; float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2; float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......
...@@ -20,11 +20,11 @@ __kernel void calcHarmonicAngleForce(int numAtoms, int numAngles, __global float ...@@ -20,11 +20,11 @@ __kernel void calcHarmonicAngleForce(int numAtoms, int numAngles, __global float
float4 v1 = a2-a3; float4 v1 = a2-a3;
float4 cp = cross(v0, v1); float4 cp = cross(v0, v1);
float rp = cp.x*cp.x + cp.y*cp.y + cp.z*cp.z; float rp = cp.x*cp.x + cp.y*cp.y + cp.z*cp.z;
rp = max(native_sqrt(rp), 1.0e-06f); rp = max(SQRT(rp), 1.0e-06f);
float r21 = v0.x*v0.x + v0.y*v0.y + v0.z*v0.z; float r21 = v0.x*v0.x + v0.y*v0.y + v0.z*v0.z;
float r23 = v1.x*v1.x + v1.y*v1.y + v1.z*v1.z; float r23 = v1.x*v1.x + v1.y*v1.y + v1.z*v1.z;
float dot = v0.x*v1.x + v0.y*v1.y + v0.z*v1.z; float dot = v0.x*v1.x + v0.y*v1.y + v0.z*v1.z;
float cosine = dot*native_rsqrt(r21*r23); float cosine = dot*RSQRT(r21*r23);
float deltaIdeal = acos(cosine)-angleParams.x; float deltaIdeal = acos(cosine)-angleParams.x;
energy += 0.5f*angleParams.y*deltaIdeal*deltaIdeal; energy += 0.5f*angleParams.y*deltaIdeal*deltaIdeal;
float dEdR = angleParams.y*deltaIdeal; float dEdR = angleParams.y*deltaIdeal;
......
...@@ -14,7 +14,7 @@ __kernel void calcHarmonicBondForce(int numAtoms, int numBonds, __global float4* ...@@ -14,7 +14,7 @@ __kernel void calcHarmonicBondForce(int numAtoms, int numBonds, __global float4*
// Compute the force. // Compute the force.
float r = native_sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z); float r = SQRT(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
float deltaIdeal = r-bondParams.x; float deltaIdeal = r-bondParams.x;
energy += 0.5f * bondParams.y*deltaIdeal*deltaIdeal; energy += 0.5f * bondParams.y*deltaIdeal*deltaIdeal;
float dEdR = bondParams.y * deltaIdeal; float dEdR = bondParams.y * deltaIdeal;
......
...@@ -16,7 +16,7 @@ __kernel void computeNonbondedExceptions(__global float4* forceBuffers, __global ...@@ -16,7 +16,7 @@ __kernel void computeNonbondedExceptions(__global float4* forceBuffers, __global
// Compute the force. // Compute the force.
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float invR = native_rsqrt(r2); float invR = RSQRT(r2);
float sig2 = invR*exceptionParams.y; float sig2 = invR*exceptionParams.y;
sig2 *= sig2; sig2 *= sig2;
float sig6 = sig2*sig2*sig2; float sig6 = sig2*sig2*sig2;
......
...@@ -67,8 +67,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -67,8 +67,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float invR = native_rsqrt(r2); float invR = RSQRT(r2);
float r = native_recip(invR); float r = RECIP(invR);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+j; atom2 = y+baseLocalAtom+j;
float dEdR = 0.0f; float dEdR = 0.0f;
...@@ -135,8 +135,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -135,8 +135,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float invR = native_rsqrt(r2); float invR = RSQRT(r2);
float r = native_recip(invR); float r = RECIP(invR);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+tj; atom2 = y+baseLocalAtom+tj;
float dEdR = 0.0f; float dEdR = 0.0f;
......
...@@ -124,8 +124,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -124,8 +124,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float invR = native_rsqrt(r2); float invR = RSQRT(r2);
float r = native_recip(invR); float r = RECIP(invR);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+j; atom2 = y+j;
float dEdR = 0.0f; float dEdR = 0.0f;
...@@ -181,8 +181,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -181,8 +181,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float invR = native_rsqrt(r2); float invR = RSQRT(r2);
float r = native_recip(invR); float r = RECIP(invR);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+tj; atom2 = y+tj;
float dEdR = 0.0f; float dEdR = 0.0f;
......
...@@ -30,3 +30,14 @@ __kernel void reduceFloat4Buffer(__global float4* buffer, int bufferSize, int nu ...@@ -30,3 +30,14 @@ __kernel void reduceFloat4Buffer(__global float4* buffer, int bufferSize, int nu
index += get_global_size(0); index += get_global_size(0);
} }
} }
/**
* This is called to determine the accuracy of native_sqrt(), native_rsqrt() and native_recip().
*/
__kernel void determineNativeAccuracy(__global float4* values, int numValues) {
for (int i = 0; i < numValues; ++i) {
float v = values[i].x;
values[i] = (float4) (v, native_sqrt(v), native_rsqrt(v), native_recip(v));
}
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment