Commit 3f2c29fb authored by Peter Eastman's avatar Peter Eastman
Browse files

Optimizations to OpenCL platform

parent 62a83f80
......@@ -1266,6 +1266,9 @@ void OpenCLCalcNonbondedForceKernel::executeForces(ContextImpl& context) {
pmeDefines["PERIODIC_BOX_SIZE_X"] = doubleToString(boxSize.x);
pmeDefines["PERIODIC_BOX_SIZE_Y"] = doubleToString(boxSize.y);
pmeDefines["PERIODIC_BOX_SIZE_Z"] = doubleToString(boxSize.z);
pmeDefines["INV_PERIODIC_BOX_SIZE_X"] = doubleToString(1.0/boxSize.x);
pmeDefines["INV_PERIODIC_BOX_SIZE_Y"] = doubleToString(1.0/boxSize.y);
pmeDefines["INV_PERIODIC_BOX_SIZE_Z"] = doubleToString(1.0/boxSize.z);
pmeDefines["RECIP_SCALE_FACTOR"] = doubleToString(1.0/(M_PI*boxSize.x*boxSize.y*boxSize.z));
cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
pmeGridIndexKernel = cl::Kernel(program, "updateGridIndexAndFraction");
......@@ -1564,6 +1567,9 @@ void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) {
defines["PERIODIC_BOX_SIZE_X"] = doubleToString(nb.getPeriodicBoxSize().x);
defines["PERIODIC_BOX_SIZE_Y"] = doubleToString(nb.getPeriodicBoxSize().y);
defines["PERIODIC_BOX_SIZE_Z"] = doubleToString(nb.getPeriodicBoxSize().z);
defines["INV_PERIODIC_BOX_SIZE_X"] = doubleToString(1.0/nb.getPeriodicBoxSize().x);
defines["INV_PERIODIC_BOX_SIZE_Y"] = doubleToString(1.0/nb.getPeriodicBoxSize().y);
defines["INV_PERIODIC_BOX_SIZE_Z"] = doubleToString(1.0/nb.getPeriodicBoxSize().z);
defines["CUTOFF_SQUARED"] = doubleToString(nb.getCutoffDistance()*nb.getCutoffDistance());
defines["PREFACTOR"] = doubleToString(prefactor);
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
......@@ -1864,6 +1870,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
defines["PERIODIC_BOX_SIZE_X"] = doubleToString(boxVectors[0][0]);
defines["PERIODIC_BOX_SIZE_Y"] = doubleToString(boxVectors[1][1]);
defines["PERIODIC_BOX_SIZE_Z"] = doubleToString(boxVectors[2][2]);
defines["INV_PERIODIC_BOX_SIZE_X"] = doubleToString(1.0/boxVectors[0][0]);
defines["INV_PERIODIC_BOX_SIZE_Y"] = doubleToString(1.0/boxVectors[1][1]);
defines["INV_PERIODIC_BOX_SIZE_Z"] = doubleToString(1.0/boxVectors[2][2]);
defines["CUTOFF_SQUARED"] = doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
......@@ -2012,6 +2021,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
defines["PERIODIC_BOX_SIZE_X"] = doubleToString(boxVectors[0][0]);
defines["PERIODIC_BOX_SIZE_Y"] = doubleToString(boxVectors[1][1]);
defines["PERIODIC_BOX_SIZE_Z"] = doubleToString(boxVectors[2][2]);
defines["INV_PERIODIC_BOX_SIZE_X"] = doubleToString(1.0/boxVectors[0][0]);
defines["INV_PERIODIC_BOX_SIZE_Y"] = doubleToString(1.0/boxVectors[1][1]);
defines["INV_PERIODIC_BOX_SIZE_Z"] = doubleToString(1.0/boxVectors[2][2]);
defines["CUTOFF_SQUARED"] = doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
......
......@@ -28,6 +28,7 @@
#include "OpenCLArray.h"
#include "OpenCLCompact.h"
#include "OpenCLKernelSources.h"
#include "OpenCLExpressionUtilities.h"
#include <map>
using namespace OpenMM;
......@@ -349,19 +350,13 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines["USE_PERIODIC"] = "1";
if (useExclusions)
defines["USE_EXCLUSIONS"] = "1";
stringstream xsize, ysize, zsize, cutoffSquared;
xsize.precision(8);
ysize.precision(8);
zsize.precision(8);
cutoffSquared.precision(8);
xsize << scientific << periodicBoxSize.x << "f";
ysize << scientific << periodicBoxSize.y << "f";
zsize << scientific << periodicBoxSize.z << "f";
cutoffSquared << scientific << (cutoff*cutoff) << "f";
defines["PERIODIC_BOX_SIZE_X"] = xsize.str();
defines["PERIODIC_BOX_SIZE_Y"] = ysize.str();
defines["PERIODIC_BOX_SIZE_Z"] = zsize.str();
defines["CUTOFF_SQUARED"] = cutoffSquared.str();
defines["PERIODIC_BOX_SIZE_X"] = OpenCLExpressionUtilities::doubleToString(periodicBoxSize.x);
defines["PERIODIC_BOX_SIZE_Y"] = OpenCLExpressionUtilities::doubleToString(periodicBoxSize.y);
defines["PERIODIC_BOX_SIZE_Z"] = OpenCLExpressionUtilities::doubleToString(periodicBoxSize.z);
defines["INV_PERIODIC_BOX_SIZE_X"] = OpenCLExpressionUtilities::doubleToString(1.0/periodicBoxSize.x);
defines["INV_PERIODIC_BOX_SIZE_Y"] = OpenCLExpressionUtilities::doubleToString(1.0/periodicBoxSize.y);
defines["INV_PERIODIC_BOX_SIZE_Z"] = OpenCLExpressionUtilities::doubleToString(1.0/periodicBoxSize.z);
defines["CUTOFF_SQUARED"] = OpenCLExpressionUtilities::doubleToString(cutoff*cutoff);
stringstream natom, padded;
natom << context.getNumAtoms();
padded << context.getPaddedNumAtoms();
......
......@@ -14,7 +14,7 @@ __kernel void computeCustomBondForces(int numAtoms, int numBonds, __global float
// Compute the force.
float r = sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
float r = native_sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
COMPUTE_FORCE
delta.xyz *= -dEdR/r;
......
......@@ -56,15 +56,15 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
float r = sqrt(r2);
float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+j;
float dEdR = 0.0f;
......@@ -132,15 +132,15 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
float r = sqrt(r2);
float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+tj;
float dEdR = 0.0f;
......
......@@ -55,15 +55,15 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
float r = sqrt(r2);
float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS
atom2 = y+j;
float dEdR = 0.0f;
......@@ -128,15 +128,15 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
float r = sqrt(r2);
float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS
atom2 = y+tj;
float dEdR = 0.0f;
......
......@@ -53,15 +53,15 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
float r = sqrt(r2);
float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+j;
float tempValue1 = 0.0f;
......@@ -126,15 +126,15 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
float r = sqrt(r2);
float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+tj;
float tempValue1 = 0.0f;
......
......@@ -53,15 +53,15 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
float r = sqrt(r2);
float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS
atom2 = y+j;
float tempValue1 = 0.0f;
......@@ -114,15 +114,15 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float tempValue1 = 0.0f;
float tempValue2 = 0.0f;
if (r2 < CUTOFF_SQUARED) {
float r = sqrt(r2);
float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS
atom2 = y+j;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
......@@ -169,15 +169,15 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
float r = sqrt(r2);
float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS
atom2 = y+tj;
float tempValue1 = 0.0f;
......
......@@ -14,9 +14,9 @@ float4 delta(float4 vec1, float4 vec2) {
float4 deltaPeriodic(float4 vec1, float4 vec2) {
float4 result = (float4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f);
#ifdef USE_PERIODIC
result.x -= floor(result.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
result.y -= floor(result.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
result.z -= floor(result.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
result.x -= floor(result.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
result.y -= floor(result.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
result.z -= floor(result.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result;
......@@ -27,14 +27,14 @@ float4 deltaPeriodic(float4 vec1, float4 vec2) {
*/
float computeAngle(float4 vec1, float4 vec2) {
float dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z;
float cosine = dotProduct/sqrt(vec1.w*vec2.w);
float cosine = dotProduct*native_rsqrt(vec1.w*vec2.w);
float angle;
if (cosine > 0.99f || cosine < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
float4 crossProduct = cross(vec1, vec2);
float scale = vec1.w*vec2.w;
angle = asin(sqrt(dot(crossProduct, crossProduct)/scale));
angle = asin(native_sqrt(dot(crossProduct, crossProduct)/scale));
if (cosine < 0.0f)
angle = M_PI-angle;
}
......
......@@ -42,9 +42,9 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
for (unsigned int j = 0; j < TILE_SIZE/2; j++) {
float4 delta = (float4) (local_posq[baseLocalAtom+j].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -52,8 +52,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
#else
if (atom1 < NUM_ATOMS && y+baseLocalAtom+j < NUM_ATOMS) {
#endif
float r = sqrt(r2);
float invR = 1.0f/r;
float invR = native_rsqrt(r2);
float r = native_recip(invR);
float2 params2 = local_params[baseLocalAtom+j];
float rScaledRadiusJ = r+params2.y;
if ((j != tgx) && (params1.x < rScaledRadiusJ)) {
......@@ -104,9 +104,9 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
for (unsigned int j = 0; j < TILE_SIZE/2; j++) {
float4 delta = (float4) (local_posq[baseLocalAtom+tj].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -114,8 +114,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
#else
if (atom1 < NUM_ATOMS && y+baseLocalAtom+tj < NUM_ATOMS) {
#endif
float r = sqrt(r2);
float invR = 1.0f/r;
float invR = native_rsqrt(r2);
float r = native_recip(invR);
float2 params2 = local_params[baseLocalAtom+tj];
float rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) {
......@@ -214,19 +214,19 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
float4 posq2 = local_posq[baseLocalAtom+j];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
float invR = 1.0f/r;
float invR = native_rsqrt(r2);
float r = native_recip(invR);
float bornRadius2 = local_bornRadii[baseLocalAtom+j];
float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = sqrt(denominator2);
float denominator = native_sqrt(denominator2);
float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......@@ -281,19 +281,19 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
float4 posq2 = local_posq[baseLocalAtom+tj];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
float invR = 1.0f/r;
float invR = native_rsqrt(r2);
float r = native_recip(invR);
float bornRadius2 = local_bornRadii[baseLocalAtom+tj];
float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = sqrt(denominator2);
float denominator = native_sqrt(denominator2);
float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......
......@@ -42,9 +42,9 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
for (unsigned int j = 0; j < TILE_SIZE; j++) {
float4 delta = (float4) (local_posq[tbx+j].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -52,8 +52,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
#else
if (atom1 < NUM_ATOMS && y+j < NUM_ATOMS) {
#endif
float r = sqrt(r2);
float invR = 1.0f/r;
float invR = native_rsqrt(r2);
float r = native_recip(invR);
float2 params2 = local_params[tbx+j];
float rScaledRadiusJ = r+params2.y;
if ((j != tgx) && (params1.x < rScaledRadiusJ)) {
......@@ -100,9 +100,9 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
if ((flags&(1<<j)) != 0) {
float4 delta = (float4) (local_posq[tbx+j].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
tempBuffer[get_local_id(0)] = 0.0f;
......@@ -111,8 +111,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
#else
if (atom1 < NUM_ATOMS && y+j < NUM_ATOMS) {
#endif
float r = sqrt(r2);
float invR = 1.0f/r;
float invR = native_rsqrt(r2);
float r = native_recip(invR);
float2 params2 = local_params[tbx+j];
float rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) {
......@@ -169,9 +169,9 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
for (unsigned int j = 0; j < TILE_SIZE; j++) {
float4 delta = (float4) (local_posq[tbx+tj].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -179,8 +179,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
#else
if (atom1 < NUM_ATOMS && y+tj < NUM_ATOMS) {
#endif
float r = sqrt(r2);
float invR = 1.0f/r;
float invR = native_rsqrt(r2);
float r = native_recip(invR);
float2 params2 = local_params[tbx+tj];
float rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) {
......@@ -274,19 +274,19 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
float4 posq2 = local_posq[tbx+j];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
float invR = 1.0f/r;
float invR = native_rsqrt(r2);
float r = native_recip(invR);
float bornRadius2 = local_bornRadii[tbx+j];
float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = sqrt(denominator2);
float denominator = native_sqrt(denominator2);
float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......@@ -336,19 +336,19 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
float4 posq2 = local_posq[tbx+j];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
float invR = 1.0f/r;
float invR = native_rsqrt(r2);
float r = native_recip(invR);
float bornRadius2 = local_bornRadii[tbx+j];
float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = sqrt(denominator2);
float denominator = native_sqrt(denominator2);
float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......@@ -397,19 +397,19 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
float4 posq2 = local_posq[tbx+tj];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
float invR = 1.0f/r;
float invR = native_rsqrt(r2);
float r = native_recip(invR);
float bornRadius2 = local_bornRadii[tbx+tj];
float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = sqrt(denominator2);
float denominator = native_sqrt(denominator2);
float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......
......@@ -20,11 +20,11 @@ __kernel void calcHarmonicAngleForce(int numAtoms, int numAngles, __global float
float4 v1 = a2-a3;
float4 cp = cross(v0, v1);
float rp = cp.x*cp.x + cp.y*cp.y + cp.z*cp.z;
rp = max(sqrt(rp), 1.0e-06f);
rp = max(native_sqrt(rp), 1.0e-06f);
float r21 = v0.x*v0.x + v0.y*v0.y + v0.z*v0.z;
float r23 = v1.x*v1.x + v1.y*v1.y + v1.z*v1.z;
float dot = v0.x*v1.x + v0.y*v1.y + v0.z*v1.z;
float cosine = dot/sqrt(r21*r23);
float cosine = dot*native_rsqrt(r21*r23);
float deltaIdeal = acos(cosine)-angleParams.x;
energy += 0.5f*angleParams.y*deltaIdeal*deltaIdeal;
float dEdR = angleParams.y*deltaIdeal;
......
......@@ -14,7 +14,7 @@ __kernel void calcHarmonicBondForce(int numAtoms, int numBonds, __global float4*
// Compute the force.
float r = sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
float r = native_sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
float deltaIdeal = r-bondParams.x;
energy += 0.5f * bondParams.y*deltaIdeal*deltaIdeal;
float dEdR = bondParams.y * deltaIdeal;
......
......@@ -16,7 +16,7 @@ __kernel void computeNonbondedExceptions(__global float4* forceBuffers, __global
// Compute the force.
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float invR = 1.0f/sqrt(r2);
float invR = native_rsqrt(r2);
float sig2 = invR*exceptionParams.y;
sig2 *= sig2;
float sig6 = sig2*sig2*sig2;
......
......@@ -52,13 +52,13 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
float invR = 1.0f/r;
float invR = native_rsqrt(r2);
float r = native_recip(invR);
LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+j;
float dEdR = 0.0f;
......@@ -114,13 +114,13 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
float invR = 1.0f/r;
float invR = native_rsqrt(r2);
float r = native_recip(invR);
LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+tj;
float dEdR = 0.0f;
......
......@@ -52,9 +52,9 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
......@@ -103,13 +103,13 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
float invR = 1.0f/r;
float invR = native_rsqrt(r2);
float r = native_recip(invR);
LOAD_ATOM2_PARAMETERS
atom2 = y+j;
float dEdR = 0.0f;
......@@ -157,13 +157,13 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
float invR = 1.0f/r;
float invR = native_rsqrt(r2);
float r = native_recip(invR);
LOAD_ATOM2_PARAMETERS
atom2 = y+tj;
float dEdR = 0.0f;
......
__kernel void updateGridIndexAndFraction(__global float4* posq, __global float2* pmeAtomGridIndex) {
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
float4 pos = posq[i];
float4 t = (float4) ((pos.x/PERIODIC_BOX_SIZE_X+1.0f)*GRID_SIZE_X,
(pos.y/PERIODIC_BOX_SIZE_Y+1.0f)*GRID_SIZE_Y,
(pos.z/PERIODIC_BOX_SIZE_Z+1.0f)*GRID_SIZE_Z, 0.0f);
float4 t = (float4) ((pos.x*INV_PERIODIC_BOX_SIZE_X+1.0f)*GRID_SIZE_X,
(pos.y*INV_PERIODIC_BOX_SIZE_Y+1.0f)*GRID_SIZE_Y,
(pos.z*INV_PERIODIC_BOX_SIZE_Z+1.0f)*GRID_SIZE_Z, 0.0f);
int4 gridIndex = (int4) (((int) t.x) % GRID_SIZE_X,
((int) t.y) % GRID_SIZE_Y,
((int) t.z) % GRID_SIZE_Z, 0);
......@@ -48,9 +48,9 @@ __kernel void updateBsplines(__global float4* posq, __global float4* pmeBsplineT
ddata[j] = 0.0f;
}
float4 pos = posq[i];
float4 t = (float4) ((pos.x/PERIODIC_BOX_SIZE_X+1.0f)*GRID_SIZE_X,
(pos.y/PERIODIC_BOX_SIZE_Y+1.0f)*GRID_SIZE_Y,
(pos.z/PERIODIC_BOX_SIZE_Z+1.0f)*GRID_SIZE_Z, 0.0f);
float4 t = (float4) ((pos.x*INV_PERIODIC_BOX_SIZE_X+1.0f)*GRID_SIZE_X,
(pos.y*INV_PERIODIC_BOX_SIZE_Y+1.0f)*GRID_SIZE_Y,
(pos.z*INV_PERIODIC_BOX_SIZE_Z+1.0f)*GRID_SIZE_Z, 0.0f);
float4 dr = (float4) (t.x-(int) t.x, t.y-(int) t.y, t.z-(int) t.z, 0.0f);
data[PME_ORDER-1] = 0.0f;
data[1] = dr;
......@@ -132,9 +132,9 @@ __kernel void reciprocalConvolution(__global float2* pmeGrid, __global float* en
int mx = (kx < (GRID_SIZE_X+1)/2) ? kx : (kx-GRID_SIZE_X);
int my = (ky < (GRID_SIZE_Y+1)/2) ? ky : (ky-GRID_SIZE_Y);
int mz = (kz < (GRID_SIZE_Z+1)/2) ? kz : (kz-GRID_SIZE_Z);
float mhx = mx/PERIODIC_BOX_SIZE_X;
float mhy = my/PERIODIC_BOX_SIZE_Y;
float mhz = mz/PERIODIC_BOX_SIZE_Z;
float mhx = mx*INV_PERIODIC_BOX_SIZE_X;
float mhy = my*INV_PERIODIC_BOX_SIZE_Y;
float mhz = mz*INV_PERIODIC_BOX_SIZE_Z;
float bx = pmeBsplineModuliX[kx];
float by = pmeBsplineModuliY[ky];
float bz = pmeBsplineModuliZ[kz];
......@@ -152,9 +152,9 @@ __kernel void gridInterpolateForce(__global float4* posq, __global float4* force
for (int atom = get_global_id(0); atom < NUM_ATOMS; atom += get_global_size(0)) {
float4 force = 0.0f;
float4 pos = posq[atom];
float4 t = (float4) ((pos.x/PERIODIC_BOX_SIZE_X+1.0f)*GRID_SIZE_X,
(pos.y/PERIODIC_BOX_SIZE_Y+1.0f)*GRID_SIZE_Y,
(pos.z/PERIODIC_BOX_SIZE_Z+1.0f)*GRID_SIZE_Z, 0.0f);
float4 t = (float4) ((pos.x*INV_PERIODIC_BOX_SIZE_X+1.0f)*GRID_SIZE_X,
(pos.y*INV_PERIODIC_BOX_SIZE_Y+1.0f)*GRID_SIZE_Y,
(pos.z*INV_PERIODIC_BOX_SIZE_Z+1.0f)*GRID_SIZE_Z, 0.0f);
int4 gridIndex = (int4) (((int) t.x) % GRID_SIZE_X,
((int) t.y) % GRID_SIZE_Y,
((int) t.z) % GRID_SIZE_Z, 0);
......@@ -180,9 +180,9 @@ __kernel void gridInterpolateForce(__global float4* posq, __global float4* force
}
float4 totalForce = forceBuffers[atom];
float q = pos.w*EPSILON_FACTOR;
totalForce.x -= q*force.x*GRID_SIZE_X/PERIODIC_BOX_SIZE_X;
totalForce.y -= q*force.y*GRID_SIZE_Y/PERIODIC_BOX_SIZE_Y;
totalForce.z -= q*force.z*GRID_SIZE_Z/PERIODIC_BOX_SIZE_Z;
totalForce.x -= q*force.x*GRID_SIZE_X*INV_PERIODIC_BOX_SIZE_X;
totalForce.y -= q*force.y*GRID_SIZE_Y*INV_PERIODIC_BOX_SIZE_Y;
totalForce.z -= q*force.z*GRID_SIZE_Z*INV_PERIODIC_BOX_SIZE_Z;
forceBuffers[atom] = totalForce;
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment