Commit 3f2c29fb authored by Peter Eastman's avatar Peter Eastman
Browse files

Optimizations to OpenCL platform

parent 62a83f80
...@@ -1266,6 +1266,9 @@ void OpenCLCalcNonbondedForceKernel::executeForces(ContextImpl& context) { ...@@ -1266,6 +1266,9 @@ void OpenCLCalcNonbondedForceKernel::executeForces(ContextImpl& context) {
pmeDefines["PERIODIC_BOX_SIZE_X"] = doubleToString(boxSize.x); pmeDefines["PERIODIC_BOX_SIZE_X"] = doubleToString(boxSize.x);
pmeDefines["PERIODIC_BOX_SIZE_Y"] = doubleToString(boxSize.y); pmeDefines["PERIODIC_BOX_SIZE_Y"] = doubleToString(boxSize.y);
pmeDefines["PERIODIC_BOX_SIZE_Z"] = doubleToString(boxSize.z); pmeDefines["PERIODIC_BOX_SIZE_Z"] = doubleToString(boxSize.z);
pmeDefines["INV_PERIODIC_BOX_SIZE_X"] = doubleToString(1.0/boxSize.x);
pmeDefines["INV_PERIODIC_BOX_SIZE_Y"] = doubleToString(1.0/boxSize.y);
pmeDefines["INV_PERIODIC_BOX_SIZE_Z"] = doubleToString(1.0/boxSize.z);
pmeDefines["RECIP_SCALE_FACTOR"] = doubleToString(1.0/(M_PI*boxSize.x*boxSize.y*boxSize.z)); pmeDefines["RECIP_SCALE_FACTOR"] = doubleToString(1.0/(M_PI*boxSize.x*boxSize.y*boxSize.z));
cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines); cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
pmeGridIndexKernel = cl::Kernel(program, "updateGridIndexAndFraction"); pmeGridIndexKernel = cl::Kernel(program, "updateGridIndexAndFraction");
...@@ -1564,6 +1567,9 @@ void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) { ...@@ -1564,6 +1567,9 @@ void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) {
defines["PERIODIC_BOX_SIZE_X"] = doubleToString(nb.getPeriodicBoxSize().x); defines["PERIODIC_BOX_SIZE_X"] = doubleToString(nb.getPeriodicBoxSize().x);
defines["PERIODIC_BOX_SIZE_Y"] = doubleToString(nb.getPeriodicBoxSize().y); defines["PERIODIC_BOX_SIZE_Y"] = doubleToString(nb.getPeriodicBoxSize().y);
defines["PERIODIC_BOX_SIZE_Z"] = doubleToString(nb.getPeriodicBoxSize().z); defines["PERIODIC_BOX_SIZE_Z"] = doubleToString(nb.getPeriodicBoxSize().z);
defines["INV_PERIODIC_BOX_SIZE_X"] = doubleToString(1.0/nb.getPeriodicBoxSize().x);
defines["INV_PERIODIC_BOX_SIZE_Y"] = doubleToString(1.0/nb.getPeriodicBoxSize().y);
defines["INV_PERIODIC_BOX_SIZE_Z"] = doubleToString(1.0/nb.getPeriodicBoxSize().z);
defines["CUTOFF_SQUARED"] = doubleToString(nb.getCutoffDistance()*nb.getCutoffDistance()); defines["CUTOFF_SQUARED"] = doubleToString(nb.getCutoffDistance()*nb.getCutoffDistance());
defines["PREFACTOR"] = doubleToString(prefactor); defines["PREFACTOR"] = doubleToString(prefactor);
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms()); defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
...@@ -1864,6 +1870,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -1864,6 +1870,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
defines["PERIODIC_BOX_SIZE_X"] = doubleToString(boxVectors[0][0]); defines["PERIODIC_BOX_SIZE_X"] = doubleToString(boxVectors[0][0]);
defines["PERIODIC_BOX_SIZE_Y"] = doubleToString(boxVectors[1][1]); defines["PERIODIC_BOX_SIZE_Y"] = doubleToString(boxVectors[1][1]);
defines["PERIODIC_BOX_SIZE_Z"] = doubleToString(boxVectors[2][2]); defines["PERIODIC_BOX_SIZE_Z"] = doubleToString(boxVectors[2][2]);
defines["INV_PERIODIC_BOX_SIZE_X"] = doubleToString(1.0/boxVectors[0][0]);
defines["INV_PERIODIC_BOX_SIZE_Y"] = doubleToString(1.0/boxVectors[1][1]);
defines["INV_PERIODIC_BOX_SIZE_Z"] = doubleToString(1.0/boxVectors[2][2]);
defines["CUTOFF_SQUARED"] = doubleToString(force.getCutoffDistance()*force.getCutoffDistance()); defines["CUTOFF_SQUARED"] = doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms()); defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
...@@ -2012,6 +2021,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -2012,6 +2021,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
defines["PERIODIC_BOX_SIZE_X"] = doubleToString(boxVectors[0][0]); defines["PERIODIC_BOX_SIZE_X"] = doubleToString(boxVectors[0][0]);
defines["PERIODIC_BOX_SIZE_Y"] = doubleToString(boxVectors[1][1]); defines["PERIODIC_BOX_SIZE_Y"] = doubleToString(boxVectors[1][1]);
defines["PERIODIC_BOX_SIZE_Z"] = doubleToString(boxVectors[2][2]); defines["PERIODIC_BOX_SIZE_Z"] = doubleToString(boxVectors[2][2]);
defines["INV_PERIODIC_BOX_SIZE_X"] = doubleToString(1.0/boxVectors[0][0]);
defines["INV_PERIODIC_BOX_SIZE_Y"] = doubleToString(1.0/boxVectors[1][1]);
defines["INV_PERIODIC_BOX_SIZE_Z"] = doubleToString(1.0/boxVectors[2][2]);
defines["CUTOFF_SQUARED"] = doubleToString(force.getCutoffDistance()*force.getCutoffDistance()); defines["CUTOFF_SQUARED"] = doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms()); defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "OpenCLArray.h" #include "OpenCLArray.h"
#include "OpenCLCompact.h" #include "OpenCLCompact.h"
#include "OpenCLKernelSources.h" #include "OpenCLKernelSources.h"
#include "OpenCLExpressionUtilities.h"
#include <map> #include <map>
using namespace OpenMM; using namespace OpenMM;
...@@ -349,19 +350,13 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -349,19 +350,13 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines["USE_PERIODIC"] = "1"; defines["USE_PERIODIC"] = "1";
if (useExclusions) if (useExclusions)
defines["USE_EXCLUSIONS"] = "1"; defines["USE_EXCLUSIONS"] = "1";
stringstream xsize, ysize, zsize, cutoffSquared; defines["PERIODIC_BOX_SIZE_X"] = OpenCLExpressionUtilities::doubleToString(periodicBoxSize.x);
xsize.precision(8); defines["PERIODIC_BOX_SIZE_Y"] = OpenCLExpressionUtilities::doubleToString(periodicBoxSize.y);
ysize.precision(8); defines["PERIODIC_BOX_SIZE_Z"] = OpenCLExpressionUtilities::doubleToString(periodicBoxSize.z);
zsize.precision(8); defines["INV_PERIODIC_BOX_SIZE_X"] = OpenCLExpressionUtilities::doubleToString(1.0/periodicBoxSize.x);
cutoffSquared.precision(8); defines["INV_PERIODIC_BOX_SIZE_Y"] = OpenCLExpressionUtilities::doubleToString(1.0/periodicBoxSize.y);
xsize << scientific << periodicBoxSize.x << "f"; defines["INV_PERIODIC_BOX_SIZE_Z"] = OpenCLExpressionUtilities::doubleToString(1.0/periodicBoxSize.z);
ysize << scientific << periodicBoxSize.y << "f"; defines["CUTOFF_SQUARED"] = OpenCLExpressionUtilities::doubleToString(cutoff*cutoff);
zsize << scientific << periodicBoxSize.z << "f";
cutoffSquared << scientific << (cutoff*cutoff) << "f";
defines["PERIODIC_BOX_SIZE_X"] = xsize.str();
defines["PERIODIC_BOX_SIZE_Y"] = ysize.str();
defines["PERIODIC_BOX_SIZE_Z"] = zsize.str();
defines["CUTOFF_SQUARED"] = cutoffSquared.str();
stringstream natom, padded; stringstream natom, padded;
natom << context.getNumAtoms(); natom << context.getNumAtoms();
padded << context.getPaddedNumAtoms(); padded << context.getPaddedNumAtoms();
......
...@@ -14,7 +14,7 @@ __kernel void computeCustomBondForces(int numAtoms, int numBonds, __global float ...@@ -14,7 +14,7 @@ __kernel void computeCustomBondForces(int numAtoms, int numBonds, __global float
// Compute the force. // Compute the force.
float r = sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z); float r = native_sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
COMPUTE_FORCE COMPUTE_FORCE
delta.xyz *= -dEdR/r; delta.xyz *= -dEdR/r;
......
...@@ -56,15 +56,15 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene ...@@ -56,15 +56,15 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
float4 posq2 = local_posq[atom2]; float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
float r = sqrt(r2); float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+j; atom2 = y+baseLocalAtom+j;
float dEdR = 0.0f; float dEdR = 0.0f;
...@@ -132,15 +132,15 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene ...@@ -132,15 +132,15 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
float4 posq2 = local_posq[atom2]; float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
float r = sqrt(r2); float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+tj; atom2 = y+baseLocalAtom+tj;
float dEdR = 0.0f; float dEdR = 0.0f;
......
...@@ -55,15 +55,15 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene ...@@ -55,15 +55,15 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
float4 posq2 = local_posq[atom2]; float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
float r = sqrt(r2); float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+j; atom2 = y+j;
float dEdR = 0.0f; float dEdR = 0.0f;
...@@ -128,15 +128,15 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene ...@@ -128,15 +128,15 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
float4 posq2 = local_posq[atom2]; float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
float r = sqrt(r2); float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+tj; atom2 = y+tj;
float dEdR = 0.0f; float dEdR = 0.0f;
......
...@@ -53,15 +53,15 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq, ...@@ -53,15 +53,15 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
float4 posq2 = local_posq[atom2]; float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
float r = sqrt(r2); float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+j; atom2 = y+baseLocalAtom+j;
float tempValue1 = 0.0f; float tempValue1 = 0.0f;
...@@ -126,15 +126,15 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq, ...@@ -126,15 +126,15 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
float4 posq2 = local_posq[atom2]; float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
float r = sqrt(r2); float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+tj; atom2 = y+baseLocalAtom+tj;
float tempValue1 = 0.0f; float tempValue1 = 0.0f;
......
...@@ -53,15 +53,15 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq, ...@@ -53,15 +53,15 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
float4 posq2 = local_posq[atom2]; float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
float r = sqrt(r2); float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+j; atom2 = y+j;
float tempValue1 = 0.0f; float tempValue1 = 0.0f;
...@@ -114,15 +114,15 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq, ...@@ -114,15 +114,15 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
float4 posq2 = local_posq[atom2]; float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float tempValue1 = 0.0f; float tempValue1 = 0.0f;
float tempValue2 = 0.0f; float tempValue2 = 0.0f;
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
float r = sqrt(r2); float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+j; atom2 = y+j;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
...@@ -169,15 +169,15 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq, ...@@ -169,15 +169,15 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
float4 posq2 = local_posq[atom2]; float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
float r = sqrt(r2); float r = native_sqrt(r2);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+tj; atom2 = y+tj;
float tempValue1 = 0.0f; float tempValue1 = 0.0f;
......
...@@ -14,9 +14,9 @@ float4 delta(float4 vec1, float4 vec2) { ...@@ -14,9 +14,9 @@ float4 delta(float4 vec1, float4 vec2) {
float4 deltaPeriodic(float4 vec1, float4 vec2) { float4 deltaPeriodic(float4 vec1, float4 vec2) {
float4 result = (float4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f); float4 result = (float4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
result.x -= floor(result.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; result.x -= floor(result.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
result.y -= floor(result.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; result.y -= floor(result.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
result.z -= floor(result.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; result.z -= floor(result.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
result.w = result.x*result.x + result.y*result.y + result.z*result.z; result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result; return result;
...@@ -27,14 +27,14 @@ float4 deltaPeriodic(float4 vec1, float4 vec2) { ...@@ -27,14 +27,14 @@ float4 deltaPeriodic(float4 vec1, float4 vec2) {
*/ */
float computeAngle(float4 vec1, float4 vec2) { float computeAngle(float4 vec1, float4 vec2) {
float dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z; float dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z;
float cosine = dotProduct/sqrt(vec1.w*vec2.w); float cosine = dotProduct*native_rsqrt(vec1.w*vec2.w);
float angle; float angle;
if (cosine > 0.99f || cosine < -0.99f) { if (cosine > 0.99f || cosine < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead. // We're close to the singularity in acos(), so take the cross product and use asin() instead.
float4 crossProduct = cross(vec1, vec2); float4 crossProduct = cross(vec1, vec2);
float scale = vec1.w*vec2.w; float scale = vec1.w*vec2.w;
angle = asin(sqrt(dot(crossProduct, crossProduct)/scale)); angle = asin(native_sqrt(dot(crossProduct, crossProduct)/scale));
if (cosine < 0.0f) if (cosine < 0.0f)
angle = M_PI-angle; angle = M_PI-angle;
} }
......
...@@ -42,9 +42,9 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca ...@@ -42,9 +42,9 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
for (unsigned int j = 0; j < TILE_SIZE/2; j++) { for (unsigned int j = 0; j < TILE_SIZE/2; j++) {
float4 delta = (float4) (local_posq[baseLocalAtom+j].xyz - posq1.xyz, 0.0f); float4 delta = (float4) (local_posq[baseLocalAtom+j].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -52,8 +52,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca ...@@ -52,8 +52,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
#else #else
if (atom1 < NUM_ATOMS && y+baseLocalAtom+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y+baseLocalAtom+j < NUM_ATOMS) {
#endif #endif
float r = sqrt(r2); float invR = native_rsqrt(r2);
float invR = 1.0f/r; float r = native_recip(invR);
float2 params2 = local_params[baseLocalAtom+j]; float2 params2 = local_params[baseLocalAtom+j];
float rScaledRadiusJ = r+params2.y; float rScaledRadiusJ = r+params2.y;
if ((j != tgx) && (params1.x < rScaledRadiusJ)) { if ((j != tgx) && (params1.x < rScaledRadiusJ)) {
...@@ -104,9 +104,9 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca ...@@ -104,9 +104,9 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
for (unsigned int j = 0; j < TILE_SIZE/2; j++) { for (unsigned int j = 0; j < TILE_SIZE/2; j++) {
float4 delta = (float4) (local_posq[baseLocalAtom+tj].xyz - posq1.xyz, 0.0f); float4 delta = (float4) (local_posq[baseLocalAtom+tj].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -114,8 +114,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca ...@@ -114,8 +114,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
#else #else
if (atom1 < NUM_ATOMS && y+baseLocalAtom+tj < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y+baseLocalAtom+tj < NUM_ATOMS) {
#endif #endif
float r = sqrt(r2); float invR = native_rsqrt(r2);
float invR = 1.0f/r; float r = native_recip(invR);
float2 params2 = local_params[baseLocalAtom+tj]; float2 params2 = local_params[baseLocalAtom+tj];
float rScaledRadiusJ = r+params2.y; float rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -214,19 +214,19 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e ...@@ -214,19 +214,19 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
float4 posq2 = local_posq[baseLocalAtom+j]; float4 posq2 = local_posq[baseLocalAtom+j];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2); float invR = native_rsqrt(r2);
float invR = 1.0f/r; float r = native_recip(invR);
float bornRadius2 = local_bornRadii[baseLocalAtom+j]; float bornRadius2 = local_bornRadii[baseLocalAtom+j];
float alpha2_ij = bornRadius1*bornRadius2; float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij); float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij); float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm; float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = sqrt(denominator2); float denominator = native_sqrt(denominator2);
float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator; float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2; float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
...@@ -281,19 +281,19 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e ...@@ -281,19 +281,19 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
float4 posq2 = local_posq[baseLocalAtom+tj]; float4 posq2 = local_posq[baseLocalAtom+tj];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2); float invR = native_rsqrt(r2);
float invR = 1.0f/r; float r = native_recip(invR);
float bornRadius2 = local_bornRadii[baseLocalAtom+tj]; float bornRadius2 = local_bornRadii[baseLocalAtom+tj];
float alpha2_ij = bornRadius1*bornRadius2; float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij); float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij); float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm; float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = sqrt(denominator2); float denominator = native_sqrt(denominator2);
float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator; float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2; float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......
...@@ -42,9 +42,9 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca ...@@ -42,9 +42,9 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
float4 delta = (float4) (local_posq[tbx+j].xyz - posq1.xyz, 0.0f); float4 delta = (float4) (local_posq[tbx+j].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -52,8 +52,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca ...@@ -52,8 +52,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
#else #else
if (atom1 < NUM_ATOMS && y+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y+j < NUM_ATOMS) {
#endif #endif
float r = sqrt(r2); float invR = native_rsqrt(r2);
float invR = 1.0f/r; float r = native_recip(invR);
float2 params2 = local_params[tbx+j]; float2 params2 = local_params[tbx+j];
float rScaledRadiusJ = r+params2.y; float rScaledRadiusJ = r+params2.y;
if ((j != tgx) && (params1.x < rScaledRadiusJ)) { if ((j != tgx) && (params1.x < rScaledRadiusJ)) {
...@@ -100,9 +100,9 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca ...@@ -100,9 +100,9 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
if ((flags&(1<<j)) != 0) { if ((flags&(1<<j)) != 0) {
float4 delta = (float4) (local_posq[tbx+j].xyz - posq1.xyz, 0.0f); float4 delta = (float4) (local_posq[tbx+j].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
tempBuffer[get_local_id(0)] = 0.0f; tempBuffer[get_local_id(0)] = 0.0f;
...@@ -111,8 +111,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca ...@@ -111,8 +111,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
#else #else
if (atom1 < NUM_ATOMS && y+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y+j < NUM_ATOMS) {
#endif #endif
float r = sqrt(r2); float invR = native_rsqrt(r2);
float invR = 1.0f/r; float r = native_recip(invR);
float2 params2 = local_params[tbx+j]; float2 params2 = local_params[tbx+j];
float rScaledRadiusJ = r+params2.y; float rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -169,9 +169,9 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca ...@@ -169,9 +169,9 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
float4 delta = (float4) (local_posq[tbx+tj].xyz - posq1.xyz, 0.0f); float4 delta = (float4) (local_posq[tbx+tj].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -179,8 +179,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca ...@@ -179,8 +179,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
#else #else
if (atom1 < NUM_ATOMS && y+tj < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y+tj < NUM_ATOMS) {
#endif #endif
float r = sqrt(r2); float invR = native_rsqrt(r2);
float invR = 1.0f/r; float r = native_recip(invR);
float2 params2 = local_params[tbx+tj]; float2 params2 = local_params[tbx+tj];
float rScaledRadiusJ = r+params2.y; float rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -274,19 +274,19 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e ...@@ -274,19 +274,19 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
float4 posq2 = local_posq[tbx+j]; float4 posq2 = local_posq[tbx+j];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2); float invR = native_rsqrt(r2);
float invR = 1.0f/r; float r = native_recip(invR);
float bornRadius2 = local_bornRadii[tbx+j]; float bornRadius2 = local_bornRadii[tbx+j];
float alpha2_ij = bornRadius1*bornRadius2; float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij); float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij); float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm; float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = sqrt(denominator2); float denominator = native_sqrt(denominator2);
float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator; float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2; float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
...@@ -336,19 +336,19 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e ...@@ -336,19 +336,19 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
float4 posq2 = local_posq[tbx+j]; float4 posq2 = local_posq[tbx+j];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2); float invR = native_rsqrt(r2);
float invR = 1.0f/r; float r = native_recip(invR);
float bornRadius2 = local_bornRadii[tbx+j]; float bornRadius2 = local_bornRadii[tbx+j];
float alpha2_ij = bornRadius1*bornRadius2; float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij); float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij); float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm; float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = sqrt(denominator2); float denominator = native_sqrt(denominator2);
float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator; float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2; float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
...@@ -397,19 +397,19 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e ...@@ -397,19 +397,19 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
float4 posq2 = local_posq[tbx+tj]; float4 posq2 = local_posq[tbx+tj];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2); float invR = native_rsqrt(r2);
float invR = 1.0f/r; float r = native_recip(invR);
float bornRadius2 = local_bornRadii[tbx+tj]; float bornRadius2 = local_bornRadii[tbx+tj];
float alpha2_ij = bornRadius1*bornRadius2; float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij); float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij); float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm; float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = sqrt(denominator2); float denominator = native_sqrt(denominator2);
float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator; float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2; float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......
...@@ -20,11 +20,11 @@ __kernel void calcHarmonicAngleForce(int numAtoms, int numAngles, __global float ...@@ -20,11 +20,11 @@ __kernel void calcHarmonicAngleForce(int numAtoms, int numAngles, __global float
float4 v1 = a2-a3; float4 v1 = a2-a3;
float4 cp = cross(v0, v1); float4 cp = cross(v0, v1);
float rp = cp.x*cp.x + cp.y*cp.y + cp.z*cp.z; float rp = cp.x*cp.x + cp.y*cp.y + cp.z*cp.z;
rp = max(sqrt(rp), 1.0e-06f); rp = max(native_sqrt(rp), 1.0e-06f);
float r21 = v0.x*v0.x + v0.y*v0.y + v0.z*v0.z; float r21 = v0.x*v0.x + v0.y*v0.y + v0.z*v0.z;
float r23 = v1.x*v1.x + v1.y*v1.y + v1.z*v1.z; float r23 = v1.x*v1.x + v1.y*v1.y + v1.z*v1.z;
float dot = v0.x*v1.x + v0.y*v1.y + v0.z*v1.z; float dot = v0.x*v1.x + v0.y*v1.y + v0.z*v1.z;
float cosine = dot/sqrt(r21*r23); float cosine = dot*native_rsqrt(r21*r23);
float deltaIdeal = acos(cosine)-angleParams.x; float deltaIdeal = acos(cosine)-angleParams.x;
energy += 0.5f*angleParams.y*deltaIdeal*deltaIdeal; energy += 0.5f*angleParams.y*deltaIdeal*deltaIdeal;
float dEdR = angleParams.y*deltaIdeal; float dEdR = angleParams.y*deltaIdeal;
......
...@@ -14,7 +14,7 @@ __kernel void calcHarmonicBondForce(int numAtoms, int numBonds, __global float4* ...@@ -14,7 +14,7 @@ __kernel void calcHarmonicBondForce(int numAtoms, int numBonds, __global float4*
// Compute the force. // Compute the force.
float r = sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z); float r = native_sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
float deltaIdeal = r-bondParams.x; float deltaIdeal = r-bondParams.x;
energy += 0.5f * bondParams.y*deltaIdeal*deltaIdeal; energy += 0.5f * bondParams.y*deltaIdeal*deltaIdeal;
float dEdR = bondParams.y * deltaIdeal; float dEdR = bondParams.y * deltaIdeal;
......
...@@ -16,7 +16,7 @@ __kernel void computeNonbondedExceptions(__global float4* forceBuffers, __global ...@@ -16,7 +16,7 @@ __kernel void computeNonbondedExceptions(__global float4* forceBuffers, __global
// Compute the force. // Compute the force.
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float invR = 1.0f/sqrt(r2); float invR = native_rsqrt(r2);
float sig2 = invR*exceptionParams.y; float sig2 = invR*exceptionParams.y;
sig2 *= sig2; sig2 *= sig2;
float sig6 = sig2*sig2*sig2; float sig6 = sig2*sig2*sig2;
......
...@@ -52,13 +52,13 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -52,13 +52,13 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float4 posq2 = local_posq[atom2]; float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2); float invR = native_rsqrt(r2);
float invR = 1.0f/r; float r = native_recip(invR);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+j; atom2 = y+baseLocalAtom+j;
float dEdR = 0.0f; float dEdR = 0.0f;
...@@ -114,13 +114,13 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -114,13 +114,13 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float4 posq2 = local_posq[atom2]; float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2); float invR = native_rsqrt(r2);
float invR = 1.0f/r; float r = native_recip(invR);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+tj; atom2 = y+baseLocalAtom+tj;
float dEdR = 0.0f; float dEdR = 0.0f;
......
...@@ -52,9 +52,9 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -52,9 +52,9 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float4 posq2 = local_posq[atom2]; float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2); float r = sqrt(r2);
...@@ -103,13 +103,13 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -103,13 +103,13 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float4 posq2 = local_posq[atom2]; float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2); float invR = native_rsqrt(r2);
float invR = 1.0f/r; float r = native_recip(invR);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+j; atom2 = y+j;
float dEdR = 0.0f; float dEdR = 0.0f;
...@@ -157,13 +157,13 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -157,13 +157,13 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float4 posq2 = local_posq[atom2]; float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y; delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z; delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2); float invR = native_rsqrt(r2);
float invR = 1.0f/r; float r = native_recip(invR);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+tj; atom2 = y+tj;
float dEdR = 0.0f; float dEdR = 0.0f;
......
__kernel void updateGridIndexAndFraction(__global float4* posq, __global float2* pmeAtomGridIndex) { __kernel void updateGridIndexAndFraction(__global float4* posq, __global float2* pmeAtomGridIndex) {
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) { for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
float4 pos = posq[i]; float4 pos = posq[i];
float4 t = (float4) ((pos.x/PERIODIC_BOX_SIZE_X+1.0f)*GRID_SIZE_X, float4 t = (float4) ((pos.x*INV_PERIODIC_BOX_SIZE_X+1.0f)*GRID_SIZE_X,
(pos.y/PERIODIC_BOX_SIZE_Y+1.0f)*GRID_SIZE_Y, (pos.y*INV_PERIODIC_BOX_SIZE_Y+1.0f)*GRID_SIZE_Y,
(pos.z/PERIODIC_BOX_SIZE_Z+1.0f)*GRID_SIZE_Z, 0.0f); (pos.z*INV_PERIODIC_BOX_SIZE_Z+1.0f)*GRID_SIZE_Z, 0.0f);
int4 gridIndex = (int4) (((int) t.x) % GRID_SIZE_X, int4 gridIndex = (int4) (((int) t.x) % GRID_SIZE_X,
((int) t.y) % GRID_SIZE_Y, ((int) t.y) % GRID_SIZE_Y,
((int) t.z) % GRID_SIZE_Z, 0); ((int) t.z) % GRID_SIZE_Z, 0);
...@@ -48,9 +48,9 @@ __kernel void updateBsplines(__global float4* posq, __global float4* pmeBsplineT ...@@ -48,9 +48,9 @@ __kernel void updateBsplines(__global float4* posq, __global float4* pmeBsplineT
ddata[j] = 0.0f; ddata[j] = 0.0f;
} }
float4 pos = posq[i]; float4 pos = posq[i];
float4 t = (float4) ((pos.x/PERIODIC_BOX_SIZE_X+1.0f)*GRID_SIZE_X, float4 t = (float4) ((pos.x*INV_PERIODIC_BOX_SIZE_X+1.0f)*GRID_SIZE_X,
(pos.y/PERIODIC_BOX_SIZE_Y+1.0f)*GRID_SIZE_Y, (pos.y*INV_PERIODIC_BOX_SIZE_Y+1.0f)*GRID_SIZE_Y,
(pos.z/PERIODIC_BOX_SIZE_Z+1.0f)*GRID_SIZE_Z, 0.0f); (pos.z*INV_PERIODIC_BOX_SIZE_Z+1.0f)*GRID_SIZE_Z, 0.0f);
float4 dr = (float4) (t.x-(int) t.x, t.y-(int) t.y, t.z-(int) t.z, 0.0f); float4 dr = (float4) (t.x-(int) t.x, t.y-(int) t.y, t.z-(int) t.z, 0.0f);
data[PME_ORDER-1] = 0.0f; data[PME_ORDER-1] = 0.0f;
data[1] = dr; data[1] = dr;
...@@ -132,9 +132,9 @@ __kernel void reciprocalConvolution(__global float2* pmeGrid, __global float* en ...@@ -132,9 +132,9 @@ __kernel void reciprocalConvolution(__global float2* pmeGrid, __global float* en
int mx = (kx < (GRID_SIZE_X+1)/2) ? kx : (kx-GRID_SIZE_X); int mx = (kx < (GRID_SIZE_X+1)/2) ? kx : (kx-GRID_SIZE_X);
int my = (ky < (GRID_SIZE_Y+1)/2) ? ky : (ky-GRID_SIZE_Y); int my = (ky < (GRID_SIZE_Y+1)/2) ? ky : (ky-GRID_SIZE_Y);
int mz = (kz < (GRID_SIZE_Z+1)/2) ? kz : (kz-GRID_SIZE_Z); int mz = (kz < (GRID_SIZE_Z+1)/2) ? kz : (kz-GRID_SIZE_Z);
float mhx = mx/PERIODIC_BOX_SIZE_X; float mhx = mx*INV_PERIODIC_BOX_SIZE_X;
float mhy = my/PERIODIC_BOX_SIZE_Y; float mhy = my*INV_PERIODIC_BOX_SIZE_Y;
float mhz = mz/PERIODIC_BOX_SIZE_Z; float mhz = mz*INV_PERIODIC_BOX_SIZE_Z;
float bx = pmeBsplineModuliX[kx]; float bx = pmeBsplineModuliX[kx];
float by = pmeBsplineModuliY[ky]; float by = pmeBsplineModuliY[ky];
float bz = pmeBsplineModuliZ[kz]; float bz = pmeBsplineModuliZ[kz];
...@@ -152,9 +152,9 @@ __kernel void gridInterpolateForce(__global float4* posq, __global float4* force ...@@ -152,9 +152,9 @@ __kernel void gridInterpolateForce(__global float4* posq, __global float4* force
for (int atom = get_global_id(0); atom < NUM_ATOMS; atom += get_global_size(0)) { for (int atom = get_global_id(0); atom < NUM_ATOMS; atom += get_global_size(0)) {
float4 force = 0.0f; float4 force = 0.0f;
float4 pos = posq[atom]; float4 pos = posq[atom];
float4 t = (float4) ((pos.x/PERIODIC_BOX_SIZE_X+1.0f)*GRID_SIZE_X, float4 t = (float4) ((pos.x*INV_PERIODIC_BOX_SIZE_X+1.0f)*GRID_SIZE_X,
(pos.y/PERIODIC_BOX_SIZE_Y+1.0f)*GRID_SIZE_Y, (pos.y*INV_PERIODIC_BOX_SIZE_Y+1.0f)*GRID_SIZE_Y,
(pos.z/PERIODIC_BOX_SIZE_Z+1.0f)*GRID_SIZE_Z, 0.0f); (pos.z*INV_PERIODIC_BOX_SIZE_Z+1.0f)*GRID_SIZE_Z, 0.0f);
int4 gridIndex = (int4) (((int) t.x) % GRID_SIZE_X, int4 gridIndex = (int4) (((int) t.x) % GRID_SIZE_X,
((int) t.y) % GRID_SIZE_Y, ((int) t.y) % GRID_SIZE_Y,
((int) t.z) % GRID_SIZE_Z, 0); ((int) t.z) % GRID_SIZE_Z, 0);
...@@ -180,9 +180,9 @@ __kernel void gridInterpolateForce(__global float4* posq, __global float4* force ...@@ -180,9 +180,9 @@ __kernel void gridInterpolateForce(__global float4* posq, __global float4* force
} }
float4 totalForce = forceBuffers[atom]; float4 totalForce = forceBuffers[atom];
float q = pos.w*EPSILON_FACTOR; float q = pos.w*EPSILON_FACTOR;
totalForce.x -= q*force.x*GRID_SIZE_X/PERIODIC_BOX_SIZE_X; totalForce.x -= q*force.x*GRID_SIZE_X*INV_PERIODIC_BOX_SIZE_X;
totalForce.y -= q*force.y*GRID_SIZE_Y/PERIODIC_BOX_SIZE_Y; totalForce.y -= q*force.y*GRID_SIZE_Y*INV_PERIODIC_BOX_SIZE_Y;
totalForce.z -= q*force.z*GRID_SIZE_Z/PERIODIC_BOX_SIZE_Z; totalForce.z -= q*force.z*GRID_SIZE_Z*INV_PERIODIC_BOX_SIZE_Z;
forceBuffers[atom] = totalForce; forceBuffers[atom] = totalForce;
} }
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment