Commit 06db7ac4 authored by peastman
Browse files

Very minor optimization: compute r as r2*invR instead of RECIP(invR)

parent 291484f2
...@@ -65,7 +65,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -65,7 +65,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
real dEdR = 0; real dEdR = 0;
...@@ -117,7 +117,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -117,7 +117,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+tj; atom2 = y*TILE_SIZE+tj;
real dEdR = 0; real dEdR = 0;
...@@ -268,7 +268,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -268,7 +268,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real dEdR = 0; real dEdR = 0;
...@@ -313,7 +313,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -313,7 +313,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real dEdR = 0; real dEdR = 0;
......
...@@ -60,7 +60,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -60,7 +60,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
real tempValue1 = 0; real tempValue1 = 0;
...@@ -108,7 +108,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -108,7 +108,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+tj; atom2 = y*TILE_SIZE+tj;
real tempValue1 = 0; real tempValue1 = 0;
...@@ -241,7 +241,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -241,7 +241,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real tempValue1 = 0; real tempValue1 = 0;
...@@ -275,7 +275,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -275,7 +275,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real tempValue1 = 0; real tempValue1 = 0;
......
...@@ -58,7 +58,7 @@ extern "C" __global__ void computeInteractionGroups( ...@@ -58,7 +58,7 @@ extern "C" __global__ void computeInteractionGroups(
if (!isExcluded && r2 < CUTOFF_SQUARED) { if (!isExcluded && r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
real dEdR = 0.0f; real dEdR = 0.0f;
real tempEnergy = 0.0f; real tempEnergy = 0.0f;
......
...@@ -116,7 +116,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa ...@@ -116,7 +116,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = make_float2(localData[tbx+j].radius, localData[tbx+j].scaledRadius); float2 params2 = make_float2(localData[tbx+j].radius, localData[tbx+j].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if ((j != tgx) && (params1.x < rScaledRadiusJ)) { if ((j != tgx) && (params1.x < rScaledRadiusJ)) {
...@@ -163,7 +163,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa ...@@ -163,7 +163,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
if (atom1 < NUM_ATOMS && y*TILE_SIZE+tj < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+tj < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = make_float2(localData[tbx+tj].radius, localData[tbx+tj].scaledRadius); float2 params2 = make_float2(localData[tbx+tj].radius, localData[tbx+tj].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -305,7 +305,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa ...@@ -305,7 +305,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
int atom2 = atomIndices[tbx+tj]; int atom2 = atomIndices[tbx+tj];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = make_float2(localData[tbx+tj].radius, localData[tbx+tj].scaledRadius); float2 params2 = make_float2(localData[tbx+tj].radius, localData[tbx+tj].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -355,7 +355,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa ...@@ -355,7 +355,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = make_float2(localData[tbx+tj].radius, localData[tbx+tj].scaledRadius); float2 params2 = make_float2(localData[tbx+tj].radius, localData[tbx+tj].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -461,7 +461,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -461,7 +461,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[tbx+j].bornRadius; real bornRadius2 = localData[tbx+j].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
...@@ -518,7 +518,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -518,7 +518,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[tbx+tj].bornRadius; real bornRadius2 = localData[tbx+tj].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
...@@ -672,7 +672,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -672,7 +672,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[tbx+tj].bornRadius; real bornRadius2 = localData[tbx+tj].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
...@@ -723,7 +723,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -723,7 +723,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[tbx+tj].bornRadius; real bornRadius2 = localData[tbx+tj].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
......
...@@ -161,7 +161,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -161,7 +161,7 @@ extern "C" __global__ void computeNonbonded(
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
...@@ -232,7 +232,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -232,7 +232,7 @@ extern "C" __global__ void computeNonbonded(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+tj; atom2 = y*TILE_SIZE+tj;
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
...@@ -433,7 +433,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -433,7 +433,7 @@ extern "C" __global__ void computeNonbonded(
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
...@@ -507,7 +507,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -507,7 +507,7 @@ extern "C" __global__ void computeNonbonded(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
......
...@@ -67,7 +67,7 @@ __kernel void computeN2Energy( ...@@ -67,7 +67,7 @@ __kernel void computeN2Energy(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
real dEdR = 0; real dEdR = 0;
...@@ -117,7 +117,7 @@ __kernel void computeN2Energy( ...@@ -117,7 +117,7 @@ __kernel void computeN2Energy(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+tj; atom2 = y*TILE_SIZE+tj;
real dEdR = 0; real dEdR = 0;
...@@ -279,7 +279,7 @@ __kernel void computeN2Energy( ...@@ -279,7 +279,7 @@ __kernel void computeN2Energy(
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real dEdR = 0; real dEdR = 0;
...@@ -317,7 +317,7 @@ __kernel void computeN2Energy( ...@@ -317,7 +317,7 @@ __kernel void computeN2Energy(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real dEdR = 0; real dEdR = 0;
......
...@@ -67,7 +67,7 @@ __kernel void computeN2Energy( ...@@ -67,7 +67,7 @@ __kernel void computeN2Energy(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
...@@ -133,7 +133,7 @@ __kernel void computeN2Energy( ...@@ -133,7 +133,7 @@ __kernel void computeN2Energy(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
...@@ -285,7 +285,7 @@ __kernel void computeN2Energy( ...@@ -285,7 +285,7 @@ __kernel void computeN2Energy(
real r2 = dot(delta.xyz, delta.xyz); real r2 = dot(delta.xyz, delta.xyz);
if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS && r2 < CUTOFF_SQUARED) { if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j]; atom2 = atomIndices[j];
...@@ -341,7 +341,7 @@ __kernel void computeN2Energy( ...@@ -341,7 +341,7 @@ __kernel void computeN2Energy(
if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j]; atom2 = atomIndices[j];
......
...@@ -59,7 +59,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -59,7 +59,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
real tempValue1 = 0; real tempValue1 = 0;
...@@ -107,7 +107,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -107,7 +107,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+tj; atom2 = y*TILE_SIZE+tj;
real tempValue1 = 0; real tempValue1 = 0;
...@@ -252,7 +252,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -252,7 +252,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real tempValue1 = 0; real tempValue1 = 0;
...@@ -285,7 +285,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -285,7 +285,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real tempValue1 = 0; real tempValue1 = 0;
......
...@@ -59,7 +59,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -59,7 +59,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
...@@ -116,7 +116,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -116,7 +116,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
...@@ -251,7 +251,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -251,7 +251,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real r2 = dot(delta.xyz, delta.xyz); real r2 = dot(delta.xyz, delta.xyz);
if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS && r2 < CUTOFF_SQUARED) { if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j]; atom2 = atomIndices[j];
...@@ -296,7 +296,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -296,7 +296,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j]; atom2 = atomIndices[j];
......
...@@ -91,7 +91,7 @@ __kernel void computeInteractionGroups( ...@@ -91,7 +91,7 @@ __kernel void computeInteractionGroups(
if (!isExcluded && r2 < CUTOFF_SQUARED) { if (!isExcluded && r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
real dEdR = 0.0f; real dEdR = 0.0f;
real tempEnergy = 0.0f; real tempEnergy = 0.0f;
......
...@@ -67,7 +67,7 @@ __kernel void computeBornSum( ...@@ -67,7 +67,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = (float2) (localData[tbx+j].radius, localData[tbx+j].scaledRadius); float2 params2 = (float2) (localData[tbx+j].radius, localData[tbx+j].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if ((j != tgx) && (params1.x < rScaledRadiusJ)) { if ((j != tgx) && (params1.x < rScaledRadiusJ)) {
...@@ -114,7 +114,7 @@ __kernel void computeBornSum( ...@@ -114,7 +114,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+tj < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+tj < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = (float2) (localData[tbx+tj].radius, localData[tbx+tj].scaledRadius); float2 params2 = (float2) (localData[tbx+tj].radius, localData[tbx+tj].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -268,7 +268,7 @@ __kernel void computeBornSum( ...@@ -268,7 +268,7 @@ __kernel void computeBornSum(
int atom2 = atomIndices[tbx+tj]; int atom2 = atomIndices[tbx+tj];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = (float2) (localData[tbx+tj].radius, localData[tbx+tj].scaledRadius); float2 params2 = (float2) (localData[tbx+tj].radius, localData[tbx+tj].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -317,7 +317,7 @@ __kernel void computeBornSum( ...@@ -317,7 +317,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = (float2) (localData[tbx+tj].radius, localData[tbx+tj].scaledRadius); float2 params2 = (float2) (localData[tbx+tj].radius, localData[tbx+tj].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -437,7 +437,7 @@ __kernel void computeGBSAForce1( ...@@ -437,7 +437,7 @@ __kernel void computeGBSAForce1(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[tbx+j].bornRadius; real bornRadius2 = localData[tbx+j].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
...@@ -492,7 +492,7 @@ __kernel void computeGBSAForce1( ...@@ -492,7 +492,7 @@ __kernel void computeGBSAForce1(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[tbx+tj].bornRadius; real bornRadius2 = localData[tbx+tj].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
...@@ -659,7 +659,7 @@ __kernel void computeGBSAForce1( ...@@ -659,7 +659,7 @@ __kernel void computeGBSAForce1(
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[tbx+tj].bornRadius; real bornRadius2 = localData[tbx+tj].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
...@@ -707,7 +707,7 @@ __kernel void computeGBSAForce1( ...@@ -707,7 +707,7 @@ __kernel void computeGBSAForce1(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[tbx+tj].bornRadius; real bornRadius2 = localData[tbx+tj].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
......
...@@ -71,7 +71,7 @@ __kernel void computeBornSum( ...@@ -71,7 +71,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius); float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if ((j != tgx) && (params1.x < rScaledRadiusJ)) { if ((j != tgx) && (params1.x < rScaledRadiusJ)) {
...@@ -120,7 +120,7 @@ __kernel void computeBornSum( ...@@ -120,7 +120,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius); float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -269,7 +269,7 @@ __kernel void computeBornSum( ...@@ -269,7 +269,7 @@ __kernel void computeBornSum(
int atom2 = atomIndices[j]; int atom2 = atomIndices[j];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius); float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -331,7 +331,7 @@ __kernel void computeBornSum( ...@@ -331,7 +331,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius); float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -461,7 +461,7 @@ __kernel void computeGBSAForce1( ...@@ -461,7 +461,7 @@ __kernel void computeGBSAForce1(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[j].bornRadius; real bornRadius2 = localData[j].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
...@@ -520,7 +520,7 @@ __kernel void computeGBSAForce1( ...@@ -520,7 +520,7 @@ __kernel void computeGBSAForce1(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[j].bornRadius; real bornRadius2 = localData[j].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
...@@ -677,7 +677,7 @@ __kernel void computeGBSAForce1( ...@@ -677,7 +677,7 @@ __kernel void computeGBSAForce1(
int atom2 = atomIndices[j]; int atom2 = atomIndices[j];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[j].bornRadius; real bornRadius2 = localData[j].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
...@@ -737,7 +737,7 @@ __kernel void computeGBSAForce1( ...@@ -737,7 +737,7 @@ __kernel void computeGBSAForce1(
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[j].bornRadius; real bornRadius2 = localData[j].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
......
...@@ -71,7 +71,7 @@ __kernel void computeNonbonded( ...@@ -71,7 +71,7 @@ __kernel void computeNonbonded(
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
...@@ -128,7 +128,7 @@ __kernel void computeNonbonded( ...@@ -128,7 +128,7 @@ __kernel void computeNonbonded(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+tj; atom2 = y*TILE_SIZE+tj;
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
...@@ -297,7 +297,7 @@ __kernel void computeNonbonded( ...@@ -297,7 +297,7 @@ __kernel void computeNonbonded(
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
...@@ -347,7 +347,7 @@ __kernel void computeNonbonded( ...@@ -347,7 +347,7 @@ __kernel void computeNonbonded(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
......
...@@ -72,7 +72,7 @@ __kernel void computeNonbonded( ...@@ -72,7 +72,7 @@ __kernel void computeNonbonded(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
...@@ -140,7 +140,7 @@ __kernel void computeNonbonded( ...@@ -140,7 +140,7 @@ __kernel void computeNonbonded(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
...@@ -307,7 +307,7 @@ __kernel void computeNonbonded( ...@@ -307,7 +307,7 @@ __kernel void computeNonbonded(
real r2 = dot(delta.xyz, delta.xyz); real r2 = dot(delta.xyz, delta.xyz);
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j]; atom2 = atomIndices[j];
...@@ -371,7 +371,7 @@ __kernel void computeNonbonded( ...@@ -371,7 +371,7 @@ __kernel void computeNonbonded(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j]; atom2 = atomIndices[j];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment