"wrappers/python/vscode:/vscode.git/clone" did not exist on "3279c73a334e976976f9df6de13c3377adf0d604"
Commit a7b68da3 authored by Peter Eastman's avatar Peter Eastman
Browse files

Very minor optimizations

parent c56b4801
......@@ -31,7 +31,7 @@ __kernel void computeCMAPTorsionForces(int numAtoms, int numTorsions, __global f
float4 cross_prod = cross(cp0a, cp1a);
float scale = dot(cp0a, cp0a)*dot(cp1a, cp1a);
angleA = asin(sqrt(dot(cross_prod, cross_prod)/scale));
angleA = asin(SQRT(dot(cross_prod, cross_prod)/scale));
if (cosangle < 0.0f)
angleA = PI-angleA;
}
......@@ -54,7 +54,7 @@ __kernel void computeCMAPTorsionForces(int numAtoms, int numTorsions, __global f
float4 cross_prod = cross(cp0b, cp1b);
float scale = dot(cp0b, cp0b)*dot(cp1b, cp1b);
angleB = asin(sqrt(dot(cross_prod, cross_prod)/scale));
angleB = asin(SQRT(dot(cross_prod, cross_prod)/scale));
if (cosangle < 0.0f)
angleB = PI-angleB;
}
......@@ -104,7 +104,7 @@ __kernel void computeCMAPTorsionForces(int numAtoms, int numTorsions, __global f
float normCross1 = dot(cp0a, cp0a);
float normSqrBC = dot(v1a, v1a);
float normBC = sqrt(normSqrBC);
float normBC = SQRT(normSqrBC);
float normCross2 = dot(cp1a, cp1a);
float dp = 1.0f/normSqrBC;
float4 ff = (float4) ((-dEdA*normBC)/normCross1, dot(v0a, v1a)*dp, dot(v2a, v1a)*dp, (dEdA*normBC)/normCross2);
......@@ -129,7 +129,7 @@ __kernel void computeCMAPTorsionForces(int numAtoms, int numTorsions, __global f
normCross1 = dot(cp0b, cp0b);
normSqrBC = dot(v1b, v1b);
normBC = sqrt(normSqrBC);
normBC = SQRT(normSqrBC);
normCross2 = dot(cp1b, cp1b);
dp = 1.0f/normSqrBC;
ff = (float4) ((-dEdB*normBC)/normCross1, dot(v0b, v1b)*dp, dot(v2b, v1b)*dp, (dEdB*normBC)/normCross2);
......
......@@ -38,7 +38,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
else
#endif
{
y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
......
......@@ -42,7 +42,7 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
else
#endif
{
y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
......
......@@ -61,7 +61,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
else
#endif
{
y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
......
......@@ -35,7 +35,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
else
#endif
{
y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
......
......@@ -39,7 +39,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
else
#endif
{
y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
......
......@@ -59,7 +59,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
else
#endif
{
y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
......
......@@ -43,7 +43,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
else
#endif
{
y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
......
......@@ -46,7 +46,7 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
else
#endif
{
y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
......
......@@ -66,7 +66,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
else
#endif
{
y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
......@@ -120,8 +120,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
float invR = RECIP(r);
float invR = RSQRT(r2);
float r = RECIP(invR);
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j;
#ifdef USE_SYMMETRIC
......
......@@ -63,7 +63,7 @@ __kernel void reduceFloat4Buffer(__global float4* buffer, int bufferSize, int nu
*/
__kernel void determineNativeAccuracy(__global float8* values, int numValues) {
for (int i = 0; i < numValues; ++i) {
for (int i = get_global_id(0); i < numValues; i += get_global_size(0)) {
float v = values[i].s0;
values[i] = (float8) (v, native_sqrt(v), native_rsqrt(v), native_recip(v), native_exp(v), native_log(v), 0.0f, 0.0f);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment