Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b2960968
Commit
b2960968
authored
Apr 29, 2010
by
Peter Eastman
Browse files
Check accuracy of native_ functions and only use them if they are accurate enough
parent
a0651b5b
Changes
15
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
94 additions
and
49 deletions
+94
-49
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+34
-0
platforms/opencl/src/kernels/customBondForce.cl
platforms/opencl/src/kernels/customBondForce.cl
+1
-1
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
+2
-2
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
+2
-2
platforms/opencl/src/kernels/customGBValueN2_default.cl
platforms/opencl/src/kernels/customGBValueN2_default.cl
+2
-2
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
+3
-3
platforms/opencl/src/kernels/customHbondForce.cl
platforms/opencl/src/kernels/customHbondForce.cl
+2
-2
platforms/opencl/src/kernels/gbsaObc_default.cl
platforms/opencl/src/kernels/gbsaObc_default.cl
+10
-10
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
+15
-15
platforms/opencl/src/kernels/harmonicAngleForce.cl
platforms/opencl/src/kernels/harmonicAngleForce.cl
+2
-2
platforms/opencl/src/kernels/harmonicBondForce.cl
platforms/opencl/src/kernels/harmonicBondForce.cl
+1
-1
platforms/opencl/src/kernels/nonbondedExceptions.cl
platforms/opencl/src/kernels/nonbondedExceptions.cl
+1
-1
platforms/opencl/src/kernels/nonbonded_default.cl
platforms/opencl/src/kernels/nonbonded_default.cl
+4
-4
platforms/opencl/src/kernels/nonbonded_nvidia.cl
platforms/opencl/src/kernels/nonbonded_nvidia.cl
+4
-4
platforms/opencl/src/kernels/utilities.cl
platforms/opencl/src/kernels/utilities.cl
+11
-0
No files found.
platforms/opencl/src/OpenCLContext.cpp
View file @
b2960968
...
...
@@ -96,6 +96,40 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
utilities
=
createProgram
(
OpenCLKernelSources
::
utilities
);
clearBufferKernel
=
cl
::
Kernel
(
utilities
,
"clearBuffer"
);
reduceFloat4Kernel
=
cl
::
Kernel
(
utilities
,
"reduceFloat4Buffer"
);
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
cl
::
Kernel
accuracyKernel
(
utilities
,
"determineNativeAccuracy"
);
OpenCLArray
<
mm_float4
>
values
(
*
this
,
20
,
"values"
,
true
);
float
nextValue
=
1e-4
;
for
(
int
i
=
0
;
i
<
values
.
getSize
();
++
i
)
{
values
[
i
].
x
=
nextValue
;
nextValue
*=
M_PI
;
}
values
.
upload
();
accuracyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
values
.
getDeviceBuffer
());
accuracyKernel
.
setArg
<
cl_int
>
(
1
,
values
.
getSize
());
executeKernel
(
accuracyKernel
,
values
.
getSize
());
values
.
download
();
double
maxSqrtError
=
0.0
,
maxRsqrtError
=
0.0
,
maxRecipError
=
0.0
;
for
(
int
i
=
0
;
i
<
values
.
getSize
();
++
i
)
{
double
correctSqrt
=
sqrt
(
values
[
i
].
x
);
maxSqrtError
=
max
(
maxSqrtError
,
fabs
(
correctSqrt
-
values
[
i
].
y
)
/
correctSqrt
);
maxRsqrtError
=
max
(
maxRsqrtError
,
fabs
(
1.0
/
correctSqrt
-
values
[
i
].
z
)
*
correctSqrt
);
maxRecipError
=
max
(
maxRecipError
,
fabs
(
1.0
/
values
[
i
].
x
-
values
[
i
].
w
)
/
values
[
i
].
w
);
}
if
(
maxSqrtError
<
1e-6
)
compilationOptions
+=
" -DSQRT=native_sqrt"
;
else
compilationOptions
+=
" -DSQRT=sqrt"
;
if
(
maxRsqrtError
<
1e-6
)
compilationOptions
+=
" -DRSQRT=native_rsqrt"
;
else
compilationOptions
+=
" -DRSQRT=rsqrt"
;
if
(
maxRecipError
<
1e-6
)
compilationOptions
+=
" -DRECIP=native_recip"
;
else
compilationOptions
+=
" -DRECIP=1.0f/"
;
}
OpenCLContext
::~
OpenCLContext
()
{
...
...
platforms/opencl/src/kernels/customBondForce.cl
View file @
b2960968
...
...
@@ -14,7 +14,7 @@ __kernel void computeCustomBondForces(int numAtoms, int numBonds, __global float
//
Compute
the
force.
float
r
=
native_sqrt
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
)
;
float
r
=
SQRT
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
)
;
COMPUTE_FORCE
delta.xyz
*=
-dEdR/r
;
...
...
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
View file @
b2960968
...
...
@@ -64,7 +64,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+j
;
float
dEdR
=
0.0f
;
...
...
@@ -140,7 +140,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+tj
;
float
dEdR
=
0.0f
;
...
...
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
View file @
b2960968
...
...
@@ -63,7 +63,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+j
;
float
dEdR
=
0.0f
;
...
...
@@ -136,7 +136,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+tj
;
float
dEdR
=
0.0f
;
...
...
platforms/opencl/src/kernels/customGBValueN2_default.cl
View file @
b2960968
...
...
@@ -61,7 +61,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+j
;
float
tempValue1
=
0.0f
;
...
...
@@ -134,7 +134,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+tj
;
float
tempValue1
=
0.0f
;
...
...
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
View file @
b2960968
...
...
@@ -61,7 +61,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+j
;
float
tempValue1
=
0.0f
;
...
...
@@ -122,7 +122,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
float
tempValue1
=
0.0f
;
float
tempValue2
=
0.0f
;
if
(
r2
<
CUTOFF_SQUARED
)
{
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+j
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
...
...
@@ -177,7 +177,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+tj
;
float
tempValue1
=
0.0f
;
...
...
platforms/opencl/src/kernels/customHbondForce.cl
View file @
b2960968
...
...
@@ -27,14 +27,14 @@ float4 deltaPeriodic(float4 vec1, float4 vec2) {
*/
float
computeAngle
(
float4
vec1,
float4
vec2
)
{
float
dotProduct
=
vec1.x*vec2.x
+
vec1.y*vec2.y
+
vec1.z*vec2.z
;
float
cosine
=
dotProduct*
native_rsqrt
(
vec1.w*vec2.w
)
;
float
cosine
=
dotProduct*
RSQRT
(
vec1.w*vec2.w
)
;
float
angle
;
if
(
cosine
>
0.99f
|
| cosine < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
float4 crossProduct = cross(vec1, vec2);
float scale = vec1.w*vec2.w;
angle = asin(
native_sqrt
(dot(crossProduct, crossProduct)/scale));
angle = asin(
SQRT
(dot(crossProduct, crossProduct)/scale));
if (cosine < 0.0f)
angle = M_PI-angle;
}
...
...
platforms/opencl/src/kernels/gbsaObc_default.cl
View file @
b2960968
...
...
@@ -65,8 +65,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
#
else
if
(
atom1
<
NUM_ATOMS
&&
y+baseLocalAtom+j
<
NUM_ATOMS
)
{
#
endif
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
RECIP
(
invR
)
;
float2
params2
=
(
float2
)
(
localData[baseLocalAtom+j].radius,
localData[baseLocalAtom+j].scaledRadius
)
;
float
rScaledRadiusJ
=
r+params2.y
;
if
((
j
!=
tgx
)
&&
(
params1.x
<
rScaledRadiusJ
))
{
...
...
@@ -133,8 +133,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
#
else
if
(
atom1
<
NUM_ATOMS
&&
y+baseLocalAtom+tj
<
NUM_ATOMS
)
{
#
endif
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
RECIP
(
invR
)
;
float2
params2
=
(
float2
)
(
localData[baseLocalAtom+tj].radius,
localData[baseLocalAtom+tj].scaledRadius
)
;
float
rScaledRadiusJ
=
r+params2.y
;
if
(
params1.x
<
rScaledRadiusJ
)
{
...
...
@@ -241,14 +241,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
RECIP
(
invR
)
;
float
bornRadius2
=
localData[baseLocalAtom+j].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
native_sqrt
(
denominator2
)
;
float
denominator
=
SQRT
(
denominator2
)
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
Gpol
=
tempEnergy/denominator2
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
...
...
@@ -315,14 +315,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
RECIP
(
invR
)
;
float
bornRadius2
=
localData[baseLocalAtom+tj].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
native_sqrt
(
denominator2
)
;
float
denominator
=
SQRT
(
denominator2
)
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
Gpol
=
tempEnergy/denominator2
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
...
...
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
View file @
b2960968
...
...
@@ -65,8 +65,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
#
else
if
(
atom1
<
NUM_ATOMS
&&
y+j
<
NUM_ATOMS
)
{
#
endif
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
RECIP
(
invR
)
;
float2
params2
=
(
float2
)
(
localData[tbx+j].radius,
localData[tbx+j].scaledRadius
)
;
float
rScaledRadiusJ
=
r+params2.y
;
if
((
j
!=
tgx
)
&&
(
params1.x
<
rScaledRadiusJ
))
{
...
...
@@ -130,8 +130,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
#
else
if
(
atom1
<
NUM_ATOMS
&&
y+j
<
NUM_ATOMS
)
{
#
endif
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
RECIP
(
invR
)
;
float2
params2
=
(
float2
)
(
localData[tbx+j].radius,
localData[tbx+j].scaledRadius
)
;
float
rScaledRadiusJ
=
r+params2.y
;
if
(
params1.x
<
rScaledRadiusJ
)
{
...
...
@@ -198,8 +198,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
#
else
if
(
atom1
<
NUM_ATOMS
&&
y+tj
<
NUM_ATOMS
)
{
#
endif
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
RECIP
(
invR
)
;
float2
params2
=
(
float2
)
(
localData[tbx+tj].radius,
localData[tbx+tj].scaledRadius
)
;
float
rScaledRadiusJ
=
r+params2.y
;
if
(
params1.x
<
rScaledRadiusJ
)
{
...
...
@@ -301,14 +301,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
RECIP
(
invR
)
;
float
bornRadius2
=
localData[tbx+j].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
native_sqrt
(
denominator2
)
;
float
denominator
=
SQRT
(
denominator2
)
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
Gpol
=
tempEnergy/denominator2
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
...
...
@@ -370,14 +370,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
RECIP
(
invR
)
;
float
bornRadius2
=
localData[tbx+j].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
native_sqrt
(
denominator2
)
;
float
denominator
=
SQRT
(
denominator2
)
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
Gpol
=
tempEnergy/denominator2
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
...
...
@@ -436,14 +436,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
RECIP
(
invR
)
;
float
bornRadius2
=
localData[tbx+tj].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
native_sqrt
(
denominator2
)
;
float
denominator
=
SQRT
(
denominator2
)
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
Gpol
=
tempEnergy/denominator2
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
...
...
platforms/opencl/src/kernels/harmonicAngleForce.cl
View file @
b2960968
...
...
@@ -20,11 +20,11 @@ __kernel void calcHarmonicAngleForce(int numAtoms, int numAngles, __global float
float4
v1
=
a2-a3
;
float4
cp
=
cross
(
v0,
v1
)
;
float
rp
=
cp.x*cp.x
+
cp.y*cp.y
+
cp.z*cp.z
;
rp
=
max
(
native_sqrt
(
rp
)
,
1.0e-06f
)
;
rp
=
max
(
SQRT
(
rp
)
,
1.0e-06f
)
;
float
r21
=
v0.x*v0.x
+
v0.y*v0.y
+
v0.z*v0.z
;
float
r23
=
v1.x*v1.x
+
v1.y*v1.y
+
v1.z*v1.z
;
float
dot
=
v0.x*v1.x
+
v0.y*v1.y
+
v0.z*v1.z
;
float
cosine
=
dot*
native_rsqrt
(
r21*r23
)
;
float
cosine
=
dot*
RSQRT
(
r21*r23
)
;
float
deltaIdeal
=
acos
(
cosine
)
-angleParams.x
;
energy
+=
0.5f*angleParams.y*deltaIdeal*deltaIdeal
;
float
dEdR
=
angleParams.y*deltaIdeal
;
...
...
platforms/opencl/src/kernels/harmonicBondForce.cl
View file @
b2960968
...
...
@@ -14,7 +14,7 @@ __kernel void calcHarmonicBondForce(int numAtoms, int numBonds, __global float4*
//
Compute
the
force.
float
r
=
native_sqrt
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
)
;
float
r
=
SQRT
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
)
;
float
deltaIdeal
=
r-bondParams.x
;
energy
+=
0.5f
*
bondParams.y*deltaIdeal*deltaIdeal
;
float
dEdR
=
bondParams.y
*
deltaIdeal
;
...
...
platforms/opencl/src/kernels/nonbondedExceptions.cl
View file @
b2960968
...
...
@@ -16,7 +16,7 @@ __kernel void computeNonbondedExceptions(__global float4* forceBuffers, __global
//
Compute
the
force.
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
invR
=
RSQRT
(
r2
)
;
float
sig2
=
invR*exceptionParams.y
;
sig2
*=
sig2
;
float
sig6
=
sig2*sig2*sig2
;
...
...
platforms/opencl/src/kernels/nonbonded_default.cl
View file @
b2960968
...
...
@@ -67,8 +67,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
RECIP
(
invR
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+j
;
float
dEdR
=
0.0f
;
...
...
@@ -135,8 +135,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
RECIP
(
invR
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+tj
;
float
dEdR
=
0.0f
;
...
...
platforms/opencl/src/kernels/nonbonded_nvidia.cl
View file @
b2960968
...
...
@@ -124,8 +124,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
RECIP
(
invR
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+j
;
float
dEdR
=
0.0f
;
...
...
@@ -181,8 +181,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
RECIP
(
invR
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+tj
;
float
dEdR
=
0.0f
;
...
...
platforms/opencl/src/kernels/utilities.cl
View file @
b2960968
...
...
@@ -30,3 +30,14 @@ __kernel void reduceFloat4Buffer(__global float4* buffer, int bufferSize, int nu
index
+=
get_global_size
(
0
)
;
}
}
/**
 * This is called to determine the accuracy of native_sqrt(), native_rsqrt() and native_recip().
 *
 * On entry, the x component of each element of values holds an input number v.  On exit, each
 * element has been overwritten with (v, native_sqrt(v), native_rsqrt(v), native_recip(v)).
 *
 * @param values     buffer of inputs (read from .x), overwritten in place with the results
 * @param numValues  the number of elements of values to process
 */
__kernel void determineNativeAccuracy(__global float4* values, int numValues) {
    // Stride over the buffer by the global work size so that each element is handled by exactly
    // one work-item.  Having every work-item loop over the whole buffer would repeat the same work
    // and make all of them write the same global locations concurrently.
    for (int i = get_global_id(0); i < numValues; i += get_global_size(0)) {
        float v = values[i].x;
        values[i] = (float4) (v, native_sqrt(v), native_rsqrt(v), native_recip(v));
    }
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment