Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b2960968
"platforms/common/vscode:/vscode.git/clone" did not exist on "939ecf28a4bee6c45e37a9e64ff53df3e2e0fd79"
Commit
b2960968
authored
Apr 29, 2010
by
Peter Eastman
Browse files
Check accuracy of native_ functions and only use them if they are accurate enough
parent
a0651b5b
Changes
15
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
94 additions
and
49 deletions
+94
-49
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+34
-0
platforms/opencl/src/kernels/customBondForce.cl
platforms/opencl/src/kernels/customBondForce.cl
+1
-1
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
+2
-2
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
+2
-2
platforms/opencl/src/kernels/customGBValueN2_default.cl
platforms/opencl/src/kernels/customGBValueN2_default.cl
+2
-2
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
+3
-3
platforms/opencl/src/kernels/customHbondForce.cl
platforms/opencl/src/kernels/customHbondForce.cl
+2
-2
platforms/opencl/src/kernels/gbsaObc_default.cl
platforms/opencl/src/kernels/gbsaObc_default.cl
+10
-10
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
+15
-15
platforms/opencl/src/kernels/harmonicAngleForce.cl
platforms/opencl/src/kernels/harmonicAngleForce.cl
+2
-2
platforms/opencl/src/kernels/harmonicBondForce.cl
platforms/opencl/src/kernels/harmonicBondForce.cl
+1
-1
platforms/opencl/src/kernels/nonbondedExceptions.cl
platforms/opencl/src/kernels/nonbondedExceptions.cl
+1
-1
platforms/opencl/src/kernels/nonbonded_default.cl
platforms/opencl/src/kernels/nonbonded_default.cl
+4
-4
platforms/opencl/src/kernels/nonbonded_nvidia.cl
platforms/opencl/src/kernels/nonbonded_nvidia.cl
+4
-4
platforms/opencl/src/kernels/utilities.cl
platforms/opencl/src/kernels/utilities.cl
+11
-0
No files found.
platforms/opencl/src/OpenCLContext.cpp
View file @
b2960968
...
@@ -96,6 +96,40 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
...
@@ -96,6 +96,40 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
utilities
=
createProgram
(
OpenCLKernelSources
::
utilities
);
utilities
=
createProgram
(
OpenCLKernelSources
::
utilities
);
clearBufferKernel
=
cl
::
Kernel
(
utilities
,
"clearBuffer"
);
clearBufferKernel
=
cl
::
Kernel
(
utilities
,
"clearBuffer"
);
reduceFloat4Kernel
=
cl
::
Kernel
(
utilities
,
"reduceFloat4Buffer"
);
reduceFloat4Kernel
=
cl
::
Kernel
(
utilities
,
"reduceFloat4Buffer"
);
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
cl
::
Kernel
accuracyKernel
(
utilities
,
"determineNativeAccuracy"
);
OpenCLArray
<
mm_float4
>
values
(
*
this
,
20
,
"values"
,
true
);
float
nextValue
=
1e-4
;
for
(
int
i
=
0
;
i
<
values
.
getSize
();
++
i
)
{
values
[
i
].
x
=
nextValue
;
nextValue
*=
M_PI
;
}
values
.
upload
();
accuracyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
values
.
getDeviceBuffer
());
accuracyKernel
.
setArg
<
cl_int
>
(
1
,
values
.
getSize
());
executeKernel
(
accuracyKernel
,
values
.
getSize
());
values
.
download
();
double
maxSqrtError
=
0.0
,
maxRsqrtError
=
0.0
,
maxRecipError
=
0.0
;
for
(
int
i
=
0
;
i
<
values
.
getSize
();
++
i
)
{
double
correctSqrt
=
sqrt
(
values
[
i
].
x
);
maxSqrtError
=
max
(
maxSqrtError
,
fabs
(
correctSqrt
-
values
[
i
].
y
)
/
correctSqrt
);
maxRsqrtError
=
max
(
maxRsqrtError
,
fabs
(
1.0
/
correctSqrt
-
values
[
i
].
z
)
*
correctSqrt
);
maxRecipError
=
max
(
maxRecipError
,
fabs
(
1.0
/
values
[
i
].
x
-
values
[
i
].
w
)
/
values
[
i
].
w
);
}
if
(
maxSqrtError
<
1e-6
)
compilationOptions
+=
" -DSQRT=native_sqrt"
;
else
compilationOptions
+=
" -DSQRT=sqrt"
;
if
(
maxRsqrtError
<
1e-6
)
compilationOptions
+=
" -DRSQRT=native_rsqrt"
;
else
compilationOptions
+=
" -DRSQRT=rsqrt"
;
if
(
maxRecipError
<
1e-6
)
compilationOptions
+=
" -DRECIP=native_recip"
;
else
compilationOptions
+=
" -DRECIP=1.0f/"
;
}
}
OpenCLContext
::~
OpenCLContext
()
{
OpenCLContext
::~
OpenCLContext
()
{
...
...
platforms/opencl/src/kernels/customBondForce.cl
View file @
b2960968
...
@@ -14,7 +14,7 @@ __kernel void computeCustomBondForces(int numAtoms, int numBonds, __global float
...
@@ -14,7 +14,7 @@ __kernel void computeCustomBondForces(int numAtoms, int numBonds, __global float
//
Compute
the
force.
//
Compute
the
force.
float
r
=
native_sqrt
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
)
;
float
r
=
SQRT
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
)
;
COMPUTE_FORCE
COMPUTE_FORCE
delta.xyz
*=
-dEdR/r
;
delta.xyz
*=
-dEdR/r
;
...
...
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
View file @
b2960968
...
@@ -64,7 +64,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
...
@@ -64,7 +64,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
#
endif
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+j
;
atom2
=
y+baseLocalAtom+j
;
float
dEdR
=
0.0f
;
float
dEdR
=
0.0f
;
...
@@ -140,7 +140,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
...
@@ -140,7 +140,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
#
endif
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+tj
;
atom2
=
y+baseLocalAtom+tj
;
float
dEdR
=
0.0f
;
float
dEdR
=
0.0f
;
...
...
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
View file @
b2960968
...
@@ -63,7 +63,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
...
@@ -63,7 +63,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
#
endif
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2
=
y+j
;
atom2
=
y+j
;
float
dEdR
=
0.0f
;
float
dEdR
=
0.0f
;
...
@@ -136,7 +136,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
...
@@ -136,7 +136,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
#
endif
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2
=
y+tj
;
atom2
=
y+tj
;
float
dEdR
=
0.0f
;
float
dEdR
=
0.0f
;
...
...
platforms/opencl/src/kernels/customGBValueN2_default.cl
View file @
b2960968
...
@@ -61,7 +61,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
...
@@ -61,7 +61,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
#
endif
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+j
;
atom2
=
y+baseLocalAtom+j
;
float
tempValue1
=
0.0f
;
float
tempValue1
=
0.0f
;
...
@@ -134,7 +134,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
...
@@ -134,7 +134,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
#
endif
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+tj
;
atom2
=
y+baseLocalAtom+tj
;
float
tempValue1
=
0.0f
;
float
tempValue1
=
0.0f
;
...
...
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
View file @
b2960968
...
@@ -61,7 +61,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
...
@@ -61,7 +61,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
#
endif
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2
=
y+j
;
atom2
=
y+j
;
float
tempValue1
=
0.0f
;
float
tempValue1
=
0.0f
;
...
@@ -122,7 +122,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
...
@@ -122,7 +122,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
float
tempValue1
=
0.0f
;
float
tempValue1
=
0.0f
;
float
tempValue2
=
0.0f
;
float
tempValue2
=
0.0f
;
if
(
r2
<
CUTOFF_SQUARED
)
{
if
(
r2
<
CUTOFF_SQUARED
)
{
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2
=
y+j
;
atom2
=
y+j
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
...
@@ -177,7 +177,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
...
@@ -177,7 +177,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
#
endif
float
r
=
native_sqrt
(
r2
)
;
float
r
=
SQRT
(
r2
)
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2
=
y+tj
;
atom2
=
y+tj
;
float
tempValue1
=
0.0f
;
float
tempValue1
=
0.0f
;
...
...
platforms/opencl/src/kernels/customHbondForce.cl
View file @
b2960968
...
@@ -27,14 +27,14 @@ float4 deltaPeriodic(float4 vec1, float4 vec2) {
...
@@ -27,14 +27,14 @@ float4 deltaPeriodic(float4 vec1, float4 vec2) {
*/
*/
float
computeAngle
(
float4
vec1,
float4
vec2
)
{
float
computeAngle
(
float4
vec1,
float4
vec2
)
{
float
dotProduct
=
vec1.x*vec2.x
+
vec1.y*vec2.y
+
vec1.z*vec2.z
;
float
dotProduct
=
vec1.x*vec2.x
+
vec1.y*vec2.y
+
vec1.z*vec2.z
;
float
cosine
=
dotProduct*
native_rsqrt
(
vec1.w*vec2.w
)
;
float
cosine
=
dotProduct*
RSQRT
(
vec1.w*vec2.w
)
;
float
angle
;
float
angle
;
if
(
cosine
>
0.99f
|
| cosine < -0.99f) {
if
(
cosine
>
0.99f
|
| cosine < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
float4 crossProduct = cross(vec1, vec2);
float4 crossProduct = cross(vec1, vec2);
float scale = vec1.w*vec2.w;
float scale = vec1.w*vec2.w;
angle = asin(
native_sqrt
(dot(crossProduct, crossProduct)/scale));
angle = asin(
SQRT
(dot(crossProduct, crossProduct)/scale));
if (cosine < 0.0f)
if (cosine < 0.0f)
angle = M_PI-angle;
angle = M_PI-angle;
}
}
...
...
platforms/opencl/src/kernels/gbsaObc_default.cl
View file @
b2960968
...
@@ -65,8 +65,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
...
@@ -65,8 +65,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
#
else
#
else
if
(
atom1
<
NUM_ATOMS
&&
y+baseLocalAtom+j
<
NUM_ATOMS
)
{
if
(
atom1
<
NUM_ATOMS
&&
y+baseLocalAtom+j
<
NUM_ATOMS
)
{
#
endif
#
endif
float
invR
=
native_rsqrt
(
r2
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
r
=
RECIP
(
invR
)
;
float2
params2
=
(
float2
)
(
localData[baseLocalAtom+j].radius,
localData[baseLocalAtom+j].scaledRadius
)
;
float2
params2
=
(
float2
)
(
localData[baseLocalAtom+j].radius,
localData[baseLocalAtom+j].scaledRadius
)
;
float
rScaledRadiusJ
=
r+params2.y
;
float
rScaledRadiusJ
=
r+params2.y
;
if
((
j
!=
tgx
)
&&
(
params1.x
<
rScaledRadiusJ
))
{
if
((
j
!=
tgx
)
&&
(
params1.x
<
rScaledRadiusJ
))
{
...
@@ -133,8 +133,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
...
@@ -133,8 +133,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
#
else
#
else
if
(
atom1
<
NUM_ATOMS
&&
y+baseLocalAtom+tj
<
NUM_ATOMS
)
{
if
(
atom1
<
NUM_ATOMS
&&
y+baseLocalAtom+tj
<
NUM_ATOMS
)
{
#
endif
#
endif
float
invR
=
native_rsqrt
(
r2
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
r
=
RECIP
(
invR
)
;
float2
params2
=
(
float2
)
(
localData[baseLocalAtom+tj].radius,
localData[baseLocalAtom+tj].scaledRadius
)
;
float2
params2
=
(
float2
)
(
localData[baseLocalAtom+tj].radius,
localData[baseLocalAtom+tj].scaledRadius
)
;
float
rScaledRadiusJ
=
r+params2.y
;
float
rScaledRadiusJ
=
r+params2.y
;
if
(
params1.x
<
rScaledRadiusJ
)
{
if
(
params1.x
<
rScaledRadiusJ
)
{
...
@@ -241,14 +241,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
...
@@ -241,14 +241,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
r
=
RECIP
(
invR
)
;
float
bornRadius2
=
localData[baseLocalAtom+j].bornRadius
;
float
bornRadius2
=
localData[baseLocalAtom+j].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
native_sqrt
(
denominator2
)
;
float
denominator
=
SQRT
(
denominator2
)
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
Gpol
=
tempEnergy/denominator2
;
float
Gpol
=
tempEnergy/denominator2
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
...
@@ -315,14 +315,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
...
@@ -315,14 +315,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
r
=
RECIP
(
invR
)
;
float
bornRadius2
=
localData[baseLocalAtom+tj].bornRadius
;
float
bornRadius2
=
localData[baseLocalAtom+tj].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
native_sqrt
(
denominator2
)
;
float
denominator
=
SQRT
(
denominator2
)
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
Gpol
=
tempEnergy/denominator2
;
float
Gpol
=
tempEnergy/denominator2
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
...
...
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
View file @
b2960968
...
@@ -65,8 +65,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
...
@@ -65,8 +65,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
#
else
#
else
if
(
atom1
<
NUM_ATOMS
&&
y+j
<
NUM_ATOMS
)
{
if
(
atom1
<
NUM_ATOMS
&&
y+j
<
NUM_ATOMS
)
{
#
endif
#
endif
float
invR
=
native_rsqrt
(
r2
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
r
=
RECIP
(
invR
)
;
float2
params2
=
(
float2
)
(
localData[tbx+j].radius,
localData[tbx+j].scaledRadius
)
;
float2
params2
=
(
float2
)
(
localData[tbx+j].radius,
localData[tbx+j].scaledRadius
)
;
float
rScaledRadiusJ
=
r+params2.y
;
float
rScaledRadiusJ
=
r+params2.y
;
if
((
j
!=
tgx
)
&&
(
params1.x
<
rScaledRadiusJ
))
{
if
((
j
!=
tgx
)
&&
(
params1.x
<
rScaledRadiusJ
))
{
...
@@ -130,8 +130,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
...
@@ -130,8 +130,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
#
else
#
else
if
(
atom1
<
NUM_ATOMS
&&
y+j
<
NUM_ATOMS
)
{
if
(
atom1
<
NUM_ATOMS
&&
y+j
<
NUM_ATOMS
)
{
#
endif
#
endif
float
invR
=
native_rsqrt
(
r2
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
r
=
RECIP
(
invR
)
;
float2
params2
=
(
float2
)
(
localData[tbx+j].radius,
localData[tbx+j].scaledRadius
)
;
float2
params2
=
(
float2
)
(
localData[tbx+j].radius,
localData[tbx+j].scaledRadius
)
;
float
rScaledRadiusJ
=
r+params2.y
;
float
rScaledRadiusJ
=
r+params2.y
;
if
(
params1.x
<
rScaledRadiusJ
)
{
if
(
params1.x
<
rScaledRadiusJ
)
{
...
@@ -198,8 +198,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
...
@@ -198,8 +198,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
#
else
#
else
if
(
atom1
<
NUM_ATOMS
&&
y+tj
<
NUM_ATOMS
)
{
if
(
atom1
<
NUM_ATOMS
&&
y+tj
<
NUM_ATOMS
)
{
#
endif
#
endif
float
invR
=
native_rsqrt
(
r2
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
r
=
RECIP
(
invR
)
;
float2
params2
=
(
float2
)
(
localData[tbx+tj].radius,
localData[tbx+tj].scaledRadius
)
;
float2
params2
=
(
float2
)
(
localData[tbx+tj].radius,
localData[tbx+tj].scaledRadius
)
;
float
rScaledRadiusJ
=
r+params2.y
;
float
rScaledRadiusJ
=
r+params2.y
;
if
(
params1.x
<
rScaledRadiusJ
)
{
if
(
params1.x
<
rScaledRadiusJ
)
{
...
@@ -301,14 +301,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
...
@@ -301,14 +301,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
r
=
RECIP
(
invR
)
;
float
bornRadius2
=
localData[tbx+j].bornRadius
;
float
bornRadius2
=
localData[tbx+j].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
native_sqrt
(
denominator2
)
;
float
denominator
=
SQRT
(
denominator2
)
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
Gpol
=
tempEnergy/denominator2
;
float
Gpol
=
tempEnergy/denominator2
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
...
@@ -370,14 +370,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
...
@@ -370,14 +370,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
r
=
RECIP
(
invR
)
;
float
bornRadius2
=
localData[tbx+j].bornRadius
;
float
bornRadius2
=
localData[tbx+j].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
native_sqrt
(
denominator2
)
;
float
denominator
=
SQRT
(
denominator2
)
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
Gpol
=
tempEnergy/denominator2
;
float
Gpol
=
tempEnergy/denominator2
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
...
@@ -436,14 +436,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
...
@@ -436,14 +436,14 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
r
=
RECIP
(
invR
)
;
float
bornRadius2
=
localData[tbx+tj].bornRadius
;
float
bornRadius2
=
localData[tbx+tj].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
native_sqrt
(
denominator2
)
;
float
denominator
=
SQRT
(
denominator2
)
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
Gpol
=
tempEnergy/denominator2
;
float
Gpol
=
tempEnergy/denominator2
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
...
...
platforms/opencl/src/kernels/harmonicAngleForce.cl
View file @
b2960968
...
@@ -20,11 +20,11 @@ __kernel void calcHarmonicAngleForce(int numAtoms, int numAngles, __global float
...
@@ -20,11 +20,11 @@ __kernel void calcHarmonicAngleForce(int numAtoms, int numAngles, __global float
float4
v1
=
a2-a3
;
float4
v1
=
a2-a3
;
float4
cp
=
cross
(
v0,
v1
)
;
float4
cp
=
cross
(
v0,
v1
)
;
float
rp
=
cp.x*cp.x
+
cp.y*cp.y
+
cp.z*cp.z
;
float
rp
=
cp.x*cp.x
+
cp.y*cp.y
+
cp.z*cp.z
;
rp
=
max
(
native_sqrt
(
rp
)
,
1.0e-06f
)
;
rp
=
max
(
SQRT
(
rp
)
,
1.0e-06f
)
;
float
r21
=
v0.x*v0.x
+
v0.y*v0.y
+
v0.z*v0.z
;
float
r21
=
v0.x*v0.x
+
v0.y*v0.y
+
v0.z*v0.z
;
float
r23
=
v1.x*v1.x
+
v1.y*v1.y
+
v1.z*v1.z
;
float
r23
=
v1.x*v1.x
+
v1.y*v1.y
+
v1.z*v1.z
;
float
dot
=
v0.x*v1.x
+
v0.y*v1.y
+
v0.z*v1.z
;
float
dot
=
v0.x*v1.x
+
v0.y*v1.y
+
v0.z*v1.z
;
float
cosine
=
dot*
native_rsqrt
(
r21*r23
)
;
float
cosine
=
dot*
RSQRT
(
r21*r23
)
;
float
deltaIdeal
=
acos
(
cosine
)
-angleParams.x
;
float
deltaIdeal
=
acos
(
cosine
)
-angleParams.x
;
energy
+=
0.5f*angleParams.y*deltaIdeal*deltaIdeal
;
energy
+=
0.5f*angleParams.y*deltaIdeal*deltaIdeal
;
float
dEdR
=
angleParams.y*deltaIdeal
;
float
dEdR
=
angleParams.y*deltaIdeal
;
...
...
platforms/opencl/src/kernels/harmonicBondForce.cl
View file @
b2960968
...
@@ -14,7 +14,7 @@ __kernel void calcHarmonicBondForce(int numAtoms, int numBonds, __global float4*
...
@@ -14,7 +14,7 @@ __kernel void calcHarmonicBondForce(int numAtoms, int numBonds, __global float4*
//
Compute
the
force.
//
Compute
the
force.
float
r
=
native_sqrt
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
)
;
float
r
=
SQRT
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
)
;
float
deltaIdeal
=
r-bondParams.x
;
float
deltaIdeal
=
r-bondParams.x
;
energy
+=
0.5f
*
bondParams.y*deltaIdeal*deltaIdeal
;
energy
+=
0.5f
*
bondParams.y*deltaIdeal*deltaIdeal
;
float
dEdR
=
bondParams.y
*
deltaIdeal
;
float
dEdR
=
bondParams.y
*
deltaIdeal
;
...
...
platforms/opencl/src/kernels/nonbondedExceptions.cl
View file @
b2960968
...
@@ -16,7 +16,7 @@ __kernel void computeNonbondedExceptions(__global float4* forceBuffers, __global
...
@@ -16,7 +16,7 @@ __kernel void computeNonbondedExceptions(__global float4* forceBuffers, __global
//
Compute
the
force.
//
Compute
the
force.
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
invR
=
RSQRT
(
r2
)
;
float
sig2
=
invR*exceptionParams.y
;
float
sig2
=
invR*exceptionParams.y
;
sig2
*=
sig2
;
sig2
*=
sig2
;
float
sig6
=
sig2*sig2*sig2
;
float
sig6
=
sig2*sig2*sig2
;
...
...
platforms/opencl/src/kernels/nonbonded_default.cl
View file @
b2960968
...
@@ -67,8 +67,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -67,8 +67,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
r
=
RECIP
(
invR
)
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+j
;
atom2
=
y+baseLocalAtom+j
;
float
dEdR
=
0.0f
;
float
dEdR
=
0.0f
;
...
@@ -135,8 +135,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -135,8 +135,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
r
=
RECIP
(
invR
)
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+tj
;
atom2
=
y+baseLocalAtom+tj
;
float
dEdR
=
0.0f
;
float
dEdR
=
0.0f
;
...
...
platforms/opencl/src/kernels/nonbonded_nvidia.cl
View file @
b2960968
...
@@ -124,8 +124,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -124,8 +124,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
r
=
RECIP
(
invR
)
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2
=
y+j
;
atom2
=
y+j
;
float
dEdR
=
0.0f
;
float
dEdR
=
0.0f
;
...
@@ -181,8 +181,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -181,8 +181,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
delta.z
-=
floor
(
delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
invR
=
RSQRT
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
r
=
RECIP
(
invR
)
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2
=
y+tj
;
atom2
=
y+tj
;
float
dEdR
=
0.0f
;
float
dEdR
=
0.0f
;
...
...
platforms/opencl/src/kernels/utilities.cl
View file @
b2960968
...
@@ -30,3 +30,14 @@ __kernel void reduceFloat4Buffer(__global float4* buffer, int bufferSize, int nu
...
@@ -30,3 +30,14 @@ __kernel void reduceFloat4Buffer(__global float4* buffer, int bufferSize, int nu
index
+=
get_global_size
(
0
)
;
index
+=
get_global_size
(
0
)
;
}
}
}
}
/**
*
This
is
called
to
determine
the
accuracy
of
native_sqrt
()
,
native_rsqrt
()
and
native_recip
()
.
*/
__kernel
void
determineNativeAccuracy
(
__global
float4*
values,
int
numValues
)
{
for
(
int
i
=
0
; i < numValues; ++i) {
float
v
=
values[i].x
;
values[i]
=
(
float4
)
(
v,
native_sqrt
(
v
)
,
native_rsqrt
(
v
)
,
native_recip
(
v
))
;
}
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment