Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c87cee06
Commit
c87cee06
authored
Jul 20, 2010
by
Peter Eastman
Browse files
Use native_exp and native_log when they are sufficiently accurate
parent
2a313e48
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
46 additions
and
35 deletions
+46
-35
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+18
-7
platforms/opencl/src/OpenCLExpressionUtilities.cpp
platforms/opencl/src/OpenCLExpressionUtilities.cpp
+2
-2
platforms/opencl/src/kernels/coulombLennardJones.cl
platforms/opencl/src/kernels/coulombLennardJones.cl
+3
-3
platforms/opencl/src/kernels/ewald.cl
platforms/opencl/src/kernels/ewald.cl
+2
-2
platforms/opencl/src/kernels/gbsaObc2.cl
platforms/opencl/src/kernels/gbsaObc2.cl
+2
-2
platforms/opencl/src/kernels/gbsaObc_default.cl
platforms/opencl/src/kernels/gbsaObc_default.cl
+5
-5
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
+8
-8
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+1
-1
platforms/opencl/src/kernels/utilities.cl
platforms/opencl/src/kernels/utilities.cl
+5
-5
No files found.
platforms/opencl/src/OpenCLContext.cpp
View file @
c87cee06
...
@@ -107,10 +107,10 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
...
@@ -107,10 +107,10 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
cl
::
Kernel
accuracyKernel
(
utilities
,
"determineNativeAccuracy"
);
cl
::
Kernel
accuracyKernel
(
utilities
,
"determineNativeAccuracy"
);
OpenCLArray
<
mm_float
4
>
values
(
*
this
,
20
,
"values"
,
true
);
OpenCLArray
<
mm_float
8
>
values
(
*
this
,
20
,
"values"
,
true
);
float
nextValue
=
1e-4
;
float
nextValue
=
1e-4
;
for
(
int
i
=
0
;
i
<
values
.
getSize
();
++
i
)
{
for
(
int
i
=
0
;
i
<
values
.
getSize
();
++
i
)
{
values
[
i
].
x
=
nextValue
;
values
[
i
].
s0
=
nextValue
;
nextValue
*=
M_PI
;
nextValue
*=
M_PI
;
}
}
values
.
upload
();
values
.
upload
();
...
@@ -118,12 +118,15 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
...
@@ -118,12 +118,15 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
accuracyKernel
.
setArg
<
cl_int
>
(
1
,
values
.
getSize
());
accuracyKernel
.
setArg
<
cl_int
>
(
1
,
values
.
getSize
());
executeKernel
(
accuracyKernel
,
values
.
getSize
());
executeKernel
(
accuracyKernel
,
values
.
getSize
());
values
.
download
();
values
.
download
();
double
maxSqrtError
=
0.0
,
maxRsqrtError
=
0.0
,
maxRecipError
=
0.0
;
double
maxSqrtError
=
0.0
,
maxRsqrtError
=
0.0
,
maxRecipError
=
0.0
,
maxExpError
=
0.0
,
maxLogError
=
0.0
;
for
(
int
i
=
0
;
i
<
values
.
getSize
();
++
i
)
{
for
(
int
i
=
0
;
i
<
values
.
getSize
();
++
i
)
{
double
correctSqrt
=
sqrt
(
values
[
i
].
x
);
double
v
=
values
[
i
].
s0
;
maxSqrtError
=
max
(
maxSqrtError
,
fabs
(
correctSqrt
-
values
[
i
].
y
)
/
correctSqrt
);
double
correctSqrt
=
sqrt
(
v
);
maxRsqrtError
=
max
(
maxRsqrtError
,
fabs
(
1.0
/
correctSqrt
-
values
[
i
].
z
)
*
correctSqrt
);
maxSqrtError
=
max
(
maxSqrtError
,
fabs
(
correctSqrt
-
values
[
i
].
s1
)
/
correctSqrt
);
maxRecipError
=
max
(
maxRecipError
,
fabs
(
1.0
/
values
[
i
].
x
-
values
[
i
].
w
)
/
values
[
i
].
w
);
maxRsqrtError
=
max
(
maxRsqrtError
,
fabs
(
1.0
/
correctSqrt
-
values
[
i
].
s2
)
*
correctSqrt
);
maxRecipError
=
max
(
maxRecipError
,
fabs
(
1.0
/
v
-
values
[
i
].
s3
)
/
values
[
i
].
s3
);
maxExpError
=
max
(
maxExpError
,
fabs
(
exp
(
v
)
-
values
[
i
].
s4
)
/
values
[
i
].
s4
);
maxLogError
=
max
(
maxLogError
,
fabs
(
log
(
v
)
-
values
[
i
].
s5
)
/
values
[
i
].
s5
);
}
}
if
(
maxSqrtError
<
1e-6
)
if
(
maxSqrtError
<
1e-6
)
compilationOptions
+=
" -DSQRT=native_sqrt"
;
compilationOptions
+=
" -DSQRT=native_sqrt"
;
...
@@ -137,6 +140,14 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
...
@@ -137,6 +140,14 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
compilationOptions
+=
" -DRECIP=native_recip"
;
compilationOptions
+=
" -DRECIP=native_recip"
;
else
else
compilationOptions
+=
" -DRECIP=1.0f/"
;
compilationOptions
+=
" -DRECIP=1.0f/"
;
if
(
maxExpError
<
1e-6
)
compilationOptions
+=
" -DEXP=native_exp"
;
else
compilationOptions
+=
" -DEXP=exp"
;
if
(
maxLogError
<
1e-6
)
compilationOptions
+=
" -DLOG=native_log"
;
else
compilationOptions
+=
" -DLOG=log"
;
}
}
OpenCLContext
::~
OpenCLContext
()
{
OpenCLContext
::~
OpenCLContext
()
{
...
...
platforms/opencl/src/OpenCLExpressionUtilities.cpp
View file @
c87cee06
...
@@ -153,10 +153,10 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
...
@@ -153,10 +153,10 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out
<<
"sqrt("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"sqrt("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
break
;
case
Operation
::
EXP
:
case
Operation
::
EXP
:
out
<<
"
exp
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"
EXP
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
break
;
case
Operation
::
LOG
:
case
Operation
::
LOG
:
out
<<
"
log
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"
LOG
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
break
;
case
Operation
::
SIN
:
case
Operation
::
SIN
:
out
<<
"sin("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"sin("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
...
...
platforms/opencl/src/kernels/coulombLennardJones.cl
View file @
c87cee06
...
@@ -17,7 +17,7 @@ if (!isExcluded || needCorrection) {
...
@@ -17,7 +17,7 @@ if (!isExcluded || needCorrection) {
if
(
needCorrection
)
{
if
(
needCorrection
)
{
//
Subtract
off
the
part
of
this
interaction
that
was
included
in
the
reciprocal
space
contribution.
//
Subtract
off
the
part
of
this
interaction
that
was
included
in
the
reciprocal
space
contribution.
tempForce
=
-prefactor*
((
1.0f-erfcAlphaR
)
-alphaR*
exp
(
-alphaR*alphaR
)
*TWO_OVER_SQRT_PI
)
;
tempForce
=
-prefactor*
((
1.0f-erfcAlphaR
)
-alphaR*
EXP
(
-alphaR*alphaR
)
*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
-prefactor*
(
1.0f-erfcAlphaR
)
;
tempEnergy
+=
-prefactor*
(
1.0f-erfcAlphaR
)
;
}
}
else
if
(
r2
<
CUTOFF_SQUARED
)
{
else
if
(
r2
<
CUTOFF_SQUARED
)
{
...
@@ -27,10 +27,10 @@ if (!isExcluded || needCorrection) {
...
@@ -27,10 +27,10 @@ if (!isExcluded || needCorrection) {
sig2
*=
sig2
;
sig2
*=
sig2
;
float
sig6
=
sig2*sig2*sig2
;
float
sig6
=
sig2*sig2*sig2
;
float
eps
=
sigmaEpsilon1.y*sigmaEpsilon2.y
;
float
eps
=
sigmaEpsilon1.y*sigmaEpsilon2.y
;
tempForce
=
eps*
(
12.0f*sig6
-
6.0f
)
*sig6
+
prefactor*
(
erfcAlphaR+alphaR*
exp
(
-alphaR*alphaR
)
*TWO_OVER_SQRT_PI
)
;
tempForce
=
eps*
(
12.0f*sig6
-
6.0f
)
*sig6
+
prefactor*
(
erfcAlphaR+alphaR*
EXP
(
-alphaR*alphaR
)
*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
eps*
(
sig6
-
1.0f
)
*sig6
+
prefactor*erfcAlphaR
;
tempEnergy
+=
eps*
(
sig6
-
1.0f
)
*sig6
+
prefactor*erfcAlphaR
;
#
else
#
else
tempForce
=
prefactor*
(
erfcAlphaR+alphaR*
exp
(
-alphaR*alphaR
)
*TWO_OVER_SQRT_PI
)
;
tempForce
=
prefactor*
(
erfcAlphaR+alphaR*
EXP
(
-alphaR*alphaR
)
*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
prefactor*erfcAlphaR
;
tempEnergy
+=
prefactor*erfcAlphaR
;
#
endif
#
endif
}
}
...
...
platforms/opencl/src/kernels/ewald.cl
View file @
c87cee06
...
@@ -46,7 +46,7 @@ __kernel void calculateEwaldCosSinSums(__global float* energyBuffer, __global fl
...
@@ -46,7 +46,7 @@ __kernel void calculateEwaldCosSinSums(__global float* energyBuffer, __global fl
//
Compute
the
contribution
to
the
energy.
//
Compute
the
contribution
to
the
energy.
float
k2
=
kx*kx
+
ky*ky
+
kz*kz
;
float
k2
=
kx*kx
+
ky*ky
+
kz*kz
;
float
ak
=
exp
(
k2*EXP_COEFFICIENT
)
/
k2
;
float
ak
=
EXP
(
k2*EXP_COEFFICIENT
)
/
k2
;
energy
+=
reciprocalCoefficient*ak*
(
sum.x*sum.x
+
sum.y*sum.y
)
;
energy
+=
reciprocalCoefficient*ak*
(
sum.x*sum.x
+
sum.y*sum.y
)
;
index
+=
get_global_size
(
0
)
;
index
+=
get_global_size
(
0
)
;
}
}
...
@@ -83,7 +83,7 @@ __kernel void calculateEwaldForces(__global float4* forceBuffers, __global float
...
@@ -83,7 +83,7 @@ __kernel void calculateEwaldForces(__global float4* forceBuffers, __global float
int
index
=
rx*
(
KMAX_Y*2-1
)
*
(
KMAX_Z*2-1
)
+
(
ry+KMAX_Y-1
)
*
(
KMAX_Z*2-1
)
+
(
rz+KMAX_Z-1
)
;
int
index
=
rx*
(
KMAX_Y*2-1
)
*
(
KMAX_Z*2-1
)
+
(
ry+KMAX_Y-1
)
*
(
KMAX_Z*2-1
)
+
(
rz+KMAX_Z-1
)
;
float
k2
=
kx*kx
+
ky*ky
+
kz*kz
;
float
k2
=
kx*kx
+
ky*ky
+
kz*kz
;
float
ak
=
exp
(
k2*EXP_COEFFICIENT
)
/k2
;
float
ak
=
EXP
(
k2*EXP_COEFFICIENT
)
/k2
;
phase
=
apos.z*kz
;
phase
=
apos.z*kz
;
float2
structureFactor
=
multofFloat2
(
tab_xy,
(
float2
)
(
cos
(
phase
)
,
sin
(
phase
)))
;
float2
structureFactor
=
multofFloat2
(
tab_xy,
(
float2
)
(
cos
(
phase
)
,
sin
(
phase
)))
;
float2
sum
=
cosSinSum[index]
;
float2
sum
=
cosSinSum[index]
;
...
...
platforms/opencl/src/kernels/gbsaObc2.cl
View file @
c87cee06
...
@@ -14,8 +14,8 @@ if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
...
@@ -14,8 +14,8 @@ if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
float
l_ij2I
=
l_ijI*l_ijI
;
float
l_ij2I
=
l_ijI*l_ijI
;
float
u_ij2J
=
u_ijJ*u_ijJ
;
float
u_ij2J
=
u_ijJ*u_ijJ
;
float
u_ij2I
=
u_ijI*u_ijI
;
float
u_ij2I
=
u_ijI*u_ijI
;
float
t1J
=
log
(
u_ijJ/l_ijJ
)
;
float
t1J
=
LOG
(
u_ijJ/l_ijJ
)
;
float
t1I
=
log
(
u_ijI/l_ijI
)
;
float
t1I
=
LOG
(
u_ijI/l_ijI
)
;
float
t2J
=
(
l_ij2J-u_ij2J
)
;
float
t2J
=
(
l_ij2J-u_ij2J
)
;
float
t2I
=
(
l_ij2I-u_ij2I
)
;
float
t2I
=
(
l_ij2I-u_ij2I
)
;
float
t3J
=
t2J*invR
;
float
t3J
=
t2J*invR
;
...
...
platforms/opencl/src/kernels/gbsaObc_default.cl
View file @
c87cee06
...
@@ -75,7 +75,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -75,7 +75,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
float
u_ij
=
RECIP
(
rScaledRadiusJ
)
;
float
u_ij
=
RECIP
(
rScaledRadiusJ
)
;
float
l_ij2
=
l_ij*l_ij
;
float
l_ij2
=
l_ij*l_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
)
;
float
ratio
=
LOG
(
u_ij
/
l_ij
)
;
bornSum
+=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
bornSum
+=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
(
0.25f*params2.y*params2.y*invR
)
*
(
l_ij2-u_ij2
)
;
(
0.25f*params2.y*params2.y*invR
)
*
(
l_ij2-u_ij2
)
;
if
(
params1.x
<
params2.x-r
)
if
(
params1.x
<
params2.x-r
)
...
@@ -143,7 +143,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -143,7 +143,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
float
u_ij
=
RECIP
(
rScaledRadiusJ
)
;
float
u_ij
=
RECIP
(
rScaledRadiusJ
)
;
float
l_ij2
=
l_ij*l_ij
;
float
l_ij2
=
l_ij*l_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
)
;
float
ratio
=
LOG
(
u_ij
/
l_ij
)
;
bornSum
+=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
bornSum
+=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
(
0.25f*params2.y*params2.y*invR
)
*
(
l_ij2-u_ij2
)
;
(
0.25f*params2.y*params2.y*invR
)
*
(
l_ij2-u_ij2
)
;
if
(
params1.x
<
params2.x-r
)
if
(
params1.x
<
params2.x-r
)
...
@@ -155,7 +155,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -155,7 +155,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
float
u_ij
=
RECIP
(
rScaledRadiusI
)
;
float
u_ij
=
RECIP
(
rScaledRadiusI
)
;
float
l_ij2
=
l_ij*l_ij
;
float
l_ij2
=
l_ij*l_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
)
;
float
ratio
=
LOG
(
u_ij
/
l_ij
)
;
float
term
=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
float
term
=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
(
0.25f*params1.y*params1.y*invR
)
*
(
l_ij2-u_ij2
)
;
(
0.25f*params1.y*params1.y*invR
)
*
(
l_ij2-u_ij2
)
;
if
(
params2.x
<
params1.x-r
)
if
(
params2.x
<
params1.x-r
)
...
@@ -248,7 +248,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -248,7 +248,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
float
bornRadius2
=
localData[baseLocalAtom+j].bornRadius
;
float
bornRadius2
=
localData[baseLocalAtom+j].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
expTerm
=
EXP
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
SQRT
(
denominator2
)
;
float
denominator
=
SQRT
(
denominator2
)
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
...
@@ -322,7 +322,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -322,7 +322,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
float
bornRadius2
=
localData[baseLocalAtom+tj].bornRadius
;
float
bornRadius2
=
localData[baseLocalAtom+tj].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
expTerm
=
EXP
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
SQRT
(
denominator2
)
;
float
denominator
=
SQRT
(
denominator2
)
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
...
...
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
View file @
c87cee06
...
@@ -75,7 +75,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -75,7 +75,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
float
u_ij
=
RECIP
(
rScaledRadiusJ
)
;
float
u_ij
=
RECIP
(
rScaledRadiusJ
)
;
float
l_ij2
=
l_ij*l_ij
;
float
l_ij2
=
l_ij*l_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
)
;
float
ratio
=
LOG
(
u_ij
/
l_ij
)
;
bornSum
+=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
bornSum
+=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
(
0.25f*params2.y*params2.y*invR
)
*
(
l_ij2-u_ij2
)
;
(
0.25f*params2.y*params2.y*invR
)
*
(
l_ij2-u_ij2
)
;
if
(
params1.x
<
params2.x-r
)
if
(
params1.x
<
params2.x-r
)
...
@@ -140,7 +140,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -140,7 +140,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
float
u_ij
=
RECIP
(
rScaledRadiusJ
)
;
float
u_ij
=
RECIP
(
rScaledRadiusJ
)
;
float
l_ij2
=
l_ij*l_ij
;
float
l_ij2
=
l_ij*l_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
)
;
float
ratio
=
LOG
(
u_ij
/
l_ij
)
;
bornSum
+=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
bornSum
+=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
(
0.25f*params2.y*params2.y*invR
)
*
(
l_ij2-u_ij2
)
;
(
0.25f*params2.y*params2.y*invR
)
*
(
l_ij2-u_ij2
)
;
if
(
params1.x
<
params2.x-r
)
if
(
params1.x
<
params2.x-r
)
...
@@ -152,7 +152,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -152,7 +152,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
float
u_ij
=
RECIP
(
rScaledRadiusI
)
;
float
u_ij
=
RECIP
(
rScaledRadiusI
)
;
float
l_ij2
=
l_ij*l_ij
;
float
l_ij2
=
l_ij*l_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
)
;
float
ratio
=
LOG
(
u_ij
/
l_ij
)
;
float
term
=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
float
term
=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
(
0.25f*params1.y*params1.y*invR
)
*
(
l_ij2-u_ij2
)
;
(
0.25f*params1.y*params1.y*invR
)
*
(
l_ij2-u_ij2
)
;
if
(
params2.x
<
params1.x-r
)
if
(
params2.x
<
params1.x-r
)
...
@@ -208,7 +208,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -208,7 +208,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
float
u_ij
=
RECIP
(
rScaledRadiusJ
)
;
float
u_ij
=
RECIP
(
rScaledRadiusJ
)
;
float
l_ij2
=
l_ij*l_ij
;
float
l_ij2
=
l_ij*l_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
)
;
float
ratio
=
LOG
(
u_ij
/
l_ij
)
;
bornSum
+=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
bornSum
+=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
(
0.25f*params2.y*params2.y*invR
)
*
(
l_ij2-u_ij2
)
;
(
0.25f*params2.y*params2.y*invR
)
*
(
l_ij2-u_ij2
)
;
if
(
params1.x
<
params2.x-r
)
if
(
params1.x
<
params2.x-r
)
...
@@ -220,7 +220,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -220,7 +220,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
float
u_ij
=
RECIP
(
rScaledRadiusI
)
;
float
u_ij
=
RECIP
(
rScaledRadiusI
)
;
float
l_ij2
=
l_ij*l_ij
;
float
l_ij2
=
l_ij*l_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
)
;
float
ratio
=
LOG
(
u_ij
/
l_ij
)
;
float
term
=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
float
term
=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
(
0.25f*params1.y*params1.y*invR
)
*
(
l_ij2-u_ij2
)
;
(
0.25f*params1.y*params1.y*invR
)
*
(
l_ij2-u_ij2
)
;
if
(
params2.x
<
params1.x-r
)
if
(
params2.x
<
params1.x-r
)
...
@@ -308,7 +308,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -308,7 +308,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
float
bornRadius2
=
localData[tbx+j].bornRadius
;
float
bornRadius2
=
localData[tbx+j].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
expTerm
=
EXP
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
SQRT
(
denominator2
)
;
float
denominator
=
SQRT
(
denominator2
)
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
...
@@ -377,7 +377,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -377,7 +377,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
float
bornRadius2
=
localData[tbx+j].bornRadius
;
float
bornRadius2
=
localData[tbx+j].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
expTerm
=
EXP
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
SQRT
(
denominator2
)
;
float
denominator
=
SQRT
(
denominator2
)
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
...
@@ -443,7 +443,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -443,7 +443,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
float
bornRadius2
=
localData[tbx+tj].bornRadius
;
float
bornRadius2
=
localData[tbx+tj].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
expTerm
=
EXP
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
SQRT
(
denominator2
)
;
float
denominator
=
SQRT
(
denominator2
)
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
float
tempEnergy
=
(
PREFACTOR*posq1.w*posq2.w
)
/denominator
;
...
...
platforms/opencl/src/kernels/pme.cl
View file @
c87cee06
...
@@ -173,7 +173,7 @@ __kernel void reciprocalConvolution(__global float2* pmeGrid, __global float* en
...
@@ -173,7 +173,7 @@ __kernel void reciprocalConvolution(__global float2* pmeGrid, __global float* en
float2
grid
=
pmeGrid[index]
;
float2
grid
=
pmeGrid[index]
;
float
m2
=
mhx*mhx+mhy*mhy+mhz*mhz
;
float
m2
=
mhx*mhx+mhy*mhy+mhz*mhz
;
float
denom
=
m2*bx*by*bz
;
float
denom
=
m2*bx*by*bz
;
float
eterm
=
recipScaleFactor*
exp
(
-RECIP_EXP_FACTOR*m2
)
/denom
;
float
eterm
=
recipScaleFactor*
EXP
(
-RECIP_EXP_FACTOR*m2
)
/denom
;
pmeGrid[index]
=
(
float2
)
(
grid.x*eterm,
grid.y*eterm
)
;
pmeGrid[index]
=
(
float2
)
(
grid.x*eterm,
grid.y*eterm
)
;
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
}
}
...
...
platforms/opencl/src/kernels/utilities.cl
View file @
c87cee06
...
@@ -59,12 +59,12 @@ __kernel void reduceFloat4Buffer(__global float4* buffer, int bufferSize, int nu
...
@@ -59,12 +59,12 @@ __kernel void reduceFloat4Buffer(__global float4* buffer, int bufferSize, int nu
}
}
/**
/**
*
This
is
called
to
determine
the
accuracy
of
native_sqrt
()
,
native_rsqrt
()
and
native_recip
()
.
*
This
is
called
to
determine
the
accuracy
of
various
native
functions
.
*/
*/
__kernel
void
determineNativeAccuracy
(
__global
float
4
*
values,
int
numValues
)
{
__kernel
void
determineNativeAccuracy
(
__global
float
8
*
values,
int
numValues
)
{
for
(
int
i
=
0
; i < numValues; ++i) {
for
(
int
i
=
0
; i < numValues; ++i) {
float
v
=
values[i].
x
;
float
v
=
values[i].
s0
;
values[i]
=
(
float
4
)
(
v,
native_sqrt
(
v
)
,
native_rsqrt
(
v
)
,
native_recip
(
v
))
;
values[i]
=
(
float
8
)
(
v,
native_sqrt
(
v
)
,
native_rsqrt
(
v
)
,
native_recip
(
v
)
,
native_exp
(
v
)
,
native_log
(
v
)
,
0.0f,
0.0f
)
;
}
}
}
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment