Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
a7b68da3
Commit
a7b68da3
authored
May 04, 2011
by
Peter Eastman
Browse files
Very minor optimizations
parent
c56b4801
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
16 additions
and
16 deletions
+16
-16
platforms/opencl/src/kernels/cmapTorsionForce.cl
platforms/opencl/src/kernels/cmapTorsionForce.cl
+4
-4
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
+1
-1
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
+1
-1
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
+1
-1
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
+1
-1
platforms/opencl/src/kernels/customGBValueN2_default.cl
platforms/opencl/src/kernels/customGBValueN2_default.cl
+1
-1
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
+1
-1
platforms/opencl/src/kernels/nonbonded_cpu.cl
platforms/opencl/src/kernels/nonbonded_cpu.cl
+1
-1
platforms/opencl/src/kernels/nonbonded_default.cl
platforms/opencl/src/kernels/nonbonded_default.cl
+1
-1
platforms/opencl/src/kernels/nonbonded_nvidia.cl
platforms/opencl/src/kernels/nonbonded_nvidia.cl
+3
-3
platforms/opencl/src/kernels/utilities.cl
platforms/opencl/src/kernels/utilities.cl
+1
-1
No files found.
platforms/opencl/src/kernels/cmapTorsionForce.cl
View file @
a7b68da3
...
@@ -31,7 +31,7 @@ __kernel void computeCMAPTorsionForces(int numAtoms, int numTorsions, __global f
...
@@ -31,7 +31,7 @@ __kernel void computeCMAPTorsionForces(int numAtoms, int numTorsions, __global f
float4 cross_prod = cross(cp0a, cp1a);
float4 cross_prod = cross(cp0a, cp1a);
float scale = dot(cp0a, cp0a)*dot(cp1a, cp1a);
float scale = dot(cp0a, cp0a)*dot(cp1a, cp1a);
angleA = asin(
sqrt
(dot(cross_prod, cross_prod)/scale));
angleA = asin(
SQRT
(dot(cross_prod, cross_prod)/scale));
if (cosangle < 0.0f)
if (cosangle < 0.0f)
angleA = PI-angleA;
angleA = PI-angleA;
}
}
...
@@ -54,7 +54,7 @@ __kernel void computeCMAPTorsionForces(int numAtoms, int numTorsions, __global f
...
@@ -54,7 +54,7 @@ __kernel void computeCMAPTorsionForces(int numAtoms, int numTorsions, __global f
float4
cross_prod
=
cross
(
cp0b,
cp1b
)
;
float4
cross_prod
=
cross
(
cp0b,
cp1b
)
;
float
scale
=
dot
(
cp0b,
cp0b
)
*dot
(
cp1b,
cp1b
)
;
float
scale
=
dot
(
cp0b,
cp0b
)
*dot
(
cp1b,
cp1b
)
;
angleB
=
asin
(
sqrt
(
dot
(
cross_prod,
cross_prod
)
/scale
))
;
angleB
=
asin
(
SQRT
(
dot
(
cross_prod,
cross_prod
)
/scale
))
;
if
(
cosangle
<
0.0f
)
if
(
cosangle
<
0.0f
)
angleB
=
PI-angleB
;
angleB
=
PI-angleB
;
}
}
...
@@ -104,7 +104,7 @@ __kernel void computeCMAPTorsionForces(int numAtoms, int numTorsions, __global f
...
@@ -104,7 +104,7 @@ __kernel void computeCMAPTorsionForces(int numAtoms, int numTorsions, __global f
float
normCross1
=
dot
(
cp0a,
cp0a
)
;
float
normCross1
=
dot
(
cp0a,
cp0a
)
;
float
normSqrBC
=
dot
(
v1a,
v1a
)
;
float
normSqrBC
=
dot
(
v1a,
v1a
)
;
float
normBC
=
sqrt
(
normSqrBC
)
;
float
normBC
=
SQRT
(
normSqrBC
)
;
float
normCross2
=
dot
(
cp1a,
cp1a
)
;
float
normCross2
=
dot
(
cp1a,
cp1a
)
;
float
dp
=
1.0f/normSqrBC
;
float
dp
=
1.0f/normSqrBC
;
float4
ff
=
(
float4
)
((
-dEdA*normBC
)
/normCross1,
dot
(
v0a,
v1a
)
*dp,
dot
(
v2a,
v1a
)
*dp,
(
dEdA*normBC
)
/normCross2
)
;
float4
ff
=
(
float4
)
((
-dEdA*normBC
)
/normCross1,
dot
(
v0a,
v1a
)
*dp,
dot
(
v2a,
v1a
)
*dp,
(
dEdA*normBC
)
/normCross2
)
;
...
@@ -129,7 +129,7 @@ __kernel void computeCMAPTorsionForces(int numAtoms, int numTorsions, __global f
...
@@ -129,7 +129,7 @@ __kernel void computeCMAPTorsionForces(int numAtoms, int numTorsions, __global f
normCross1
=
dot
(
cp0b,
cp0b
)
;
normCross1
=
dot
(
cp0b,
cp0b
)
;
normSqrBC
=
dot
(
v1b,
v1b
)
;
normSqrBC
=
dot
(
v1b,
v1b
)
;
normBC
=
sqrt
(
normSqrBC
)
;
normBC
=
SQRT
(
normSqrBC
)
;
normCross2
=
dot
(
cp1b,
cp1b
)
;
normCross2
=
dot
(
cp1b,
cp1b
)
;
dp
=
1.0f/normSqrBC
;
dp
=
1.0f/normSqrBC
;
ff
=
(
float4
)
((
-dEdB*normBC
)
/normCross1,
dot
(
v0b,
v1b
)
*dp,
dot
(
v2b,
v1b
)
*dp,
(
dEdB*normBC
)
/normCross2
)
;
ff
=
(
float4
)
((
-dEdB*normBC
)
/normCross1,
dot
(
v0b,
v1b
)
*dp,
dot
(
v2b,
v1b
)
*dp,
(
dEdB*normBC
)
/normCross2
)
;
...
...
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
View file @
a7b68da3
...
@@ -38,7 +38,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
...
@@ -38,7 +38,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
else
else
#
endif
#
endif
{
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
SQRT
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
y += (x < y ? -1 : 1);
...
...
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
View file @
a7b68da3
...
@@ -42,7 +42,7 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
...
@@ -42,7 +42,7 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
else
else
#
endif
#
endif
{
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
SQRT
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
y += (x < y ? -1 : 1);
...
...
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
View file @
a7b68da3
...
@@ -61,7 +61,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
...
@@ -61,7 +61,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
else
else
#
endif
#
endif
{
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
SQRT
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
y += (x < y ? -1 : 1);
...
...
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
View file @
a7b68da3
...
@@ -35,7 +35,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
...
@@ -35,7 +35,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
else
else
#
endif
#
endif
{
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
SQRT
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
y += (x < y ? -1 : 1);
...
...
platforms/opencl/src/kernels/customGBValueN2_default.cl
View file @
a7b68da3
...
@@ -39,7 +39,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
...
@@ -39,7 +39,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
else
else
#
endif
#
endif
{
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
SQRT
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
y += (x < y ? -1 : 1);
...
...
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
View file @
a7b68da3
...
@@ -59,7 +59,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
...
@@ -59,7 +59,7 @@ __kernel void computeN2Value(__global float4* posq, __local float4* local_posq,
else
else
#
endif
#
endif
{
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
SQRT
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
y += (x < y ? -1 : 1);
...
...
platforms/opencl/src/kernels/nonbonded_cpu.cl
View file @
a7b68da3
...
@@ -43,7 +43,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -43,7 +43,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
else
else
#
endif
#
endif
{
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
SQRT
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
y += (x < y ? -1 : 1);
...
...
platforms/opencl/src/kernels/nonbonded_default.cl
View file @
a7b68da3
...
@@ -46,7 +46,7 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
...
@@ -46,7 +46,7 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
else
else
#
endif
#
endif
{
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
SQRT
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
y += (x < y ? -1 : 1);
...
...
platforms/opencl/src/kernels/nonbonded_nvidia.cl
View file @
a7b68da3
...
@@ -66,7 +66,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -66,7 +66,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
else
else
#
endif
#
endif
{
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-
SQRT
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
y += (x < y ? -1 : 1);
...
@@ -120,8 +120,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -120,8 +120,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float
r = sqrt
(r2);
float
invR = RSQRT
(r2);
float
invR
= RECIP(
r
);
float
r
= RECIP(
invR
);
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j;
atom2 = y*TILE_SIZE+j;
#ifdef USE_SYMMETRIC
#ifdef USE_SYMMETRIC
...
...
platforms/opencl/src/kernels/utilities.cl
View file @
a7b68da3
...
@@ -63,7 +63,7 @@ __kernel void reduceFloat4Buffer(__global float4* buffer, int bufferSize, int nu
...
@@ -63,7 +63,7 @@ __kernel void reduceFloat4Buffer(__global float4* buffer, int bufferSize, int nu
*/
*/
__kernel
void
determineNativeAccuracy
(
__global
float8*
values,
int
numValues
)
{
__kernel
void
determineNativeAccuracy
(
__global
float8*
values,
int
numValues
)
{
for
(
int
i
=
0
; i < numValues;
++i
) {
for
(
int
i
=
get_global_id
(
0
)
; i < numValues;
i += get_global_size(0)
) {
float
v
=
values[i].s0
;
float
v
=
values[i].s0
;
values[i]
=
(
float8
)
(
v,
native_sqrt
(
v
)
,
native_rsqrt
(
v
)
,
native_recip
(
v
)
,
native_exp
(
v
)
,
native_log
(
v
)
,
0.0f,
0.0f
)
;
values[i]
=
(
float8
)
(
v,
native_sqrt
(
v
)
,
native_rsqrt
(
v
)
,
native_recip
(
v
)
,
native_exp
(
v
)
,
native_log
(
v
)
,
0.0f,
0.0f
)
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment