Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
d2a5b3bb
Commit
d2a5b3bb
authored
Sep 09, 2011
by
Peter Eastman
Browse files
Minor performance optimizations
parent
592dc5a9
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
27 additions
and
44 deletions
+27
-44
platforms/opencl/src/kernels/coulombLennardJones.cl
platforms/opencl/src/kernels/coulombLennardJones.cl
+6
-6
platforms/opencl/src/kernels/findInteractingBlocks.cl
platforms/opencl/src/kernels/findInteractingBlocks.cl
+14
-27
platforms/opencl/src/kernels/nonbonded_nvidia.cl
platforms/opencl/src/kernels/nonbonded_nvidia.cl
+7
-11
No files found.
platforms/opencl/src/kernels/coulombLennardJones.cl
View file @
d2a5b3bb
...
@@ -25,9 +25,9 @@ if (!isExcluded || needCorrection) {
...
@@ -25,9 +25,9 @@ if (!isExcluded || needCorrection) {
float
sig2
=
invR*sig
;
float
sig2
=
invR*sig
;
sig2
*=
sig2
;
sig2
*=
sig2
;
float
sig6
=
sig2*sig2*sig2
;
float
sig6
=
sig2*sig2*sig2
;
float
eps
=
sigmaEpsilon1.y*sigmaEpsilon2.y
;
float
eps
sig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
tempForce
=
eps*
(
12.0f*sig6
-
6.0f
)
*sig6
+
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempForce
=
eps
sig6
*
(
12.0f*sig6
-
6.0f
)
+
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
eps*
(
sig6
-
1.0f
)
*sig6
+
prefactor*erfcAlphaR
;
tempEnergy
+=
eps
sig6
*
(
sig6
-
1.0f
)
+
prefactor*erfcAlphaR
;
#
else
#
else
tempForce
=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempForce
=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
prefactor*erfcAlphaR
;
tempEnergy
+=
prefactor*erfcAlphaR
;
...
@@ -49,9 +49,9 @@ if (!isExcluded || needCorrection) {
...
@@ -49,9 +49,9 @@ if (!isExcluded || needCorrection) {
float
sig2
=
invR*sig
;
float
sig2
=
invR*sig
;
sig2
*=
sig2
;
sig2
*=
sig2
;
float
sig6
=
sig2*sig2*sig2
;
float
sig6
=
sig2*sig2*sig2
;
float
eps
=
sigmaEpsilon1.y*sigmaEpsilon2.y
;
float
eps
sig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
tempForce
=
eps*
(
12.0f*sig6
-
6.0f
)
*sig6
;
tempForce
=
eps
sig6
*
(
12.0f*sig6
-
6.0f
)
;
tempEnergy
+=
select
(
0.0f,
eps*
(
sig6
-
1.0f
)
*sig6
,
includeInteraction
)
;
tempEnergy
+=
select
(
0.0f,
eps
sig6
*
(
sig6
-
1.0f
)
,
includeInteraction
)
;
#
endif
#
endif
#
if
HAS_COULOMB
#
if
HAS_COULOMB
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
...
platforms/opencl/src/kernels/findInteractingBlocks.cl
View file @
d2a5b3bb
...
@@ -71,52 +71,38 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
...
@@ -71,52 +71,38 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
sum[i]
=
temp[i].y
;
sum[i]
=
temp[i].y
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
numValid
=
sum[BUFFER_SIZE-1]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Compact
the
buffer.
//
Compact
the
buffer.
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
if
(
valid[i]
)
{
if
(
valid[i]
)
{
temp[sum[i]-1]
=
buffer[i]
;
temp[sum[i]-1]
=
buffer[i]
;
sum[i]
=
valid[i]
;
valid[i]
=
false
;
valid[i]
=
false
;
buffer[i]
=
(
ushort2
)
1
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
numValid
=
sum[BUFFER_SIZE-1]
;
#
ifndef
WARPS_ARE_ATOMIC
//
Filter
the
list
of
tiles
by
comparing
the
distance
from
each
atom
to
the
other
bounding
box.
//
Filter
the
list
of
tiles
by
comparing
the
distance
from
each
atom
to
the
other
bounding
box.
// We only do this if we aren't already optimizing the computation using flags.
int
tile
;
int
index
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
int
index
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
int
group
=
get_local_id
(
0
)
/TILE_SIZE
;
int
group
=
get_local_id
(
0
)
/TILE_SIZE
;
__local
int*
flag
=
sum
;
int
lasty
=
-1
;
float4
center,
boxSize,
pos
;
float4
center,
boxSize,
pos
;
for
(
tile
=
0
; tile < numValid; ) {
for
(
int
tile
=
0
; tile < numValid;
tile++
) {
int
x
=
temp[tile].x
;
int
x
=
temp[tile].x
;
int
y
=
temp[tile].y
;
int
y
=
temp[tile].y
;
if
(
x
==
y
)
{
if
(
x
==
y
)
tile++
;
continue
;
continue
;
}
if
(
index
==
0
)
flag[group]
=
true
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Load
an
atom
position
and
the
bounding
box
the
other
block.
//
Load
an
atom
position
and
the
bounding
box
the
other
block.
if
(
group
==
0
)
{
center
=
blockCenter[
(
group
==
0
?
x
:
y
)
]
;
center
=
blockCenter[x]
;
boxSize
=
blockBoundingBox[
(
group
==
0
?
x
:
y
)
]
;
boxSize
=
blockBoundingBox[x]
;
pos
=
posq[
(
group
==
0
?
y
:
x
)
*TILE_SIZE+index]
;
if
(
y
!=
lasty
)
pos
=
posq[y*TILE_SIZE+index]
;
}
else
{
if
(
y
!=
lasty
)
{
center
=
blockCenter[y]
;
boxSize
=
blockBoundingBox[y]
;
}
pos
=
posq[x*TILE_SIZE+index]
;
}
lasty
=
y
;
//
Find
the
distance
of
the
atom
from
the
bounding
box.
//
Find
the
distance
of
the
atom
from
the
bounding
box.
...
@@ -127,6 +113,7 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
...
@@ -127,6 +113,7 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
delta.z
-=
floor
(
delta.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.z
-=
floor
(
delta.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
#
endif
#
endif
delta
=
max
((
float4
)
0.0f,
fabs
(
delta
)
-boxSize
)
;
delta
=
max
((
float4
)
0.0f,
fabs
(
delta
)
-boxSize
)
;
__local
ushort*
flag
=
(
__local
ushort*
)
&buffer[tile]
;
if
(
delta.x*delta.x+delta.y*delta.y+delta.z*delta.z
<
cutoffSquared
)
if
(
delta.x*delta.x+delta.y*delta.y+delta.z*delta.z
<
cutoffSquared
)
flag[group]
=
false
;
flag[group]
=
false
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
@@ -136,11 +123,11 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
...
@@ -136,11 +123,11 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
numValid--;
numValid--;
if (get_local_id(0) == 0)
if (get_local_id(0) == 0)
temp[tile] = temp[numValid];
temp[tile] = temp[numValid];
tile--;
}
}
else
tile++;
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
}
}
#endif
// Store it to global memory.
// Store it to global memory.
...
...
platforms/opencl/src/kernels/nonbonded_nvidia.cl
View file @
d2a5b3bb
...
@@ -40,7 +40,6 @@ __kernel void computeNonbonded(
...
@@ -40,7 +40,6 @@ __kernel void computeNonbonded(
unsigned
int
end
=
startTileIndex+
(
warp+1
)
*numTiles/totalWarps
;
unsigned
int
end
=
startTileIndex+
(
warp+1
)
*numTiles/totalWarps
;
#
endif
#
endif
float
energy
=
0.0f
;
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
__local
unsigned
int
exclusionRange[2*WARPS_PER_GROUP]
;
__local
unsigned
int
exclusionRange[2*WARPS_PER_GROUP]
;
__local
int
exclusionIndex[WARPS_PER_GROUP]
;
__local
int
exclusionIndex[WARPS_PER_GROUP]
;
__local
int2*
reservedBlocks
=
(
__local
int2*
)
exclusionRange
;
__local
int2*
reservedBlocks
=
(
__local
int2*
)
exclusionRange
;
...
@@ -141,15 +140,13 @@ __kernel void computeNonbonded(
...
@@ -141,15 +140,13 @@ __kernel void computeNonbonded(
// This is an off-diagonal tile.
// This is an off-diagonal tile.
const unsigned int localAtomIndex = get_local_id(0);
const unsigned int localAtomIndex = get_local_id(0);
if (lasty != y) {
unsigned int j = y*TILE_SIZE + tgx;
unsigned int j = y*TILE_SIZE + tgx;
float4 tempPosq = posq[j];
float4 tempPosq = posq[j];
localData[localAtomIndex].x = tempPosq.x;
localData[localAtomIndex].x = tempPosq.x;
localData[localAtomIndex].y = tempPosq.y;
localData[localAtomIndex].y = tempPosq.y;
localData[localAtomIndex].z = tempPosq.z;
localData[localAtomIndex].z = tempPosq.z;
localData[localAtomIndex].q = tempPosq.w;
localData[localAtomIndex].q = tempPosq.w;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
localData[localAtomIndex].fx = 0.0f;
localData[localAtomIndex].fx = 0.0f;
localData[localAtomIndex].fy = 0.0f;
localData[localAtomIndex].fy = 0.0f;
localData[localAtomIndex].fz = 0.0f;
localData[localAtomIndex].fz = 0.0f;
...
@@ -353,7 +350,6 @@ __kernel void computeNonbonded(
...
@@ -353,7 +350,6 @@ __kernel void computeNonbonded(
}
}
}
}
#
endif
#
endif
lasty
=
y
;
pos++
;
pos++
;
}
while
(
pos
<
end
)
;
}
while
(
pos
<
end
)
;
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment