Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
077a93c8
Commit
077a93c8
authored
Oct 21, 2010
by
Peter Eastman
Browse files
Continuing to optimize nonbonded kernels for CPU
parent
e7b18ca4
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
77 additions
and
73 deletions
+77
-73
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+2
-2
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
+36
-25
platforms/opencl/src/kernels/nonbonded_cpu.cl
platforms/opencl/src/kernels/nonbonded_cpu.cl
+39
-46
No files found.
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
077a93c8
...
...
@@ -215,7 +215,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
if
(
maxInteractingTiles
>
numTiles
)
maxInteractingTiles
=
numTiles
;
interactingTiles
=
new
OpenCLArray
<
mm_ushort2
>
(
context
,
maxInteractingTiles
,
"interactingTiles"
);
interactionFlags
=
new
OpenCLArray
<
cl_uint
>
(
context
,
context
.
getSIMDWidth
()
==
32
||
deviceIsCpu
?
maxInteractingTiles
:
1
,
"interactionFlags"
);
interactionFlags
=
new
OpenCLArray
<
cl_uint
>
(
context
,
context
.
getSIMDWidth
()
==
32
?
maxInteractingTiles
:
(
deviceIsCpu
?
2
*
maxInteractingTiles
:
1
)
,
"interactionFlags"
);
interactionCount
=
new
OpenCLArray
<
cl_uint
>
(
context
,
1
,
"interactionCount"
,
true
);
blockCenter
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockCenter"
);
blockBoundingBox
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockBoundingBox"
);
...
...
@@ -459,7 +459,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusions
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusionIndices
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusionRowIndices
->
getDeviceBuffer
());
kernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
localDataSize
,
NULL
);
kernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
*
localDataSize
:
OpenCLContext
::
ThreadBlockSize
*
localDataSize
)
,
NULL
);
kernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
if
(
useCutoff
)
{
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingTiles
->
getDeviceBuffer
());
...
...
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
View file @
077a93c8
...
...
@@ -45,12 +45,15 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPe
*
This
is
called
by
findBlocksWithInteractions
()
.
It
compacts
the
list
of
blocks
and
writes
them
*
to
global
memory.
*/
void
storeInteractionData
(
__local
ushort2*
buffer,
int
numValid,
__local
unsigned
int*
flagsBuffer
,
__local
float4*
temp
,
__global
unsigned
int*
interactionCount,
__global
ushort2*
interactingTiles,
__global
unsigned
int*
interactionFlags,
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox,
unsigned
int
maxTiles
)
{
void
storeInteractionData
(
ushort2*
buffer,
int
numValid,
__global
unsigned
int*
interactionCount
,
__global
ushort2*
interactingTiles
,
__global
unsigned
int*
interactionFlags,
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox,
unsigned
int
maxTiles
)
{
//
Filter
the
list
of
tiles
by
comparing
the
distance
from
each
atom
to
the
other
bounding
box.
unsigned
int
flagsBuffer[2*BUFFER_SIZE]
;
float4
atomPositions[TILE_SIZE]
;
int
lasty
=
-1
;
float4
centery,
boxSizey
;
for
(
int
tile
=
0
; tile < numValid; ) {
int
x
=
buffer[tile].x
;
int
y
=
buffer[tile].y
;
...
...
@@ -59,37 +62,46 @@ void storeInteractionData(__local ushort2* buffer, int numValid, __local unsigne
continue
;
}
//
Load
the
atom
positions
and
the
bounding
box
of
the
other
block
.
//
Load
the
atom
positions
and
bounding
boxes
.
float4
center
=
blockCenter[x]
;
float4
boxSize
=
blockBoundingBox[x]
;
if
(
y
!=
lasty
)
float4
centerx
=
blockCenter[x]
;
float4
boxSizex
=
blockBoundingBox[x]
;
if
(
y
!=
lasty
)
{
for
(
int
atom
=
0
; atom < TILE_SIZE; atom++)
temp[atom]
=
posq[y*TILE_SIZE+atom]
;
atomPositions[atom]
=
posq[y*TILE_SIZE+atom]
;
centery
=
blockCenter[y]
;
boxSizey
=
blockBoundingBox[y]
;
lasty
=
y
;
}
//
Find
the
distance
of
each
atom
from
the
bounding
box.
unsigned
int
flags
=
0
;
unsigned
int
flags1
=
0
,
flags2
=
0
;
for
(
int
atom
=
0
; atom < TILE_SIZE; atom++) {
float4
delta
=
temp[atom]-center
;
float4
delta
=
atomPositions[atom]-centerx
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.y
-=
floor
(
delta.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.z
-=
floor
(
delta.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
#
endif
delta
=
max
((
float4
)
0.0f,
fabs
(
delta
)
-boxSizex
)
;
if
(
dot
(
delta.xyz,
delta.xyz
)
<
cutoffSquared
)
flags1
+=
1
<<
atom
;
delta
=
posq[x*TILE_SIZE+atom]-centery
;
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
#
endif
delta
=
max
((
float4
)
0.0f,
fabs
(
delta
)
-boxSize
)
;
if
(
delta.x*delta.x+delta.y*delta.y+delta.z*delta.z
<
cutoffSquared
)
flags
+=
1
<<
atom
;
delta
=
max
((
float4
)
0.0f,
fabs
(
delta
)
-boxSizey
)
;
if
(
dot
(
delta.xyz,
delta.xyz
)
<
cutoffSquared
)
flags2
+=
1
<<
atom
;
}
if
(
flags
==
0
)
{
if
(
flags1
==
0
||
flags2
==
0
)
{
//
This
tile
contains
no
interactions.
numValid--
;
buffer[tile]
=
buffer[numValid]
;
}
else
{
flagsBuffer[tile]
=
flags
;
flagsBuffer[2*tile]
=
flags1
;
flagsBuffer[2*tile+1]
=
flags2
;
tile++
;
}
}
...
...
@@ -100,7 +112,8 @@ void storeInteractionData(__local ushort2* buffer, int numValid, __local unsigne
if
(
baseIndex+numValid
<=
maxTiles
)
for
(
int
i
=
0
; i < numValid; i++) {
interactingTiles[baseIndex+i]
=
buffer[i]
;
interactionFlags[baseIndex+i]
=
flagsBuffer[i]
;
interactionFlags[2*
(
baseIndex+i
)
]
=
flagsBuffer[2*i]
;
interactionFlags[2*
(
baseIndex+i
)
+1]
=
flagsBuffer[2*i+1]
;
}
}
...
...
@@ -111,9 +124,7 @@ void storeInteractionData(__local ushort2* buffer, int numValid, __local unsigne
__kernel
void
findBlocksWithInteractions
(
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox,
__global
unsigned
int*
interactionCount,
__global
ushort2*
interactingTiles,
__global
unsigned
int*
interactionFlags,
__global
float4*
posq,
unsigned
int
maxTiles
)
{
__local
ushort2
buffer[BUFFER_SIZE]
;
__local
unsigned
int
flagsBuffer[BUFFER_SIZE]
;
__local
float4
temp[TILE_SIZE]
;
ushort2
buffer[BUFFER_SIZE]
;
int
valuesInBuffer
=
0
;
const
int
numTiles
=
(
NUM_BLOCKS*
(
NUM_BLOCKS+1
))
/2
;
unsigned
int
start
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
...
...
@@ -146,10 +157,10 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
buffer[valuesInBuffer++]
=
(
ushort2
)
(
x,
y
)
;
if
(
valuesInBuffer
==
BUFFER_SIZE
)
{
storeInteractionData
(
buffer,
valuesInBuffer,
flagsBuffer,
temp,
interactionCount,
interactingTiles,
interactionFlags,
cutoffSquared,
periodicBoxSize,
invPeriodicBoxSize,
posq,
blockCenter,
blockBoundingBox,
maxTiles
)
;
storeInteractionData
(
buffer,
valuesInBuffer,
interactionCount,
interactingTiles,
interactionFlags,
cutoffSquared,
periodicBoxSize,
invPeriodicBoxSize,
posq,
blockCenter,
blockBoundingBox,
maxTiles
)
;
valuesInBuffer
=
0
;
}
}
}
storeInteractionData
(
buffer,
valuesInBuffer,
flagsBuffer,
temp,
interactionCount,
interactingTiles,
interactionFlags,
cutoffSquared,
periodicBoxSize,
invPeriodicBoxSize,
posq,
blockCenter,
blockBoundingBox,
maxTiles
)
;
storeInteractionData
(
buffer,
valuesInBuffer,
interactionCount,
interactingTiles,
interactionFlags,
cutoffSquared,
periodicBoxSize,
invPeriodicBoxSize,
posq,
blockCenter,
blockBoundingBox,
maxTiles
)
;
}
platforms/opencl/src/kernels/nonbonded_cpu.cl
View file @
077a93c8
...
...
@@ -49,7 +49,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
//
Locate
the
exclusion
data
for
this
tile.
...
...
@@ -92,15 +91,14 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
if
(
!isExcluded
)
{
#
endif
float4
posq2
=
(
float4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.y
-=
floor
(
delta.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.z
-=
floor
(
delta.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
...
...
@@ -125,6 +123,9 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
#
endif
#
ifdef
USE_CUTOFF
}
#
endif
#
ifdef
USE_EXCLUSIONS
}
#
endif
excl
>>=
1
;
}
...
...
@@ -144,33 +145,27 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
localData[tgx].fz
=
0.0f
;
}
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
(
numTiles
<=
maxTiles
?
interactionFlags[pos]
:
0xFFFFFFFF
)
;
if
(
!hasExclusions
&&
flags
!=
0xFFFFFFFF
)
{
if
(
flags
==
0
)
{
//
No
interactions
in
this
tile.
}
else
{
unsigned
int
flags1
=
(
numTiles
<=
maxTiles
?
interactionFlags[2*pos]
:
0xFFFFFFFF
)
;
unsigned
int
flags2
=
(
numTiles
<=
maxTiles
?
interactionFlags[2*pos+1]
:
0xFFFFFFFF
)
;
if
(
!hasExclusions
&&
(
flags1
!=
0xFFFFFFFF
||
flags2
!=
0xFFFFFFFF
))
{
//
Compute
only
a
subset
of
the
interactions
in
this
tile.
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
if
((
flags2&
(
1<<tgx
))
!=
0
)
{
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
float4
force
=
0.0f
;
float4
posq1
=
posq[atom1]
;
LOAD_ATOM1_PARAMETERS
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
if
((
flags&
(
1<<j
))
!=
0
)
{
if
((
flags1&
(
1<<j
))
!=
0
)
{
bool
isExcluded
=
false
;
float4
posq2
=
(
float4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.y
-=
floor
(
delta.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.z
-=
floor
(
delta.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
float
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
float
invR
=
RSQRT
(
r2
)
;
float
r
=
RECIP
(
invR
)
;
unsigned
int
atom2
=
j
;
...
...
@@ -197,9 +192,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
localData[j].fy
+=
dEdR2.y
;
localData[j].fz
+=
dEdR2.z
;
#
endif
#
ifdef
USE_CUTOFF
}
#
endif
}
}
...
...
@@ -226,15 +219,14 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
if
(
!isExcluded
)
{
#
endif
float4
posq2
=
(
float4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.y
-=
floor
(
delta.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.z
-=
floor
(
delta.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
...
...
@@ -268,6 +260,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
}
#
endif
#
ifdef
USE_EXCLUSIONS
}
excl
>>=
1
;
#
endif
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment