Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
077a93c8
"vscode:/vscode.git/clone" did not exist on "26c51a0c4ea5e610b2fb1ea6a0bb7a86f3bafdd0"
Commit
077a93c8
authored
Oct 21, 2010
by
Peter Eastman
Browse files
Continuing to optimize nonbonded kernels for CPU
parent
e7b18ca4
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
77 additions
and
73 deletions
+77
-73
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+2
-2
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
+36
-25
platforms/opencl/src/kernels/nonbonded_cpu.cl
platforms/opencl/src/kernels/nonbonded_cpu.cl
+39
-46
No files found.
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
077a93c8
...
...
@@ -215,7 +215,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
if
(
maxInteractingTiles
>
numTiles
)
maxInteractingTiles
=
numTiles
;
interactingTiles
=
new
OpenCLArray
<
mm_ushort2
>
(
context
,
maxInteractingTiles
,
"interactingTiles"
);
interactionFlags
=
new
OpenCLArray
<
cl_uint
>
(
context
,
context
.
getSIMDWidth
()
==
32
||
deviceIsCpu
?
maxInteractingTiles
:
1
,
"interactionFlags"
);
interactionFlags
=
new
OpenCLArray
<
cl_uint
>
(
context
,
context
.
getSIMDWidth
()
==
32
?
maxInteractingTiles
:
(
deviceIsCpu
?
2
*
maxInteractingTiles
:
1
)
,
"interactionFlags"
);
interactionCount
=
new
OpenCLArray
<
cl_uint
>
(
context
,
1
,
"interactionCount"
,
true
);
blockCenter
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockCenter"
);
blockBoundingBox
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockBoundingBox"
);
...
...
@@ -459,7 +459,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusions
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusionIndices
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusionRowIndices
->
getDeviceBuffer
());
kernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
localDataSize
,
NULL
);
kernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
*
localDataSize
:
OpenCLContext
::
ThreadBlockSize
*
localDataSize
)
,
NULL
);
kernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
if
(
useCutoff
)
{
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingTiles
->
getDeviceBuffer
());
...
...
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
View file @
077a93c8
...
...
@@ -45,12 +45,15 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPe
*
This
is
called
by
findBlocksWithInteractions
()
.
It
compacts
the
list
of
blocks
and
writes
them
*
to
global
memory.
*/
void
storeInteractionData
(
__local
ushort2*
buffer,
int
numValid,
__lo
c
al
unsigned
int*
flagsBuffer
,
__lo
c
al
float4*
temp
,
__global
unsigned
int*
interactionCount,
__global
ushort2*
interactingTiles,
__global
unsigned
int*
interactionFlags,
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox,
unsigned
int
maxTiles
)
{
void
storeInteractionData
(
ushort2*
buffer,
int
numValid,
__
g
lo
b
al
unsigned
int*
interactionCount
,
__
g
lo
b
al
ushort2*
interactingTiles
,
__global
unsigned
int*
interactionFlags,
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox,
unsigned
int
maxTiles
)
{
//
Filter
the
list
of
tiles
by
comparing
the
distance
from
each
atom
to
the
other
bounding
box.
unsigned
int
flagsBuffer[2*BUFFER_SIZE]
;
float4
atomPositions[TILE_SIZE]
;
int
lasty
=
-1
;
float4
centery,
boxSizey
;
for
(
int
tile
=
0
; tile < numValid; ) {
int
x
=
buffer[tile].x
;
int
y
=
buffer[tile].y
;
...
...
@@ -59,37 +62,46 @@ void storeInteractionData(__local ushort2* buffer, int numValid, __local unsigne
continue
;
}
//
Load
the
atom
positions
and
the
bounding
box
of
the
other
block
.
//
Load
the
atom
positions
and
bounding
box
es
.
float4
center
=
blockCenter[x]
;
float4
boxSize
=
blockBoundingBox[x]
;
if
(
y
!=
lasty
)
float4
center
x
=
blockCenter[x]
;
float4
boxSize
x
=
blockBoundingBox[x]
;
if
(
y
!=
lasty
)
{
for
(
int
atom
=
0
; atom < TILE_SIZE; atom++)
temp[atom]
=
posq[y*TILE_SIZE+atom]
;
atomPositions[atom]
=
posq[y*TILE_SIZE+atom]
;
centery
=
blockCenter[y]
;
boxSizey
=
blockBoundingBox[y]
;
lasty
=
y
;
}
//
Find
the
distance
of
each
atom
from
the
bounding
box.
unsigned
int
flags
=
0
;
unsigned
int
flags
1
=
0
,
flags2
=
0
;
for
(
int
atom
=
0
; atom < TILE_SIZE; atom++) {
float4
delta
=
temp
[atom]-center
;
float4
delta
=
atomPositions
[atom]-center
x
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.y
-=
floor
(
delta.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.z
-=
floor
(
delta.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
#
endif
delta
=
max
((
float4
)
0.0f,
fabs
(
delta
)
-boxSizex
)
;
if
(
dot
(
delta.xyz,
delta.xyz
)
<
cutoffSquared
)
flags1
+=
1
<<
atom
;
delta
=
posq[x*TILE_SIZE+atom]-centery
;
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
#
endif
delta
=
max
((
float4
)
0.0f,
fabs
(
delta
)
-boxSize
)
;
if
(
d
elta.x*delta.x+delta.y*delta.y+delta.z*
delta.
z
<
cutoffSquared
)
flags
+=
1
<<
atom
;
delta
=
max
((
float4
)
0.0f,
fabs
(
delta
)
-boxSize
y
)
;
if
(
d
ot
(
delta.xyz,
delta.
xyz
)
<
cutoffSquared
)
flags
2
+=
1
<<
atom
;
}
if
(
flags
==
0
)
{
if
(
flags
1
==
0
||
flags2
==
0
)
{
//
This
tile
contains
no
interactions.
numValid--
;
buffer[tile]
=
buffer[numValid]
;
}
else
{
flagsBuffer[tile]
=
flags
;
flagsBuffer[2*tile]
=
flags1
;
flagsBuffer[2*tile+1]
=
flags2
;
tile++
;
}
}
...
...
@@ -100,7 +112,8 @@ void storeInteractionData(__local ushort2* buffer, int numValid, __local unsigne
if
(
baseIndex+numValid
<=
maxTiles
)
for
(
int
i
=
0
; i < numValid; i++) {
interactingTiles[baseIndex+i]
=
buffer[i]
;
interactionFlags[baseIndex+i]
=
flagsBuffer[i]
;
interactionFlags[2*
(
baseIndex+i
)
]
=
flagsBuffer[2*i]
;
interactionFlags[2*
(
baseIndex+i
)
+1]
=
flagsBuffer[2*i+1]
;
}
}
...
...
@@ -111,9 +124,7 @@ void storeInteractionData(__local ushort2* buffer, int numValid, __local unsigne
__kernel
void
findBlocksWithInteractions
(
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox,
__global
unsigned
int*
interactionCount,
__global
ushort2*
interactingTiles,
__global
unsigned
int*
interactionFlags,
__global
float4*
posq,
unsigned
int
maxTiles
)
{
__local
ushort2
buffer[BUFFER_SIZE]
;
__local
unsigned
int
flagsBuffer[BUFFER_SIZE]
;
__local
float4
temp[TILE_SIZE]
;
ushort2
buffer[BUFFER_SIZE]
;
int
valuesInBuffer
=
0
;
const
int
numTiles
=
(
NUM_BLOCKS*
(
NUM_BLOCKS+1
))
/2
;
unsigned
int
start
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
...
...
@@ -146,10 +157,10 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
buffer[valuesInBuffer++]
=
(
ushort2
)
(
x,
y
)
;
if
(
valuesInBuffer
==
BUFFER_SIZE
)
{
storeInteractionData
(
buffer,
valuesInBuffer,
flagsBuffer,
temp,
interactionCount,
interactingTiles,
interactionFlags,
cutoffSquared,
periodicBoxSize,
invPeriodicBoxSize,
posq,
blockCenter,
blockBoundingBox,
maxTiles
)
;
storeInteractionData
(
buffer,
valuesInBuffer,
interactionCount,
interactingTiles,
interactionFlags,
cutoffSquared,
periodicBoxSize,
invPeriodicBoxSize,
posq,
blockCenter,
blockBoundingBox,
maxTiles
)
;
valuesInBuffer
=
0
;
}
}
}
storeInteractionData
(
buffer,
valuesInBuffer,
flagsBuffer,
temp,
interactionCount,
interactingTiles,
interactionFlags,
cutoffSquared,
periodicBoxSize,
invPeriodicBoxSize,
posq,
blockCenter,
blockBoundingBox,
maxTiles
)
;
storeInteractionData
(
buffer,
valuesInBuffer,
interactionCount,
interactingTiles,
interactionFlags,
cutoffSquared,
periodicBoxSize,
invPeriodicBoxSize,
posq,
blockCenter,
blockBoundingBox,
maxTiles
)
;
}
platforms/opencl/src/kernels/nonbonded_cpu.cl
View file @
077a93c8
...
...
@@ -49,7 +49,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
//
Locate
the
exclusion
data
for
this
tile.
...
...
@@ -92,15 +91,14 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
if
(
!isExcluded
)
{
#
endif
float4
posq2
=
(
float4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.y
-=
floor
(
delta.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.z
-=
floor
(
delta.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
#
endif
float
r2
=
d
elta.x*delta.x
+
delta.y*delta.y
+
delta.z*
delta.
z
;
float
r2
=
d
ot
(
delta.xyz,
delta.
xyz
)
;
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
...
...
@@ -125,6 +123,9 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
#
endif
#
ifdef
USE_CUTOFF
}
#
endif
#
ifdef
USE_EXCLUSIONS
}
#
endif
excl
>>=
1
;
}
...
...
@@ -144,33 +145,27 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
localData[tgx].fz
=
0.0f
;
}
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
(
numTiles
<=
maxTiles
?
interactionFlags[pos]
:
0xFFFFFFFF
)
;
if
(
!hasExclusions
&&
flags
!=
0xFFFFFFFF
)
{
if
(
flags
==
0
)
{
//
No
interactions
in
this
tile.
}
else
{
unsigned
int
flags1
=
(
numTiles
<=
maxTiles
?
interactionFlags[2*pos]
:
0xFFFFFFFF
)
;
unsigned
int
flags2
=
(
numTiles
<=
maxTiles
?
interactionFlags[2*pos+1]
:
0xFFFFFFFF
)
;
if
(
!hasExclusions
&&
(
flags1
!=
0xFFFFFFFF
||
flags2
!=
0xFFFFFFFF
))
{
//
Compute
only
a
subset
of
the
interactions
in
this
tile.
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
if
((
flags2&
(
1<<tgx
))
!=
0
)
{
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
float4
force
=
0.0f
;
float4
posq1
=
posq[atom1]
;
LOAD_ATOM1_PARAMETERS
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
if
((
flags&
(
1<<j
))
!=
0
)
{
if
((
flags
1
&
(
1<<j
))
!=
0
)
{
bool
isExcluded
=
false
;
float4
posq2
=
(
float4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.y
-=
floor
(
delta.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.z
-=
floor
(
delta.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
float
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
float
invR
=
RSQRT
(
r2
)
;
float
r
=
RECIP
(
invR
)
;
unsigned
int
atom2
=
j
;
...
...
@@ -197,9 +192,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
localData[j].fy
+=
dEdR2.y
;
localData[j].fz
+=
dEdR2.z
;
#
endif
#
ifdef
USE_CUTOFF
}
#
endif
}
}
...
...
@@ -226,15 +219,14 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
if
(
!isExcluded
)
{
#
endif
float4
posq2
=
(
float4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.y
-=
floor
(
delta.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.z
-=
floor
(
delta.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
#
endif
float
r2
=
d
elta.x*delta.x
+
delta.y*delta.y
+
delta.z*
delta.
z
;
float
r2
=
d
ot
(
delta.xyz,
delta.
xyz
)
;
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
...
...
@@ -268,6 +260,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
}
#
endif
#
ifdef
USE_EXCLUSIONS
}
excl
>>=
1
;
#
endif
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment