Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
70bd61a6
Commit
70bd61a6
authored
Sep 09, 2014
by
peastman
Browse files
Ported the new version of findBlocksWithInteractions() to OpenCL (not yet debugged)
parent
bfeb8ac7
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
190 additions
and
2 deletions
+190
-2
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+4
-0
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+1
-1
platforms/opencl/src/kernels/findInteractingBlocks.cl
platforms/opencl/src/kernels/findInteractingBlocks.cl
+185
-1
No files found.
platforms/opencl/src/OpenCLContext.cpp
View file @
70bd61a6
...
@@ -471,21 +471,25 @@ cl::Program OpenCLContext::createProgram(const string source, const map<string,
...
@@ -471,21 +471,25 @@ cl::Program OpenCLContext::createProgram(const string source, const map<string,
if
(
useDoublePrecision
)
{
if
(
useDoublePrecision
)
{
src
<<
"typedef double real;
\n
"
;
src
<<
"typedef double real;
\n
"
;
src
<<
"typedef double2 real2;
\n
"
;
src
<<
"typedef double2 real2;
\n
"
;
src
<<
"typedef double3 real3;
\n
"
;
src
<<
"typedef double4 real4;
\n
"
;
src
<<
"typedef double4 real4;
\n
"
;
}
}
else
{
else
{
src
<<
"typedef float real;
\n
"
;
src
<<
"typedef float real;
\n
"
;
src
<<
"typedef float2 real2;
\n
"
;
src
<<
"typedef float2 real2;
\n
"
;
src
<<
"typedef float3 real3;
\n
"
;
src
<<
"typedef float4 real4;
\n
"
;
src
<<
"typedef float4 real4;
\n
"
;
}
}
if
(
useDoublePrecision
||
useMixedPrecision
)
{
if
(
useDoublePrecision
||
useMixedPrecision
)
{
src
<<
"typedef double mixed;
\n
"
;
src
<<
"typedef double mixed;
\n
"
;
src
<<
"typedef double2 mixed2;
\n
"
;
src
<<
"typedef double2 mixed2;
\n
"
;
src
<<
"typedef double3 mixed3;
\n
"
;
src
<<
"typedef double4 mixed4;
\n
"
;
src
<<
"typedef double4 mixed4;
\n
"
;
}
}
else
{
else
{
src
<<
"typedef float mixed;
\n
"
;
src
<<
"typedef float mixed;
\n
"
;
src
<<
"typedef float2 mixed2;
\n
"
;
src
<<
"typedef float2 mixed2;
\n
"
;
src
<<
"typedef float3 mixed3;
\n
"
;
src
<<
"typedef float4 mixed4;
\n
"
;
src
<<
"typedef float4 mixed4;
\n
"
;
}
}
for
(
map
<
string
,
string
>::
const_iterator
iter
=
defines
.
begin
();
iter
!=
defines
.
end
();
++
iter
)
{
for
(
map
<
string
,
string
>::
const_iterator
iter
=
defines
.
begin
();
iter
!=
defines
.
end
();
++
iter
)
{
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
70bd61a6
...
@@ -319,7 +319,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -319,7 +319,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
defines
[
"MAX_EXCLUSIONS"
]
=
context
.
intToString
(
maxExclusions
);
defines
[
"MAX_EXCLUSIONS"
]
=
context
.
intToString
(
maxExclusions
);
defines
[
"BUFFER_GROUPS"
]
=
(
deviceIsCpu
?
"4"
:
"2"
);
defines
[
"BUFFER_GROUPS"
]
=
(
deviceIsCpu
?
"4"
:
"2"
);
string
file
=
(
deviceIsCpu
?
OpenCLKernelSources
::
findInteractingBlocks_cpu
:
OpenCLKernelSources
::
findInteractingBlocks
);
string
file
=
(
deviceIsCpu
?
OpenCLKernelSources
::
findInteractingBlocks_cpu
:
OpenCLKernelSources
::
findInteractingBlocks
);
int
groupSize
=
(
deviceIsCpu
?
32
:
128
);
int
groupSize
=
(
deviceIsCpu
||
context
.
getSIMDWidth
()
<
32
?
32
:
256
);
while
(
true
)
{
while
(
true
)
{
defines
[
"GROUP_SIZE"
]
=
context
.
intToString
(
groupSize
);
defines
[
"GROUP_SIZE"
]
=
context
.
intToString
(
groupSize
);
cl
::
Program
interactingBlocksProgram
=
context
.
createProgram
(
file
,
defines
);
cl
::
Program
interactingBlocksProgram
=
context
.
createProgram
(
file
,
defines
);
...
...
platforms/opencl/src/kernels/findInteractingBlocks.cl
View file @
70bd61a6
...
@@ -233,7 +233,7 @@ void storeInteractionData(int x, __local unsigned short* buffer, __local short*
...
@@ -233,7 +233,7 @@ void storeInteractionData(int x, __local unsigned short* buffer, __local short*
*
Compare
the
bounding
boxes
for
each
pair
of
blocks.
If
they
are
sufficiently
far
apart,
*
Compare
the
bounding
boxes
for
each
pair
of
blocks.
If
they
are
sufficiently
far
apart,
*
mark
them
as
non-interacting.
*
mark
them
as
non-interacting.
*/
*/
__kernel
void
findBlocksWithInteractions
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
unsigned
int*
restrict
interactionCount,
__kernel
void
findBlocksWithInteractions
2
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
unsigned
int*
restrict
interactionCount,
__global
int*
restrict
interactingTiles,
__global
unsigned
int*
restrict
interactingAtoms,
__global
const
real4*
restrict
posq,
unsigned
int
maxTiles,
unsigned
int
startBlockIndex,
__global
int*
restrict
interactingTiles,
__global
unsigned
int*
restrict
interactingAtoms,
__global
const
real4*
restrict
posq,
unsigned
int
maxTiles,
unsigned
int
startBlockIndex,
unsigned
int
numBlocks,
__global
real2*
restrict
sortedBlocks,
__global
const
real4*
restrict
sortedBlockCenter,
__global
const
real4*
restrict
sortedBlockBoundingBox,
unsigned
int
numBlocks,
__global
real2*
restrict
sortedBlocks,
__global
const
real4*
restrict
sortedBlockCenter,
__global
const
real4*
restrict
sortedBlockBoundingBox,
__global
const
unsigned
int*
restrict
exclusionIndices,
__global
const
unsigned
int*
restrict
exclusionRowIndices,
__global
real4*
restrict
oldPositions,
__global
const
unsigned
int*
restrict
exclusionIndices,
__global
const
unsigned
int*
restrict
exclusionRowIndices,
__global
real4*
restrict
oldPositions,
...
@@ -331,3 +331,187 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
...
@@ -331,3 +331,187 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0))
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0))
oldPositions[i] = posq[i];
oldPositions[i] = posq[i];
}
}
__kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, __global unsigned int* restrict interactionCount,
__global int* restrict interactingTiles, __global unsigned int* restrict interactingAtoms, __global const real4* restrict posq, unsigned int maxTiles, unsigned int startBlockIndex,
unsigned int numBlocks, __global real2* restrict sortedBlocks, __global const real4* restrict sortedBlockCenter, __global const real4* restrict sortedBlockBoundingBox,
__global const unsigned int* restrict exclusionIndices, __global const unsigned int* restrict exclusionRowIndices, __global real4* restrict oldPositions,
__global const int* restrict rebuildNeighborList) {
if (rebuildNeighborList[0] == 0)
return; // The neighbor list doesn't need to be rebuilt.
const int indexInWarp = get_local_id(0)%32;
const int warpStart = get_local_id(0)-indexInWarp;
const int totalWarps = get_global_size(0)/32;
const int warpIndex = get_global_id(0)/32;
const int warpMask = (1<<indexInWarp)-1;
__local int workgroupBuffer[BUFFER_SIZE*(GROUP_SIZE/32)];
__local int warpExclusions[MAX_EXCLUSIONS*(GROUP_SIZE/32)];
__local real3 posBuffer[GROUP_SIZE];
__local volatile int workgroupTileIndex[GROUP_SIZE/32];
__local bool includeBlockFlags[GROUP_SIZE];
__local short2 atomCountBuffer[GROUP_SIZE];
__local int* buffer = workgroupBuffer+BUFFER_SIZE*(warpStart/32);
__local int* exclusionsForX = warpExclusions+MAX_EXCLUSIONS*(warpStart/32);
__local volatile int* tileStartIndex = workgroupTileIndex+(warpStart/32);
// Loop over blocks.
for (int block1 = startBlockIndex+warpIndex; block1 < startBlockIndex+numBlocks; block1 += totalWarps) {
// Load data for this block. Note that all threads in a warp are processing the same block.
real2 sortedKey = sortedBlocks[block1];
int x = (int) sortedKey.y;
real4 blockCenterX = sortedBlockCenter[block1];
real4 blockSizeX = sortedBlockBoundingBox[block1];
int neighborsInBuffer = 0;
real3 pos1 = posq[x*TILE_SIZE+indexInWarp].xyz;
#ifdef USE_PERIODIC
const bool singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= PADDED_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= PADDED_CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= PADDED_CUTOFF);
if (singlePeriodicCopy) {
// The box is small enough that we can just translate all the atoms into a single periodic
// box, then skip having to apply periodic boundary conditions later.
pos1.xyz -= floor((pos1.xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
}
#endif
posBuffer[get_local_id(0)] = pos1;
// Load exclusion data for block x.
const int exclusionStart = exclusionRowIndices[x];
const int exclusionEnd = exclusionRowIndices[x+1];
const int numExclusions = exclusionEnd-exclusionStart;
for (int j = indexInWarp; j < numExclusions; j += 32)
exclusionsForX[j] = exclusionIndices[exclusionStart+j];
if (MAX_EXCLUSIONS > 32)
barrier(CLK_LOCAL_MEM_FENCE);
else
SYNC_WARPS;
// Loop over atom blocks to search for neighbors. The threads in a warp compare block1 against 32
// other blocks in parallel.
for (int block2Base = block1+1; block2Base < NUM_BLOCKS; block2Base += 32) {
int block2 = block2Base+indexInWarp;
bool includeBlock2 = (block2 < NUM_BLOCKS);
if (includeBlock2) {
real4 blockCenterY = (block2 < NUM_BLOCKS ? sortedBlockCenter[block2] : (real4) (0));
real4 blockSizeY = (block2 < NUM_BLOCKS ? sortedBlockBoundingBox[block2] : (real4) (0));
real4 blockDelta = blockCenterX-blockCenterY;
#ifdef USE_PERIODIC
blockDelta.xyz -= floor(blockDelta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
#endif
blockDelta.x = max(0.0f, fabs(blockDelta.x)-blockSizeX.x-blockSizeY.x);
blockDelta.y = max(0.0f, fabs(blockDelta.y)-blockSizeX.y-blockSizeY.y);
blockDelta.z = max(0.0f, fabs(blockDelta.z)-blockSizeX.z-blockSizeY.z);
includeBlock2 &= (blockDelta.x*blockDelta.x+blockDelta.y*blockDelta.y+blockDelta.z*blockDelta.z < PADDED_CUTOFF_SQUARED);
if (includeBlock2) {
unsigned short y = (unsigned short) sortedBlocks[block2].y;
for (int k = 0; k < numExclusions; k++)
includeBlock2 &= (exclusionsForX[k] != y);
}
}
// Loop over any blocks we identified as potentially containing neighbors.
includeBlockFlags[get_local_id(0)] = includeBlock2;
SYNC_WARPS;
for (int i = 0; i < TILE_SIZE; i++) {
if (includeBlockFlags[warpStart+i]) {
unsigned short y = (unsigned short) sortedBlocks[block2Base+i].y;
// Check each atom in block Y for interactions.
int start = y*TILE_SIZE;
int atom2 = start+indexInWarp;
real3 pos2 = posq[atom2].xyz;
#ifdef USE_PERIODIC
if (singlePeriodicCopy)
pos2.xyz -= floor((pos2.xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
#endif
bool interacts = false;
if (atom2 < NUM_ATOMS) {
#ifdef USE_PERIODIC
if (!singlePeriodicCopy) {
for (int j = 0; j < TILE_SIZE; j++) {
real3 delta = pos2-posBuffer[warpStart+j];
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
interacts |
=
(
delta.x*delta.x+delta.y*delta.y+delta.z*delta.z
<
PADDED_CUTOFF_SQUARED
)
;
}
}
else
{
#
endif
for
(
int
j
=
0
; j < TILE_SIZE; j++) {
real3
delta
=
pos2-posBuffer[warpStart+j]
;
interacts
|
=
(
delta.x*delta.x+delta.y*delta.y+delta.z*delta.z
<
PADDED_CUTOFF_SQUARED
)
;
}
#
ifdef
USE_PERIODIC
}
#
endif
}
//
Do
a
prefix
sum
to
compact
the
list
of
atoms.
atomCountBuffer[get_local_id
(
0
)
].x
=
(
interacts
?
1
:
0
)
;
SYNC_WARPS
;
int
whichBuffer
=
0
;
for
(
int
offset
=
1
; offset < TILE_SIZE; offset *= 2) {
if
(
whichBuffer
==
0
)
atomCountBuffer[get_local_id
(
0
)
].y
=
(
get_local_id
(
0
)
<
offset
?
atomCountBuffer[get_local_id
(
0
)
].x
:
atomCountBuffer[get_local_id
(
0
)
].x+atomCountBuffer[get_local_id
(
0
)
-offset].x
)
;
else
atomCountBuffer[get_local_id
(
0
)
].x
=
(
get_local_id
(
0
)
<
offset
?
atomCountBuffer[get_local_id
(
0
)
].y
:
atomCountBuffer[get_local_id
(
0
)
].y+atomCountBuffer[get_local_id
(
0
)
-offset].y
)
;
whichBuffer
=
1-whichBuffer
;
SYNC_WARPS
;
}
//
Add
any
interacting
atoms
to
the
buffer.
if
(
interacts
)
buffer[neighborsInBuffer+atomCountBuffer[get_local_id
(
0
)
].x-1]
=
atom2
;
neighborsInBuffer
+=
atomCountBuffer[warpStart+TILE_SIZE-1].x
;
if
(
neighborsInBuffer
>
BUFFER_SIZE-TILE_SIZE
)
{
//
Store
the
new
tiles
to
memory.
int
tilesToStore
=
neighborsInBuffer/TILE_SIZE
;
if
(
indexInWarp
==
0
)
*tileStartIndex
=
atom_add
(
interactionCount,
tilesToStore
)
;
int
newTileStartIndex
=
*tileStartIndex
;
if
(
newTileStartIndex+tilesToStore
<
maxTiles
)
{
if
(
indexInWarp
<
tilesToStore
)
interactingTiles[newTileStartIndex+indexInWarp]
=
x
;
for
(
int
j
=
0
; j < tilesToStore; j++)
interactingAtoms[
(
newTileStartIndex+j
)
*TILE_SIZE+indexInWarp]
=
buffer[indexInWarp+j*TILE_SIZE]
;
}
buffer[indexInWarp]
=
buffer[indexInWarp+TILE_SIZE*tilesToStore]
;
neighborsInBuffer
-=
TILE_SIZE*tilesToStore
;
}
}
}
}
//
If
we
have
a
partially
filled
buffer,
store
it
to
memory.
if
(
neighborsInBuffer
>
0
)
{
int
tilesToStore
=
(
neighborsInBuffer+TILE_SIZE-1
)
/TILE_SIZE
;
if
(
indexInWarp
==
0
)
*tileStartIndex
=
atom_add
(
interactionCount,
tilesToStore
)
;
int
newTileStartIndex
=
*tileStartIndex
;
if
(
newTileStartIndex+tilesToStore
<
maxTiles
)
{
if
(
indexInWarp
<
tilesToStore
)
interactingTiles[newTileStartIndex+indexInWarp]
=
x
;
for
(
int
j
=
0
; j < tilesToStore; j++)
interactingAtoms[
(
newTileStartIndex+j
)
*TILE_SIZE+indexInWarp]
=
(
indexInWarp+j*TILE_SIZE
<
neighborsInBuffer
?
buffer[indexInWarp+j*TILE_SIZE]
:
NUM_ATOMS
)
;
}
}
}
//
Record
the
positions
the
neighbor
list
is
based
on.
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0))
oldPositions[i]
=
posq[i]
;
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment