Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
22078b69
Commit
22078b69
authored
Aug 19, 2010
by
Peter Eastman
Browse files
Continuing changes to reduce memory use for large systems
parent
0f8da117
Changes
13
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
164 additions
and
128 deletions
+164
-128
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+14
-20
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+14
-49
platforms/opencl/src/OpenCLNonbondedUtilities.h
platforms/opencl/src/OpenCLNonbondedUtilities.h
+0
-10
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
+2
-2
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
+2
-2
platforms/opencl/src/kernels/customGBValueN2_default.cl
platforms/opencl/src/kernels/customGBValueN2_default.cl
+2
-2
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
+2
-2
platforms/opencl/src/kernels/findInteractingBlocks.cl
platforms/opencl/src/kernels/findInteractingBlocks.cl
+111
-21
platforms/opencl/src/kernels/gbsaObc_default.cl
platforms/opencl/src/kernels/gbsaObc_default.cl
+4
-4
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
+6
-6
platforms/opencl/src/kernels/nonbonded_default.cl
platforms/opencl/src/kernels/nonbonded_default.cl
+2
-2
platforms/opencl/src/kernels/nonbonded_nvidia.cl
platforms/opencl/src/kernels/nonbonded_nvidia.cl
+2
-2
platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
+3
-6
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
22078b69
...
@@ -1705,10 +1705,8 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1705,10 +1705,8 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
}
}
else
{
else
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getTiles
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
computeBornSumKernel
.
setArg
<
cl_uint
>
(
index
++
,
nb
.
getTiles
().
getSize
());
}
force1Kernel
=
cl
::
Kernel
(
program
,
"computeGBSAForce1"
);
force1Kernel
=
cl
::
Kernel
(
program
,
"computeGBSAForce1"
);
index
=
0
;
index
=
0
;
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getForceBuffers
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getForceBuffers
().
getDeviceBuffer
());
...
@@ -1723,10 +1721,8 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1723,10 +1721,8 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
}
}
else
{
else
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getTiles
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
force1Kernel
.
setArg
<
cl_uint
>
(
index
++
,
nb
.
getTiles
().
getSize
());
}
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
gbsaObcReductions
,
defines
);
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
gbsaObcReductions
,
defines
);
reduceBornSumKernel
=
cl
::
Kernel
(
program
,
"reduceBornSum"
);
reduceBornSumKernel
=
cl
::
Kernel
(
program
,
"reduceBornSum"
);
reduceBornSumKernel
.
setArg
<
cl_int
>
(
0
,
cl
.
getPaddedNumAtoms
());
reduceBornSumKernel
.
setArg
<
cl_int
>
(
0
,
cl
.
getPaddedNumAtoms
());
...
@@ -1753,9 +1749,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1753,9 +1749,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
force1Kernel
.
setArg
<
mm_float4
>
(
10
,
cl
.
getPeriodicBoxSize
());
force1Kernel
.
setArg
<
mm_float4
>
(
10
,
cl
.
getPeriodicBoxSize
());
force1Kernel
.
setArg
<
mm_float4
>
(
11
,
cl
.
getInvPeriodicBoxSize
());
force1Kernel
.
setArg
<
mm_float4
>
(
11
,
cl
.
getInvPeriodicBoxSize
());
}
}
cl
.
executeKernel
(
computeBornSumKernel
,
nb
.
getTiles
().
getSize
()
*
OpenCLContext
::
TileSize
);
int
numTiles
=
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
;
cl
.
executeKernel
(
computeBornSumKernel
,
numTiles
*
OpenCLContext
::
TileSize
);
cl
.
executeKernel
(
reduceBornSumKernel
,
cl
.
getPaddedNumAtoms
());
cl
.
executeKernel
(
reduceBornSumKernel
,
cl
.
getPaddedNumAtoms
());
cl
.
executeKernel
(
force1Kernel
,
n
b
.
getTiles
().
getSize
()
*
OpenCLContext
::
TileSize
);
cl
.
executeKernel
(
force1Kernel
,
n
umTiles
*
OpenCLContext
::
TileSize
);
cl
.
executeKernel
(
reduceBornForceKernel
,
cl
.
getPaddedNumAtoms
());
cl
.
executeKernel
(
reduceBornForceKernel
,
cl
.
getPaddedNumAtoms
());
return
0.0
;
return
0.0
;
}
}
...
@@ -2406,10 +2403,8 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -2406,10 +2403,8 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
index
+=
2
;
// Periodic box size arguments are set when the kernel is executed.
index
+=
2
;
// Periodic box size arguments are set when the kernel is executed.
}
}
else
{
else
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getTiles
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
pairValueKernel
.
setArg
<
cl_uint
>
(
index
++
,
nb
.
getTiles
().
getSize
());
}
if
(
globals
!=
NULL
)
if
(
globals
!=
NULL
)
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globals
->
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globals
->
getDeviceBuffer
());
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
...
@@ -2454,10 +2449,8 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -2454,10 +2449,8 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
index
+=
2
;
// Periodic box size arguments are set when the kernel is executed.
index
+=
2
;
// Periodic box size arguments are set when the kernel is executed.
}
}
else
{
else
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getTiles
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
pairEnergyKernel
.
setArg
<
cl_uint
>
(
index
++
,
nb
.
getTiles
().
getSize
());
}
if
(
globals
!=
NULL
)
if
(
globals
!=
NULL
)
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globals
->
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globals
->
getDeviceBuffer
());
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
...
@@ -2530,9 +2523,10 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -2530,9 +2523,10 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairEnergyKernel
.
setArg
<
mm_float4
>
(
12
,
cl
.
getPeriodicBoxSize
());
pairEnergyKernel
.
setArg
<
mm_float4
>
(
12
,
cl
.
getPeriodicBoxSize
());
pairEnergyKernel
.
setArg
<
mm_float4
>
(
13
,
cl
.
getInvPeriodicBoxSize
());
pairEnergyKernel
.
setArg
<
mm_float4
>
(
13
,
cl
.
getInvPeriodicBoxSize
());
}
}
cl
.
executeKernel
(
pairValueKernel
,
nb
.
getTiles
().
getSize
()
*
OpenCLContext
::
TileSize
);
int
numTiles
=
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
;
cl
.
executeKernel
(
pairValueKernel
,
numTiles
*
OpenCLContext
::
TileSize
);
cl
.
executeKernel
(
perParticleValueKernel
,
cl
.
getPaddedNumAtoms
());
cl
.
executeKernel
(
perParticleValueKernel
,
cl
.
getPaddedNumAtoms
());
cl
.
executeKernel
(
pairEnergyKernel
,
n
b
.
getTiles
().
getSize
()
*
OpenCLContext
::
TileSize
);
cl
.
executeKernel
(
pairEnergyKernel
,
n
umTiles
*
OpenCLContext
::
TileSize
);
cl
.
executeKernel
(
perParticleEnergyKernel
,
cl
.
getPaddedNumAtoms
());
cl
.
executeKernel
(
perParticleEnergyKernel
,
cl
.
getPaddedNumAtoms
());
if
(
needParameterGradient
)
if
(
needParameterGradient
)
cl
.
executeKernel
(
gradientChainRuleKernel
,
cl
.
getPaddedNumAtoms
());
cl
.
executeKernel
(
gradientChainRuleKernel
,
cl
.
getPaddedNumAtoms
());
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
22078b69
...
@@ -26,7 +26,6 @@
...
@@ -26,7 +26,6 @@
#include "OpenCLNonbondedUtilities.h"
#include "OpenCLNonbondedUtilities.h"
#include "OpenCLArray.h"
#include "OpenCLArray.h"
#include "OpenCLCompact.h"
#include "OpenCLKernelSources.h"
#include "OpenCLKernelSources.h"
#include "OpenCLExpressionUtilities.h"
#include "OpenCLExpressionUtilities.h"
#include <map>
#include <map>
...
@@ -37,8 +36,8 @@ using namespace OpenMM;
...
@@ -37,8 +36,8 @@ using namespace OpenMM;
using
namespace
std
;
using
namespace
std
;
OpenCLNonbondedUtilities
::
OpenCLNonbondedUtilities
(
OpenCLContext
&
context
)
:
context
(
context
),
cutoff
(
-
1.0
),
useCutoff
(
false
),
OpenCLNonbondedUtilities
::
OpenCLNonbondedUtilities
(
OpenCLContext
&
context
)
:
context
(
context
),
cutoff
(
-
1.0
),
useCutoff
(
false
),
numForceBuffers
(
0
),
tiles
(
NULL
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactionFlags
(
NULL
),
numForceBuffers
(
0
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactionFlags
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
)
,
compact
(
NULL
)
{
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
)
{
// Decide how many force buffers to use.
// Decide how many force buffers to use.
forceBufferPerAtomBlock
=
false
;
forceBufferPerAtomBlock
=
false
;
...
@@ -52,8 +51,6 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
...
@@ -52,8 +51,6 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
}
}
OpenCLNonbondedUtilities
::~
OpenCLNonbondedUtilities
()
{
OpenCLNonbondedUtilities
::~
OpenCLNonbondedUtilities
()
{
if
(
tiles
!=
NULL
)
delete
tiles
;
if
(
exclusionIndices
!=
NULL
)
if
(
exclusionIndices
!=
NULL
)
delete
exclusionIndices
;
delete
exclusionIndices
;
if
(
exclusionRowIndices
!=
NULL
)
if
(
exclusionRowIndices
!=
NULL
)
...
@@ -70,8 +67,6 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
...
@@ -70,8 +67,6 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
delete
blockCenter
;
delete
blockCenter
;
if
(
blockBoundingBox
!=
NULL
)
if
(
blockBoundingBox
!=
NULL
)
delete
blockBoundingBox
;
delete
blockBoundingBox
;
if
(
compact
!=
NULL
)
delete
compact
;
}
}
void
OpenCLNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
)
{
void
OpenCLNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
)
{
...
@@ -127,33 +122,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -127,33 +122,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
int
numAtomBlocks
=
context
.
getNumAtomBlocks
();
int
numAtomBlocks
=
context
.
getNumAtomBlocks
();
int
numTiles
=
numAtomBlocks
*
(
numAtomBlocks
+
1
)
/
2
;
int
numTiles
=
numAtomBlocks
*
(
numAtomBlocks
+
1
)
/
2
;
tiles
=
new
OpenCLArray
<
cl_uint
>
(
context
,
numTiles
,
"tiles"
);
vector
<
cl_uint
>
tileVec
(
tiles
->
getSize
());
unsigned
int
count
=
0
;
for
(
unsigned
int
y
=
0
;
y
<
(
unsigned
int
)
numAtomBlocks
;
y
++
)
for
(
unsigned
int
x
=
y
;
x
<
(
unsigned
int
)
numAtomBlocks
;
x
++
)
tileVec
[
count
++
]
=
(
x
<<
17
)
|
(
y
<<
2
);
// Mark which tiles have exclusions.
for
(
int
atom1
=
0
;
atom1
<
(
int
)
atomExclusions
.
size
();
++
atom1
)
{
int
x
=
atom1
/
OpenCLContext
::
TileSize
;
for
(
int
j
=
0
;
j
<
(
int
)
atomExclusions
[
atom1
].
size
();
++
j
)
{
int
atom2
=
atomExclusions
[
atom1
][
j
];
int
y
=
atom2
/
OpenCLContext
::
TileSize
;
int
index
=
(
x
>
y
?
x
+
y
*
numAtomBlocks
-
y
*
(
y
+
1
)
/
2
:
y
+
x
*
numAtomBlocks
-
x
*
(
x
+
1
)
/
2
);
tileVec
[
index
]
|=
1
;
}
}
if
(
context
.
getPaddedNumAtoms
()
>
context
.
getNumAtoms
())
{
int
lastTile
=
context
.
getNumAtoms
()
/
OpenCLContext
::
TileSize
;
for
(
int
i
=
0
;
i
<
numTiles
;
++
i
)
{
int
x
=
tileVec
[
i
]
>>
17
;
int
y
=
(
tileVec
[
i
]
>>
2
)
&
0x7FFF
;
if
(
x
==
lastTile
||
y
==
lastTile
)
tileVec
[
i
]
|=
1
;
}
}
// Build a list of indices for the tiles with exclusions.
// Build a list of indices for the tiles with exclusions.
...
@@ -227,7 +195,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -227,7 +195,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
}
}
}
}
atomExclusions
.
clear
();
// We won't use this again, so free the memory it used
atomExclusions
.
clear
();
// We won't use this again, so free the memory it used
tiles
->
upload
(
tileVec
);
exclusions
->
upload
(
exclusionVec
);
exclusions
->
upload
(
exclusionVec
);
// Create data structures for the neighbor list.
// Create data structures for the neighbor list.
...
@@ -238,7 +205,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -238,7 +205,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
interactionCount
=
new
OpenCLArray
<
cl_uint
>
(
context
,
1
,
"interactionCount"
);
interactionCount
=
new
OpenCLArray
<
cl_uint
>
(
context
,
1
,
"interactionCount"
);
blockCenter
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockCenter"
);
blockCenter
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockCenter"
);
blockBoundingBox
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockBoundingBox"
);
blockBoundingBox
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockBoundingBox"
);
compact
=
new
OpenCLCompact
(
context
);
}
}
// Create kernels.
// Create kernels.
...
@@ -246,6 +212,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -246,6 +212,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
forceKernel
=
createInteractionKernel
(
kernelSource
,
parameters
,
arguments
,
true
,
true
);
forceKernel
=
createInteractionKernel
(
kernelSource
,
parameters
,
arguments
,
true
,
true
);
if
(
useCutoff
)
{
if
(
useCutoff
)
{
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtomBlocks
());
if
(
forceBufferPerAtomBlock
)
if
(
forceBufferPerAtomBlock
)
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
if
(
usePeriodic
)
if
(
usePeriodic
)
...
@@ -256,13 +223,13 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -256,13 +223,13 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosq
().
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosq
().
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
blockCenter
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
blockCenter
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
blockBoundingBox
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
blockBoundingBox
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
findInteractingBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
findInteractingBlocksKernel
.
setArg
<
cl_int
>
(
0
,
tiles
->
getSize
());
findInteractingBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
findInteractingBlocksKernel
.
setArg
<
cl_float
>
(
1
,
(
cl_float
)
(
cutoff
*
cutoff
));
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
blockCenter
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
tiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
blockBoundingBox
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
blockCenter
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
blockBoundingBox
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactionFlags
->
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findInteractionsWithinBlocks"
);
findInteractionsWithinBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findInteractionsWithinBlocks"
);
findInteractionsWithinBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
findInteractionsWithinBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosq
().
getDeviceBuffer
());
...
@@ -293,10 +260,9 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
...
@@ -293,10 +260,9 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
findBlockBoundsKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
());
findBlockBoundsKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
());
findBlockBoundsKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
());
findBlockBoundsKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
());
context
.
executeKernel
(
findBlockBoundsKernel
,
context
.
getNumAtoms
());
context
.
executeKernel
(
findBlockBoundsKernel
,
context
.
getNumAtoms
());
findInteractingBlocksKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getPeriodicBoxSize
());
findInteractingBlocksKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
());
findInteractingBlocksKernel
.
setArg
<
mm_float4
>
(
3
,
context
.
getInvPeriodicBoxSize
());
findInteractingBlocksKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
());
context
.
executeKernel
(
findInteractingBlocksKernel
,
context
.
getNumAtoms
());
context
.
executeKernel
(
findInteractingBlocksKernel
,
context
.
getNumAtoms
());
compact
->
compactStream
(
*
interactingTiles
,
*
tiles
,
*
interactionFlags
,
*
interactionCount
);
if
(
context
.
getSIMDWidth
()
==
32
)
{
if
(
context
.
getSIMDWidth
()
==
32
)
{
findInteractionsWithinBlocksKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
());
findInteractionsWithinBlocksKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
());
findInteractionsWithinBlocksKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
());
findInteractionsWithinBlocksKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
());
...
@@ -305,12 +271,12 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
...
@@ -305,12 +271,12 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
}
}
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
if
(
tiles
!=
NULL
)
{
if
(
cutoff
!=
-
1.0
)
{
if
(
useCutoff
)
{
if
(
useCutoff
)
{
forceKernel
.
setArg
<
mm_float4
>
(
11
,
context
.
getPeriodicBoxSize
());
forceKernel
.
setArg
<
mm_float4
>
(
11
,
context
.
getPeriodicBoxSize
());
forceKernel
.
setArg
<
mm_float4
>
(
12
,
context
.
getInvPeriodicBoxSize
());
forceKernel
.
setArg
<
mm_float4
>
(
12
,
context
.
getInvPeriodicBoxSize
());
}
}
context
.
executeKernel
(
forceKernel
,
tiles
->
getSize
(
)
*
OpenCLContext
::
TileSize
);
context
.
executeKernel
(
forceKernel
,
(
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
)
*
OpenCLContext
::
TileSize
);
}
}
}
}
...
@@ -443,8 +409,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -443,8 +409,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
index
+=
2
;
// The periodic box size arguments are set when the kernel is executed.
index
+=
2
;
// The periodic box size arguments are set when the kernel is executed.
}
}
else
{
else
{
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
tiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl_uint
>
(
index
++
,
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
);
kernel
.
setArg
<
cl_uint
>
(
index
++
,
tiles
->
getSize
());
}
}
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
params
[
i
].
getMemory
());
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
params
[
i
].
getMemory
());
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.h
View file @
22078b69
...
@@ -35,8 +35,6 @@
...
@@ -35,8 +35,6 @@
namespace
OpenMM
{
namespace
OpenMM
{
class
OpenCLCompact
;
/**
/**
* This class provides a generic interface for calculating nonbonded interactions. It does this in two
* This class provides a generic interface for calculating nonbonded interactions. It does this in two
* ways. First, it can be used to create Kernels that evaluate nonbonded interactions. Clients
* ways. First, it can be used to create Kernels that evaluate nonbonded interactions. Clients
...
@@ -141,12 +139,6 @@ public:
...
@@ -141,12 +139,6 @@ public:
OpenCLArray
<
mm_float4
>&
getBlockBoundingBoxes
()
{
OpenCLArray
<
mm_float4
>&
getBlockBoundingBoxes
()
{
return
*
blockBoundingBox
;
return
*
blockBoundingBox
;
}
}
/**
* Get the array containing the full set of tiles.
*/
OpenCLArray
<
cl_uint
>&
getTiles
()
{
return
*
tiles
;
}
/**
/**
* Get the array whose first element contains the number of tiles with interactions.
* Get the array whose first element contains the number of tiles with interactions.
*/
*/
...
@@ -202,7 +194,6 @@ private:
...
@@ -202,7 +194,6 @@ private:
cl
::
Kernel
findBlockBoundsKernel
;
cl
::
Kernel
findBlockBoundsKernel
;
cl
::
Kernel
findInteractingBlocksKernel
;
cl
::
Kernel
findInteractingBlocksKernel
;
cl
::
Kernel
findInteractionsWithinBlocksKernel
;
cl
::
Kernel
findInteractionsWithinBlocksKernel
;
OpenCLArray
<
cl_uint
>*
tiles
;
OpenCLArray
<
cl_uint
>*
exclusions
;
OpenCLArray
<
cl_uint
>*
exclusions
;
OpenCLArray
<
cl_uint
>*
exclusionIndices
;
OpenCLArray
<
cl_uint
>*
exclusionIndices
;
OpenCLArray
<
cl_uint
>*
exclusionRowIndices
;
OpenCLArray
<
cl_uint
>*
exclusionRowIndices
;
...
@@ -214,7 +205,6 @@ private:
...
@@ -214,7 +205,6 @@ private:
std
::
vector
<
std
::
vector
<
int
>
>
atomExclusions
;
std
::
vector
<
std
::
vector
<
int
>
>
atomExclusions
;
std
::
vector
<
ParameterInfo
>
parameters
;
std
::
vector
<
ParameterInfo
>
parameters
;
std
::
vector
<
ParameterInfo
>
arguments
;
std
::
vector
<
ParameterInfo
>
arguments
;
OpenCLCompact
*
compact
;
std
::
string
kernelSource
;
std
::
string
kernelSource
;
std
::
map
<
std
::
string
,
std
::
string
>
kernelDefines
;
std
::
map
<
std
::
string
,
std
::
string
>
kernelDefines
;
double
cutoff
;
double
cutoff
;
...
...
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
View file @
22078b69
...
@@ -9,9 +9,9 @@
...
@@ -9,9 +9,9 @@
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeN2Energy
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__local
float4*
local_force,
void
computeN2Energy
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__local
float4*
local_force,
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
float4*
tempForceBuffer,
__global
unsigned
int*
tiles,
__global
unsigned
int*
exclusionRowIndices,
__local
float4*
tempForceBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
unsigned
int*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
...
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
View file @
22078b69
...
@@ -9,9 +9,9 @@
...
@@ -9,9 +9,9 @@
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeN2Energy
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__local
float4*
local_force,
void
computeN2Energy
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__local
float4*
local_force,
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
float4*
tempBuffer,
__global
unsigned
int*
tiles,
__global
unsigned
int*
exclusionRowIndices,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
unsigned
int*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
...
platforms/opencl/src/kernels/customGBValueN2_default.cl
View file @
22078b69
...
@@ -7,9 +7,9 @@
...
@@ -7,9 +7,9 @@
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeN2Value
(
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
void
computeN2Value
(
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__global
float*
global_value,
__local
float*
local_value,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__global
float*
global_value,
__local
float*
local_value,
__local
float*
tempBuffer,
__global
unsigned
int*
tiles,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
unsigned
int*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
...
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
View file @
22078b69
...
@@ -7,9 +7,9 @@
...
@@ -7,9 +7,9 @@
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeN2Value
(
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
void
computeN2Value
(
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__global
float*
global_value,
__local
float*
local_value,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__global
float*
global_value,
__local
float*
local_value,
__local
float*
tempBuffer,
__global
unsigned
int*
tiles,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
unsigned
int*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
...
platforms/opencl/src/kernels/findInteractingBlocks.cl
View file @
22078b69
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics
:
enable
#
define
TILE_SIZE
32
#
define
TILE_SIZE
32
#
define
GROUP_SIZE
64
#
define
BUFFER_GROUPS
4
#
define
BUFFER_SIZE
BUFFER_GROUPS*GROUP_SIZE
/**
/**
*
Find
a
bounding
box
for
the
atoms
in
each
block.
*
Find
a
bounding
box
for
the
atoms
in
each
block.
*/
*/
__kernel
void
findBlockBounds
(
int
numAtoms,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox
)
{
__kernel
void
findBlockBounds
(
int
numAtoms,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox
,
__global
unsigned
int*
interactionCount
)
{
int
index
=
get_global_id
(
0
)
;
int
index
=
get_global_id
(
0
)
;
int
base
=
index*TILE_SIZE
;
int
base
=
index*TILE_SIZE
;
while
(
base
<
numAtoms
)
{
while
(
base
<
numAtoms
)
{
...
@@ -32,21 +36,89 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPe
...
@@ -32,21 +36,89 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPe
index
+=
get_global_size
(
0
)
;
index
+=
get_global_size
(
0
)
;
base
=
index*TILE_SIZE
;
base
=
index*TILE_SIZE
;
}
}
if
(
get_global_id
(
0
)
==
0
)
interactionCount[0]
=
0
;
}
/**
*
This
is
called
by
findBlocksWithInteractions
()
.
It
compacts
the
list
of
blocks
and
writes
them
*
to
global
memory.
*/
void
storeInteractionData
(
__local
short2*
buffer,
__local
bool*
valid,
__local
int*
sum,
__local
int*
sum2,
__local
short2*
temp,
__local
int*
baseIndex,
__global
unsigned
int*
interactionCount,
__global
unsigned
int*
interactingTiles
)
{
//
The
buffer
is
full,
so
we
need
to
compact
it
and
write
out
results.
Start
by
doing
a
parallel
prefix
sum.
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
sum[i]
=
(
valid[i]
?
1
:
0
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
whichBuffer
=
0
;
for
(
int
offset
=
1
; offset < BUFFER_SIZE; offset *= 2) {
if
(
whichBuffer
==
0
)
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
sum2[i]
=
(
i
<
offset
?
sum[i]
:
sum[i]+sum[i-offset]
)
;
else
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
sum[i]
=
(
i
<
offset
?
sum2[i]
:
sum2[i]+sum2[i-offset]
)
;
whichBuffer
=
1-whichBuffer
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
whichBuffer
==
1
)
{
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
sum[i]
=
sum2[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
//
Compact
the
buffer
and
store
it
to
global
memory.
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
if
(
valid[i]
)
{
temp[sum[i]-1]
=
buffer[i]
;
valid[i]
=
false
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
numValid
=
sum[BUFFER_SIZE-1]
;
if
(
get_local_id
(
0
)
==
0
)
*baseIndex
=
atom_add
(
interactionCount,
numValid
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Store
it
to
global
memory.
for
(
int
i
=
get_local_id
(
0
)
; i < numValid; i += GROUP_SIZE)
interactingTiles[*baseIndex+i]
=
(
temp[i].x<<17
)
+
(
temp[i].y<<2
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
/**
/**
*
Compare
the
bounding
boxes
for
each
pair
of
blocks.
If
they
are
sufficiently
far
apart,
*
Compare
the
bounding
boxes
for
each
pair
of
blocks.
If
they
are
sufficiently
far
apart,
*
mark
them
as
non-interacting.
*
mark
them
as
non-interacting.
*/
*/
__kernel
void
findBlocksWithInteractions
(
int
numTiles,
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
unsigned
int*
tiles,
__global
float4*
blockCenter,
__kernel
void
findBlocksWithInteractions
(
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox,
__global
unsigned
int*
interactionFlag
)
{
__global
float4*
blockBoundingBox,
__global
unsigned
int*
interactionCount,
__global
unsigned
int*
interactingTiles
)
{
int
index
=
get_global_id
(
0
)
;
__local
short2
buffer[BUFFER_SIZE]
;
while
(
index
<
numTiles
)
{
__local
bool
valid[BUFFER_SIZE]
;
//
Extract
cell
coordinates
from
appropriate
work
unit
__local
int
sum[BUFFER_SIZE]
;
__local
int
sum2[BUFFER_SIZE]
;
__local
short2
temp[BUFFER_SIZE]
;
__local
int
bufferFull
;
__local
int
globalIndex
;
int
valuesInBuffer
=
0
;
if
(
get_local_id
(
0
)
==
0
)
bufferFull
=
false
;
for
(
int
i
=
0
; i < BUFFER_GROUPS; ++i)
valid[i*GROUP_SIZE+get_local_id
(
0
)
]
=
false
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
const
int
numTiles
=
(
NUM_BLOCKS*
(
NUM_BLOCKS+1
))
/2
;
for
(
int
baseIndex
=
get_group_id
(
0
)
*get_local_size
(
0
)
; baseIndex < numTiles; baseIndex += get_global_size(0)) {
//
Identify
the
pair
of
blocks
to
compare.
unsigned
int
x
=
tiles[index]
;
int
index
=
baseIndex+get_local_id
(
0
)
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
;
if
(
index
<
numTiles
)
{
x
=
(
x
>>
17
)
;
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*index
))
;
unsigned
int
x
=
(
index-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y++
;
x
=
(
index-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
//
Find
the
distance
between
the
bounding
boxes
of
the
two
cells.
//
Find
the
distance
between
the
bounding
boxes
of
the
two
cells.
...
@@ -61,9 +133,27 @@ __kernel void findBlocksWithInteractions(int numTiles, float cutoffSquared, floa
...
@@ -61,9 +133,27 @@ __kernel void findBlocksWithInteractions(int numTiles, float cutoffSquared, floa
delta.x
=
max
(
0.0f,
fabs
(
delta.x
)
-boxSizea.x-boxSizeb.x
)
;
delta.x
=
max
(
0.0f,
fabs
(
delta.x
)
-boxSizea.x-boxSizeb.x
)
;
delta.y
=
max
(
0.0f,
fabs
(
delta.y
)
-boxSizea.y-boxSizeb.y
)
;
delta.y
=
max
(
0.0f,
fabs
(
delta.y
)
-boxSizea.y-boxSizeb.y
)
;
delta.z
=
max
(
0.0f,
fabs
(
delta.z
)
-boxSizea.z-boxSizeb.z
)
;
delta.z
=
max
(
0.0f,
fabs
(
delta.z
)
-boxSizea.z-boxSizeb.z
)
;
interactionFlag[index]
=
(
delta.x*delta.x+delta.y*delta.y+delta.z*delta.z
>
cutoffSquared
?
0
:
1
)
;
if
(
delta.x*delta.x+delta.y*delta.y+delta.z*delta.z
<
cutoffSquared
)
{
index
+=
get_global_size
(
0
)
;
//
Add
this
tile
to
the
buffer.
int
bufferIndex
=
valuesInBuffer*GROUP_SIZE+get_local_id
(
0
)
;
valid[bufferIndex]
=
true
;
buffer[bufferIndex]
=
(
short2
)
(
x,
y
)
;
valuesInBuffer++
;
if
(
!bufferFull
&&
valuesInBuffer
==
BUFFER_GROUPS
)
bufferFull
=
true
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
bufferFull
)
{
storeInteractionData
(
buffer,
valid,
sum,
sum2,
temp,
&globalIndex,
interactionCount,
interactingTiles
)
;
valuesInBuffer
=
0
;
if
(
get_local_id
(
0
)
==
0
)
bufferFull
=
false
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
}
storeInteractionData
(
buffer,
valid,
sum,
sum2,
temp,
&globalIndex,
interactionCount,
interactingTiles
)
;
}
}
/**
/**
...
...
platforms/opencl/src/kernels/gbsaObc_default.cl
View file @
22078b69
...
@@ -15,9 +15,9 @@ typedef struct {
...
@@ -15,9 +15,9 @@ typedef struct {
*/
*/
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
__global
unsigned
int*
tiles,
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__global
unsigned
int*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
endif
...
@@ -194,9 +194,9 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -194,9 +194,9 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeGBSAForce1
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
void
computeGBSAForce1
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__global
unsigned
int*
tiles,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__global
unsigned
int*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
endif
...
...
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
View file @
22078b69
...
@@ -15,9 +15,9 @@ typedef struct {
...
@@ -15,9 +15,9 @@ typedef struct {
*/
*/
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
__global
unsigned
int*
tiles,
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__global
unsigned
int*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
endif
...
@@ -116,7 +116,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -116,7 +116,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
localData[get_local_id
(
0
)
].bornSum
=
0.0f
;
localData[get_local_id
(
0
)
].bornSum
=
0.0f
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
interactionFlags[pos]
;
unsigned
int
flags
=
interactionFlags[pos]
;
if
(
flags
!=
0xFFFFFFFF
)
{
if
(
flags
!=
0xFFFFFFFF
&&
false
)
{
//
TODO:
Fix
this:
should
be
checking
for
exclusions
if
(
flags
==
0
)
{
if
(
flags
==
0
)
{
//
No
interactions
in
this
tile.
//
No
interactions
in
this
tile.
}
}
...
@@ -260,9 +260,9 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -260,9 +260,9 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeGBSAForce1
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
void
computeGBSAForce1
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__global
unsigned
int*
tiles,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__global
unsigned
int*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
endif
...
@@ -366,7 +366,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -366,7 +366,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
localData[get_local_id
(
0
)
].fw
=
0.0f
;
localData[get_local_id
(
0
)
].fw
=
0.0f
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
interactionFlags[pos]
;
unsigned
int
flags
=
interactionFlags[pos]
;
if
(
flags
!=
0xFFFFFFFF
)
{
if
(
flags
!=
0xFFFFFFFF
&&
false
)
{
//
TODO:
Fix
this:
should
be
checking
for
exclusions
if
(
flags
==
0
)
{
if
(
flags
==
0
)
{
//
No
interactions
in
this
tile.
//
No
interactions
in
this
tile.
}
}
...
...
platforms/opencl/src/kernels/nonbonded_default.cl
View file @
22078b69
...
@@ -13,9 +13,9 @@ typedef struct {
...
@@ -13,9 +13,9 @@ typedef struct {
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__global
unsigned
int*
tiles,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
unsigned
int*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
...
platforms/opencl/src/kernels/nonbonded_nvidia.cl
View file @
22078b69
...
@@ -13,9 +13,9 @@ typedef struct {
...
@@ -13,9 +13,9 @@ typedef struct {
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__global
unsigned
int*
tiles,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
unsigned
int*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
...
platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
View file @
22078b69
...
@@ -521,7 +521,7 @@ void testBlockInteractions(bool periodic) {
...
@@ -521,7 +521,7 @@ void testBlockInteractions(bool periodic) {
vector
<
cl_uint
>
interactionFlags
;
vector
<
cl_uint
>
interactionFlags
;
nb
.
getInteractionCount
().
download
(
interactionCount
);
nb
.
getInteractionCount
().
download
(
interactionCount
);
int
numWithInteractions
=
interactionCount
[
0
];
int
numWithInteractions
=
interactionCount
[
0
];
vector
<
bool
>
hasInteractions
(
n
b
.
getTiles
().
getSize
()
,
false
);
vector
<
bool
>
hasInteractions
(
n
umBlocks
*
(
numBlocks
+
1
)
/
2
,
false
);
nb
.
getInteractingTiles
().
download
(
interactingTiles
);
nb
.
getInteractingTiles
().
download
(
interactingTiles
);
nb
.
getInteractionFlags
().
download
(
interactionFlags
);
nb
.
getInteractionFlags
().
download
(
interactionFlags
);
const
unsigned
int
atoms
=
clcontext
.
getPaddedNumAtoms
();
const
unsigned
int
atoms
=
clcontext
.
getPaddedNumAtoms
();
...
@@ -580,13 +580,10 @@ void testBlockInteractions(bool periodic) {
...
@@ -580,13 +580,10 @@ void testBlockInteractions(bool periodic) {
// Check the tiles that did not have interactions to make sure all atoms are beyond the cutoff.
// Check the tiles that did not have interactions to make sure all atoms are beyond the cutoff.
vector
<
cl_uint
>
tiles
;
nb
.
getTiles
().
download
(
tiles
);
for
(
int
i
=
0
;
i
<
(
int
)
hasInteractions
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
hasInteractions
.
size
();
i
++
)
if
(
!
hasInteractions
[
i
])
{
if
(
!
hasInteractions
[
i
])
{
unsigned
int
tile
=
tiles
[
i
];
unsigned
int
y
=
(
unsigned
int
)
std
::
floor
(
numBlocks
+
0.5
-
std
::
sqrt
((
numBlocks
+
0.5
)
*
(
numBlocks
+
0.5
)
-
2
*
i
));
unsigned
int
x
=
(
tile
>>
17
);
unsigned
int
x
=
(
i
-
y
*
numBlocks
+
y
*
(
y
+
1
)
/
2
);
unsigned
int
y
=
((
tile
>>
2
)
&
0x7fff
);
for
(
int
atom1
=
0
;
atom1
<
blockSize
;
++
atom1
)
{
for
(
int
atom1
=
0
;
atom1
<
blockSize
;
++
atom1
)
{
mm_float4
pos1
=
clcontext
.
getPosq
()[
x
*
blockSize
+
atom1
];
mm_float4
pos1
=
clcontext
.
getPosq
()[
x
*
blockSize
+
atom1
];
for
(
int
atom2
=
0
;
atom2
<
blockSize
;
++
atom2
)
{
for
(
int
atom2
=
0
;
atom2
<
blockSize
;
++
atom2
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment