Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
a5e4de14
"serialization/tests/TestSerializeRGForce.cpp" did not exist on "18c9a78a8a9950be32af8021986d619afab3e017"
Commit
a5e4de14
authored
Aug 25, 2010
by
Peter Eastman
Browse files
Allow the neighbor list arrays to grow if the initial sizes are too small
parent
d3f0d1f7
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
100 additions
and
47 deletions
+100
-47
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+23
-7
platforms/opencl/src/OpenCLKernels.h
platforms/opencl/src/OpenCLKernels.h
+2
-0
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+36
-4
platforms/opencl/src/OpenCLNonbondedUtilities.h
platforms/opencl/src/OpenCLNonbondedUtilities.h
+5
-2
platforms/opencl/src/kernels/findInteractingBlocks.cl
platforms/opencl/src/kernels/findInteractingBlocks.cl
+7
-7
platforms/opencl/src/kernels/gbsaObc_default.cl
platforms/opencl/src/kernels/gbsaObc_default.cl
+8
-8
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
+10
-10
platforms/opencl/src/kernels/nonbonded_default.cl
platforms/opencl/src/kernels/nonbonded_default.cl
+4
-4
platforms/opencl/src/kernels/nonbonded_nvidia.cl
platforms/opencl/src/kernels/nonbonded_nvidia.cl
+5
-5
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
a5e4de14
...
@@ -71,8 +71,10 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
...
@@ -71,8 +71,10 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
}
}
void
OpenCLCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
void
OpenCLCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
if
(
cl
.
getNonbondedUtilities
().
getUseCutoff
()
&&
cl
.
getComputeForceCount
()
%
100
==
0
)
if
(
cl
.
getNonbondedUtilities
().
getUseCutoff
()
&&
cl
.
getComputeForceCount
()
%
100
==
0
)
{
cl
.
reorderAtoms
();
cl
.
reorderAtoms
();
cl
.
getNonbondedUtilities
().
updateNeighborListSize
();
}
cl
.
setComputeForceCount
(
cl
.
getComputeForceCount
()
+
1
);
cl
.
setComputeForceCount
(
cl
.
getComputeForceCount
()
+
1
);
cl
.
clearAutoclearBuffers
();
cl
.
clearAutoclearBuffers
();
cl
.
getNonbondedUtilities
().
prepareInteractions
();
cl
.
getNonbondedUtilities
().
prepareInteractions
();
...
@@ -1679,6 +1681,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1679,6 +1681,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
// These Kernels cannot be created in initialize(), because the OpenCLNonbondedUtilities has not been initialized yet then.
// These Kernels cannot be created in initialize(), because the OpenCLNonbondedUtilities has not been initialized yet then.
hasCreatedKernels
=
true
;
hasCreatedKernels
=
true
;
maxTiles
=
(
nb
.
getUseCutoff
()
?
nb
.
getInteractingTiles
().
getSize
()
:
0
);
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
if
(
nb
.
getForceBufferPerAtomBlock
())
if
(
nb
.
getForceBufferPerAtomBlock
())
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
...
@@ -1691,8 +1694,6 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1691,8 +1694,6 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNumAtomBlocks
());
if
(
nb
.
getUseCutoff
())
defines
[
"MAX_TILES"
]
=
OpenCLExpressionUtilities
::
intToString
(
nb
.
getInteractingTiles
().
getSize
());
string
file
=
(
cl
.
getSIMDWidth
()
==
32
?
OpenCLKernelSources
::
gbsaObc_nvidia
:
OpenCLKernelSources
::
gbsaObc_default
);
string
file
=
(
cl
.
getSIMDWidth
()
==
32
?
OpenCLKernelSources
::
gbsaObc_nvidia
:
OpenCLKernelSources
::
gbsaObc_default
);
cl
::
Program
program
=
cl
.
createProgram
(
file
,
defines
);
cl
::
Program
program
=
cl
.
createProgram
(
file
,
defines
);
int
index
=
0
;
int
index
=
0
;
...
@@ -1705,8 +1706,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1705,8 +1706,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
if
(
nb
.
getUseCutoff
())
{
if
(
nb
.
getUseCutoff
())
{
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
index
+=
2
;
// The periodic box size arguments are set when the kernel is executed.
computeBornSumKernel
.
setArg
<
cl_uint
>
(
index
++
,
maxTiles
);
if
(
cl
.
getSIMDWidth
()
==
32
)
if
(
cl
.
getSIMDWidth
()
==
32
)
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
+
2
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
}
}
else
else
computeBornSumKernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
computeBornSumKernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
...
@@ -1722,8 +1725,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1722,8 +1725,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
if
(
nb
.
getUseCutoff
())
{
if
(
nb
.
getUseCutoff
())
{
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
index
+=
2
;
// The periodic box size arguments are set when the kernel is executed.
force1Kernel
.
setArg
<
cl_uint
>
(
index
++
,
maxTiles
);
if
(
cl
.
getSIMDWidth
()
==
32
)
if
(
cl
.
getSIMDWidth
()
==
32
)
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
+
2
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
}
}
else
else
force1Kernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
force1Kernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
...
@@ -1752,6 +1757,11 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1752,6 +1757,11 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
computeBornSumKernel
.
setArg
<
mm_float4
>
(
8
,
cl
.
getInvPeriodicBoxSize
());
computeBornSumKernel
.
setArg
<
mm_float4
>
(
8
,
cl
.
getInvPeriodicBoxSize
());
force1Kernel
.
setArg
<
mm_float4
>
(
9
,
cl
.
getPeriodicBoxSize
());
force1Kernel
.
setArg
<
mm_float4
>
(
9
,
cl
.
getPeriodicBoxSize
());
force1Kernel
.
setArg
<
mm_float4
>
(
10
,
cl
.
getInvPeriodicBoxSize
());
force1Kernel
.
setArg
<
mm_float4
>
(
10
,
cl
.
getInvPeriodicBoxSize
());
if
(
maxTiles
<
nb
.
getInteractingTiles
().
getSize
())
{
maxTiles
=
nb
.
getInteractingTiles
().
getSize
();
computeBornSumKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
force1Kernel
.
setArg
<
cl_uint
>
(
11
,
maxTiles
);
}
}
}
int
numTiles
=
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
;
int
numTiles
=
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
;
cl
.
executeKernel
(
computeBornSumKernel
,
numTiles
*
OpenCLContext
::
TileSize
);
cl
.
executeKernel
(
computeBornSumKernel
,
numTiles
*
OpenCLContext
::
TileSize
);
...
@@ -2389,6 +2399,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -2389,6 +2399,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
OpenCLNonbondedUtilities
&
nb
=
cl
.
getNonbondedUtilities
();
OpenCLNonbondedUtilities
&
nb
=
cl
.
getNonbondedUtilities
();
if
(
!
hasInitializedKernels
)
{
if
(
!
hasInitializedKernels
)
{
hasInitializedKernels
=
true
;
hasInitializedKernels
=
true
;
maxTiles
=
(
nb
.
getUseCutoff
()
?
nb
.
getInteractingTiles
().
getSize
()
:
0
);
valueBuffers
=
new
OpenCLArray
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
()
*
cl
.
getNumForceBuffers
(),
"customGBValueBuffers"
);
valueBuffers
=
new
OpenCLArray
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
()
*
cl
.
getNumForceBuffers
(),
"customGBValueBuffers"
);
cl
.
addAutoclearBuffer
(
valueBuffers
->
getDeviceBuffer
(),
valueBuffers
->
getSize
());
cl
.
addAutoclearBuffer
(
valueBuffers
->
getDeviceBuffer
(),
valueBuffers
->
getSize
());
cl
.
clearBuffer
(
*
valueBuffers
);
cl
.
clearBuffer
(
*
valueBuffers
);
...
@@ -2405,7 +2416,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -2405,7 +2416,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
index
+=
2
;
// Periodic box size arguments are set when the kernel is executed.
index
+=
2
;
// Periodic box size arguments are set when the kernel is executed.
pairValueKernel
.
setArg
<
cl_uint
>
(
index
++
,
nb
.
getInteractingTiles
().
getSize
()
);
pairValueKernel
.
setArg
<
cl_uint
>
(
index
++
,
maxTiles
);
if
(
cl
.
getSIMDWidth
()
==
32
)
if
(
cl
.
getSIMDWidth
()
==
32
)
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
}
}
...
@@ -2453,7 +2464,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -2453,7 +2464,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
index
+=
2
;
// Periodic box size arguments are set when the kernel is executed.
index
+=
2
;
// Periodic box size arguments are set when the kernel is executed.
pairEnergyKernel
.
setArg
<
cl_uint
>
(
index
++
,
nb
.
getInteractingTiles
().
getSize
()
);
pairEnergyKernel
.
setArg
<
cl_uint
>
(
index
++
,
maxTiles
);
if
(
cl
.
getSIMDWidth
()
==
32
)
if
(
cl
.
getSIMDWidth
()
==
32
)
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
}
}
...
@@ -2530,6 +2541,11 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -2530,6 +2541,11 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairValueKernel
.
setArg
<
mm_float4
>
(
11
,
cl
.
getInvPeriodicBoxSize
());
pairValueKernel
.
setArg
<
mm_float4
>
(
11
,
cl
.
getInvPeriodicBoxSize
());
pairEnergyKernel
.
setArg
<
mm_float4
>
(
11
,
cl
.
getPeriodicBoxSize
());
pairEnergyKernel
.
setArg
<
mm_float4
>
(
11
,
cl
.
getPeriodicBoxSize
());
pairEnergyKernel
.
setArg
<
mm_float4
>
(
12
,
cl
.
getInvPeriodicBoxSize
());
pairEnergyKernel
.
setArg
<
mm_float4
>
(
12
,
cl
.
getInvPeriodicBoxSize
());
if
(
maxTiles
<
nb
.
getInteractingTiles
().
getSize
())
{
maxTiles
=
nb
.
getInteractingTiles
().
getSize
();
pairValueKernel
.
setArg
<
cl_uint
>
(
12
,
maxTiles
);
pairEnergyKernel
.
setArg
<
cl_uint
>
(
13
,
maxTiles
);
}
}
}
int
numTiles
=
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
;
int
numTiles
=
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
;
cl
.
executeKernel
(
pairValueKernel
,
numTiles
*
OpenCLContext
::
TileSize
);
cl
.
executeKernel
(
pairValueKernel
,
numTiles
*
OpenCLContext
::
TileSize
);
...
...
platforms/opencl/src/OpenCLKernels.h
View file @
a5e4de14
...
@@ -592,6 +592,7 @@ public:
...
@@ -592,6 +592,7 @@ public:
private:
private:
double
prefactor
;
double
prefactor
;
bool
hasCreatedKernels
;
bool
hasCreatedKernels
;
int
maxTiles
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
OpenCLArray
<
mm_float2
>*
params
;
OpenCLArray
<
mm_float2
>*
params
;
OpenCLArray
<
cl_float
>*
bornSum
;
OpenCLArray
<
cl_float
>*
bornSum
;
...
@@ -632,6 +633,7 @@ public:
...
@@ -632,6 +633,7 @@ public:
double
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
);
double
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
);
private:
private:
bool
hasInitializedKernels
,
needParameterGradient
;
bool
hasInitializedKernels
,
needParameterGradient
;
int
maxTiles
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
computedValues
;
OpenCLParameterSet
*
computedValues
;
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
a5e4de14
...
@@ -212,9 +212,11 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -212,9 +212,11 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
interactingTiles
=
new
OpenCLArray
<
mm_ushort2
>
(
context
,
maxInteractingTiles
,
"interactingTiles"
);
interactingTiles
=
new
OpenCLArray
<
mm_ushort2
>
(
context
,
maxInteractingTiles
,
"interactingTiles"
);
if
(
context
.
getSIMDWidth
()
==
32
)
if
(
context
.
getSIMDWidth
()
==
32
)
interactionFlags
=
new
OpenCLArray
<
cl_uint
>
(
context
,
maxInteractingTiles
,
"interactionFlags"
);
interactionFlags
=
new
OpenCLArray
<
cl_uint
>
(
context
,
maxInteractingTiles
,
"interactionFlags"
);
interactionCount
=
new
OpenCLArray
<
cl_uint
>
(
context
,
1
,
"interactionCount"
);
interactionCount
=
new
OpenCLArray
<
cl_uint
>
(
context
,
1
,
"interactionCount"
,
true
);
blockCenter
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockCenter"
);
blockCenter
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockCenter"
);
blockBoundingBox
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockBoundingBox"
);
blockBoundingBox
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockBoundingBox"
);
interactionCount
->
set
(
0
,
0
);
interactionCount
->
upload
();
}
}
// Create kernels.
// Create kernels.
...
@@ -223,7 +225,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -223,7 +225,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
if
(
useCutoff
)
{
if
(
useCutoff
)
{
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"MAX_TILES"
]
=
OpenCLExpressionUtilities
::
intToString
(
interactingTiles
->
getSize
());
if
(
forceBufferPerAtomBlock
)
if
(
forceBufferPerAtomBlock
)
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
if
(
usePeriodic
)
if
(
usePeriodic
)
...
@@ -242,6 +243,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -242,6 +243,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
8
,
interactingTiles
->
getSize
());
if
(
context
.
getSIMDWidth
()
==
32
)
{
if
(
context
.
getSIMDWidth
()
==
32
)
{
findInteractionsWithinBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findInteractionsWithinBlocks"
);
findInteractionsWithinBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findInteractionsWithinBlocks"
);
findInteractionsWithinBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
findInteractionsWithinBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
...
@@ -252,6 +254,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -252,6 +254,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactionFlags
->
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactionFlags
->
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
interactionCount
->
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
interactionCount
->
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
(
9
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_uint
),
NULL
);
findInteractionsWithinBlocksKernel
.
setArg
(
9
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_uint
),
NULL
);
findInteractionsWithinBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
interactingTiles
->
getSize
());
}
}
}
}
}
}
...
@@ -296,6 +299,36 @@ void OpenCLNonbondedUtilities::computeInteractions() {
...
@@ -296,6 +299,36 @@ void OpenCLNonbondedUtilities::computeInteractions() {
}
}
}
}
void
OpenCLNonbondedUtilities
::
updateNeighborListSize
()
{
if
(
!
useCutoff
)
return
;
interactionCount
->
download
();
if
(
interactionCount
->
get
(
0
)
<=
interactingTiles
->
getSize
())
return
;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
int
newSize
=
(
int
)
(
1.2
*
interactionCount
->
get
(
0
));
int
numTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
if
(
newSize
>
numTiles
)
newSize
=
numTiles
;
delete
interactingTiles
;
interactingTiles
=
new
OpenCLArray
<
mm_ushort2
>
(
context
,
newSize
,
"interactingTiles"
);
forceKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
interactingTiles
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl_uint
>
(
12
,
newSize
);
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
8
,
newSize
);
if
(
context
.
getSIMDWidth
()
==
32
)
{
delete
interactionFlags
;
interactionFlags
=
new
OpenCLArray
<
cl_uint
>
(
context
,
newSize
,
"interactionFlags"
);
forceKernel
.
setArg
<
cl
::
Buffer
>
(
13
,
interactionFlags
->
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
interactingTiles
->
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactionFlags
->
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
newSize
);
}
}
cl
::
Kernel
OpenCLNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
const
vector
<
ParameterInfo
>&
params
,
const
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
)
const
{
cl
::
Kernel
OpenCLNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
const
vector
<
ParameterInfo
>&
params
,
const
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
)
const
{
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
...
@@ -403,8 +436,6 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -403,8 +436,6 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines
[
"NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtomBlocks
());
if
(
useCutoff
)
defines
[
"MAX_TILES"
]
=
OpenCLExpressionUtilities
::
intToString
(
interactingTiles
->
getSize
());
string
file
=
(
context
.
getSIMDWidth
()
==
32
?
OpenCLKernelSources
::
nonbonded_nvidia
:
OpenCLKernelSources
::
nonbonded_default
);
string
file
=
(
context
.
getSIMDWidth
()
==
32
?
OpenCLKernelSources
::
nonbonded_nvidia
:
OpenCLKernelSources
::
nonbonded_default
);
cl
::
Program
program
=
context
.
createProgram
(
context
.
replaceStrings
(
file
,
replacements
),
defines
);
cl
::
Program
program
=
context
.
createProgram
(
context
.
replaceStrings
(
file
,
replacements
),
defines
);
cl
::
Kernel
kernel
(
program
,
"computeNonbonded"
);
cl
::
Kernel
kernel
(
program
,
"computeNonbonded"
);
...
@@ -424,6 +455,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -424,6 +455,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionCount
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionCount
->
getDeviceBuffer
());
index
+=
2
;
// The periodic box size arguments are set when the kernel is executed.
index
+=
2
;
// The periodic box size arguments are set when the kernel is executed.
kernel
.
setArg
<
cl_uint
>
(
index
++
,
interactingTiles
->
getSize
());
if
(
context
.
getSIMDWidth
()
==
32
)
if
(
context
.
getSIMDWidth
()
==
32
)
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionFlags
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionFlags
->
getDeviceBuffer
());
}
}
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.h
View file @
a5e4de14
...
@@ -123,10 +123,13 @@ public:
...
@@ -123,10 +123,13 @@ public:
*/
*/
void
prepareInteractions
();
void
prepareInteractions
();
/**
/**
* Compute the nonbonded interactions. This will only be executed once after each call to
* Compute the nonbonded interactions.
* prepareInteractions(). Additional calls return immediately without doing anything.
*/
*/
void
computeInteractions
();
void
computeInteractions
();
/**
* Check to see if the neighbor list arrays are large enough, and make them bigger if necessary.
*/
void
updateNeighborListSize
();
/**
/**
* Get the array containing the center of each atom block.
* Get the array containing the center of each atom block.
*/
*/
...
...
platforms/opencl/src/kernels/findInteractingBlocks.cl
View file @
a5e4de14
...
@@ -47,7 +47,7 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPe
...
@@ -47,7 +47,7 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPe
*/
*/
void
storeInteractionData
(
__local
ushort2*
buffer,
__local
int*
valid,
__local
short*
sum,
__local
ushort2*
temp,
__local
int*
baseIndex,
void
storeInteractionData
(
__local
ushort2*
buffer,
__local
int*
valid,
__local
short*
sum,
__local
ushort2*
temp,
__local
int*
baseIndex,
__global
unsigned
int*
interactionCount,
__global
ushort2*
interactingTiles,
float
cutoffSquared,
float4
periodicBoxSize,
__global
unsigned
int*
interactionCount,
__global
ushort2*
interactingTiles,
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox
)
{
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox
,
unsigned
int
maxTiles
)
{
//
The
buffer
is
full,
so
we
need
to
compact
it
and
write
out
results.
Start
by
doing
a
parallel
prefix
sum.
//
The
buffer
is
full,
so
we
need
to
compact
it
and
write
out
results.
Start
by
doing
a
parallel
prefix
sum.
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
...
@@ -147,7 +147,7 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
...
@@ -147,7 +147,7 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
if
(
get_local_id
(
0
)
==
0
)
if
(
get_local_id
(
0
)
==
0
)
*baseIndex
=
atom_add
(
interactionCount,
numValid
)
;
*baseIndex
=
atom_add
(
interactionCount,
numValid
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
*baseIndex+numValid
<=
MAX_TILES
)
if
(
*baseIndex+numValid
<=
maxTiles
)
for
(
int
i
=
get_local_id
(
0
)
; i < numValid; i += GROUP_SIZE)
for
(
int
i
=
get_local_id
(
0
)
; i < numValid; i += GROUP_SIZE)
interactingTiles[*baseIndex+i]
=
temp[i]
;
interactingTiles[*baseIndex+i]
=
temp[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
@@ -158,7 +158,7 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
...
@@ -158,7 +158,7 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
*
mark
them
as
non-interacting.
*
mark
them
as
non-interacting.
*/
*/
__kernel
void
findBlocksWithInteractions
(
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
blockCenter,
__kernel
void
findBlocksWithInteractions
(
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox,
__global
unsigned
int*
interactionCount,
__global
ushort2*
interactingTiles,
__global
float4*
posq
)
{
__global
float4*
blockBoundingBox,
__global
unsigned
int*
interactionCount,
__global
ushort2*
interactingTiles,
__global
float4*
posq
,
unsigned
int
maxTiles
)
{
__local
ushort2
buffer[BUFFER_SIZE]
;
__local
ushort2
buffer[BUFFER_SIZE]
;
__local
int
valid[BUFFER_SIZE]
;
__local
int
valid[BUFFER_SIZE]
;
__local
short
sum[BUFFER_SIZE]
;
__local
short
sum[BUFFER_SIZE]
;
...
@@ -210,14 +210,14 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
...
@@ -210,14 +210,14 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
bufferFull
)
{
if
(
bufferFull
)
{
storeInteractionData
(
buffer,
valid,
sum,
temp,
&globalIndex,
interactionCount,
interactingTiles,
cutoffSquared,
periodicBoxSize,
invPeriodicBoxSize,
posq,
blockCenter,
blockBoundingBox
)
;
storeInteractionData
(
buffer,
valid,
sum,
temp,
&globalIndex,
interactionCount,
interactingTiles,
cutoffSquared,
periodicBoxSize,
invPeriodicBoxSize,
posq,
blockCenter,
blockBoundingBox
,
maxTiles
)
;
valuesInBuffer
=
0
;
valuesInBuffer
=
0
;
if
(
get_local_id
(
0
)
==
0
)
if
(
get_local_id
(
0
)
==
0
)
bufferFull
=
false
;
bufferFull
=
false
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
}
}
storeInteractionData
(
buffer,
valid,
sum,
temp,
&globalIndex,
interactionCount,
interactingTiles,
cutoffSquared,
periodicBoxSize,
invPeriodicBoxSize,
posq,
blockCenter,
blockBoundingBox
)
;
storeInteractionData
(
buffer,
valid,
sum,
temp,
&globalIndex,
interactionCount,
interactingTiles,
cutoffSquared,
periodicBoxSize,
invPeriodicBoxSize,
posq,
blockCenter,
blockBoundingBox
,
maxTiles
)
;
}
}
/**
/**
...
@@ -225,7 +225,7 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
...
@@ -225,7 +225,7 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
*
flags
for
which
ones
are
interacting.
*
flags
for
which
ones
are
interacting.
*/
*/
__kernel
void
findInteractionsWithinBlocks
(
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
ushort2*
tiles,
__global
float4*
blockCenter,
__kernel
void
findInteractionsWithinBlocks
(
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
ushort2*
tiles,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
__local
unsigned
int*
flags
)
{
__global
float4*
blockBoundingBox,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
__local
unsigned
int*
flags
,
unsigned
int
maxTiles
)
{
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
numTiles
=
interactionCount[0]
;
...
@@ -233,7 +233,7 @@ __kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicB
...
@@ -233,7 +233,7 @@ __kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicB
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
unsigned
int
index
=
get_local_id
(
0
)
&
(
TILE_SIZE
-
1
)
;
unsigned
int
index
=
get_local_id
(
0
)
&
(
TILE_SIZE
-
1
)
;
if
(
numTiles
>
MAX_TILES
)
if
(
numTiles
>
maxTiles
)
return
;
return
;
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
float4
apos
;
float4
apos
;
...
...
platforms/opencl/src/kernels/gbsaObc_default.cl
View file @
a5e4de14
...
@@ -17,14 +17,14 @@ typedef struct {
...
@@ -17,14 +17,14 @@ typedef struct {
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
,
unsigned
int
maxTiles
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
endif
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
pos
=
get_group_id
(
0
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
unsigned
int
pos
=
get_group_id
(
0
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
#
else
#
else
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
...
@@ -36,7 +36,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -36,7 +36,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
unsigned
int
x,
y
;
unsigned
int
x,
y
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
MAX_TILES
)
{
if
(
numTiles
<=
maxTiles
)
{
ushort2
tileIndices
=
tiles[pos]
;
ushort2
tileIndices
=
tiles[pos]
;
x
=
tileIndices.x
;
x
=
tileIndices.x
;
y
=
tileIndices.y
;
y
=
tileIndices.y
;
...
@@ -204,14 +204,14 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -204,14 +204,14 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
,
unsigned
int
maxTiles
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
endif
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
pos
=
get_group_id
(
0
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
unsigned
int
pos
=
get_group_id
(
0
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
#
else
#
else
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
...
@@ -223,7 +223,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -223,7 +223,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
unsigned
int
x,
y
;
unsigned
int
x,
y
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
MAX_TILES
)
{
if
(
numTiles
<=
maxTiles
)
{
ushort2
tileIndices
=
tiles[pos]
;
ushort2
tileIndices
=
tiles[pos]
;
x
=
tileIndices.x
;
x
=
tileIndices.x
;
y
=
tileIndices.y
;
y
=
tileIndices.y
;
...
...
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
View file @
a5e4de14
...
@@ -17,7 +17,7 @@ typedef struct {
...
@@ -17,7 +17,7 @@ typedef struct {
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
unsigned
int*
interactionFlags
)
{
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
endif
...
@@ -25,8 +25,8 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -25,8 +25,8 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
pos
=
warp*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
unsigned
int
pos
=
warp*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
#
else
#
else
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
...
@@ -38,7 +38,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -38,7 +38,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
unsigned
int
x,
y
;
unsigned
int
x,
y
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
MAX_TILES
)
{
if
(
numTiles
<=
maxTiles
)
{
ushort2
tileIndices
=
tiles[pos]
;
ushort2
tileIndices
=
tiles[pos]
;
x
=
tileIndices.x
;
x
=
tileIndices.x
;
y
=
tileIndices.y
;
y
=
tileIndices.y
;
...
@@ -123,7 +123,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -123,7 +123,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
}
}
localData[get_local_id
(
0
)
].bornSum
=
0.0f
;
localData[get_local_id
(
0
)
].bornSum
=
0.0f
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
(
numTiles
<=
MAX_TILES
?
interactionFlags[pos]
:
0xFFFFFFFF
)
;
unsigned
int
flags
=
(
numTiles
<=
maxTiles
?
interactionFlags[pos]
:
0xFFFFFFFF
)
;
if
(
flags
!=
0xFFFFFFFF
&&
false
)
{
//
TODO:
Fix
this:
should
be
checking
for
exclusions
if
(
flags
!=
0xFFFFFFFF
&&
false
)
{
//
TODO:
Fix
this:
should
be
checking
for
exclusions
if
(
flags
==
0
)
{
if
(
flags
==
0
)
{
//
No
interactions
in
this
tile.
//
No
interactions
in
this
tile.
...
@@ -270,7 +270,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -270,7 +270,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
unsigned
int*
interactionFlags
)
{
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
endif
...
@@ -278,8 +278,8 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -278,8 +278,8 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
pos
=
warp*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
unsigned
int
pos
=
warp*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
#
else
#
else
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
...
@@ -291,7 +291,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -291,7 +291,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
unsigned
int
x,
y
;
unsigned
int
x,
y
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
MAX_TILES
)
{
if
(
numTiles
<=
maxTiles
)
{
ushort2
tileIndices
=
tiles[pos]
;
ushort2
tileIndices
=
tiles[pos]
;
x
=
tileIndices.x
;
x
=
tileIndices.x
;
y
=
tileIndices.y
;
y
=
tileIndices.y
;
...
@@ -381,7 +381,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -381,7 +381,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
localData[get_local_id
(
0
)
].fz
=
0.0f
;
localData[get_local_id
(
0
)
].fz
=
0.0f
;
localData[get_local_id
(
0
)
].fw
=
0.0f
;
localData[get_local_id
(
0
)
].fw
=
0.0f
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
(
numTiles
<=
MAX_TILES
?
interactionFlags[pos]
:
0xFFFFFFFF
)
;
unsigned
int
flags
=
(
numTiles
<=
maxTiles
?
interactionFlags[pos]
:
0xFFFFFFFF
)
;
if
(
flags
!=
0xFFFFFFFF
&&
false
)
{
//
TODO:
Fix
this:
should
be
checking
for
exclusions
if
(
flags
!=
0xFFFFFFFF
&&
false
)
{
//
TODO:
Fix
this:
should
be
checking
for
exclusions
if
(
flags
==
0
)
{
if
(
flags
==
0
)
{
//
No
interactions
in
this
tile.
//
No
interactions
in
this
tile.
...
...
platforms/opencl/src/kernels/nonbonded_default.cl
View file @
a5e4de14
...
@@ -15,15 +15,15 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
...
@@ -15,15 +15,15 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
,
unsigned
int
maxTiles
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
pos
=
get_group_id
(
0
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
unsigned
int
pos
=
get_group_id
(
0
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
#
else
#
else
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
...
@@ -37,7 +37,7 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
...
@@ -37,7 +37,7 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
unsigned
int
x,
y
;
unsigned
int
x,
y
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
MAX_TILES
)
{
if
(
numTiles
<=
maxTiles
)
{
ushort2
tileIndices
=
tiles[pos]
;
ushort2
tileIndices
=
tiles[pos]
;
x
=
tileIndices.x
;
x
=
tileIndices.x
;
y
=
tileIndices.y
;
y
=
tileIndices.y
;
...
...
platforms/opencl/src/kernels/nonbonded_nvidia.cl
View file @
a5e4de14
...
@@ -15,7 +15,7 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
...
@@ -15,7 +15,7 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
unsigned
int*
interactionFlags
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
@@ -24,8 +24,8 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
...
@@ -24,8 +24,8 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
pos
=
warp*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
unsigned
int
pos
=
warp*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
#
else
#
else
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
...
@@ -39,7 +39,7 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
...
@@ -39,7 +39,7 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
unsigned
int
x,
y
;
unsigned
int
x,
y
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
MAX_TILES
)
{
if
(
numTiles
<=
maxTiles
)
{
ushort2
tileIndices
=
tiles[pos]
;
ushort2
tileIndices
=
tiles[pos]
;
x
=
tileIndices.x
;
x
=
tileIndices.x
;
y
=
tileIndices.y
;
y
=
tileIndices.y
;
...
@@ -145,7 +145,7 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
...
@@ -145,7 +145,7 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
localData[get_local_id
(
0
)
].fy
=
0.0f
;
localData[get_local_id
(
0
)
].fy
=
0.0f
;
localData[get_local_id
(
0
)
].fz
=
0.0f
;
localData[get_local_id
(
0
)
].fz
=
0.0f
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
(
numTiles
<=
MAX_TILES
?
interactionFlags[pos]
:
0xFFFFFFFF
)
;
unsigned
int
flags
=
(
numTiles
<=
maxTiles
?
interactionFlags[pos]
:
0xFFFFFFFF
)
;
if
(
!hasExclusions
&&
flags
!=
0xFFFFFFFF
)
{
if
(
!hasExclusions
&&
flags
!=
0xFFFFFFFF
)
{
if
(
flags
==
0
)
{
if
(
flags
==
0
)
{
//
No
interactions
in
this
tile.
//
No
interactions
in
this
tile.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment