Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
839f9a81
Commit
839f9a81
authored
Aug 23, 2010
by
Peter Eastman
Browse files
Finished changes to reduce memory use for large systems
parent
a8fe9cea
Changes
12
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
264 additions
and
154 deletions
+264
-154
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+20
-12
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+32
-14
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
+19
-11
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
+22
-14
platforms/opencl/src/kernels/customGBValueN2_default.cl
platforms/opencl/src/kernels/customGBValueN2_default.cl
+19
-11
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
+22
-14
platforms/opencl/src/kernels/findInteractingBlocks.cl
platforms/opencl/src/kernels/findInteractingBlocks.cl
+5
-2
platforms/opencl/src/kernels/gbsaObc_default.cl
platforms/opencl/src/kernels/gbsaObc_default.cl
+38
-22
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
+44
-28
platforms/opencl/src/kernels/nonbonded_default.cl
platforms/opencl/src/kernels/nonbonded_default.cl
+19
-11
platforms/opencl/src/kernels/nonbonded_nvidia.cl
platforms/opencl/src/kernels/nonbonded_nvidia.cl
+22
-14
platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
+2
-1
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
839f9a81
...
@@ -1691,6 +1691,8 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1691,6 +1691,8 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNumAtomBlocks
());
if
(
nb
.
getUseCutoff
())
defines
[
"MAX_TILES"
]
=
OpenCLExpressionUtilities
::
intToString
(
nb
.
getInteractingTiles
().
getSize
());
string
file
=
(
cl
.
getSIMDWidth
()
==
32
?
OpenCLKernelSources
::
gbsaObc_nvidia
:
OpenCLKernelSources
::
gbsaObc_default
);
string
file
=
(
cl
.
getSIMDWidth
()
==
32
?
OpenCLKernelSources
::
gbsaObc_nvidia
:
OpenCLKernelSources
::
gbsaObc_default
);
cl
::
Program
program
=
cl
.
createProgram
(
file
,
defines
);
cl
::
Program
program
=
cl
.
createProgram
(
file
,
defines
);
int
index
=
0
;
int
index
=
0
;
...
@@ -1702,8 +1704,9 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1702,8 +1704,9 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
computeBornSumKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
computeBornSumKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
if
(
nb
.
getUseCutoff
())
{
if
(
nb
.
getUseCutoff
())
{
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
if
(
cl
.
getSIMDWidth
()
==
32
)
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
+
2
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
}
}
else
else
computeBornSumKernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
computeBornSumKernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
...
@@ -1718,8 +1721,9 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1718,8 +1721,9 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
force1Kernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
mm_float4
),
NULL
);
force1Kernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
mm_float4
),
NULL
);
if
(
nb
.
getUseCutoff
())
{
if
(
nb
.
getUseCutoff
())
{
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
if
(
cl
.
getSIMDWidth
()
==
32
)
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
+
2
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
}
}
else
else
force1Kernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
force1Kernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
...
@@ -1744,10 +1748,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1744,10 +1748,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
reduceBornForceKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
obcChain
->
getDeviceBuffer
());
reduceBornForceKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
obcChain
->
getDeviceBuffer
());
}
}
if
(
nb
.
getUseCutoff
())
{
if
(
nb
.
getUseCutoff
())
{
computeBornSumKernel
.
setArg
<
mm_float4
>
(
8
,
cl
.
getPeriodicBoxSize
());
computeBornSumKernel
.
setArg
<
mm_float4
>
(
7
,
cl
.
getPeriodicBoxSize
());
computeBornSumKernel
.
setArg
<
mm_float4
>
(
9
,
cl
.
getInvPeriodicBoxSize
());
computeBornSumKernel
.
setArg
<
mm_float4
>
(
8
,
cl
.
getInvPeriodicBoxSize
());
force1Kernel
.
setArg
<
mm_float4
>
(
10
,
cl
.
getPeriodicBoxSize
());
force1Kernel
.
setArg
<
mm_float4
>
(
9
,
cl
.
getPeriodicBoxSize
());
force1Kernel
.
setArg
<
mm_float4
>
(
1
1
,
cl
.
getInvPeriodicBoxSize
());
force1Kernel
.
setArg
<
mm_float4
>
(
1
0
,
cl
.
getInvPeriodicBoxSize
());
}
}
int
numTiles
=
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
;
int
numTiles
=
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
;
cl
.
executeKernel
(
computeBornSumKernel
,
numTiles
*
OpenCLContext
::
TileSize
);
cl
.
executeKernel
(
computeBornSumKernel
,
numTiles
*
OpenCLContext
::
TileSize
);
...
@@ -2399,9 +2403,11 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -2399,9 +2403,11 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairValueKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
pairValueKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
if
(
nb
.
getUseCutoff
())
{
if
(
nb
.
getUseCutoff
())
{
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
index
+=
2
;
// Periodic box size arguments are set when the kernel is executed.
index
+=
2
;
// Periodic box size arguments are set when the kernel is executed.
pairValueKernel
.
setArg
<
cl_uint
>
(
index
++
,
nb
.
getInteractingTiles
().
getSize
());
if
(
cl
.
getSIMDWidth
()
==
32
)
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
}
}
else
else
pairValueKernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
pairValueKernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
...
@@ -2445,9 +2451,11 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -2445,9 +2451,11 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairEnergyKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
pairEnergyKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
if
(
nb
.
getUseCutoff
())
{
if
(
nb
.
getUseCutoff
())
{
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
index
+=
2
;
// Periodic box size arguments are set when the kernel is executed.
index
+=
2
;
// Periodic box size arguments are set when the kernel is executed.
pairEnergyKernel
.
setArg
<
cl_uint
>
(
index
++
,
nb
.
getInteractingTiles
().
getSize
());
if
(
cl
.
getSIMDWidth
()
==
32
)
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
}
}
else
else
pairEnergyKernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
pairEnergyKernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
...
@@ -2518,10 +2526,10 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -2518,10 +2526,10 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
globals
->
upload
(
globalParamValues
);
globals
->
upload
(
globalParamValues
);
}
}
if
(
nb
.
getUseCutoff
())
{
if
(
nb
.
getUseCutoff
())
{
pairValueKernel
.
setArg
<
mm_float4
>
(
1
1
,
cl
.
getPeriodicBoxSize
());
pairValueKernel
.
setArg
<
mm_float4
>
(
1
0
,
cl
.
getPeriodicBoxSize
());
pairValueKernel
.
setArg
<
mm_float4
>
(
1
2
,
cl
.
getInvPeriodicBoxSize
());
pairValueKernel
.
setArg
<
mm_float4
>
(
1
1
,
cl
.
getInvPeriodicBoxSize
());
pairEnergyKernel
.
setArg
<
mm_float4
>
(
1
2
,
cl
.
getPeriodicBoxSize
());
pairEnergyKernel
.
setArg
<
mm_float4
>
(
1
1
,
cl
.
getPeriodicBoxSize
());
pairEnergyKernel
.
setArg
<
mm_float4
>
(
1
3
,
cl
.
getInvPeriodicBoxSize
());
pairEnergyKernel
.
setArg
<
mm_float4
>
(
1
2
,
cl
.
getInvPeriodicBoxSize
());
}
}
int
numTiles
=
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
;
int
numTiles
=
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
;
cl
.
executeKernel
(
pairValueKernel
,
numTiles
*
OpenCLContext
::
TileSize
);
cl
.
executeKernel
(
pairValueKernel
,
numTiles
*
OpenCLContext
::
TileSize
);
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
839f9a81
...
@@ -200,8 +200,18 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -200,8 +200,18 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
// Create data structures for the neighbor list.
// Create data structures for the neighbor list.
if
(
useCutoff
)
{
if
(
useCutoff
)
{
interactingTiles
=
new
OpenCLArray
<
mm_ushort2
>
(
context
,
numTiles
,
"interactingTiles"
);
// Select a size for the arrays that hold the neighbor list. This estimate is intentionally very
interactionFlags
=
new
OpenCLArray
<
cl_uint
>
(
context
,
numTiles
,
"interactionFlags"
);
// high, because if it ever is too small, we have to fall back to the N^2 algorithm.
mm_float4
boxSize
=
context
.
getPeriodicBoxSize
();
int
maxInteractingTiles
=
(
int
)
(
numTiles
*
(
cutoff
/
boxSize
.
x
+
cutoff
/
boxSize
.
y
+
cutoff
/
boxSize
.
z
));
if
(
maxInteractingTiles
<
1
)
maxInteractingTiles
=
1
;
if
(
maxInteractingTiles
>
numTiles
)
maxInteractingTiles
=
numTiles
;
interactingTiles
=
new
OpenCLArray
<
mm_ushort2
>
(
context
,
maxInteractingTiles
,
"interactingTiles"
);
if
(
context
.
getSIMDWidth
()
==
32
)
interactionFlags
=
new
OpenCLArray
<
cl_uint
>
(
context
,
maxInteractingTiles
,
"interactionFlags"
);
interactionCount
=
new
OpenCLArray
<
cl_uint
>
(
context
,
1
,
"interactionCount"
);
interactionCount
=
new
OpenCLArray
<
cl_uint
>
(
context
,
1
,
"interactionCount"
);
blockCenter
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockCenter"
);
blockCenter
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockCenter"
);
blockBoundingBox
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockBoundingBox"
);
blockBoundingBox
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockBoundingBox"
);
...
@@ -213,6 +223,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -213,6 +223,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
if
(
useCutoff
)
{
if
(
useCutoff
)
{
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"MAX_TILES"
]
=
OpenCLExpressionUtilities
::
intToString
(
interactingTiles
->
getSize
());
if
(
forceBufferPerAtomBlock
)
if
(
forceBufferPerAtomBlock
)
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
if
(
usePeriodic
)
if
(
usePeriodic
)
...
@@ -231,6 +242,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -231,6 +242,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
context
.
getPosq
().
getDeviceBuffer
());
if
(
context
.
getSIMDWidth
()
==
32
)
{
findInteractionsWithinBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findInteractionsWithinBlocks"
);
findInteractionsWithinBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findInteractionsWithinBlocks"
);
findInteractionsWithinBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
findInteractionsWithinBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosq
().
getDeviceBuffer
());
...
@@ -241,6 +253,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -241,6 +253,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
interactionCount
->
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
interactionCount
->
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
(
9
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_uint
),
NULL
);
findInteractionsWithinBlocksKernel
.
setArg
(
9
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_uint
),
NULL
);
}
}
}
}
}
int
OpenCLNonbondedUtilities
::
findExclusionIndex
(
int
x
,
int
y
,
const
vector
<
cl_uint
>&
exclusionIndices
,
const
vector
<
cl_uint
>&
exclusionRowIndices
)
{
int
OpenCLNonbondedUtilities
::
findExclusionIndex
(
int
x
,
int
y
,
const
vector
<
cl_uint
>&
exclusionIndices
,
const
vector
<
cl_uint
>&
exclusionRowIndices
)
{
...
@@ -264,6 +277,8 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
...
@@ -264,6 +277,8 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
findInteractingBlocksKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
());
findInteractingBlocksKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
());
findInteractingBlocksKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
());
findInteractingBlocksKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
());
context
.
executeKernel
(
findInteractingBlocksKernel
,
context
.
getNumAtoms
());
context
.
executeKernel
(
findInteractingBlocksKernel
,
context
.
getNumAtoms
());
vector
<
cl_uint
>
count
;
interactionCount
->
download
(
count
);
if
(
context
.
getSIMDWidth
()
==
32
)
{
if
(
context
.
getSIMDWidth
()
==
32
)
{
findInteractionsWithinBlocksKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
());
findInteractionsWithinBlocksKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
());
findInteractionsWithinBlocksKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
());
findInteractionsWithinBlocksKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
());
...
@@ -274,8 +289,8 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
...
@@ -274,8 +289,8 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
if
(
cutoff
!=
-
1.0
)
{
if
(
cutoff
!=
-
1.0
)
{
if
(
useCutoff
)
{
if
(
useCutoff
)
{
forceKernel
.
setArg
<
mm_float4
>
(
1
1
,
context
.
getPeriodicBoxSize
());
forceKernel
.
setArg
<
mm_float4
>
(
1
0
,
context
.
getPeriodicBoxSize
());
forceKernel
.
setArg
<
mm_float4
>
(
1
2
,
context
.
getInvPeriodicBoxSize
());
forceKernel
.
setArg
<
mm_float4
>
(
1
1
,
context
.
getInvPeriodicBoxSize
());
}
}
context
.
executeKernel
(
forceKernel
,
(
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
)
*
OpenCLContext
::
TileSize
);
context
.
executeKernel
(
forceKernel
,
(
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
)
*
OpenCLContext
::
TileSize
);
}
}
...
@@ -388,6 +403,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -388,6 +403,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines
[
"NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtomBlocks
());
if
(
useCutoff
)
defines
[
"MAX_TILES"
]
=
OpenCLExpressionUtilities
::
intToString
(
interactingTiles
->
getSize
());
string
file
=
(
context
.
getSIMDWidth
()
==
32
?
OpenCLKernelSources
::
nonbonded_nvidia
:
OpenCLKernelSources
::
nonbonded_default
);
string
file
=
(
context
.
getSIMDWidth
()
==
32
?
OpenCLKernelSources
::
nonbonded_nvidia
:
OpenCLKernelSources
::
nonbonded_default
);
cl
::
Program
program
=
context
.
createProgram
(
context
.
replaceStrings
(
file
,
replacements
),
defines
);
cl
::
Program
program
=
context
.
createProgram
(
context
.
replaceStrings
(
file
,
replacements
),
defines
);
cl
::
Kernel
kernel
(
program
,
"computeNonbonded"
);
cl
::
Kernel
kernel
(
program
,
"computeNonbonded"
);
...
@@ -405,9 +422,10 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -405,9 +422,10 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
kernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
kernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
if
(
useCutoff
)
{
if
(
useCutoff
)
{
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionFlags
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionCount
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionCount
->
getDeviceBuffer
());
index
+=
2
;
// The periodic box size arguments are set when the kernel is executed.
index
+=
2
;
// The periodic box size arguments are set when the kernel is executed.
if
(
context
.
getSIMDWidth
()
==
32
)
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionFlags
->
getDeviceBuffer
());
}
}
else
{
else
{
kernel
.
setArg
<
cl_uint
>
(
index
++
,
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
);
kernel
.
setArg
<
cl_uint
>
(
index
++
,
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
);
...
...
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
View file @
839f9a81
...
@@ -11,16 +11,19 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
...
@@ -11,16 +11,19 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
float4*
tempForceBuffer,
__global
unsigned
int*
exclusionRowIndices,
__local
float4*
tempForceBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
,
unsigned
int
maxTiles
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
numTiles
=
interactionCount[0]
;
#
endif
unsigned
int
pos
=
get_group_id
(
0
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
#
else
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
#
endif
float
energy
=
0.0f
;
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
__local
unsigned
int
exclusionRange[2]
;
__local
unsigned
int
exclusionRange[2]
;
...
@@ -29,18 +32,23 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
...
@@ -29,18 +32,23 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
unsigned
int
x,
y
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
ushort2
tileIndices
=
tiles[pos]
;
ushort2
tileIndices
=
tiles[pos]
;
unsigned
int
x
=
tileIndices.x
;
x
=
tileIndices.x
;
unsigned
int
y
=
tileIndices.y
;
y
=
tileIndices.y
;
#
else
}
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
else
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
#
endif
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y++
;
y++
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
#
endif
}
unsigned
int
baseLocalAtom
=
(
get_local_id
(
0
)
<
TILE_SIZE
?
0
:
TILE_SIZE/2
)
;
unsigned
int
baseLocalAtom
=
(
get_local_id
(
0
)
<
TILE_SIZE
?
0
:
TILE_SIZE/2
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
forceBufferOffset
=
(
tgx
<
TILE_SIZE/2
?
0
:
TILE_SIZE
)
;
unsigned
int
forceBufferOffset
=
(
tgx
<
TILE_SIZE/2
?
0
:
TILE_SIZE
)
;
...
...
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
View file @
839f9a81
...
@@ -11,18 +11,21 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
...
@@ -11,18 +11,21 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
float4*
tempBuffer,
__global
unsigned
int*
exclusionRowIndices,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
#
endif
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
pos
=
warp*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
#
else
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
#
endif
float
energy
=
0.0f
;
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
__local
unsigned
int
exclusionRange[4]
;
__local
unsigned
int
exclusionRange[4]
;
...
@@ -30,18 +33,23 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
...
@@ -30,18 +33,23 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
unsigned
int
x,
y
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
ushort2
tileIndices
=
tiles[pos]
;
ushort2
tileIndices
=
tiles[pos]
;
unsigned
int
x
=
tileIndices.x
;
x
=
tileIndices.x
;
unsigned
int
y
=
tileIndices.y
;
y
=
tileIndices.y
;
#
else
}
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
else
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
#
endif
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y++
;
y++
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
#
endif
}
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
...
@@ -128,7 +136,7 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
...
@@ -128,7 +136,7 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
local_force[get_local_id
(
0
)
]
=
0.0f
;
local_force[get_local_id
(
0
)
]
=
0.0f
;
CLEAR_LOCAL_DERIVATIVES
CLEAR_LOCAL_DERIVATIVES
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
interactionFlags[pos]
;
unsigned
int
flags
=
(
numTiles
<=
maxTiles
?
interactionFlags[pos]
:
0xFFFFFFFF
)
;
if
(
!hasExclusions
&&
flags
==
0
)
{
if
(
!hasExclusions
&&
flags
==
0
)
{
//
No
interactions
in
this
tile.
//
No
interactions
in
this
tile.
}
}
...
...
platforms/opencl/src/kernels/customGBValueN2_default.cl
View file @
839f9a81
...
@@ -9,16 +9,19 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
...
@@ -9,16 +9,19 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__global
float*
global_value,
__local
float*
local_value,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__global
float*
global_value,
__local
float*
local_value,
__local
float*
tempBuffer,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
,
unsigned
int
maxTiles
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
numTiles
=
interactionCount[0]
;
#
endif
unsigned
int
pos
=
get_group_id
(
0
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
#
else
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
#
endif
float
energy
=
0.0f
;
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
__local
unsigned
int
exclusionRange[2]
;
__local
unsigned
int
exclusionRange[2]
;
...
@@ -26,18 +29,23 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
...
@@ -26,18 +29,23 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
unsigned
int
x,
y
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
ushort2
tileIndices
=
tiles[pos]
;
ushort2
tileIndices
=
tiles[pos]
;
unsigned
int
x
=
tileIndices.x
;
x
=
tileIndices.x
;
unsigned
int
y
=
tileIndices.y
;
y
=
tileIndices.y
;
#
else
}
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
else
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
#
endif
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y++
;
y++
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
#
endif
}
unsigned
int
baseLocalAtom
=
(
get_local_id
(
0
)
<
TILE_SIZE
?
0
:
TILE_SIZE/2
)
;
unsigned
int
baseLocalAtom
=
(
get_local_id
(
0
)
<
TILE_SIZE
?
0
:
TILE_SIZE/2
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
valueBufferOffset
=
(
tgx
<
TILE_SIZE/2
?
0
:
TILE_SIZE
)
;
unsigned
int
valueBufferOffset
=
(
tgx
<
TILE_SIZE/2
?
0
:
TILE_SIZE
)
;
...
...
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
View file @
839f9a81
...
@@ -9,18 +9,21 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
...
@@ -9,18 +9,21 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__global
float*
global_value,
__local
float*
local_value,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__global
float*
global_value,
__local
float*
local_value,
__local
float*
tempBuffer,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
#
endif
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
pos
=
warp*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
#
else
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
#
endif
float
energy
=
0.0f
;
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
__local
unsigned
int
exclusionRange[4]
;
__local
unsigned
int
exclusionRange[4]
;
...
@@ -28,18 +31,23 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
...
@@ -28,18 +31,23 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
unsigned
int
x,
y
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
ushort2
tileIndices
=
tiles[pos]
;
ushort2
tileIndices
=
tiles[pos]
;
unsigned
int
x
=
tileIndices.x
;
x
=
tileIndices.x
;
unsigned
int
y
=
tileIndices.y
;
y
=
tileIndices.y
;
#
else
}
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
else
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
#
endif
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y++
;
y++
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
#
endif
}
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
...
@@ -125,7 +133,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
...
@@ -125,7 +133,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
}
}
local_value[get_local_id
(
0
)
]
=
0.0f
;
local_value[get_local_id
(
0
)
]
=
0.0f
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
interactionFlags[pos]
;
unsigned
int
flags
=
(
numTiles
<=
maxTiles
?
interactionFlags[pos]
:
0xFFFFFFFF
)
;
if
(
!hasExclusions
&&
flags
!=
0xFFFFFFFF
)
{
if
(
!hasExclusions
&&
flags
!=
0xFFFFFFFF
)
{
if
(
flags
==
0
)
{
if
(
flags
==
0
)
{
//
No
interactions
in
this
tile.
//
No
interactions
in
this
tile.
...
...
platforms/opencl/src/kernels/findInteractingBlocks.cl
View file @
839f9a81
...
@@ -147,6 +147,7 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
...
@@ -147,6 +147,7 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
if
(
get_local_id
(
0
)
==
0
)
if
(
get_local_id
(
0
)
==
0
)
*baseIndex
=
atom_add
(
interactionCount,
numValid
)
;
*baseIndex
=
atom_add
(
interactionCount,
numValid
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
*baseIndex+numValid
<=
MAX_TILES
)
for
(
int
i
=
get_local_id
(
0
)
; i < numValid; i += GROUP_SIZE)
for
(
int
i
=
get_local_id
(
0
)
; i < numValid; i += GROUP_SIZE)
interactingTiles[*baseIndex+i]
=
temp[i]
;
interactingTiles[*baseIndex+i]
=
temp[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
@@ -232,6 +233,8 @@ __kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicB
...
@@ -232,6 +233,8 @@ __kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicB
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
unsigned
int
index
=
get_local_id
(
0
)
&
(
TILE_SIZE
-
1
)
;
unsigned
int
index
=
get_local_id
(
0
)
&
(
TILE_SIZE
-
1
)
;
if
(
numTiles
>
MAX_TILES
)
return
;
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
float4
apos
;
float4
apos
;
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
...
...
platforms/opencl/src/kernels/gbsaObc_default.cl
View file @
839f9a81
...
@@ -17,32 +17,40 @@ typedef struct {
...
@@ -17,32 +17,40 @@ typedef struct {
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
endif
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
numTiles
=
interactionCount[0]
;
#
endif
unsigned
int
pos
=
get_group_id
(
0
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
#
else
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
#
endif
float
energy
=
0.0f
;
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
unsigned
int
x,
y
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
MAX_TILES
)
{
ushort2
tileIndices
=
tiles[pos]
;
ushort2
tileIndices
=
tiles[pos]
;
unsigned
int
x
=
tileIndices.x
;
x
=
tileIndices.x
;
unsigned
int
y
=
tileIndices.y
;
y
=
tileIndices.y
;
#
else
}
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
else
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
#
endif
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y++
;
y++
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
#
endif
}
unsigned
int
baseLocalAtom
=
(
get_local_id
(
0
)
<
TILE_SIZE
?
0
:
TILE_SIZE/2
)
;
unsigned
int
baseLocalAtom
=
(
get_local_id
(
0
)
<
TILE_SIZE
?
0
:
TILE_SIZE/2
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
forceBufferOffset
=
(
tgx
<
TILE_SIZE/2
?
0
:
TILE_SIZE
)
;
unsigned
int
forceBufferOffset
=
(
tgx
<
TILE_SIZE/2
?
0
:
TILE_SIZE
)
;
...
@@ -196,32 +204,40 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -196,32 +204,40 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
endif
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
numTiles
=
interactionCount[0]
;
#
endif
unsigned
int
pos
=
get_group_id
(
0
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
#
else
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
#
endif
float
energy
=
0.0f
;
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
unsigned
int
x,
y
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
MAX_TILES
)
{
ushort2
tileIndices
=
tiles[pos]
;
ushort2
tileIndices
=
tiles[pos]
;
unsigned
int
x
=
tileIndices.x
;
x
=
tileIndices.x
;
unsigned
int
y
=
tileIndices.y
;
y
=
tileIndices.y
;
#
else
}
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
else
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
#
endif
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y++
;
y++
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
#
endif
}
unsigned
int
baseLocalAtom
=
(
get_local_id
(
0
)
<
TILE_SIZE
?
0
:
TILE_SIZE/2
)
;
unsigned
int
baseLocalAtom
=
(
get_local_id
(
0
)
<
TILE_SIZE
?
0
:
TILE_SIZE/2
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
forceBufferOffset
=
(
tgx
<
TILE_SIZE/2
?
0
:
TILE_SIZE
)
;
unsigned
int
forceBufferOffset
=
(
tgx
<
TILE_SIZE/2
?
0
:
TILE_SIZE
)
;
...
...
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
View file @
839f9a81
...
@@ -17,34 +17,42 @@ typedef struct {
...
@@ -17,34 +17,42 @@ typedef struct {
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
,
__global
unsigned
int*
interactionFlags
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
#
endif
#
endif
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
pos
=
warp*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
#
else
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
#
endif
float
energy
=
0.0f
;
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
unsigned
int
x,
y
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
MAX_TILES
)
{
ushort2
tileIndices
=
tiles[pos]
;
ushort2
tileIndices
=
tiles[pos]
;
unsigned
int
x
=
tileIndices.x
;
x
=
tileIndices.x
;
unsigned
int
y
=
tileIndices.y
;
y
=
tileIndices.y
;
#
else
}
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
else
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
#
endif
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y++
;
y++
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
#
endif
}
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
...
@@ -115,7 +123,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -115,7 +123,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
}
}
localData[get_local_id
(
0
)
].bornSum
=
0.0f
;
localData[get_local_id
(
0
)
].bornSum
=
0.0f
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
interactionFlags[pos]
;
unsigned
int
flags
=
(
numTiles
<=
MAX_TILES
?
interactionFlags[pos]
:
0xFFFFFFFF
)
;
if
(
flags
!=
0xFFFFFFFF
&&
false
)
{
//
TODO:
Fix
this:
should
be
checking
for
exclusions
if
(
flags
!=
0xFFFFFFFF
&&
false
)
{
//
TODO:
Fix
this:
should
be
checking
for
exclusions
if
(
flags
==
0
)
{
if
(
flags
==
0
)
{
//
No
interactions
in
this
tile.
//
No
interactions
in
this
tile.
...
@@ -262,34 +270,42 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -262,34 +270,42 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
,
__global
unsigned
int*
interactionFlags
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
#
endif
#
endif
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
pos
=
warp*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
#
else
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
#
endif
float
energy
=
0.0f
;
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
unsigned
int
x,
y
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
MAX_TILES
)
{
ushort2
tileIndices
=
tiles[pos]
;
ushort2
tileIndices
=
tiles[pos]
;
unsigned
int
x
=
tileIndices.x
;
x
=
tileIndices.x
;
unsigned
int
y
=
tileIndices.y
;
y
=
tileIndices.y
;
#
else
}
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
else
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
#
endif
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y++
;
y++
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
#
endif
}
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
...
@@ -365,7 +381,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -365,7 +381,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
localData[get_local_id
(
0
)
].fz
=
0.0f
;
localData[get_local_id
(
0
)
].fz
=
0.0f
;
localData[get_local_id
(
0
)
].fw
=
0.0f
;
localData[get_local_id
(
0
)
].fw
=
0.0f
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
interactionFlags[pos]
;
unsigned
int
flags
=
(
numTiles
<=
MAX_TILES
?
interactionFlags[pos]
:
0xFFFFFFFF
)
;
if
(
flags
!=
0xFFFFFFFF
&&
false
)
{
//
TODO:
Fix
this:
should
be
checking
for
exclusions
if
(
flags
!=
0xFFFFFFFF
&&
false
)
{
//
TODO:
Fix
this:
should
be
checking
for
exclusions
if
(
flags
==
0
)
{
if
(
flags
==
0
)
{
//
No
interactions
in
this
tile.
//
No
interactions
in
this
tile.
...
...
platforms/opencl/src/kernels/nonbonded_default.cl
View file @
839f9a81
...
@@ -15,16 +15,19 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
...
@@ -15,16 +15,19 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
numTiles
=
interactionCount[0]
;
#
endif
unsigned
int
pos
=
get_group_id
(
0
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
)
;
#
else
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
#
endif
float
energy
=
0.0f
;
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
__local
unsigned
int
exclusionRange[2]
;
__local
unsigned
int
exclusionRange[2]
;
...
@@ -32,18 +35,23 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
...
@@ -32,18 +35,23 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
unsigned
int
x,
y
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
MAX_TILES
)
{
ushort2
tileIndices
=
tiles[pos]
;
ushort2
tileIndices
=
tiles[pos]
;
unsigned
int
x
=
tileIndices.x
;
x
=
tileIndices.x
;
unsigned
int
y
=
tileIndices.y
;
y
=
tileIndices.y
;
#
else
}
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
else
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
#
endif
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y++
;
y++
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
#
endif
}
unsigned
int
baseLocalAtom
=
(
get_local_id
(
0
)
<
TILE_SIZE
?
0
:
TILE_SIZE/2
)
;
unsigned
int
baseLocalAtom
=
(
get_local_id
(
0
)
<
TILE_SIZE
?
0
:
TILE_SIZE/2
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
forceBufferOffset
=
(
tgx
<
TILE_SIZE/2
?
0
:
TILE_SIZE
)
;
unsigned
int
forceBufferOffset
=
(
tgx
<
TILE_SIZE/2
?
0
:
TILE_SIZE
)
;
...
...
platforms/opencl/src/kernels/nonbonded_nvidia.cl
View file @
839f9a81
...
@@ -15,18 +15,21 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
...
@@ -15,18 +15,21 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
,
__global
unsigned
int*
interactionFlags
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
#
endif
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
pos
=
warp*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*
(
numTiles
>
MAX_TILES
?
NUM_BLOCKS*
(
NUM_BLOCKS+1
)
/2
:
numTiles
)
/totalWarps
;
#
else
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
#
endif
float
energy
=
0.0f
;
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
__local
unsigned
int
exclusionRange[4]
;
__local
unsigned
int
exclusionRange[4]
;
...
@@ -34,18 +37,23 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
...
@@ -34,18 +37,23 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
unsigned
int
x,
y
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
MAX_TILES
)
{
ushort2
tileIndices
=
tiles[pos]
;
ushort2
tileIndices
=
tiles[pos]
;
unsigned
int
x
=
tileIndices.x
;
x
=
tileIndices.x
;
unsigned
int
y
=
tileIndices.y
;
y
=
tileIndices.y
;
#
else
}
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
else
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
#
endif
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
if
(
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y++
;
y++
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
#
endif
}
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
...
@@ -137,7 +145,7 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
...
@@ -137,7 +145,7 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
localData[get_local_id
(
0
)
].fy
=
0.0f
;
localData[get_local_id
(
0
)
].fy
=
0.0f
;
localData[get_local_id
(
0
)
].fz
=
0.0f
;
localData[get_local_id
(
0
)
].fz
=
0.0f
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
interactionFlags[pos]
;
unsigned
int
flags
=
(
numTiles
<=
MAX_TILES
?
interactionFlags[pos]
:
0xFFFFFFFF
)
;
if
(
!hasExclusions
&&
flags
!=
0xFFFFFFFF
)
{
if
(
!hasExclusions
&&
flags
!=
0xFFFFFFFF
)
{
if
(
flags
==
0
)
{
if
(
flags
==
0
)
{
//
No
interactions
in
this
tile.
//
No
interactions
in
this
tile.
...
...
platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
View file @
839f9a81
...
@@ -523,6 +523,7 @@ void testBlockInteractions(bool periodic) {
...
@@ -523,6 +523,7 @@ void testBlockInteractions(bool periodic) {
int
numWithInteractions
=
interactionCount
[
0
];
int
numWithInteractions
=
interactionCount
[
0
];
vector
<
bool
>
hasInteractions
(
numBlocks
*
(
numBlocks
+
1
)
/
2
,
false
);
vector
<
bool
>
hasInteractions
(
numBlocks
*
(
numBlocks
+
1
)
/
2
,
false
);
nb
.
getInteractingTiles
().
download
(
interactingTiles
);
nb
.
getInteractingTiles
().
download
(
interactingTiles
);
if
(
clcontext
.
getSIMDWidth
()
==
32
)
nb
.
getInteractionFlags
().
download
(
interactionFlags
);
nb
.
getInteractionFlags
().
download
(
interactionFlags
);
const
unsigned
int
atoms
=
clcontext
.
getPaddedNumAtoms
();
const
unsigned
int
atoms
=
clcontext
.
getPaddedNumAtoms
();
const
unsigned
int
grid
=
OpenCLContext
::
TileSize
;
const
unsigned
int
grid
=
OpenCLContext
::
TileSize
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment