Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
9ab32c50
Commit
9ab32c50
authored
Sep 12, 2011
by
Peter Eastman
Browse files
Reduced the number of kernel executions to clear buffers
parent
61c50862
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
59 additions
and
4 deletions
+59
-4
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+33
-3
platforms/opencl/src/OpenCLContext.h
platforms/opencl/src/OpenCLContext.h
+2
-0
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+1
-1
platforms/opencl/src/kernels/utilities.cl
platforms/opencl/src/kernels/utilities.cl
+23
-0
No files found.
platforms/opencl/src/OpenCLContext.cpp
View file @
9ab32c50
...
@@ -148,6 +148,8 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex, OpenCLPlatform::
...
@@ -148,6 +148,8 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex, OpenCLPlatform::
clearTwoBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearTwoBuffers"
);
clearTwoBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearTwoBuffers"
);
clearThreeBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearThreeBuffers"
);
clearThreeBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearThreeBuffers"
);
clearFourBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearFourBuffers"
);
clearFourBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearFourBuffers"
);
clearFiveBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearFiveBuffers"
);
clearSixBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearSixBuffers"
);
reduceFloat4Kernel
=
cl
::
Kernel
(
utilities
,
"reduceFloat4Buffer"
);
reduceFloat4Kernel
=
cl
::
Kernel
(
utilities
,
"reduceFloat4Buffer"
);
reduceForcesKernel
=
cl
::
Kernel
(
utilities
,
"reduceForces"
);
reduceForcesKernel
=
cl
::
Kernel
(
utilities
,
"reduceForces"
);
...
@@ -351,7 +353,36 @@ void OpenCLContext::addAutoclearBuffer(cl::Memory& memory, int size) {
...
@@ -351,7 +353,36 @@ void OpenCLContext::addAutoclearBuffer(cl::Memory& memory, int size) {
void
OpenCLContext
::
clearAutoclearBuffers
()
{
void
OpenCLContext
::
clearAutoclearBuffers
()
{
int
base
=
0
;
int
base
=
0
;
int
total
=
autoclearBufferSizes
.
size
();
int
total
=
autoclearBufferSizes
.
size
();
while
(
total
-
base
>=
4
)
{
while
(
total
-
base
>=
6
)
{
clearSixBuffersKernel
.
setArg
<
cl
::
Memory
>
(
0
,
*
autoclearBuffers
[
base
]);
clearSixBuffersKernel
.
setArg
<
cl_int
>
(
1
,
autoclearBufferSizes
[
base
]);
clearSixBuffersKernel
.
setArg
<
cl
::
Memory
>
(
2
,
*
autoclearBuffers
[
base
+
1
]);
clearSixBuffersKernel
.
setArg
<
cl_int
>
(
3
,
autoclearBufferSizes
[
base
+
1
]);
clearSixBuffersKernel
.
setArg
<
cl
::
Memory
>
(
4
,
*
autoclearBuffers
[
base
+
2
]);
clearSixBuffersKernel
.
setArg
<
cl_int
>
(
5
,
autoclearBufferSizes
[
base
+
2
]);
clearSixBuffersKernel
.
setArg
<
cl
::
Memory
>
(
6
,
*
autoclearBuffers
[
base
+
3
]);
clearSixBuffersKernel
.
setArg
<
cl_int
>
(
7
,
autoclearBufferSizes
[
base
+
3
]);
clearSixBuffersKernel
.
setArg
<
cl
::
Memory
>
(
8
,
*
autoclearBuffers
[
base
+
4
]);
clearSixBuffersKernel
.
setArg
<
cl_int
>
(
9
,
autoclearBufferSizes
[
base
+
4
]);
clearSixBuffersKernel
.
setArg
<
cl
::
Memory
>
(
10
,
*
autoclearBuffers
[
base
+
5
]);
clearSixBuffersKernel
.
setArg
<
cl_int
>
(
11
,
autoclearBufferSizes
[
base
+
5
]);
executeKernel
(
clearSixBuffersKernel
,
max
(
max
(
max
(
max
(
max
(
autoclearBufferSizes
[
base
],
autoclearBufferSizes
[
base
+
1
]),
autoclearBufferSizes
[
base
+
2
]),
autoclearBufferSizes
[
base
+
3
]),
autoclearBufferSizes
[
base
+
4
]),
autoclearBufferSizes
[
base
+
5
]),
128
);
base
+=
6
;
}
if
(
total
-
base
==
5
)
{
clearFiveBuffersKernel
.
setArg
<
cl
::
Memory
>
(
0
,
*
autoclearBuffers
[
base
]);
clearFiveBuffersKernel
.
setArg
<
cl_int
>
(
1
,
autoclearBufferSizes
[
base
]);
clearFiveBuffersKernel
.
setArg
<
cl
::
Memory
>
(
2
,
*
autoclearBuffers
[
base
+
1
]);
clearFiveBuffersKernel
.
setArg
<
cl_int
>
(
3
,
autoclearBufferSizes
[
base
+
1
]);
clearFiveBuffersKernel
.
setArg
<
cl
::
Memory
>
(
4
,
*
autoclearBuffers
[
base
+
2
]);
clearFiveBuffersKernel
.
setArg
<
cl_int
>
(
5
,
autoclearBufferSizes
[
base
+
2
]);
clearFiveBuffersKernel
.
setArg
<
cl
::
Memory
>
(
6
,
*
autoclearBuffers
[
base
+
3
]);
clearFiveBuffersKernel
.
setArg
<
cl_int
>
(
7
,
autoclearBufferSizes
[
base
+
3
]);
clearFiveBuffersKernel
.
setArg
<
cl
::
Memory
>
(
8
,
*
autoclearBuffers
[
base
+
4
]);
clearFiveBuffersKernel
.
setArg
<
cl_int
>
(
9
,
autoclearBufferSizes
[
base
+
4
]);
executeKernel
(
clearFiveBuffersKernel
,
max
(
max
(
max
(
max
(
autoclearBufferSizes
[
base
],
autoclearBufferSizes
[
base
+
1
]),
autoclearBufferSizes
[
base
+
2
]),
autoclearBufferSizes
[
base
+
3
]),
autoclearBufferSizes
[
base
+
4
]),
128
);
}
else
if
(
total
-
base
==
4
)
{
clearFourBuffersKernel
.
setArg
<
cl
::
Memory
>
(
0
,
*
autoclearBuffers
[
base
]);
clearFourBuffersKernel
.
setArg
<
cl
::
Memory
>
(
0
,
*
autoclearBuffers
[
base
]);
clearFourBuffersKernel
.
setArg
<
cl_int
>
(
1
,
autoclearBufferSizes
[
base
]);
clearFourBuffersKernel
.
setArg
<
cl_int
>
(
1
,
autoclearBufferSizes
[
base
]);
clearFourBuffersKernel
.
setArg
<
cl
::
Memory
>
(
2
,
*
autoclearBuffers
[
base
+
1
]);
clearFourBuffersKernel
.
setArg
<
cl
::
Memory
>
(
2
,
*
autoclearBuffers
[
base
+
1
]);
...
@@ -361,9 +392,8 @@ void OpenCLContext::clearAutoclearBuffers() {
...
@@ -361,9 +392,8 @@ void OpenCLContext::clearAutoclearBuffers() {
clearFourBuffersKernel
.
setArg
<
cl
::
Memory
>
(
6
,
*
autoclearBuffers
[
base
+
3
]);
clearFourBuffersKernel
.
setArg
<
cl
::
Memory
>
(
6
,
*
autoclearBuffers
[
base
+
3
]);
clearFourBuffersKernel
.
setArg
<
cl_int
>
(
7
,
autoclearBufferSizes
[
base
+
3
]);
clearFourBuffersKernel
.
setArg
<
cl_int
>
(
7
,
autoclearBufferSizes
[
base
+
3
]);
executeKernel
(
clearFourBuffersKernel
,
max
(
max
(
max
(
autoclearBufferSizes
[
base
],
autoclearBufferSizes
[
base
+
1
]),
autoclearBufferSizes
[
base
+
2
]),
autoclearBufferSizes
[
base
+
3
]),
128
);
executeKernel
(
clearFourBuffersKernel
,
max
(
max
(
max
(
autoclearBufferSizes
[
base
],
autoclearBufferSizes
[
base
+
1
]),
autoclearBufferSizes
[
base
+
2
]),
autoclearBufferSizes
[
base
+
3
]),
128
);
base
+=
4
;
}
}
if
(
total
-
base
==
3
)
{
else
if
(
total
-
base
==
3
)
{
clearThreeBuffersKernel
.
setArg
<
cl
::
Memory
>
(
0
,
*
autoclearBuffers
[
base
]);
clearThreeBuffersKernel
.
setArg
<
cl
::
Memory
>
(
0
,
*
autoclearBuffers
[
base
]);
clearThreeBuffersKernel
.
setArg
<
cl_int
>
(
1
,
autoclearBufferSizes
[
base
]);
clearThreeBuffersKernel
.
setArg
<
cl_int
>
(
1
,
autoclearBufferSizes
[
base
]);
clearThreeBuffersKernel
.
setArg
<
cl
::
Memory
>
(
2
,
*
autoclearBuffers
[
base
+
1
]);
clearThreeBuffersKernel
.
setArg
<
cl
::
Memory
>
(
2
,
*
autoclearBuffers
[
base
+
1
]);
...
...
platforms/opencl/src/OpenCLContext.h
View file @
9ab32c50
...
@@ -472,6 +472,8 @@ private:
...
@@ -472,6 +472,8 @@ private:
cl
::
Kernel
clearTwoBuffersKernel
;
cl
::
Kernel
clearTwoBuffersKernel
;
cl
::
Kernel
clearThreeBuffersKernel
;
cl
::
Kernel
clearThreeBuffersKernel
;
cl
::
Kernel
clearFourBuffersKernel
;
cl
::
Kernel
clearFourBuffersKernel
;
cl
::
Kernel
clearFiveBuffersKernel
;
cl
::
Kernel
clearSixBuffersKernel
;
cl
::
Kernel
reduceFloat4Kernel
;
cl
::
Kernel
reduceFloat4Kernel
;
cl
::
Kernel
reduceForcesKernel
;
cl
::
Kernel
reduceForcesKernel
;
std
::
vector
<
OpenCLForceInfo
*>
forces
;
std
::
vector
<
OpenCLForceInfo
*>
forces
;
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
9ab32c50
...
@@ -1266,6 +1266,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1266,6 +1266,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// Create required data structures.
// Create required data structures.
pmeGrid
=
new
OpenCLArray
<
mm_float2
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
"pmeGrid"
);
pmeGrid
=
new
OpenCLArray
<
mm_float2
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
"pmeGrid"
);
cl
.
addAutoclearBuffer
(
pmeGrid
->
getDeviceBuffer
(),
pmeGrid
->
getSize
()
*
2
);
pmeGrid2
=
new
OpenCLArray
<
mm_float2
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
"pmeGrid2"
);
pmeGrid2
=
new
OpenCLArray
<
mm_float2
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
"pmeGrid2"
);
pmeBsplineModuliX
=
new
OpenCLArray
<
cl_float
>
(
cl
,
gridSizeX
,
"pmeBsplineModuliX"
);
pmeBsplineModuliX
=
new
OpenCLArray
<
cl_float
>
(
cl
,
gridSizeX
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
new
OpenCLArray
<
cl_float
>
(
cl
,
gridSizeY
,
"pmeBsplineModuliY"
);
pmeBsplineModuliY
=
new
OpenCLArray
<
cl_float
>
(
cl
,
gridSizeY
,
"pmeBsplineModuliY"
);
...
@@ -1464,7 +1465,6 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1464,7 +1465,6 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeAtomRangeKernel
.
setArg
<
mm_float4
>
(
4
,
invBoxSize
);
pmeAtomRangeKernel
.
setArg
<
mm_float4
>
(
4
,
invBoxSize
);
cl
.
executeKernel
(
pmeAtomRangeKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeAtomRangeKernel
,
cl
.
getNumAtoms
());
if
(
cl
.
getSupports64BitGlobalAtomics
())
{
if
(
cl
.
getSupports64BitGlobalAtomics
())
{
cl
.
clearBuffer
(
pmeGrid
->
getDeviceBuffer
(),
pmeGrid
->
getSize
()
*
2
);
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
5
,
boxSize
);
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
5
,
boxSize
);
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
6
,
invBoxSize
);
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
6
,
invBoxSize
);
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
(),
PmeOrder
*
PmeOrder
*
PmeOrder
);
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
(),
PmeOrder
*
PmeOrder
*
PmeOrder
);
...
...
platforms/opencl/src/kernels/utilities.cl
View file @
9ab32c50
...
@@ -42,6 +42,29 @@ __kernel void clearFourBuffers(__global int* buffer1, int size1, __global int* b
...
@@ -42,6 +42,29 @@ __kernel void clearFourBuffers(__global int* buffer1, int size1, __global int* b
clearBuffer
(
buffer4,
size4
)
;
clearBuffer
(
buffer4,
size4
)
;
}
}
/**
*
Fill
five
buffers
with
0.
*/
__kernel
void
clearFiveBuffers
(
__global
int*
buffer1,
int
size1,
__global
int*
buffer2,
int
size2,
__global
int*
buffer3,
int
size3,
__global
int*
buffer4,
int
size4,
__global
int*
buffer5,
int
size5
)
{
clearBuffer
(
buffer1,
size1
)
;
clearBuffer
(
buffer2,
size2
)
;
clearBuffer
(
buffer3,
size3
)
;
clearBuffer
(
buffer4,
size4
)
;
clearBuffer
(
buffer5,
size5
)
;
}
/**
*
Fill
six
buffers
with
0.
*/
__kernel
void
clearSixBuffers
(
__global
int*
buffer1,
int
size1,
__global
int*
buffer2,
int
size2,
__global
int*
buffer3,
int
size3,
__global
int*
buffer4,
int
size4,
__global
int*
buffer5,
int
size5,
__global
int*
buffer6,
int
size6
)
{
clearBuffer
(
buffer1,
size1
)
;
clearBuffer
(
buffer2,
size2
)
;
clearBuffer
(
buffer3,
size3
)
;
clearBuffer
(
buffer4,
size4
)
;
clearBuffer
(
buffer5,
size5
)
;
clearBuffer
(
buffer6,
size6
)
;
}
/**
/**
*
Sum
a
collection
of
buffers
into
the
first
one.
*
Sum
a
collection
of
buffers
into
the
first
one.
*/
*/
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment