Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
35b9d787
"libraries/lepton/vscode:/vscode.git/clone" did not exist on "fc79cc94ec2c0087578cab282f62d0c6ab94fdf8"
Commit
35b9d787
authored
Nov 22, 2013
by
peastman
Browse files
Workarounds for devices whose maximum workgroup size varies between kernels
parent
d59a34fb
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
229 additions
and
203 deletions
+229
-203
platforms/opencl/src/OpenCLFFT3D.cpp
platforms/opencl/src/OpenCLFFT3D.cpp
+177
-166
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+46
-34
platforms/opencl/src/OpenCLSort.cpp
platforms/opencl/src/OpenCLSort.cpp
+6
-3
No files found.
platforms/opencl/src/OpenCLFFT3D.cpp
View file @
35b9d787
...
...
@@ -74,9 +74,12 @@ int OpenCLFFT3D::findLegalDimension(int minimum) {
}
cl
::
Kernel
OpenCLFFT3D
::
createKernel
(
int
xsize
,
int
ysize
,
int
zsize
,
int
&
threads
)
{
bool
loopRequired
=
(
context
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
int
maxThreads
=
std
::
min
(
256
,
(
int
)
context
.
getDevice
().
getInfo
<
CL_DEVICE_MAX_WORK_GROUP_SIZE
>
());
bool
isCPU
=
context
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
;
while
(
true
)
{
bool
loopRequired
=
(
zsize
>
maxThreads
||
isCPU
);
stringstream
source
;
int
blocksPerGroup
=
(
loopRequired
?
1
:
max
(
1
,
256
/
zsize
));
int
blocksPerGroup
=
(
loopRequired
?
1
:
max
(
1
,
maxThreads
/
zsize
));
int
stage
=
0
;
int
L
=
zsize
;
int
m
=
1
;
...
...
@@ -242,10 +245,18 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int& threa
replacements
[
"LOOP_REQUIRED"
]
=
(
loopRequired
?
"1"
:
"0"
);
cl
::
Program
program
=
context
.
createProgram
(
context
.
replaceStrings
(
OpenCLKernelSources
::
fft
,
replacements
));
cl
::
Kernel
kernel
(
program
,
"execFFT"
);
threads
=
(
isCPU
?
1
:
blocksPerGroup
*
zsize
);
int
kernelMaxThreads
=
kernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
());
if
(
threads
>
kernelMaxThreads
)
{
// The device can't handle this block size, so reduce it.
maxThreads
=
kernelMaxThreads
;
continue
;
}
int
bufferSize
=
blocksPerGroup
*
zsize
*
(
context
.
getUseDoublePrecision
()
?
sizeof
(
mm_double2
)
:
sizeof
(
mm_float2
));
kernel
.
setArg
(
3
,
bufferSize
,
NULL
);
kernel
.
setArg
(
4
,
bufferSize
,
NULL
);
kernel
.
setArg
(
5
,
bufferSize
,
NULL
);
threads
=
(
loopRequired
?
1
:
blocksPerGroup
*
zsize
);
return
kernel
;
}
}
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
35b9d787
...
...
@@ -317,9 +317,11 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
for
(
int
i
=
0
;
i
<
(
int
)
exclusionBlocksForBlock
.
size
();
i
++
)
maxExclusions
=
(
maxExclusions
>
exclusionBlocksForBlock
[
i
].
size
()
?
maxExclusions
:
exclusionBlocksForBlock
[
i
].
size
());
defines
[
"MAX_EXCLUSIONS"
]
=
context
.
intToString
(
maxExclusions
);
defines
[
"GROUP_SIZE"
]
=
(
deviceIsCpu
?
"32"
:
"128"
);
defines
[
"BUFFER_GROUPS"
]
=
(
deviceIsCpu
?
"4"
:
"2"
);
string
file
=
(
deviceIsCpu
?
OpenCLKernelSources
::
findInteractingBlocks_cpu
:
OpenCLKernelSources
::
findInteractingBlocks
);
int
groupSize
=
(
deviceIsCpu
?
32
:
128
);
while
(
true
)
{
defines
[
"GROUP_SIZE"
]
=
context
.
intToString
(
groupSize
);
cl
::
Program
interactingBlocksProgram
=
context
.
createProgram
(
file
,
defines
);
findBlockBoundsKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlockBounds"
);
findBlockBoundsKernel
.
setArg
<
cl_int
>
(
0
,
context
.
getNumAtoms
());
...
...
@@ -353,6 +355,16 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
13
,
exclusionRowIndices
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
14
,
oldPositions
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
15
,
rebuildNeighborList
->
getDeviceBuffer
());
if
(
findInteractingBlocksKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
())
<
groupSize
)
{
// The device can't handle this block size, so reduce it.
groupSize
-=
32
;
if
(
groupSize
<
32
)
throw
OpenMMException
(
"Failed to create findInteractingBlocks kernel"
);
continue
;
}
break
;
}
}
}
...
...
platforms/opencl/src/OpenCLSort.cpp
View file @
35b9d787
...
...
@@ -56,10 +56,13 @@ OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int le
unsigned
int
maxGroupSize
=
std
::
min
(
256
,
(
int
)
context
.
getDevice
().
getInfo
<
CL_DEVICE_MAX_WORK_GROUP_SIZE
>
());
int
maxSharedMem
=
context
.
getDevice
().
getInfo
<
CL_DEVICE_LOCAL_MEM_SIZE
>
();
unsigned
int
maxLocalBuffer
=
(
unsigned
int
)
((
maxSharedMem
/
trait
->
getDataSize
())
/
2
);
isShortList
=
(
length
<=
maxLocalBuffer
);
for
(
rangeKernelSize
=
1
;
rangeKernelSize
*
2
<=
maxGroupSize
;
rangeKernelSize
*=
2
)
unsigned
int
maxRangeSize
=
std
::
min
(
maxGroupSize
,
(
unsigned
int
)
computeRangeKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
()));
unsigned
int
maxPositionsSize
=
std
::
min
(
maxGroupSize
,
(
unsigned
int
)
computeBucketPositionsKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
()));
unsigned
int
maxShortListSize
=
shortListKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
());
isShortList
=
(
length
<=
maxLocalBuffer
&&
length
<
maxShortListSize
);
for
(
rangeKernelSize
=
1
;
rangeKernelSize
*
2
<=
maxRangeSize
;
rangeKernelSize
*=
2
)
;
positionsKernelSize
=
rangeKernelSize
;
positionsKernelSize
=
std
::
min
(
rangeKernelSize
,
maxPositionsSize
)
;
sortKernelSize
=
(
isShortList
?
rangeKernelSize
:
rangeKernelSize
/
2
);
if
(
rangeKernelSize
>
length
)
rangeKernelSize
=
length
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment