Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
089384df
Unverified
Commit
089384df
authored
Apr 09, 2020
by
peastman
Committed by
GitHub
Apr 09, 2020
Browse files
Merge pull request #2631 from peastman/sort
Fixed errors in sorting on some AMD GPUs
parents
7c7dc751
0297d2f0
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
79 additions
and
72 deletions
+79
-72
platforms/opencl/src/OpenCLSort.cpp
platforms/opencl/src/OpenCLSort.cpp
+79
-72
No files found.
platforms/opencl/src/OpenCLSort.cpp
View file @
089384df
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2010-20
18
Stanford University and the Authors. *
* Portions copyright (c) 2010-20
20
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -64,10 +64,11 @@ OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int le
...
@@ -64,10 +64,11 @@ OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int le
unsigned
int
maxPositionsSize
=
std
::
min
(
maxGroupSize
,
(
unsigned
int
)
computeBucketPositionsKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
()));
unsigned
int
maxPositionsSize
=
std
::
min
(
maxGroupSize
,
(
unsigned
int
)
computeBucketPositionsKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
()));
int
maxLocalBuffer
=
(
maxSharedMem
/
trait
->
getDataSize
())
/
2
;
int
maxLocalBuffer
=
(
maxSharedMem
/
trait
->
getDataSize
())
/
2
;
unsigned
int
maxShortList
=
min
(
8192
,
max
(
maxLocalBuffer
,
(
int
)
OpenCLContext
::
ThreadBlockSize
*
context
.
getNumThreadBlocks
()));
unsigned
int
maxShortList
=
min
(
8192
,
max
(
maxLocalBuffer
,
(
int
)
OpenCLContext
::
ThreadBlockSize
*
context
.
getNumThreadBlocks
()));
// On Qualcomm's OpenCL, it's essential to check against CL_KERNEL_WORK_GROUP_SIZE. Otherwise you get a crash.
// The following line checks CL_KERNEL_WORK_GROUP_SIZE to make sure we don't create too large a workgroup.
// But AMD's OpenCL returns an inappropriately small value for it that is much shorter than the actual
// Unfortunately, AMD's OpenCL returns an inappropriately small value for it that is much shorter than the actual
// maximum, so including the check hurts performance. For the moment I'm going to just comment it out.
// maximum, so including the check hurts performance. For the moment I'm just leaving it commented out.
// If we officially support Qualcomm in the future, we'll need to do something better.
// If the workgroup size turns out to be too large, we catch the exception and switch back to the standard
// sorting kernels.
//maxShortList = min(maxShortList, shortListKernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice()));
//maxShortList = min(maxShortList, shortListKernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice()));
isShortList
=
(
length
<=
maxShortList
);
isShortList
=
(
length
<=
maxShortList
);
string
vendor
=
context
.
getDevice
().
getInfo
<
CL_DEVICE_VENDOR
>
();
string
vendor
=
context
.
getDevice
().
getInfo
<
CL_DEVICE_VENDOR
>
();
...
@@ -92,12 +93,10 @@ OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int le
...
@@ -92,12 +93,10 @@ OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int le
// Create workspace arrays.
// Create workspace arrays.
if
(
!
isShortList
)
{
dataRange
.
initialize
(
context
,
2
,
trait
->
getKeySize
(),
"sortDataRange"
);
dataRange
.
initialize
(
context
,
2
,
trait
->
getKeySize
(),
"sortDataRange"
);
bucketOffset
.
initialize
<
cl_uint
>
(
context
,
numBuckets
,
"bucketOffset"
);
bucketOffset
.
initialize
<
cl_uint
>
(
context
,
numBuckets
,
"bucketOffset"
);
bucketOfElement
.
initialize
<
cl_uint
>
(
context
,
length
,
"bucketOfElement"
);
bucketOfElement
.
initialize
<
cl_uint
>
(
context
,
length
,
"bucketOfElement"
);
offsetInBucket
.
initialize
<
cl_uint
>
(
context
,
length
,
"offsetInBucket"
);
offsetInBucket
.
initialize
<
cl_uint
>
(
context
,
length
,
"offsetInBucket"
);
}
buckets
.
initialize
(
context
,
length
,
trait
->
getDataSize
(),
"buckets"
);
buckets
.
initialize
(
context
,
length
,
trait
->
getDataSize
(),
"buckets"
);
}
}
...
@@ -113,6 +112,7 @@ void OpenCLSort::sort(OpenCLArray& data) {
...
@@ -113,6 +112,7 @@ void OpenCLSort::sort(OpenCLArray& data) {
if
(
isShortList
)
{
if
(
isShortList
)
{
// We can use a simpler sort kernel that does the entire operation in one kernel.
// We can use a simpler sort kernel that does the entire operation in one kernel.
try
{
if
(
useShortList2
)
{
if
(
useShortList2
)
{
shortList2Kernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
shortList2Kernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
shortList2Kernel
.
setArg
<
cl
::
Buffer
>
(
1
,
buckets
.
getDeviceBuffer
());
shortList2Kernel
.
setArg
<
cl
::
Buffer
>
(
1
,
buckets
.
getDeviceBuffer
());
...
@@ -126,8 +126,16 @@ void OpenCLSort::sort(OpenCLArray& data) {
...
@@ -126,8 +126,16 @@ void OpenCLSort::sort(OpenCLArray& data) {
shortListKernel
.
setArg
(
2
,
dataLength
*
trait
->
getDataSize
(),
NULL
);
shortListKernel
.
setArg
(
2
,
dataLength
*
trait
->
getDataSize
(),
NULL
);
context
.
executeKernel
(
shortListKernel
,
sortKernelSize
,
sortKernelSize
);
context
.
executeKernel
(
shortListKernel
,
sortKernelSize
,
sortKernelSize
);
}
}
return
;
}
}
else
{
catch
(
exception
&
ex
)
{
// This can happen if we chose too large a size for the kernel. Switch
// over to the standard sorting method.
isShortList
=
false
;
}
}
// Compute the range of data values.
// Compute the range of data values.
unsigned
int
numBuckets
=
bucketOffset
.
getSize
();
unsigned
int
numBuckets
=
bucketOffset
.
getSize
();
...
@@ -176,5 +184,4 @@ void OpenCLSort::sort(OpenCLArray& data) {
...
@@ -176,5 +184,4 @@ void OpenCLSort::sort(OpenCLArray& data) {
sortBucketsKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
bucketOffset
.
getDeviceBuffer
());
sortBucketsKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
bucketOffset
.
getDeviceBuffer
());
sortBucketsKernel
.
setArg
(
4
,
sortKernelSize
*
trait
->
getDataSize
(),
NULL
);
sortBucketsKernel
.
setArg
(
4
,
sortKernelSize
*
trait
->
getDataSize
(),
NULL
);
context
.
executeKernel
(
sortBucketsKernel
,
((
data
.
getSize
()
+
sortKernelSize
-
1
)
/
sortKernelSize
)
*
sortKernelSize
,
sortKernelSize
);
context
.
executeKernel
(
sortBucketsKernel
,
((
data
.
getSize
()
+
sortKernelSize
-
1
)
/
sortKernelSize
)
*
sortKernelSize
,
sortKernelSize
);
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment