Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
be863b08
Commit
be863b08
authored
Oct 22, 2014
by
Peter Eastman
Browse files
Better workaround for bug on GTX 980
parent
8e2fc4ea
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
17 deletions
+11
-17
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+0
-6
platforms/opencl/src/kernels/sort.cl
platforms/opencl/src/kernels/sort.cl
+11
-11
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
be863b08
...
...
@@ -1611,12 +1611,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
fft
=
new
OpenCLFFT3D
(
cl
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
string
vendor
=
cl
.
getDevice
().
getInfo
<
CL_DEVICE_VENDOR
>
();
usePmeQueue
=
(
vendor
.
size
()
>=
6
&&
vendor
.
substr
(
0
,
6
)
==
"NVIDIA"
);
if
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_EXTENSIONS
>
().
find
(
"cl_nv_device_attribute_query"
)
!=
string
::
npos
)
{
cl_uint
computeCapabilityMajor
;
clGetDeviceInfo
(
cl
.
getDevice
()(),
0x4000
,
sizeof
(
cl_uint
),
&
computeCapabilityMajor
,
NULL
);
// CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
if
(
computeCapabilityMajor
==
5
)
usePmeQueue
=
false
;
// Workaround for driver bug that affects GTX 980.
}
if
(
usePmeQueue
)
{
pmeQueue
=
cl
::
CommandQueue
(
cl
.
getContext
(),
cl
.
getDevice
());
int
recipForceGroup
=
force
.
getReciprocalSpaceForceGroup
();
...
...
platforms/opencl/src/kernels/sort.cl
View file @
be863b08
...
...
@@ -162,10 +162,10 @@ __kernel void copyDataToBuckets(__global const DATA_TYPE* restrict data, __globa
*
Sort
the
data
in
each
bucket.
*/
__kernel
void
sortBuckets
(
__global
DATA_TYPE*
restrict
data,
__global
const
DATA_TYPE*
restrict
buckets,
uint
numBuckets,
__global
const
uint*
restrict
bucketOffset,
__local
DATA_TYPE*
restrict
buffer
)
{
for
(
u
int
index
=
get_group_id
(
0
)
; index < numBuckets; index += get_num_groups(0)) {
u
int
startIndex
=
(
index
==
0
?
0
:
bucketOffset[index-1]
)
;
u
int
endIndex
=
bucketOffset[index]
;
u
int
length
=
endIndex-startIndex
;
for
(
int
index
=
get_group_id
(
0
)
; index < numBuckets; index += get_num_groups(0)) {
int
startIndex
=
(
index
==
0
?
0
:
bucketOffset[index-1]
)
;
int
endIndex
=
bucketOffset[index]
;
int
length
=
endIndex-startIndex
;
if
(
length
<=
get_local_size
(
0
))
{
//
Load
the
data
into
local
memory.
...
...
@@ -177,8 +177,8 @@ __kernel void sortBuckets(__global DATA_TYPE* restrict data, __global const DATA
//
Perform
a
bitonic
sort
in
local
memory.
for
(
u
int
k
=
2
; k <= get_local_size(0); k *= 2) {
for
(
u
int
j
=
k/2
; j > 0; j /= 2) {
for
(
int
k
=
2
; k <= get_local_size(0); k *= 2) {
for
(
int
j
=
k/2
; j > 0; j /= 2) {
int
ixj
=
get_local_id
(
0
)
^j
;
if
(
ixj
>
get_local_id
(
0
))
{
DATA_TYPE
value1
=
buffer[get_local_id
(
0
)
]
;
...
...
@@ -203,21 +203,21 @@ __kernel void sortBuckets(__global DATA_TYPE* restrict data, __global const DATA
else
{
//
Copy
the
bucket
data
over
to
the
output
array.
for
(
u
int
i
=
get_local_id
(
0
)
; i < length; i += get_local_size(0))
for
(
int
i
=
get_local_id
(
0
)
; i < length; i += get_local_size(0))
data[startIndex+i]
=
buckets[startIndex+i]
;
barrier
(
CLK_GLOBAL_MEM_FENCE
)
;
//
Perform
a
bitonic
sort
in
global
memory.
for
(
u
int
k
=
2
; k < 2*length; k *= 2) {
for
(
u
int
j
=
k/2
; j > 0; j /= 2) {
for
(
u
int
i
=
get_local_id
(
0
)
; i < length; i += get_local_size(0)) {
for
(
int
k
=
2
; k < 2*length; k *= 2) {
for
(
int
j
=
k/2
; j > 0; j /= 2) {
for
(
int
i
=
get_local_id
(
0
)
; i < length; i += get_local_size(0)) {
int
ixj
=
i^j
;
if
(
ixj
>
i
&&
ixj
<
length
)
{
DATA_TYPE
value1
=
data[startIndex+i]
;
DATA_TYPE
value2
=
data[startIndex+ixj]
;
bool
ascending
=
((
i&k
)
==
0
)
;
for
(
u
int
mask
=
k*2
; mask < 2*length; mask *= 2)
for
(
int
mask
=
k*2
; mask < 2*length; mask *= 2)
ascending
=
((
i&mask
)
==
0
?
!ascending
:
ascending
)
;
KEY_TYPE
lowKey
=
(
ascending
?
getValue
(
value1
)
:
getValue
(
value2
))
;
KEY_TYPE
highKey
=
(
ascending
?
getValue
(
value2
)
:
getValue
(
value1
))
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment