Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
be863b08
"vscode:/vscode.git/clone" did not exist on "f67c7b7e82331371343bc787a9b35ba996b34755"
Commit
be863b08
authored
Oct 22, 2014
by
Peter Eastman
Browse files
Better workaround for bug on GTX 980
parent
8e2fc4ea
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
17 deletions
+11
-17
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+0
-6
platforms/opencl/src/kernels/sort.cl
platforms/opencl/src/kernels/sort.cl
+11
-11
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
be863b08
...
...
@@ -1611,12 +1611,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
fft
=
new
OpenCLFFT3D
(
cl
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
string
vendor
=
cl
.
getDevice
().
getInfo
<
CL_DEVICE_VENDOR
>
();
usePmeQueue
=
(
vendor
.
size
()
>=
6
&&
vendor
.
substr
(
0
,
6
)
==
"NVIDIA"
);
if
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_EXTENSIONS
>
().
find
(
"cl_nv_device_attribute_query"
)
!=
string
::
npos
)
{
cl_uint
computeCapabilityMajor
;
clGetDeviceInfo
(
cl
.
getDevice
()(),
0x4000
,
sizeof
(
cl_uint
),
&
computeCapabilityMajor
,
NULL
);
// CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
if
(
computeCapabilityMajor
==
5
)
usePmeQueue
=
false
;
// Workaround for driver bug that affects GTX 980.
}
if
(
usePmeQueue
)
{
pmeQueue
=
cl
::
CommandQueue
(
cl
.
getContext
(),
cl
.
getDevice
());
int
recipForceGroup
=
force
.
getReciprocalSpaceForceGroup
();
...
...
platforms/opencl/src/kernels/sort.cl
View file @
be863b08
...
...
@@ -162,10 +162,10 @@ __kernel void copyDataToBuckets(__global const DATA_TYPE* restrict data, __globa
*
Sort
the
data
in
each
bucket.
*/
__kernel
void
sortBuckets
(
__global
DATA_TYPE*
restrict
data,
__global
const
DATA_TYPE*
restrict
buckets,
uint
numBuckets,
__global
const
uint*
restrict
bucketOffset,
__local
DATA_TYPE*
restrict
buffer
)
{
for
(
u
int
index
=
get_group_id
(
0
)
; index < numBuckets; index += get_num_groups(0)) {
u
int
startIndex
=
(
index
==
0
?
0
:
bucketOffset[index-1]
)
;
u
int
endIndex
=
bucketOffset[index]
;
u
int
length
=
endIndex-startIndex
;
for
(
int
index
=
get_group_id
(
0
)
; index < numBuckets; index += get_num_groups(0)) {
int
startIndex
=
(
index
==
0
?
0
:
bucketOffset[index-1]
)
;
int
endIndex
=
bucketOffset[index]
;
int
length
=
endIndex-startIndex
;
if
(
length
<=
get_local_size
(
0
))
{
//
Load
the
data
into
local
memory.
...
...
@@ -177,8 +177,8 @@ __kernel void sortBuckets(__global DATA_TYPE* restrict data, __global const DATA
//
Perform
a
bitonic
sort
in
local
memory.
for
(
u
int
k
=
2
; k <= get_local_size(0); k *= 2) {
for
(
u
int
j
=
k/2
; j > 0; j /= 2) {
for
(
int
k
=
2
; k <= get_local_size(0); k *= 2) {
for
(
int
j
=
k/2
; j > 0; j /= 2) {
int
ixj
=
get_local_id
(
0
)
^j
;
if
(
ixj
>
get_local_id
(
0
))
{
DATA_TYPE
value1
=
buffer[get_local_id
(
0
)
]
;
...
...
@@ -203,21 +203,21 @@ __kernel void sortBuckets(__global DATA_TYPE* restrict data, __global const DATA
else
{
//
Copy
the
bucket
data
over
to
the
output
array.
for
(
u
int
i
=
get_local_id
(
0
)
; i < length; i += get_local_size(0))
for
(
int
i
=
get_local_id
(
0
)
; i < length; i += get_local_size(0))
data[startIndex+i]
=
buckets[startIndex+i]
;
barrier
(
CLK_GLOBAL_MEM_FENCE
)
;
//
Perform
a
bitonic
sort
in
global
memory.
for
(
u
int
k
=
2
; k < 2*length; k *= 2) {
for
(
u
int
j
=
k/2
; j > 0; j /= 2) {
for
(
u
int
i
=
get_local_id
(
0
)
; i < length; i += get_local_size(0)) {
for
(
int
k
=
2
; k < 2*length; k *= 2) {
for
(
int
j
=
k/2
; j > 0; j /= 2) {
for
(
int
i
=
get_local_id
(
0
)
; i < length; i += get_local_size(0)) {
int
ixj
=
i^j
;
if
(
ixj
>
i
&&
ixj
<
length
)
{
DATA_TYPE
value1
=
data[startIndex+i]
;
DATA_TYPE
value2
=
data[startIndex+ixj]
;
bool
ascending
=
((
i&k
)
==
0
)
;
for
(
u
int
mask
=
k*2
; mask < 2*length; mask *= 2)
for
(
int
mask
=
k*2
; mask < 2*length; mask *= 2)
ascending
=
((
i&mask
)
==
0
?
!ascending
:
ascending
)
;
KEY_TYPE
lowKey
=
(
ascending
?
getValue
(
value1
)
:
getValue
(
value2
))
;
KEY_TYPE
highKey
=
(
ascending
?
getValue
(
value2
)
:
getValue
(
value1
))
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment