Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
e19cefde
Commit
e19cefde
authored
Oct 22, 2014
by
peastman
Browse files
Merge pull request #665 from peastman/980
Workaround for driver bugs on GTX 980
parents
ba66e90e
be863b08
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
34 additions
and
26 deletions
+34
-26
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+1
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+22
-14
platforms/opencl/src/kernels/sort.cl
platforms/opencl/src/kernels/sort.cl
+11
-11
No files found.
platforms/cuda/include/CudaKernels.h
View file @
e19cefde
...
...
@@ -632,7 +632,7 @@ private:
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
;
int
interpolateForceThreads
;
bool
hasCoulomb
,
hasLJ
;
bool
hasCoulomb
,
hasLJ
,
usePmeStream
;
static
const
int
PmeOrder
=
5
;
};
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
e19cefde
...
...
@@ -1457,9 +1457,11 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
if
(
hasInitializedFFT
)
{
cufftDestroy
(
fftForward
);
cufftDestroy
(
fftBackward
);
if
(
usePmeStream
)
{
cuStreamDestroy
(
pmeStream
);
cuEventDestroy
(
pmeSyncEvent
);
}
}
}
/**
...
...
@@ -1670,6 +1672,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Prepare for doing PME on its own stream.
usePmeStream
=
(
cu
.
getComputeCapability
()
<
5.0
);
// A driver bug causes this to be very slow on GTX 980.
if
(
usePmeStream
)
{
cuStreamCreate
(
&
pmeStream
,
CU_STREAM_NON_BLOCKING
);
cufftSetStream
(
fftForward
,
pmeStream
);
cufftSetStream
(
fftBackward
,
pmeStream
);
...
...
@@ -1679,6 +1683,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
recipForceGroup
=
force
.
getForceGroup
();
cu
.
addPreComputation
(
new
SyncStreamPreComputation
(
pmeStream
,
pmeSyncEvent
,
recipForceGroup
));
cu
.
addPostComputation
(
new
SyncStreamPostComputation
(
pmeSyncEvent
,
recipForceGroup
));
}
hasInitializedFFT
=
true
;
// Initialize the b-spline moduli.
...
...
@@ -1795,6 +1800,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
cu
.
executeKernel
(
ewaldForcesKernel
,
forcesArgs
,
cu
.
getNumAtoms
());
}
if
(
directPmeGrid
!=
NULL
&&
includeReciprocal
)
{
if
(
usePmeStream
)
cu
.
setCurrentStream
(
pmeStream
);
void
*
gridIndexArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
pmeGridIndexKernel
,
gridIndexArgs
,
cu
.
getNumAtoms
());
...
...
@@ -1832,9 +1838,11 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
void
*
interpolateArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
directPmeGrid
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeInterpolateForceKernel
,
interpolateArgs
,
cu
.
getNumAtoms
(),
128
);
if
(
usePmeStream
)
{
cuEventRecord
(
pmeSyncEvent
,
pmeStream
);
cu
.
restoreDefaultStream
();
}
}
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
if
(
dispersionCoefficient
!=
0.0
&&
includeDirect
)
{
double4
boxSize
=
cu
.
getPeriodicBoxSize
();
...
...
platforms/opencl/src/kernels/sort.cl
View file @
e19cefde
...
...
@@ -162,10 +162,10 @@ __kernel void copyDataToBuckets(__global const DATA_TYPE* restrict data, __globa
*
Sort
the
data
in
each
bucket.
*/
__kernel
void
sortBuckets
(
__global
DATA_TYPE*
restrict
data,
__global
const
DATA_TYPE*
restrict
buckets,
uint
numBuckets,
__global
const
uint*
restrict
bucketOffset,
__local
DATA_TYPE*
restrict
buffer
)
{
for
(
u
int
index
=
get_group_id
(
0
)
; index < numBuckets; index += get_num_groups(0)) {
u
int
startIndex
=
(
index
==
0
?
0
:
bucketOffset[index-1]
)
;
u
int
endIndex
=
bucketOffset[index]
;
u
int
length
=
endIndex-startIndex
;
for
(
int
index
=
get_group_id
(
0
)
; index < numBuckets; index += get_num_groups(0)) {
int
startIndex
=
(
index
==
0
?
0
:
bucketOffset[index-1]
)
;
int
endIndex
=
bucketOffset[index]
;
int
length
=
endIndex-startIndex
;
if
(
length
<=
get_local_size
(
0
))
{
//
Load
the
data
into
local
memory.
...
...
@@ -177,8 +177,8 @@ __kernel void sortBuckets(__global DATA_TYPE* restrict data, __global const DATA
//
Perform
a
bitonic
sort
in
local
memory.
for
(
u
int
k
=
2
; k <= get_local_size(0); k *= 2) {
for
(
u
int
j
=
k/2
; j > 0; j /= 2) {
for
(
int
k
=
2
; k <= get_local_size(0); k *= 2) {
for
(
int
j
=
k/2
; j > 0; j /= 2) {
int
ixj
=
get_local_id
(
0
)
^j
;
if
(
ixj
>
get_local_id
(
0
))
{
DATA_TYPE
value1
=
buffer[get_local_id
(
0
)
]
;
...
...
@@ -203,21 +203,21 @@ __kernel void sortBuckets(__global DATA_TYPE* restrict data, __global const DATA
else
{
//
Copy
the
bucket
data
over
to
the
output
array.
for
(
u
int
i
=
get_local_id
(
0
)
; i < length; i += get_local_size(0))
for
(
int
i
=
get_local_id
(
0
)
; i < length; i += get_local_size(0))
data[startIndex+i]
=
buckets[startIndex+i]
;
barrier
(
CLK_GLOBAL_MEM_FENCE
)
;
//
Perform
a
bitonic
sort
in
global
memory.
for
(
u
int
k
=
2
; k < 2*length; k *= 2) {
for
(
u
int
j
=
k/2
; j > 0; j /= 2) {
for
(
u
int
i
=
get_local_id
(
0
)
; i < length; i += get_local_size(0)) {
for
(
int
k
=
2
; k < 2*length; k *= 2) {
for
(
int
j
=
k/2
; j > 0; j /= 2) {
for
(
int
i
=
get_local_id
(
0
)
; i < length; i += get_local_size(0)) {
int
ixj
=
i^j
;
if
(
ixj
>
i
&&
ixj
<
length
)
{
DATA_TYPE
value1
=
data[startIndex+i]
;
DATA_TYPE
value2
=
data[startIndex+ixj]
;
bool
ascending
=
((
i&k
)
==
0
)
;
for
(
u
int
mask
=
k*2
; mask < 2*length; mask *= 2)
for
(
int
mask
=
k*2
; mask < 2*length; mask *= 2)
ascending
=
((
i&mask
)
==
0
?
!ascending
:
ascending
)
;
KEY_TYPE
lowKey
=
(
ascending
?
getValue
(
value1
)
:
getValue
(
value2
))
;
KEY_TYPE
highKey
=
(
ascending
?
getValue
(
value2
)
:
getValue
(
value1
))
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment