Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
8e2fc4ea
Commit
8e2fc4ea
authored
Oct 21, 2014
by
Peter Eastman
Browse files
Workaround for driver bugs that affect GTX 980
parent
ba66e90e
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
29 additions
and
15 deletions
+29
-15
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+1
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+22
-14
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+6
-0
No files found.
platforms/cuda/include/CudaKernels.h
View file @
8e2fc4ea
...
@@ -632,7 +632,7 @@ private:
...
@@ -632,7 +632,7 @@ private:
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
;
int
interpolateForceThreads
;
int
interpolateForceThreads
;
bool
hasCoulomb
,
hasLJ
;
bool
hasCoulomb
,
hasLJ
,
usePmeStream
;
static
const
int
PmeOrder
=
5
;
static
const
int
PmeOrder
=
5
;
};
};
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
8e2fc4ea
...
@@ -1457,9 +1457,11 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
...
@@ -1457,9 +1457,11 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
if
(
hasInitializedFFT
)
{
if
(
hasInitializedFFT
)
{
cufftDestroy
(
fftForward
);
cufftDestroy
(
fftForward
);
cufftDestroy
(
fftBackward
);
cufftDestroy
(
fftBackward
);
if
(
usePmeStream
)
{
cuStreamDestroy
(
pmeStream
);
cuStreamDestroy
(
pmeStream
);
cuEventDestroy
(
pmeSyncEvent
);
cuEventDestroy
(
pmeSyncEvent
);
}
}
}
}
}
/**
/**
...
@@ -1670,6 +1672,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1670,6 +1672,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Prepare for doing PME on its own stream.
// Prepare for doing PME on its own stream.
usePmeStream
=
(
cu
.
getComputeCapability
()
<
5.0
);
// A driver bug causes this to be very slow on GTX 980.
if
(
usePmeStream
)
{
cuStreamCreate
(
&
pmeStream
,
CU_STREAM_NON_BLOCKING
);
cuStreamCreate
(
&
pmeStream
,
CU_STREAM_NON_BLOCKING
);
cufftSetStream
(
fftForward
,
pmeStream
);
cufftSetStream
(
fftForward
,
pmeStream
);
cufftSetStream
(
fftBackward
,
pmeStream
);
cufftSetStream
(
fftBackward
,
pmeStream
);
...
@@ -1679,6 +1683,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1679,6 +1683,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
recipForceGroup
=
force
.
getForceGroup
();
recipForceGroup
=
force
.
getForceGroup
();
cu
.
addPreComputation
(
new
SyncStreamPreComputation
(
pmeStream
,
pmeSyncEvent
,
recipForceGroup
));
cu
.
addPreComputation
(
new
SyncStreamPreComputation
(
pmeStream
,
pmeSyncEvent
,
recipForceGroup
));
cu
.
addPostComputation
(
new
SyncStreamPostComputation
(
pmeSyncEvent
,
recipForceGroup
));
cu
.
addPostComputation
(
new
SyncStreamPostComputation
(
pmeSyncEvent
,
recipForceGroup
));
}
hasInitializedFFT
=
true
;
hasInitializedFFT
=
true
;
// Initialize the b-spline moduli.
// Initialize the b-spline moduli.
...
@@ -1795,6 +1800,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1795,6 +1800,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
cu
.
executeKernel
(
ewaldForcesKernel
,
forcesArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
ewaldForcesKernel
,
forcesArgs
,
cu
.
getNumAtoms
());
}
}
if
(
directPmeGrid
!=
NULL
&&
includeReciprocal
)
{
if
(
directPmeGrid
!=
NULL
&&
includeReciprocal
)
{
if
(
usePmeStream
)
cu
.
setCurrentStream
(
pmeStream
);
cu
.
setCurrentStream
(
pmeStream
);
void
*
gridIndexArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
void
*
gridIndexArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
pmeGridIndexKernel
,
gridIndexArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeGridIndexKernel
,
gridIndexArgs
,
cu
.
getNumAtoms
());
...
@@ -1832,9 +1838,11 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1832,9 +1838,11 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
void
*
interpolateArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
directPmeGrid
->
getDevicePointer
(),
void
*
interpolateArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
directPmeGrid
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
()};
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeInterpolateForceKernel
,
interpolateArgs
,
cu
.
getNumAtoms
(),
128
);
cu
.
executeKernel
(
pmeInterpolateForceKernel
,
interpolateArgs
,
cu
.
getNumAtoms
(),
128
);
if
(
usePmeStream
)
{
cuEventRecord
(
pmeSyncEvent
,
pmeStream
);
cuEventRecord
(
pmeSyncEvent
,
pmeStream
);
cu
.
restoreDefaultStream
();
cu
.
restoreDefaultStream
();
}
}
}
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
if
(
dispersionCoefficient
!=
0.0
&&
includeDirect
)
{
if
(
dispersionCoefficient
!=
0.0
&&
includeDirect
)
{
double4
boxSize
=
cu
.
getPeriodicBoxSize
();
double4
boxSize
=
cu
.
getPeriodicBoxSize
();
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
8e2fc4ea
...
@@ -1611,6 +1611,12 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1611,6 +1611,12 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
fft
=
new
OpenCLFFT3D
(
cl
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
fft
=
new
OpenCLFFT3D
(
cl
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
string
vendor
=
cl
.
getDevice
().
getInfo
<
CL_DEVICE_VENDOR
>
();
string
vendor
=
cl
.
getDevice
().
getInfo
<
CL_DEVICE_VENDOR
>
();
usePmeQueue
=
(
vendor
.
size
()
>=
6
&&
vendor
.
substr
(
0
,
6
)
==
"NVIDIA"
);
usePmeQueue
=
(
vendor
.
size
()
>=
6
&&
vendor
.
substr
(
0
,
6
)
==
"NVIDIA"
);
if
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_EXTENSIONS
>
().
find
(
"cl_nv_device_attribute_query"
)
!=
string
::
npos
)
{
cl_uint
computeCapabilityMajor
;
clGetDeviceInfo
(
cl
.
getDevice
()(),
0x4000
,
sizeof
(
cl_uint
),
&
computeCapabilityMajor
,
NULL
);
// CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
if
(
computeCapabilityMajor
==
5
)
usePmeQueue
=
false
;
// Workaround for driver bug that affects GTX 980.
}
if
(
usePmeQueue
)
{
if
(
usePmeQueue
)
{
pmeQueue
=
cl
::
CommandQueue
(
cl
.
getContext
(),
cl
.
getDevice
());
pmeQueue
=
cl
::
CommandQueue
(
cl
.
getContext
(),
cl
.
getDevice
());
int
recipForceGroup
=
force
.
getReciprocalSpaceForceGroup
();
int
recipForceGroup
=
force
.
getReciprocalSpaceForceGroup
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment