Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
832e7f04
Commit
832e7f04
authored
Oct 08, 2014
by
peastman
Browse files
CUDA platform does PME on a separate stream
parent
b8bae04c
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
73 additions
and
7 deletions
+73
-7
platforms/cuda/include/CudaContext.h
platforms/cuda/include/CudaContext.h
+13
-0
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+4
-0
platforms/cuda/src/CudaArray.cpp
platforms/cuda/src/CudaArray.cpp
+3
-3
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+14
-2
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+39
-2
No files found.
platforms/cuda/include/CudaContext.h
View file @
832e7f04
...
...
@@ -133,6 +133,18 @@ public:
int
getContextIndex
()
const
{
return
contextIndex
;
}
/**
* Get the stream currently being used for execution.  Kernels launched through
* executeKernel() and asynchronous CudaArray transfers are issued on this stream.
*
* @return the current stream (0 when the default stream is in use)
*/
CUstream
getCurrentStream
();
/**
* Set the stream to use for execution.  The stream stays in effect for kernels
* launched through executeKernel() and for asynchronous CudaArray transfers until
* it is changed again or restoreDefaultStream() is called.
*
* @param stream    the stream to make current
*/
void
setCurrentStream
(
CUstream
stream
);
/**
* Reset the context to using the default stream for execution.  This is
* equivalent to calling setCurrentStream(0).
*/
void
restoreDefaultStream
();
/**
* Get the array which contains the position (the xyz components) and charge (the w component) of each atom.
*/
...
...
@@ -521,6 +533,7 @@ private:
std
::
map
<
std
::
string
,
std
::
string
>
compilationDefines
;
CUcontext
context
;
CUdevice
device
;
CUstream
currentStream
;
CUfunction
clearBufferKernel
;
CUfunction
clearTwoBuffersKernel
;
CUfunction
clearThreeBuffersKernel
;
...
...
platforms/cuda/include/CudaKernels.h
View file @
832e7f04
...
...
@@ -599,6 +599,8 @@ private:
class
PmeIO
;
class
PmePreComputation
;
class
PmePostComputation
;
class
SyncStreamPreComputation
;
class
SyncStreamPostComputation
;
CudaContext
&
cu
;
bool
hasInitializedFFT
;
CudaArray
*
sigmaEpsilon
;
...
...
@@ -614,6 +616,8 @@ private:
CudaSort
*
sort
;
Kernel
cpuPme
;
PmeIO
*
pmeio
;
CUstream
pmeStream
;
CUevent
pmeSyncEvent
;
cufftHandle
fftForward
;
cufftHandle
fftBackward
;
CUfunction
ewaldSumsKernel
;
...
...
platforms/cuda/src/CudaArray.cpp
View file @
832e7f04
...
...
@@ -58,7 +58,7 @@ void CudaArray::upload(const void* data, bool blocking) {
if
(
blocking
)
result
=
cuMemcpyHtoD
(
pointer
,
data
,
size
*
elementSize
);
else
result
=
cuMemcpyHtoDAsync
(
pointer
,
data
,
size
*
elementSize
,
0
);
result
=
cuMemcpyHtoDAsync
(
pointer
,
data
,
size
*
elementSize
,
context
.
getCurrentStream
()
);
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error uploading array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
...
...
@@ -71,7 +71,7 @@ void CudaArray::download(void* data, bool blocking) const {
if
(
blocking
)
result
=
cuMemcpyDtoH
(
data
,
pointer
,
size
*
elementSize
);
else
result
=
cuMemcpyDtoHAsync
(
data
,
pointer
,
size
*
elementSize
,
0
);
result
=
cuMemcpyDtoHAsync
(
data
,
pointer
,
size
*
elementSize
,
context
.
getCurrentStream
()
);
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error downloading array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
...
...
@@ -82,7 +82,7 @@ void CudaArray::download(void* data, bool blocking) const {
void
CudaArray
::
copyTo
(
CudaArray
&
dest
)
const
{
if
(
dest
.
getSize
()
!=
size
||
dest
.
getElementSize
()
!=
elementSize
)
throw
OpenMMException
(
"Error copying array "
+
name
+
" to "
+
dest
.
getName
()
+
": The destination array does not match the size of the array"
);
CUresult
result
=
cuMemcpyDtoDAsync
(
dest
.
getDevicePointer
(),
pointer
,
size
*
elementSize
,
0
);
CUresult
result
=
cuMemcpyDtoDAsync
(
dest
.
getDevicePointer
(),
pointer
,
size
*
elementSize
,
context
.
getCurrentStream
()
);
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error copying array "
<<
name
<<
" to "
<<
dest
.
getName
()
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
...
...
platforms/cuda/src/CudaContext.cpp
View file @
832e7f04
...
...
@@ -72,7 +72,7 @@ const int CudaContext::TileSize = sizeof(tileflags)*8;
bool
CudaContext
::
hasInitializedCuda
=
false
;
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
const
string
&
tempDir
,
const
std
::
string
&
hostCompiler
,
CudaPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
const
string
&
tempDir
,
const
std
::
string
&
hostCompiler
,
CudaPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
currentStream
(
0
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
pinnedBuffer
(
NULL
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
force
(
NULL
),
energyBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
this
->
compiler
=
"
\"
"
+
compiler
+
"
\"
"
;
...
...
@@ -507,6 +507,18 @@ CUfunction CudaContext::getKernel(CUmodule& module, const string& name) {
return
function
;
}
// Report the stream on which this context currently issues its work.
CUstream CudaContext::getCurrentStream() {
    return this->currentStream;
}
// Select the stream on which this context issues its work from now on.
void CudaContext::setCurrentStream(CUstream stream) {
    this->currentStream = stream;
}
void
CudaContext
::
restoreDefaultStream
()
{
setCurrentStream
(
0
);
}
string
CudaContext
::
doubleToString
(
double
value
)
{
stringstream
s
;
s
.
precision
(
useDoublePrecision
?
16
:
8
);
...
...
@@ -575,7 +587,7 @@ void CudaContext::executeKernel(CUfunction kernel, void** arguments, int threads
if
(
blockSize
==
-
1
)
blockSize
=
ThreadBlockSize
;
int
gridSize
=
std
::
min
((
threads
+
blockSize
-
1
)
/
blockSize
,
numThreadBlocks
);
CUresult
result
=
cuLaunchKernel
(
kernel
,
gridSize
,
1
,
1
,
blockSize
,
1
,
1
,
sharedSize
,
0
,
arguments
,
NULL
);
CUresult
result
=
cuLaunchKernel
(
kernel
,
gridSize
,
1
,
1
,
blockSize
,
1
,
1
,
sharedSize
,
currentStream
,
arguments
,
NULL
);
if
(
result
!=
CUDA_SUCCESS
)
{
stringstream
str
;
str
<<
"Error invoking kernel: "
<<
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
832e7f04
...
...
@@ -1398,6 +1398,31 @@ private:
CalcPmeReciprocalForceKernel
::
IO
&
io
;
};
/**
 * Pre-computation step that makes a secondary stream wait for the work that has
 * been queued on the default stream (stream 0) up to the moment this step runs.
 */
class CudaCalcNonbondedForceKernel::SyncStreamPreComputation : public CudaContext::ForcePreComputation {
public:
    /**
     * @param stream    the stream that must wait for the default stream
     * @param event     the event used to mark the synchronization point
     */
    SyncStreamPreComputation(CUstream stream, CUevent event) : waitingStream(stream), syncEvent(event) {
    }
    /**
     * Record the event on the default stream, then make the secondary stream wait on it.
     * The arguments are not used, and the CUDA return codes are not inspected.
     */
    void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
        const CUstream defaultStream = 0;
        const unsigned int noFlags = 0;
        // Mark the current end of the default stream's queue...
        cuEventRecord(syncEvent, defaultStream);
        // ...and hold the secondary stream back until that mark has been reached.
        cuStreamWaitEvent(waitingStream, syncEvent, noFlags);
    }
private:
    CUstream waitingStream;
    CUevent syncEvent;
};
/**
 * Post-computation step that makes the default stream (stream 0) wait for an event,
 * so that later work on the default stream is ordered after whatever recorded it.
 */
class CudaCalcNonbondedForceKernel::SyncStreamPostComputation : public CudaContext::ForcePostComputation {
public:
    /**
     * @param event     the event the default stream must wait for
     */
    SyncStreamPostComputation(CUevent event) : syncEvent(event) {
    }
    /**
     * Make the default stream wait on the event.  The arguments are not used, and
     * the CUDA return code is not inspected.
     *
     * @return always 0.0
     */
    double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
        const CUstream defaultStream = 0;
        const unsigned int noFlags = 0;
        cuStreamWaitEvent(defaultStream, syncEvent, noFlags);
        return 0.0;
    }
private:
    CUevent syncEvent;
};
CudaCalcNonbondedForceKernel
::~
CudaCalcNonbondedForceKernel
()
{
cu
.
setAsCurrent
();
if
(
sigmaEpsilon
!=
NULL
)
...
...
@@ -1427,6 +1452,8 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
if
(
hasInitializedFFT
)
{
cufftDestroy
(
fftForward
);
cufftDestroy
(
fftBackward
);
cuStreamDestroy
(
pmeStream
);
cuEventDestroy
(
pmeSyncEvent
);
}
}
...
...
@@ -1636,6 +1663,14 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
cufftSetCompatibilityMode
(
fftForward
,
CUFFT_COMPATIBILITY_NATIVE
);
cufftSetCompatibilityMode
(
fftBackward
,
CUFFT_COMPATIBILITY_NATIVE
);
// Prepare for doing PME on its own stream.
cuStreamCreate
(
&
pmeStream
,
CU_STREAM_NON_BLOCKING
);
cufftSetStream
(
fftForward
,
pmeStream
);
cufftSetStream
(
fftBackward
,
pmeStream
);
CHECK_RESULT
(
cuEventCreate
(
&
pmeSyncEvent
,
CU_EVENT_DISABLE_TIMING
),
"Error creating event for NonbondedForce"
);
cu
.
addPreComputation
(
new
SyncStreamPreComputation
(
pmeStream
,
pmeSyncEvent
));
cu
.
addPostComputation
(
new
SyncStreamPostComputation
(
pmeSyncEvent
));
hasInitializedFFT
=
true
;
// Initialize the b-spline moduli.
...
...
@@ -1752,6 +1787,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
cu
.
executeKernel
(
ewaldForcesKernel
,
forcesArgs
,
cu
.
getNumAtoms
());
}
if
(
directPmeGrid
!=
NULL
&&
includeReciprocal
)
{
cu
.
setCurrentStream
(
pmeStream
);
void
*
gridIndexArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
pmeGridIndexKernel
,
gridIndexArgs
,
cu
.
getNumAtoms
());
...
...
@@ -1788,7 +1824,8 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
void
*
interpolateArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
directPmeGrid
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeInterpolateForceKernel
,
interpolateArgs
,
cu
.
getNumAtoms
(),
128
);
cuEventRecord
(
pmeSyncEvent
,
pmeStream
);
cu
.
restoreDefaultStream
();
}
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
if
(
dispersionCoefficient
!=
0.0
&&
includeDirect
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment