Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
9d3a655b
Commit
9d3a655b
authored
Mar 10, 2015
by
peastman
Browse files
Fixed a performance regression in multi-GPU on CUDA
parent
5069c668
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
16 additions
and
9 deletions
+16
-9
examples/benchmark.py
examples/benchmark.py
+6
-3
platforms/cuda/include/CudaParallelKernels.h
platforms/cuda/include/CudaParallelKernels.h
+1
-1
platforms/cuda/src/CudaParallelKernels.cpp
platforms/cuda/src/CudaParallelKernels.cpp
+9
-5
No files found.
examples/benchmark.py
View file @
9d3a655b
...
...
@@ -6,9 +6,9 @@ import sys
from
datetime
import
datetime
from
optparse
import
OptionParser
def
timeIntegration
(
context
,
steps
):
def
timeIntegration
(
context
,
steps
,
initialSteps
):
"""Integrate a Context for a specified number of steps, then return how many seconds it took."""
context
.
getIntegrator
().
step
(
5
)
# Make sure everything is fully initialized
context
.
getIntegrator
().
step
(
initialSteps
)
# Make sure everything is fully initialized
context
.
getState
(
getEnergy
=
True
)
start
=
datetime
.
now
()
context
.
getIntegrator
().
step
(
steps
)
...
...
@@ -79,11 +79,14 @@ def runOneTest(testName, options):
system
=
ff
.
createSystem
(
pdb
.
topology
,
nonbondedMethod
=
method
,
nonbondedCutoff
=
cutoff
,
constraints
=
constraints
,
hydrogenMass
=
hydrogenMass
)
print
(
'Step Size: %g fs'
%
dt
.
value_in_unit
(
unit
.
femtoseconds
))
properties
=
{}
initialSteps
=
5
if
options
.
device
is
not
None
:
if
platform
.
getName
()
==
'CUDA'
:
properties
[
'CudaDeviceIndex'
]
=
options
.
device
elif
platform
.
getName
()
==
'OpenCL'
:
properties
[
'OpenCLDeviceIndex'
]
=
options
.
device
if
','
in
options
.
device
or
' '
in
options
.
device
:
initialSteps
=
250
if
options
.
precision
is
not
None
:
if
platform
.
getName
()
==
'CUDA'
:
properties
[
'CudaPrecision'
]
=
options
.
precision
...
...
@@ -102,7 +105,7 @@ def runOneTest(testName, options):
context
.
setVelocitiesToTemperature
(
300
*
unit
.
kelvin
)
steps
=
20
while
True
:
time
=
timeIntegration
(
context
,
steps
)
time
=
timeIntegration
(
context
,
steps
,
initialSteps
)
if
time
>=
0.5
*
options
.
seconds
:
break
if
time
<
0.5
:
...
...
platforms/cuda/include/CudaParallelKernels.h
View file @
9d3a655b
...
...
@@ -83,7 +83,7 @@ private:
std
::
vector
<
Kernel
>
kernels
;
std
::
vector
<
long
long
>
completionTimes
;
std
::
vector
<
double
>
contextNonbondedFractions
;
std
::
vector
<
int
>
tileCounts
;
int
*
tileCounts
;
CudaArray
*
contextForces
;
void
*
pinnedPositionBuffer
;
long
long
*
pinnedForceBuffer
;
...
...
platforms/cuda/src/CudaParallelKernels.cpp
View file @
9d3a655b
...
...
@@ -99,7 +99,7 @@ public:
}
void
execute
()
{
// Execute the kernel, then download forces.
energy
+=
kernel
.
finishComputation
(
context
,
includeForce
,
includeEnergy
,
groups
,
valid
);
if
(
cu
.
getComputeForceCount
()
<
200
)
{
// Record timing information for load balancing. Since this takes time, only do it at the start of the simulation.
...
...
@@ -141,7 +141,7 @@ private:
CudaParallelCalcForcesAndEnergyKernel
::
CudaParallelCalcForcesAndEnergyKernel
(
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
)
:
CalcForcesAndEnergyKernel
(
name
,
platform
),
data
(
data
),
completionTimes
(
data
.
contexts
.
size
()),
contextNonbondedFractions
(
data
.
contexts
.
size
()),
tileCounts
(
data
.
contexts
.
size
()
),
contextForces
(
NULL
),
pinnedPositionBuffer
(
NULL
),
pinnedForceBuffer
(
NULL
)
{
tileCounts
(
NULL
),
contextForces
(
NULL
),
pinnedPositionBuffer
(
NULL
),
pinnedForceBuffer
(
NULL
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
kernels
.
push_back
(
Kernel
(
new
CudaCalcForcesAndEnergyKernel
(
name
,
platform
,
*
data
.
contexts
[
i
])));
}
...
...
@@ -156,6 +156,8 @@ CudaParallelCalcForcesAndEnergyKernel::~CudaParallelCalcForcesAndEnergyKernel()
cuMemFreeHost
(
pinnedForceBuffer
);
cuEventDestroy
(
event
);
cuStreamDestroy
(
peerCopyStream
);
if
(
tileCounts
!=
NULL
)
cuMemFreeHost
(
tileCounts
);
}
void
CudaParallelCalcForcesAndEnergyKernel
::
initialize
(
const
System
&
system
)
{
...
...
@@ -163,12 +165,14 @@ void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
cu
.
setAsCurrent
();
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
parallel
);
sumKernel
=
cu
.
getKernel
(
module
,
"sumForces"
);
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
int
numContexts
=
data
.
contexts
.
size
();
for
(
int
i
=
0
;
i
<
numContexts
;
i
++
)
getKernel
(
i
).
initialize
(
system
);
for
(
int
i
=
0
;
i
<
(
int
)
contextNonbondedFractions
.
size
()
;
i
++
)
contextNonbondedFractions
[
i
]
=
1
/
(
double
)
c
ontext
NonbondedFractions
.
size
()
;
for
(
int
i
=
0
;
i
<
numContexts
;
i
++
)
contextNonbondedFractions
[
i
]
=
1
/
(
double
)
numC
ontext
s
;
CHECK_RESULT
(
cuEventCreate
(
&
event
,
0
),
"Error creating event"
);
CHECK_RESULT
(
cuStreamCreate
(
&
peerCopyStream
,
CU_STREAM_NON_BLOCKING
),
"Error creating stream"
);
CHECK_RESULT
(
cuMemHostAlloc
((
void
**
)
&
tileCounts
,
numContexts
*
sizeof
(
int
),
0
),
"Error creating tile count buffer"
);
}
void
CudaParallelCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment