Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c992d2c9
Commit
c992d2c9
authored
Jun 22, 2017
by
peastman
Committed by
GitHub
Jun 22, 2017
Browse files
Merge pull request #1839 from peastman/sum
Improved performance of computing sums with CustomIntegrator
parents
3a356e24
ffddfb86
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
12 additions
and
8 deletions
+12
-8
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+1
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+4
-3
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+1
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+6
-3
No files found.
platforms/cuda/include/CudaKernels.h
View file @
c992d2c9
...
...
@@ -1497,7 +1497,7 @@ private:
CudaContext
&
cu
;
double
energy
;
float
energyFloat
;
int
numGlobalVariables
;
int
numGlobalVariables
,
sumWorkGroupSize
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
deviceGlobalsAreCurrent
,
modifiesParameters
,
keNeedsForce
,
hasAnyConstraints
,
needsEnergyParamDerivs
;
mutable
bool
localValuesAreCurrent
;
CudaArray
*
globalValues
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
c992d2c9
...
...
@@ -7146,10 +7146,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
stepTarget
.
resize
(
numSteps
);
merged
.
resize
(
numSteps
,
false
);
modifiesParameters
=
false
;
sumWorkGroupSize
=
512
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
defines
[
"WORK_GROUP_SIZE"
]
=
cu
.
intToString
(
CudaContext
::
ThreadBlockSize
);
defines
[
"WORK_GROUP_SIZE"
]
=
cu
.
intToString
(
sumWorkGroupSize
);
defines
[
"SUM_BUFFER_SIZE"
]
=
"0"
;
// Record the tabulated functions.
...
...
@@ -7695,7 +7696,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
clearBuffer
(
*
sumBuffer
);
cu
.
executeKernel
(
kernels
[
step
][
0
],
&
kernelArgs
[
step
][
0
][
0
],
numAtoms
,
128
);
cu
.
executeKernel
(
kernels
[
step
][
1
],
&
kernelArgs
[
step
][
1
][
0
],
CudaContext
::
ThreadBlockSize
,
CudaContext
::
ThreadBlockSize
);
cu
.
executeKernel
(
kernels
[
step
][
1
],
&
kernelArgs
[
step
][
1
][
0
],
sumWorkGroupSize
,
sumWorkGroupSize
);
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
{
double
value
;
summedValue
->
download
(
&
value
);
...
...
@@ -7797,7 +7798,7 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
cu
.
clearBuffer
(
*
sumBuffer
);
cu
.
executeKernel
(
kineticEnergyKernel
,
&
kineticEnergyArgs
[
0
],
cu
.
getNumAtoms
());
void
*
args
[]
=
{
&
sumBuffer
->
getDevicePointer
(),
&
summedValue
->
getDevicePointer
()};
cu
.
executeKernel
(
sumKineticEnergyKernel
,
args
,
CudaContext
::
ThreadBlockSize
,
CudaContext
::
ThreadBlockSize
);
cu
.
executeKernel
(
sumKineticEnergyKernel
,
args
,
sumWorkGroupSize
,
sumWorkGroupSize
);
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
{
double
ke
;
summedValue
->
download
(
&
ke
);
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
c992d2c9
...
...
@@ -1484,7 +1484,7 @@ private:
OpenCLContext
&
cl
;
double
energy
;
float
energyFloat
;
int
numGlobalVariables
;
int
numGlobalVariables
,
sumWorkGroupSize
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
deviceGlobalsAreCurrent
,
modifiesParameters
,
keNeedsForce
,
hasAnyConstraints
,
needsEnergyParamDerivs
;
mutable
bool
localValuesAreCurrent
;
OpenCLArray
*
globalValues
;
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
c992d2c9
...
...
@@ -7491,9 +7491,12 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
stepTarget
.
resize
(
numSteps
);
merged
.
resize
(
numSteps
,
false
);
modifiesParameters
=
false
;
sumWorkGroupSize
=
cl
.
getDevice
().
getInfo
<
CL_DEVICE_MAX_WORK_GROUP_SIZE
>
();
if
(
sumWorkGroupSize
>
512
)
sumWorkGroupSize
=
512
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines["WORK_GROUP_SIZE"] = cl.intToString(OpenCLContext::ThreadBlockSize);
defines
[
"WORK_GROUP_SIZE"
]
=
cl
.
intToString
(
sumWorkGroupSize
);
// Record the tabulated functions.
...
...
@@ -8037,7 +8040,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
cl
.
executeKernel
(
randomKernel
,
numAtoms
);
cl
.
clearBuffer
(
*
sumBuffer
);
cl
.
executeKernel
(
kernels
[
step
][
0
],
numAtoms
,
128
);
cl.executeKernel(kernels[step][1], OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlockSize);
cl
.
executeKernel
(
kernels
[
step
][
1
],
sumWorkGroupSize
,
sumWorkGroupSize
);
if
(
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
())
{
double
value
;
summedValue
->
download
(
&
value
);
...
...
@@ -8139,7 +8142,7 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
cl
.
getIntegrationUtilities
().
getRandom
().
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl_uint
>
(
9
,
0
);
cl
.
executeKernel
(
kineticEnergyKernel
,
cl
.
getNumAtoms
());
cl.executeKernel(sumKineticEnergyKernel, OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlockSize);
cl
.
executeKernel
(
sumKineticEnergyKernel
,
sumWorkGroupSize
,
sumWorkGroupSize
);
if
(
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
())
{
double
ke
;
summedValue
->
download
(
&
ke
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment