Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
ffddfb86
Commit ffddfb86 authored Jun 19, 2017 by Peter Eastman
Browse files
Improved performance of computing sums with CustomIntegrator
parent
93742ae3
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing 4 changed files with 12 additions and 8 deletions
+12
-8
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+1
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+4
-3
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+1
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+6
-3
No files found.
platforms/cuda/include/CudaKernels.h
View file @
ffddfb86
...
...
@@ -1497,7 +1497,7 @@ private:
CudaContext& cu;
double energy;
float energyFloat;
int numGlobalVariables;
int numGlobalVariables, sumWorkGroupSize;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
mutable bool localValuesAreCurrent;
CudaArray* globalValues;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
ffddfb86
...
...
@@ -7146,10 +7146,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
stepTarget.resize(numSteps);
merged.resize(numSteps, false);
modifiesParameters = false;
sumWorkGroupSize = 512;
map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["WORK_GROUP_SIZE"] = cu.intToString(CudaContext::ThreadBlockSize);
defines["WORK_GROUP_SIZE"] = cu.intToString(sumWorkGroupSize);
defines["SUM_BUFFER_SIZE"] = "0";
// Record the tabulated functions.
...
...
@@ -7695,7 +7696,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
cu.executeKernel(randomKernel, &randomArgs[0], numAtoms);
cu.clearBuffer(*sumBuffer);
cu.executeKernel(kernels[step][0], &kernelArgs[step][0][0], numAtoms, 128);
cu.executeKernel(kernels[step][1], &kernelArgs[step][1][0], CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
cu.executeKernel(kernels[step][1], &kernelArgs[step][1][0], sumWorkGroupSize, sumWorkGroupSize);
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
double value;
summedValue->download(&value);
...
...
@@ -7797,7 +7798,7 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
cu.clearBuffer(*sumBuffer);
cu.executeKernel(kineticEnergyKernel, &kineticEnergyArgs[0], cu.getNumAtoms());
void* args[] = {&sumBuffer->getDevicePointer(), &summedValue->getDevicePointer()};
cu.executeKernel(sumKineticEnergyKernel, args, CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
cu.executeKernel(sumKineticEnergyKernel, args, sumWorkGroupSize, sumWorkGroupSize);
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
double ke;
summedValue->download(&ke);
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
ffddfb86
...
...
@@ -1484,7 +1484,7 @@ private:
OpenCLContext& cl;
double energy;
float energyFloat;
int numGlobalVariables;
int numGlobalVariables, sumWorkGroupSize;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
mutable bool localValuesAreCurrent;
OpenCLArray* globalValues;
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
ffddfb86
...
...
@@ -7491,9 +7491,12 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
stepTarget.resize(numSteps);
merged.resize(numSteps, false);
modifiesParameters = false;
sumWorkGroupSize = cl.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
if (sumWorkGroupSize > 512)
sumWorkGroupSize = 512;
map<string, string> defines;
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
defines["WORK_GROUP_SIZE"] = cl.intToString(OpenCLContext::ThreadBlockSize);
defines["WORK_GROUP_SIZE"] = cl.intToString(sumWorkGroupSize);
// Record the tabulated functions.
...
...
@@ -8037,7 +8040,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
cl.executeKernel(randomKernel, numAtoms);
cl.clearBuffer(*sumBuffer);
cl.executeKernel(kernels[step][0], numAtoms, 128);
cl.executeKernel(kernels[step][1], OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlockSize);
cl.executeKernel(kernels[step][1], sumWorkGroupSize, sumWorkGroupSize);
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double value;
summedValue->download(&value);
...
...
@@ -8139,7 +8142,7 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex
kineticEnergyKernel.setArg<cl::Buffer>(8, cl.getIntegrationUtilities().getRandom().getDeviceBuffer());
kineticEnergyKernel.setArg<cl_uint>(9, 0);
cl.executeKernel(kineticEnergyKernel, cl.getNumAtoms());
cl.executeKernel(sumKineticEnergyKernel, OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlockSize);
cl.executeKernel(sumKineticEnergyKernel, sumWorkGroupSize, sumWorkGroupSize);
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double ke;
summedValue->download(&ke);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment