Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
ffddfb86
"wrappers/python/vscode:/vscode.git/clone" did not exist on "cfcf0dcde77efb4de720569601f0233e55f6e3a7"
Commit ffddfb86 authored Jun 19, 2017 by Peter Eastman
Browse files
Improved performance of computing sums with CustomIntegrator
parent
93742ae3
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing 4 changed files with 12 additions and 8 deletions
+12
-8
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+1
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+4
-3
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+1
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+6
-3
No files found.
platforms/cuda/include/CudaKernels.h
View file @
ffddfb86
...
...
@@ -1497,7 +1497,7 @@ private:
CudaContext
&
cu
;
double
energy
;
float
energyFloat
;
int
numGlobalVariables
;
int
numGlobalVariables
,
sumWorkGroupSize
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
deviceGlobalsAreCurrent
,
modifiesParameters
,
keNeedsForce
,
hasAnyConstraints
,
needsEnergyParamDerivs
;
mutable
bool
localValuesAreCurrent
;
CudaArray
*
globalValues
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
ffddfb86
...
...
@@ -7146,10 +7146,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
stepTarget.resize(numSteps);
merged.resize(numSteps, false);
modifiesParameters = false;
sumWorkGroupSize = 512;
map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["WORK_GROUP_SIZE"] = cu.intToString(
CudaContext::ThreadBlock
Size);
defines["WORK_GROUP_SIZE"] = cu.intToString(
sumWorkGroup
Size);
defines["SUM_BUFFER_SIZE"] = "0";
// Record the tabulated functions.
...
...
@@ -7695,7 +7696,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
cu.executeKernel(randomKernel, &randomArgs[0], numAtoms);
cu.clearBuffer(*sumBuffer);
cu.executeKernel(kernels[step][0], &kernelArgs[step][0][0], numAtoms, 128);
cu.executeKernel(kernels[step][1], &kernelArgs[step][1][0],
CudaContext::ThreadBlockSize, CudaContext::ThreadBlock
Size);
cu.executeKernel(kernels[step][1], &kernelArgs[step][1][0],
sumWorkGroupSize, sumWorkGroup
Size);
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
double value;
summedValue->download(&value);
...
...
@@ -7797,7 +7798,7 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
cu.clearBuffer(*sumBuffer);
cu.executeKernel(kineticEnergyKernel, &kineticEnergyArgs[0], cu.getNumAtoms());
void* args[] = {&sumBuffer->getDevicePointer(), &summedValue->getDevicePointer()};
cu.executeKernel(sumKineticEnergyKernel, args,
CudaContext::ThreadBlockSize, CudaContext::ThreadBlock
Size);
cu.executeKernel(sumKineticEnergyKernel, args,
sumWorkGroupSize, sumWorkGroup
Size);
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
double ke;
summedValue->download(&ke);
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
ffddfb86
...
...
@@ -1484,7 +1484,7 @@ private:
OpenCLContext
&
cl
;
double
energy
;
float
energyFloat
;
int
numGlobalVariables
;
int
numGlobalVariables
,
sumWorkGroupSize
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
deviceGlobalsAreCurrent
,
modifiesParameters
,
keNeedsForce
,
hasAnyConstraints
,
needsEnergyParamDerivs
;
mutable
bool
localValuesAreCurrent
;
OpenCLArray
*
globalValues
;
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
ffddfb86
...
...
@@ -7491,9 +7491,12 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
stepTarget.resize(numSteps);
merged.resize(numSteps, false);
modifiesParameters = false;
sumWorkGroupSize = cl.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
if (sumWorkGroupSize > 512)
sumWorkGroupSize = 512;
map<string, string> defines;
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
defines["WORK_GROUP_SIZE"] = cl.intToString(
OpenCLContext::ThreadBlock
Size);
defines["WORK_GROUP_SIZE"] = cl.intToString(
sumWorkGroup
Size);
// Record the tabulated functions.
...
...
@@ -8037,7 +8040,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
cl.executeKernel(randomKernel, numAtoms);
cl.clearBuffer(*sumBuffer);
cl.executeKernel(kernels[step][0], numAtoms, 128);
cl.executeKernel(kernels[step][1],
OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlock
Size);
cl.executeKernel(kernels[step][1],
sumWorkGroupSize, sumWorkGroup
Size);
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double value;
summedValue->download(&value);
...
...
@@ -8139,7 +8142,7 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex
kineticEnergyKernel.setArg<cl::Buffer>(8, cl.getIntegrationUtilities().getRandom().getDeviceBuffer());
kineticEnergyKernel.setArg<cl_uint>(9, 0);
cl.executeKernel(kineticEnergyKernel, cl.getNumAtoms());
cl.executeKernel(sumKineticEnergyKernel,
OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlock
Size);
cl.executeKernel(sumKineticEnergyKernel,
sumWorkGroupSize, sumWorkGroup
Size);
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double ke;
summedValue->download(&ke);
...
...
Write
Preview
Markdown
is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment