Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
85c0f767
Commit
85c0f767
authored
May 18, 2017
by
Peter Eastman
Browse files
CUDA implementation of tabulated functions for CustomIntegrator
parent
e82ace44
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
49 additions
and
10 deletions
+49
-10
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+4
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+43
-7
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+2
-2
No files found.
platforms/cuda/include/CudaKernels.h
View file @
85c0f767
...
@@ -1485,7 +1485,9 @@ private:
...
@@ -1485,7 +1485,9 @@ private:
class
ReorderListener
;
class
ReorderListener
;
class
GlobalTarget
;
class
GlobalTarget
;
class
DerivFunction
;
class
DerivFunction
;
std
::
string
createPerDofComputation
(
const
std
::
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
int
component
,
CustomIntegrator
&
integrator
,
const
std
::
string
&
forceName
,
const
std
::
string
&
energyName
);
std
::
string
createPerDofComputation
(
const
std
::
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
int
component
,
CustomIntegrator
&
integrator
,
const
std
::
string
&
forceName
,
const
std
::
string
&
energyName
,
std
::
vector
<
const
TabulatedFunction
*>&
functions
,
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functionNames
);
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
Lepton
::
ExpressionTreeNode
replaceDerivFunctions
(
const
Lepton
::
ExpressionTreeNode
&
node
,
OpenMM
::
ContextImpl
&
context
);
Lepton
::
ExpressionTreeNode
replaceDerivFunctions
(
const
Lepton
::
ExpressionTreeNode
&
node
,
OpenMM
::
ContextImpl
&
context
);
void
findExpressionsForDerivs
(
const
Lepton
::
ExpressionTreeNode
&
node
,
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
variableNodes
);
void
findExpressionsForDerivs
(
const
Lepton
::
ExpressionTreeNode
&
node
,
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
variableNodes
);
...
@@ -1504,6 +1506,7 @@ private:
...
@@ -1504,6 +1506,7 @@ private:
CudaArray
*
uniformRandoms
;
CudaArray
*
uniformRandoms
;
CudaArray
*
randomSeed
;
CudaArray
*
randomSeed
;
CudaArray
*
perDofEnergyParamDerivs
;
CudaArray
*
perDofEnergyParamDerivs
;
std
::
vector
<
CudaArray
*>
tabulatedFunctions
;
std
::
map
<
int
,
CudaArray
*>
savedForces
;
std
::
map
<
int
,
CudaArray
*>
savedForces
;
std
::
set
<
int
>
validSavedForces
;
std
::
set
<
int
>
validSavedForces
;
CudaParameterSet
*
perDofValues
;
CudaParameterSet
*
perDofValues
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
85c0f767
...
@@ -48,6 +48,7 @@
...
@@ -48,6 +48,7 @@
#include "lepton/Operation.h"
#include "lepton/Operation.h"
#include "lepton/Parser.h"
#include "lepton/Parser.h"
#include "lepton/ParsedExpression.h"
#include "lepton/ParsedExpression.h"
#include "ReferenceTabulatedFunction.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMUtilities.h"
#include "SimTKOpenMMUtilities.h"
#include <algorithm>
#include <algorithm>
...
@@ -7061,6 +7062,8 @@ CudaIntegrateCustomStepKernel::~CudaIntegrateCustomStepKernel() {
...
@@ -7061,6 +7062,8 @@ CudaIntegrateCustomStepKernel::~CudaIntegrateCustomStepKernel() {
delete perDofEnergyParamDerivs;
delete perDofEnergyParamDerivs;
if (perDofValues != NULL)
if (perDofValues != NULL)
delete perDofValues;
delete perDofValues;
for (auto function : tabulatedFunctions)
delete function;
for (auto& f : savedForces)
for (auto& f : savedForces)
delete f.second;
delete f.second;
}
}
...
@@ -7078,7 +7081,8 @@ void CudaIntegrateCustomStepKernel::initialize(const System& system, const Custo
...
@@ -7078,7 +7081,8 @@ void CudaIntegrateCustomStepKernel::initialize(const System& system, const Custo
SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed());
SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed());
}
}
string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const string& forceName, const string& energyName) {
string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator,
const string& forceName, const string& energyName, vector<const TabulatedFunction*>& functions, vector<pair<string, string> >& functionNames) {
const string suffixes[] = {".x", ".y", ".z"};
const string suffixes[] = {".x", ".y", ".z"};
string suffix = suffixes[component];
string suffix = suffixes[component];
map<string, Lepton::ParsedExpression> expressions;
map<string, Lepton::ParsedExpression> expressions;
...
@@ -7111,8 +7115,6 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari
...
@@ -7111,8 +7115,6 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari
variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i);
variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i);
for (int i = 0; i < (int) parameterNames.size(); i++)
for (int i = 0; i < (int) parameterNames.size(); i++)
variables[parameterNames[i]] = "globals["+cu.intToString(parameterVariableIndex[i])+"]";
variables[parameterNames[i]] = "globals["+cu.intToString(parameterVariableIndex[i])+"]";
vector<const TabulatedFunction*> functions;
vector<pair<string, string> > functionNames;
vector<pair<ExpressionTreeNode, string> > variableNodes;
vector<pair<ExpressionTreeNode, string> > variableNodes;
findExpressionsForDerivs(expr.getRootNode(), variableNodes);
findExpressionsForDerivs(expr.getRootNode(), variableNodes);
for (auto& var : variables)
for (auto& var : variables)
...
@@ -7149,13 +7151,34 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -7149,13 +7151,34 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["WORK_GROUP_SIZE"] = cu.intToString(CudaContext::ThreadBlockSize);
defines["WORK_GROUP_SIZE"] = cu.intToString(CudaContext::ThreadBlockSize);
defines["SUM_BUFFER_SIZE"] = "0";
defines["SUM_BUFFER_SIZE"] = "0";
// Record the tabulated functions.
map<string, Lepton::CustomFunction*> functions;
vector<pair<string, string> > functionNames;
vector<const TabulatedFunction*> functionList;
vector<string> tableTypes;
for (int i = 0; i < integrator.getNumTabulatedFunctions(); i++) {
functionList.push_back(&integrator.getTabulatedFunction(i));
string name = integrator.getTabulatedFunctionName(i);
string arrayName = "table"+cu.intToString(i);
functionNames.push_back(make_pair(name, arrayName));
functions[name] = createReferenceTabulatedFunction(integrator.getTabulatedFunction(i));
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(integrator.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
if (width == 1)
tableTypes.push_back("float");
else
tableTypes.push_back("float"+cu.intToString(width));
}
// Record information about all the computation steps.
// Record information about all the computation steps.
vector<string> variable(numSteps);
vector<string> variable(numSteps);
vector<int> forceGroup;
vector<int> forceGroup;
vector<vector<Lepton::ParsedExpression> > expression;
vector<vector<Lepton::ParsedExpression> > expression;
map<string, Lepton::CustomFunction*> functions;
CustomIntegratorUtilities::analyzeComputations(context, integrator, expression, comparisons, blockEnd, invalidatesForces, needsForces, needsEnergy, computeBothForceAndEnergy, forceGroup, functions);
CustomIntegratorUtilities::analyzeComputations(context, integrator, expression, comparisons, blockEnd, invalidatesForces, needsForces, needsEnergy, computeBothForceAndEnergy, forceGroup, functions);
for (int step = 0; step < numSteps; step++) {
for (int step = 0; step < numSteps; step++) {
string expr;
string expr;
...
@@ -7327,7 +7350,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -7327,7 +7350,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
if (numUniform > 0)
if (numUniform > 0)
compute << "float4 uniform = uniformValues[uniformIndex+index];\n";
compute << "float4 uniform = uniformValues[uniformIndex+index];\n";
for (int i = 0; i < 3; i++)
for (int i = 0; i < 3; i++)
compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j][0], i, integrator, forceName[j], energyName[j]);
compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j][0], i, integrator, forceName[j], energyName[j]
, functionList, functionNames
);
if (variable[j] == "x") {
if (variable[j] == "x") {
if (storePosAsDelta[j])
if (storePosAsDelta[j])
compute << "posDelta[index] = convertFromDouble4(position-convertToDouble4(loadPos(posq, posqCorrection, index)));\n";
compute << "posDelta[index] = convertFromDouble4(position-convertToDouble4(loadPos(posq, posqCorrection, index)));\n";
...
@@ -7358,6 +7381,8 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -7358,6 +7381,8 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
string valueName = "perDofValues"+cu.intToString(i+1);
string valueName = "perDofValues"+cu.intToString(i+1);
args << ", " << buffer.getType() << "* __restrict__ " << valueName;
args << ", " << buffer.getType() << "* __restrict__ " << valueName;
}
}
for (int i = 0; i < (int) tableTypes.size(); i++)
args << ", const " << tableTypes[i]<< "* __restrict__ table" << i;
replacements["PARAMETER_ARGUMENTS"] = args.str();
replacements["PARAMETER_ARGUMENTS"] = args.str();
if (loadPosAsDelta[step])
if (loadPosAsDelta[step])
defines["LOAD_POS_AS_DELTA"] = "1";
defines["LOAD_POS_AS_DELTA"] = "1";
...
@@ -7387,6 +7412,8 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -7387,6 +7412,8 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args1.push_back(&perDofEnergyParamDerivs->getDevicePointer());
args1.push_back(&perDofEnergyParamDerivs->getDevicePointer());
for (auto& buffer : perDofValues->getBuffers())
for (auto& buffer : perDofValues->getBuffers())
args1.push_back(&buffer.getMemory());
args1.push_back(&buffer.getMemory());
for (auto array : tabulatedFunctions)
args1.push_back(&array->getDevicePointer());
kernelArgs[step].push_back(args1);
kernelArgs[step].push_back(args1);
if (stepType[step] == CustomIntegrator::ComputeSum) {
if (stepType[step] == CustomIntegrator::ComputeSum) {
// Create a second kernel for this step that sums the values.
// Create a second kernel for this step that sums the values.
...
@@ -7449,7 +7476,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -7449,7 +7476,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
}
}
Lepton::ParsedExpression keExpression = Lepton::Parser::parse(integrator.getKineticEnergyExpression()).optimize();
Lepton::ParsedExpression keExpression = Lepton::Parser::parse(integrator.getKineticEnergyExpression()).optimize();
for (int i = 0; i < 3; i++)
for (int i = 0; i < 3; i++)
computeKE << createPerDofComputation("", keExpression, i, integrator, "f", "");
computeKE << createPerDofComputation("", keExpression, i, integrator, "f", ""
, functionList, functionNames
);
map<string, string> replacements;
map<string, string> replacements;
replacements["COMPUTE_STEP"] = computeKE.str();
replacements["COMPUTE_STEP"] = computeKE.str();
stringstream args;
stringstream args;
...
@@ -7458,6 +7485,8 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -7458,6 +7485,8 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
string valueName = "perDofValues"+cu.intToString(i+1);
string valueName = "perDofValues"+cu.intToString(i+1);
args << ", " << buffer.getType() << "* __restrict__ " << valueName;
args << ", " << buffer.getType() << "* __restrict__ " << valueName;
}
}
for (int i = 0; i < (int) tableTypes.size(); i++)
args << ", const " << tableTypes[i]<< "* __restrict__ table" << i;
replacements["PARAMETER_ARGUMENTS"] = args.str();
replacements["PARAMETER_ARGUMENTS"] = args.str();
defines["SUM_BUFFER_SIZE"] = cu.intToString(3*numAtoms);
defines["SUM_BUFFER_SIZE"] = cu.intToString(3*numAtoms);
if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
...
@@ -7482,6 +7511,8 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -7482,6 +7511,8 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
kineticEnergyArgs.push_back(&perDofEnergyParamDerivs->getDevicePointer());
kineticEnergyArgs.push_back(&perDofEnergyParamDerivs->getDevicePointer());
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
kineticEnergyArgs.push_back(&perDofValues->getBuffers()[i].getMemory());
kineticEnergyArgs.push_back(&perDofValues->getBuffers()[i].getMemory());
for (auto array : tabulatedFunctions)
kineticEnergyArgs.push_back(&array->getDevicePointer());
keNeedsForce = usesVariable(keExpression, "f");
keNeedsForce = usesVariable(keExpression, "f");
// Create a second kernel to sum the values.
// Create a second kernel to sum the values.
...
@@ -7489,6 +7520,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -7489,6 +7520,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
defines["SUM_BUFFER_SIZE"] = cu.intToString(3*numAtoms);
defines["SUM_BUFFER_SIZE"] = cu.intToString(3*numAtoms);
module = cu.createModule(CudaKernelSources::customIntegrator, defines);
module = cu.createModule(CudaKernelSources::customIntegrator, defines);
sumKineticEnergyKernel = cu.getKernel(module, useDouble ? "computeDoubleSum" : "computeFloatSum");
sumKineticEnergyKernel = cu.getKernel(module, useDouble ? "computeDoubleSum" : "computeFloatSum");
// Delete the custom functions.
for (auto& function : functions)
delete function.second;
}
}
// Make sure all values (variables, parameters, etc.) are up to date.
// Make sure all values (variables, parameters, etc.) are up to date.
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
85c0f767
...
@@ -7753,7 +7753,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -7753,7 +7753,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer());
for (auto& buffer : perDofValues->getBuffers())
for (auto& buffer : perDofValues->getBuffers())
kernel.setArg<cl::Memory>(index++, buffer.getMemory());
kernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto
*
array : tabulatedFunctions)
for (auto array : tabulatedFunctions)
kernel.setArg<cl::Buffer>(index++, array->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, array->getDeviceBuffer());
if (stepType[step] == CustomIntegrator::ComputeSum) {
if (stepType[step] == CustomIntegrator::ComputeSum) {
// Create a second kernel for this step that sums the values.
// Create a second kernel for this step that sums the values.
...
@@ -7851,7 +7851,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -7851,7 +7851,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kineticEnergyKernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer());
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
kineticEnergyKernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory());
kineticEnergyKernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory());
for (auto
*
array : tabulatedFunctions)
for (auto array : tabulatedFunctions)
kineticEnergyKernel.setArg<cl::Buffer>(index++, array->getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, array->getDeviceBuffer());
keNeedsForce = usesVariable(keExpression, "f");
keNeedsForce = usesVariable(keExpression, "f");
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment