Commit 85c0f767 authored by Peter Eastman's avatar Peter Eastman
Browse files

CUDA implementation of tabulated functions for CustomIntegrator

parent e82ace44
......@@ -1485,7 +1485,9 @@ private:
class ReorderListener;
class GlobalTarget;
class DerivFunction;
std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName);
std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator,
const std::string& forceName, const std::string& energyName, std::vector<const TabulatedFunction*>& functions,
std::vector<std::pair<std::string, std::string> >& functionNames);
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
Lepton::ExpressionTreeNode replaceDerivFunctions(const Lepton::ExpressionTreeNode& node, OpenMM::ContextImpl& context);
void findExpressionsForDerivs(const Lepton::ExpressionTreeNode& node, std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variableNodes);
......@@ -1504,6 +1506,7 @@ private:
CudaArray* uniformRandoms;
CudaArray* randomSeed;
CudaArray* perDofEnergyParamDerivs;
std::vector<CudaArray*> tabulatedFunctions;
std::map<int, CudaArray*> savedForces;
std::set<int> validSavedForces;
CudaParameterSet* perDofValues;
......
......@@ -48,6 +48,7 @@
#include "lepton/Operation.h"
#include "lepton/Parser.h"
#include "lepton/ParsedExpression.h"
#include "ReferenceTabulatedFunction.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMUtilities.h"
#include <algorithm>
......@@ -7061,6 +7062,8 @@ CudaIntegrateCustomStepKernel::~CudaIntegrateCustomStepKernel() {
delete perDofEnergyParamDerivs;
if (perDofValues != NULL)
delete perDofValues;
for (auto function : tabulatedFunctions)
delete function;
for (auto& f : savedForces)
delete f.second;
}
......@@ -7078,7 +7081,8 @@ void CudaIntegrateCustomStepKernel::initialize(const System& system, const Custo
SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed());
}
string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const string& forceName, const string& energyName) {
string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator,
const string& forceName, const string& energyName, vector<const TabulatedFunction*>& functions, vector<pair<string, string> >& functionNames) {
const string suffixes[] = {".x", ".y", ".z"};
string suffix = suffixes[component];
map<string, Lepton::ParsedExpression> expressions;
......@@ -7111,8 +7115,6 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari
variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i);
for (int i = 0; i < (int) parameterNames.size(); i++)
variables[parameterNames[i]] = "globals["+cu.intToString(parameterVariableIndex[i])+"]";
vector<const TabulatedFunction*> functions;
vector<pair<string, string> > functionNames;
vector<pair<ExpressionTreeNode, string> > variableNodes;
findExpressionsForDerivs(expr.getRootNode(), variableNodes);
for (auto& var : variables)
......@@ -7149,13 +7151,34 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["WORK_GROUP_SIZE"] = cu.intToString(CudaContext::ThreadBlockSize);
defines["SUM_BUFFER_SIZE"] = "0";
// Record the tabulated functions.
map<string, Lepton::CustomFunction*> functions;
vector<pair<string, string> > functionNames;
vector<const TabulatedFunction*> functionList;
vector<string> tableTypes;
for (int i = 0; i < integrator.getNumTabulatedFunctions(); i++) {
functionList.push_back(&integrator.getTabulatedFunction(i));
string name = integrator.getTabulatedFunctionName(i);
string arrayName = "table"+cu.intToString(i);
functionNames.push_back(make_pair(name, arrayName));
functions[name] = createReferenceTabulatedFunction(integrator.getTabulatedFunction(i));
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(integrator.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
if (width == 1)
tableTypes.push_back("float");
else
tableTypes.push_back("float"+cu.intToString(width));
}
// Record information about all the computation steps.
vector<string> variable(numSteps);
vector<int> forceGroup;
vector<vector<Lepton::ParsedExpression> > expression;
map<string, Lepton::CustomFunction*> functions;
CustomIntegratorUtilities::analyzeComputations(context, integrator, expression, comparisons, blockEnd, invalidatesForces, needsForces, needsEnergy, computeBothForceAndEnergy, forceGroup, functions);
for (int step = 0; step < numSteps; step++) {
string expr;
......@@ -7327,7 +7350,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
if (numUniform > 0)
compute << "float4 uniform = uniformValues[uniformIndex+index];\n";
for (int i = 0; i < 3; i++)
compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j][0], i, integrator, forceName[j], energyName[j]);
compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j][0], i, integrator, forceName[j], energyName[j], functionList, functionNames);
if (variable[j] == "x") {
if (storePosAsDelta[j])
compute << "posDelta[index] = convertFromDouble4(position-convertToDouble4(loadPos(posq, posqCorrection, index)));\n";
......@@ -7358,6 +7381,8 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
string valueName = "perDofValues"+cu.intToString(i+1);
args << ", " << buffer.getType() << "* __restrict__ " << valueName;
}
for (int i = 0; i < (int) tableTypes.size(); i++)
args << ", const " << tableTypes[i]<< "* __restrict__ table" << i;
replacements["PARAMETER_ARGUMENTS"] = args.str();
if (loadPosAsDelta[step])
defines["LOAD_POS_AS_DELTA"] = "1";
......@@ -7387,6 +7412,8 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args1.push_back(&perDofEnergyParamDerivs->getDevicePointer());
for (auto& buffer : perDofValues->getBuffers())
args1.push_back(&buffer.getMemory());
for (auto array : tabulatedFunctions)
args1.push_back(&array->getDevicePointer());
kernelArgs[step].push_back(args1);
if (stepType[step] == CustomIntegrator::ComputeSum) {
// Create a second kernel for this step that sums the values.
......@@ -7449,7 +7476,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
}
Lepton::ParsedExpression keExpression = Lepton::Parser::parse(integrator.getKineticEnergyExpression()).optimize();
for (int i = 0; i < 3; i++)
computeKE << createPerDofComputation("", keExpression, i, integrator, "f", "");
computeKE << createPerDofComputation("", keExpression, i, integrator, "f", "", functionList, functionNames);
map<string, string> replacements;
replacements["COMPUTE_STEP"] = computeKE.str();
stringstream args;
......@@ -7458,6 +7485,8 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
string valueName = "perDofValues"+cu.intToString(i+1);
args << ", " << buffer.getType() << "* __restrict__ " << valueName;
}
for (int i = 0; i < (int) tableTypes.size(); i++)
args << ", const " << tableTypes[i]<< "* __restrict__ table" << i;
replacements["PARAMETER_ARGUMENTS"] = args.str();
defines["SUM_BUFFER_SIZE"] = cu.intToString(3*numAtoms);
if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
......@@ -7482,6 +7511,8 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
kineticEnergyArgs.push_back(&perDofEnergyParamDerivs->getDevicePointer());
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
kineticEnergyArgs.push_back(&perDofValues->getBuffers()[i].getMemory());
for (auto array : tabulatedFunctions)
kineticEnergyArgs.push_back(&array->getDevicePointer());
keNeedsForce = usesVariable(keExpression, "f");
// Create a second kernel to sum the values.
......@@ -7489,6 +7520,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
defines["SUM_BUFFER_SIZE"] = cu.intToString(3*numAtoms);
module = cu.createModule(CudaKernelSources::customIntegrator, defines);
sumKineticEnergyKernel = cu.getKernel(module, useDouble ? "computeDoubleSum" : "computeFloatSum");
// Delete the custom functions.
for (auto& function : functions)
delete function.second;
}
// Make sure all values (variables, parameters, etc.) are up to date.
......
......@@ -7753,7 +7753,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer());
for (auto& buffer : perDofValues->getBuffers())
kernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto* array : tabulatedFunctions)
for (auto array : tabulatedFunctions)
kernel.setArg<cl::Buffer>(index++, array->getDeviceBuffer());
if (stepType[step] == CustomIntegrator::ComputeSum) {
// Create a second kernel for this step that sums the values.
......@@ -7851,7 +7851,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kineticEnergyKernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer());
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
kineticEnergyKernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory());
for (auto* array : tabulatedFunctions)
for (auto array : tabulatedFunctions)
kineticEnergyKernel.setArg<cl::Buffer>(index++, array->getDeviceBuffer());
keNeedsForce = usesVariable(keExpression, "f");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment