CUDA implementation of tabulated functions for CustomIntegrator

85c0f767 · Peter Eastman · e82ace44 · 85c0f767 · 85c0f767 · 85c0f767
Commit 85c0f767 authored May 18, 2017 by Peter Eastman
3 changed files
--- a/platforms/cuda/include/CudaKernels.h
+++ b/platforms/cuda/include/CudaKernels.h
@@ -1485,7 +1485,9 @@ private:
    class ReorderListener;
    class GlobalTarget;
    class DerivFunction;
-    std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName);
+    std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator,
+        const std::string& forceName, const std::string& energyName, std::vector<const TabulatedFunction*>& functions,
+        std::vector<std::pair<std::string, std::string> >& functionNames);
    void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
    Lepton::ExpressionTreeNode replaceDerivFunctions(const Lepton::ExpressionTreeNode& node, OpenMM::ContextImpl& context);
    void findExpressionsForDerivs(const Lepton::ExpressionTreeNode& node, std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variableNodes);
@@ -1504,6 +1506,7 @@ private:
    CudaArray* uniformRandoms;
    CudaArray* randomSeed;
    CudaArray* perDofEnergyParamDerivs;
+    std::vector<CudaArray*> tabulatedFunctions;
    std::map<int, CudaArray*> savedForces;
    std::set<int> validSavedForces;
    CudaParameterSet* perDofValues;

--- a/platforms/cuda/src/CudaKernels.cpp
+++ b/platforms/cuda/src/CudaKernels.cpp
@@ -48,6 +48,7 @@
 #include "lepton/Operation.h"
 #include "lepton/Parser.h"
 #include "lepton/ParsedExpression.h"
+#include "ReferenceTabulatedFunction.h"
 #include "SimTKOpenMMRealType.h"
 #include "SimTKOpenMMUtilities.h"
 #include <algorithm>
@@ -7061,6 +7062,8 @@ CudaIntegrateCustomStepKernel::~CudaIntegrateCustomStepKernel() {
        delete perDofEnergyParamDerivs;
    if (perDofValues != NULL)
        delete perDofValues;
+    for (auto function : tabulatedFunctions)
+        delete function;
    for (auto& f : savedForces)
        delete f.second;
 }
@@ -7078,7 +7081,8 @@ void CudaIntegrateCustomStepKernel::initialize(const System& system, const Custo
    SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed());
 }

-string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const string& forceName, const string& energyName) {
+string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator,
+        const string& forceName, const string& energyName, vector<const TabulatedFunction*>& functions, vector<pair<string, string> >& functionNames) {
    const string suffixes[] = {".x", ".y", ".z"};
    string suffix = suffixes[component];
    map<string, Lepton::ParsedExpression> expressions;
@@ -7111,8 +7115,6 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari
        variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i);
    for (int i = 0; i < (int) parameterNames.size(); i++)
        variables[parameterNames[i]] = "globals["+cu.intToString(parameterVariableIndex[i])+"]";
-    vector<const TabulatedFunction*> functions;
-    vector<pair<string, string> > functionNames;
    vector<pair<ExpressionTreeNode, string> > variableNodes;
    findExpressionsForDerivs(expr.getRootNode(), variableNodes);
    for (auto& var : variables)
@@ -7149,13 +7151,34 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
        defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
        defines["WORK_GROUP_SIZE"] = cu.intToString(CudaContext::ThreadBlockSize);
        defines["SUM_BUFFER_SIZE"] = "0";
-        
+
+        // Record the tabulated functions.
+
+        map<string, Lepton::CustomFunction*> functions;
+        vector<pair<string, string> > functionNames;
+        vector<const TabulatedFunction*> functionList;
+        vector<string> tableTypes;
+        for (int i = 0; i < integrator.getNumTabulatedFunctions(); i++) {
+            functionList.push_back(&integrator.getTabulatedFunction(i));
+            string name = integrator.getTabulatedFunctionName(i);
+            string arrayName = "table"+cu.intToString(i);
+            functionNames.push_back(make_pair(name, arrayName));
+            functions[name] = createReferenceTabulatedFunction(integrator.getTabulatedFunction(i));
+            int width;
+            vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(integrator.getTabulatedFunction(i), width);
+            tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction"));
+            tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
+            if (width == 1)
+                tableTypes.push_back("float");
+            else
+                tableTypes.push_back("float"+cu.intToString(width));
+        }
+
        // Record information about all the computation steps.

        vector<string> variable(numSteps);
        vector<int> forceGroup;
        vector<vector<Lepton::ParsedExpression> > expression;
-        map<string, Lepton::CustomFunction*> functions;
        CustomIntegratorUtilities::analyzeComputations(context, integrator, expression, comparisons, blockEnd, invalidatesForces, needsForces, needsEnergy, computeBothForceAndEnergy, forceGroup, functions);
        for (int step = 0; step < numSteps; step++) {
            string expr;
@@ -7327,7 +7350,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
                    if (numUniform > 0)
                        compute << "float4 uniform = uniformValues[uniformIndex+index];\n";
                    for (int i = 0; i < 3; i++)
-                        compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j][0], i, integrator, forceName[j], energyName[j]);
+                        compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j][0], i, integrator, forceName[j], energyName[j], functionList, functionNames);
                    if (variable[j] == "x") {
                        if (storePosAsDelta[j])
                            compute << "posDelta[index] = convertFromDouble4(position-convertToDouble4(loadPos(posq, posqCorrection, index)));\n";
@@ -7358,6 +7381,8 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
                    string valueName = "perDofValues"+cu.intToString(i+1);
                    args << ", " << buffer.getType() << "* __restrict__ " << valueName;
                }
+                for (int i = 0; i < (int) tableTypes.size(); i++)
+                    args << ", const " << tableTypes[i]<< "* __restrict__ table" << i;
                replacements["PARAMETER_ARGUMENTS"] = args.str();
                if (loadPosAsDelta[step])
                    defines["LOAD_POS_AS_DELTA"] = "1";
@@ -7387,6 +7412,8 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
                args1.push_back(&perDofEnergyParamDerivs->getDevicePointer());
                for (auto& buffer : perDofValues->getBuffers())
                    args1.push_back(&buffer.getMemory());
+                for (auto array : tabulatedFunctions)
+                    args1.push_back(&array->getDevicePointer());
                kernelArgs[step].push_back(args1);
                if (stepType[step] == CustomIntegrator::ComputeSum) {
                    // Create a second kernel for this step that sums the values.
@@ -7449,7 +7476,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
        }
        Lepton::ParsedExpression keExpression = Lepton::Parser::parse(integrator.getKineticEnergyExpression()).optimize();
        for (int i = 0; i < 3; i++)
-            computeKE << createPerDofComputation("", keExpression, i, integrator, "f", "");
+            computeKE << createPerDofComputation("", keExpression, i, integrator, "f", "", functionList, functionNames);
        map<string, string> replacements;
        replacements["COMPUTE_STEP"] = computeKE.str();
        stringstream args;
@@ -7458,6 +7485,8 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
            string valueName = "perDofValues"+cu.intToString(i+1);
            args << ", " << buffer.getType() << "* __restrict__ " << valueName;
        }
+        for (int i = 0; i < (int) tableTypes.size(); i++)
+            args << ", const " << tableTypes[i]<< "* __restrict__ table" << i;
        replacements["PARAMETER_ARGUMENTS"] = args.str();
        defines["SUM_BUFFER_SIZE"] = cu.intToString(3*numAtoms);
        if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
@@ -7482,6 +7511,8 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
        kineticEnergyArgs.push_back(&perDofEnergyParamDerivs->getDevicePointer());
        for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
            kineticEnergyArgs.push_back(&perDofValues->getBuffers()[i].getMemory());
+        for (auto array : tabulatedFunctions)
+            kineticEnergyArgs.push_back(&array->getDevicePointer());
        keNeedsForce = usesVariable(keExpression, "f");

        // Create a second kernel to sum the values.
@@ -7489,6 +7520,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
        defines["SUM_BUFFER_SIZE"] = cu.intToString(3*numAtoms);
        module = cu.createModule(CudaKernelSources::customIntegrator, defines);
        sumKineticEnergyKernel = cu.getKernel(module, useDouble ? "computeDoubleSum" : "computeFloatSum");
+
+        // Delete the custom functions.
+
+        for (auto& function : functions)
+            delete function.second;
    }
    
    // Make sure all values (variables, parameters, etc.) are up to date.

--- a/platforms/opencl/src/OpenCLKernels.cpp
+++ b/platforms/opencl/src/OpenCLKernels.cpp
@@ -7753,7 +7753,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
                kernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer());
                for (auto& buffer : perDofValues->getBuffers())
                    kernel.setArg<cl::Memory>(index++, buffer.getMemory());
-                for (auto* array : tabulatedFunctions)
+                for (auto array : tabulatedFunctions)
                    kernel.setArg<cl::Buffer>(index++, array->getDeviceBuffer());
                if (stepType[step] == CustomIntegrator::ComputeSum) {
                    // Create a second kernel for this step that sums the values.
@@ -7851,7 +7851,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
        kineticEnergyKernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer());
        for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
            kineticEnergyKernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory());
-        for (auto* array : tabulatedFunctions)
+        for (auto array : tabulatedFunctions)
            kineticEnergyKernel.setArg<cl::Buffer>(index++, array->getDeviceBuffer());
        keNeedsForce = usesVariable(keExpression, "f");