Merge branch 'vector' of https://github.com/peastman/openmm into vector

9a19c7d6 · peastman · 9b381947 · fdc59e96 · 9a19c7d6 · 9a19c7d6
Commit 9a19c7d6 authored May 09, 2018 by peastman
10 changed files
--- a/openmmapi/include/openmm/CustomIntegrator.h
+++ b/openmmapi/include/openmm/CustomIntegrator.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2011-2017 Stanford University and the Authors.      *
+ * Portions copyright (c) 2011-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -194,7 +194,7 @@ namespace OpenMM {
 *
 * <tt><pre>
 * integrator.beginIfBlock("uniform < acceptanceProbability");
- * integrator.computePerDof("x", "xnew");
+ * integrator.addComputePerDof("x", "xnew");
 * integrator.endBlock();
 * </pre></tt>
 *
@@ -203,6 +203,21 @@ namespace OpenMM {
 * following comparison operators: =, <. >, !=, <=, >=.  Blocks may be nested
 * inside each other.
 * 
+ * "Per-DOF" computations can also be thought of as per-particle computations
+ * that operate on three component vectors.  For example, "x+dt*v" means to take
+ * the particle's velocity (a vector), multiply it by the step size, and add the
+ * position (also a vector).  The result is a new vector that can be stored into
+ * a per-DOF variable with addComputePerDof(), or it can be summed over all
+ * components of all particles with addComputeSum().  Because the calculation is
+ * done on vectors, you can use functions that operate explicitly on vectors
+ * rather than just computing each component independently.  For example, the
+ * following line uses a cross product to compute the angular momentum of each
+ * particle and stores it into a per-DOF variable.
+ * 
+ * <tt><pre>
+ * integrator.addComputePerDof("angularMomentum", "m*cross(x, v)");
+ * </pre></tt>
+ * 
 * Another feature of CustomIntegrator is that it can use derivatives of the
 * potential energy with respect to context parameters.  These derivatives are
 * typically computed by custom forces, and are only computed if a Force object
@@ -243,6 +258,18 @@ namespace OpenMM {
 * are defined in radians, and log is the natural logarithm.  step(x) = 0 if x is less than 0, 1 otherwise.  delta(x) = 1 if x is 0, 0 otherwise.
 * select(x,y,z) = z if x = 0, y otherwise.  An expression may also involve intermediate quantities that are defined following the main expression, using ";" as a separator.
 * 
+ * Expressions used in ComputePerDof and ComputeSum steps can also use the following
+ * functions that operate on vectors: cross(a, b) is the cross product of two
+ * vectors; dot(a, b) is the dot product of two vectors; _x(a), _y(a), and _z(a)
+ * extract a single component from a vector; and vector(a, b, c) creates a new
+ * vector with the x component of the first argument, the y component of the
+ * second argument, and the z component of the third argument.  Remember that every
+ * quantity appearing in a vector expression is a vector.  Functions that appear
+ * to return a scalar really return a vector whose components are all the same.
+ * For example, _z(a) returns the vector (a.z, a.z, a.z).  Likewise, wherever a
+ * constant appears in the expression, it really means a vector whose components
+ * all have the same value.
+ *
 * In addition, you can call addTabulatedFunction() to define a new function based on tabulated values.  You specify the function by
 * creating a TabulatedFunction object.  That function can then appear in expressions.
 */

--- a/platforms/cuda/include/CudaExpressionUtilities.h
+++ b/platforms/cuda/include/CudaExpressionUtilities.h
@@ -122,6 +122,7 @@ private:
            std::vector<const Lepton::ExpressionTreeNode*>& nodes);
    void findRelatedPowers(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
            std::map<int, const Lepton::ExpressionTreeNode*>& powers);
+    void callFunction(std::stringstream& out, std::string singleFn, std::string doubleFn, const std::string& arg, const std::string& tempType);
    std::vector<std::vector<double> > computeFunctionParameters(const std::vector<const TabulatedFunction*>& functions);
    CudaContext& context;
    FunctionPlaceholder fp1, fp2, fp3, periodicDistance;

--- a/platforms/cuda/include/CudaKernels.h
+++ b/platforms/cuda/include/CudaKernels.h
@@ -1495,9 +1495,8 @@ class CudaIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
 public:
    enum GlobalTargetType {DT, VARIABLE, PARAMETER};
    CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu),
-            hasInitializedKernels(false), localValuesAreCurrent(false), perDofValues(NULL), needsEnergyParamDerivs(false) {
+            hasInitializedKernels(false), needsEnergyParamDerivs(false) {
    }
-    ~CudaIntegrateCustomStepKernel();
    /**
     * Initialize the kernel.
     * 
@@ -1561,7 +1560,7 @@ private:
    class ReorderListener;
    class GlobalTarget;
    class DerivFunction;
-    std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator,
+    std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator,
        const std::string& forceName, const std::string& energyName, std::vector<const TabulatedFunction*>& functions,
        std::vector<std::pair<std::string, std::string> >& functionNames);
    void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
@@ -1574,21 +1573,21 @@ private:
    double energy;
    float energyFloat;
    int numGlobalVariables, sumWorkGroupSize;
-    bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
+    bool hasInitializedKernels, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
-    mutable bool localValuesAreCurrent;
+    std::vector<bool> deviceValuesAreCurrent;
+    mutable std::vector<bool> localValuesAreCurrent; 
    CudaArray globalValues;
    CudaArray sumBuffer;
    CudaArray summedValue;
    CudaArray uniformRandoms;
    CudaArray randomSeed;
    CudaArray perDofEnergyParamDerivs;
-    std::vector<CudaArray> tabulatedFunctions;
+    std::vector<CudaArray> tabulatedFunctions, perDofValues;
    std::map<int, double> savedEnergy;
    std::map<int, CudaArray> savedForces;
    std::set<int> validSavedForces;
-    CudaParameterSet* perDofValues;
+    mutable std::vector<std::vector<float4> > localPerDofValuesFloat;
-    mutable std::vector<std::vector<float> > localPerDofValuesFloat;
+    mutable std::vector<std::vector<double4> > localPerDofValuesDouble;
-    mutable std::vector<std::vector<double> > localPerDofValuesDouble;
    std::map<std::string, double> energyParamDerivs;
    std::vector<std::string> perDofEnergyParamDerivNames;
    std::vector<float> localPerDofEnergyParamDerivsFloat;

--- a/platforms/cuda/src/CudaContext.cpp
+++ b/platforms/cuda/src/CudaContext.cpp
@@ -203,12 +203,13 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
                break;
            }
        }
-        if (this->deviceIndex == -1)
+        if (this->deviceIndex == -1) {
            if (deviceIndex != -1)
                throw OpenMMException("The requested CUDA device could not be loaded");
            else
                throw OpenMMException("No compatible CUDA device is available");
        }
+    }
    else {
        isLinkedContext = true;
        context = originalContext->context;

--- a/platforms/cuda/src/CudaExpressionUtilities.cpp
+++ b/platforms/cuda/src/CudaExpressionUtilities.cpp
--- a/platforms/cuda/src/CudaKernels.cpp
+++ b/platforms/cuda/src/CudaKernels.cpp
--- a/platforms/cuda/src/kernels/customIntegratorPerDof.cu
+++ b/platforms/cuda/src/kernels/customIntegratorPerDof.cu
@@ -23,10 +23,14 @@ inline __device__ void storePos(real4* __restrict__ posq, real4* __restrict__ po
 #endif
 }
-inline __device__ double4 convertToDouble4(mixed4 a) {
+inline __device__ double4 convertToDouble4(float4 a) {
    return make_double4(a.x, a.y, a.z, a.w);
 }
+inline __device__ double4 convertToDouble4(double4 a) {
+    return a;
+}
 inline __device__ mixed4 convertFromDouble4(double4 a) {
    return make_mixed4(a.x, a.y, a.z, a.w);
 }
@@ -36,7 +40,7 @@ extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __rest
        mixed* __restrict__ sum, const float4* __restrict__ gaussianValues, unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues,
        const mixed energy, mixed* __restrict__ energyParamDerivs
        PARAMETER_ARGUMENTS) {
-    mixed stepSize = dt[0].y;
+    double3 stepSize = make_double3(dt[0].y);
    int index = blockIdx.x*blockDim.x+threadIdx.x;
    const double forceScale = 1.0/0xFFFFFFFF;
    while (index < NUM_ATOMS) {
@@ -47,7 +51,7 @@ extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __rest
 #endif
        double4 velocity = convertToDouble4(velm[index]);
        double4 f = make_double4(forceScale*force[index], forceScale*force[index+PADDED_NUM_ATOMS], forceScale*force[index+PADDED_NUM_ATOMS*2], 0.0);
-        double mass = 1.0/velocity.w;
+        double3 mass = make_double3(1.0/velocity.w);
        if (velocity.w != 0.0) {
            int gaussianIndex = gaussianBaseIndex;
            int uniformIndex = 0;

--- a/platforms/opencl/src/OpenCLExpressionUtilities.cpp
+++ b/platforms/opencl/src/OpenCLExpressionUtilities.cpp
@@ -479,10 +479,10 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
            out << "erfc(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::STEP:
-            out << getTempName(node.getChildren()[0], temps) << " >= 0.0f ? 1.0f : 0.0f";
+            out << getTempName(node.getChildren()[0], temps) << " >= 0.0f ? (" << tempType << ") 1 : (" << tempType << ") 0";
            break;
        case Operation::DELTA:
-            out << getTempName(node.getChildren()[0], temps) << " == 0.0f ? 1.0f : 0.0f";
+            out << getTempName(node.getChildren()[0], temps) << " == 0.0f ? (" << tempType << ") 1 : (" << tempType << ") 0";
            break;
        case Operation::SQUARE:
        {

--- a/platforms/opencl/src/OpenCLKernels.cpp
+++ b/platforms/opencl/src/OpenCLKernels.cpp
@@ -7592,26 +7592,15 @@ void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const Cus
 string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator,
        const string& forceName, const string& energyName, vector<const TabulatedFunction*>& functions, vector<pair<string, string> >& functionNames) {
    string tempType = (cl.getSupportsDoublePrecision() ? "double3" : "float3");
+    string convert = (cl.getSupportsDoublePrecision() ? "convert_double3" : "");
    map<string, Lepton::ParsedExpression> expressions;
-    if (variable == "x")
+    expressions[tempType+" tempResult = "] = expr;
-        expressions["position.xyz = "] = expr;
-    else if (variable == "v")
-        expressions["velocity.xyz = "] = expr;
-    else if (variable == "")
-        expressions[tempType+" tempSum = "] = expr;
-    else {
-        for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
-            if (variable == integrator.getPerDofVariableName(i))
-                expressions["perDof"+cl.intToString(i)+" = "] = expr;
-    }
-    if (expressions.size() == 0)
-        throw OpenMMException("Unknown per-DOF variable: "+variable);
    map<string, string> variables;
-    variables["x"] = "position.xyz";
+    variables["x"] = convert+"(position.xyz)";
-    variables["v"] = "velocity.xyz";
+    variables["v"] = convert+"(velocity.xyz)";
-    variables[forceName] = "f.xyz";
+    variables[forceName] = convert+"(f.xyz)";
-    variables["gaussian"] = "convert_mixed4(gaussian).xyz";
+    variables["gaussian"] = convert+"(gaussian.xyz)";
-    variables["uniform"] = "convert_mixed4(uniform).xyz";
+    variables["uniform"] = convert+"(uniform.xyz)";
    variables["m"] = "mass";
    variables["dt"] = "stepSize";
    if (energyName != "")
@@ -7619,7 +7608,7 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
    for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
        variables[integrator.getGlobalVariableName(i)] = "globals["+cl.intToString(globalVariableIndex[i])+"]";
    for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
-        variables[integrator.getPerDofVariableName(i)] = "perDof"+cl.intToString(i);
+        variables[integrator.getPerDofVariableName(i)] = convert+"(perDof"+cl.intToString(i)+")";
    for (int i = 0; i < (int) parameterNames.size(); i++)
        variables[parameterNames[i]] = "globals["+cl.intToString(parameterVariableIndex[i])+"]";
    vector<pair<ExpressionTreeNode, string> > variableNodes;
@@ -7627,8 +7616,19 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
    for (auto& var : variables)
        variableNodes.push_back(make_pair(ExpressionTreeNode(new Operation::Variable(var.first)), var.second));
    string result = cl.getExpressionUtilities().createExpressions(expressions, variableNodes, functions, functionNames, "temp", tempType);
-    if (variable == "")
+    if (variable == "x")
-        result += "sum[index] = tempSum.x+tempSum.y+tempSum.z;\n";
+        result += "position.x = tempResult.x; position.y = tempResult.y; position.z = tempResult.z;\n";
+    else if (variable == "v")
+        result += "velocity.x = tempResult.x; velocity.y = tempResult.y; velocity.z = tempResult.z;\n";
+    else if (variable == "")
+        result += "sum[index] = tempResult.x+tempResult.y+tempResult.z;\n";
+    else {
+        for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
+            if (variable == integrator.getPerDofVariableName(i)) {
+                string varName = "perDof"+cl.intToString(i);
+                result += varName+".x = tempResult.x; "+varName+".y = tempResult.y; "+varName+".z = tempResult.z;\n";
+            }
+    }
    return result;
 }
@@ -7637,7 +7637,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
    int numAtoms = cl.getNumAtoms();
    int numSteps = integrator.getNumComputations();
    bool useDouble = cl.getUseDoublePrecision() || cl.getUseMixedPrecision();
-    string tempType = (useDouble ? "double3" : "float3");
+    string tempType = (cl.getSupportsDoublePrecision() ? "double3" : "float3");
    string perDofType = (useDouble ? "double4" : "float4");
    if (!hasInitializedKernels) {
        hasInitializedKernels = true;
@@ -7849,7 +7849,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
                stringstream compute;
                for (int i = 0; i < perDofValues.size(); i++)
-                    compute << tempType<<" perDof"<<cl.intToString(i)<<" = perDofValues"<<cl.intToString(i)<<"[index].xyz;\n";
+                    compute << tempType<<" perDof"<<cl.intToString(i)<<" = convert_"<<tempType<<"(perDofValues"<<cl.intToString(i)<<"[index].xyz);\n";
                int numGaussian = 0, numUniform = 0;
                for (int j = step; j < numSteps && (j == step || merged[j]); j++) {
                    numGaussian += numAtoms*usesVariable(expression[j][0], "gaussian");
@@ -7874,7 +7874,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
                        compute << "velm[index] = convert_mixed4(velocity);\n";
                    else {
                        for (int i = 0; i < perDofValues.size(); i++)
-                            compute << "perDofValues"<<cl.intToString(i)<<"[index] = ("<<perDofType<<") (perDof"<<cl.intToString(i)<<", 0);\n";
+                            compute << "perDofValues"<<cl.intToString(i)<<"[index] = ("<<perDofType<<") (perDof"<<cl.intToString(i)<<".x, perDof"<<cl.intToString(i)<<".y, perDof"<<cl.intToString(i)<<".z, 0);\n";
                    }
                    if (numGaussian > 0)
                        compute << "gaussianIndex += NUM_ATOMS;\n";
@@ -7971,7 +7971,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
        stringstream computeKE;
        for (int i = 0; i < perDofValues.size(); i++)
-            computeKE << tempType<<" perDof"<<cl.intToString(i)<<" = perDofValues"<<cl.intToString(i)<<"[index].xyz;\n";
+            computeKE << tempType<<" perDof"<<cl.intToString(i)<<" = convert_"<<tempType<<"(perDofValues"<<cl.intToString(i)<<"[index].xyz);\n";
        Lepton::ParsedExpression keExpression = Lepton::Parser::parse(integrator.getKineticEnergyExpression()).optimize();
        computeKE << createPerDofComputation("", keExpression, integrator, "f", "", functionList, functionNames);
        map<string, string> replacements;
@@ -8005,7 +8005,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
            kineticEnergyKernel.setArg<cl_float>(index++, 0.0f);
        kineticEnergyKernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs.getDeviceBuffer());
        for (auto& array : perDofValues)
-            kineticEnergyKernel.setArg<cl::Memory>(index++, array.getDeviceBuffer());
+            kineticEnergyKernel.setArg<cl::Buffer>(index++, array.getDeviceBuffer());
        for (auto& array : tabulatedFunctions)
            kineticEnergyKernel.setArg<cl::Buffer>(index++, array.getDeviceBuffer());
        keNeedsForce = usesVariable(keExpression, "f");

--- a/tests/TestCustomIntegrator.h
+++ b/tests/TestCustomIntegrator.h
@@ -537,9 +537,11 @@ void testPerDofVariables() {
    CustomIntegrator integrator(0.01);
    integrator.addPerDofVariable("temp", 0);
    integrator.addPerDofVariable("pos", 0);
+    integrator.addPerDofVariable("computed", 0);
    integrator.addComputePerDof("v", "v+dt*f/m");
    integrator.addComputePerDof("x", "x+dt*v");
    integrator.addComputePerDof("pos", "x");
+    integrator.addComputePerDof("computed", "step(v)*log(x^2)");
    Context context(system, integrator, platform);
    context.setPositions(positions);
    vector<Vec3> initialValues(numParticles);
@@ -552,13 +554,24 @@ void testPerDofVariables() {
    vector<Vec3> values;
    for (int i = 0; i < 100; ++i) {
        integrator.step(1);
-        State state = context.getState(State::Positions);
+        State state = context.getState(State::Positions | State::Velocities);
        integrator.getPerDofVariable(0, values);
        for (int j = 0; j < numParticles; j++)
            ASSERT_EQUAL_VEC(initialValues[j], values[j], 1e-5);
        integrator.getPerDofVariable(1, values);
        for (int j = 0; j < numParticles; j++)
            ASSERT_EQUAL_VEC(state.getPositions()[j], values[j], 1e-5);
+        integrator.getPerDofVariable(2, values);
+        for (int j = 0; j < numParticles; j++)
+            for (int k = 0; k < 3; k++) {
+                if (state.getVelocities()[j][k] < 0) {
+                    ASSERT(values[j][k] == 0.0);
+                }
+                else {
+                    double v = state.getPositions()[j][k];
+                    ASSERT_EQUAL_TOL(log(v*v), values[j][k], 1e-5);
+                }
+            }
    }
 }