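Very minor optimizations: skip the bonded-interaction computation when none of the requested force groups contains a bonded interaction, and skip constraint handling in the custom integrator kernels when the System has no constraints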

ef627240 · peastman · c744444a · ef627240 · ef627240 · ef627240
Commit ef627240 authored Jul 29, 2015 by peastman
8 changed files
--- a/platforms/cuda/include/CudaBondedUtilities.h
+++ b/platforms/cuda/include/CudaBondedUtilities.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2011-2012 Stanford University and the Authors.      *
+ * Portions copyright (c) 2011-2015 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -130,7 +130,7 @@ private:
    std::vector<std::vector<CudaArray*> > atomIndices;
    std::vector<std::string> prefixCode;
    std::vector<void*> kernelArgs;
-    int numForceBuffers, maxBonds;
+    int numForceBuffers, maxBonds, allGroups;
    bool hasInitializedKernels, hasInteractions;
 };


--- a/platforms/cuda/include/CudaKernels.h
+++ b/platforms/cuda/include/CudaKernels.h
@@ -1294,7 +1294,7 @@ private:
    double prevStepSize, energy;
    float energyFloat;
    int numGlobalVariables;
-    bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce;
+    bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints;
    mutable bool localValuesAreCurrent;
    CudaArray* globalValues;
    CudaArray* sumBuffer;

--- a/platforms/cuda/src/CudaBondedUtilities.cpp
+++ b/platforms/cuda/src/CudaBondedUtilities.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2011-2012 Stanford University and the Authors.      *
+ * Portions copyright (c) 2011-2015 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -34,7 +34,7 @@
 using namespace OpenMM;
 using namespace std;

-CudaBondedUtilities::CudaBondedUtilities(CudaContext& context) : context(context), numForceBuffers(0), maxBonds(0), hasInitializedKernels(false) {
+CudaBondedUtilities::CudaBondedUtilities(CudaContext& context) : context(context), numForceBuffers(0), maxBonds(0), allGroups(0), hasInitializedKernels(false) {
 }

 CudaBondedUtilities::~CudaBondedUtilities() {
@@ -48,6 +48,7 @@ void CudaBondedUtilities::addInteraction(const vector<vector<int> >& atoms, cons
        forceAtoms.push_back(atoms);
        forceSource.push_back(source);
        forceGroup.push_back(group);
+        allGroups |= 1<<group;
    }
 }

@@ -152,6 +153,8 @@ string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int
 }

 void CudaBondedUtilities::computeInteractions(int groups) {
+    if ((groups&allGroups) == 0)
+        return;
    if (!hasInitializedKernels) {
        hasInitializedKernels = true;
        kernelArgs.push_back(&context.getForce().getDevicePointer());

--- a/platforms/cuda/src/CudaKernels.cpp
+++ b/platforms/cuda/src/CudaKernels.cpp
@@ -5877,22 +5877,25 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
        
        // Determine how each step will represent the position (as just a value, or a value plus a delta).
        
+        hasAnyConstraints = (context.getSystem().getNumConstraints() > 0);
        vector<bool> storePosAsDelta(numSteps, false);
        vector<bool> loadPosAsDelta(numSteps, false);
-        bool beforeConstrain = false;
-        for (int step = numSteps-1; step >= 0; step--) {
-            if (stepType[step] == CustomIntegrator::ConstrainPositions)
-                beforeConstrain = true;
-            else if (stepType[step] == CustomIntegrator::ComputePerDof && variable[step] == "x" && beforeConstrain)
-                storePosAsDelta[step] = true;
-        }
-        bool storedAsDelta = false;
-        for (int step = 0; step < numSteps; step++) {
-            loadPosAsDelta[step] = storedAsDelta;
-            if (storePosAsDelta[step] == true)
-                storedAsDelta = true;
-            if (stepType[step] == CustomIntegrator::ConstrainPositions)
-                storedAsDelta = false;
+        if (hasAnyConstraints) {
+            bool beforeConstrain = false;
+            for (int step = numSteps-1; step >= 0; step--) {
+                if (stepType[step] == CustomIntegrator::ConstrainPositions)
+                    beforeConstrain = true;
+                else if (stepType[step] == CustomIntegrator::ComputePerDof && variable[step] == "x" && beforeConstrain)
+                    storePosAsDelta[step] = true;
+            }
+            bool storedAsDelta = false;
+            for (int step = 0; step < numSteps; step++) {
+                loadPosAsDelta[step] = storedAsDelta;
+                if (storePosAsDelta[step] == true)
+                    storedAsDelta = true;
+                if (stepType[step] == CustomIntegrator::ConstrainPositions)
+                    storedAsDelta = false;
+            }
        }
        
        // Identify steps that can be merged into a single kernel.
@@ -6214,9 +6217,11 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
            context.updateContextState();
        }
        else if (stepType[step] == CustomIntegrator::ConstrainPositions) {
-            cu.getIntegrationUtilities().applyConstraints(integrator.getConstraintTolerance());
-            kernelArgs[step][0][1] = &posCorrection;
-            cu.executeKernel(kernels[step][0], &kernelArgs[step][0][0], numAtoms);
+            if (hasAnyConstraints) {
+                cu.getIntegrationUtilities().applyConstraints(integrator.getConstraintTolerance());
+                kernelArgs[step][0][1] = &posCorrection;
+                cu.executeKernel(kernels[step][0], &kernelArgs[step][0][0], numAtoms);
+            }
            cu.getIntegrationUtilities().computeVirtualSites();
        }
        else if (stepType[step] == CustomIntegrator::ConstrainVelocities) {

--- a/platforms/opencl/include/OpenCLBondedUtilities.h
+++ b/platforms/opencl/include/OpenCLBondedUtilities.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2011 Stanford University and the Authors.           *
+ * Portions copyright (c) 2011-2015 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -137,7 +137,7 @@ private:
    std::vector<OpenCLArray*> atomIndices;
    std::vector<OpenCLArray*> bufferIndices;
    std::vector<std::string> prefixCode;
-    int numForceBuffers, maxBonds;
+    int numForceBuffers, maxBonds, allGroups;
    bool hasInitializedKernels;
 };


--- a/platforms/opencl/include/OpenCLKernels.h
+++ b/platforms/opencl/include/OpenCLKernels.h
@@ -1283,7 +1283,7 @@ private:
    double prevStepSize, energy;
    float energyFloat;
    int numGlobalVariables;
-    bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce;
+    bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints;
    mutable bool localValuesAreCurrent;
    OpenCLArray* globalValues;
    OpenCLArray* sumBuffer;

--- a/platforms/opencl/src/OpenCLBondedUtilities.cpp
+++ b/platforms/opencl/src/OpenCLBondedUtilities.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2011-2012 Stanford University and the Authors.      *
+ * Portions copyright (c) 2011-2015 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -33,7 +33,7 @@
 using namespace OpenMM;
 using namespace std;

-OpenCLBondedUtilities::OpenCLBondedUtilities(OpenCLContext& context) : context(context), numForceBuffers(0), maxBonds(0), hasInitializedKernels(false) {
+OpenCLBondedUtilities::OpenCLBondedUtilities(OpenCLContext& context) : context(context), numForceBuffers(0), maxBonds(0), allGroups(0), hasInitializedKernels(false) {
 }

 OpenCLBondedUtilities::~OpenCLBondedUtilities() {
@@ -48,6 +48,7 @@ void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, co
        forceAtoms.push_back(atoms);
        forceSource.push_back(source);
        forceGroup.push_back(group);
+        allGroups |= 1<<group;
        int width = 1;
        while (width < (int) atoms[0].size())
            width *= 2;
@@ -73,7 +74,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
    if (numForces == 0)
        return;
    
-    // Build the lists of atom indicse and buffer indices.
+    // Build the lists of atom indices and buffer indices.
    
    vector<vector<cl_uint> > bufferVec(numForces);
    vector<vector<int> > bufferCounter(numForces, vector<int>(system.getNumParticles(), 0));
@@ -253,6 +254,8 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
 }

 void OpenCLBondedUtilities::computeInteractions(int groups) {
+    if ((groups&allGroups) == 0)
+        return;
    if (!hasInitializedKernels) {
        hasInitializedKernels = true;
        for (int i = 0; i < (int) forceSets.size(); i++) {

--- a/platforms/opencl/src/OpenCLKernels.cpp
+++ b/platforms/opencl/src/OpenCLKernels.cpp
@@ -6141,22 +6141,25 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
        
        // Determine how each step will represent the position (as just a value, or a value plus a delta).
        
+        hasAnyConstraints = (context.getSystem().getNumConstraints() > 0);
        vector<bool> storePosAsDelta(numSteps, false);
        vector<bool> loadPosAsDelta(numSteps, false);
-        bool beforeConstrain = false;
-        for (int step = numSteps-1; step >= 0; step--) {
-            if (stepType[step] == CustomIntegrator::ConstrainPositions)
-                beforeConstrain = true;
-            else if (stepType[step] == CustomIntegrator::ComputePerDof && variable[step] == "x" && beforeConstrain)
-                storePosAsDelta[step] = true;
-        }
-        bool storedAsDelta = false;
-        for (int step = 0; step < numSteps; step++) {
-            loadPosAsDelta[step] = storedAsDelta;
-            if (storePosAsDelta[step] == true)
-                storedAsDelta = true;
-            if (stepType[step] == CustomIntegrator::ConstrainPositions)
-                storedAsDelta = false;
+        if (hasAnyConstraints) {
+            bool beforeConstrain = false;
+            for (int step = numSteps-1; step >= 0; step--) {
+                if (stepType[step] == CustomIntegrator::ConstrainPositions)
+                    beforeConstrain = true;
+                else if (stepType[step] == CustomIntegrator::ComputePerDof && variable[step] == "x" && beforeConstrain)
+                    storePosAsDelta[step] = true;
+            }
+            bool storedAsDelta = false;
+            for (int step = 0; step < numSteps; step++) {
+                loadPosAsDelta[step] = storedAsDelta;
+                if (storePosAsDelta[step] == true)
+                    storedAsDelta = true;
+                if (stepType[step] == CustomIntegrator::ConstrainPositions)
+                    storedAsDelta = false;
+            }
        }
        
        // Identify steps that can be merged into a single kernel.
@@ -6478,8 +6481,10 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
            context.updateContextState();
        }
        else if (stepType[step] == CustomIntegrator::ConstrainPositions) {
-            cl.getIntegrationUtilities().applyConstraints(integrator.getConstraintTolerance());
-            cl.executeKernel(kernels[step][0], numAtoms);
+            if (hasAnyConstraints) {
+                cl.getIntegrationUtilities().applyConstraints(integrator.getConstraintTolerance());
+                cl.executeKernel(kernels[step][0], numAtoms);
+            }
            cl.getIntegrationUtilities().computeVirtualSites();
        }
        else if (stepType[step] == CustomIntegrator::ConstrainVelocities) {