Commit 59c809c0 authored by peastman's avatar peastman
Browse files

Began overhaul of CUDA CustomIntegrator in preparation for supporting flow control

parent 44b96f0c
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2014 Stanford University and the Authors. * * Portions copyright (c) 2014-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "lepton/CompiledExpression.h" #include "lepton/CompiledExpression.h"
#include "windowsExportCpu.h" #include "windowsExport.h"
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -42,7 +42,7 @@ namespace OpenMM { ...@@ -42,7 +42,7 @@ namespace OpenMM {
/** /**
* This class simplifies the management of a set of related CompiledExpressions that share variables. * This class simplifies the management of a set of related CompiledExpressions that share variables.
*/ */
class OPENMM_EXPORT_CPU CompiledExpressionSet { class OPENMM_EXPORT CompiledExpressionSet {
public: public:
CompiledExpressionSet(); CompiledExpressionSet();
/** /**
...@@ -60,6 +60,10 @@ public: ...@@ -60,6 +60,10 @@ public:
* @param value the value to set it to * @param value the value to set it to
*/ */
void setVariable(int index, double value); void setVariable(int index, double value);
/**
* Get the total number of variables for which indices have been allocated.
*/
int getNumVariables() const;
private: private:
std::vector<Lepton::CompiledExpression*> expressions; std::vector<Lepton::CompiledExpression*> expressions;
std::vector<std::string> variables; std::vector<std::string> variables;
......
/* Portions copyright (c) 2014 Stanford University and Simbios. /* Portions copyright (c) 2014-2015 Stanford University and Simbios.
* Contributors: Peter Eastman * Contributors: Peter Eastman
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "CompiledExpressionSet.h" #include "openmm/internal/CompiledExpressionSet.h"
using namespace OpenMM; using namespace OpenMM;
using namespace Lepton; using namespace Lepton;
...@@ -54,3 +54,7 @@ void CompiledExpressionSet::setVariable(int index, double value) { ...@@ -54,3 +54,7 @@ void CompiledExpressionSet::setVariable(int index, double value) {
for (int i = 0; i < (int) variableReferences[index].size(); i++) for (int i = 0; i < (int) variableReferences[index].size(); i++)
*variableReferences[index][i] = value; *variableReferences[index][i] = value;
} }
int CompiledExpressionSet::getNumVariables() const {
return variables.size();
}
...@@ -25,10 +25,10 @@ ...@@ -25,10 +25,10 @@
#ifndef OPENMM_CPU_CUSTOM_GB_FORCE_H__ #ifndef OPENMM_CPU_CUSTOM_GB_FORCE_H__
#define OPENMM_CPU_CUSTOM_GB_FORCE_H__ #define OPENMM_CPU_CUSTOM_GB_FORCE_H__
#include "CompiledExpressionSet.h"
#include "CpuNeighborList.h" #include "CpuNeighborList.h"
#include "lepton/CompiledExpression.h" #include "lepton/CompiledExpression.h"
#include "openmm/CustomGBForce.h" #include "openmm/CustomGBForce.h"
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/ThreadPool.h" #include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize.h" #include "openmm/internal/vectorize.h"
#include <map> #include <map>
......
...@@ -27,9 +27,9 @@ ...@@ -27,9 +27,9 @@
#include "ReferenceForce.h" #include "ReferenceForce.h"
#include "ReferenceBondIxn.h" #include "ReferenceBondIxn.h"
#include "CompiledExpressionSet.h"
#include "CpuNeighborList.h" #include "CpuNeighborList.h"
#include "openmm/CustomManyParticleForce.h" #include "openmm/CustomManyParticleForce.h"
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/ThreadPool.h" #include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize.h" #include "openmm/internal/vectorize.h"
#include "lepton/CompiledExpression.h" #include "lepton/CompiledExpression.h"
......
...@@ -35,6 +35,9 @@ ...@@ -35,6 +35,9 @@
#include "CudaSort.h" #include "CudaSort.h"
#include "openmm/kernels.h" #include "openmm/kernels.h"
#include "openmm/System.h" #include "openmm/System.h"
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/CustomIntegratorUtilities.h"
#include "lepton/CompiledExpression.h"
#include <cufft.h> #include <cufft.h>
namespace OpenMM { namespace OpenMM {
...@@ -1213,6 +1216,7 @@ private: ...@@ -1213,6 +1216,7 @@ private:
*/ */
class CudaIntegrateCustomStepKernel : public IntegrateCustomStepKernel { class CudaIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
public: public:
enum GlobalTargetType {DT, VARIABLE, PARAMETER};
CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu), CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu),
hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), contextParameterValues(NULL), sumBuffer(NULL), potentialEnergy(NULL), hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), contextParameterValues(NULL), sumBuffer(NULL), potentialEnergy(NULL),
kineticEnergy(NULL), uniformRandoms(NULL), randomSeed(NULL), perDofValues(NULL) { kineticEnergy(NULL), uniformRandoms(NULL), randomSeed(NULL), perDofValues(NULL) {
...@@ -1279,15 +1283,17 @@ public: ...@@ -1279,15 +1283,17 @@ public:
void setPerDofVariable(ContextImpl& context, int variable, const std::vector<Vec3>& values); void setPerDofVariable(ContextImpl& context, int variable, const std::vector<Vec3>& values);
private: private:
class ReorderListener; class ReorderListener;
class GlobalTarget;
std::string createGlobalComputation(const std::string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator, const std::string& energyName); std::string createGlobalComputation(const std::string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator, const std::string& energyName);
std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName); std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName);
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid); void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
void recordGlobalValue(double value, GlobalTarget target);
void recordChangedParameters(ContextImpl& context); void recordChangedParameters(ContextImpl& context);
CudaContext& cu; CudaContext& cu;
double prevStepSize, energy; double prevStepSize, energy;
float energyFloat; float energyFloat;
int numGlobalVariables; int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters, keNeedsForce; bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce;
mutable bool localValuesAreCurrent; mutable bool localValuesAreCurrent;
CudaArray* globalValues; CudaArray* globalValues;
CudaArray* contextParameterValues; CudaArray* contextParameterValues;
...@@ -1303,19 +1309,43 @@ private: ...@@ -1303,19 +1309,43 @@ private:
mutable std::vector<std::vector<double> > localPerDofValuesDouble; mutable std::vector<std::vector<double> > localPerDofValuesDouble;
std::vector<float> contextValuesFloat; std::vector<float> contextValuesFloat;
std::vector<double> contextValuesDouble; std::vector<double> contextValuesDouble;
std::vector<float> globalValuesFloat;
std::vector<double> globalValuesDouble;
std::vector<double> initialGlobalVariables;
std::vector<std::vector<CUfunction> > kernels; std::vector<std::vector<CUfunction> > kernels;
std::vector<std::vector<std::vector<void*> > > kernelArgs; std::vector<std::vector<std::vector<void*> > > kernelArgs;
std::vector<void*> kineticEnergyArgs; std::vector<void*> kineticEnergyArgs;
CUfunction randomKernel, kineticEnergyKernel, sumKineticEnergyKernel; CUfunction randomKernel, kineticEnergyKernel, sumKineticEnergyKernel;
std::vector<CustomIntegrator::ComputationType> stepType; std::vector<CustomIntegrator::ComputationType> stepType;
std::vector<CustomIntegratorUtilities::Comparison> comparisons;
std::vector<std::vector<Lepton::CompiledExpression> > globalExpressions;
CompiledExpressionSet expressionSet;
std::vector<bool> needsGlobals;
std::vector<bool> needsForces; std::vector<bool> needsForces;
std::vector<bool> needsEnergy; std::vector<bool> needsEnergy;
std::vector<bool> computeBothForceAndEnergy;
std::vector<bool> invalidatesForces; std::vector<bool> invalidatesForces;
std::vector<bool> merged; std::vector<bool> merged;
std::vector<int> forceGroup; std::vector<int> forceGroupFlags;
std::vector<int> blockEnd;
std::vector<int> requiredGaussian; std::vector<int> requiredGaussian;
std::vector<int> requiredUniform; std::vector<int> requiredUniform;
std::vector<int> stepEnergyVariableIndex;
std::vector<int> globalVariableIndex;
std::vector<int> parameterVariableIndex;
int gaussianVariableIndex, uniformVariableIndex, dtVariableIndex;
std::vector<std::string> parameterNames; std::vector<std::string> parameterNames;
std::vector<GlobalTarget> stepTarget;
};
class CudaIntegrateCustomStepKernel::GlobalTarget {
public:
CudaIntegrateCustomStepKernel::GlobalTargetType type;
int variableIndex;
GlobalTarget() {
}
GlobalTarget(CudaIntegrateCustomStepKernel::GlobalTargetType type, int variableIndex) : type(type), variableIndex(variableIndex) {
}
}; };
/** /**
......
...@@ -5696,7 +5696,6 @@ void CudaIntegrateCustomStepKernel::initialize(const System& system, const Custo ...@@ -5696,7 +5696,6 @@ void CudaIntegrateCustomStepKernel::initialize(const System& system, const Custo
cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed()); cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
numGlobalVariables = integrator.getNumGlobalVariables(); numGlobalVariables = integrator.getNumGlobalVariables();
int elementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
globalValues = new CudaArray(cu, max(1, numGlobalVariables), elementSize, "globalVariables");
sumBuffer = new CudaArray(cu, ((3*system.getNumParticles()+3)/4)*4, elementSize, "sumBuffer"); sumBuffer = new CudaArray(cu, ((3*system.getNumParticles()+3)/4)*4, elementSize, "sumBuffer");
potentialEnergy = new CudaArray(cu, 1, cu.getEnergyBuffer().getElementSize(), "potentialEnergy"); potentialEnergy = new CudaArray(cu, 1, cu.getEnergyBuffer().getElementSize(), "potentialEnergy");
kineticEnergy = new CudaArray(cu, 1, elementSize, "kineticEnergy"); kineticEnergy = new CudaArray(cu, 1, elementSize, "kineticEnergy");
...@@ -5764,11 +5763,11 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari ...@@ -5764,11 +5763,11 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari
if (energyName != "") if (energyName != "")
variables[energyName] = "energy"; variables[energyName] = "energy";
for (int i = 0; i < integrator.getNumGlobalVariables(); i++) for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(i)+"]"; variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(globalVariableIndex[i])+"]";
for (int i = 0; i < integrator.getNumPerDofVariables(); i++) for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i); variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i);
for (int i = 0; i < (int) parameterNames.size(); i++) for (int i = 0; i < (int) parameterNames.size(); i++)
variables[parameterNames[i]] = "params["+cu.intToString(i)+"]"; variables[parameterNames[i]] = "globals["+cu.intToString(parameterVariableIndex[i])+"]";
vector<const TabulatedFunction*> functions; vector<const TabulatedFunction*> functions;
vector<pair<string, string> > functionNames; vector<pair<string, string> > functionNames;
return cu.getExpressionUtilities().createExpressions(expressions, variables, functions, functionNames, "temp"+cu.intToString(component)+"_", "double"); return cu.getExpressionUtilities().createExpressions(expressions, variables, functions, functionNames, "temp"+cu.intToString(component)+"_", "double");
...@@ -5808,10 +5807,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -5808,10 +5807,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
kernelArgs.resize(integrator.getNumComputations()); kernelArgs.resize(integrator.getNumComputations());
requiredGaussian.resize(integrator.getNumComputations(), 0); requiredGaussian.resize(integrator.getNumComputations(), 0);
requiredUniform.resize(integrator.getNumComputations(), 0); requiredUniform.resize(integrator.getNumComputations(), 0);
needsForces.resize(numSteps, false); needsGlobals.resize(numSteps, false);
needsEnergy.resize(numSteps, false); globalExpressions.resize(numSteps);
forceGroup.resize(numSteps, -2); stepType.resize(numSteps);
invalidatesForces.resize(numSteps, false); stepTarget.resize(numSteps);
merged.resize(numSteps, false); merged.resize(numSteps, false);
modifiesParameters = false; modifiesParameters = false;
map<string, string> defines; map<string, string> defines;
...@@ -5819,24 +5818,40 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -5819,24 +5818,40 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["WORK_GROUP_SIZE"] = cu.intToString(CudaContext::ThreadBlockSize); defines["WORK_GROUP_SIZE"] = cu.intToString(CudaContext::ThreadBlockSize);
defines["SUM_BUFFER_SIZE"] = "0"; defines["SUM_BUFFER_SIZE"] = "0";
defines["SUM_OUTPUT_INDEX"] = "0";
// Build a list of all variables that affect the forces, so we can tell which // Record information about all the computation steps.
// steps invalidate them.
set<string> affectsForce; vector<string> variable(numSteps);
affectsForce.insert("x"); vector<int> forceGroup;
for (vector<ForceImpl*>::const_iterator iter = context.getForceImpls().begin(); iter != context.getForceImpls().end(); ++iter) { vector<vector<Lepton::ParsedExpression> > expression;
const map<string, double> params = (*iter)->getDefaultParameters(); CustomIntegratorUtilities::analyzeComputations(context, integrator, expression, comparisons, blockEnd, invalidatesForces, needsForces, needsEnergy, computeBothForceAndEnergy, forceGroup);
for (map<string, double>::const_iterator param = params.begin(); param != params.end(); ++param) for (int step = 0; step < numSteps; step++) {
affectsForce.insert(param->first); string expr;
integrator.getComputationStep(step, stepType[step], variable[step], expr);
if (stepType[step] == CustomIntegrator::BeginWhileBlock)
blockEnd[blockEnd[step]] = step; // Record where to branch back to.
if (stepType[step] == CustomIntegrator::ComputeGlobal || stepType[step] == CustomIntegrator::BeginIfBlock || stepType[step] == CustomIntegrator::BeginWhileBlock)
for (int i = 0; i < (int) expression[step].size(); i++)
globalExpressions[step].push_back(expression[step][i].createCompiledExpression());
}
for (int step = 0; step < numSteps; step++) {
for (int i = 0; i < (int) globalExpressions[step].size(); i++)
expressionSet.registerExpression(globalExpressions[step][i]);
} }
// Record information about all the computation steps. // Record the indices for variables in the CompiledExpressionSet.
stepType.resize(numSteps); gaussianVariableIndex = expressionSet.getVariableIndex("gaussian");
vector<string> variable(numSteps); uniformVariableIndex = expressionSet.getVariableIndex("uniform");
vector<Lepton::ParsedExpression> expression(numSteps); dtVariableIndex = expressionSet.getVariableIndex("dt");
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
globalVariableIndex.push_back(expressionSet.getVariableIndex(integrator.getGlobalVariableName(i)));
for (int i = 0; i < (int) parameterNames.size(); i++)
parameterVariableIndex.push_back(expressionSet.getVariableIndex(parameterNames[i]));
// Record the variable names and flags for the force and energy in each step.
forceGroupFlags.resize(numSteps, -1);
vector<string> forceGroupName; vector<string> forceGroupName;
vector<string> energyGroupName; vector<string> energyGroupName;
for (int i = 0; i < 32; i++) { for (int i = 0; i < 32; i++) {
...@@ -5849,41 +5864,65 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -5849,41 +5864,65 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
} }
vector<string> forceName(numSteps, "f"); vector<string> forceName(numSteps, "f");
vector<string> energyName(numSteps, "energy"); vector<string> energyName(numSteps, "energy");
stepEnergyVariableIndex.resize(numSteps, expressionSet.getVariableIndex("energy"));
for (int step = 0; step < numSteps; step++) { for (int step = 0; step < numSteps; step++) {
string expr; if (needsForces[step] && forceGroup[step] > -1)
integrator.getComputationStep(step, stepType[step], variable[step], expr); forceName[step] = forceGroupName[forceGroup[step]];
if (expr.size() > 0) { if (needsEnergy[step] && forceGroup[step] > -1) {
expression[step] = Lepton::Parser::parse(expr).optimize(); energyName[step] = energyGroupName[forceGroup[step]];
if (usesVariable(expression[step], "f")) { stepEnergyVariableIndex[step] = expressionSet.getVariableIndex(energyName[step]);
needsForces[step] = true;
forceGroup[step] = -1;
} }
if (usesVariable(expression[step], "energy")) { if (forceGroup[step] > -1)
needsEnergy[step] = true; forceGroupFlags[step] = 1<<forceGroup[step];
forceGroup[step] = -1; if (forceGroupFlags[step] == -2 && step > 0)
forceGroupFlags[step] = forceGroupFlags[step-1];
if (forceGroupFlags[step] != -2 && savedForces.find(forceGroupFlags[step]) == savedForces.end())
savedForces[forceGroupFlags[step]] = new CudaArray(cu, cu.getForce().getSize(), cu.getForce().getElementSize(), "savedForces");
} }
for (int i = 0; i < 32; i++) {
if (usesVariable(expression[step], forceGroupName[i])) { // Allocate space for storing global values, both on the host and the device.
if (forceGroup[step] != -2)
throw OpenMMException("A single computation step cannot depend on multiple force groups"); globalValuesFloat.resize(expressionSet.getNumVariables());
needsForces[step] = true; globalValuesDouble.resize(expressionSet.getNumVariables());
forceGroup[step] = 1<<i; int elementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
forceName[step] = forceGroupName[i]; globalValues = new CudaArray(cu, expressionSet.getNumVariables(), elementSize, "globalValues");
for (int i = 0; i < integrator.getNumGlobalVariables(); i++) {
globalValuesDouble[globalVariableIndex[i]] = initialGlobalVariables[i];
expressionSet.setVariable(globalVariableIndex[i], initialGlobalVariables[i]);
} }
if (usesVariable(expression[step], energyGroupName[i])) { for (int i = 0; i < (int) parameterVariableIndex.size(); i++) {
if (forceGroup[step] != -2) double value = context.getParameter(parameterNames[i]);
throw OpenMMException("A single computation step cannot depend on multiple force groups"); globalValuesDouble[parameterVariableIndex[i]] = value;
needsEnergy[step] = true; expressionSet.setVariable(parameterVariableIndex[i], value);
forceGroup[step] = 1<<i;
energyName[step] = energyGroupName[i];
} }
// Record information about the targets of steps that will be stored in global variables.
for (int step = 0; step < numSteps; step++) {
if (stepType[step] == CustomIntegrator::ComputeGlobal || stepType[step] == CustomIntegrator::ComputeSum) {
if (variable[step] == "dt")
stepTarget[step].type = DT;
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
if (variable[step] == integrator.getGlobalVariableName(i))
stepTarget[step].type = VARIABLE;
for (int i = 0; i < (int) parameterNames.size(); i++)
if (variable[step] == parameterNames[i])
stepTarget[step].type = PARAMETER;
stepTarget[step].variableIndex = expressionSet.getVariableIndex(variable[step]);
} }
} }
invalidatesForces[step] = (stepType[step] == CustomIntegrator::ConstrainPositions || affectsForce.find(variable[step]) != affectsForce.end());
if (forceGroup[step] == -2 && step > 0) // Identify which per-DOF steps are going to require global variables or context parameters.
forceGroup[step] = forceGroup[step-1];
if (forceGroup[step] != -2 && savedForces.find(forceGroup[step]) == savedForces.end()) for (int step = 0; step < numSteps; step++) {
savedForces[forceGroup[step]] = new CudaArray(cu, cu.getForce().getSize(), cu.getForce().getElementSize(), "savedForces"); if (stepType[step] == CustomIntegrator::ComputePerDof || stepType[step] == CustomIntegrator::ComputeSum) {
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
if (usesVariable(expression[step][0], integrator.getGlobalVariableName(i)))
needsGlobals[step] = true;
for (int i = 0; i < (int) parameterNames.size(); i++)
if (usesVariable(expression[step][0], parameterNames[i]))
needsGlobals[step] = true;
}
} }
// Determine how each step will represent the position (as just a value, or a value plus a delta). // Determine how each step will represent the position (as just a value, or a value plus a delta).
...@@ -5911,9 +5950,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -5911,9 +5950,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
for (int step = 1; step < numSteps; step++) { for (int step = 1; step < numSteps; step++) {
if (needsForces[step] || needsEnergy[step]) if (needsForces[step] || needsEnergy[step])
continue; continue;
if (stepType[step-1] == CustomIntegrator::ComputeGlobal && stepType[step] == CustomIntegrator::ComputeGlobal &&
!usesVariable(expression[step], "uniform") && !usesVariable(expression[step], "gaussian"))
merged[step] = true;
if (stepType[step-1] == CustomIntegrator::ComputePerDof && stepType[step] == CustomIntegrator::ComputePerDof) if (stepType[step-1] == CustomIntegrator::ComputePerDof && stepType[step] == CustomIntegrator::ComputePerDof)
merged[step] = true; merged[step] = true;
} }
...@@ -5933,15 +5969,15 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -5933,15 +5969,15 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
} }
int numGaussian = 0, numUniform = 0; int numGaussian = 0, numUniform = 0;
for (int j = step; j < numSteps && (j == step || merged[j]); j++) { for (int j = step; j < numSteps && (j == step || merged[j]); j++) {
numGaussian += numAtoms*usesVariable(expression[j], "gaussian"); numGaussian += numAtoms*usesVariable(expression[j][0], "gaussian");
numUniform += numAtoms*usesVariable(expression[j], "uniform"); numUniform += numAtoms*usesVariable(expression[j][0], "uniform");
compute << "{\n"; compute << "{\n";
if (numGaussian > 0) if (numGaussian > 0)
compute << "float4 gaussian = gaussianValues[gaussianIndex+index];\n"; compute << "float4 gaussian = gaussianValues[gaussianIndex+index];\n";
if (numUniform > 0) if (numUniform > 0)
compute << "float4 uniform = uniformValues[uniformIndex+index];\n"; compute << "float4 uniform = uniformValues[uniformIndex+index];\n";
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j], i, integrator, forceName[j], energyName[j]); compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j][0], i, integrator, forceName[j], energyName[j]);
if (variable[j] == "x") { if (variable[j] == "x") {
if (storePosAsDelta[j]) if (storePosAsDelta[j])
compute << "posDelta[index] = convertFromDouble4(position-convertToDouble4(loadPos(posq, posqCorrection, index)));\n"; compute << "posDelta[index] = convertFromDouble4(position-convertToDouble4(loadPos(posq, posqCorrection, index)));\n";
...@@ -6010,14 +6046,12 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -6010,14 +6046,12 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
bool found = false; bool found = false;
for (int j = 0; j < integrator.getNumGlobalVariables() && !found; j++) for (int j = 0; j < integrator.getNumGlobalVariables() && !found; j++)
if (variable[step] == integrator.getGlobalVariableName(j)) { if (variable[step] == integrator.getGlobalVariableName(j)) {
args2.push_back(&globalValues->getDevicePointer()); args2.push_back(&kineticEnergy->getDevicePointer());
defines["SUM_OUTPUT_INDEX"] = cu.intToString(j);
found = true; found = true;
} }
for (int j = 0; j < (int) parameterNames.size() && !found; j++) for (int j = 0; j < (int) parameterNames.size() && !found; j++)
if (variable[step] == parameterNames[j]) { if (variable[step] == parameterNames[j]) {
args2.push_back(&contextParameterValues->getDevicePointer()); args2.push_back(&kineticEnergy->getDevicePointer());
defines["SUM_OUTPUT_INDEX"] = cu.intToString(j);
found = true; found = true;
modifiesParameters = true; modifiesParameters = true;
} }
...@@ -6035,7 +6069,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -6035,7 +6069,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
stringstream compute; stringstream compute;
for (int i = step; i < numSteps && (i == step || merged[i]); i++) for (int i = step; i < numSteps && (i == step || merged[i]); i++)
compute << "{\n" << createGlobalComputation(variable[i], expression[i], integrator, energyName[i]) << "}\n"; compute << "{\n" << createGlobalComputation(variable[i], expression[i][0], integrator, energyName[i]) << "}\n";
map<string, string> replacements; map<string, string> replacements;
replacements["COMPUTE_STEP"] = compute.str(); replacements["COMPUTE_STEP"] = compute.str();
CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorGlobal, replacements), defines); CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorGlobal, replacements), defines);
...@@ -6111,7 +6145,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -6111,7 +6145,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args << ", " << buffer.getType() << "* __restrict__ " << valueName; args << ", " << buffer.getType() << "* __restrict__ " << valueName;
} }
replacements["PARAMETER_ARGUMENTS"] = args.str(); replacements["PARAMETER_ARGUMENTS"] = args.str();
defines["SUM_OUTPUT_INDEX"] = "0";
defines["SUM_BUFFER_SIZE"] = cu.intToString(3*numAtoms); defines["SUM_BUFFER_SIZE"] = cu.intToString(3*numAtoms);
if (defines.find("LOAD_POS_AS_DELTA") != defines.end()) if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
defines.erase("LOAD_POS_AS_DELTA"); defines.erase("LOAD_POS_AS_DELTA");
...@@ -6144,7 +6177,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -6144,7 +6177,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
sumKineticEnergyKernel = cu.getKernel(module, useDouble ? "computeDoubleSum" : "computeFloatSum"); sumKineticEnergyKernel = cu.getKernel(module, useDouble ? "computeDoubleSum" : "computeFloatSum");
} }
// Make sure all values (variables, parameters, etc.) stored on the device are up to date. // Make sure all values (variables, parameters, etc.) are up to date.
if (!deviceValuesAreCurrent) { if (!deviceValuesAreCurrent) {
if (useDouble) if (useDouble)
...@@ -6156,39 +6189,15 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -6156,39 +6189,15 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
localValuesAreCurrent = false; localValuesAreCurrent = false;
double stepSize = integrator.getStepSize(); double stepSize = integrator.getStepSize();
if (stepSize != prevStepSize) { if (stepSize != prevStepSize) {
if (useDouble) { recordGlobalValue(stepSize, GlobalTarget(DT, dtVariableIndex));
double size[] = {0, stepSize};
integration.getStepSize().upload(size);
} }
else {
float size[] = {0, (float) stepSize};
integration.getStepSize().upload(size);
}
prevStepSize = stepSize;
}
bool paramsChanged = false;
if (useDouble) {
for (int i = 0; i < (int) parameterNames.size(); i++) { for (int i = 0; i < (int) parameterNames.size(); i++) {
double value = context.getParameter(parameterNames[i]); double value = context.getParameter(parameterNames[i]);
if (value != contextValuesDouble[i]) { if (value != globalValuesDouble[parameterVariableIndex[i]]) {
contextValuesDouble[i] = value; globalValuesDouble[parameterVariableIndex[i]] = value;
paramsChanged = true; deviceGlobalsAreCurrent = false;
}
}
if (paramsChanged)
contextParameterValues->upload(contextValuesDouble);
}
else {
for (int i = 0; i < (int) parameterNames.size(); i++) {
float value = (float) context.getParameter(parameterNames[i]);
if (value != contextValuesFloat[i]) {
contextValuesFloat[i] = value;
paramsChanged = true;
} }
} }
if (paramsChanged)
contextParameterValues->upload(contextValuesFloat);
}
} }
void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) { void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) {
...@@ -6204,7 +6213,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat ...@@ -6204,7 +6213,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0); CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
for (int i = 0; i < numSteps; i++) { for (int i = 0; i < numSteps; i++) {
int lastForceGroups = context.getLastForceGroups(); int lastForceGroups = context.getLastForceGroups();
if ((needsForces[i] || needsEnergy[i]) && (!forcesAreValid || lastForceGroups != forceGroup[i])) { if ((needsForces[i] || needsEnergy[i]) && (!forcesAreValid || lastForceGroups != forceGroupFlags[i])) {
if (forcesAreValid && savedForces.find(lastForceGroups) != savedForces.end()) { if (forcesAreValid && savedForces.find(lastForceGroups) != savedForces.end()) {
// The forces are still valid. We just need a different force group right now. Save the old // The forces are still valid. We just need a different force group right now. Save the old
// forces in case we need them again. // forces in case we need them again.
...@@ -6218,31 +6227,31 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat ...@@ -6218,31 +6227,31 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
// Recompute forces and/or energy. Figure out what is actually needed // Recompute forces and/or energy. Figure out what is actually needed
// between now and the next time they get invalidated again. // between now and the next time they get invalidated again.
bool computeForce = false, computeEnergy = false; bool computeForce = (needsForces[i] || computeBothForceAndEnergy[i]);
for (int j = i; ; j++) { bool computeEnergy = (needsEnergy[i] || computeBothForceAndEnergy[i]);
if (needsForces[j]) if (!computeEnergy && validSavedForces.find(forceGroupFlags[i]) != validSavedForces.end()) {
computeForce = true;
if (needsEnergy[j])
computeEnergy = true;
if (invalidatesForces[j])
break;
if (j == numSteps-1)
j = -1;
if (j == i-1)
break;
}
if (!computeEnergy && validSavedForces.find(forceGroup[i]) != validSavedForces.end()) {
// We can just restore the forces we saved earlier. // We can just restore the forces we saved earlier.
savedForces[forceGroup[i]]->copyTo(cu.getForce()); savedForces[forceGroupFlags[i]]->copyTo(cu.getForce());
} }
else { else {
recordChangedParameters(context); recordChangedParameters(context);
energy = context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroup[i]); energy = context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroupFlags[i]);
energyFloat = (float) energy; energyFloat = (float) energy;
} }
forcesAreValid = true; forcesAreValid = true;
} }
if (needsGlobals[i] && !deviceGlobalsAreCurrent) {
// Upload the global values to the device.
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision())
globalValues->upload(globalValuesDouble);
else {
for (int j = 0; j < (int) globalValuesDouble.size(); j++)
globalValuesFloat[j] = (float) globalValuesDouble[j];
globalValues->upload(globalValuesFloat);
}
}
if (stepType[i] == CustomIntegrator::ComputePerDof && !merged[i]) { if (stepType[i] == CustomIntegrator::ComputePerDof && !merged[i]) {
int randomIndex = integration.prepareRandomNumbers(requiredGaussian[i]); int randomIndex = integration.prepareRandomNumbers(requiredGaussian[i]);
kernelArgs[i][0][1] = &posCorrection; kernelArgs[i][0][1] = &posCorrection;
...@@ -6253,12 +6262,11 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat ...@@ -6253,12 +6262,11 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
cu.executeKernel(randomKernel, &randomArgs[0], numAtoms); cu.executeKernel(randomKernel, &randomArgs[0], numAtoms);
cu.executeKernel(kernels[i][0], &kernelArgs[i][0][0], numAtoms); cu.executeKernel(kernels[i][0], &kernelArgs[i][0][0], numAtoms);
} }
else if (stepType[i] == CustomIntegrator::ComputeGlobal && !merged[i]) { else if (stepType[i] == CustomIntegrator::ComputeGlobal) {
float uniform = SimTKOpenMMUtilities::getUniformlyDistributedRandomNumber(); expressionSet.setVariable(uniformVariableIndex, SimTKOpenMMUtilities::getUniformlyDistributedRandomNumber());
float gauss = SimTKOpenMMUtilities::getNormallyDistributedRandomNumber(); expressionSet.setVariable(gaussianVariableIndex, SimTKOpenMMUtilities::getNormallyDistributedRandomNumber());
kernelArgs[i][0][3] = &uniform; expressionSet.setVariable(stepEnergyVariableIndex[i], energy);
kernelArgs[i][0][4] = &gauss; recordGlobalValue(globalExpressions[i][0].evaluate(), stepTarget[i]);
cu.executeKernel(kernels[i][0], &kernelArgs[i][0][0], 1, 1);
} }
else if (stepType[i] == CustomIntegrator::ComputeSum) { else if (stepType[i] == CustomIntegrator::ComputeSum) {
int randomIndex = integration.prepareRandomNumbers(requiredGaussian[i]); int randomIndex = integration.prepareRandomNumbers(requiredGaussian[i]);
...@@ -6271,6 +6279,16 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat ...@@ -6271,6 +6279,16 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
cu.clearBuffer(*sumBuffer); cu.clearBuffer(*sumBuffer);
cu.executeKernel(kernels[i][0], &kernelArgs[i][0][0], numAtoms); cu.executeKernel(kernels[i][0], &kernelArgs[i][0][0], numAtoms);
cu.executeKernel(kernels[i][1], &kernelArgs[i][1][0], CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize); cu.executeKernel(kernels[i][1], &kernelArgs[i][1][0], CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
double value;
kineticEnergy->download(&value);
globalValuesDouble[stepTarget[i].variableIndex] = value;
}
else {
float value;
kineticEnergy->download(&value);
globalValuesDouble[stepTarget[i].variableIndex] = value;
}
} }
else if (stepType[i] == CustomIntegrator::UpdateContextState) { else if (stepType[i] == CustomIntegrator::UpdateContextState) {
recordChangedParameters(context); recordChangedParameters(context);
...@@ -6335,52 +6353,63 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context, ...@@ -6335,52 +6353,63 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
} }
} }
void CudaIntegrateCustomStepKernel::recordGlobalValue(double value, GlobalTarget target) {
switch (target.type) {
case DT:
globalValuesDouble[dtVariableIndex] = value;
deviceGlobalsAreCurrent = false;
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
double size[] = {0, value};
cu.getIntegrationUtilities().getStepSize().upload(size);
}
else {
float size[] = {0, (float) value};
cu.getIntegrationUtilities().getStepSize().upload(size);
}
prevStepSize = value;
break;
case VARIABLE:
case PARAMETER:
expressionSet.setVariable(target.variableIndex, value);
globalValuesDouble[target.variableIndex] = value;
deviceGlobalsAreCurrent = false;
break;
}
}
void CudaIntegrateCustomStepKernel::recordChangedParameters(ContextImpl& context) { void CudaIntegrateCustomStepKernel::recordChangedParameters(ContextImpl& context) {
if (!modifiesParameters) if (!modifiesParameters)
return; return;
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
contextParameterValues->download(contextValuesDouble);
for (int i = 0; i < (int) parameterNames.size(); i++) { for (int i = 0; i < (int) parameterNames.size(); i++) {
double value = context.getParameter(parameterNames[i]); double value = context.getParameter(parameterNames[i]);
if (value != contextValuesDouble[i]) if (value != globalValuesDouble[parameterVariableIndex[i]])
context.setParameter(parameterNames[i], contextValuesDouble[i]); context.setParameter(parameterNames[i], globalValuesDouble[parameterVariableIndex[i]]);
}
}
else {
contextParameterValues->download(contextValuesFloat);
for (int i = 0; i < (int) parameterNames.size(); i++) {
float value = (float) context.getParameter(parameterNames[i]);
if (value != contextValuesFloat[i])
context.setParameter(parameterNames[i], contextValuesFloat[i]);
}
} }
} }
void CudaIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vector<double>& values) const { void CudaIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vector<double>& values) const {
if (globalValues == NULL) {
// The data structures haven't been created yet, so just return the list of values that was given earlier.
values = initialGlobalVariables;
}
values.resize(numGlobalVariables); values.resize(numGlobalVariables);
if (numGlobalVariables == 0)
return;
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision())
globalValues->download(values);
else {
vector<float> buffer;
globalValues->download(buffer);
for (int i = 0; i < numGlobalVariables; i++) for (int i = 0; i < numGlobalVariables; i++)
values[i] = buffer[i]; values[i] = globalValuesDouble[globalVariableIndex[i]];
}
} }
void CudaIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, const vector<double>& values) { void CudaIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, const vector<double>& values) {
if (numGlobalVariables == 0) if (numGlobalVariables == 0)
return; return;
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) if (globalValues == NULL) {
globalValues->upload(values); // The data structures haven't been created yet, so just store the list of values.
else {
vector<float> buffer(numGlobalVariables); initialGlobalVariables = values;
for (int i = 0; i < numGlobalVariables; i++) return;
buffer[i] = (float) values[i];
globalValues->upload(buffer);
} }
for (int i = 0; i < numGlobalVariables; i++)
globalValuesDouble[globalVariableIndex[i]] = values[i];
deviceGlobalsAreCurrent = false;
} }
void CudaIntegrateCustomStepKernel::getPerDofVariable(ContextImpl& context, int variable, vector<Vec3>& values) const { void CudaIntegrateCustomStepKernel::getPerDofVariable(ContextImpl& context, int variable, vector<Vec3>& values) const {
......
...@@ -11,7 +11,7 @@ extern "C" __global__ void computeFloatSum(const float* __restrict__ sumBuffer, ...@@ -11,7 +11,7 @@ extern "C" __global__ void computeFloatSum(const float* __restrict__ sumBuffer,
tempBuffer[thread] += tempBuffer[thread+i]; tempBuffer[thread] += tempBuffer[thread+i];
} }
if (thread == 0) if (thread == 0)
result[SUM_OUTPUT_INDEX] = tempBuffer[0]; *result = tempBuffer[0];
} }
extern "C" __global__ void computeDoubleSum(const double* __restrict__ sumBuffer, double* result) { extern "C" __global__ void computeDoubleSum(const double* __restrict__ sumBuffer, double* result) {
...@@ -27,7 +27,7 @@ extern "C" __global__ void computeDoubleSum(const double* __restrict__ sumBuffer ...@@ -27,7 +27,7 @@ extern "C" __global__ void computeDoubleSum(const double* __restrict__ sumBuffer
tempBuffer[thread] += tempBuffer[thread+i]; tempBuffer[thread] += tempBuffer[thread+i];
} }
if (thread == 0) if (thread == 0)
result[SUM_OUTPUT_INDEX] = tempBuffer[0]; *result = tempBuffer[0];
} }
extern "C" __global__ void applyPositionDeltas(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta) { extern "C" __global__ void applyPositionDeltas(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta) {
......
...@@ -224,7 +224,6 @@ extern "C" __global__ void applyShakeToVelocities(int numClusters, mixed tol, co ...@@ -224,7 +224,6 @@ extern "C" __global__ void applyShakeToVelocities(int numClusters, mixed tol, co
mixed4 xpj2 = make_mixed4(0); mixed4 xpj2 = make_mixed4(0);
float invMassCentral = params.x; float invMassCentral = params.x;
float avgMass = params.y; float avgMass = params.y;
float d2 = params.z;
float invMassPeripheral = params.w; float invMassPeripheral = params.w;
if (atoms.z != -1) { if (atoms.z != -1) {
pos2 = loadPos(oldPos, posCorrection, atoms.z); pos2 = loadPos(oldPos, posCorrection, atoms.z);
...@@ -245,9 +244,6 @@ extern "C" __global__ void applyShakeToVelocities(int numClusters, mixed tol, co ...@@ -245,9 +244,6 @@ extern "C" __global__ void applyShakeToVelocities(int numClusters, mixed tol, co
mixed rij1sq = rij1.x*rij1.x + rij1.y*rij1.y + rij1.z*rij1.z; mixed rij1sq = rij1.x*rij1.x + rij1.y*rij1.y + rij1.z*rij1.z;
mixed rij2sq = rij2.x*rij2.x + rij2.y*rij2.y + rij2.z*rij2.z; mixed rij2sq = rij2.x*rij2.x + rij2.y*rij2.y + rij2.z*rij2.z;
mixed rij3sq = rij3.x*rij3.x + rij3.y*rij3.y + rij3.z*rij3.z; mixed rij3sq = rij3.x*rij3.x + rij3.y*rij3.y + rij3.z*rij3.z;
mixed ld1 = d2-rij1sq;
mixed ld2 = d2-rij2sq;
mixed ld3 = d2-rij3sq;
// Iterate until convergence. // Iterate until convergence.
...@@ -605,8 +601,6 @@ extern "C" __global__ void computeCCMAVelocityConstraintForce(const int2* __rest ...@@ -605,8 +601,6 @@ extern "C" __global__ void computeCCMAVelocityConstraintForce(const int2* __rest
if (threadIdx.x == 0) if (threadIdx.x == 0)
groupConverged = 1; groupConverged = 1;
__syncthreads(); __syncthreads();
mixed lowerTol = 1-2*tol+tol*tol;
mixed upperTol = 1+2*tol+tol*tol;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_CCMA_CONSTRAINTS; index += blockDim.x*gridDim.x) { for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_CCMA_CONSTRAINTS; index += blockDim.x*gridDim.x) {
// Compute the force due to this constraint. // Compute the force due to this constraint.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment